drm/i915: refactor error detection & collection

This patch refactors the existing error detection and collection code,
placing most of it in i915_handle_error(). Additionally, we introduce a
work queue for scheduling post-crash tasks such as generating a uevent.
Using the uevent facility, userspace should be able to capture a
post-mortem dump for diagnostics.

Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ben Gamari <bgamari.foss@gmail.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
Jesse Barnes 2009-07-11 16:48:03 -04:00 committed by Eric Anholt
parent 832cc28d5b
commit 8a90523639
3 changed files with 161 additions and 74 deletions

View file

@ -229,6 +229,7 @@ typedef struct drm_i915_private {
spinlock_t error_lock;
struct drm_i915_error_state *first_error;
struct work_struct error_work;
/* Register state */
u8 saveLBB;

View file

@ -343,6 +343,8 @@ static int i915_error_state(struct seq_file *m, void *unused)
error = dev_priv->first_error;
seq_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
error->time.tv_usec);
seq_printf(m, "EIR: 0x%08x\n", error->eir);
seq_printf(m, " PGTBL_ER: 0x%08x\n", error->pgtbl_er);
seq_printf(m, " INSTPM: 0x%08x\n", error->instpm);

View file

@ -290,6 +290,35 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
return ret;
}
/**
* i915_error_work_func - do process context error handling work
* @work: work struct
*
* Fire an error uevent so userspace can see that a hang or error
* was detected.
*/
static void i915_error_work_func(struct work_struct *work)
{
drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
error_work);
struct drm_device *dev = dev_priv->dev;
char *event_string = "ERROR=1";
char *envp[] = { event_string, NULL };
DRM_DEBUG("generating error event\n");
kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
}
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
*
* Should be called when an error is detected (either a hang or an error
* interrupt) to capture error state from the time of the error. Fills
* out a structure which becomes available in debugfs for user level tools
* to pick up.
*/
static void i915_capture_error_state(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@ -325,12 +354,137 @@ static void i915_capture_error_state(struct drm_device *dev)
error->acthd = I915_READ(ACTHD_I965);
}
do_gettimeofday(&error->time);
dev_priv->first_error = error;
out:
spin_unlock_irqrestore(&dev_priv->error_lock, flags);
}
/**
* i915_handle_error - handle an error interrupt
* @dev: drm device
*
* Do some basic checking of regsiter state at error interrupt time and
* dump it to the syslog. Also call i915_capture_error_state() to make
* sure we get a record and make it available in debugfs. Fire a uevent
* so userspace knows something bad happened (should trigger collection
* of a ring dump etc.).
*/
static void i915_handle_error(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 eir = I915_READ(EIR);
u32 pipea_stats = I915_READ(PIPEASTAT);
u32 pipeb_stats = I915_READ(PIPEBSTAT);
i915_capture_error_state(dev);
printk(KERN_ERR "render error detected, EIR: 0x%08x\n",
eir);
if (IS_G4X(dev)) {
if (eir & (GM45_ERROR_MEM_PRIV | GM45_ERROR_CP_PRIV)) {
u32 ipeir = I915_READ(IPEIR_I965);
printk(KERN_ERR " IPEIR: 0x%08x\n",
I915_READ(IPEIR_I965));
printk(KERN_ERR " IPEHR: 0x%08x\n",
I915_READ(IPEHR_I965));
printk(KERN_ERR " INSTDONE: 0x%08x\n",
I915_READ(INSTDONE_I965));
printk(KERN_ERR " INSTPS: 0x%08x\n",
I915_READ(INSTPS));
printk(KERN_ERR " INSTDONE1: 0x%08x\n",
I915_READ(INSTDONE1));
printk(KERN_ERR " ACTHD: 0x%08x\n",
I915_READ(ACTHD_I965));
I915_WRITE(IPEIR_I965, ipeir);
(void)I915_READ(IPEIR_I965);
}
if (eir & GM45_ERROR_PAGE_TABLE) {
u32 pgtbl_err = I915_READ(PGTBL_ER);
printk(KERN_ERR "page table error\n");
printk(KERN_ERR " PGTBL_ER: 0x%08x\n",
pgtbl_err);
I915_WRITE(PGTBL_ER, pgtbl_err);
(void)I915_READ(PGTBL_ER);
}
}
if (IS_I9XX(dev)) {
if (eir & I915_ERROR_PAGE_TABLE) {
u32 pgtbl_err = I915_READ(PGTBL_ER);
printk(KERN_ERR "page table error\n");
printk(KERN_ERR " PGTBL_ER: 0x%08x\n",
pgtbl_err);
I915_WRITE(PGTBL_ER, pgtbl_err);
(void)I915_READ(PGTBL_ER);
}
}
if (eir & I915_ERROR_MEMORY_REFRESH) {
printk(KERN_ERR "memory refresh error\n");
printk(KERN_ERR "PIPEASTAT: 0x%08x\n",
pipea_stats);
printk(KERN_ERR "PIPEBSTAT: 0x%08x\n",
pipeb_stats);
/* pipestat has already been acked */
}
if (eir & I915_ERROR_INSTRUCTION) {
printk(KERN_ERR "instruction error\n");
printk(KERN_ERR " INSTPM: 0x%08x\n",
I915_READ(INSTPM));
if (!IS_I965G(dev)) {
u32 ipeir = I915_READ(IPEIR);
printk(KERN_ERR " IPEIR: 0x%08x\n",
I915_READ(IPEIR));
printk(KERN_ERR " IPEHR: 0x%08x\n",
I915_READ(IPEHR));
printk(KERN_ERR " INSTDONE: 0x%08x\n",
I915_READ(INSTDONE));
printk(KERN_ERR " ACTHD: 0x%08x\n",
I915_READ(ACTHD));
I915_WRITE(IPEIR, ipeir);
(void)I915_READ(IPEIR);
} else {
u32 ipeir = I915_READ(IPEIR_I965);
printk(KERN_ERR " IPEIR: 0x%08x\n",
I915_READ(IPEIR_I965));
printk(KERN_ERR " IPEHR: 0x%08x\n",
I915_READ(IPEHR_I965));
printk(KERN_ERR " INSTDONE: 0x%08x\n",
I915_READ(INSTDONE_I965));
printk(KERN_ERR " INSTPS: 0x%08x\n",
I915_READ(INSTPS));
printk(KERN_ERR " INSTDONE1: 0x%08x\n",
I915_READ(INSTDONE1));
printk(KERN_ERR " ACTHD: 0x%08x\n",
I915_READ(ACTHD_I965));
I915_WRITE(IPEIR_I965, ipeir);
(void)I915_READ(IPEIR_I965);
}
}
I915_WRITE(EIR, eir);
(void)I915_READ(EIR);
eir = I915_READ(EIR);
if (eir) {
/*
* some errors might have become stuck,
* mask them.
*/
DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
I915_WRITE(EMR, I915_READ(EMR) | eir);
I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
}
schedule_work(&dev_priv->error_work);
}
irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
{
struct drm_device *dev = (struct drm_device *) arg;
@ -372,6 +526,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
pipea_stats = I915_READ(PIPEASTAT);
pipeb_stats = I915_READ(PIPEBSTAT);
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
i915_handle_error(dev);
/*
* Clear the PIPE(A|B)STAT regs before the IIR
*/
@ -409,80 +566,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
I915_READ(PORT_HOTPLUG_STAT);
}
if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) {
u32 eir = I915_READ(EIR);
i915_capture_error_state(dev);
printk(KERN_ERR "render error detected, EIR: 0x%08x\n",
eir);
if (eir & I915_ERROR_PAGE_TABLE) {
u32 pgtbl_err = I915_READ(PGTBL_ER);
printk(KERN_ERR "page table error\n");
printk(KERN_ERR " PGTBL_ER: 0x%08x\n",
pgtbl_err);
I915_WRITE(PGTBL_ER, pgtbl_err);
(void)I915_READ(PGTBL_ER);
}
if (eir & I915_ERROR_MEMORY_REFRESH) {
printk(KERN_ERR "memory refresh error\n");
printk(KERN_ERR "PIPEASTAT: 0x%08x\n",
pipea_stats);
printk(KERN_ERR "PIPEBSTAT: 0x%08x\n",
pipeb_stats);
/* pipestat has already been acked */
}
if (eir & I915_ERROR_INSTRUCTION) {
printk(KERN_ERR "instruction error\n");
printk(KERN_ERR " INSTPM: 0x%08x\n",
I915_READ(INSTPM));
if (!IS_I965G(dev)) {
u32 ipeir = I915_READ(IPEIR);
printk(KERN_ERR " IPEIR: 0x%08x\n",
I915_READ(IPEIR));
printk(KERN_ERR " IPEHR: 0x%08x\n",
I915_READ(IPEHR));
printk(KERN_ERR " INSTDONE: 0x%08x\n",
I915_READ(INSTDONE));
printk(KERN_ERR " ACTHD: 0x%08x\n",
I915_READ(ACTHD));
I915_WRITE(IPEIR, ipeir);
(void)I915_READ(IPEIR);
} else {
u32 ipeir = I915_READ(IPEIR_I965);
printk(KERN_ERR " IPEIR: 0x%08x\n",
I915_READ(IPEIR_I965));
printk(KERN_ERR " IPEHR: 0x%08x\n",
I915_READ(IPEHR_I965));
printk(KERN_ERR " INSTDONE: 0x%08x\n",
I915_READ(INSTDONE_I965));
printk(KERN_ERR " INSTPS: 0x%08x\n",
I915_READ(INSTPS));
printk(KERN_ERR " INSTDONE1: 0x%08x\n",
I915_READ(INSTDONE1));
printk(KERN_ERR " ACTHD: 0x%08x\n",
I915_READ(ACTHD_I965));
I915_WRITE(IPEIR_I965, ipeir);
(void)I915_READ(IPEIR_I965);
}
}
I915_WRITE(EIR, eir);
(void)I915_READ(EIR);
eir = I915_READ(EIR);
if (eir) {
/*
* some errors might have become stuck,
* mask them.
*/
DRM_ERROR("EIR stuck: 0x%08x, masking\n", eir);
I915_WRITE(EMR, I915_READ(EMR) | eir);
I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
}
}
I915_WRITE(IIR, iir);
new_iir = I915_READ(IIR); /* Flush posted writes */
@ -830,6 +913,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
atomic_set(&dev_priv->irq_received, 0);
INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
INIT_WORK(&dev_priv->error_work, i915_error_work_func);
if (IS_IGDNG(dev)) {
igdng_irq_preinstall(dev);