[PATCH] i386/x86-64: Fix NMI watchdog suspend/resume

Make NMI watchdog suspend/resume work with SMP. We use CPU hotplug to offline
the APs during SMP suspend/resume, so only the BSP executes the sysdev
.suspend/.resume methods; the APs should follow the CPU hotplug code path.

And:

From: Don Zickus <dzickus@redhat.com>

Make the start/stop paths of the NMI watchdog more robust so that they handle
the suspend/resume cases more gracefully.

AK: I merged the two patches together.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
This commit is contained in:
Shaohua Li 2006-09-26 10:52:27 +02:00 committed by Andi Kleen
parent c41c5cd3b2
commit 4038f901cf
6 changed files with 58 additions and 15 deletions

View file

@ -63,7 +63,6 @@ struct nmi_watchdog_ctlblk {
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */ /* local prototypes */
static void stop_apic_nmi_watchdog(void *unused);
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
extern void show_registers(struct pt_regs *regs); extern void show_registers(struct pt_regs *regs);
@ -341,15 +340,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{ {
/* only CPU0 goes here, other CPUs should be offline */
nmi_pm_active = atomic_read(&nmi_active); nmi_pm_active = atomic_read(&nmi_active);
disable_lapic_nmi_watchdog(); stop_apic_nmi_watchdog(NULL);
BUG_ON(atomic_read(&nmi_active) != 0);
return 0; return 0;
} }
static int lapic_nmi_resume(struct sys_device *dev) static int lapic_nmi_resume(struct sys_device *dev)
{ {
if (nmi_pm_active > 0) /* only CPU0 goes here, other CPUs should be offline */
enable_lapic_nmi_watchdog(); if (nmi_pm_active > 0) {
setup_apic_nmi_watchdog(NULL);
touch_nmi_watchdog();
}
return 0; return 0;
} }
@ -626,11 +630,21 @@ static void stop_p4_watchdog(void)
void setup_apic_nmi_watchdog (void *unused) void setup_apic_nmi_watchdog (void *unused)
{ {
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
/* only support LOCAL and IO APICs for now */ /* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) && if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC)) (nmi_watchdog != NMI_IO_APIC))
return; return;
if (wd->enabled == 1)
return;
/* cheap hack to support suspend/resume */
/* if cpu0 is not active neither should the other cpus */
if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
return;
if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) { switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD: case X86_VENDOR_AMD:
@ -663,17 +677,22 @@ void setup_apic_nmi_watchdog (void *unused)
return; return;
} }
} }
__get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1; wd->enabled = 1;
atomic_inc(&nmi_active); atomic_inc(&nmi_active);
} }
static void stop_apic_nmi_watchdog(void *unused) void stop_apic_nmi_watchdog(void *unused)
{ {
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
/* only support LOCAL and IO APICs for now */ /* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) && if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC)) (nmi_watchdog != NMI_IO_APIC))
return; return;
if (wd->enabled == 0)
return;
if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) { switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD: case X86_VENDOR_AMD:
@ -697,7 +716,7 @@ static void stop_apic_nmi_watchdog(void *unused)
return; return;
} }
} }
__get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0; wd->enabled = 0;
atomic_dec(&nmi_active); atomic_dec(&nmi_active);
} }

View file

@ -1376,7 +1376,8 @@ int __cpu_disable(void)
*/ */
if (cpu == 0) if (cpu == 0)
return -EBUSY; return -EBUSY;
if (nmi_watchdog == NMI_LOCAL_APIC)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC(); clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */ /* Allow any queued timer interrupts to get serviced */
local_irq_enable(); local_irq_enable();

View file

@ -63,7 +63,6 @@ struct nmi_watchdog_ctlblk {
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */ /* local prototypes */
static void stop_apic_nmi_watchdog(void *unused);
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
/* converts an msr to an appropriate reservation bit */ /* converts an msr to an appropriate reservation bit */
@ -337,15 +336,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{ {
/* only CPU0 goes here, other CPUs should be offline */
nmi_pm_active = atomic_read(&nmi_active); nmi_pm_active = atomic_read(&nmi_active);
disable_lapic_nmi_watchdog(); stop_apic_nmi_watchdog(NULL);
BUG_ON(atomic_read(&nmi_active) != 0);
return 0; return 0;
} }
static int lapic_nmi_resume(struct sys_device *dev) static int lapic_nmi_resume(struct sys_device *dev)
{ {
if (nmi_pm_active > 0) /* only CPU0 goes here, other CPUs should be offline */
enable_lapic_nmi_watchdog(); if (nmi_pm_active > 0) {
setup_apic_nmi_watchdog(NULL);
touch_nmi_watchdog();
}
return 0; return 0;
} }
@ -561,11 +565,21 @@ static void stop_p4_watchdog(void)
void setup_apic_nmi_watchdog(void *unused) void setup_apic_nmi_watchdog(void *unused)
{ {
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
/* only support LOCAL and IO APICs for now */ /* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) && if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC)) (nmi_watchdog != NMI_IO_APIC))
return; return;
if (wd->enabled == 1)
return;
/* cheap hack to support suspend/resume */
/* if cpu0 is not active neither should the other cpus */
if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
return;
if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) { switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD: case X86_VENDOR_AMD:
@ -582,17 +596,22 @@ void setup_apic_nmi_watchdog(void *unused)
return; return;
} }
} }
__get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1; wd->enabled = 1;
atomic_inc(&nmi_active); atomic_inc(&nmi_active);
} }
static void stop_apic_nmi_watchdog(void *unused) void stop_apic_nmi_watchdog(void *unused)
{ {
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
/* only support LOCAL and IO APICs for now */ /* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) && if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC)) (nmi_watchdog != NMI_IO_APIC))
return; return;
if (wd->enabled == 0)
return;
if (nmi_watchdog == NMI_LOCAL_APIC) { if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) { switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD: case X86_VENDOR_AMD:
@ -607,7 +626,7 @@ static void stop_apic_nmi_watchdog(void *unused)
return; return;
} }
} }
__get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0; wd->enabled = 0;
atomic_dec(&nmi_active); atomic_dec(&nmi_active);
} }

View file

@ -1233,6 +1233,8 @@ int __cpu_disable(void)
if (cpu == 0) if (cpu == 0)
return -EBUSY; return -EBUSY;
if (nmi_watchdog == NMI_LOCAL_APIC)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC(); clear_local_APIC();
/* /*

View file

@ -23,6 +23,7 @@ extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int);
extern void setup_apic_nmi_watchdog (void *); extern void setup_apic_nmi_watchdog (void *);
extern void stop_apic_nmi_watchdog (void *);
extern void disable_timer_nmi_watchdog(void); extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);

View file

@ -54,6 +54,7 @@ extern int reserve_evntsel_nmi(unsigned int);
extern void release_evntsel_nmi(unsigned int); extern void release_evntsel_nmi(unsigned int);
extern void setup_apic_nmi_watchdog (void *); extern void setup_apic_nmi_watchdog (void *);
extern void stop_apic_nmi_watchdog (void *);
extern void disable_timer_nmi_watchdog(void); extern void disable_timer_nmi_watchdog(void);
extern void enable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void);
extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);