mirror of
https://github.com/adulau/aha.git
synced 2024-12-27 11:16:11 +00:00
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (21 commits) x86, mce: Fix compilation with !CONFIG_DEBUG_FS in mce-severity.c x86, mce: CE in last bank prevents panic by unknown MCE x86, mce: Fake panic support for MCE testing x86, mce: Move debugfs mce dir creating to mce.c x86, mce: Support specifying raise mode for software MCE injection x86, mce: Support specifying context for software mce injection x86, mce: fix reporting of Thermal Monitoring mechanism enabled x86, mce: remove never executed code x86, mce: add missing __cpuinit tags x86, mce: fix "mce" boot option handling for CONFIG_X86_NEW_MCE x86, mce: don't log boot MCEs on Pentium M (model == 13) CPUs x86: mce: Lower maximum number of banks to architecture limit x86: mce: macros to compute banks MSRs x86: mce: Move per bank data in a single datastructure x86: mce: Move code in mce.c x86: mce: Rename CONFIG_X86_NEW_MCE to CONFIG_X86_MCE x86: mce: Remove old i386 machine check code x86: mce: Update X86_MCE description in x86/Kconfig x86: mce: Make CONFIG_X86_ANCIENT_MCE dependent on CONFIG_X86_MCE x86, mce: use atomic_inc_return() instead of add by 1 ... Manually fixed up trivial conflicts: Documentation/feature-removal-schedule.txt arch/x86/kernel/cpu/mcheck/mce.c
This commit is contained in:
commit
df58bee21e
20 changed files with 372 additions and 786 deletions
|
@ -428,16 +428,6 @@ Who: Johannes Berg <johannes@sipsolutions.net>
|
|||
|
||||
----------------------------
|
||||
|
||||
What: CONFIG_X86_OLD_MCE
|
||||
When: 2.6.32
|
||||
Why: Remove the old legacy 32bit machine check code. This has been
|
||||
superseded by the newer machine check code from the 64bit port,
|
||||
but the old version has been kept around for easier testing. Note this
|
||||
doesn't impact the old P5 and WinChip machine check handlers.
|
||||
Who: Andi Kleen <andi@firstfloor.org>
|
||||
|
||||
----------------------------
|
||||
|
||||
What: lock_policy_rwsem_* and unlock_policy_rwsem_* will not be
|
||||
exported interface anymore.
|
||||
When: 2.6.33
|
||||
|
|
|
@ -783,41 +783,17 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
|
|||
increased on these systems.
|
||||
|
||||
config X86_MCE
|
||||
bool "Machine Check Exception"
|
||||
bool "Machine Check / overheating reporting"
|
||||
---help---
|
||||
Machine Check Exception support allows the processor to notify the
|
||||
kernel if it detects a problem (e.g. overheating, component failure).
|
||||
Machine Check support allows the processor to notify the
|
||||
kernel if it detects a problem (e.g. overheating, data corruption).
|
||||
The action the kernel takes depends on the severity of the problem,
|
||||
ranging from a warning message on the console, to halting the machine.
|
||||
Your processor must be a Pentium or newer to support this - check the
|
||||
flags in /proc/cpuinfo for mce. Note that some older Pentium systems
|
||||
have a design flaw which leads to false MCE events - hence MCE is
|
||||
disabled on all P5 processors, unless explicitly enabled with "mce"
|
||||
as a boot argument. Similarly, if MCE is built in and creates a
|
||||
problem on some new non-standard machine, you can boot with "nomce"
|
||||
to disable it. MCE support simply ignores non-MCE processors like
|
||||
the 386 and 486, so nearly everyone can say Y here.
|
||||
|
||||
config X86_OLD_MCE
|
||||
depends on X86_32 && X86_MCE
|
||||
bool "Use legacy machine check code (will go away)"
|
||||
default n
|
||||
select X86_ANCIENT_MCE
|
||||
---help---
|
||||
Use the old i386 machine check code. This is merely intended for
|
||||
testing in a transition period. Try this if you run into any machine
|
||||
check related software problems, but report the problem to
|
||||
linux-kernel. When in doubt say no.
|
||||
|
||||
config X86_NEW_MCE
|
||||
depends on X86_MCE
|
||||
bool
|
||||
default y if (!X86_OLD_MCE && X86_32) || X86_64
|
||||
ranging from warning messages to halting the machine.
|
||||
|
||||
config X86_MCE_INTEL
|
||||
def_bool y
|
||||
prompt "Intel MCE features"
|
||||
depends on X86_NEW_MCE && X86_LOCAL_APIC
|
||||
depends on X86_MCE && X86_LOCAL_APIC
|
||||
---help---
|
||||
Additional support for intel specific MCE features such as
|
||||
the thermal monitor.
|
||||
|
@ -825,14 +801,14 @@ config X86_MCE_INTEL
|
|||
config X86_MCE_AMD
|
||||
def_bool y
|
||||
prompt "AMD MCE features"
|
||||
depends on X86_NEW_MCE && X86_LOCAL_APIC
|
||||
depends on X86_MCE && X86_LOCAL_APIC
|
||||
---help---
|
||||
Additional support for AMD specific MCE features such as
|
||||
the DRAM Error Threshold.
|
||||
|
||||
config X86_ANCIENT_MCE
|
||||
def_bool n
|
||||
depends on X86_32
|
||||
depends on X86_32 && X86_MCE
|
||||
prompt "Support for old Pentium 5 / WinChip machine checks"
|
||||
---help---
|
||||
Include support for machine check handling on old Pentium 5 or WinChip
|
||||
|
@ -845,36 +821,16 @@ config X86_MCE_THRESHOLD
|
|||
default y
|
||||
|
||||
config X86_MCE_INJECT
|
||||
depends on X86_NEW_MCE
|
||||
depends on X86_MCE
|
||||
tristate "Machine check injector support"
|
||||
---help---
|
||||
Provide support for injecting machine checks for testing purposes.
|
||||
If you don't know what a machine check is and you don't do kernel
|
||||
QA it is safe to say n.
|
||||
|
||||
config X86_MCE_NONFATAL
|
||||
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
|
||||
depends on X86_OLD_MCE
|
||||
---help---
|
||||
Enabling this feature starts a timer that triggers every 5 seconds which
|
||||
will look at the machine check registers to see if anything happened.
|
||||
Non-fatal problems automatically get corrected (but still logged).
|
||||
Disable this if you don't want to see these messages.
|
||||
Seeing the messages this option prints out may be indicative of dying
|
||||
or out-of-spec (ie, overclocked) hardware.
|
||||
This option only does something on certain CPUs.
|
||||
(AMD Athlon/Duron and Intel Pentium 4)
|
||||
|
||||
config X86_MCE_P4THERMAL
|
||||
bool "check for P4 thermal throttling interrupt."
|
||||
depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
|
||||
---help---
|
||||
Enabling this feature will cause a message to be printed when the P4
|
||||
enters thermal throttling.
|
||||
|
||||
config X86_THERMAL_VECTOR
|
||||
def_bool y
|
||||
depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
|
||||
depends on X86_MCE_INTEL
|
||||
|
||||
config VM86
|
||||
bool "Enable VM86 support" if EMBEDDED
|
||||
|
|
|
@ -61,7 +61,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
|
|||
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
#ifdef CONFIG_X86_MCE
|
||||
BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
|
||||
#endif
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
*/
|
||||
|
||||
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
|
||||
#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
|
||||
#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
|
||||
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
|
||||
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
|
||||
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
|
||||
|
@ -38,6 +38,14 @@
|
|||
#define MCM_ADDR_MEM 3 /* memory address */
|
||||
#define MCM_ADDR_GENERIC 7 /* generic */
|
||||
|
||||
#define MCJ_CTX_MASK 3
|
||||
#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
|
||||
#define MCJ_CTX_RANDOM 0 /* inject context: random */
|
||||
#define MCJ_CTX_PROCESS 1 /* inject context: process */
|
||||
#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
|
||||
#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
|
||||
#define MCJ_EXCEPTION 8 /* raise as exception */
|
||||
|
||||
/* Fields are zero when not available */
|
||||
struct mce {
|
||||
__u64 status;
|
||||
|
@ -48,8 +56,8 @@ struct mce {
|
|||
__u64 tsc; /* cpu time stamp counter */
|
||||
__u64 time; /* wall time_t when error was detected */
|
||||
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
|
||||
__u8 pad1;
|
||||
__u16 pad2;
|
||||
__u8 inject_flags; /* software inject flags */
|
||||
__u16 pad;
|
||||
__u32 cpuid; /* CPUID 1 EAX */
|
||||
__u8 cs; /* code segment */
|
||||
__u8 bank; /* machine check bank */
|
||||
|
@ -115,13 +123,6 @@ void mcheck_init(struct cpuinfo_x86 *c);
|
|||
static inline void mcheck_init(struct cpuinfo_x86 *c) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_OLD_MCE
|
||||
extern int nr_mce_banks;
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_ANCIENT_MCE
|
||||
void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
|
||||
void winchip_mcheck_init(struct cpuinfo_x86 *c);
|
||||
|
@ -137,10 +138,11 @@ void mce_log(struct mce *m);
|
|||
DECLARE_PER_CPU(struct sys_device, mce_dev);
|
||||
|
||||
/*
|
||||
* To support more than 128 would need to escape the predefined
|
||||
* Linux defined extended banks first.
|
||||
* Maximum banks number.
|
||||
* This is the limit of the current register layout on
|
||||
* Intel CPUs.
|
||||
*/
|
||||
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
|
||||
#define MAX_NR_BANKS 32
|
||||
|
||||
#ifdef CONFIG_X86_MCE_INTEL
|
||||
extern int mce_cmci_disabled;
|
||||
|
@ -208,11 +210,7 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
|||
|
||||
void intel_init_thermal(struct cpuinfo_x86 *c);
|
||||
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
void mce_log_therm_throt_event(__u64 status);
|
||||
#else
|
||||
static inline void mce_log_therm_throt_event(__u64 status) {}
|
||||
#endif
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
#endif /* _ASM_X86_MCE_H */
|
||||
|
|
|
@ -81,8 +81,15 @@
|
|||
#define MSR_IA32_MC0_ADDR 0x00000402
|
||||
#define MSR_IA32_MC0_MISC 0x00000403
|
||||
|
||||
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
|
||||
#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
|
||||
#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
|
||||
#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
|
||||
|
||||
/* These are consecutive and not in the normal 4er MCE bank block */
|
||||
#define MSR_IA32_MC0_CTL2 0x00000280
|
||||
#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
|
||||
|
||||
#define CMCI_EN (1ULL << 30)
|
||||
#define CMCI_THRESHOLD_MASK 0xffffULL
|
||||
|
||||
|
@ -215,6 +222,10 @@
|
|||
|
||||
#define THERM_STATUS_PROCHOT (1 << 0)
|
||||
|
||||
#define MSR_THERM2_CTL 0x0000019d
|
||||
|
||||
#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
|
||||
|
||||
#define MSR_IA32_MISC_ENABLE 0x000001a0
|
||||
|
||||
/* MISC_ENABLE bits: architectural */
|
||||
|
|
|
@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
|
|||
|
||||
static inline int mce_in_progress(void)
|
||||
{
|
||||
#if defined(CONFIG_X86_NEW_MCE)
|
||||
#if defined(CONFIG_X86_MCE)
|
||||
return atomic_read(&mce_entry) > 0;
|
||||
#endif
|
||||
return 0;
|
||||
|
|
|
@ -1,11 +1,8 @@
|
|||
obj-y = mce.o
|
||||
obj-y = mce.o mce-severity.o
|
||||
|
||||
obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
|
||||
obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
|
||||
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
|
||||
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
|
||||
obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
|
||||
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
|
||||
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
|
||||
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
|
||||
|
||||
|
|
|
@ -1,116 +0,0 @@
|
|||
/*
|
||||
* Athlon specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Dave Jones <davej@redhat.com>
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
/* Machine Check Handler For AMD Athlon/Duron: */
|
||||
static void k7_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int recover = 1;
|
||||
int i;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
if (mcgstl & (1<<0)) /* Recoverable ? */
|
||||
recover = 0;
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
|
||||
smp_processor_id(), mcgsth, mcgstl);
|
||||
|
||||
for (i = 1; i < nr_mce_banks; i++) {
|
||||
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
|
||||
if (high & (1<<31)) {
|
||||
char misc[20];
|
||||
char addr[24];
|
||||
|
||||
misc[0] = '\0';
|
||||
addr[0] = '\0';
|
||||
|
||||
if (high & (1<<29))
|
||||
recover |= 1;
|
||||
if (high & (1<<25))
|
||||
recover |= 2;
|
||||
high &= ~(1<<31);
|
||||
|
||||
if (high & (1<<27)) {
|
||||
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
|
||||
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
|
||||
}
|
||||
if (high & (1<<26)) {
|
||||
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
|
||||
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
|
||||
smp_processor_id(), i, high, low, misc, addr);
|
||||
|
||||
/* Clear it: */
|
||||
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
|
||||
/* Serialize: */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
if (recover & 2)
|
||||
panic("CPU context corrupt");
|
||||
if (recover & 1)
|
||||
panic("Unable to continue");
|
||||
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
|
||||
mcgstl &= ~(1<<2);
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
|
||||
/* AMD K7 machine check is Intel like: */
|
||||
void amd_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_MCE))
|
||||
return;
|
||||
|
||||
machine_check_vector = k7_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
nr_mce_banks = l & 0xff;
|
||||
|
||||
/*
|
||||
* Clear status for MC index 0 separately, we don't touch CTL,
|
||||
* as some K7 Athlons cause spurious MCEs when its enabled:
|
||||
*/
|
||||
if (boot_cpu_data.x86 == 6) {
|
||||
wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
|
||||
i = 1;
|
||||
} else
|
||||
i = 0;
|
||||
|
||||
for (; i < nr_mce_banks; i++) {
|
||||
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
}
|
||||
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
}
|
|
@ -18,7 +18,12 @@
|
|||
#include <linux/string.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/apic.h>
|
||||
|
||||
/* Update fake mce registers on current CPU. */
|
||||
static void inject_mce(struct mce *m)
|
||||
|
@ -39,44 +44,141 @@ static void inject_mce(struct mce *m)
|
|||
i->finished = 1;
|
||||
}
|
||||
|
||||
struct delayed_mce {
|
||||
struct timer_list timer;
|
||||
struct mce m;
|
||||
};
|
||||
|
||||
/* Inject mce on current CPU */
|
||||
static void raise_mce(unsigned long data)
|
||||
static void raise_poll(struct mce *m)
|
||||
{
|
||||
struct delayed_mce *dm = (struct delayed_mce *)data;
|
||||
struct mce *m = &dm->m;
|
||||
int cpu = m->extcpu;
|
||||
unsigned long flags;
|
||||
mce_banks_t b;
|
||||
|
||||
inject_mce(m);
|
||||
if (m->status & MCI_STATUS_UC) {
|
||||
struct pt_regs regs;
|
||||
memset(&b, 0xff, sizeof(mce_banks_t));
|
||||
local_irq_save(flags);
|
||||
machine_check_poll(0, &b);
|
||||
local_irq_restore(flags);
|
||||
m->finished = 0;
|
||||
}
|
||||
|
||||
static void raise_exception(struct mce *m, struct pt_regs *pregs)
|
||||
{
|
||||
struct pt_regs regs;
|
||||
unsigned long flags;
|
||||
|
||||
if (!pregs) {
|
||||
memset(&regs, 0, sizeof(struct pt_regs));
|
||||
regs.ip = m->ip;
|
||||
regs.cs = m->cs;
|
||||
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
|
||||
do_machine_check(&regs, 0);
|
||||
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
|
||||
} else {
|
||||
mce_banks_t b;
|
||||
memset(&b, 0xff, sizeof(mce_banks_t));
|
||||
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
|
||||
machine_check_poll(0, &b);
|
||||
mce_notify_irq();
|
||||
printk(KERN_INFO "Finished machine check poll on CPU %d\n",
|
||||
cpu);
|
||||
pregs = &regs;
|
||||
}
|
||||
kfree(dm);
|
||||
/* in mcheck exeception handler, irq will be disabled */
|
||||
local_irq_save(flags);
|
||||
do_machine_check(pregs, 0);
|
||||
local_irq_restore(flags);
|
||||
m->finished = 0;
|
||||
}
|
||||
|
||||
static cpumask_t mce_inject_cpumask;
|
||||
|
||||
static int mce_raise_notify(struct notifier_block *self,
|
||||
unsigned long val, void *data)
|
||||
{
|
||||
struct die_args *args = (struct die_args *)data;
|
||||
int cpu = smp_processor_id();
|
||||
struct mce *m = &__get_cpu_var(injectm);
|
||||
if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
|
||||
return NOTIFY_DONE;
|
||||
cpu_clear(cpu, mce_inject_cpumask);
|
||||
if (m->inject_flags & MCJ_EXCEPTION)
|
||||
raise_exception(m, args->regs);
|
||||
else if (m->status)
|
||||
raise_poll(m);
|
||||
return NOTIFY_STOP;
|
||||
}
|
||||
|
||||
static struct notifier_block mce_raise_nb = {
|
||||
.notifier_call = mce_raise_notify,
|
||||
.priority = 1000,
|
||||
};
|
||||
|
||||
/* Inject mce on current CPU */
|
||||
static int raise_local(struct mce *m)
|
||||
{
|
||||
int context = MCJ_CTX(m->inject_flags);
|
||||
int ret = 0;
|
||||
int cpu = m->extcpu;
|
||||
|
||||
if (m->inject_flags & MCJ_EXCEPTION) {
|
||||
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
|
||||
switch (context) {
|
||||
case MCJ_CTX_IRQ:
|
||||
/*
|
||||
* Could do more to fake interrupts like
|
||||
* calling irq_enter, but the necessary
|
||||
* machinery isn't exported currently.
|
||||
*/
|
||||
/*FALL THROUGH*/
|
||||
case MCJ_CTX_PROCESS:
|
||||
raise_exception(m, NULL);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_INFO "Invalid MCE context\n");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
|
||||
} else if (m->status) {
|
||||
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
|
||||
raise_poll(m);
|
||||
mce_notify_irq();
|
||||
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
|
||||
} else
|
||||
m->finished = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void raise_mce(struct mce *m)
|
||||
{
|
||||
int context = MCJ_CTX(m->inject_flags);
|
||||
|
||||
inject_mce(m);
|
||||
|
||||
if (context == MCJ_CTX_RANDOM)
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
if (m->inject_flags & MCJ_NMI_BROADCAST) {
|
||||
unsigned long start;
|
||||
int cpu;
|
||||
get_online_cpus();
|
||||
mce_inject_cpumask = cpu_online_map;
|
||||
cpu_clear(get_cpu(), mce_inject_cpumask);
|
||||
for_each_online_cpu(cpu) {
|
||||
struct mce *mcpu = &per_cpu(injectm, cpu);
|
||||
if (!mcpu->finished ||
|
||||
MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
|
||||
cpu_clear(cpu, mce_inject_cpumask);
|
||||
}
|
||||
if (!cpus_empty(mce_inject_cpumask))
|
||||
apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
|
||||
start = jiffies;
|
||||
while (!cpus_empty(mce_inject_cpumask)) {
|
||||
if (!time_before(jiffies, start + 2*HZ)) {
|
||||
printk(KERN_ERR
|
||||
"Timeout waiting for mce inject NMI %lx\n",
|
||||
*cpus_addr(mce_inject_cpumask));
|
||||
break;
|
||||
}
|
||||
cpu_relax();
|
||||
}
|
||||
raise_local(m);
|
||||
put_cpu();
|
||||
put_online_cpus();
|
||||
} else
|
||||
#endif
|
||||
raise_local(m);
|
||||
}
|
||||
|
||||
/* Error injection interface */
|
||||
static ssize_t mce_write(struct file *filp, const char __user *ubuf,
|
||||
size_t usize, loff_t *off)
|
||||
{
|
||||
struct delayed_mce *dm;
|
||||
struct mce m;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
|
@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
|
|||
if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
|
||||
return -EINVAL;
|
||||
|
||||
dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
|
||||
if (!dm)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* Need to give user space some time to set everything up,
|
||||
* so do it a jiffie or two later everywhere.
|
||||
* Should we use a hrtimer here for better synchronization?
|
||||
*/
|
||||
memcpy(&dm->m, &m, sizeof(struct mce));
|
||||
setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
|
||||
dm->timer.expires = jiffies + 2;
|
||||
add_timer_on(&dm->timer, m.extcpu);
|
||||
schedule_timeout(2);
|
||||
raise_mce(&m);
|
||||
return usize;
|
||||
}
|
||||
|
||||
|
@ -116,6 +211,7 @@ static int inject_init(void)
|
|||
{
|
||||
printk(KERN_INFO "Machine check injector initialized\n");
|
||||
mce_chrdev_ops.write = mce_write;
|
||||
register_die_notifier(&mce_raise_nb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include <linux/sysdev.h>
|
||||
#include <asm/mce.h>
|
||||
|
||||
enum severity_level {
|
||||
|
@ -10,6 +11,20 @@ enum severity_level {
|
|||
MCE_PANIC_SEVERITY,
|
||||
};
|
||||
|
||||
#define ATTR_LEN 16
|
||||
|
||||
/* One object for each MCE bank, shared by all CPUs */
|
||||
struct mce_bank {
|
||||
u64 ctl; /* subevents to enable */
|
||||
unsigned char init; /* initialise bank? */
|
||||
struct sysdev_attribute attr; /* sysdev attribute */
|
||||
char attrname[ATTR_LEN]; /* attribute name */
|
||||
};
|
||||
|
||||
int mce_severity(struct mce *a, int tolerant, char **msg);
|
||||
struct dentry *mce_get_debugfs_dir(void);
|
||||
|
||||
extern int mce_ser;
|
||||
|
||||
extern struct mce_bank *mce_banks;
|
||||
|
||||
|
|
|
@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
static void *s_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
if (*pos >= ARRAY_SIZE(severities))
|
||||
|
@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void)
|
|||
{
|
||||
struct dentry *dmce = NULL, *fseverities_coverage = NULL;
|
||||
|
||||
dmce = debugfs_create_dir("mce", NULL);
|
||||
dmce = mce_get_debugfs_dir();
|
||||
if (dmce == NULL)
|
||||
goto err_out;
|
||||
fseverities_coverage = debugfs_create_file("severities-coverage",
|
||||
|
@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void)
|
|||
return 0;
|
||||
|
||||
err_out:
|
||||
if (fseverities_coverage)
|
||||
debugfs_remove(fseverities_coverage);
|
||||
if (dmce)
|
||||
debugfs_remove(dmce);
|
||||
return -ENOMEM;
|
||||
}
|
||||
late_initcall(severities_debugfs_init);
|
||||
#endif
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <linux/smp.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/hw_irq.h>
|
||||
|
@ -45,21 +46,8 @@
|
|||
|
||||
#include "mce-internal.h"
|
||||
|
||||
/* Handle unconfigured int18 (should never happen) */
|
||||
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
/* Call the installed machine check handler for this CPU setup. */
|
||||
void (*machine_check_vector)(struct pt_regs *, long error_code) =
|
||||
unexpected_machine_check;
|
||||
|
||||
int mce_disabled __read_mostly;
|
||||
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
|
||||
#define MISC_MCELOG_MINOR 227
|
||||
|
||||
#define SPINUNIT 100 /* 100ns */
|
||||
|
@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
|
|||
*/
|
||||
static int tolerant __read_mostly = 1;
|
||||
static int banks __read_mostly;
|
||||
static u64 *bank __read_mostly;
|
||||
static int rip_msr __read_mostly;
|
||||
static int mce_bootlog __read_mostly = -1;
|
||||
static int monarch_timeout __read_mostly = -1;
|
||||
|
@ -87,13 +74,13 @@ int mce_cmci_disabled __read_mostly;
|
|||
int mce_ignore_ce __read_mostly;
|
||||
int mce_ser __read_mostly;
|
||||
|
||||
struct mce_bank *mce_banks __read_mostly;
|
||||
|
||||
/* User mode helper program triggered by machine check event */
|
||||
static unsigned long mce_need_notify;
|
||||
static char mce_helper[128];
|
||||
static char *mce_helper_argv[2] = { mce_helper, NULL };
|
||||
|
||||
static unsigned long dont_init_banks;
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
|
||||
static DEFINE_PER_CPU(struct mce, mces_seen);
|
||||
static int cpu_missing;
|
||||
|
@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
|
|||
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
|
||||
};
|
||||
|
||||
static inline int skip_bank_init(int i)
|
||||
{
|
||||
return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct work_struct, mce_work);
|
||||
|
||||
/* Do initial initialization of a struct mce */
|
||||
|
@ -232,6 +214,9 @@ static void print_mce_tail(void)
|
|||
|
||||
static atomic_t mce_paniced;
|
||||
|
||||
static int fake_panic;
|
||||
static atomic_t mce_fake_paniced;
|
||||
|
||||
/* Panic in progress. Enable interrupts and wait for final IPI */
|
||||
static void wait_for_panic(void)
|
||||
{
|
||||
|
@ -249,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
|
|||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Make sure only one CPU runs in machine check panic
|
||||
*/
|
||||
if (atomic_add_return(1, &mce_paniced) > 1)
|
||||
wait_for_panic();
|
||||
barrier();
|
||||
if (!fake_panic) {
|
||||
/*
|
||||
* Make sure only one CPU runs in machine check panic
|
||||
*/
|
||||
if (atomic_inc_return(&mce_paniced) > 1)
|
||||
wait_for_panic();
|
||||
barrier();
|
||||
|
||||
bust_spinlocks(1);
|
||||
console_verbose();
|
||||
bust_spinlocks(1);
|
||||
console_verbose();
|
||||
} else {
|
||||
/* Don't log too much for fake panic */
|
||||
if (atomic_inc_return(&mce_fake_paniced) > 1)
|
||||
return;
|
||||
}
|
||||
print_mce_head();
|
||||
/* First print corrected ones that are still unlogged */
|
||||
for (i = 0; i < MCE_LOG_LEN; i++) {
|
||||
|
@ -284,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
|
|||
print_mce_tail();
|
||||
if (exp)
|
||||
printk(KERN_EMERG "Machine check: %s\n", exp);
|
||||
if (panic_timeout == 0)
|
||||
panic_timeout = mce_panic_timeout;
|
||||
panic(msg);
|
||||
if (!fake_panic) {
|
||||
if (panic_timeout == 0)
|
||||
panic_timeout = mce_panic_timeout;
|
||||
panic(msg);
|
||||
} else
|
||||
printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
|
||||
}
|
||||
|
||||
/* Support code for software error injection */
|
||||
|
@ -296,11 +290,11 @@ static int msr_to_offset(u32 msr)
|
|||
unsigned bank = __get_cpu_var(injectm.bank);
|
||||
if (msr == rip_msr)
|
||||
return offsetof(struct mce, ip);
|
||||
if (msr == MSR_IA32_MC0_STATUS + bank*4)
|
||||
if (msr == MSR_IA32_MCx_STATUS(bank))
|
||||
return offsetof(struct mce, status);
|
||||
if (msr == MSR_IA32_MC0_ADDR + bank*4)
|
||||
if (msr == MSR_IA32_MCx_ADDR(bank))
|
||||
return offsetof(struct mce, addr);
|
||||
if (msr == MSR_IA32_MC0_MISC + bank*4)
|
||||
if (msr == MSR_IA32_MCx_MISC(bank))
|
||||
return offsetof(struct mce, misc);
|
||||
if (msr == MSR_IA32_MCG_STATUS)
|
||||
return offsetof(struct mce, mcgstatus);
|
||||
|
@ -505,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||
|
||||
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!bank[i] || !test_bit(i, *b))
|
||||
if (!mce_banks[i].ctl || !test_bit(i, *b))
|
||||
continue;
|
||||
|
||||
m.misc = 0;
|
||||
|
@ -514,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||
m.tsc = 0;
|
||||
|
||||
barrier();
|
||||
m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
|
||||
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
|
||||
if (!(m.status & MCI_STATUS_VAL))
|
||||
continue;
|
||||
|
||||
|
@ -529,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||
continue;
|
||||
|
||||
if (m.status & MCI_STATUS_MISCV)
|
||||
m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
|
||||
m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
|
||||
if (m.status & MCI_STATUS_ADDRV)
|
||||
m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
|
||||
m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
|
||||
|
||||
if (!(flags & MCP_TIMESTAMP))
|
||||
m.tsc = 0;
|
||||
|
@ -547,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||
/*
|
||||
* Clear state for this bank.
|
||||
*/
|
||||
mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
|
||||
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -568,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
|
||||
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
|
||||
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
|
||||
return 1;
|
||||
}
|
||||
|
@ -628,7 +622,7 @@ out:
|
|||
* This way we prevent any potential data corruption in a unrecoverable case
|
||||
* and also makes sure always all CPU's errors are examined.
|
||||
*
|
||||
* Also this detects the case of an machine check event coming from outer
|
||||
* Also this detects the case of a machine check event coming from outer
|
||||
* space (not detected by any CPUs) In this case some external agent wants
|
||||
* us to shut down, so panic too.
|
||||
*
|
||||
|
@ -681,7 +675,7 @@ static void mce_reign(void)
|
|||
* No machine check event found. Must be some external
|
||||
* source or one CPU is hung. Panic.
|
||||
*/
|
||||
if (!m && tolerant < 3)
|
||||
if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
|
||||
mce_panic("Machine check from unknown source", NULL, NULL);
|
||||
|
||||
/*
|
||||
|
@ -715,7 +709,7 @@ static int mce_start(int *no_way_out)
|
|||
* global_nwo should be updated before mce_callin
|
||||
*/
|
||||
smp_wmb();
|
||||
order = atomic_add_return(1, &mce_callin);
|
||||
order = atomic_inc_return(&mce_callin);
|
||||
|
||||
/*
|
||||
* Wait for everyone.
|
||||
|
@ -852,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear)
|
|||
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (test_bit(i, toclear))
|
||||
mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
|
||||
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -905,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
mce_setup(&m);
|
||||
|
||||
m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
|
||||
no_way_out = mce_no_way_out(&m, &msg);
|
||||
|
||||
final = &__get_cpu_var(mces_seen);
|
||||
*final = m;
|
||||
|
||||
no_way_out = mce_no_way_out(&m, &msg);
|
||||
|
||||
barrier();
|
||||
|
||||
/*
|
||||
|
@ -926,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
order = mce_start(&no_way_out);
|
||||
for (i = 0; i < banks; i++) {
|
||||
__clear_bit(i, toclear);
|
||||
if (!bank[i])
|
||||
if (!mce_banks[i].ctl)
|
||||
continue;
|
||||
|
||||
m.misc = 0;
|
||||
m.addr = 0;
|
||||
m.bank = i;
|
||||
|
||||
m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
|
||||
m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
|
||||
if ((m.status & MCI_STATUS_VAL) == 0)
|
||||
continue;
|
||||
|
||||
|
@ -974,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
|
|||
kill_it = 1;
|
||||
|
||||
if (m.status & MCI_STATUS_MISCV)
|
||||
m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
|
||||
m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
|
||||
if (m.status & MCI_STATUS_ADDRV)
|
||||
m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
|
||||
m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
|
||||
|
||||
/*
|
||||
* Action optional error. Queue address for later processing.
|
||||
|
@ -1169,10 +1163,25 @@ int mce_notify_irq(void)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(mce_notify_irq);
|
||||
|
||||
static int mce_banks_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
|
||||
if (!mce_banks)
|
||||
return -ENOMEM;
|
||||
for (i = 0; i < banks; i++) {
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
b->ctl = -1ULL;
|
||||
b->init = 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize Machine Checks for a CPU.
|
||||
*/
|
||||
static int mce_cap_init(void)
|
||||
static int __cpuinit mce_cap_init(void)
|
||||
{
|
||||
unsigned b;
|
||||
u64 cap;
|
||||
|
@ -1192,11 +1201,10 @@ static int mce_cap_init(void)
|
|||
/* Don't support asymmetric configurations today */
|
||||
WARN_ON(banks != 0 && b != banks);
|
||||
banks = b;
|
||||
if (!bank) {
|
||||
bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
|
||||
if (!bank)
|
||||
return -ENOMEM;
|
||||
memset(bank, 0xff, banks * sizeof(u64));
|
||||
if (!mce_banks) {
|
||||
int err = mce_banks_init();
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Use accurate RIP reporting if available. */
|
||||
|
@ -1228,15 +1236,16 @@ static void mce_init(void)
|
|||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (skip_bank_init(i))
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
if (!b->init)
|
||||
continue;
|
||||
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
|
||||
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
|
||||
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
|
||||
wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add per CPU specific workarounds here */
|
||||
static int mce_cpu_quirks(struct cpuinfo_x86 *c)
|
||||
static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
|
||||
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
|
||||
|
@ -1251,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
|
|||
* trips off incorrectly with the IOMMU & 3ware
|
||||
* & Cerberus:
|
||||
*/
|
||||
clear_bit(10, (unsigned long *)&bank[4]);
|
||||
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
|
||||
}
|
||||
if (c->x86 <= 17 && mce_bootlog < 0) {
|
||||
/*
|
||||
|
@ -1265,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
|
|||
* by default.
|
||||
*/
|
||||
if (c->x86 == 6 && banks > 0)
|
||||
bank[0] = 0;
|
||||
mce_banks[0].ctl = 0;
|
||||
}
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_INTEL) {
|
||||
|
@ -1278,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
|
|||
* valid event later, merely don't write CTL0.
|
||||
*/
|
||||
|
||||
if (c->x86 == 6 && c->x86_model < 0x1A)
|
||||
__set_bit(0, &dont_init_banks);
|
||||
if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
|
||||
mce_banks[0].init = 0;
|
||||
|
||||
/*
|
||||
* All newer Intel systems support MCE broadcasting. Enable
|
||||
|
@ -1348,6 +1357,17 @@ static void mce_init_timer(void)
|
|||
add_timer_on(t, smp_processor_id());
|
||||
}
|
||||
|
||||
/* Handle unconfigured int18 (should never happen) */
|
||||
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
|
||||
smp_processor_id());
|
||||
}
|
||||
|
||||
/*
 * Pointer to the machine check handler installed for this CPU setup.
 * It starts out as unexpected_machine_check() and keeps that value until
 * another handler is assigned to it.
 */
|
||||
void (*machine_check_vector)(struct pt_regs *, long error_code) =
|
||||
unexpected_machine_check;
|
||||
|
||||
/*
|
||||
* Called for each booted CPU to set up machine checks.
|
||||
* Must be called with preempt off:
|
||||
|
@ -1561,8 +1581,10 @@ static struct miscdevice mce_log_device = {
|
|||
*/
|
||||
static int __init mcheck_enable(char *str)
|
||||
{
|
||||
if (*str == 0)
|
||||
if (*str == 0) {
|
||||
enable_p5_mce();
|
||||
return 1;
|
||||
}
|
||||
if (*str == '=')
|
||||
str++;
|
||||
if (!strcmp(str, "off"))
|
||||
|
@ -1603,8 +1625,9 @@ static int mce_disable(void)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!skip_bank_init(i))
|
||||
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
if (b->init)
|
||||
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -1679,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
|
|||
__cpuinitdata
|
||||
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
|
||||
|
||||
static struct sysdev_attribute *bank_attrs;
|
||||
static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
|
||||
{
|
||||
return container_of(attr, struct mce_bank, attr);
|
||||
}
|
||||
|
||||
static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
u64 b = bank[attr - bank_attrs];
|
||||
|
||||
return sprintf(buf, "%llx\n", b);
|
||||
return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
|
||||
}
|
||||
|
||||
static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
|
||||
|
@ -1697,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
|
|||
if (strict_strtoull(buf, 0, &new) < 0)
|
||||
return -EINVAL;
|
||||
|
||||
bank[attr - bank_attrs] = new;
|
||||
attr_to_bank(attr)->ctl = new;
|
||||
mce_restart();
|
||||
|
||||
return size;
|
||||
|
@ -1839,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
|
|||
}
|
||||
for (j = 0; j < banks; j++) {
|
||||
err = sysdev_create_file(&per_cpu(mce_dev, cpu),
|
||||
&bank_attrs[j]);
|
||||
&mce_banks[j].attr);
|
||||
if (err)
|
||||
goto error2;
|
||||
}
|
||||
|
@ -1848,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
|
|||
return 0;
|
||||
error2:
|
||||
while (--j >= 0)
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
|
||||
error:
|
||||
while (--i >= 0)
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
|
||||
|
||||
sysdev_unregister(&per_cpu(mce_dev, cpu));
|
||||
|
||||
|
@ -1869,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
|
|||
sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
|
||||
|
||||
for (i = 0; i < banks; i++)
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
|
||||
sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
|
||||
|
||||
sysdev_unregister(&per_cpu(mce_dev, cpu));
|
||||
cpumask_clear_cpu(cpu, mce_dev_initialized);
|
||||
|
@ -1886,8 +1910,9 @@ static void mce_disable_cpu(void *h)
|
|||
if (!(action & CPU_TASKS_FROZEN))
|
||||
cmci_clear();
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!skip_bank_init(i))
|
||||
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
if (b->init)
|
||||
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1902,8 +1927,9 @@ static void mce_reenable_cpu(void *h)
|
|||
if (!(action & CPU_TASKS_FROZEN))
|
||||
cmci_reenable();
|
||||
for (i = 0; i < banks; i++) {
|
||||
if (!skip_bank_init(i))
|
||||
wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
if (b->init)
|
||||
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1951,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
|
|||
.notifier_call = mce_cpu_callback,
|
||||
};
|
||||
|
||||
static __init int mce_init_banks(void)
|
||||
static __init void mce_init_banks(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
|
||||
GFP_KERNEL);
|
||||
if (!bank_attrs)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < banks; i++) {
|
||||
struct sysdev_attribute *a = &bank_attrs[i];
|
||||
struct mce_bank *b = &mce_banks[i];
|
||||
struct sysdev_attribute *a = &b->attr;
|
||||
|
||||
a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
|
||||
if (!a->attr.name)
|
||||
goto nomem;
|
||||
a->attr.name = b->attrname;
|
||||
snprintf(b->attrname, ATTR_LEN, "bank%d", i);
|
||||
|
||||
a->attr.mode = 0644;
|
||||
a->show = show_bank;
|
||||
a->store = set_bank;
|
||||
}
|
||||
return 0;
|
||||
|
||||
nomem:
|
||||
while (--i >= 0)
|
||||
kfree(bank_attrs[i].attr.name);
|
||||
kfree(bank_attrs);
|
||||
bank_attrs = NULL;
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static __init int mce_init_device(void)
|
||||
|
@ -1992,9 +2004,7 @@ static __init int mce_init_device(void)
|
|||
|
||||
zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
|
||||
|
||||
err = mce_init_banks();
|
||||
if (err)
|
||||
return err;
|
||||
mce_init_banks();
|
||||
|
||||
err = sysdev_class_register(&mce_sysclass);
|
||||
if (err)
|
||||
|
@ -2014,51 +2024,6 @@ static __init int mce_init_device(void)
|
|||
|
||||
device_initcall(mce_init_device);
|
||||
|
||||
#else /* CONFIG_X86_OLD_MCE: */
|
||||
|
||||
int nr_mce_banks;
|
||||
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
|
||||
|
||||
/* This has to be run for each processor */
|
||||
void mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
if (mce_disabled)
|
||||
return;
|
||||
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
amd_mcheck_init(c);
|
||||
break;
|
||||
|
||||
case X86_VENDOR_INTEL:
|
||||
if (c->x86 == 5)
|
||||
intel_p5_mcheck_init(c);
|
||||
if (c->x86 == 6)
|
||||
intel_p6_mcheck_init(c);
|
||||
if (c->x86 == 15)
|
||||
intel_p4_mcheck_init(c);
|
||||
break;
|
||||
|
||||
case X86_VENDOR_CENTAUR:
|
||||
if (c->x86 == 5)
|
||||
winchip_mcheck_init(c);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
|
||||
}
|
||||
|
||||
/*
 * Handler for the "mce" boot option in the old machine check code.
 * The option string is ignored; the handler only sets mce_p5_enabled
 * and returns 1.
 */
static int __init mcheck_enable(char *str)
|
||||
{
|
||||
mce_p5_enabled = 1;
|
||||
return 1;
|
||||
}
|
||||
/* Register mcheck_enable() for the "mce" boot parameter. */
__setup("mce", mcheck_enable);
|
||||
|
||||
#endif /* CONFIG_X86_OLD_MCE */
|
||||
|
||||
/*
|
||||
* Old style boot options parsing. Only for compatibility.
|
||||
*/
|
||||
|
@ -2068,3 +2033,56 @@ static int __init mcheck_disable(char *str)
|
|||
return 1;
|
||||
}
|
||||
__setup("nomce", mcheck_disable);
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
struct dentry *mce_get_debugfs_dir(void)
|
||||
{
|
||||
static struct dentry *dmce;
|
||||
|
||||
if (!dmce)
|
||||
dmce = debugfs_create_dir("mce", NULL);
|
||||
|
||||
return dmce;
|
||||
}
|
||||
|
||||
/*
 * Put the shared machine check bookkeeping back to zero: clears cpu_missing
 * and zeroes the atomic counters mce_fake_paniced, mce_executing, mce_callin
 * and global_nwo. Called from fake_panic_set() before the fake_panic flag
 * is updated.
 * NOTE(review): presumably this lets a new injected machine check start from
 * a clean state after a faked panic - confirm against mce_panic().
 */
static void mce_reset(void)
|
||||
{
|
||||
cpu_missing = 0;
|
||||
atomic_set(&mce_fake_paniced, 0);
|
||||
atomic_set(&mce_executing, 0);
|
||||
atomic_set(&mce_callin, 0);
|
||||
atomic_set(&global_nwo, 0);
|
||||
}
|
||||
|
||||
/*
 * Getter used by fake_panic_fops: stores the current value of fake_panic
 * in *val. The data argument is unused. Always returns 0.
 */
static int fake_panic_get(void *data, u64 *val)
|
||||
{
|
||||
*val = fake_panic;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Setter used by fake_panic_fops: resets the machine check bookkeeping via
 * mce_reset() and then stores val in fake_panic. The data argument is
 * unused. Always returns 0.
 */
static int fake_panic_set(void *data, u64 val)
|
||||
{
|
||||
/* Reset first, so the new setting takes effect on clean state. */
mce_reset();
|
||||
fake_panic = val;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * File operations for the "fake_panic" debugfs file: fake_panic_get() is
 * the getter, fake_panic_set() the setter, and "%llu\n" the value format.
 */
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
|
||||
fake_panic_set, "%llu\n");
|
||||
|
||||
/*
 * Late initcall: create the "fake_panic" file (mode 0444, backed by
 * fake_panic_fops) inside the "mce" debugfs directory.
 *
 * Returns 0 on success, or -ENOMEM when either the directory or the file
 * is not available.
 */
static int __init mce_debugfs_init(void)
{
	struct dentry *dir = mce_get_debugfs_dir();

	if (!dir)
		return -ENOMEM;

	if (!debugfs_create_file("fake_panic", 0444, dir, NULL,
				 &fake_panic_fops))
		return -ENOMEM;

	return 0;
}
late_initcall(mce_debugfs_init);
|
||||
#endif
|
||||
|
|
|
@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot)
|
|||
if (test_bit(i, owned))
|
||||
continue;
|
||||
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
|
||||
/* Already owned by someone else? */
|
||||
if (val & CMCI_EN) {
|
||||
|
@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot)
|
|||
}
|
||||
|
||||
val |= CMCI_EN | CMCI_THRESHOLD;
|
||||
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
|
||||
/* Did the enable bit stick? -- the bank supports CMCI */
|
||||
if (val & CMCI_EN) {
|
||||
|
@ -152,9 +152,9 @@ void cmci_clear(void)
|
|||
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
|
||||
continue;
|
||||
/* Disable CMCI */
|
||||
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
|
||||
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
|
||||
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
|
||||
__clear_bit(i, __get_cpu_var(mce_banks_owned));
|
||||
}
|
||||
spin_unlock_irqrestore(&cmci_discover_lock, flags);
|
||||
|
|
|
@ -1,94 +0,0 @@
|
|||
/*
|
||||
* Non Fatal Machine Check Exception Reporting
|
||||
*
|
||||
* (C) Copyright 2002 Dave Jones. <davej@redhat.com>
|
||||
*
|
||||
* This file contains routines to check for non-fatal MCEs every 15s
|
||||
*
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
static int firstbank;
|
||||
|
||||
#define MCE_RATE (15*HZ) /* timer rate is 15s */
|
||||
|
||||
static void mce_checkregs(void *info)
|
||||
{
|
||||
u32 low, high;
|
||||
int i;
|
||||
|
||||
for (i = firstbank; i < nr_mce_banks; i++) {
|
||||
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
|
||||
|
||||
if (!(high & (1<<31)))
|
||||
continue;
|
||||
|
||||
printk(KERN_INFO "MCE: The hardware reports a non fatal, "
|
||||
"correctable incident occurred on CPU %d.\n",
|
||||
smp_processor_id());
|
||||
|
||||
printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
|
||||
|
||||
/*
|
||||
* Scrub the error so we don't pick it up in MCE_RATE
|
||||
* seconds time:
|
||||
*/
|
||||
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
|
||||
|
||||
/* Serialize: */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
static void mce_work_fn(struct work_struct *work);
|
||||
static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
|
||||
|
||||
static void mce_work_fn(struct work_struct *work)
|
||||
{
|
||||
on_each_cpu(mce_checkregs, NULL, 1);
|
||||
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
|
||||
}
|
||||
|
||||
/*
 * Module init: start the periodic non-fatal machine check poll.
 *
 * Returns -ENODEV when the boot CPU lacks either the MCE or the MCA feature
 * flag; otherwise picks the first bank to poll, schedules the first run of
 * mce_work and returns 0.
 */
static int __init init_nonfatal_mce_checker(void)
{
	struct cpuinfo_x86 *cpu = &boot_cpu_data;

	/* Needs both MCE support and PPro style MCA */
	if (!cpu_has(cpu, X86_FEATURE_MCE) || !cpu_has(cpu, X86_FEATURE_MCA))
		return -ENODEV;

	/* Some Athlons misbehave when we frob bank 0 */
	firstbank = (cpu->x86_vendor == X86_VENDOR_AMD && cpu->x86 == 6) ? 1 : 0;

	/* Check for non-fatal errors every MCE_RATE s */
	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
	printk(KERN_INFO "Machine check exception polling timer started.\n");

	return 0;
}
module_init(init_nonfatal_mce_checker);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
|
@ -1,163 +0,0 @@
|
|||
/*
|
||||
* P4 specific Machine Check Exception Reporting
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
/* as supported by the P4/Xeon family */
|
||||
struct intel_mce_extended_msrs {
|
||||
u32 eax;
|
||||
u32 ebx;
|
||||
u32 ecx;
|
||||
u32 edx;
|
||||
u32 esi;
|
||||
u32 edi;
|
||||
u32 ebp;
|
||||
u32 esp;
|
||||
u32 eflags;
|
||||
u32 eip;
|
||||
/* u32 *reserved[]; */
|
||||
};
|
||||
|
||||
static int mce_num_extended_msrs;
|
||||
|
||||
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
|
||||
static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
{
|
||||
u32 h;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
|
||||
rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
|
||||
rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
|
||||
rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
|
||||
rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
|
||||
rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
|
||||
rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
|
||||
rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
|
||||
rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
|
||||
rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
|
||||
}
|
||||
|
||||
static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int recover = 1;
|
||||
int i;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
if (mcgstl & (1<<0)) /* Recoverable ? */
|
||||
recover = 0;
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
|
||||
smp_processor_id(), mcgsth, mcgstl);
|
||||
|
||||
if (mce_num_extended_msrs > 0) {
|
||||
struct intel_mce_extended_msrs dbg;
|
||||
|
||||
intel_get_extended_msrs(&dbg);
|
||||
|
||||
printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
|
||||
"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
|
||||
"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
|
||||
smp_processor_id(), dbg.eip, dbg.eflags,
|
||||
dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
|
||||
dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
|
||||
if (high & (1<<31)) {
|
||||
char misc[20];
|
||||
char addr[24];
|
||||
|
||||
misc[0] = addr[0] = '\0';
|
||||
if (high & (1<<29))
|
||||
recover |= 1;
|
||||
if (high & (1<<25))
|
||||
recover |= 2;
|
||||
high &= ~(1<<31);
|
||||
if (high & (1<<27)) {
|
||||
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
|
||||
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
|
||||
}
|
||||
if (high & (1<<26)) {
|
||||
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
|
||||
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
|
||||
}
|
||||
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
|
||||
smp_processor_id(), i, high, low, misc, addr);
|
||||
}
|
||||
}
|
||||
|
||||
if (recover & 2)
|
||||
panic("CPU context corrupt");
|
||||
if (recover & 1)
|
||||
panic("Unable to continue");
|
||||
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
|
||||
/*
|
||||
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
|
||||
* recoverable/continuable.This will allow BIOS to look at the MSRs
|
||||
* for errors if the OS could not log the error.
|
||||
*/
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
u32 msr;
|
||||
msr = MSR_IA32_MC0_STATUS+i*4;
|
||||
rdmsr(msr, low, high);
|
||||
if (high&(1<<31)) {
|
||||
/* Clear it */
|
||||
wrmsr(msr, 0UL, 0UL);
|
||||
/* Serialize */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
}
|
||||
mcgstl &= ~(1<<2);
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
||||
machine_check_vector = intel_machine_check;
|
||||
wmb();
|
||||
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
nr_mce_banks = l & 0xff;
|
||||
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
}
|
||||
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
|
||||
/* Check for P4/Xeon extended MCE MSRs */
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<9)) {/* MCG_EXT_P */
|
||||
mce_num_extended_msrs = (l >> 16) & 0xff;
|
||||
printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
|
||||
" available\n",
|
||||
smp_processor_id(), mce_num_extended_msrs);
|
||||
|
||||
#ifdef CONFIG_X86_MCE_P4THERMAL
|
||||
/* Check for P4/Xeon Thermal monitor */
|
||||
intel_init_thermal(c);
|
||||
#endif
|
||||
}
|
||||
}
|
|
@ -1,127 +0,0 @@
|
|||
/*
|
||||
* P6 specific Machine Check Exception Reporting
|
||||
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/system.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/msr.h>
|
||||
|
||||
/* Machine Check Handler For PII/PIII */
|
||||
static void intel_machine_check(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int recover = 1;
|
||||
int i;
|
||||
|
||||
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
if (mcgstl & (1<<0)) /* Recoverable ? */
|
||||
recover = 0;
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
|
||||
smp_processor_id(), mcgsth, mcgstl);
|
||||
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
|
||||
if (high & (1<<31)) {
|
||||
char misc[20];
|
||||
char addr[24];
|
||||
|
||||
misc[0] = '\0';
|
||||
addr[0] = '\0';
|
||||
|
||||
if (high & (1<<29))
|
||||
recover |= 1;
|
||||
if (high & (1<<25))
|
||||
recover |= 2;
|
||||
high &= ~(1<<31);
|
||||
|
||||
if (high & (1<<27)) {
|
||||
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
|
||||
snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
|
||||
}
|
||||
if (high & (1<<26)) {
|
||||
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
|
||||
snprintf(addr, 24, " at %08x%08x", ahigh, alow);
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
|
||||
smp_processor_id(), i, high, low, misc, addr);
|
||||
}
|
||||
}
|
||||
|
||||
if (recover & 2)
|
||||
panic("CPU context corrupt");
|
||||
if (recover & 1)
|
||||
panic("Unable to continue");
|
||||
|
||||
printk(KERN_EMERG "Attempting to continue.\n");
|
||||
/*
|
||||
* Do not clear the MSR_IA32_MCi_STATUS if the error is not
|
||||
* recoverable/continuable.This will allow BIOS to look at the MSRs
|
||||
* for errors if the OS could not log the error:
|
||||
*/
|
||||
for (i = 0; i < nr_mce_banks; i++) {
|
||||
unsigned int msr;
|
||||
|
||||
msr = MSR_IA32_MC0_STATUS+i*4;
|
||||
rdmsr(msr, low, high);
|
||||
if (high & (1<<31)) {
|
||||
/* Clear it: */
|
||||
wrmsr(msr, 0UL, 0UL);
|
||||
/* Serialize: */
|
||||
wmb();
|
||||
add_taint(TAINT_MACHINE_CHECK);
|
||||
}
|
||||
}
|
||||
mcgstl &= ~(1<<2);
|
||||
wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
}
|
||||
|
||||
/* Set up machine check reporting for processors with Intel style MCE: */
|
||||
void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
int i;
|
||||
|
||||
/* Check for MCE support */
|
||||
if (!cpu_has(c, X86_FEATURE_MCE))
|
||||
return;
|
||||
|
||||
/* Check for PPro style MCA */
|
||||
if (!cpu_has(c, X86_FEATURE_MCA))
|
||||
return;
|
||||
|
||||
/* Ok machine check is available */
|
||||
machine_check_vector = intel_machine_check;
|
||||
/* Make sure the vector pointer is visible before we enable MCEs: */
|
||||
wmb();
|
||||
|
||||
printk(KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr(MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
|
||||
nr_mce_banks = l & 0xff;
|
||||
|
||||
/*
|
||||
* Following the example in IA-32 SDM Vol 3:
|
||||
* - MC0_CTL should not be written
|
||||
* - Status registers on all banks should be cleared on reset
|
||||
*/
|
||||
for (i = 1; i < nr_mce_banks; i++)
|
||||
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
|
||||
for (i = 0; i < nr_mce_banks; i++)
|
||||
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
|
||||
set_in_cr4(X86_CR4_MCE);
|
||||
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
|
||||
smp_processor_id());
|
||||
}
|
|
@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
return;
|
||||
}
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
|
||||
tm2 = 1;
|
||||
|
||||
/* Check whether a vector already exists */
|
||||
if (h & APIC_VECTOR_MASK) {
|
||||
printk(KERN_DEBUG
|
||||
|
@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
return;
|
||||
}
|
||||
|
||||
/* early Pentium M models use different method for enabling TM2 */
|
||||
if (cpu_has(c, X86_FEATURE_TM2)) {
|
||||
if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
|
||||
rdmsr(MSR_THERM2_CTL, l, h);
|
||||
if (l & MSR_THERM2_CTL_TM_SELECT)
|
||||
tm2 = 1;
|
||||
} else if (l & MSR_IA32_MISC_ENABLE_TM2)
|
||||
tm2 = 1;
|
||||
}
|
||||
|
||||
/* We'll mask the thermal vector in the lapic till we're ready: */
|
||||
h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
|
||||
apic_write(APIC_LVTTHMR, h);
|
||||
|
|
|
@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
|
|||
seq_printf(p, " Threshold APIC interrupts\n");
|
||||
# endif
|
||||
#endif
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
#ifdef CONFIG_X86_MCE
|
||||
seq_printf(p, "%*s: ", prec, "MCE");
|
||||
for_each_online_cpu(j)
|
||||
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
|
||||
|
@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
|
|||
sum += irq_stats(cpu)->irq_threshold_count;
|
||||
# endif
|
||||
#endif
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
#ifdef CONFIG_X86_MCE
|
||||
sum += per_cpu(mce_exception_count, cpu);
|
||||
sum += per_cpu(mce_poll_count, cpu);
|
||||
#endif
|
||||
|
|
|
@ -190,7 +190,7 @@ static void __init apic_intr_init(void)
|
|||
#ifdef CONFIG_X86_MCE_THRESHOLD
|
||||
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
|
||||
#endif
|
||||
#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
|
||||
#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
|
||||
alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
|
||||
#endif
|
||||
|
||||
|
|
|
@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs)
|
|||
void
|
||||
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
|
||||
{
|
||||
#ifdef CONFIG_X86_NEW_MCE
|
||||
#ifdef CONFIG_X86_MCE
|
||||
/* notify userspace of pending MCEs */
|
||||
if (thread_info_flags & _TIF_MCE_NOTIFY)
|
||||
mce_notify_process();
|
||||
|
|
Loading…
Reference in a new issue