diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 860a9087549..b8fadf5f75a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -570,6 +570,12 @@ and is between 256 and 4096 characters. It is defined in the file See drivers/char/README.epca and Documentation/digiepca.txt. + disable_mtrr_trim [X86-64, Intel only] + By default the kernel will trim any uncacheable + memory out of your available memory pool based on + MTRR settings. This parameter disables that behavior, + possibly causing your machine to run very slowly. + dmasound= [HW,OSS] Sound subsystem buffers dscc4.setup= [NET] diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c index 9a189cef640..8f520f93ffd 100644 --- a/arch/x86/kernel/bugs_64.c +++ b/arch/x86/kernel/bugs_64.c @@ -13,7 +13,6 @@ void __init check_bugs(void) { identify_cpu(&boot_cpu_data); - mtrr_bp_init(); #if !defined(CONFIG_SMP) printk("CPU: "); print_cpu_info(&boot_cpu_data); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55d31ff118f..103d61a59b1 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,7 +14,7 @@ #include "mtrr.h" struct mtrr_state { - struct mtrr_var_range *var_ranges; + struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; mtrr_type fixed_ranges[NUM_FIXED_RANGES]; unsigned char enabled; unsigned char have_fixed; @@ -86,12 +86,6 @@ void __init get_mtrr_state(void) struct mtrr_var_range *vrs; unsigned lo, dummy; - if (!mtrr_state.var_ranges) { - mtrr_state.var_ranges = kmalloc(num_var_ranges * sizeof (struct mtrr_var_range), - GFP_KERNEL); - if (!mtrr_state.var_ranges) - return; - } vrs = mtrr_state.var_ranges; rdmsr(MTRRcap_MSR, lo, dummy); diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 14535686c09..91e150acb46 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -11,10 +11,6 @@ #include #include "mtrr.h" -/* RED-PEN: this is accessed without any locking */ -extern unsigned int *usage_table; - - #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) static const char *const mtrr_strings[MTRR_NUM_TYPES] = @@ -397,7 +393,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) for (i = 0; i < max; i++) { mtrr_if->get(i, &base, &size, &type); if (size == 0) - usage_table[i] = 0; + mtrr_usage_table[i] = 0; else { if (size < (0x100000 >> PAGE_SHIFT)) { /* less than 1MB */ @@ -411,7 +407,7 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) len += seq_printf(seq, "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), usage_table[i]); + mtrr_attrib_to_str(type), mtrr_usage_table[i]); } } return 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 60af5ed2b5c..ccd36ed2187 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -38,8 +38,8 @@ #include #include +#include #include - #include #include #include @@ -47,7 +47,7 @@ u32 num_var_ranges = 0; -unsigned int *usage_table; +unsigned int mtrr_usage_table[MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -121,13 +121,8 @@ static void __init init_table(void) int i, max; max = num_var_ranges; - if ((usage_table = kmalloc(max * sizeof *usage_table, GFP_KERNEL)) - == NULL) { - printk(KERN_ERR "mtrr: could not allocate\n"); - return; - } for (i = 0; i < max; i++) - usage_table[i] = 1; + mtrr_usage_table[i] = 1; } struct set_mtrr_data { @@ -383,7 +378,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, goto out; } if (increment) - ++usage_table[i]; + ++mtrr_usage_table[i]; error = i; goto out; } @@ -391,15 +386,15 @@ int mtrr_add_page(unsigned long base, unsigned long size, i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { set_mtrr(i, base, size, type); - if (likely(replace < 0)) - usage_table[i] = 1; - else { - usage_table[i] = usage_table[replace]; + if (likely(replace < 0)) { + mtrr_usage_table[i] = 1; + } else { + mtrr_usage_table[i] = mtrr_usage_table[replace]; if (increment) - usage_table[i]++; + mtrr_usage_table[i]++; if (unlikely(replace != i)) { set_mtrr(replace, 0, 0, 0); - usage_table[replace] = 0; + mtrr_usage_table[replace] = 0; } } } else @@ -529,11 +524,11 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); goto out; } - if (usage_table[reg] < 1) { + if (mtrr_usage_table[reg] < 1) { printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); goto out; } - if (--usage_table[reg] < 1) + if (--mtrr_usage_table[reg] < 1) set_mtrr(reg, 0, 0, 0); error = reg; out: @@ -593,16 +588,11 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value * mtrr_state; +static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { int i; - int size = num_var_ranges * sizeof(struct mtrr_value); - - mtrr_state = kzalloc(size,GFP_ATOMIC); - if (!mtrr_state) - return -ENOMEM; for (i = 0; i < num_var_ranges; i++) { mtrr_if->get(i, @@ -624,7 +614,6 @@ static int mtrr_restore(struct sys_device * sysdev) mtrr_state[i].lsize, mtrr_state[i].ltype); } - kfree(mtrr_state); return 0; } @@ -635,6 +624,109 @@ static struct sysdev_driver mtrr_sysdev_driver = { .resume = mtrr_restore, }; +#ifdef CONFIG_X86_64 +static int disable_mtrr_trim; + +static int __init disable_mtrr_trim_setup(char *str) +{ + disable_mtrr_trim = 1; + return 0; +} +early_param("disable_mtrr_trim", disable_mtrr_trim_setup); + +/* + * Newer AMD K8s and later CPUs have a special magic MSR way to force WB + * for memory >4GB. Check for that here. + * Note this won't check if the MTRRs < 4GB where the magic bit doesn't + * apply to are wrong, but so far we don't know of any such case in the wild. + */ +#define Tom2Enabled (1U << 21) +#define Tom2ForceMemTypeWB (1U << 22) + +static __init int amd_special_default_mtrr(unsigned long end_pfn) +{ + u32 l, h; + + /* Doesn't apply to memory < 4GB */ + if (end_pfn <= (0xffffffff >> PAGE_SHIFT)) + return 0; + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return 0; + if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) + return 0; + /* In case some hypervisor doesn't pass SYSCFG through */ + if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + return 0; + /* + * Memory between 4GB and top of mem is forced WB by this magic bit. + * Reserved before K8RevF, but should be zero there. + */ + if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == + (Tom2Enabled | Tom2ForceMemTypeWB)) + return 1; + return 0; +} + +/** + * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs + * + * Some buggy BIOSes don't setup the MTRRs properly for systems with certain + * memory configurations. This routine checks that the highest MTRR matches + * the end of memory, to make sure the MTRRs having a write back type cover + * all of the memory the kernel is intending to use. If not, it'll trim any + * memory off the end by adjusting end_pfn, removing it from the kernel's + * allocation pools, warning the user with an obnoxious message. + */ +int __init mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + unsigned long i, base, size, highest_addr = 0, def, dummy; + mtrr_type type; + u64 trim_start, trim_size; + + /* + * Make sure we only trim uncachable memory on machines that + * support the Intel MTRR architecture: + */ + rdmsr(MTRRdefType_MSR, def, dummy); + def &= 0xff; + if (!is_cpu(INTEL) || disable_mtrr_trim || def != MTRR_TYPE_UNCACHABLE) + return 0; + + /* Find highest cached pfn */ + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, &base, &size, &type); + if (type != MTRR_TYPE_WRBACK) + continue; + base <<= PAGE_SHIFT; + size <<= PAGE_SHIFT; + if (highest_addr < base + size) + highest_addr = base + size; + } + + if (amd_special_default_mtrr(end_pfn)) + return 0; + + if ((highest_addr >> PAGE_SHIFT) < end_pfn) { + printk(KERN_WARNING "***************\n"); + printk(KERN_WARNING "**** WARNING: likely BIOS bug\n"); + printk(KERN_WARNING "**** MTRRs don't cover all of " + "memory, trimmed %ld pages\n", end_pfn - + (highest_addr >> PAGE_SHIFT)); + printk(KERN_WARNING "***************\n"); + + printk(KERN_INFO "update e820 for mtrr\n"); + trim_start = highest_addr; + trim_size = end_pfn; + trim_size <<= PAGE_SHIFT; + trim_size -= trim_start; + add_memory_region(trim_start, trim_size, E820_RESERVED); + update_e820(); + return 1; + } + + return 0; +} +#endif /** * mtrr_bp_init - initialize mtrrs on the boot CPU diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 54347e9a95c..fb74a2c2081 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -12,6 +12,7 @@ #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) #define NUM_FIXED_RANGES 88 +#define MAX_VAR_RANGES 256 #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -32,6 +33,8 @@ an 8 bit field: */ typedef u8 mtrr_type; +extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; + struct mtrr_ops { u32 vendor; u32 use_intel_if; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 6cbd15625dc..12948316e6a 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -310,6 +310,13 @@ void __init setup_arch(char **cmdline_p) * we are rounding upwards: */ end_pfn = e820_end_of_ram(); + /* update e820 for memory not covered by WB MTRRs */ + mtrr_bp_init(); + if (mtrr_trim_uncached_memory(end_pfn)) { + e820_register_active_regions(0, 0, -1UL); + end_pfn = e820_end_of_ram(); + } + num_physpages = end_pfn; check_efer(); diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h index 262670e4207..319d065800b 100644 --- a/include/asm-x86/mtrr.h +++ b/include/asm-x86/mtrr.h @@ -97,6 +97,7 @@ extern int mtrr_del_page (int reg, unsigned long base, unsigned long size); extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); extern void mtrr_ap_init(void); extern void mtrr_bp_init(void); +extern int mtrr_trim_uncached_memory(unsigned long end_pfn); # else #define mtrr_save_fixed_ranges(arg) do {} while (0) #define mtrr_save_state() do {} while (0) @@ -120,7 +121,10 @@ static __inline__ int mtrr_del_page (int reg, unsigned long base, { return -ENODEV; } - +static inline int mtrr_trim_uncached_memory(unsigned long end_pfn) +{ + return 0; +} static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;} #define mtrr_ap_init() do {} while (0)