From 4c7f8900f1d8a0e464e7092f132a7e93f7c20f2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Feb 2008 07:06:55 +0100 Subject: [PATCH 001/682] x86: stackprotector & PARAVIRT fix on paravirt enabled 64-bit kernels the paravirt ops do function calls themselves - which is bad with the stackprotector - for example pda_init() loads 0 into %gs and then does MSR_GS_BASE write (which modifies gs.base) - but that MSR write is a function call on paravirt, which with stackprotector tries to read the stack canary from the PDA ... crashing the bootup. the solution was suggested by Arjan van de Ven: to exclude paravirt.c from stackprotector, too many lowlevel functionality is in it. It's not like we'll have paravirt functions with character arrays on their stack anyway... Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b472..8161e5a91ca 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,6 +14,7 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc_64.o := $(nostackp) +CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps_$(BITS).o irq_$(BITS).o From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 08:48:23 +0100 Subject: [PATCH 002/682] x86: fix stackprotector canary updates during context switches fix a bug noticed and fixed by pageexec@freemail.hu. if built with -fstack-protector-all then we'll have canary checks built into the __switch_to() function. That does not work well with the canary-switching code there: while we already use the %rsp of the new task, we still call __switch_to() whith the previous task's canary value in the PDA, hence the __switch_to() ssp prologue instructions will store the previous canary. Then we update the PDA and upon return from __switch_to() the canary check triggers and we panic. so update the canary after we have called __switch_to(), where we are at the same stackframe level as the last stackframe of the next (and now freshly current) task. Note: this means that we call __switch_to() [and its sub-functions] still with the old canary, but that is not a problem, both the previous and the next task has a high-quality canary. The only (mostly academic) disadvantage is that the canary of one task may leak onto the stack of another task, increasing the risk of information leaks, were an attacker able to read the stack of specific tasks (but not that of others). To solve this we'll have to reorganize the way we switch tasks, and move the PDA setting into the switch_to() assembly code. That will happen in another patch. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 - include/asm-x86/pda.h | 2 -- include/asm-x86/system.h | 6 +++++- include/linux/sched.h | 3 +-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988..d8640388039 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -640,7 +640,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); #ifdef CONFIG_CC_STACKPROTECTOR - write_pda(stack_canary, next_p->stack_canary); /* * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 101fb9e1195..62b734986a4 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -16,11 +16,9 @@ struct x8664_pda { unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int cpunumber; /* 36 Logical CPU number */ -#ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at offset 40!!! */ -#endif char *irqstackptr; unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index a2f04cd79b2..172f5418509 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -92,6 +92,8 @@ do { \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -103,7 +105,9 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ + [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)), \ + [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4..d6a51515878 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1096,10 +1096,9 @@ struct task_struct { pid_t pid; pid_t tgid; -#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; -#endif + /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with From ce22bd92cba0958e052cb1ce0f89f1d3a02b60a7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 12 May 2008 15:44:31 +0200 Subject: [PATCH 003/682] x86: setup stack canary for the idle threads The idle threads for non-boot CPUs are a bit special in how they are created; the result is that these don't have the stack canary set up properly in their PDA. Easiest fix is to just always set the PDA up correctly when entering the idle thread; this is a NOP for the boot cpu. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d8640388039..9e69e223023 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -146,6 +146,15 @@ static inline void play_dead(void) void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; + +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us. This is as good a place as any for + * doing that. + */ + write_pda(stack_canary, current->stack_canary); +#endif /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); From 7e09b2a02dae4616a5a26000169964b32f86cd35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:22:34 +0100 Subject: [PATCH 004/682] x86: fix canary of the boot CPU's idle task the boot CPU's idle task has a zero stackprotector canary value. this is a special task that is never forked, so the fork code does not randomize its canary. Do it when we hit cpu_idle(). Academic sidenote: this means that the early init code runs with a zero canary and hence the canary becomes predictable for this short, boot-only amount of time. Although attack vectors against early init code are very rare, it might make sense to move this initialization to an earlier point. (to one of the early init functions that never return - such as start_kernel()) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9e69e223023..d4c7ac7aa43 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -150,9 +150,13 @@ void cpu_idle(void) #ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us. This is as good a place as any for - * doing that. + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): */ + current->stack_canary = get_random_int(); write_pda(stack_canary, current->stack_canary); #endif /* endless idle loop with no priority at all */ From 517a92c4e19fcea815332d3155e9fb7723251274 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:02:13 +0100 Subject: [PATCH 005/682] panic: print more informative messages on stackprotect failure pointed out by pageexec@freemail.hu: we just simply panic() when there's a stackprotector attack - giving the attacked person no information about what kernel code the attack went against. print out the attacked function. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 425567f45b9..f236001cc4d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -327,7 +327,8 @@ EXPORT_SYMBOL(warn_on_slowpath); */ void __stack_chk_fail(void) { - panic("stack-protector: Kernel stack is corrupted"); + panic("stack-protector: Kernel stack is corrupted in: %p\n", + __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); #endif From 5cb273013e182a35e7db614d3e20a144cba71e53 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:07:01 +0100 Subject: [PATCH 006/682] panic: print out stacktrace if DEBUG_BUGVERBOSE if CONFIG_DEBUG_BUGVERBOSE is set then the user most definitely wanted to see as much information about kernel crashes as possible - so give them at least a stack dump. this is particularly useful for stackprotector related panics, where the stacktrace can give us the exact location of the (attempted) attack. Pointed out by pageexec@freemail.hu in the stackprotector breakage threads. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index f236001cc4d..17aad578a2f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -80,6 +80,9 @@ NORET_TYPE void panic(const char * fmt, ...) vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); +#ifdef CONFIG_DEBUG_BUGVERBOSE + dump_stack(); +#endif bust_spinlocks(0); /* From 72370f2a5b227bd3817593a6b15ea3f53f51dfcb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 13 Feb 2008 16:15:34 +0100 Subject: [PATCH 007/682] x86: if stackprotector is enabled, thn use stack-protector-all by default also enable the rodata and nx tests. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 3 ++- arch/x86/Kconfig.debug | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dcbec34154c..83d8392c133 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1142,7 +1142,7 @@ config SECCOMP config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 && EXPERIMENTAL && BROKEN + depends on X86_64 help This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of critical functions, a canary @@ -1159,6 +1159,7 @@ config CC_STACKPROTECTOR config CC_STACKPROTECTOR_ALL bool "Use stack-protector for all functions" depends on CC_STACKPROTECTOR + default y help Normally, GCC only inserts the canary value protection for functions that use large-ish on-stack buffers. By enabling diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ac1e31ba479..b5c5b55d043 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -102,6 +102,7 @@ config DIRECT_GBPAGES config DEBUG_RODATA_TEST bool "Testcase for the DEBUG_RODATA feature" depends on DEBUG_RODATA + default y help This option enables a testcase for the DEBUG_RODATA feature as well as for the change_page_attr() infrastructure. From 9b5609fd773e6ac0b1d6d6e1bf68f32cca64e06b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:41:09 +0100 Subject: [PATCH 008/682] stackprotector: include files create for core kernel files to include. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 4 ++++ include/linux/stackprotector.h | 8 ++++++++ init/main.c | 1 + 3 files changed, 13 insertions(+) create mode 100644 include/asm-x86/stackprotector.h create mode 100644 include/linux/stackprotector.h diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h new file mode 100644 index 00000000000..dcac7a6bdba --- /dev/null +++ b/include/asm-x86/stackprotector.h @@ -0,0 +1,4 @@ +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H 1 + +#endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h new file mode 100644 index 00000000000..d3e8bbe602f --- /dev/null +++ b/include/linux/stackprotector.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACKPROTECTOR_H +#define _LINUX_STACKPROTECTOR_H 1 + +#ifdef CONFIG_CC_STACKPROTECTOR +# include +#endif + +#endif diff --git a/init/main.c b/init/main.c index f7fb20021d4..a84322ca64a 100644 --- a/init/main.c +++ b/init/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:42:02 +0100 Subject: [PATCH 009/682] stackprotector: add boot_init_stack_canary() add the boot_init_stack_canary() and make the secondary idle threads use it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 6 ++---- include/asm-x86/stackprotector.h | 20 ++++++++++++++++++++ include/linux/stackprotector.h | 4 ++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index d4c7ac7aa43..5107cb214c7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -147,7 +147,6 @@ void cpu_idle(void) { current_thread_info()->status |= TS_POLLING; -#ifdef CONFIG_CC_STACKPROTECTOR /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have @@ -156,9 +155,8 @@ void cpu_idle(void) * invalid canaries already on the stack wont ever * trigger): */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); -#endif + boot_init_stack_canary(); + /* endless idle loop with no priority at all */ while (1) { tick_nohz_stop_sched_tick(); diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index dcac7a6bdba..0f91f7a2688 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,4 +1,24 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + /* + * If we're the non-boot CPU, nothing set the PDA stack + * canary up for us - and if we are the boot CPU we have + * a 0 stack canary. This is a good place for updating + * it, as we wont ever return from this function (so the + * invalid canaries already on the stack wont ever + * trigger): + */ + current->stack_canary = get_random_int(); + write_pda(stack_canary, current->stack_canary); +} + #endif diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index d3e8bbe602f..422e71aafd0 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -3,6 +3,10 @@ #ifdef CONFIG_CC_STACKPROTECTOR # include +#else +static inline void boot_init_stack_canary(void) +{ +} #endif #endif From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:44:08 +0100 Subject: [PATCH 010/682] x86: fix the stackprotector canary of the boot CPU Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 1 + include/linux/stackprotector.h | 4 ++++ init/main.c | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5107cb214c7..cce47f7fbf2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -16,6 +16,7 @@ #include +#include #include #include #include diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 422e71aafd0..6f3e54c704c 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -1,6 +1,10 @@ #ifndef _LINUX_STACKPROTECTOR_H #define _LINUX_STACKPROTECTOR_H 1 +#include +#include +#include + #ifdef CONFIG_CC_STACKPROTECTOR # include #else diff --git a/init/main.c b/init/main.c index a84322ca64a..b44e4eb0f5e 100644 --- a/init/main.c +++ b/init/main.c @@ -546,6 +546,12 @@ asmlinkage void __init start_kernel(void) unwind_init(); lockdep_init(); debug_objects_early_init(); + + /* + * Set up the the initial canary ASAP: + */ + boot_init_stack_canary(); + cgroup_init_early(); local_irq_disable(); From 960a672bd9f1ec06e8f197cf81a50fd07ea02e7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:56:04 +0100 Subject: [PATCH 011/682] x86: stackprotector: mix TSC to the boot canary mix the TSC to the boot canary. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/stackprotector.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/include/asm-x86/stackprotector.h b/include/asm-x86/stackprotector.h index 0f91f7a2688..3baf7ad89be 100644 --- a/include/asm-x86/stackprotector.h +++ b/include/asm-x86/stackprotector.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +#include + /* * Initialize the stackprotector canary value. * @@ -9,16 +11,28 @@ */ static __always_inline void boot_init_stack_canary(void) { + u64 canary; + u64 tsc; + /* * If we're the non-boot CPU, nothing set the PDA stack * canary up for us - and if we are the boot CPU we have * a 0 stack canary. This is a good place for updating * it, as we wont ever return from this function (so the * invalid canaries already on the stack wont ever - * trigger): + * trigger). + * + * We both use the random pool and the current TSC as a source + * of randomness. The TSC only matters for very early init, + * there it already has some randomness on most systems. Later + * on during the bootup the random pool has true entropy too. */ - current->stack_canary = get_random_int(); - write_pda(stack_canary, current->stack_canary); + get_random_bytes(&canary, sizeof(canary)); + tsc = __native_read_tsc(); + canary += tsc + (tsc << 32UL); + + current->stack_canary = canary; + write_pda(stack_canary, canary); } #endif From 113c5413cf9051cc50b88befdc42e3402bb92115 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 10:36:03 +0100 Subject: [PATCH 012/682] x86: unify stackprotector features streamline the stackprotector features under a single option and make the stronger feature the one accessible. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 83d8392c133..0cd1695c24f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1140,13 +1140,17 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config CC_STACKPROTECTOR_ALL + bool + config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" depends on X86_64 + select CC_STACKPROTECTOR_ALL help - This option turns on the -fstack-protector GCC feature. This - feature puts, at the beginning of critical functions, a canary - value on the stack just before the return address, and validates + This option turns on the -fstack-protector GCC feature. This + feature puts, at the beginning of functions, a canary value on + the stack just before the return address, and validates the value just before actually returning. Stack based buffer overflows (that need to overwrite this return address) now also overwrite the canary, which gets detected and the attack is then @@ -1154,16 +1158,8 @@ config CC_STACKPROTECTOR This feature requires gcc version 4.2 or above, or a distribution gcc with the feature backported. Older versions are automatically - detected and for those versions, this configuration option is ignored. - -config CC_STACKPROTECTOR_ALL - bool "Use stack-protector for all functions" - depends on CC_STACKPROTECTOR - default y - help - Normally, GCC only inserts the canary value protection for - functions that use large-ish on-stack buffers. By enabling - this option, GCC will be asked to do this for ALL functions. + detected and for those versions, this configuration option is + ignored. (and a warning is printed during bootup) source kernel/Kconfig.hz From 54371a43a66f4477889769b4fa00df936855dc8f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 15 Feb 2008 15:33:12 -0800 Subject: [PATCH 013/682] x86: add CONFIG_CC_STACKPROTECTOR self-test This patch adds a simple self-test capability to the stackprotector feature. The test deliberately overflows a stack buffer and then checks if the canary trap function gets called. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/kernel/panic.c b/kernel/panic.c index 17aad578a2f..50cf9257b23 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -324,14 +324,82 @@ EXPORT_SYMBOL(warn_on_slowpath); #endif #ifdef CONFIG_CC_STACKPROTECTOR + +static unsigned long __stack_check_testing; +/* + * Self test function for the stack-protector feature. + * This test requires that the local variable absolutely has + * a stack slot, hence the barrier()s. + */ +static noinline void __stack_chk_test_func(void) +{ + unsigned long foo; + barrier(); + /* + * we need to make sure we're not about to clobber the return address, + * while real exploits do this, it's unhealthy on a running system. + * Besides, if we would, the test is already failed anyway so + * time to pull the emergency brake on it. + */ + if ((unsigned long)__builtin_return_address(0) == + *(((unsigned long *)&foo)+1)) { + printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); + return; + } +#ifdef CONFIG_FRAME_POINTER + /* We also don't want to clobber the frame pointer */ + if ((unsigned long)__builtin_return_address(0) == + *(((unsigned long *)&foo)+2)) { + printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); + return; + } +#endif + barrier(); + if (current->stack_canary == *(((unsigned long *)&foo)+1)) + *(((unsigned long *)&foo)+1) = 0; + else + printk(KERN_ERR "No -fstack-protector canary found\n"); + barrier(); +} + +static int __stack_chk_test(void) +{ + printk(KERN_INFO "Testing -fstack-protector-all feature\n"); + __stack_check_testing = (unsigned long)&__stack_chk_test_func; + __stack_chk_test_func(); + if (__stack_check_testing) { + printk(KERN_ERR "-fstack-protector-all test failed\n"); + WARN_ON(1); + } + return 0; +} /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ void __stack_chk_fail(void) { + if (__stack_check_testing == (unsigned long)&__stack_chk_test_func) { + long delta; + + delta = (unsigned long)__builtin_return_address(0) - + __stack_check_testing; + /* + * The test needs to happen inside the test function, so + * check if the return address is close to that function. + * The function is only 2 dozen bytes long, but keep a wide + * safety margin to avoid panic()s for normal users regardless + * of the quality of the compiler. + */ + if (delta >= 0 && delta <= 400) { + __stack_check_testing = 0; + return; + } + } panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); + +late_initcall(__stack_chk_test); #endif From b719ac56c0032bc1602914c6ea70b0f1581b08c7 Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Mon, 14 Apr 2008 10:03:50 -0700 Subject: [PATCH 014/682] panic.c: fix whitespace additions trivial: remove white space addition in stack protector Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/panic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 50cf9257b23..866be9b72e4 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -341,14 +341,14 @@ static noinline void __stack_chk_test_func(void) * Besides, if we would, the test is already failed anyway so * time to pull the emergency brake on it. */ - if ((unsigned long)__builtin_return_address(0) == + if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+1)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); return; } #ifdef CONFIG_FRAME_POINTER /* We also don't want to clobber the frame pointer */ - if ((unsigned long)__builtin_return_address(0) == + if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+2)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); return; From b40a4392a3c262e0d1b5379b4e142a8eefa63439 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 18 Apr 2008 06:16:45 -0700 Subject: [PATCH 015/682] stackprotector: turn not having the right gcc into a #warning If the user selects the stack-protector config option, but does not have a gcc that has the right bits enabled (for example because it isn't build with a glibc that supports TLS, as is common for cross-compilers, but also because it may be too old), then the runtime test fails right now. This patch adds a warning message for this scenario. This warning accomplishes two goals 1) the user is informed that the security option he selective isn't available 2) the user is suggested to turn of the CONFIG option that won't work for him, and would make the runtime test fail anyway. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Makefile | 2 +- kernel/panic.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 3cff3c894cf..c3e0eeeb1dd 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -73,7 +73,7 @@ else stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ - "$(CC)" -fstack-protector ) + "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ "$(CC)" -fstack-protector-all ) diff --git a/kernel/panic.c b/kernel/panic.c index 866be9b72e4..6729e3f4ebc 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -325,6 +325,9 @@ EXPORT_SYMBOL(warn_on_slowpath); #ifdef CONFIG_CC_STACKPROTECTOR +#ifndef GCC_HAS_SP +#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. +#endif static unsigned long __stack_check_testing; /* * Self test function for the stack-protector feature. From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 22 Apr 2008 16:38:23 -0500 Subject: [PATCH 016/682] stackprotector: use canary at end of stack to indicate overruns at oops time (Updated with a common max-stack-used checker that knows about the canary, as suggested by Joe Perches) Use a canary at the end of the stack to clearly indicate at oops time whether the stack has ever overflowed. This is a very simple implementation with a couple of drawbacks: 1) a thread may legitimately use exactly up to the last word on the stack -- but the chances of doing this and then oopsing later seem slim 2) it's possible that the stack usage isn't dense enough that the canary location could get skipped over -- but the worst that happens is that we don't flag the overrun -- though this happens fairly often in my testing :( With the code in place, an intentionally-bloated stack oops might do: BUG: unable to handle kernel paging request at ffff8103f84cc680 IP: [] update_curr+0x9a/0xa8 PGD 8063 PUD 0 Thread overran stack or stack corrupted Oops: 0000 [1] SMP CPU 0 ... ... unless the stack overrun is so bad that it corrupts some other thread. Signed-off-by: Eric Sandeen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault.c | 7 +++++++ include/linux/magic.h | 1 + include/linux/sched.h | 13 +++++++++++++ kernel/exit.c | 5 +---- kernel/fork.c | 5 +++++ kernel/sched.c | 7 +------ 6 files changed, 28 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fd7e1798c75..1f524df68b9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -581,6 +582,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) unsigned long address; int write, si_code; int fault; + unsigned long *stackend; + #ifdef CONFIG_X86_64 unsigned long flags; #endif @@ -850,6 +853,10 @@ no_context: show_fault_oops(regs, error_code, address); + stackend = end_of_stack(tsk); + if (*stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; diff --git a/include/linux/magic.h b/include/linux/magic.h index 1fa0c2ce4de..74e68e20116 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -42,4 +42,5 @@ #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA +#define STACK_END_MAGIC 0x57AC6E9D #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6a51515878..c5181e77f30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p) extern void thread_info_cache_init(void); +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ + n++; + } while (!*n); + + return (unsigned long)n - (unsigned long)end_of_stack(p); +} +#endif + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ diff --git a/kernel/exit.c b/kernel/exit.c index 8f6185e69b6..fb8de6cbf2c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -899,12 +899,9 @@ static void check_stack_usage(void) { static DEFINE_SPINLOCK(low_water_lock); static int lowest_to_date = THREAD_SIZE; - unsigned long *n = end_of_stack(current); unsigned long free; - while (*n == 0) - n++; - free = (unsigned long)n - (unsigned long)end_of_stack(current); + free = stack_not_used(current); if (free >= lowest_to_date) return; diff --git a/kernel/fork.c b/kernel/fork.c index 19908b26cf8..d428336e7aa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; struct thread_info *ti; + unsigned long *stackend; + int err; prepare_to_copy(orig); @@ -211,6 +214,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) goto out; setup_thread_stack(tsk, orig); + stackend = end_of_stack(tsk); + *stackend = STACK_END_MAGIC; /* for overflow detection */ #ifdef CONFIG_CC_STACKPROTECTOR tsk->stack_canary = get_random_int(); diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a9153..a964ed94509 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5748,12 +5748,7 @@ void sched_show_task(struct task_struct *p) printk(KERN_CONT " %016lx ", thread_saved_pc(p)); #endif #ifdef CONFIG_DEBUG_STACK_USAGE - { - unsigned long *n = end_of_stack(p); - while (!*n) - n++; - free = (unsigned long)n - (unsigned long)end_of_stack(p); - } + free = stack_not_used(p); #endif printk(KERN_CONT "%5lu %5d %6d\n", free, task_pid_nr(p), task_pid_nr(p->real_parent)); From aa92db14270b79f0f91a9060b547a46f9e2639da Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 11 Jul 2008 05:09:55 -0700 Subject: [PATCH 017/682] stackprotector: better self-test check stackprotector functionality by manipulating the canary briefly during bootup. far more robust than trying to overflow the stack. (which is architecture dependent, etc.) Signed-off-by: Ingo Molnar --- kernel/panic.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 6729e3f4ebc..28153aec710 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -347,22 +347,18 @@ static noinline void __stack_chk_test_func(void) if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+1)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - return; } #ifdef CONFIG_FRAME_POINTER /* We also don't want to clobber the frame pointer */ if ((unsigned long)__builtin_return_address(0) == *(((unsigned long *)&foo)+2)) { printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - return; } #endif - barrier(); - if (current->stack_canary == *(((unsigned long *)&foo)+1)) - *(((unsigned long *)&foo)+1) = 0; - else + if (current->stack_canary != *(((unsigned long *)&foo)+1)) printk(KERN_ERR "No -fstack-protector canary found\n"); - barrier(); + + current->stack_canary = ~current->stack_canary; } static int __stack_chk_test(void) @@ -373,7 +369,8 @@ static int __stack_chk_test(void) if (__stack_check_testing) { printk(KERN_ERR "-fstack-protector-all test failed\n"); WARN_ON(1); - } + }; + current->stack_canary = ~current->stack_canary; return 0; } /* From af9ff7868f0f76d3364351b1641b9dfa99588e77 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 12 Jul 2008 09:36:38 -0700 Subject: [PATCH 018/682] x86: simplify stackprotector self-check Clean up the code by removing no longer needed code; make sure the pda is updated and kept in sync Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/asm-x86/pda.h | 1 + kernel/panic.c | 29 +++++++---------------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index 62b734986a4..a5ff5bb7629 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -131,4 +131,5 @@ do { \ #define PDA_STACKOFFSET (5*8) +#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) #endif diff --git a/kernel/panic.c b/kernel/panic.c index 28153aec710..87445a894c3 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -328,37 +328,21 @@ EXPORT_SYMBOL(warn_on_slowpath); #ifndef GCC_HAS_SP #warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. #endif + static unsigned long __stack_check_testing; + /* * Self test function for the stack-protector feature. * This test requires that the local variable absolutely has - * a stack slot, hence the barrier()s. + * a stack slot. */ static noinline void __stack_chk_test_func(void) { - unsigned long foo; - barrier(); - /* - * we need to make sure we're not about to clobber the return address, - * while real exploits do this, it's unhealthy on a running system. - * Besides, if we would, the test is already failed anyway so - * time to pull the emergency brake on it. - */ - if ((unsigned long)__builtin_return_address(0) == - *(((unsigned long *)&foo)+1)) { - printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - } -#ifdef CONFIG_FRAME_POINTER - /* We also don't want to clobber the frame pointer */ - if ((unsigned long)__builtin_return_address(0) == - *(((unsigned long *)&foo)+2)) { - printk(KERN_ERR "No -fstack-protector-stack-frame!\n"); - } -#endif - if (current->stack_canary != *(((unsigned long *)&foo)+1)) - printk(KERN_ERR "No -fstack-protector canary found\n"); + unsigned long dummy_buffer[64]; /* force gcc to use the canary */ current->stack_canary = ~current->stack_canary; + refresh_stack_canary(); + dummy_buffer[3] = 1; /* fool gcc into keeping the variable */ } static int __stack_chk_test(void) @@ -371,6 +355,7 @@ static int __stack_chk_test(void) WARN_ON(1); }; current->stack_canary = ~current->stack_canary; + refresh_stack_canary(); return 0; } /* From 4f962d4d65923d7b722192e729840cfb79af0a5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Jul 2008 21:42:44 +0200 Subject: [PATCH 019/682] stackprotector: remove self-test turns out gcc generates such stackprotector-failure sequences in certain circumstances: movq -8(%rbp), %rax # D.16032, xorq %gs:40, %rax #, jne .L17 #, leave ret .L17: call __stack_chk_fail # .size __stack_chk_test_func, .-__stack_chk_test_func .section .init.text,"ax",@progbits .type panic_setup, @function panic_setup: pushq %rbp # note that there's no jump back to the failing context after the call to __stack_chk_fail - i.e. it has a ((noreturn)) attribute. Which is fair enough in the normal case but kills the self-test. (as we cannot reliably return in the self-test) Signed-off-by: Ingo Molnar --- kernel/panic.c | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 87445a894c3..c35c9eca3eb 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -329,62 +329,15 @@ EXPORT_SYMBOL(warn_on_slowpath); #warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. #endif -static unsigned long __stack_check_testing; - -/* - * Self test function for the stack-protector feature. - * This test requires that the local variable absolutely has - * a stack slot. - */ -static noinline void __stack_chk_test_func(void) -{ - unsigned long dummy_buffer[64]; /* force gcc to use the canary */ - - current->stack_canary = ~current->stack_canary; - refresh_stack_canary(); - dummy_buffer[3] = 1; /* fool gcc into keeping the variable */ -} - -static int __stack_chk_test(void) -{ - printk(KERN_INFO "Testing -fstack-protector-all feature\n"); - __stack_check_testing = (unsigned long)&__stack_chk_test_func; - __stack_chk_test_func(); - if (__stack_check_testing) { - printk(KERN_ERR "-fstack-protector-all test failed\n"); - WARN_ON(1); - }; - current->stack_canary = ~current->stack_canary; - refresh_stack_canary(); - return 0; -} /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ void __stack_chk_fail(void) { - if (__stack_check_testing == (unsigned long)&__stack_chk_test_func) { - long delta; - - delta = (unsigned long)__builtin_return_address(0) - - __stack_check_testing; - /* - * The test needs to happen inside the test function, so - * check if the return address is close to that function. - * The function is only 2 dozen bytes long, but keep a wide - * safety margin to avoid panic()s for normal users regardless - * of the quality of the compiler. - */ - if (delta >= 0 && delta <= 400) { - __stack_check_testing = 0; - return; - } - } panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); } EXPORT_SYMBOL(__stack_chk_fail); -late_initcall(__stack_chk_test); #endif From bc22c17e12c130dc929218a95aa347e0f3fd05dc Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:16 +0100 Subject: [PATCH 020/682] bzip2/lzma: library support for gzip, bzip2 and lzma decompression Impact: Replaces inflate.c with a wrapper around zlib_inflate; new library code This is the first part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma's decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - changed inflate.c to accomodate rest of patch - implementation of bzip2 compression (not used at this stage yet) - implementation of lzma compression (not used at this stage yet) - Makefile routines to support bzip2 and lzma kernel compression Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- include/linux/decompress/bunzip2.h | 10 + include/linux/decompress/generic.h | 30 ++ include/linux/decompress/inflate.h | 13 + include/linux/decompress/mm.h | 87 ++++ include/linux/decompress/unlzma.h | 12 + lib/decompress_bunzip2.c | 735 +++++++++++++++++++++++++++++ lib/decompress_inflate.c | 167 +++++++ lib/decompress_unlzma.c | 647 +++++++++++++++++++++++++ lib/zlib_inflate/inflate.h | 4 + lib/zlib_inflate/inftrees.h | 4 + scripts/Makefile.lib | 14 + scripts/bin_size | 10 + 12 files changed, 1733 insertions(+) create mode 100644 include/linux/decompress/bunzip2.h create mode 100644 include/linux/decompress/generic.h create mode 100644 include/linux/decompress/inflate.h create mode 100644 include/linux/decompress/mm.h create mode 100644 include/linux/decompress/unlzma.h create mode 100644 lib/decompress_bunzip2.c create mode 100644 lib/decompress_inflate.c create mode 100644 lib/decompress_unlzma.c create mode 100644 scripts/bin_size diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h new file mode 100644 index 00000000000..115272137a9 --- /dev/null +++ b/include/linux/decompress/bunzip2.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_BUNZIP2_H +#define DECOMPRESS_BUNZIP2_H + +int bunzip2(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h new file mode 100644 index 00000000000..f847f514f78 --- /dev/null +++ b/include/linux/decompress/generic.h @@ -0,0 +1,30 @@ +#ifndef DECOMPRESS_GENERIC_H +#define DECOMPRESS_GENERIC_H + +/* Minimal chunksize to be read. + *Bzip2 prefers at least 4096 + *Lzma prefers 0x10000 */ +#define COMPR_IOBUF_SIZE 4096 + +typedef int (*decompress_fn) (unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*writebb)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x)); + +/* inbuf - input buffer + *len - len of pre-read data in inbuf + *fill - function to fill inbuf if empty + *writebb - function to write out outbug + *posp - if non-null, input position (number of bytes read) will be + * returned here + * + *If len != 0, the inbuf is initialized (with as much data), and fill + *should not be called + *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE + *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE + */ + + +#endif diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h new file mode 100644 index 00000000000..f9b06ccc3e5 --- /dev/null +++ b/include/linux/decompress/inflate.h @@ -0,0 +1,13 @@ +#ifndef INFLATE_H +#define INFLATE_H + +/* Other housekeeping constants */ +#define INBUFSIZ 4096 + +int gunzip(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error_fn)(char *x)); +#endif diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h new file mode 100644 index 00000000000..12ff8c3f1d0 --- /dev/null +++ b/include/linux/decompress/mm.h @@ -0,0 +1,87 @@ +/* + * linux/compr_mm.h + * + * Memory management for pre-boot and ramdisk uncompressors + * + * Authors: Alain Knaff + * + */ + +#ifndef DECOMPR_MM_H +#define DECOMPR_MM_H + +#ifdef STATIC + +/* Code active when included from pre-boot environment: */ + +/* A trivial malloc implementation, adapted from + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + */ +static unsigned long malloc_ptr; +static int malloc_count; + +static void *malloc(int size) +{ + void *p; + + if (size < 0) + error("Malloc error"); + if (!malloc_ptr) + malloc_ptr = free_mem_ptr; + + malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + + p = (void *)malloc_ptr; + malloc_ptr += size; + + if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) + error("Out of memory"); + + malloc_count++; + return p; +} + +static void free(void *where) +{ + malloc_count--; + if (!malloc_count) + malloc_ptr = free_mem_ptr; +} + +#define large_malloc(a) malloc(a) +#define large_free(a) free(a) + +#define set_error_fn(x) + +#define INIT + +#else /* STATIC */ + +/* Code active when compiled standalone for use when loading ramdisk: */ + +#include +#include +#include +#include + +/* Use defines rather than static inline in order to avoid spurious + * warnings when not needed (indeed large_malloc / large_free are not + * needed by inflate */ + +#define malloc(a) kmalloc(a, GFP_KERNEL) +#define free(a) kfree(a) + +#define large_malloc(a) vmalloc(a) +#define large_free(a) vfree(a) + +static void(*error)(char *m); +#define set_error_fn(x) error = x; + +#define INIT __init +#define STATIC + +#include + +#endif /* STATIC */ + +#endif /* DECOMPR_MM_H */ diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h new file mode 100644 index 00000000000..7796538f1bf --- /dev/null +++ b/include/linux/decompress/unlzma.h @@ -0,0 +1,12 @@ +#ifndef DECOMPRESS_UNLZMA_H +#define DECOMPRESS_UNLZMA_H + +int unlzma(unsigned char *, int, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ); + +#endif diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c new file mode 100644 index 00000000000..5d3ddb5fcfd --- /dev/null +++ b/lib/decompress_bunzip2.c @@ -0,0 +1,735 @@ +/* vi: set sw = 4 ts = 4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + This code is licensed under the LGPLv2: + LGPL (http://www.gnu.org/copyleft/lgpl.html +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_unzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization in the name of the woman I loved, who + passed away Feb. 12, 2003. + + In memory of Toni W. Hagan + + Hospice of Acadiana, Inc. + 2600 Johnston St., Suite 200 + Lafayette, LA 70503-3240 + + Phone (337) 232-1234 or 1-800-738-2226 + Fax (337) 232-1297 + + http://www.hospiceacadiana.com/ + + Manuel + */ + +/* + Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) +*/ + + +#ifndef STATIC +#include +#endif /* !STATIC */ + +#include + +#ifndef INT_MAX +#define INT_MAX 0x7fffffff +#endif + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define BZIP2_IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinal value. */ + int limit[MAX_HUFCODE_BITS+1]; + int base[MAX_HUFCODE_BITS]; + int permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + memory that persists between calls to bunzip */ +struct bunzip_data { + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; + /* I/O tracking data (file handles, buffers, positions, etc.) */ + int (*fill)(void*, unsigned int); + int inbufCount, inbufPos /*, outbufPos*/; + unsigned char *inbuf /*,*outbuf*/; + unsigned int inbufBitCount, inbufBits; + /* The CRC values stored in the block header and calculated from the + data */ + unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; + /* Intermediate buffer and its size (in bytes) */ + unsigned int *dbuf, dbufSize; + /* These things are a bit too big to go on the stack */ + unsigned char selectors[32768]; /* nSelectors = 15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ + int io_error; /* non-zero if we have IO error */ +}; + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ +static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) +{ + unsigned int bits = 0; + + /* If we need to get more data from the byte buffer, do so. + (Loop getting one byte at a time to enforce endianness and avoid + unaligned access.) */ + while (bd->inbufBitCount < bits_wanted) { + /* If we need to read more data from file into byte buffer, do + so */ + if (bd->inbufPos == bd->inbufCount) { + if (bd->io_error) + return 0; + bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); + if (bd->inbufCount <= 0) { + bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; + return 0; + } + bd->inbufPos = 0; + } + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bd->inbufBitCount >= 24) { + bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); + bits_wanted -= bd->inbufBitCount; + bits <<= bits_wanted; + bd->inbufBitCount = 0; + } + /* Grab next 8 bits of input from buffer. */ + bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + } + /* Calculate result */ + bd->inbufBitCount -= bits_wanted; + bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ + +static int INIT get_next_block(struct bunzip_data *bd) +{ + struct group_data *hufGroup = NULL; + int *base = NULL; + int *limit = NULL; + int dbufCount, nextSym, dbufSize, groupCount, selector, + i, j, k, t, runPos, symCount, symTotal, nSelectors, + byteCount[256]; + unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; + unsigned int *dbuf, origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) + return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) + return RETVAL_NOT_BZIP_DATA; + /* We can add support for blockRandomised if anybody complains. + There was some code for this in busybox 1.0.0-pre3, but nobody ever + noticed that it didn't actually work. */ + if (get_bits(bd, 1)) + return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if (origPtr > dbufSize) + return RETVAL_DATA_ERROR; + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the + symbols back to the corresponding bytes. */ + t = get_bits(bd, 16); + symTotal = 0; + for (i = 0; i < 16; i++) { + if (t&(1 << (15-i))) { + k = get_bits(bd, 16); + for (j = 0; j < 16; j++) + if (k&(1 << (15-j))) + symToByte[symTotal++] = (16*i)+j; + } + } + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + /* nSelectors: Every GROUP_SIZE many symbols we select a new + Huffman coding group. Read in the group selector list, + which is stored as MTF encoded bit runs. (MTF = Move To + Front, as each value is used it's moved to the start of the + list.) */ + nSelectors = get_bits(bd, 15); + if (!nSelectors) + return RETVAL_DATA_ERROR; + for (i = 0; i < groupCount; i++) + mtfSymbol[i] = i; + for (i = 0; i < nSelectors; i++) { + /* Get next value */ + for (j = 0; get_bits(bd, 1); j++) + if (j >= groupCount) + return RETVAL_DATA_ERROR; + /* Decode MTF to get the next selector */ + uc = mtfSymbol[j]; + for (; j; j--) + mtfSymbol[j] = mtfSymbol[j-1]; + mtfSymbol[0] = selectors[i] = uc; + } + /* Read the Huffman coding tables for each group, which code + for symTotal literal symbols, plus two run symbols (RUNA, + RUNB) */ + symCount = symTotal+2; + for (j = 0; j < groupCount; j++) { + unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp; + /* Read Huffman code lengths for each symbol. They're + stored in a way similar to mtf; record a starting + value for the first symbol, and an offset from the + previous value for everys symbol after that. + (Subtracting 1 before the loop and then adding it + back at the end is an optimization that makes the + test inside the loop simpler: symbol length 0 + becomes negative, so an unsigned inequality catches + it.) */ + t = get_bits(bd, 5)-1; + for (i = 0; i < symCount; i++) { + for (;;) { + if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. Else + second bit indicates whether to + increment or decrement the value. + Optimization: grab 2 bits and unget + the second if the first was 0. */ + + k = get_bits(bd, 2); + if (k < 2) { + bd->inbufBitCount++; + break; + } + /* Add one if second bit 1, else + * subtract 1. Avoids if/else */ + t += (((k+1)&2)-1); + } + /* Correct for the initial -1, to get the + * final symbol length */ + length[i] = t+1; + } + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) + maxLen = length[i]; + else if (length[i] < minLen) + minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from + * length[]. + * + * permute[] is the lookup table for converting + * Huffman coded symbols into decoded symbols. base[] + * is the amount to subtract from the value of a + * Huffman symbol of a given length when using + * permute[]. + * + * limit[] indicates the largest numerical value a + * symbol with a given number of bits can have. This + * is how the Huffman codes can vary in length: each + * code with a value > limit[length] needs another + * bit. + */ + hufGroup = bd->groups+j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + /* Note that minLen can't be smaller than 1, so we + adjust the base and limit array pointers so we're + not always wasting the first entry. We do this + again when using them (during symbol decoding).*/ + base = hufGroup->base-1; + limit = hufGroup->limit-1; + /* Calculate permute[]. Concurently, initialize + * temp[] and limit[]. */ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + temp[i] = limit[i] = 0; + for (t = 0; t < symCount; t++) + if (length[t] == i) + hufGroup->permute[pp++] = t; + } + /* Count symbols coded for at each bit length */ + for (i = 0; i < symCount; i++) + temp[length[i]]++; + /* Calculate limit[] (the largest symbol-coding value + *at each bit length, which is (previous limit << + *1)+symbols at this level), and base[] (number of + *symbols to ignore at each bit length, which is limit + *minus the cumulative count of symbols coded for + *already). */ + pp = t = 0; + for (i = minLen; i < maxLen; i++) { + pp += temp[i]; + /* We read the largest possible symbol size + and then unget bits after determining how + many we need, and those extra bits could be + set to anything. (They're noise from + future symbols.) At each level we're + really only interested in the first few + bits, so here we set all the trailing + to-be-ignored bits to 1 so they don't + affect the value > limit[length] + comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + base[i+1] = pp-(t += temp[i]); + } + limit[maxLen+1] = INT_MAX; /* Sentinal value for + * reading next sym. */ + limit[maxLen] = pp+temp[maxLen]-1; + base[minLen] = 0; + } + /* We've finished reading and digesting the block header. Now + read this block's Huffman coded symbols from the file and + undo the Huffman coding and run length encoding, saving the + result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To + * Front table */ + for (i = 0; i < 256; i++) { + byteCount[i] = 0; + mtfSymbol[i] = (unsigned char)i; + } + /* Loop through compressed symbols. */ + runPos = dbufCount = symCount = selector = 0; + for (;;) { + /* Determine which Huffman coding group to use. */ + if (!(symCount--)) { + symCount = GROUP_SIZE-1; + if (selector >= nSelectors) + return RETVAL_DATA_ERROR; + hufGroup = bd->groups+selectors[selector++]; + base = hufGroup->base-1; + limit = hufGroup->limit-1; + } + /* Read next Huffman-coded symbol. */ + /* Note: It is far cheaper to read maxLen bits and + back up than it is to read minLen bits and then an + additional bit at a time, testing as we go. + Because there is a trailing last block (with file + CRC), there is no danger of the overread causing an + unexpected EOF for a valid compressed file. As a + further optimization, we do the read inline + (falling back to a call to get_bits if the buffer + runs dry). The following (up to got_huff_bits:) is + equivalent to j = get_bits(bd, hufGroup->maxLen); + */ + while (bd->inbufBitCount < hufGroup->maxLen) { + if (bd->inbufPos == bd->inbufCount) { + j = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = + (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount -= hufGroup->maxLen; + j = (bd->inbufBits >> bd->inbufBitCount)& + ((1 << hufGroup->maxLen)-1); +got_huff_bits: + /* Figure how how many bits are in next symbol and + * unget extras */ + i = hufGroup->minLen; + while (j > limit[i]) + ++i; + bd->inbufBitCount += (hufGroup->maxLen - i); + /* Huffman decode value to get nextSym (with bounds checking) */ + if ((i > hufGroup->maxLen) + || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) + >= MAX_SYMBOLS)) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[j]; + /* We have now decoded the symbol, which indicates + either a new literal byte, or a repeated run of the + most recent literal byte. First, check if nextSym + indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. */ + if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ + /* If this is the start of a new run, zero out + * counter */ + if (!runPos) { + runPos = 1; + t = 0; + } + /* Neat trick that saves 1 symbol: instead of + or-ing 0 or 1 at each bit position, add 1 + or 2 instead. For example, 1011 is 1 << 0 + + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 + + 1 << 2. You can make any bit pattern + that way using 1 less symbol than the basic + or 0/1 method (except all bits 0, which + would use no symbols, but a run of length 0 + doesn't mean anything in this context). + Thus space is saved. */ + t += (runPos << nextSym); + /* +runPos if RUNA; +2*runPos if RUNB */ + + runPos <<= 1; + continue; + } + /* When we hit the first non-run symbol after a run, + we now know how many times to repeat the last + literal, so append that many copies to our buffer + of decoded symbols (dbuf) now. (The last literal + used is the one at the head of the mtfSymbol + array.) */ + if (runPos) { + runPos = 0; + if (dbufCount+t >= dbufSize) + return RETVAL_DATA_ERROR; + + uc = symToByte[mtfSymbol[0]]; + byteCount[uc] += t; + while (t--) + dbuf[dbufCount++] = uc; + } + /* Is this the terminating symbol? */ + if (nextSym > symTotal) + break; + /* At this point, nextSym indicates a new literal + character. Subtract one to get the position in the + MTF array at which this literal is currently to be + found. (Note that the result can't be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another + instance of the first symbol in the mtf array, + position 0, would have been handled as part of a + run above. Therefore 1 unused mtf position minus 2 + non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) + return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + /* Adjust the MTF array. Since we typically expect to + *move only a small number of symbols, and are bound + *by 256 in any case, using memmove here would + *typically be bigger and slower due to function call + *overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (unsigned int)uc; + } + /* At this point, we've read all the Huffman-coded symbols + (and repeated runs) for this block from the input stream, + and decoded them into the intermediate buffer. There are + dbufCount many decoded bytes in dbuf[]. Now undo the + Burrows-Wheeler transform on dbuf. See + http://dogma.net/markn/articles/bwt/bwt.htm + */ + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + k = j+byteCount[i]; + byteCount[i] = j; + j = k; + } + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uc = (unsigned char)(dbuf[i] & 0xff); + dbuf[byteCount[uc]] |= (i << 8); + byteCount[uc]++; + } + /* Decode first byte by hand to initialize "previous" byte. + Note that it doesn't get output, and if the first three + characters are identical it doesn't qualify as a run (hence + writeRunCountdown = 5). */ + if (dbufCount) { + if (origPtr >= dbufCount) + return RETVAL_DATA_ERROR; + bd->writePos = dbuf[origPtr]; + bd->writeCurrent = (unsigned char)(bd->writePos&0xff); + bd->writePos >>= 8; + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo burrows-wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd =-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. +*/ + +static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) +{ + const unsigned int *dbuf; + int pos, xcurrent, previous, gotcount; + + /* If last read was short due to end of file, return last block now */ + if (bd->writeCount < 0) + return bd->writeCount; + + gotcount = 0; + dbuf = bd->dbuf; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven't + Huffman-decoded a block into the intermediate buffer yet). */ + + if (bd->writeCopies) { + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + /* Loop outputting bytes */ + for (;;) { + /* If the output buffer is full, snapshot + * state and return */ + if (gotcount >= len) { + bd->writePos = pos; + bd->writeCurrent = xcurrent; + bd->writeCopies++; + return len; + } + /* Write next byte into output buffer, updating CRC */ + outbuf[gotcount++] = xcurrent; + bd->writeCRC = (((bd->writeCRC) << 8) + ^bd->crc32Table[((bd->writeCRC) >> 24) + ^xcurrent]); + /* Loop now if we're outputting multiple + * copies of this byte */ + if (bd->writeCopies) { + --bd->writeCopies; + continue; + } +decode_next_byte: + if (!bd->writeCount--) + break; + /* Follow sequence vector to undo + * Burrows-Wheeler transform */ + previous = xcurrent; + pos = dbuf[pos]; + xcurrent = pos&0xff; + pos >>= 8; + /* After 3 consecutive copies of the same + byte, the 4th is a repeat count. We count + down from 4 instead *of counting up because + testing for non-zero is faster */ + if (--bd->writeRunCountdown) { + if (xcurrent != previous) + bd->writeRunCountdown = 4; + } else { + /* We have a repeated run, this byte + * indicates the count */ + bd->writeCopies = xcurrent; + xcurrent = previous; + bd->writeRunCountdown = 5; + /* Sometimes there are just 3 bytes + * (run length 0) */ + if (!bd->writeCopies) + goto decode_next_byte; + /* Subtract the 1 copy we'd output + * anyway to get extras */ + --bd->writeCopies; + } + } + /* Decompression of this block completed successfully */ + bd->writeCRC = ~bd->writeCRC; + bd->totalCRC = ((bd->totalCRC << 1) | + (bd->totalCRC >> 31)) ^ bd->writeCRC; + /* If this block had a CRC error, force file level CRC error. */ + if (bd->writeCRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC+1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next + * block of input */ + /* (previous is just a convenient unused temp variable here) */ + previous = get_next_block(bd); + if (previous) { + bd->writeCount = previous; + return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; + } + bd->writeCRC = 0xffffffffUL; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + goto decode_next_byte; +} + +static int INIT nofill(void *buf, unsigned int len) +{ + return -1; +} + +/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. */ +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, + int (*fill)(void*, unsigned int)) +{ + struct bunzip_data *bd; + unsigned int i, j, c; + const unsigned int BZh0 = + (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) + +(((unsigned int)'h') << 8)+(unsigned int)'0'; + + /* Figure out how much data to allocate */ + i = sizeof(struct bunzip_data); + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = malloc(i); + memset(bd, 0, sizeof(struct bunzip_data)); + /* Setup input buffer */ + bd->inbuf = inbuf; + bd->inbufCount = len; + if (fill != NULL) + bd->fill = fill; + else + bd->fill = nofill; + + /* Init the CRC32 table (big endian) */ + for (i = 0; i < 256; i++) { + c = i << 24; + for (j = 8; j; j--) + c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); + bd->crc32Table[i] = c; + } + + /* Ensure that file starts with "BZh['1'-'9']." */ + i = get_bits(bd, 32); + if (((unsigned int)(i-BZh0-1)) >= 9) + return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + bd->dbufSize = 100000*(i-BZh0); + + bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); + return RETVAL_OK; +} + +/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, + not end of file.) */ +STATIC int INIT bunzip2(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *outbuf, + int *pos, + void(*error_fn)(char *x)) +{ + struct bunzip_data *bd; + int i = -1; + unsigned char *inbuf; + + set_error_fn(error_fn); + if (flush) + outbuf = malloc(BZIP2_IOBUF_SIZE); + else + len -= 4; /* Uncompressed size hack active in pre-boot + environment */ + if (!outbuf) { + error("Could not allocate output bufer"); + return -1; + } + if (buf) + inbuf = buf; + else + inbuf = malloc(BZIP2_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + i = start_bunzip(&bd, inbuf, len, fill); + if (!i) { + for (;;) { + i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); + if (i <= 0) + break; + if (!flush) + outbuf += i; + else + if (i != flush(outbuf, i)) { + i = RETVAL_UNEXPECTED_OUTPUT_EOF; + break; + } + } + } + /* Check CRC and release memory */ + if (i == RETVAL_LAST_BLOCK) { + if (bd->headerCRC != bd->totalCRC) + error("Data integrity error when decompressing."); + else + i = RETVAL_OK; + } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { + error("Compressed file ends unexpectedly"); + } + if (bd->dbuf) + large_free(bd->dbuf); + if (pos) + *pos = bd->inbufPos; + free(bd); + if (!buf) + free(inbuf); +exit_0: + if (flush) + free(outbuf); + return i; +} + +#define decompress bunzip2 diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c new file mode 100644 index 00000000000..163e66aea5f --- /dev/null +++ b/lib/decompress_inflate.c @@ -0,0 +1,167 @@ +#ifdef STATIC +/* Pre-boot environment: included */ + +/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots + * errors about console_printk etc... on ARM */ +#define _LINUX_KERNEL_H + +#include "zlib_inflate/inftrees.c" +#include "zlib_inflate/inffast.c" +#include "zlib_inflate/inflate.c" + +#else /* STATIC */ +/* initramfs et al: linked */ + +#include + +#include "zlib_inflate/inftrees.h" +#include "zlib_inflate/inffast.h" +#include "zlib_inflate/inflate.h" + +#include "zlib_inflate/infutil.h" + +#endif /* STATIC */ + +#include + +#define INBUF_LEN (16*1024) + +/* Included from initramfs et al code */ +STATIC int INIT gunzip(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *out_buf, + int *pos, + void(*error_fn)(char *x)) { + u8 *zbuf; + struct z_stream_s *strm; + int rc; + size_t out_len; + + set_error_fn(error_fn); + rc = -1; + if (flush) { + out_len = 0x8100; /* 32 K */ + out_buf = malloc(out_len); + } else { + out_len = 0x7fffffff; /* no limit */ + } + if (!out_buf) { + error("Out of memory while allocating output buffer"); + goto gunzip_nomem1; + } + + if (buf) + zbuf = buf; + else { + zbuf = malloc(INBUF_LEN); + len = 0; + } + if (!zbuf) { + error("Out of memory while allocating input buffer"); + goto gunzip_nomem2; + } + + strm = malloc(sizeof(*strm)); + if (strm == NULL) { + error("Out of memory while allocating z_stream"); + goto gunzip_nomem3; + } + + strm->workspace = malloc(flush ? zlib_inflate_workspacesize() : + sizeof(struct inflate_state)); + if (strm->workspace == NULL) { + error("Out of memory while allocating workspace"); + goto gunzip_nomem4; + } + + if (len == 0) + len = fill(zbuf, INBUF_LEN); + + /* verify the gzip header */ + if (len < 10 || + zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) { + if (pos) + *pos = 0; + error("Not a gzip file"); + goto gunzip_5; + } + + /* skip over gzip header (1f,8b,08... 10 bytes total + + * possible asciz filename) + */ + strm->next_in = zbuf + 10; + /* skip over asciz filename */ + if (zbuf[3] & 0x8) { + while (strm->next_in[0]) + strm->next_in++; + strm->next_in++; + } + strm->avail_in = len - 10; + + strm->next_out = out_buf; + strm->avail_out = out_len; + + rc = zlib_inflateInit2(strm, -MAX_WBITS); + + if (!flush) { + WS(strm)->inflate_state.wsize = 0; + WS(strm)->inflate_state.window = NULL; + } + + while (rc == Z_OK) { + if (strm->avail_in == 0) { + /* TODO: handle case where both pos and fill are set */ + len = fill(zbuf, INBUF_LEN); + if (len < 0) { + rc = -1; + error("read error"); + break; + } + strm->next_in = zbuf; + strm->avail_in = len; + } + rc = zlib_inflate(strm, 0); + + /* Write any data generated */ + if (flush && strm->next_out > out_buf) { + int l = strm->next_out - out_buf; + if (l != flush(out_buf, l)) { + rc = -1; + error("write error"); + break; + } + strm->next_out = out_buf; + strm->avail_out = out_len; + } + + /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ + if (rc == Z_STREAM_END) { + rc = 0; + break; + } else if (rc != Z_OK) { + error("uncompression error"); + rc = -1; + } + } + + zlib_inflateEnd(strm); + if (pos) + /* add + 8 to skip over trailer */ + *pos = strm->next_in - zbuf+8; + +gunzip_5: + free(strm->workspace); +gunzip_nomem4: + free(strm); +gunzip_nomem3: + if (!buf) + free(zbuf); +gunzip_nomem2: + if (flush) + free(out_buf); +gunzip_nomem1: + return rc; /* returns Z_OK (0) if successful */ +} + +#define decompress gunzip diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c new file mode 100644 index 00000000000..546f2f4c157 --- /dev/null +++ b/lib/decompress_unlzma.c @@ -0,0 +1,647 @@ +/* Lzma decompressor for Linux kernel. Shamelessly snarfed + *from busybox 1.1.1 + * + *Linux kernel adaptation + *Copyright (C) 2006 Alain < alain@knaff.lu > + * + *Based on small lzma deflate implementation/Small range coder + *implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Copyright (C) 1999-2005 Igor Pavlov + * + *Copyrights of the parts, see headers below. + * + * + *This program is free software; you can redistribute it and/or + *modify it under the terms of the GNU Lesser General Public + *License as published by the Free Software Foundation; either + *version 2.1 of the License, or (at your option) any later version. + * + *This program is distributed in the hope that it will be useful, + *but WITHOUT ANY WARRANTY; without even the implied warranty of + *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + *Lesser General Public License for more details. + * + *You should have received a copy of the GNU Lesser General Public + *License along with this library; if not, write to the Free Software + *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef STATIC +#include +#endif /* STATIC */ + +#include + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +static long long INIT read_int(unsigned char *ptr, int size) +{ + int i; + long long ret = 0; + + for (i = 0; i < size; i++) + ret = (ret << 8) | ptr[size-i-1]; + return ret; +} + +#define ENDIAN_CONVERT(x) \ + x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) + + +/* Small range coder implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Copyright (c) 1999-2005 Igor Pavlov + */ + +#include + +#define LZMA_IOBUF_SIZE 0x10000 + +struct rc { + int (*fill)(void*, unsigned int); + uint8_t *ptr; + uint8_t *buffer; + uint8_t *buffer_end; + int buffer_size; + uint32_t code; + uint32_t range; + uint32_t bound; +}; + + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +/* Called twice: once at startup and once in rc_normalize() */ +static void INIT rc_read(struct rc *rc) +{ + rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); + if (rc->buffer_size <= 0) + error("unexpected EOF"); + rc->ptr = rc->buffer; + rc->buffer_end = rc->buffer + rc->buffer_size; +} + +/* Called once */ +static inline void INIT rc_init(struct rc *rc, + int (*fill)(void*, unsigned int), + char *buffer, int buffer_size) +{ + rc->fill = fill; + rc->buffer = (uint8_t *)buffer; + rc->buffer_size = buffer_size; + rc->buffer_end = rc->buffer + rc->buffer_size; + rc->ptr = rc->buffer; + + rc->code = 0; + rc->range = 0xFFFFFFFF; +} + +static inline void INIT rc_init_code(struct rc *rc) +{ + int i; + + for (i = 0; i < 5; i++) { + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; + } +} + + +/* Called once. TODO: bb_maybe_free() */ +static inline void INIT rc_free(struct rc *rc) +{ + free(rc->buffer); +} + +/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ +static void INIT rc_do_normalize(struct rc *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} +static inline void INIT rc_normalize(struct rc *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) + rc_do_normalize(rc); +} + +/* Called 9 times */ +/* Why rc_is_bit_0_helper exists? + *Because we want to always expose (rc->code < rc->bound) to optimizer + */ +static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + return rc->bound; +} +static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) +{ + uint32_t t = rc_is_bit_0_helper(rc, p); + return rc->code < t; +} + +/* Called ~10 times, but very small, thus inlined */ +static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) +{ + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +} +static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) +{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +} + +/* Called 4 times in unlzma loop */ +static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) +{ + if (rc_is_bit_0(rc, p)) { + rc_update_bit_0(rc, p); + *symbol *= 2; + return 0; + } else { + rc_update_bit_1(rc, p); + *symbol = *symbol * 2 + 1; + return 1; + } +} + +/* Called once */ +static inline int INIT rc_direct_bit(struct rc *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static inline void INIT +rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + */ + + +struct lzma_header { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} __attribute__ ((packed)) ; + + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LZMA_NUM_POS_BITS_MAX 4 + +#define LZMA_LEN_NUM_LOW_BITS 3 +#define LZMA_LEN_NUM_MID_BITS 3 +#define LZMA_LEN_NUM_HIGH_BITS 8 + +#define LZMA_LEN_CHOICE 0 +#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) +#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) +#define LZMA_LEN_MID (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) +#define LZMA_LEN_HIGH (LZMA_LEN_MID \ + +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) +#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) + +#define LZMA_NUM_STATES 12 +#define LZMA_NUM_LIT_STATES 7 + +#define LZMA_START_POS_MODEL_INDEX 4 +#define LZMA_END_POS_MODEL_INDEX 14 +#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) + +#define LZMA_NUM_POS_SLOT_BITS 6 +#define LZMA_NUM_LEN_TO_POS_STATES 4 + +#define LZMA_NUM_ALIGN_BITS 4 + +#define LZMA_MATCH_MIN_LEN 2 + +#define LZMA_IS_MATCH 0 +#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) +#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) +#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) +#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) +#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_SPEC_POS (LZMA_POS_SLOT \ + +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) +#define LZMA_ALIGN (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) +#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) +#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) +#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) + + +struct writer { + uint8_t *buffer; + uint8_t previous_byte; + size_t buffer_pos; + int bufsize; + size_t global_pos; + int(*flush)(void*, unsigned int); + struct lzma_header *header; +}; + +struct cstate { + int state; + uint32_t rep0, rep1, rep2, rep3; +}; + +static inline size_t INIT get_pos(struct writer *wr) +{ + return + wr->global_pos + wr->buffer_pos; +} + +static inline uint8_t INIT peek_old_byte(struct writer *wr, + uint32_t offs) +{ + if (!wr->flush) { + int32_t pos; + while (offs > wr->header->dict_size) + offs -= wr->header->dict_size; + pos = wr->buffer_pos - offs; + return wr->buffer[pos]; + } else { + uint32_t pos = wr->buffer_pos - offs; + while (pos >= wr->header->dict_size) + pos += wr->header->dict_size; + return wr->buffer[pos]; + } + +} + +static inline void INIT write_byte(struct writer *wr, uint8_t byte) +{ + wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; + if (wr->flush && wr->buffer_pos == wr->header->dict_size) { + wr->buffer_pos = 0; + wr->global_pos += wr->header->dict_size; + wr->flush((char *)wr->buffer, wr->header->dict_size); + } +} + + +static inline void INIT copy_byte(struct writer *wr, uint32_t offs) +{ + write_byte(wr, peek_old_byte(wr, offs)); +} + +static inline void INIT copy_bytes(struct writer *wr, + uint32_t rep0, int len) +{ + do { + copy_byte(wr, rep0); + len--; + } while (len != 0 && wr->buffer_pos < wr->header->dst_size); +} + +static inline void INIT process_bit0(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob, + int lc, uint32_t literal_pos_mask) { + int mi = 1; + rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE + * (((get_pos(wr) & literal_pos_mask) << lc) + + (wr->previous_byte >> (8 - lc)))) + ); + + if (cst->state >= LZMA_NUM_LIT_STATES) { + int match_byte = peek_old_byte(wr, cst->rep0); + do { + int bit; + uint16_t *prob_lit; + + match_byte <<= 1; + bit = match_byte & 0x100; + prob_lit = prob + 0x100 + bit + mi; + if (rc_get_bit(rc, prob_lit, &mi)) { + if (!bit) + break; + } else { + if (bit) + break; + } + } while (mi < 0x100); + } + while (mi < 0x100) { + uint16_t *prob_lit = prob + mi; + rc_get_bit(rc, prob_lit, &mi); + } + write_byte(wr, mi); + if (cst->state < 4) + cst->state = 0; + else if (cst->state < 10) + cst->state -= 3; + else + cst->state -= 6; +} + +static inline void INIT process_bit1(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob) { + int offset; + uint16_t *prob_len; + int num_bits; + int len; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + cst->rep3 = cst->rep2; + cst->rep2 = cst->rep1; + cst->rep1 = cst->rep0; + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G0 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (cst->state << + LZMA_NUM_POS_BITS_MAX) + + pos_state); + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + + cst->state = cst->state < LZMA_NUM_LIT_STATES ? + 9 : 11; + copy_byte(wr, cst->rep0); + return; + } else { + rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G1 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep1; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G2 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep2; + } else { + rc_update_bit_1(rc, prob); + distance = cst->rep3; + cst->rep3 = cst->rep2; + } + cst->rep2 = cst->rep1; + } + cst->rep1 = cst->rep0; + cst->rep0 = distance; + } + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob + LZMA_LEN_CHOICE; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_LOW + + (pos_state << + LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_CHOICE_2; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_MID + + (pos_state << + LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; + } + } + + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (cst->state < 4) { + int pos_slot; + + cst->state += LZMA_NUM_LIT_STATES; + prob = + p + LZMA_POS_SLOT + + ((len < + LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob, + LZMA_NUM_POS_SLOT_BITS, + &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i, mi; + num_bits = (pos_slot >> 1) - 1; + cst->rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + cst->rep0 <<= num_bits; + prob = p + LZMA_SPEC_POS + + cst->rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + cst->rep0 = (cst->rep0 << 1) | + rc_direct_bit(rc); + prob = p + LZMA_ALIGN; + cst->rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } + i = 1; + mi = 1; + while (num_bits--) { + if (rc_get_bit(rc, prob + mi, &mi)) + cst->rep0 |= i; + i <<= 1; + } + } else + cst->rep0 = pos_slot; + if (++(cst->rep0) == 0) + return; + } + + len += LZMA_MATCH_MIN_LEN; + + copy_bytes(wr, cst->rep0, len); +} + + + +STATIC inline int INIT unlzma(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error_fn)(char *x) + ) +{ + struct lzma_header header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_probs; + struct rc rc; + int i, mi; + struct writer wr; + struct cstate cst; + unsigned char *inbuf; + int ret = -1; + + set_error_fn(error_fn); + if (!flush) + in_len -= 4; /* Uncompressed size hack active in pre-boot + environment */ + if (buf) + inbuf = buf; + else + inbuf = malloc(LZMA_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + + cst.state = 0; + cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; + + wr.header = &header; + wr.flush = flush; + wr.global_pos = 0; + wr.previous_byte = 0; + wr.buffer_pos = 0; + + rc_init(&rc, fill, inbuf, in_len); + + for (i = 0; i < sizeof(header); i++) { + if (rc.ptr >= rc.buffer_end) + rc_read(&rc); + ((unsigned char *)&header)[i] = *rc.ptr++; + } + + if (header.pos >= (9 * 5 * 5)) + error("bad header"); + + mi = 0; + lc = header.pos; + while (lc >= 9) { + mi++; + lc -= 9; + } + pb = 0; + lp = mi; + while (lp >= 5) { + pb++; + lp -= 5; + } + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + ENDIAN_CONVERT(header.dict_size); + ENDIAN_CONVERT(header.dst_size); + + if (header.dict_size == 0) + header.dict_size = 1; + + if (output) + wr.buffer = output; + else { + wr.bufsize = MIN(header.dst_size, header.dict_size); + wr.buffer = large_malloc(wr.bufsize); + } + if (wr.buffer == NULL) + goto exit_1; + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); + if (p == 0) + goto exit_2; + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc_init_code(&rc); + + while (get_pos(&wr) < header.dst_size) { + int pos_state = get_pos(&wr) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; + if (rc_is_bit_0(&rc, prob)) + process_bit0(&wr, &rc, &cst, p, pos_state, prob, + lc, literal_pos_mask); + else { + process_bit1(&wr, &rc, &cst, p, pos_state, prob); + if (cst.rep0 == 0) + break; + } + } + + if (posp) + *posp = rc.ptr-rc.buffer; + if (wr.flush) + wr.flush(wr.buffer, wr.buffer_pos); + ret = 0; + large_free(p); +exit_2: + if (!output) + large_free(wr.buffer); +exit_1: + if (!buf) + free(inbuf); +exit_0: + return ret; +} + +#define decompress unlzma diff --git a/lib/zlib_inflate/inflate.h b/lib/zlib_inflate/inflate.h index df8a6c92052..3d17b3d1b21 100644 --- a/lib/zlib_inflate/inflate.h +++ b/lib/zlib_inflate/inflate.h @@ -1,3 +1,6 @@ +#ifndef INFLATE_H +#define INFLATE_H + /* inflate.h -- internal inflate state definition * Copyright (C) 1995-2004 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h @@ -105,3 +108,4 @@ struct inflate_state { unsigned short work[288]; /* work area for code table building */ code codes[ENOUGH]; /* space for code tables */ }; +#endif diff --git a/lib/zlib_inflate/inftrees.h b/lib/zlib_inflate/inftrees.h index 5f5219b1240..b70b4731ac7 100644 --- a/lib/zlib_inflate/inftrees.h +++ b/lib/zlib_inflate/inftrees.h @@ -1,3 +1,6 @@ +#ifndef INFTREES_H +#define INFTREES_H + /* inftrees.h -- header to use inftrees.c * Copyright (C) 1995-2005 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h @@ -53,3 +56,4 @@ typedef enum { extern int zlib_inflate_table (codetype type, unsigned short *lens, unsigned codes, code **table, unsigned *bits, unsigned short *work); +#endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index e06365775bd..70b4676e3b9 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -186,3 +186,17 @@ quiet_cmd_gzip = GZIP $@ cmd_gzip = gzip -f -9 < $< > $@ +# Bzip2 +# --------------------------------------------------------------------------- + +# Bzip2 does not include size in file... so we have to fake that +size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size + +quiet_cmd_bzip2 = BZIP2 $@ +cmd_bzip2 = (bzip2 -9 < $< ; $(size_append) $<) > $@ || (rm -f $@ ; false) + +# Lzma +# --------------------------------------------------------------------------- + +quiet_cmd_lzma = LZMA $@ +cmd_lzma = (lzma -9 -c $< ; $(size_append) $<) >$@ || (rm -f $@ ; false) diff --git a/scripts/bin_size b/scripts/bin_size new file mode 100644 index 00000000000..43e1b360cee --- /dev/null +++ b/scripts/bin_size @@ -0,0 +1,10 @@ +#!/bin/sh + +if [ $# = 0 ] ; then + echo Usage: $0 file +fi + +size_dec=`stat -c "%s" $1` +size_hex_echo_string=`printf "%08x" $size_dec | + sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'` +/bin/echo -ne $size_hex_echo_string From 30d65dbfe3add7f010a075991dc0bfeaebb7d9e1 Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:17 +0100 Subject: [PATCH 021/682] bzip2/lzma: config and initramfs support for bzip2/lzma decompression Impact: New code for initramfs decompression, new features This is the second part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma's decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - support for new compressions (bzip2 and lzma) in initramfs and old-style ramdisk - config dialog for kernel compression (but new kernel compressions not yet supported) Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- drivers/block/Kconfig | 25 ++++++ init/Kconfig | 50 ++++++++++++ init/do_mounts_rd.c | 182 +++++++++++++++--------------------------- init/initramfs.c | 123 ++++++++++------------------ lib/Makefile | 3 +- 5 files changed, 184 insertions(+), 199 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 0344a8a8321..79559444242 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,6 +358,31 @@ config BLK_DEV_XIP will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). +config RD_BZIP2 + bool "Initial ramdisk compressed using bzip2" + default n + depends on BLK_DEV_INITRD=y + help + Support loading of a bzip2 encoded initial ramdisk or cpio buffer + If unsure, say N. + +config RD_LZMA + bool "Initial ramdisk compressed using lzma" + default n + depends on BLK_DEV_INITRD=y + help + Support loading of a lzma encoded initial ramdisk or cpio buffer + If unsure, say N. + +config RD_GZIP + bool "Initial ramdisk compressed using gzip" + default y + depends on BLK_DEV_INITRD=y + select ZLIB_INFLATE + help + Support loading of a gzip encoded initial ramdisk or cpio buffer. + If unsure, say Y. + config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/init/Kconfig b/init/Kconfig index f6281711166..df84625b137 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -101,6 +101,56 @@ config LOCALVERSION_AUTO which is done within the script "scripts/setlocalversion".) +choice + prompt "Kernel compression mode" + default KERNEL_GZIP + help + The linux kernel is a kind of self-extracting executable. + Several compression algorithms are available, which differ + in efficiency, compression and decompression speed. + Compression speed is only relevant when building a kernel. + Decompression speed is relevant at each boot. + + If you have any problems with bzip2 or lzma compressed + kernels, mail me (Alain Knaff) . (An older + version of this functionality (bzip2 only), for 2.4, was + supplied by Christian Ludwig) + + High compression options are mostly useful for users, who + are low on disk space (embedded systems), but for whom ram + size matters less. + + If in doubt, select 'gzip' + +config KERNEL_GZIP + bool "Gzip" + help + The old and tried gzip compression. Its compression ratio is + the poorest among the 3 choices; however its speed (both + compression and decompression) is the fastest. + +config KERNEL_BZIP2 + bool "Bzip2" + help + Its compression ratio and speed is intermediate. + Decompression speed is slowest among the 3. + The kernel size is about 10 per cent smaller with bzip2, + in comparison to gzip. + Bzip2 uses a large amount of memory. For modern kernels + you will need at least 8MB RAM or more for booting. + +config KERNEL_LZMA + bool "LZMA" + help + The most recent compression algorithm. + Its ratio is best, decompression speed is between the other + 2. Compression is slowest. + The kernel size is about 33 per cent smaller with lzma, + in comparison to gzip. + +endchoice + + config SWAP bool "Support for paging of anonymous memory (swap)" depends on MMU && BLOCK diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a7c748fa977..dcaeb1f90b3 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -10,6 +10,12 @@ #include "do_mounts.h" +#include + +#include +#include +#include + int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ static int __init prompt_ramdisk(char *str) @@ -28,7 +34,7 @@ static int __init ramdisk_start_setup(char *str) } __setup("ramdisk_start=", ramdisk_start_setup); -static int __init crd_load(int in_fd, int out_fd); +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); /* * This routine tries to find a RAM disk image to load, and returns the @@ -44,7 +50,7 @@ static int __init crd_load(int in_fd, int out_fd); * gzip */ static int __init -identify_ramdisk_image(int fd, int start_block) +identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) { const int size = 512; struct minix_super_block *minixsb; @@ -70,6 +76,7 @@ identify_ramdisk_image(int fd, int start_block) sys_lseek(fd, start_block * BLOCK_SIZE, 0); sys_read(fd, buf, size); +#ifdef CONFIG_RD_GZIP /* * If it matches the gzip magic numbers, return 0 */ @@ -77,9 +84,39 @@ identify_ramdisk_image(int fd, int start_block) printk(KERN_NOTICE "RAMDISK: Compressed image found at block %d\n", start_block); + *decompressor = gunzip; nblocks = 0; goto done; } +#endif + +#ifdef CONFIG_RD_BZIP2 + /* + * If it matches the bzip2 magic numbers, return -1 + */ + if (buf[0] == 0x42 && (buf[1] == 0x5a)) { + printk(KERN_NOTICE + "RAMDISK: Bzipped image found at block %d\n", + start_block); + *decompressor = bunzip2; + nblocks = 0; + goto done; + } +#endif + +#ifdef CONFIG_RD_LZMA + /* + * If it matches the lzma magic numbers, return -1 + */ + if (buf[0] == 0x5d && (buf[1] == 0x00)) { + printk(KERN_NOTICE + "RAMDISK: Lzma image found at block %d\n", + start_block); + *decompressor = unlzma; + nblocks = 0; + goto done; + } +#endif /* romfs is at block zero too */ if (romfsb->word0 == ROMSB_WORD0 && @@ -143,6 +180,7 @@ int __init rd_load_image(char *from) int nblocks, i, disk; char *buf = NULL; unsigned short rotate = 0; + decompress_fn decompressor = NULL; #if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) char rotator[4] = { '|' , '/' , '-' , '\\' }; #endif @@ -155,12 +193,12 @@ int __init rd_load_image(char *from) if (in_fd < 0) goto noclose_input; - nblocks = identify_ramdisk_image(in_fd, rd_image_start); + nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); if (nblocks < 0) goto done; if (nblocks == 0) { - if (crd_load(in_fd, out_fd) == 0) + if (crd_load(in_fd, out_fd, decompressor) == 0) goto successful_load; goto done; } @@ -259,138 +297,48 @@ int __init rd_load_disk(int n) return rd_load_image("/dev/root"); } -/* - * gzip declarations - */ - -#define OF(args) args - -#ifndef memzero -#define memzero(s, n) memset ((s), 0, (n)) -#endif - -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - -#define INBUFSIZ 4096 -#define WSIZE 0x8000 /* window size--must be a power of two, and */ - /* at least 32K for zip's deflate method */ - -static uch *inbuf; -static uch *window; - -static unsigned insize; /* valid bytes in inbuf */ -static unsigned inptr; /* index of next byte to be processed in inbuf */ -static unsigned outcnt; /* bytes in output buffer */ static int exit_code; -static int unzip_error; -static long bytes_out; +static int decompress_error; static int crd_infd, crd_outfd; -#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) - -/* Diagnostic functions (stubbed out) */ -#define Assert(cond,msg) -#define Trace(x) -#define Tracev(x) -#define Tracevv(x) -#define Tracec(c,x) -#define Tracecv(c,x) - -#define STATIC static -#define INIT __init - -static int __init fill_inbuf(void); -static void __init flush_window(void); -static void __init error(char *m); - -#define NO_INFLATE_MALLOC - -#include "../lib/inflate.c" - -/* =========================================================================== - * Fill the input buffer. This is called only when the buffer is empty - * and at least one byte is really needed. - * Returning -1 does not guarantee that gunzip() will ever return. - */ -static int __init fill_inbuf(void) +static int __init compr_fill(void *buf, unsigned int len) { - if (exit_code) return -1; - - insize = sys_read(crd_infd, inbuf, INBUFSIZ); - if (insize == 0) { - error("RAMDISK: ran out of compressed data"); - return -1; - } - - inptr = 1; - - return inbuf[0]; + int r = sys_read(crd_infd, buf, len); + if (r < 0) + printk(KERN_ERR "RAMDISK: error while reading compressed data"); + else if (r == 0) + printk(KERN_ERR "RAMDISK: EOF while reading compressed data"); + return r; } -/* =========================================================================== - * Write the output window window[0..outcnt-1] and update crc and bytes_out. - * (Used for the decompressed data only.) - */ -static void __init flush_window(void) +static int __init compr_flush(void *window, unsigned int outcnt) { - ulg c = crc; /* temporary variable */ - unsigned n, written; - uch *in, ch; - - written = sys_write(crd_outfd, window, outcnt); - if (written != outcnt && unzip_error == 0) { - printk(KERN_ERR "RAMDISK: incomplete write (%d != %d) %ld\n", - written, outcnt, bytes_out); - unzip_error = 1; - } - in = window; - for (n = 0; n < outcnt; n++) { - ch = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; + int written = sys_write(crd_outfd, window, outcnt); + if (written != outcnt) { + if (decompress_error == 0) + printk(KERN_ERR + "RAMDISK: incomplete write (%d != %d)\n", + written, outcnt); + decompress_error = 1; + return -1; + } + return outcnt; } static void __init error(char *x) { printk(KERN_ERR "%s\n", x); exit_code = 1; - unzip_error = 1; + decompress_error = 1; } -static int __init crd_load(int in_fd, int out_fd) +static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) { int result; - - insize = 0; /* valid bytes in inbuf */ - inptr = 0; /* index of next byte to be processed in inbuf */ - outcnt = 0; /* bytes in output buffer */ - exit_code = 0; - bytes_out = 0; - crc = (ulg)0xffffffffL; /* shift register contents */ - crd_infd = in_fd; crd_outfd = out_fd; - inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); - if (!inbuf) { - printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n"); - return -1; - } - window = kmalloc(WSIZE, GFP_KERNEL); - if (!window) { - printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n"); - kfree(inbuf); - return -1; - } - makecrc(); - result = gunzip(); - if (unzip_error) + result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); + if (decompress_error) result = 1; - kfree(inbuf); - kfree(window); return result; } diff --git a/init/initramfs.c b/init/initramfs.c index 4f5ba75aaa7..40bd4fb9578 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -389,11 +389,14 @@ static int __init write_buffer(char *buf, unsigned len) return len - count; } -static void __init flush_buffer(char *buf, unsigned len) + +static int __init flush_buffer(void *bufv, unsigned len) { + char *buf = (char *) bufv; int written; + int origLen = len; if (message) - return; + return -1; while ((written = write_buffer(buf, len)) < len && !message) { char c = buf[written]; if (c == '0') { @@ -407,73 +410,14 @@ static void __init flush_buffer(char *buf, unsigned len) } else error("junk in compressed archive"); } + return origLen; } -/* - * gzip declarations - */ +static unsigned my_inptr; /* index of next byte to be processed in inbuf */ -#define OF(args) args - -#ifndef memzero -#define memzero(s, n) memset ((s), 0, (n)) -#endif - -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; - -#define WSIZE 0x8000 /* window size--must be a power of two, and */ - /* at least 32K for zip's deflate method */ - -static uch *inbuf; -static uch *window; - -static unsigned insize; /* valid bytes in inbuf */ -static unsigned inptr; /* index of next byte to be processed in inbuf */ -static unsigned outcnt; /* bytes in output buffer */ -static long bytes_out; - -#define get_byte() (inptr < insize ? inbuf[inptr++] : -1) - -/* Diagnostic functions (stubbed out) */ -#define Assert(cond,msg) -#define Trace(x) -#define Tracev(x) -#define Tracevv(x) -#define Tracec(c,x) -#define Tracecv(c,x) - -#define STATIC static -#define INIT __init - -static void __init flush_window(void); -static void __init error(char *m); - -#define NO_INFLATE_MALLOC - -#include "../lib/inflate.c" - -/* =========================================================================== - * Write the output window window[0..outcnt-1] and update crc and bytes_out. - * (Used for the decompressed data only.) - */ -static void __init flush_window(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, ch; - - flush_buffer(window, outcnt); - in = window; - for (n = 0; n < outcnt; n++) { - ch = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; -} +#include +#include +#include static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { @@ -482,9 +426,10 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) header_buf = kmalloc(110, GFP_KERNEL); symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); - window = kmalloc(WSIZE, GFP_KERNEL); - if (!window || !header_buf || !symlink_buf || !name_buf) + + if (!header_buf || !symlink_buf || !name_buf) panic("can't allocate buffers"); + state = Start; this_header = 0; message = NULL; @@ -504,22 +449,38 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; - insize = len; - inbuf = buf; - inptr = 0; - outcnt = 0; /* bytes in output buffer */ - bytes_out = 0; - crc = (ulg)0xffffffffL; /* shift register contents */ - makecrc(); - gunzip(); + if (!gunzip(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) + goto ok; + +#ifdef CONFIG_RD_BZIP2 + message = NULL; /* Zero out message, or else cpio will + think an error has already occured */ + if (!bunzip2(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) { + goto ok; + } +#endif + +#ifdef CONFIG_RD_LZMA + message = NULL; /* Zero out message, or else cpio will + think an error has already occured */ + if (!unlzma(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error) && + message == NULL) { + goto ok; + } +#endif +ok: if (state != Reset) - error("junk in gzipped archive"); - this_header = saved_offset + inptr; - buf += inptr; - len -= inptr; + error("junk in compressed archive"); + this_header = saved_offset + my_inptr; + buf += my_inptr; + len -= my_inptr; } dir_utime(); - kfree(window); kfree(name_buf); kfree(symlink_buf); kfree(header_buf); diff --git a/lib/Makefile b/lib/Makefile index 32b0e64ded2..e2a21d5a264 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o \ + decompress_inflate.o decompress_bunzip2.o decompress_unlzma.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o From b172fd882d201cf8aa67cddd9c1a023421fd4956 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 4 Jan 2009 14:42:52 -0800 Subject: [PATCH 022/682] bzip2/lzma: use a table to search for initramfs compression formats Impact: Code simplification Instead of open-coding testing for initramfs compression formats, use a table. Signed-off-by: H. Peter Anvin --- init/do_mounts_rd.c | 82 +++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 47 deletions(-) diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index dcaeb1f90b3..9c9d7dbcf9c 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -43,13 +43,31 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); * numbers could not be found. * * We currently check for the following magic numbers: - * minix - * ext2 + * minix + * ext2 * romfs * cramfs - * gzip + * gzip */ -static int __init +static const struct compress_format { + unsigned char magic[2]; + const char *name; + decompress_fn decompressor; +} compressed_formats[] = { +#ifdef CONFIG_RD_GZIP + { {037, 0213}, "gzip", gunzip }, + { {037, 0236}, "gzip", gunzip }, +#endif +#ifdef CONFIG_RD_BZIP2 + { {0x42, 0x5a}, "bzip2", bunzip2 }, +#endif +#ifdef CONFIG_RD_LZMA + { {0x5d, 0x00}, "lzma", unlzma }, +#endif + { {0, 0}, NULL, NULL } +}; + +static int __init identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) { const int size = 512; @@ -59,6 +77,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) struct cramfs_super *cramfsb; int nblocks = -1; unsigned char *buf; + const struct compress_format *cf; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -71,52 +90,21 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) memset(buf, 0xe5, size); /* - * Read block 0 to test for gzipped kernel + * Read block 0 to test for compressed kernel */ sys_lseek(fd, start_block * BLOCK_SIZE, 0); sys_read(fd, buf, size); -#ifdef CONFIG_RD_GZIP - /* - * If it matches the gzip magic numbers, return 0 - */ - if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { - printk(KERN_NOTICE - "RAMDISK: Compressed image found at block %d\n", - start_block); - *decompressor = gunzip; - nblocks = 0; - goto done; + for (cf = compressed_formats; cf->decompressor; cf++) { + if (buf[0] == cf->magic[0] && buf[1] == cf->magic[1]) { + printk(KERN_NOTICE + "RAMDISK: %s image found at block %d\n", + cf->name, start_block); + *decompressor = cf->decompressor; + nblocks = 0; + goto done; + } } -#endif - -#ifdef CONFIG_RD_BZIP2 - /* - * If it matches the bzip2 magic numbers, return -1 - */ - if (buf[0] == 0x42 && (buf[1] == 0x5a)) { - printk(KERN_NOTICE - "RAMDISK: Bzipped image found at block %d\n", - start_block); - *decompressor = bunzip2; - nblocks = 0; - goto done; - } -#endif - -#ifdef CONFIG_RD_LZMA - /* - * If it matches the lzma magic numbers, return -1 - */ - if (buf[0] == 0x5d && (buf[1] == 0x00)) { - printk(KERN_NOTICE - "RAMDISK: Lzma image found at block %d\n", - start_block); - *decompressor = unlzma; - nblocks = 0; - goto done; - } -#endif /* romfs is at block zero too */ if (romfsb->word0 == ROMSB_WORD0 && @@ -165,7 +153,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) printk(KERN_NOTICE "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", start_block); - + done: sys_lseek(fd, start_block * BLOCK_SIZE, 0); kfree(buf); @@ -224,7 +212,7 @@ int __init rd_load_image(char *from) nblocks, rd_blocks); goto done; } - + /* * OK, time to copy in the data */ From ae03c49964af5033534e518eebe439c3b90f43a7 Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:17 +0100 Subject: [PATCH 023/682] bzip2/lzma: x86 kernel compression support Impact: Replaces x86 kernel decompressor with new code This is the third part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma's decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - support for new bzip2 and lzma kernel compression for x86 Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/Makefile | 21 +++++- arch/x86/boot/compressed/misc.c | 118 ++++-------------------------- arch/x86/include/asm/boot.h | 12 ++- 3 files changed, 44 insertions(+), 107 deletions(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 1771c804e02..3ca4c194b8e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -4,7 +4,7 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIC @@ -47,18 +47,35 @@ ifeq ($(CONFIG_X86_32),y) ifdef CONFIG_RELOCATABLE $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,lzma) else $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE + $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) endif LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T else + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE + $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T endif -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE +suffix_$(CONFIG_KERNEL_GZIP) = gz +suffix_$(CONFIG_KERNEL_BZIP2) = bz2 +suffix_$(CONFIG_KERNEL_LZMA) = lzma + +$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index da062216948..e45be73684f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -116,71 +116,13 @@ /* * gzip declarations */ - -#define OF(args) args #define STATIC static #undef memset #undef memcpy #define memzero(s, n) memset((s), 0, (n)) -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; -/* - * Window size must be at least 32k, and a power of two. - * We don't actually have a window just a huge output buffer, - * so we report a 2G window size, as that should always be - * larger than our output buffer: - */ -#define WSIZE 0x80000000 - -/* Input buffer: */ -static unsigned char *inbuf; - -/* Sliding window buffer (and final output buffer): */ -static unsigned char *window; - -/* Valid bytes in inbuf: */ -static unsigned insize; - -/* Index of next byte to be processed in inbuf: */ -static unsigned inptr; - -/* Bytes in output buffer: */ -static unsigned outcnt; - -/* gzip flag byte */ -#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ -#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gz file */ -#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ -#define ORIG_NAM 0x08 /* bit 3 set: original file name present */ -#define COMMENT 0x10 /* bit 4 set: file comment present */ -#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ -#define RESERVED 0xC0 /* bit 6, 7: reserved */ - -#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf()) - -/* Diagnostic functions */ -#ifdef DEBUG -# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0) -# define Trace(x) do { fprintf x; } while (0) -# define Tracev(x) do { if (verbose) fprintf x ; } while (0) -# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0) -# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0) -# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0) -#else -# define Assert(cond, msg) -# define Trace(x) -# define Tracev(x) -# define Tracevv(x) -# define Tracec(c, x) -# define Tracecv(c, x) -#endif - -static int fill_inbuf(void); -static void flush_window(void); static void error(char *m); /* @@ -189,13 +131,8 @@ static void error(char *m); static struct boot_params *real_mode; /* Pointer to real-mode data */ static int quiet; -extern unsigned char input_data[]; -extern int input_len; - -static long bytes_out; - static void *memset(void *s, int c, unsigned n); -static void *memcpy(void *dest, const void *src, unsigned n); +void *memcpy(void *dest, const void *src, unsigned n); static void __putstr(int, const char *); #define putstr(__x) __putstr(0, __x) @@ -213,7 +150,17 @@ static char *vidmem; static int vidport; static int lines, cols; -#include "../../../../lib/inflate.c" +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif static void scroll(void) { @@ -282,7 +229,7 @@ static void *memset(void *s, int c, unsigned n) return s; } -static void *memcpy(void *dest, const void *src, unsigned n) +void *memcpy(void *dest, const void *src, unsigned n) { int i; const char *s = src; @@ -293,38 +240,6 @@ static void *memcpy(void *dest, const void *src, unsigned n) return dest; } -/* =========================================================================== - * Fill the input buffer. This is called only when the buffer is empty - * and at least one byte is really needed. - */ -static int fill_inbuf(void) -{ - error("ran out of input data"); - return 0; -} - -/* =========================================================================== - * Write the output window window[0..outcnt-1] and update crc and bytes_out. - * (Used for the decompressed data only.) - */ -static void flush_window(void) -{ - /* With my window equal to my output buffer - * I only need to compute the crc here. - */ - unsigned long c = crc; /* temporary variable */ - unsigned n; - unsigned char *in, ch; - - in = window; - for (n = 0; n < outcnt; n++) { - ch = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (unsigned long)outcnt; - outcnt = 0; -} static void error(char *x) { @@ -407,12 +322,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, lines = real_mode->screen_info.orig_video_lines; cols = real_mode->screen_info.orig_video_cols; - window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; - inbuf = input_data; /* Input buffer */ - insize = input_len; - inptr = 0; #ifdef CONFIG_X86_64 if ((unsigned long)output & (__KERNEL_ALIGN - 1)) @@ -430,10 +341,9 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, #endif #endif - makecrc(); if (!quiet) putstr("\nDecompressing Linux... "); - gunzip(); + decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); if (!quiet) putstr("done.\nBooting the kernel.\n"); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index dd61616cb73..c0e8e68a31f 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -15,11 +15,21 @@ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +#if (defined CONFIG_KERNEL_BZIP2) +#define BOOT_HEAP_SIZE 0x400000 +#else + #ifdef CONFIG_X86_64 #define BOOT_HEAP_SIZE 0x7000 -#define BOOT_STACK_SIZE 0x4000 #else #define BOOT_HEAP_SIZE 0x4000 +#endif + +#endif + +#ifdef CONFIG_X86_64 +#define BOOT_STACK_SIZE 0x4000 +#else #define BOOT_STACK_SIZE 0x1000 #endif From 0f5e2d2484ccd3062bb1f63083bafa37041bc868 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 4 Jan 2009 15:10:40 -0800 Subject: [PATCH 024/682] bzip2/lzma: handle failures from bzip2 and lzma correctly Impact: Bug fix If bzip2 or lzma fails (for example, if they aren't installed on the system), we need to propagate the failure out to "make". However, they were masked by being followed by a semicolon. Signed-off-by: H. Peter Anvin --- scripts/Makefile.lib | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 70b4676e3b9..3b949a35447 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -193,10 +193,10 @@ cmd_gzip = gzip -f -9 < $< > $@ size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size quiet_cmd_bzip2 = BZIP2 $@ -cmd_bzip2 = (bzip2 -9 < $< ; $(size_append) $<) > $@ || (rm -f $@ ; false) +cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false) # Lzma # --------------------------------------------------------------------------- quiet_cmd_lzma = LZMA $@ -cmd_lzma = (lzma -9 -c $< ; $(size_append) $<) >$@ || (rm -f $@ ; false) +cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false) From 2e9f3bddcbc711bb14d86c6f068a779bf3710247 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 4 Jan 2009 15:41:25 -0800 Subject: [PATCH 025/682] bzip2/lzma: make config machinery an arch configurable Impact: Bug fix (we should not show this menu on irrelevant architectures) Make the config machinery to drive the gzip/bzip2/lzma selection dependent on the architecture advertising HAVE_KERNEL_* so that we don't display this for architectures where it doesn't matter. Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 3 +++ init/Kconfig | 52 +++++++++++++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 862adb9bf0d..7b66c34d0aa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -39,6 +39,9 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select USER_STACKTRACE_SUPPORT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_BZIP2 + select HAVE_KERNEL_LZMA config ARCH_DEFCONFIG string diff --git a/init/Kconfig b/init/Kconfig index df84625b137..f9633c03cb1 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -101,10 +101,20 @@ config LOCALVERSION_AUTO which is done within the script "scripts/setlocalversion".) +config HAVE_KERNEL_GZIP + bool + +config HAVE_KERNEL_BZIP2 + bool + +config HAVE_KERNEL_LZMA + bool + choice - prompt "Kernel compression mode" - default KERNEL_GZIP - help + prompt "Kernel compression mode" + default KERNEL_GZIP + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA + help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ in efficiency, compression and decompression speed. @@ -123,34 +133,34 @@ choice If in doubt, select 'gzip' config KERNEL_GZIP - bool "Gzip" - help - The old and tried gzip compression. Its compression ratio is - the poorest among the 3 choices; however its speed (both - compression and decompression) is the fastest. + bool "Gzip" + depends on HAVE_KERNEL_GZIP + help + The old and tried gzip compression. Its compression ratio is + the poorest among the 3 choices; however its speed (both + compression and decompression) is the fastest. config KERNEL_BZIP2 bool "Bzip2" + depends on HAVE_KERNEL_BZIP2 help Its compression ratio and speed is intermediate. - Decompression speed is slowest among the 3. - The kernel size is about 10 per cent smaller with bzip2, - in comparison to gzip. - Bzip2 uses a large amount of memory. For modern kernels - you will need at least 8MB RAM or more for booting. + Decompression speed is slowest among the three. The kernel + size is about 10% smaller with bzip2, in comparison to gzip. + Bzip2 uses a large amount of memory. For modern kernels you + will need at least 8MB RAM or more for booting. config KERNEL_LZMA - bool "LZMA" - help - The most recent compression algorithm. - Its ratio is best, decompression speed is between the other - 2. Compression is slowest. - The kernel size is about 33 per cent smaller with lzma, - in comparison to gzip. + bool "LZMA" + depends on HAVE_KERNEL_LZMA + help + The most recent compression algorithm. + Its ratio is best, decompression speed is between the other + two. Compression is slowest. The kernel size is about 33% + smaller with LZMA in comparison to gzip. endchoice - config SWAP bool "Support for paging of anonymous memory (swap)" depends on MMU && BLOCK From c8531ab343dec88ed8005e403b1b304c710b7494 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 5 Jan 2009 13:48:31 -0800 Subject: [PATCH 026/682] bzip2/lzma: proper Kconfig dependencies for the ramdisk options Impact: Partial resolution of build failure Make all the compression algorithms properly configurable, and make sure the ramdisk options pull in the proper compression algorithms, as they should. Signed-off-by: H. Peter Anvin --- drivers/block/Kconfig | 20 +++++++++++--------- lib/Kconfig | 13 +++++++++++++ lib/Makefile | 7 +++++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 79559444242..cc9fa69c1ce 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,10 +358,20 @@ config BLK_DEV_XIP will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). +config RD_GZIP + bool "Initial ramdisk compressed using gzip" + default y + depends on BLK_DEV_INITRD=y + select DECOMPRESS_GZIP + help + Support loading of a gzip encoded initial ramdisk or cpio buffer. + If unsure, say Y. + config RD_BZIP2 bool "Initial ramdisk compressed using bzip2" default n depends on BLK_DEV_INITRD=y + select DECOMPRESS_BZIP2 help Support loading of a bzip2 encoded initial ramdisk or cpio buffer If unsure, say N. @@ -370,19 +380,11 @@ config RD_LZMA bool "Initial ramdisk compressed using lzma" default n depends on BLK_DEV_INITRD=y + select DECOMPRESS_LZMA help Support loading of a lzma encoded initial ramdisk or cpio buffer If unsure, say N. -config RD_GZIP - bool "Initial ramdisk compressed using gzip" - default y - depends on BLK_DEV_INITRD=y - select ZLIB_INFLATE - help - Support loading of a gzip encoded initial ramdisk or cpio buffer. - If unsure, say Y. - config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/lib/Kconfig b/lib/Kconfig index 03c2c24b908..e37f061fd32 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -97,6 +97,19 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +# +# These all provide a common interface (hence the apparent duplication with +# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) +# +config DECOMPRESS_GZIP + tristate + +config DECOMPRESS_BZIP2 + tristate + +config DECOMPRESS_LZMA + tristate + # # Generic allocator support is selected if needed # diff --git a/lib/Makefile b/lib/Makefile index e2a21d5a264..d9ac5a414fa 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,8 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o \ - decompress_inflate.o decompress_bunzip2.o decompress_unlzma.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -66,6 +65,10 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o +obj-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o +obj-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o + obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o From 7856a16ea03ed9b17860d756ee6473c2e57882b2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 7 Jan 2009 00:01:43 -0800 Subject: [PATCH 027/682] bzip2/lzma: DECOMPRESS_GZIP should select ZLIB_INFLATE Impact: Partial resolution of build failure DECOMPRESS_GZIP is just a common-interface wrapper around the zlib_inflate code; it thus need to select it. Signed-off-by: H. Peter Anvin --- lib/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig b/lib/Kconfig index e37f061fd32..daa481824d9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -102,6 +102,7 @@ config LZO_DECOMPRESS # ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) # config DECOMPRESS_GZIP + select ZLIB_INFLATE tristate config DECOMPRESS_BZIP2 From fb9a4ca9820fd4d7c4906bd393004662451e273e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 7 Jan 2009 00:03:49 -0800 Subject: [PATCH 028/682] bzip2/lzma: move initrd/ramfs options out of BLK_DEV Impact: Partial resolution of build failure Move the initrd/initramfs configuration options from drivers/block/Kconfig to usr/Kconfig, since they do not and should not depend on CONFIG_BLK_DEV. This fixes builds when CONFIG_BLK_DEV=n. Signed-off-by: H. Peter Anvin --- drivers/block/Kconfig | 27 --------------------------- usr/Kconfig | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index cc9fa69c1ce..0344a8a8321 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -358,33 +358,6 @@ config BLK_DEV_XIP will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). -config RD_GZIP - bool "Initial ramdisk compressed using gzip" - default y - depends on BLK_DEV_INITRD=y - select DECOMPRESS_GZIP - help - Support loading of a gzip encoded initial ramdisk or cpio buffer. - If unsure, say Y. - -config RD_BZIP2 - bool "Initial ramdisk compressed using bzip2" - default n - depends on BLK_DEV_INITRD=y - select DECOMPRESS_BZIP2 - help - Support loading of a bzip2 encoded initial ramdisk or cpio buffer - If unsure, say N. - -config RD_LZMA - bool "Initial ramdisk compressed using lzma" - default n - depends on BLK_DEV_INITRD=y - select DECOMPRESS_LZMA - help - Support loading of a lzma encoded initial ramdisk or cpio buffer - If unsure, say N. - config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/usr/Kconfig b/usr/Kconfig index 86cecb59dd0..a691a8f5898 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -44,3 +44,30 @@ config INITRAMFS_ROOT_GID owned by group root in the initial ramdisk image. If you are not sure, leave it set to "0". + +config RD_GZIP + bool "Initial ramdisk compressed using gzip" + default y + depends on BLK_DEV_INITRD=y + select DECOMPRESS_GZIP + help + Support loading of a gzip encoded initial ramdisk or cpio buffer. + If unsure, say Y. + +config RD_BZIP2 + bool "Initial ramdisk compressed using bzip2" + default n + depends on BLK_DEV_INITRD=y + select DECOMPRESS_BZIP2 + help + Support loading of a bzip2 encoded initial ramdisk or cpio buffer + If unsure, say N. + +config RD_LZMA + bool "Initial ramdisk compressed using lzma" + default n + depends on BLK_DEV_INITRD=y + select DECOMPRESS_LZMA + help + Support loading of a lzma encoded initial ramdisk or cpio buffer + If unsure, say N. From a26ee60f90daffe1de6be0d093af86e7279b3dfd Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Wed, 7 Jan 2009 00:10:27 -0800 Subject: [PATCH 029/682] bzip2/lzma: fix built-in initramfs vs CONFIG_RD_GZIP Impact: Resolves build failures in some configurations Makes it possible to disable CONFIG_RD_GZIP . In that case, the built-in initramfs will be compressed by whatever compressor is available (bzip2 or lzma) or left uncompressed if none is available. It also removes a couple of warnings which occur when no ramdisk compression at all is chosen. It also restores the select ZLIB_INFLATE in drivers/block/Kconfig which somehow came missing. This is needed to activate compilation of the stuff in zlib_deflate. Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- init/initramfs.c | 7 +++++- scripts/gen_initramfs_list.sh | 17 +++++++++----- usr/Makefile | 42 +++++++++++++++++++++++++---------- usr/initramfs_data.S | 2 +- usr/initramfs_data.bz2.S | 29 ++++++++++++++++++++++++ usr/initramfs_data.gz.S | 29 ++++++++++++++++++++++++ usr/initramfs_data.lzma.S | 29 ++++++++++++++++++++++++ 7 files changed, 135 insertions(+), 20 deletions(-) create mode 100644 usr/initramfs_data.bz2.S create mode 100644 usr/initramfs_data.gz.S create mode 100644 usr/initramfs_data.lzma.S diff --git a/init/initramfs.c b/init/initramfs.c index 40bd4fb9578..a3ba91cdab8 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -389,7 +389,7 @@ static int __init write_buffer(char *buf, unsigned len) return len - count; } - +#if defined CONFIG_RD_GZIP || defined CONFIG_RD_BZIP2 || defined CONFIG_RD_LZMA static int __init flush_buffer(void *bufv, unsigned len) { char *buf = (char *) bufv; @@ -412,6 +412,7 @@ static int __init flush_buffer(void *bufv, unsigned len) } return origLen; } +#endif static unsigned my_inptr; /* index of next byte to be processed in inbuf */ @@ -449,10 +450,12 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; +#ifdef CONFIG_RD_GZIP if (!gunzip(buf, len, NULL, flush_buffer, NULL, &my_inptr, error) && message == NULL) goto ok; +#endif #ifdef CONFIG_RD_BZIP2 message = NULL; /* Zero out message, or else cpio will @@ -473,7 +476,9 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) goto ok; } #endif +#if defined CONFIG_RD_GZIP || defined CONFIG_RD_BZIP2 || defined CONFIG_RD_LZMA ok: +#endif if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index 5f3415f2873..41041e4923f 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -5,7 +5,7 @@ # Released under the terms of the GNU GPL # # Generate a cpio packed initramfs. It uses gen_init_cpio to generate -# the cpio archive, and gzip to pack it. +# the cpio archive, and then compresses it. # The script may also be used to generate the inputfile used for gen_init_cpio # This script assumes that gen_init_cpio is located in usr/ directory @@ -16,8 +16,8 @@ usage() { cat << EOF Usage: $0 [-o ] [-u ] [-g ] {-d | } ... - -o Create gzipped initramfs file named using - gen_init_cpio and gzip + -o Create compressed initramfs file named using + gen_init_cpio and compressor depending on the extension -u User ID to map to user ID 0 (root). is only meaningful if is a directory. "squash" forces all files to uid 0. @@ -225,6 +225,7 @@ cpio_list= output="/dev/stdout" output_file="" is_cpio_compressed= +compr="gzip -9 -f" arg="$1" case "$arg" in @@ -233,11 +234,15 @@ case "$arg" in echo "deps_initramfs := \\" shift ;; - "-o") # generate gzipped cpio image named $1 + "-o") # generate compressed cpio image named $1 shift output_file="$1" cpio_list="$(mktemp ${TMPDIR:-/tmp}/cpiolist.XXXXXX)" output=${cpio_list} + echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f" + echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f" + echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f" + echo "$output_file" | grep -q "\.cpio$" && compr="cat" shift ;; esac @@ -274,7 +279,7 @@ while [ $# -gt 0 ]; do esac done -# If output_file is set we will generate cpio archive and gzip it +# If output_file is set we will generate cpio archive and compress it # we are carefull to delete tmp files if [ ! -z ${output_file} ]; then if [ -z ${cpio_file} ]; then @@ -287,7 +292,7 @@ if [ ! -z ${output_file} ]; then if [ "${is_cpio_compressed}" = "compressed" ]; then cat ${cpio_tfile} > ${output_file} else - cat ${cpio_tfile} | gzip -f -9 - > ${output_file} + cat ${cpio_tfile} | ${compr} - > ${output_file} fi [ -z ${cpio_file} ] && rm ${cpio_tfile} fi diff --git a/usr/Makefile b/usr/Makefile index 201f27f8cba..451cdff7dff 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -5,14 +5,32 @@ klibcdirs:; PHONY += klibcdirs +# Find out "preferred" ramdisk compressor. Order of preference is +# 1. bzip2 efficient, and likely to be present +# 2. gzip former default +# 3. lzma +# 4. none + +# None of the above +suffix_y = + +# Lzma, but no gzip nor bzip2 +suffix_$(CONFIG_RD_LZMA) = .lzma + +# Gzip, but no bzip2 +suffix_$(CONFIG_RD_GZIP) = .gz + +# Bzip2 +suffix_$(CONFIG_RD_BZIP2) = .bz2 + # Generate builtin.o based on initramfs_data.o -obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o +obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data$(suffix_y).o -# initramfs_data.o contains the initramfs_data.cpio.gz image. +# initramfs_data.o contains the compressed initramfs_data.cpio image. # The image is included using .incbin, a dependency which is not # tracked automatically. -$(obj)/initramfs_data.o: $(obj)/initramfs_data.cpio.gz FORCE +$(obj)/initramfs_data$(suffix_y).o: $(obj)/initramfs_data.cpio$(suffix_y) FORCE ##### # Generate the initramfs cpio archive @@ -25,28 +43,28 @@ ramfs-args := \ $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \ $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID)) -# .initramfs_data.cpio.gz.d is used to identify all files included +# .initramfs_data.cpio.d is used to identify all files included # in initramfs and to detect if any files are added/removed. # Removed files are identified by directory timestamp being updated # The dependency list is generated by gen_initramfs.sh -l -ifneq ($(wildcard $(obj)/.initramfs_data.cpio.gz.d),) - include $(obj)/.initramfs_data.cpio.gz.d +ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),) + include $(obj)/.initramfs_data.cpio.d endif quiet_cmd_initfs = GEN $@ cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input) -targets := initramfs_data.cpio.gz +targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio # do not try to update files included in initramfs $(deps_initramfs): ; $(deps_initramfs): klibcdirs -# We rebuild initramfs_data.cpio.gz if: -# 1) Any included file is newer then initramfs_data.cpio.gz +# We rebuild initramfs_data.cpio if: +# 1) Any included file is newer then initramfs_data.cpio # 2) There are changes in which files are included (added or deleted) -# 3) If gen_init_cpio are newer than initramfs_data.cpio.gz +# 3) If gen_init_cpio are newer than initramfs_data.cpio # 4) arguments to gen_initramfs.sh changes -$(obj)/initramfs_data.cpio.gz: $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs - $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.gz.d +$(obj)/initramfs_data.cpio$(suffix_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs + $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d $(call if_changed,initfs) diff --git a/usr/initramfs_data.S b/usr/initramfs_data.S index c2e1ad424f4..7c6973d8d82 100644 --- a/usr/initramfs_data.S +++ b/usr/initramfs_data.S @@ -26,5 +26,5 @@ SECTIONS */ .section .init.ramfs,"a" -.incbin "usr/initramfs_data.cpio.gz" +.incbin "usr/initramfs_data.cpio" diff --git a/usr/initramfs_data.bz2.S b/usr/initramfs_data.bz2.S new file mode 100644 index 00000000000..bc54d090365 --- /dev/null +++ b/usr/initramfs_data.bz2.S @@ -0,0 +1,29 @@ +/* + initramfs_data includes the compressed binary that is the + filesystem used for early user space. + Note: Older versions of "as" (prior to binutils 2.11.90.0.23 + released on 2001-07-14) dit not support .incbin. + If you are forced to use older binutils than that then the + following trick can be applied to create the resulting binary: + + + ld -m elf_i386 --format binary --oformat elf32-i386 -r \ + -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o + ld -m elf_i386 -r -o built-in.o initramfs_data.o + + initramfs_data.scr looks like this: +SECTIONS +{ + .init.ramfs : { *(.data) } +} + + The above example is for i386 - the parameters vary from architectures. + Eventually look up LDFLAGS_BLOB in an older version of the + arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. + + Using .incbin has the advantage over ld that the correct flags are set + in the ELF header, as required by certain architectures. +*/ + +.section .init.ramfs,"a" +.incbin "usr/initramfs_data.cpio.bz2" diff --git a/usr/initramfs_data.gz.S b/usr/initramfs_data.gz.S new file mode 100644 index 00000000000..890c8dd1d6b --- /dev/null +++ b/usr/initramfs_data.gz.S @@ -0,0 +1,29 @@ +/* + initramfs_data includes the compressed binary that is the + filesystem used for early user space. + Note: Older versions of "as" (prior to binutils 2.11.90.0.23 + released on 2001-07-14) dit not support .incbin. + If you are forced to use older binutils than that then the + following trick can be applied to create the resulting binary: + + + ld -m elf_i386 --format binary --oformat elf32-i386 -r \ + -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o + ld -m elf_i386 -r -o built-in.o initramfs_data.o + + initramfs_data.scr looks like this: +SECTIONS +{ + .init.ramfs : { *(.data) } +} + + The above example is for i386 - the parameters vary from architectures. + Eventually look up LDFLAGS_BLOB in an older version of the + arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. + + Using .incbin has the advantage over ld that the correct flags are set + in the ELF header, as required by certain architectures. +*/ + +.section .init.ramfs,"a" +.incbin "usr/initramfs_data.cpio.gz" diff --git a/usr/initramfs_data.lzma.S b/usr/initramfs_data.lzma.S new file mode 100644 index 00000000000..e11469e4856 --- /dev/null +++ b/usr/initramfs_data.lzma.S @@ -0,0 +1,29 @@ +/* + initramfs_data includes the compressed binary that is the + filesystem used for early user space. + Note: Older versions of "as" (prior to binutils 2.11.90.0.23 + released on 2001-07-14) dit not support .incbin. + If you are forced to use older binutils than that then the + following trick can be applied to create the resulting binary: + + + ld -m elf_i386 --format binary --oformat elf32-i386 -r \ + -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o + ld -m elf_i386 -r -o built-in.o initramfs_data.o + + initramfs_data.scr looks like this: +SECTIONS +{ + .init.ramfs : { *(.data) } +} + + The above example is for i386 - the parameters vary from architectures. + Eventually look up LDFLAGS_BLOB in an older version of the + arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. + + Using .incbin has the advantage over ld that the correct flags are set + in the ELF header, as required by certain architectures. +*/ + +.section .init.ramfs,"a" +.incbin "usr/initramfs_data.cpio.lzma" From 7760ec77ab2a9e48bdd0d13341446a8a51f0b9f1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:10:13 +0530 Subject: [PATCH 030/682] x86: smp.h remove obsolete function declaration Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 830b9fcb642..83a4cc07431 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,7 +22,6 @@ extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_callin_map; -extern void (*mtrr_hook)(void); extern void zap_low_mappings(void); extern int __cpuinit get_local_pda(int cpu); From dacf7333571d770366bff74d10b56aa545434605 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 17:26:35 +0530 Subject: [PATCH 031/682] x86: smp.h move zap_low_mappings declartion to tlbflush.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/tlbflush.h | 2 ++ arch/x86/mm/init_32.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 83a4cc07431..64c9e848f13 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,8 +22,6 @@ extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_callin_map; -extern void zap_low_mappings(void); - extern int __cpuinit get_local_pda(int cpu); extern int smp_num_siblings; diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb54911..aed0b700b83 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -175,4 +175,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } +extern void zap_low_mappings(void); + #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f99a6c6c432..a9dd0b7ad61 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,7 +49,6 @@ #include #include #include -#include unsigned int __VMALLOC_RESERVE = 128 << 20; From 6e5385d44b2df05e50a8d07ba0e14d3e32685237 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 18:11:35 +0530 Subject: [PATCH 032/682] x86: smp.h move prefill_possible_map declartion to cpu.h Impact: cleanup, moving NON-SMP stuff from smp.h Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 10 ++++++++++ arch/x86/include/asm/smp.h | 6 ------ arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 1 - 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index bae482df603..29aa6d0752b 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,16 @@ #include #include +#ifdef CONFIG_SMP + +extern void prefill_possible_map(void); + +#else /* CONFIG_SMP */ + +static inline void prefill_possible_map(void) {} + +#endif /* CONFIG_SMP */ + struct x86_cpu { struct cpu cpu; }; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 64c9e848f13..62bd3f68269 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -138,8 +138,6 @@ void play_dead_common(void); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -extern void prefill_possible_map(void); - void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) @@ -148,10 +146,6 @@ static inline int num_booting_cpus(void) { return cpus_weight(cpu_callout_map); } -#else -static inline void prefill_possible_map(void) -{ -} #endif /* CONFIG_SMP */ extern unsigned disabled_cpus __cpuinitdata; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d8042cf6..f41c4486c27 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,7 +89,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 07576bee03e..f8c885bed18 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include From f472cdba849cc3d838f3788469316e8572463a8c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:34:25 +0530 Subject: [PATCH 033/682] x86: smp.h move stack_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 2 ++ arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/cpu/common.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 29aa6d0752b..f958e7e49c0 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,8 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define stack_smp_processor_id() 0 + #endif /* CONFIG_SMP */ struct x86_cpu { diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 62bd3f68269..ed4af9a89cf 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,7 +173,6 @@ extern int safe_smp_processor_id(void); #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 -#define stack_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3f95a40f718..f7619a2eaff 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include From 96b89dc6598a50e3aac8e2c6d826ae3795b7d030 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:35:48 +0530 Subject: [PATCH 034/682] x86: smp.h move safe_smp_processor_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/reboot.c | 1 + arch/x86/mach-voyager/setup.c | 1 + 5 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index f958e7e49c0..4c16888ffa3 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index ed4af9a89cf..c92b93594ab 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -172,7 +172,6 @@ extern int safe_smp_processor_id(void); #else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ #define cpu_physical_id(cpu) boot_cpu_physical_apicid -#define safe_smp_processor_id() 0 #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index c689d19e35a..11b93cabdf7 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643..f8536fee5c1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index a580b9562e7..0ade62555ff 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -9,6 +9,7 @@ #include #include #include +#include void __init pre_intr_init_hook(void) { From af8968abf09fe5984bdd206e54e0eeb1dc1fa29c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:37:33 +0530 Subject: [PATCH 035/682] x86: smp.h move cpu_physical_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 1 + arch/x86/include/asm/smp.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 4c16888ffa3..89edafb9339 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -15,6 +15,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} +#define cpu_physical_id(cpu) boot_cpu_physical_apicid #define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c92b93594ab..c975b6f83c6 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -170,8 +170,6 @@ extern int safe_smp_processor_id(void); }) #define safe_smp_processor_id() smp_processor_id() -#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ -#define cpu_physical_id(cpu) boot_cpu_physical_apicid #endif #ifdef CONFIG_X86_LOCAL_APIC From 6d652ea1d056390a0c33db92b44ed219284b71af Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 7 Jan 2009 21:38:59 +0530 Subject: [PATCH 036/682] x86: smp.h move boot_cpu_id declartion to cpu.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu.h | 7 +++++++ arch/x86/include/asm/smp.h | 6 ------ arch/x86/kernel/io_apic.c | 1 + drivers/pci/intr_remapping.c | 1 + 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 89edafb9339..f03b23e3286 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -31,4 +31,11 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c975b6f83c6..74ad9ef6ae0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -212,11 +212,5 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..109c91db202 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f78371b2252..5a57753ea9f 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "intr_remapping.h" From 41401db698cbb5d1869776bf336881db267e7d19 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:42:46 +0530 Subject: [PATCH 037/682] x86: rename intel_mp_floating to mpf_intel Impact: cleanup, solve 80 columns wrap problems intel_mp_floating should be renamed to mpf_intel. The reason: the 'f' in MPF already means 'floating' which means MP Floating pointer structure - no need to repeat that in the type name. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec_def.h | 3 ++- arch/x86/kernel/mpparse.c | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 59568bc4767..187dc920193 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -24,7 +24,8 @@ # endif #endif -struct intel_mp_floating { +/* Intel MP Floating Pointer Structure */ +struct mpf_intel { char mpf_signature[4]; /* "_MP_" */ unsigned int mpf_physptr; /* Configuration table address */ unsigned char mpf_length; /* Our length (paragraphs) */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c0601c2848a..6cea941c4db 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -569,14 +569,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } } -static struct intel_mp_floating *mpf_found; +static struct mpf_intel *mpf_found; /* * Scan the memory blocks for an SMP configuration block. */ static void __init __get_smp_config(unsigned int early) { - struct intel_mp_floating *mpf = mpf_found; + struct mpf_intel *mpf = mpf_found; if (!mpf) return; @@ -687,14 +687,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { unsigned int *bp = phys_to_virt(base); - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", bp, length); BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { - mpf = (struct intel_mp_floating *)bp; + mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && (mpf->mpf_length == 1) && !mpf_checksum((unsigned char *)bp, 16) && @@ -1000,7 +1000,7 @@ static int __init update_mp_table(void) { char str[16]; char oem[10]; - struct intel_mp_floating *mpf; + struct mpf_intel *mpf; struct mpc_table *mpc, *mpc_new; if (!enable_update_mptable) @@ -1052,7 +1052,7 @@ static int __init update_mp_table(void) mpc = mpc_new; /* check if we can modify that */ if (mpc_new_phys - mpf->mpf_physptr) { - struct intel_mp_floating *mpf_new; + struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); From 1eb1b3b65dc3e3ffcc6a60e115c085c0c11c1077 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 8 Jan 2009 15:43:26 +0530 Subject: [PATCH 038/682] x86: rename all fields of mpf_intel mpf_X to X Impact: cleanup, solve 80 columns wrap problems It would be cleaner to rename all the mpf->mpf_X fields to mpf->X - that alone would give 4 characters per usage site. (we already know that it's an 'mpf' entity - no need to duplicate that in the field too) Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec_def.h | 20 ++++++------- arch/x86/kernel/mpparse.c | 50 +++++++++++++++---------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h index 187dc920193..4a7f96d7c18 100644 --- a/arch/x86/include/asm/mpspec_def.h +++ b/arch/x86/include/asm/mpspec_def.h @@ -26,16 +26,16 @@ /* Intel MP Floating Pointer Structure */ struct mpf_intel { - char mpf_signature[4]; /* "_MP_" */ - unsigned int mpf_physptr; /* Configuration table address */ - unsigned char mpf_length; /* Our length (paragraphs) */ - unsigned char mpf_specification;/* Specification version */ - unsigned char mpf_checksum; /* Checksum (makes sum 0) */ - unsigned char mpf_feature1; /* Standard or configuration ? */ - unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ - unsigned char mpf_feature3; /* Unused (0) */ - unsigned char mpf_feature4; /* Unused (0) */ - unsigned char mpf_feature5; /* Unused (0) */ + char signature[4]; /* "_MP_" */ + unsigned int physptr; /* Configuration table address */ + unsigned char length; /* Our length (paragraphs) */ + unsigned char specification; /* Specification version */ + unsigned char checksum; /* Checksum (makes sum 0) */ + unsigned char feature1; /* Standard or configuration ? */ + unsigned char feature2; /* Bit7 set for IMCR|PIC */ + unsigned char feature3; /* Unused (0) */ + unsigned char feature4; /* Unused (0) */ + unsigned char feature5; /* Unused (0) */ }; #define MPC_SIGNATURE "PCMP" diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6cea941c4db..8385d4e7e15 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -597,9 +597,9 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", - mpf->mpf_specification); + mpf->specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) - if (mpf->mpf_feature2 & (1 << 7)) { + if (mpf->feature2 & (1 << 7)) { printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); pic_mode = 1; } else { @@ -610,7 +610,7 @@ static void __init __get_smp_config(unsigned int early) /* * Now see if we need to read further. */ - if (mpf->mpf_feature1 != 0) { + if (mpf->feature1 != 0) { if (early) { /* * local APIC has default address @@ -620,16 +620,16 @@ static void __init __get_smp_config(unsigned int early) } printk(KERN_INFO "Default MP configuration #%d\n", - mpf->mpf_feature1); - construct_default_ISA_mptable(mpf->mpf_feature1); + mpf->feature1); + construct_default_ISA_mptable(mpf->feature1); - } else if (mpf->mpf_physptr) { + } else if (mpf->physptr) { /* * Read the physical hardware table. Anything here will * override the defaults. */ - if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { + if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 0; #endif @@ -696,10 +696,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, while (length > 0) { mpf = (struct mpf_intel *)bp; if ((*bp == SMP_MAGIC_IDENT) && - (mpf->mpf_length == 1) && + (mpf->length == 1) && !mpf_checksum((unsigned char *)bp, 16) && - ((mpf->mpf_specification == 1) - || (mpf->mpf_specification == 4))) { + ((mpf->specification == 1) + || (mpf->specification == 4))) { #ifdef CONFIG_X86_LOCAL_APIC smp_found_config = 1; #endif @@ -712,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, return 1; reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, BOOTMEM_DEFAULT); - if (mpf->mpf_physptr) { + if (mpf->physptr) { unsigned long size = PAGE_SIZE; #ifdef CONFIG_X86_32 /* @@ -721,14 +721,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * the bottom is mapped now. * PC-9800's MPC table places on the very last * of physical memory; so that simply reserving - * PAGE_SIZE from mpg->mpf_physptr yields BUG() + * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->mpf_physptr + size > end) - size = end - mpf->mpf_physptr; + if (mpf->physptr + size > end) + size = end - mpf->physptr; #endif - reserve_bootmem_generic(mpf->mpf_physptr, size, + reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); } @@ -1013,19 +1013,19 @@ static int __init update_mp_table(void) /* * Now see if we need to go further. */ - if (mpf->mpf_feature1 != 0) + if (mpf->feature1 != 0) return 0; - if (!mpf->mpf_physptr) + if (!mpf->physptr) return 0; - mpc = phys_to_virt(mpf->mpf_physptr); + mpc = phys_to_virt(mpf->physptr); if (!smp_check_mpc(mpc, oem, str)) return 0; printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); - printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); + printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { mpc_new_phys = 0; @@ -1046,23 +1046,23 @@ static int __init update_mp_table(void) } printk(KERN_INFO "use in-positon replacing\n"); } else { - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; mpc_new = phys_to_virt(mpc_new_phys); memcpy(mpc_new, mpc, mpc->length); mpc = mpc_new; /* check if we can modify that */ - if (mpc_new_phys - mpf->mpf_physptr) { + if (mpc_new_phys - mpf->physptr) { struct mpf_intel *mpf_new; /* steal 16 bytes from [0, 1k) */ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); mpf_new = phys_to_virt(0x400 - 16); memcpy(mpf_new, mpf, 16); mpf = mpf_new; - mpf->mpf_physptr = mpc_new_phys; + mpf->physptr = mpc_new_phys; } - mpf->mpf_checksum = 0; - mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); - printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); + mpf->checksum = 0; + mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); + printk(KERN_INFO "physptr new: %x\n", mpf->physptr); } /* From 5619448fc55650cb76cbb825e16e4bb5d1390399 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Jan 2009 15:09:12 -0800 Subject: [PATCH 039/682] bzip2/lzma: fix constant in decompress_inflate Impact: Cleanup Fix constant 0x8100 /* 32K */; according to Alain the value 0x8100 was left over test code to test misalignment, the correct value is indeed 0x8000 == 32K. Signed-off-by: H. Peter Anvin --- lib/decompress_inflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 163e66aea5f..d2e7c758d1c 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -41,7 +41,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, set_error_fn(error_fn); rc = -1; if (flush) { - out_len = 0x8100; /* 32 K */ + out_len = 0x8000; /* 32 K */ out_buf = malloc(out_len); } else { out_len = 0x7fffffff; /* no limit */ From 6c11b12ac6f101732d43b5682f5b3ae4dda4205f Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Thu, 8 Jan 2009 15:10:19 -0800 Subject: [PATCH 040/682] bzip2/lzma: fix decompress_inflate.c vs multi-block-with-embedded-filename Impact: Bug fix Fix gunzip uncompression, so that it also works with files with embedded filenames that are larger than one block. Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- lib/decompress_inflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d2e7c758d1c..839a329b4fc 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -97,7 +97,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, strm->next_in++; strm->next_in++; } - strm->avail_in = len - 10; + strm->avail_in = len - (strm->next_in - zbuf); strm->next_out = out_buf; strm->avail_out = out_len; From 889c92d21db40be0b7d22a59395060237895bb85 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Jan 2009 15:14:17 -0800 Subject: [PATCH 041/682] bzip2/lzma: centralize format detection Centralize the compression format detection to a common routine in the lib directory, and use it for both initramfs and initrd. Signed-off-by: H. Peter Anvin --- include/linux/decompress/generic.h | 3 ++ init/do_mounts_rd.c | 38 +++++------------------ init/initramfs.c | 39 +++++------------------ lib/Makefile | 9 +++--- lib/decompress.c | 50 ++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 67 deletions(-) create mode 100644 lib/decompress.c diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index f847f514f78..6dfb856327b 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -26,5 +26,8 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len, *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE */ +/* Utility routine to detect the decompression method */ +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name); #endif diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 9c9d7dbcf9c..a06ed4f92e0 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -12,9 +12,6 @@ #include -#include -#include -#include int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ @@ -49,24 +46,6 @@ static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); * cramfs * gzip */ -static const struct compress_format { - unsigned char magic[2]; - const char *name; - decompress_fn decompressor; -} compressed_formats[] = { -#ifdef CONFIG_RD_GZIP - { {037, 0213}, "gzip", gunzip }, - { {037, 0236}, "gzip", gunzip }, -#endif -#ifdef CONFIG_RD_BZIP2 - { {0x42, 0x5a}, "bzip2", bunzip2 }, -#endif -#ifdef CONFIG_RD_LZMA - { {0x5d, 0x00}, "lzma", unlzma }, -#endif - { {0, 0}, NULL, NULL } -}; - static int __init identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) { @@ -77,7 +56,7 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) struct cramfs_super *cramfsb; int nblocks = -1; unsigned char *buf; - const struct compress_format *cf; + const char *compress_name; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -95,15 +74,12 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) sys_lseek(fd, start_block * BLOCK_SIZE, 0); sys_read(fd, buf, size); - for (cf = compressed_formats; cf->decompressor; cf++) { - if (buf[0] == cf->magic[0] && buf[1] == cf->magic[1]) { - printk(KERN_NOTICE - "RAMDISK: %s image found at block %d\n", - cf->name, start_block); - *decompressor = cf->decompressor; - nblocks = 0; - goto done; - } + *decompressor = decompress_method(buf, size, &compress_name); + if (*decompressor) { + printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", + compress_name, start_block); + nblocks = 0; + goto done; } /* romfs is at block zero too */ diff --git a/init/initramfs.c b/init/initramfs.c index a3ba91cdab8..2f42984e558 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -416,13 +416,13 @@ static int __init flush_buffer(void *bufv, unsigned len) static unsigned my_inptr; /* index of next byte to be processed in inbuf */ -#include -#include -#include +#include static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { int written; + decompress_fn decompress; + dry_run = check_only; header_buf = kmalloc(110, GFP_KERNEL); symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); @@ -450,35 +450,10 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; -#ifdef CONFIG_RD_GZIP - if (!gunzip(buf, len, NULL, flush_buffer, NULL, - &my_inptr, error) && - message == NULL) - goto ok; -#endif - -#ifdef CONFIG_RD_BZIP2 - message = NULL; /* Zero out message, or else cpio will - think an error has already occured */ - if (!bunzip2(buf, len, NULL, flush_buffer, NULL, - &my_inptr, error) && - message == NULL) { - goto ok; - } -#endif - -#ifdef CONFIG_RD_LZMA - message = NULL; /* Zero out message, or else cpio will - think an error has already occured */ - if (!unlzma(buf, len, NULL, flush_buffer, NULL, - &my_inptr, error) && - message == NULL) { - goto ok; - } -#endif -#if defined CONFIG_RD_GZIP || defined CONFIG_RD_BZIP2 || defined CONFIG_RD_LZMA -ok: -#endif + decompress = decompress_method(buf, len, NULL); + if (decompress) + decompress(buf, len, NULL, flush_buffer, NULL, + &my_inptr, error); if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; diff --git a/lib/Makefile b/lib/Makefile index d9ac5a414fa..790de7c25d0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o + proportions.o prio_heap.o ratelimit.o show_mem.o \ + is_single_threaded.o decompress.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -65,9 +66,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ -obj-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o -obj-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o -obj-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o +lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o +lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o +lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o obj-$(CONFIG_TEXTSEARCH) += textsearch.o obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o diff --git a/lib/decompress.c b/lib/decompress.c new file mode 100644 index 00000000000..edac55cc782 --- /dev/null +++ b/lib/decompress.c @@ -0,0 +1,50 @@ +/* + * decompress.c + * + * Detect the decompression method based on magic number + */ + +#include + +#include +#include +#include + +#include +#include + +static const struct compress_format { + unsigned char magic[2]; + const char *name; + decompress_fn decompressor; +} compressed_formats[] = { +#ifdef CONFIG_DECOMPRESS_GZIP + { {037, 0213}, "gzip", gunzip }, + { {037, 0236}, "gzip", gunzip }, +#endif +#ifdef CONFIG_DECOMPRESS_BZIP2 + { {0x42, 0x5a}, "bzip2", bunzip2 }, +#endif +#ifdef CONFIG_DECOMPRESS_LZMA + { {0x5d, 0x00}, "lzma", unlzma }, +#endif + { {0, 0}, NULL, NULL } +}; + +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name) +{ + const struct compress_format *cf; + + if (len < 2) + return NULL; /* Need at least this much... */ + + for (cf = compressed_formats; cf->decompressor; cf++) { + if (!memcmp(inbuf, cf->magic, 2)) + break; + + } + if (name) + *name = cf->name; + return cf->decompressor; +} From 736f93236ce786d1bcf09ad4dcb38a360d35ea1b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 10 Jan 2009 12:06:19 +0100 Subject: [PATCH 042/682] bzip2/lzma: make flush_buffer() unconditional Impact: build fix flush_buffer() is used unconditionally: init/initramfs.c:456: error: 'flush_buffer' undeclared (first use in this function) init/initramfs.c:456: error: (Each undeclared identifier is reported only once init/initramfs.c:456: error: for each function it appears in.) So remove the decompressor #ifdefs from around it. Signed-off-by: Ingo Molnar --- init/initramfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index f8241e832aa..76f4a012533 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -390,7 +390,6 @@ static int __init write_buffer(char *buf, unsigned len) return len - count; } -#if defined CONFIG_RD_GZIP || defined CONFIG_RD_BZIP2 || defined CONFIG_RD_LZMA static int __init flush_buffer(void *bufv, unsigned len) { char *buf = (char *) bufv; @@ -413,7 +412,6 @@ static int __init flush_buffer(void *bufv, unsigned len) } return origLen; } -#endif static unsigned my_inptr; /* index of next byte to be processed in inbuf */ From 068790334cececc3d2d945617ccc585477da2e38 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:17:37 +0530 Subject: [PATCH 043/682] x86: smp.h move cpu_callin_mask and cpu_callin_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 19 +++++++++++++++++++ arch/x86/include/asm/smp.h | 3 --- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/setup_percpu.c | 1 + arch/x86/kernel/smpboot.c | 1 + 5 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/cpumask.h diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h new file mode 100644 index 00000000000..308dddd5632 --- /dev/null +++ b/arch/x86/include/asm/cpumask.h @@ -0,0 +1,19 @@ +#ifndef _ASM_X86_CPUMASK_H +#define _ASM_X86_CPUMASK_H +#ifndef __ASSEMBLY__ +#include + +#ifdef CONFIG_X86_64 + +extern cpumask_var_t cpu_callin_mask; + +#else /* CONFIG_X86_32 */ + +extern cpumask_t cpu_callin_map; + +#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) + +#endif /* CONFIG_X86_32 */ + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1963e27673c..c35aa5c0dd1 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -20,19 +20,16 @@ #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 14e543b6fd4..f0025846244 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #include diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 55c46074eba..bf63de72b64 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6c2b8444b83..84ac1cf46d8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include From fb8fd077fbf0de6662acfd240e8e6b25cf3202ca Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:20:24 +0530 Subject: [PATCH 044/682] x86: smp.h move cpu_callout_mask and cpu_callout_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 4 +--- arch/x86/kernel/smpboot.c | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 308dddd5632..9933fcad3c8 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -6,12 +6,15 @@ #ifdef CONFIG_X86_64 extern cpumask_var_t cpu_callin_mask; +extern cpumask_var_t cpu_callout_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; +extern cpumask_t cpu_callout_map; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c35aa5c0dd1..a3afec5cad0 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,20 +17,18 @@ #endif #include #include +#include #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 84ac1cf46d8..6c2b8444b83 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include From 493f6ca54e1ea59732dd334e35c5fe2d8e440b06 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:48:22 +0530 Subject: [PATCH 045/682] x86: smp.h move cpu_initialized_mask and cpu_initialized declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 9933fcad3c8..d4cfd120b84 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -7,14 +7,17 @@ extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; +extern cpumask_var_t cpu_initialized_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; +extern cpumask_t cpu_initialized; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a3afec5cad0..7d2a80319e8 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -21,15 +21,12 @@ #ifdef CONFIG_X86_64 -extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ -extern cpumask_t cpu_initialized; extern cpumask_t cpu_sibling_setup_map; -#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) #endif /* CONFIG_X86_32 */ From 52811d8c9beb67da6bc4b770de3c4134376788a1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 10 Jan 2009 12:58:50 +0530 Subject: [PATCH 046/682] x86: smp.h move cpu_sibling_setup_mask and cpu_sibling_setup_map declartion to cpumask.h Impact: cleanup Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpumask.h | 3 +++ arch/x86/include/asm/smp.h | 12 ------------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index d4cfd120b84..26c6dad9047 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -8,16 +8,19 @@ extern cpumask_var_t cpu_callin_mask; extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; +extern cpumask_var_t cpu_sibling_setup_mask; #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; extern cpumask_t cpu_callout_map; extern cpumask_t cpu_initialized; +extern cpumask_t cpu_sibling_setup_map; #define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) #define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 7d2a80319e8..a8cea7b0943 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,18 +19,6 @@ #include #include -#ifdef CONFIG_X86_64 - -extern cpumask_var_t cpu_sibling_setup_mask; - -#else /* CONFIG_X86_32 */ - -extern cpumask_t cpu_sibling_setup_map; - -#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) - -#endif /* CONFIG_X86_32 */ - extern int __cpuinit get_local_pda(int cpu); extern int smp_num_siblings; From 7106a5ab89c50c6b5aadea0850b40323804a922d Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 10 Jan 2009 23:00:22 -0500 Subject: [PATCH 047/682] x86-64: remove locked instruction from switch_to() Impact: micro-optimization The patch below removes an unnecessary locked instruction from switch_to(). TIF_FORK is only ever set in copy_thread() on initial process creation, and gets cleared during the first scheduling of the process. As such, it is safe to use an unlocked test for the flag within switch_to(). Signed-off-by: Benjamin LaHaise Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 6 +++--- arch/x86/kernel/entry_64.S | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1..fa47b1e6a86 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -96,15 +96,15 @@ do { \ "thread_return:\n\t" \ "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ - LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ - "jc ret_from_fork\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ - [tif_fork] "i" (TIF_FORK), \ + [_tif_fork] "i" (_TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ : "memory", "cc" __EXTRA_CLOBBER) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a98779..38dd37458e4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -408,6 +408,8 @@ END(save_paranoid) ENTRY(ret_from_fork) DEFAULT_FRAME + LOCK ; btr $TIF_FORK,TI_flags(%r8) + push kernel_eflags(%rip) CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: [PATCH 048/682] cpumask: update irq_desc to use cpumask_var_t Impact: reduce memory usage, use new cpumask API. Replace the affinity and pending_masks with cpumask_var_t's. This adds to the significant size reduction done with the SPARSE_IRQS changes. The added functions (init_alloc_desc_masks & init_copy_desc_masks) are in the include file so they can be inlined (and optimized out for the !CONFIG_CPUMASKS_OFFSTACK case.) [Naming chosen to be consistent with the other init*irq functions, as well as the backwards arg declaration of "from, to" instead of the more common "to, from" standard.] Includes a slight change to the declaration of struct irq_desc to embed the pending_mask within ifdef(CONFIG_SMP) to be consistent with other references, and some small changes to Xen. Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64 Signed-off-by: Mike Travis Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: KOSAKI Motohiro Cc: Venkatesh Pallipadi Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Yinghai Lu --- arch/x86/kernel/io_apic.c | 20 +++++----- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- drivers/xen/events.c | 4 +- include/linux/irq.h | 81 +++++++++++++++++++++++++++++++++++++-- kernel/irq/chip.c | 5 ++- kernel/irq/handle.c | 26 +++++++------ kernel/irq/manage.c | 12 +++--- kernel/irq/migration.c | 12 +++--- kernel/irq/numa_migrate.c | 12 +++++- kernel/irq/proc.c | 4 +- 11 files changed, 135 insertions(+), 45 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..1337eab60ec 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -356,7 +356,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpumask_intersects(&desc->affinity, mask)) + if (!cpumask_intersects(desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -579,9 +579,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - cpumask_and(&desc->affinity, cfg->domain, mask); + cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); + return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -2383,7 +2383,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (cfg->move_in_progress) send_cleanup_vector(cfg); - cpumask_copy(&desc->affinity, mask); + cpumask_copy(desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2405,11 +2405,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, &desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2434,7 +2434,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, &desc->pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2448,7 +2448,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, mask); + cpumask_copy(desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } @@ -2516,7 +2516,7 @@ static void irq_complete_move(struct irq_desc **descp) /* domain has not changed, but affinity did */ me = smp_processor_id(); - if (cpu_isset(me, desc->affinity)) { + if (cpumask_test_cpu(me, desc->affinity)) { *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; @@ -4039,7 +4039,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = &desc->affinity; + mask = desc->affinity; else mask = TARGET_CPUS; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 74b9ff7341e..e0f29be8ab0 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -248,7 +248,7 @@ void fixup_irqs(void) if (irq == 2) continue; - affinity = &desc->affinity; + affinity = desc->affinity; if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); affinity = cpu_all_mask; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 63c88e6ec02..0b21cb1ea11 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -100,7 +100,7 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); - affinity = &desc->affinity; + affinity = desc->affinity; if (!irq_has_action(irq) || cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index eb0dfdeaa94..e0767ff35d6 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -125,7 +125,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) BUG_ON(irq == -1); #ifdef CONFIG_SMP - irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); @@ -142,7 +142,7 @@ static void init_evtchn_cpu_bindings(void) /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { - desc->affinity = cpumask_of_cpu(0); + cpumask_copy(desc->affinity, cpumask_of(0)); } #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index f899b502f18..fa27210f1df 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -182,11 +182,11 @@ struct irq_desc { unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP - cpumask_t affinity; + cpumask_var_t affinity; unsigned int cpu; -#endif #ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_t pending_mask; + cpumask_var_t pending_mask; +#endif #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; @@ -422,4 +422,79 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #endif /* !CONFIG_S390 */ +#ifdef CONFIG_SMP +/** + * init_alloc_desc_masks - allocate cpumasks for irq_desc + * @desc: pointer to irq_desc struct + * @boot: true if need bootmem + * + * Allocates affinity and pending_mask cpumask if required. + * Returns true if successful (or not required). + * Side effect: affinity has all bits set, pending_mask has all bits clear. + */ +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + if (boot) { + alloc_bootmem_cpumask_var(&desc->affinity); + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + alloc_bootmem_cpumask_var(&desc->pending_mask); + cpumask_clear(desc->pending_mask); +#endif + return true; + } + + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) + return false; + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + free_cpumask_var(desc->affinity); + return false; + } + cpumask_clear(desc->pending_mask); +#endif + return true; +} + +/** + * init_copy_desc_masks - copy cpumasks for irq_desc + * @old_desc: pointer to old irq_desc struct + * @new_desc: pointer to new irq_desc struct + * + * Insures affinity and pending_masks are copied to new irq_desc. + * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the + * irq_desc struct so the copy is redundant. + */ + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +#ifdef CONFIG_CPUMASKS_OFFSTACK + cpumask_copy(new_desc->affinity, old_desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); +#endif +#endif +} + +#else /* !CONFIG_SMP */ + +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + return true; +} + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +} + +#endif /* CONFIG_SMP */ + #endif /* _LINUX_IRQ_H */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f63c706d25e..c248eba98b4 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq) desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpumask_setall(&desc->affinity); + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif #endif spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c20db0be917..b8fa1354f01 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -64,9 +64,6 @@ static struct irq_desc irq_desc_init = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif }; void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) @@ -88,6 +85,8 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { + int node = cpu_to_node(cpu); + memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); @@ -101,6 +100,10 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); + BUG_ON(1); + } arch_init_chip_data(desc, cpu); } @@ -119,9 +122,6 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif } }; @@ -141,7 +141,7 @@ int __init early_irq_init(void) desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - + init_alloc_desc_masks(&desc[i], 0, true); irq_desc_ptrs[i] = desc + i; } @@ -188,6 +188,10 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); + BUG_ON(1); + } init_one_irq_desc(irq, desc, cpu); irq_desc_ptrs[irq] = desc; @@ -207,9 +211,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { .handle_irq = handle_bad_irq, .depth = 1, .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), -#ifdef CONFIG_SMP - .affinity = CPU_MASK_ALL -#endif } }; @@ -222,9 +223,10 @@ int __init early_irq_init(void) desc = irq_desc; count = ARRAY_SIZE(irq_desc); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { desc[i].irq = i; - + init_alloc_desc_masks(&desc[i], 0, true); + } return arch_early_irq_init(); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index cd0cd8dcb34..b98739af455 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -98,14 +98,14 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - cpumask_copy(&desc->affinity, cpumask); + cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - cpumask_copy(&desc->pending_mask, cpumask); + cpumask_copy(desc->pending_mask, cpumask); } #else - cpumask_copy(&desc->affinity, cpumask); + cpumask_copy(desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@ -127,16 +127,16 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) * one of the targets is online. */ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpumask_any_and(&desc->affinity, cpu_online_mask) + if (cpumask_any_and(desc->affinity, cpu_online_mask) < nr_cpu_ids) goto set_affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity); + cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity); set_affinity: - desc->chip->set_affinity(irq, &desc->affinity); + desc->chip->set_affinity(irq, desc->affinity); return 0; } diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index bd72329e630..e05ad9be43b 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -18,7 +18,7 @@ void move_masked_irq(int irq) desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpumask_empty(&desc->pending_mask))) + if (unlikely(cpumask_empty(desc->pending_mask))) return; if (!desc->chip->set_affinity) @@ -38,13 +38,13 @@ void move_masked_irq(int irq) * For correct operation this depends on the caller * masking the irqs. */ - if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) + if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids)) { - cpumask_and(&desc->affinity, - &desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, &desc->affinity); + cpumask_and(desc->affinity, + desc->pending_mask, cpu_online_mask); + desc->chip->set_affinity(irq, desc->affinity); } - cpumask_clear(&desc->pending_mask); + cpumask_clear(desc->pending_mask); } void move_native_irq(int irq) diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ecf765c6a77..f001a4ea641 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -46,6 +46,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + init_copy_desc_masks(old_desc, desc); arch_init_copy_chip_data(old_desc, desc, cpu); } @@ -76,11 +77,20 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); if (!desc) { - printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq); + printk(KERN_ERR "irq %d: can not get new irq_desc " + "for migration.\n", irq); /* still use old one */ desc = old_desc; goto out_unlock; } + if (!init_alloc_desc_masks(desc, node, false)) { + printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " + "for migration.\n", irq); + /* still use old one */ + kfree(desc); + desc = old_desc; + goto out_unlock; + } init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index aae3f742bce..692363dd591 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -20,11 +20,11 @@ static struct proc_dir_entry *root_irq_dir; static int irq_affinity_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - const struct cpumask *mask = &desc->affinity; + const struct cpumask *mask = desc->affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PENDING) - mask = &desc->pending_mask; + mask = desc->pending_mask; #endif seq_cpumask(m, mask); seq_putc(m, '\n'); From fbd59a8d1f7cf325fdb6828659f1fb76631e87b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: [PATCH 049/682] cpumask: Use topology_core_cpumask()/topology_thread_cpumask() Impact: reduce stack usage, use new cpumask API. This actually uses topology_core_cpumask() and topology_thread_cpumask(), removing the only users of topology_core_siblings() and topology_thread_siblings() Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: linux-net-drivers@solarflare.com --- Documentation/cputopology.txt | 6 +++--- drivers/base/topology.c | 33 ++++++++++++++++----------------- drivers/net/sfc/efx.c | 4 ++-- include/linux/topology.h | 6 ++++++ 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt index 45932ec21ce..b41f3e58aef 100644 --- a/Documentation/cputopology.txt +++ b/Documentation/cputopology.txt @@ -18,11 +18,11 @@ For an architecture to support this feature, it must define some of these macros in include/asm-XXX/topology.h: #define topology_physical_package_id(cpu) #define topology_core_id(cpu) -#define topology_thread_siblings(cpu) -#define topology_core_siblings(cpu) +#define topology_thread_cpumask(cpu) +#define topology_core_cpumask(cpu) The type of **_id is int. -The type of siblings is cpumask_t. +The type of siblings is (const) struct cpumask *. To be consistent on all architectures, include/linux/topology.h provides default definitions for any of the above macros that are diff --git a/drivers/base/topology.c b/drivers/base/topology.c index a778fb52b11..bf6b13206d0 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -31,7 +31,10 @@ #include #include -#define define_one_ro(_name) \ +#define define_one_ro_named(_name, _func) \ +static SYSDEV_ATTR(_name, 0444, _func, NULL) + +#define define_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, show_##_name, NULL) #define define_id_show_func(name) \ @@ -42,8 +45,8 @@ static ssize_t show_##name(struct sys_device *dev, \ return sprintf(buf, "%d\n", topology_##name(cpu)); \ } -#if defined(topology_thread_siblings) || defined(topology_core_siblings) -static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) +#if defined(topology_thread_cpumask) || defined(topology_core_cpumask) +static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) { ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; int n = 0; @@ -65,7 +68,7 @@ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, char *buf) \ { \ unsigned int cpu = dev->id; \ - return show_cpumap(0, &(topology_##name(cpu)), buf); \ + return show_cpumap(0, topology_##name(cpu), buf); \ } #define define_siblings_show_list(name) \ @@ -74,7 +77,7 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ char *buf) \ { \ unsigned int cpu = dev->id; \ - return show_cpumap(1, &(topology_##name(cpu)), buf); \ + return show_cpumap(1, topology_##name(cpu), buf); \ } #else @@ -82,9 +85,7 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ static ssize_t show_##name(struct sys_device *dev, \ struct sysdev_attribute *attr, char *buf) \ { \ - unsigned int cpu = dev->id; \ - cpumask_t mask = topology_##name(cpu); \ - return show_cpumap(0, &mask, buf); \ + return show_cpumap(0, topology_##name(dev->id), buf); \ } #define define_siblings_show_list(name) \ @@ -92,9 +93,7 @@ static ssize_t show_##name##_list(struct sys_device *dev, \ struct sysdev_attribute *attr, \ char *buf) \ { \ - unsigned int cpu = dev->id; \ - cpumask_t mask = topology_##name(cpu); \ - return show_cpumap(1, &mask, buf); \ + return show_cpumap(1, topology_##name(dev->id), buf); \ } #endif @@ -107,13 +106,13 @@ define_one_ro(physical_package_id); define_id_show_func(core_id); define_one_ro(core_id); -define_siblings_show_func(thread_siblings); -define_one_ro(thread_siblings); -define_one_ro(thread_siblings_list); +define_siblings_show_func(thread_cpumask); +define_one_ro_named(thread_siblings, show_thread_cpumask); +define_one_ro_named(thread_siblings_list, show_thread_cpumask_list); -define_siblings_show_func(core_siblings); -define_one_ro(core_siblings); -define_one_ro(core_siblings_list); +define_siblings_show_func(core_cpumask); +define_one_ro_named(core_siblings, show_core_cpumask); +define_one_ro_named(core_siblings_list, show_core_cpumask_list); static struct attribute *default_attrs[] = { &attr_physical_package_id.attr, diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index 7673fd92eaf..f2e56ceee0e 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -863,8 +863,8 @@ static int efx_wanted_rx_queues(void) for_each_online_cpu(cpu) { if (!cpu_isset(cpu, core_mask)) { ++count; - cpus_or(core_mask, core_mask, - topology_core_siblings(cpu)); + cpumask_or(&core_mask, &core_mask, + topology_core_cpumask(cpu)); } } diff --git a/include/linux/topology.h b/include/linux/topology.h index e632d29f054..a16b9e06f2e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -193,5 +193,11 @@ int arch_update_cpu_topology(void); #ifndef topology_core_siblings #define topology_core_siblings(cpu) cpumask_of_cpu(cpu) #endif +#ifndef topology_thread_cpumask +#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_core_cpumask +#define topology_core_cpumask(cpu) cpumask_of(cpu) +#endif #endif /* _LINUX_TOPOLOGY_H */ From f7df8ed164996cd2c6aca9674388be6ef78d8b37 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 050/682] cpumask: convert misc driver functions Impact: use new cpumask API. Convert misc driver functions to use struct cpumask. To Do: - Convert iucv_buffer_cpumask to cpumask_var_t. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Dean Nelson Cc: Robert Richter Cc: oprofile-list@lists.sf.net Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Ursula Braun Cc: linux390@de.ibm.com Cc: linux-s390@vger.kernel.org --- drivers/base/cpu.c | 2 +- drivers/misc/sgi-xp/xpc_main.c | 2 +- drivers/oprofile/buffer_sync.c | 22 ++++++++++++++++++---- drivers/oprofile/buffer_sync.h | 4 ++++ drivers/oprofile/oprof.c | 9 ++++++++- drivers/xen/manage.c | 2 +- 6 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 719ee5c1c8d..5b257a57bc5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -107,7 +107,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); /* * Print cpu online, possible, present, and system maps */ -static ssize_t print_cpus_map(char *buf, cpumask_t *map) +static ssize_t print_cpus_map(char *buf, const struct cpumask *map) { int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 89218f7cfaa..6576170de96 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -318,7 +318,7 @@ xpc_hb_checker(void *ignore) /* this thread was marked active by xpc_hb_init() */ - set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU)); + set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); /* set our heartbeating to other partitions into motion */ xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 9da5a4b8113..c3ea5fa7d05 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -38,7 +38,7 @@ static LIST_HEAD(dying_tasks); static LIST_HEAD(dead_tasks); -static cpumask_t marked_cpus = CPU_MASK_NONE; +static cpumask_var_t marked_cpus; static DEFINE_SPINLOCK(task_mortuary); static void process_task_mortuary(void); @@ -456,10 +456,10 @@ static void mark_done(int cpu) { int i; - cpu_set(cpu, marked_cpus); + cpumask_set_cpu(cpu, marked_cpus); for_each_online_cpu(i) { - if (!cpu_isset(i, marked_cpus)) + if (!cpumask_test_cpu(i, marked_cpus)) return; } @@ -468,7 +468,7 @@ static void mark_done(int cpu) */ process_task_mortuary(); - cpus_clear(marked_cpus); + cpumask_clear(marked_cpus); } @@ -565,6 +565,20 @@ void sync_buffer(int cpu) mutex_unlock(&buffer_mutex); } +int __init buffer_sync_init(void) +{ + if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(marked_cpus); + return 0; +} + +void __exit buffer_sync_cleanup(void) +{ + free_cpumask_var(marked_cpus); +} + /* The function can be used to add a buffer worth of data directly to * the kernel buffer. The buffer is assumed to be a circular buffer. * Take the entries from index start and end at index end, wrapping diff --git a/drivers/oprofile/buffer_sync.h b/drivers/oprofile/buffer_sync.h index 3110732c183..0ebf5db6267 100644 --- a/drivers/oprofile/buffer_sync.h +++ b/drivers/oprofile/buffer_sync.h @@ -19,4 +19,8 @@ void sync_stop(void); /* sync the given CPU's buffer */ void sync_buffer(int cpu); +/* initialize/destroy the buffer system. */ +int buffer_sync_init(void); +void buffer_sync_cleanup(void); + #endif /* OPROFILE_BUFFER_SYNC_H */ diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 3cffce90f82..ced39f60229 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -183,6 +183,10 @@ static int __init oprofile_init(void) { int err; + err = buffer_sync_init(); + if (err) + return err; + err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { @@ -191,8 +195,10 @@ static int __init oprofile_init(void) } err = oprofilefs_register(); - if (err) + if (err) { oprofile_arch_exit(); + buffer_sync_cleanup(); + } return err; } @@ -202,6 +208,7 @@ static void __exit oprofile_exit(void) { oprofilefs_unregister(); oprofile_arch_exit(); + buffer_sync_cleanup(); } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 9b91617b958..e7e83b65c18 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -100,7 +100,7 @@ static void do_suspend(void) /* XXX use normal device tree? */ xenbus_suspend(); - err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0)); + err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); if (err) { printk(KERN_ERR "failed to start xen_suspend: %d\n", err); goto out; From 2f8975fbcf07103afab0bbaea5f5b1a9967ffb86 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 051/682] cpumask: convert drivers/net/sfc Impact: reduce stack usage, use new cpumask API. Remove a cpumask from the stack. Ben Hutchings indicated that printing a warning and returning 1 was acceptable for the corner case where allocation fails. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: Ben Hutchings Cc: linux-net-drivers@solarflare.com --- drivers/net/sfc/efx.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index f2e56ceee0e..101c00a7bb7 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -854,20 +854,27 @@ static void efx_fini_io(struct efx_nic *efx) * interrupts across them. */ static int efx_wanted_rx_queues(void) { - cpumask_t core_mask; + cpumask_var_t core_mask; int count; int cpu; - cpus_clear(core_mask); + if (!alloc_cpumask_var(&core_mask, GFP_KERNEL)) { + printk(KERN_WARNING + "efx.c: allocation failure, irq balancing hobbled\n"); + return 1; + } + + cpumask_clear(core_mask); count = 0; for_each_online_cpu(cpu) { - if (!cpu_isset(cpu, core_mask)) { + if (!cpumask_test_cpu(cpu, core_mask)) { ++count; - cpumask_or(&core_mask, &core_mask, + cpumask_or(core_mask, core_mask, topology_core_cpumask(cpu)); } } + free_cpumask_var(core_mask); return count; } From 651f8118cf0a5724f23fe1de4a3d9d36b2e01c2e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 052/682] cpumask: convert other misc kernel functions Impact: use new cpumask API. Convert other misc kernel functions to use struct cpumask. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- lib/smp_processor_id.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 0f8fc22ed10..4689cb073da 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void) * Kernel threads bound to a single CPU can safely use * smp_processor_id(): */ - if (cpus_equal(current->cpus_allowed, cpumask_of_cpu(this_cpu))) + if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) goto out; /* From 802bf931f2688ad125b73db597ce63cc842fb27a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 053/682] cpumask: fix bug in use cpumask_var_t in irq_desc Impact: fix bug where new irq_desc uses old cpumask pointers which are freed. As Yinghai pointed out, init_copy_one_irq_desc() copies the old desc to the new desc overwriting the cpumask pointers. Since the old_desc and the cpumask pointers are freed, then memory corruption will occur if these old pointers are used. Move the allocation of these pointers to after the copy. Signed-off-by: Mike Travis Cc: Yinghai Lu --- include/linux/irq.h | 9 +++++++-- kernel/irq/handle.c | 8 +------- kernel/irq/numa_migrate.c | 13 ++++++++----- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index fa27210f1df..27a67536511 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -426,15 +426,18 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); /** * init_alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct + * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { + int node; + if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); cpumask_setall(desc->affinity); @@ -446,6 +449,8 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, return true; } + node = cpu_to_node(cpu); + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; cpumask_setall(desc->affinity); @@ -484,7 +489,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b8fa1354f01..f01c0a30cb4 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -85,8 +85,6 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { - int node = cpu_to_node(cpu); - memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); @@ -100,7 +98,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, node, false)) { + if (!init_alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } @@ -188,10 +186,6 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, node, false)) { - printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); - BUG_ON(1); - } init_one_irq_desc(irq, desc, cpu); irq_desc_ptrs[irq] = desc; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index f001a4ea641..666260e4c06 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -38,16 +38,22 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->kstat_irqs = NULL; } -static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, +static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, struct irq_desc *desc, int cpu) { memcpy(desc, old_desc, sizeof(struct irq_desc)); + if (!init_alloc_desc_masks(desc, cpu, false)) { + printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " + "for migration.\n", irq); + return false; + } spin_lock_init(&desc->lock); desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); init_copy_desc_masks(old_desc, desc); arch_init_copy_chip_data(old_desc, desc, cpu); + return true; } static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) @@ -83,15 +89,12 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = old_desc; goto out_unlock; } - if (!init_alloc_desc_masks(desc, node, false)) { - printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " - "for migration.\n", irq); + if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { /* still use old one */ kfree(desc); desc = old_desc; goto out_unlock; } - init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; From 4595f9620cda8a1e973588e743cf5f8436dd20c6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: [PATCH 054/682] x86: change flush_tlb_others to take a const struct cpumask Impact: reduce stack usage, use new cpumask API. This is made a little more tricky by uv_flush_tlb_others which actually alters its argument, for an IPI to be sent to the remaining cpus in the mask. I solve this by allocating a cpumask_var_t for this case and falling back to IPI should this fail. To eliminate temporaries in the caller, all flush_tlb_others implementations now do the this-cpu-elimination step themselves. Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)" which has been there since pre-git and yet f->flush_cpumask is always zero at this point. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/x86/include/asm/paravirt.h | 8 ++-- arch/x86/include/asm/tlbflush.h | 8 ++-- arch/x86/include/asm/uv/uv_bau.h | 3 +- arch/x86/kernel/tlb_32.c | 67 ++++++++++++++------------------ arch/x86/kernel/tlb_64.c | 61 ++++++++++++++++------------- arch/x86/kernel/tlb_uv.c | 16 ++++---- arch/x86/xen/enlighten.c | 31 ++++++--------- 7 files changed, 93 insertions(+), 101 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..c26c6bf4da0 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -244,7 +244,8 @@ struct pv_mmu_ops { void (*flush_tlb_user)(void); void (*flush_tlb_kernel)(void); void (*flush_tlb_single)(unsigned long addr); - void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, + void (*flush_tlb_others)(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va); /* Hooks for allocating and freeing a pagetable top-level */ @@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr) PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); } -static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, +static inline void flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); + PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0e7bbb54911..f4e1b550ce6 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -static inline void native_flush_tlb_others(const cpumask_t *cpumask, +static inline void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { @@ -142,8 +142,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, flush_tlb_mm(vma->vm_mm); } -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va); #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 @@ -166,7 +166,7 @@ static inline void reset_lazy_tlbstate(void) #endif /* SMP */ #ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) +#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) #endif static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 50423c7b56b..74e6393bfdd 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); +extern int uv_flush_tlb_others(struct cpumask *, + struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ce505464224..ec53818f4e3 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) * Optimizations Manfred Spraul */ -static cpumask_t flush_cpumask; +static cpumask_var_t flush_cpumask; static struct mm_struct *flush_mm; static unsigned long flush_va; static DEFINE_SPINLOCK(tlbstate_lock); @@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) cpu = get_cpu(); - if (!cpu_isset(cpu, flush_cpumask)) + if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; /* * This was a BUG() but until someone can quote me the @@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } ack_APIC_irq(); smp_mb__before_clear_bit(); - cpu_clear(cpu, flush_cpumask); + cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); out: put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { - cpumask_t cpumask = *cpumaskp; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask * - mask must exist :) */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (unlikely(cpus_empty(cpumask))) - return; -#endif - /* * i'm not happy about this global shared spinlock in the * MM hot path, but we'll see how contended it is. @@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, */ spin_lock(&tlbstate_lock); + cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); +#ifdef CONFIG_HOTPLUG_CPU + /* If a CPU which we ran on has gone down, OK. */ + cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + if (unlikely(cpumask_empty(flush_cpumask))) { + spin_unlock(&tlbstate_lock); + return; + } +#endif flush_mm = mm; flush_va = va; - cpus_or(flush_cpumask, cpumask, flush_cpumask); /* * Make the above memory operations globally visible before @@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - while (!cpus_empty(flush_cpumask)) + while (!cpumask_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ cpu_relax(); @@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); - + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } EXPORT_SYMBOL(flush_tlb_page); @@ -254,3 +239,9 @@ void reset_lazy_tlbstate(void) per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; } +static int init_flush_cpumask(void) +{ + alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); + return 0; +} +early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e4..38836aef51b 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -43,10 +43,10 @@ union smp_flush_state { struct { - cpumask_t flush_cpumask; struct mm_struct *flush_mm; unsigned long flush_va; spinlock_t tlbstate_lock; + DECLARE_BITMAP(flush_cpumask, NR_CPUS); }; char pad[SMP_CACHE_BYTES]; } ____cacheline_aligned; @@ -131,7 +131,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; f = &per_cpu(flush_state, sender); - if (!cpu_isset(cpu, f->flush_cpumask)) + if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) goto out; /* * This was a BUG() but until someone can quote me the @@ -153,19 +153,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); - cpu_clear(cpu, f->flush_cpumask); + cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); inc_irq_stat(irq_tlb_count); } -void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, - unsigned long va) +static void flush_tlb_others_ipi(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) { int sender; union smp_flush_state *f; - cpumask_t cpumask = *cpumaskp; - - if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) - return; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; @@ -180,7 +176,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, f->flush_mm = mm; f->flush_va = va; - cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); + cpumask_andnot(to_cpumask(f->flush_cpumask), + cpumask, cpumask_of(smp_processor_id())); /* * Make the above memory operations globally visible before @@ -191,9 +188,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); - while (!cpus_empty(f->flush_cpumask)) + while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); f->flush_mm = NULL; @@ -201,6 +198,24 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, spin_unlock(&f->tlbstate_lock); } +void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, unsigned long va) +{ + if (is_uv_system()) { + cpumask_var_t after_uv_flush; + + if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) { + cpumask_andnot(after_uv_flush, + cpumask, cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + free_cpumask_var(after_uv_flush); + return; + } + } + flush_tlb_others_ipi(cpumask, mm, va); +} + static int __cpuinit init_smp_flush(void) { int i; @@ -215,25 +230,18 @@ core_initcall(init_smp_flush); void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; - preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -241,8 +249,8 @@ void flush_tlb_mm(struct mm_struct *mm) else leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); preempt_enable(); } @@ -250,11 +258,8 @@ void flush_tlb_mm(struct mm_struct *mm) void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) { struct mm_struct *mm = vma->vm_mm; - cpumask_t cpu_mask; preempt_disable(); - cpu_mask = mm->cpu_vm_mask; - cpu_clear(smp_processor_id(), cpu_mask); if (current->active_mm == mm) { if (current->mm) @@ -263,8 +268,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) leave_mm(smp_processor_id()); } - if (!cpus_empty(cpu_mask)) - flush_tlb_others(cpu_mask, mm, va); + if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) + flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f885023167e..690dcf1a27d 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * The cpumaskp mask contains the cpus the broadcast was sent to. * * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask is left - * unchanged. + * Returns 0 if some remote flushing remains to be done. The mask will have + * some bits still set. */ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - cpumask_t *cpumaskp) + struct cpumask *cpumaskp) { int completion_status = 0; int right_shift; @@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpu_clear(bit, *cpumaskp); + cpumask_clear_cpu(bit, cpumaskp); } - if (!cpus_empty(*cpumaskp)) + if (!cpumask_empty(cpumaskp)) return 0; return 1; } @@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Returns 1 if all remote flushing was done. * Returns 0 if some remote flushing remains to be done. */ -int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, +int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, unsigned long va) { int i; @@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu_mask(bit, *cpumaskp) { + for_each_cpu(bit, cpumaskp) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b2..965539ec425 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr) preempt_enable(); } -static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, - unsigned long va) +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) { struct { struct mmuext_op op; - cpumask_t mask; + DECLARE_BITMAP(mask, NR_CPUS); } *args; - cpumask_t cpumask = *cpus; struct multicall_space mcs; - /* - * A couple of (to be removed) sanity checks: - * - * - current CPU must not be in mask - * - mask must exist :) - */ - BUG_ON(cpus_empty(cpumask)); - BUG_ON(cpu_isset(smp_processor_id(), cpumask)); + BUG_ON(cpumask_empty(cpus)); BUG_ON(!mm); - /* If a CPU which we ran on has gone down, OK. */ - cpus_and(cpumask, cpumask, cpu_online_map); - if (cpus_empty(cpumask)) - return; - mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; - args->mask = cpumask; - args->op.arg2.vcpumask = &args->mask; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; @@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); +issue: xen_mc_issue(PARAVIRT_LAZY_MMU); } From 0e21990ae7ee11af94f44f240b06e520cf1505d4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 055/682] SGI UV cpumask: use static temp cpumask in flush_tlb Impact: Improve tlb flush performance for UV Calling alloc_cpumask_var a zillion times a second does affect performance. Replace with static cpumask. Note: when CONFIG_X86_UV is defined, this extra PER_CPU memory will be optimized out for non-UV configs as is_uv_system() will then return a constant 0. Signed-off-by: Mike Travis --- arch/x86/kernel/tlb_64.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 38836aef51b..7a3f9891302 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -202,16 +202,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { if (is_uv_system()) { - cpumask_var_t after_uv_flush; + /* FIXME: could be an percpu_alloc'd thing */ + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); - if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) { - cpumask_andnot(after_uv_flush, - cpumask, cpumask_of(smp_processor_id())); - if (!uv_flush_tlb_others(after_uv_flush, mm, va)) - flush_tlb_others_ipi(after_uv_flush, mm, va); - free_cpumask_var(after_uv_flush); - return; - } + cpumask_andnot(after_uv_flush, cpumask, + cpumask_of(smp_processor_id())); + if (!uv_flush_tlb_others(after_uv_flush, mm, va)) + flush_tlb_others_ipi(after_uv_flush, mm, va); + + put_cpu_var(flush_tlb_uv_cpumask); + return; } flush_tlb_others_ipi(cpumask, mm, va); } From a1c33bbeb7061f3ed39103c385844474eaa8f921 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 056/682] x86: cleanup remaining cpumask_t code in mce_amd_64.c Impact: Reduce memory usage, use new cpumask API. Use cpumask_var_t for 'cpus' cpumask in struct threshold_bank and update remaining old cpumask_t functions to new cpumask API. Signed-off-by: Mike Travis --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 8ae8c4ff094..4772e91e824 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = { struct threshold_bank { struct kobject *kobj; struct threshold_block *blocks; - cpumask_t cpus; + cpumask_var_t cpus; }; static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); @@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = first_cpu(per_cpu(cpu_core_map, cpu)); + i = cpumask_first(&per_cpu(cpu_core_map, cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; } @@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } + if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + kfree(b); + err = -ENOMEM; + goto out; + } b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); if (!b->kobj) goto out_free; #ifndef CONFIG_SMP - b->cpus = CPU_MASK_ALL; + cpumask_setall(b->cpus); #else - b->cpus = per_cpu(cpu_core_map, cpu); + cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu)); #endif per_cpu(threshold_banks, cpu)[bank] = b; @@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out_free; - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) out_free: per_cpu(threshold_banks, cpu)[bank] = NULL; + free_cpumask_var(b->cpus); kfree(b); out: return err; @@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) #endif /* remove all sibling symlinks before unregistering */ - for_each_cpu_mask_nr(i, b->cpus) { + for_each_cpu(i, b->cpus) { if (i == cpu) continue; @@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) free_out: kobject_del(b->kobj); kobject_put(b->kobj); + free_cpumask_var(b->cpus); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; } From f9b90566cd46e19f670a1e60a717ff243f060a8a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 057/682] x86: reduce stack usage in init_intel_cacheinfo Impact: reduce stack usage. init_intel_cacheinfo() does not use the cpumask so define a subset of struct _cpuid4_info (_cpuid4_info_regs) that can be used instead. Signed-off-by: Mike Travis --- arch/x86/kernel/cpu/intel_cacheinfo.c | 63 +++++++++++++++++++-------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be7..58527a9fc40 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -132,7 +132,16 @@ struct _cpuid4_info { union _cpuid4_leaf_ecx ecx; unsigned long size; unsigned long can_disable; - cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ + DECLARE_BITMAP(shared_cpu_map, NR_CPUS); +}; + +/* subset of above _cpuid4_info w/o shared_cpu_map */ +struct _cpuid4_info_regs { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + unsigned long size; + unsigned long can_disable; }; #ifdef CONFIG_PCI @@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, } static void __cpuinit -amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) +amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; @@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) } static int -__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +__cpuinit cpuid4_cache_lookup_regs(int index, + struct _cpuid4_info_regs *this_leaf) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) return 0; } +static int +__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +{ + struct _cpuid4_info_regs *leaf_regs = + (struct _cpuid4_info_regs *)this_leaf; + + return cpuid4_cache_lookup_regs(index, leaf_regs); +} + static int __cpuinit find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; @@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info this_leaf; - + struct _cpuid4_info_regs this_leaf; int retval; - retval = cpuid4_cache_lookup(i, &this_leaf); + retval = cpuid4_cache_lookup_regs(i, &this_leaf); if (retval >= 0) { switch(this_leaf.eax.split.level) { case 1: @@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) - cpu_set(cpu, this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); else { index_msb = get_count_order(num_threads_sharing); for_each_online_cpu(i) { if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { - cpu_set(i, this_leaf->shared_cpu_map); + cpumask_set_cpu(i, + to_cpumask(this_leaf->shared_cpu_map)); if (i != cpu && per_cpu(cpuid4_info, i)) { - sibling_leaf = CPUID4_INFO_IDX(i, index); - cpu_set(cpu, sibling_leaf->shared_cpu_map); + sibling_leaf = + CPUID4_INFO_IDX(i, index); + cpumask_set_cpu(cpu, to_cpumask( + sibling_leaf->shared_cpu_map)); } } } @@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) int sibling; this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { + for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpu_clear(cpu, sibling_leaf->shared_cpu_map); + cpumask_clear_cpu(cpu, + to_cpumask(sibling_leaf->shared_cpu_map)); } } #else @@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, int n = 0; if (len > 1) { - cpumask_t *mask = &this_leaf->shared_cpu_map; + const struct cpumask *mask; + mask = to_cpumask(this_leaf->shared_cpu_map); n = type? cpulist_scnprintf(buf, len-2, mask) : cpumask_scnprintf(buf, len-2, mask); @@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node) static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; ssize_t ret = 0; int i; @@ -718,7 +742,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, size_t count) { - int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int node = cpu_to_node(cpumask_first(mask)); struct pci_dev *dev = NULL; unsigned int ret, index, val; @@ -863,7 +888,7 @@ err_out: return -ENOMEM; } -static cpumask_t cache_dev_map = CPU_MASK_NONE; +static DECLARE_BITMAP(cache_dev_map, NR_CPUS); /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) @@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } - cpu_set(cpu, cache_dev_map); + cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); return 0; @@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (per_cpu(cpuid4_info, cpu) == NULL) return; - if (!cpu_isset(cpu, cache_dev_map)) + if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; - cpu_clear(cpu, cache_dev_map); + cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); From c90e785be2fd9dfaef1f030d0314e44052553736 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:10 -0800 Subject: [PATCH 058/682] cpumask: use cpumask_var_t in dcdbas.c Impact: reduce stack usage. Replace cpumask_t with cpumask_var_t in drivers/firmware/dcdbas.c. Signed-off-by: Mike Travis --- drivers/firmware/dcdbas.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 777fba48d2d..3009e0171e5 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -244,7 +244,7 @@ static ssize_t host_control_on_shutdown_store(struct device *dev, */ int dcdbas_smi_request(struct smi_cmd *smi_cmd) { - cpumask_t old_mask; + cpumask_var_t old_mask; int ret = 0; if (smi_cmd->magic != SMI_CMD_MAGIC) { @@ -254,8 +254,11 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd) } /* SMI requires CPU 0 */ - old_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); + if (!alloc_cpumask_var(&old_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(old_mask, ¤t->cpus_allowed); + set_cpus_allowed_ptr(current, cpumask_of(0)); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", __func__); @@ -275,7 +278,8 @@ int dcdbas_smi_request(struct smi_cmd *smi_cmd) ); out: - set_cpus_allowed_ptr(current, &old_mask); + set_cpus_allowed_ptr(current, old_mask); + free_cpumask_var(old_mask); return ret; } From d38b223c86db3162dc85b5a1997ac8a210e1660b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:11 -0800 Subject: [PATCH 059/682] cpumask: reduce stack usage in find_lowest_rq Impact: reduce stack usage, cleanup Use a cpumask_var_t in find_lowest_rq() and clean up other old cpumask_t calls. Signed-off-by: Mike Travis --- kernel/sched_rt.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 954e1a81b79..da932f4c852 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -960,16 +960,17 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); -static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) +static inline int pick_optimal_cpu(int this_cpu, + const struct cpumask *mask) { int first; /* "this_cpu" is cheaper to preempt than a remote processor */ - if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) + if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) return this_cpu; - first = first_cpu(*mask); - if (first != NR_CPUS) + first = cpumask_first(mask); + if (first < nr_cpu_ids) return first; return -1; @@ -981,6 +982,7 @@ static int find_lowest_rq(struct task_struct *task) struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); + cpumask_var_t domain_mask; if (task->rt.nr_cpus_allowed == 1) return -1; /* No other targets possible */ @@ -1013,19 +1015,25 @@ static int find_lowest_rq(struct task_struct *task) if (this_cpu == cpu) this_cpu = -1; /* Skip this_cpu opt if the same */ - for_each_domain(cpu, sd) { - if (sd->flags & SD_WAKE_AFFINE) { - cpumask_t domain_mask; - int best_cpu; + if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { + for_each_domain(cpu, sd) { + if (sd->flags & SD_WAKE_AFFINE) { + int best_cpu; - cpumask_and(&domain_mask, sched_domain_span(sd), - lowest_mask); + cpumask_and(domain_mask, + sched_domain_span(sd), + lowest_mask); - best_cpu = pick_optimal_cpu(this_cpu, - &domain_mask); - if (best_cpu != -1) - return best_cpu; + best_cpu = pick_optimal_cpu(this_cpu, + domain_mask); + + if (best_cpu != -1) { + free_cpumask_var(domain_mask); + return best_cpu; + } + } } + free_cpumask_var(domain_mask); } /* From c7a3589e7a1f8fdbd2536fe1bfa60b37f5121c69 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:11 -0800 Subject: [PATCH 060/682] Xen: reduce memory required for cpu_evtchn_mask Impact: reduce memory usage. Reduce this significant gain in the amount of memory used when NR_CPUS bumped from 128 to 4096 by allocating the array based on nr_cpu_ids: 65536 +2031616 2097152 +3100% cpu_evtchn_mask(.bss) Signed-off-by: Mike Travis Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com --- drivers/xen/events.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index e0767ff35d6..ed7527b3745 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -75,7 +75,14 @@ enum { static int evtchn_to_irq[NR_EVENT_CHANNELS] = { [0 ... NR_EVENT_CHANNELS-1] = -1 }; -static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +struct cpu_evtchn_s { + unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; +}; +static struct cpu_evtchn_s *cpu_evtchn_mask_p; +static inline unsigned long *cpu_evtchn_mask(int cpu) +{ + return cpu_evtchn_mask_p[cpu].bits; +} static u8 cpu_evtchn[NR_EVENT_CHANNELS]; /* Reference counts for bindings to IRQs. */ @@ -115,7 +122,7 @@ static inline unsigned long active_evtchns(unsigned int cpu, unsigned int idx) { return (sh->evtchn_pending[idx] & - cpu_evtchn_mask[cpu][idx] & + cpu_evtchn_mask(cpu)[idx] & ~sh->evtchn_mask[idx]); } @@ -128,8 +135,8 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif - __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); - __set_bit(chn, cpu_evtchn_mask[cpu]); + __clear_bit(chn, cpu_evtchn_mask(cpu_evtchn[chn])); + __set_bit(chn, cpu_evtchn_mask(cpu)); cpu_evtchn[chn] = cpu; } @@ -147,7 +154,7 @@ static void init_evtchn_cpu_bindings(void) #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); + memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); } static inline unsigned int cpu_from_evtchn(unsigned int evtchn) @@ -822,6 +829,10 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { void __init xen_init_IRQ(void) { int i; + size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s); + + cpu_evtchn_mask_p = kmalloc(size, GFP_KERNEL); + BUG_ON(cpu_evtchn_mask == NULL); init_evtchn_cpu_bindings(); From 9594949b060efe86ecaa1a66839232a3b9800bc9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: [PATCH 061/682] irq: change references from NR_IRQS to nr_irqs Impact: preparation, cleanup, add KERN_INFO printk Modify references from NR_IRQS to nr_irqs as the later will become variable-sized based on nr_cpu_ids when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- arch/x86/kernel/io_apic.c | 2 +- kernel/irq/handle.c | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1337eab60ec..ae80638012d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3183,7 +3183,7 @@ unsigned int create_irq_nr(unsigned int irq_want) irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new < NR_IRQS; new++) { + for (new = irq_want; new < nr_irqs; new++) { if (platform_legacy_irq(new)) continue; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index f01c0a30cb4..790c5fa7ea3 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,6 +132,8 @@ int __init early_irq_init(void) int legacy_count; int i; + printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); + desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); @@ -143,7 +145,7 @@ int __init early_irq_init(void) irq_desc_ptrs[i] = desc + i; } - for (i = legacy_count; i < NR_IRQS; i++) + for (i = legacy_count; i < nr_irqs; i++) irq_desc_ptrs[i] = NULL; return arch_early_irq_init(); @@ -151,7 +153,7 @@ int __init early_irq_init(void) struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; + return (irq < nr_irqs) ? irq_desc_ptrs[irq] : NULL; } struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) @@ -160,9 +162,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) unsigned long flags; int node; - if (irq >= NR_IRQS) { - printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", - irq, NR_IRQS); + if (irq >= nr_irqs) { + printk(KERN_WARNING "irq >= nr_irqs in irq_to_desc_alloc: %d %d\n", + irq, nr_irqs); WARN_ON(1); return NULL; } @@ -214,6 +216,8 @@ int __init early_irq_init(void) int count; int i; + printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); + desc = irq_desc; count = ARRAY_SIZE(irq_desc); From e2f4d06545ec1f29b0e838ee34cbf3500ea5b9a4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: [PATCH 062/682] irq: use WARN() instead of WARN_ON(). Impact: cleanup WARN msg. Ingo requested: > While at it, could you please also convert this to a WARN() construct > instead? (in a separate commit) ... and it shall be done. ;-) Signed-off-by: Mike Travis --- kernel/irq/handle.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 790c5fa7ea3..fd1ef16252f 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -163,9 +163,8 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) int node; if (irq >= nr_irqs) { - printk(KERN_WARNING "irq >= nr_irqs in irq_to_desc_alloc: %d %d\n", - irq, nr_irqs); - WARN_ON(1); + WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", + irq, nr_irqs); return NULL; } From 0fa0ebbf15addc1be8f73325d809c8547a9de304 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:06 -0800 Subject: [PATCH 063/682] irq: allocate irq_desc_ptrs array based on nr_irqs Impact: allocate irq_desc_ptrs in preparation for making it variable-sized. This addresses this memory usage bump when NR_CPUS bumped from 128 to 4096: 34816 +229376 264192 +658% irq_desc_ptrs(.data.read_mostly) The patch is split into two parts, the first simply allocates the irq_desc_ptrs array. Then next will deal with making it variable. This is only when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- kernel/irq/handle.c | 11 +++++++++-- kernel/irq/internals.h | 7 +++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index fd1ef16252f..d0b8f7e7279 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "internals.h" @@ -110,7 +111,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) */ DEFINE_SPINLOCK(sparse_irq_lock); -struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; +struct irq_desc **irq_desc_ptrs __read_mostly; static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { [0 ... NR_IRQS_LEGACY-1] = { @@ -137,6 +138,9 @@ int __init early_irq_init(void) desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); + /* allocate irq_desc_ptrs array based on nr_irqs */ + irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + for (i = 0; i < legacy_count; i++) { desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; @@ -153,7 +157,10 @@ int __init early_irq_init(void) struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc_ptrs[irq] : NULL; + if (irq_desc_ptrs && irq < nr_irqs) + return irq_desc_ptrs[irq]; + + return NULL; } struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index e6d0a43cc12..40416a81a0f 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -16,7 +16,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); extern spinlock_t sparse_irq_lock; + +#ifdef CONFIG_SPARSE_IRQ +/* irq_desc_ptrs allocated at boot time */ +extern struct irq_desc **irq_desc_ptrs; +#else +/* irq_desc_ptrs is a fixed size array */ extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; +#endif #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: [PATCH 064/682] irq: initialize nr_irqs based on nr_cpu_ids Impact: Reduce memory usage. This is the second half of the changes to make the irq_desc_ptrs be variable sized based on nr_cpu_ids. This is done by adding a new "max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids. This necessitated moving the define of MAX_IO_APICS to a separate file (asm/apicnum.h) so it could be included without the baggage of the other asm/apicdef.h declarations. Signed-off-by: Mike Travis --- arch/x86/include/asm/apicdef.h | 8 ++------ arch/x86/include/asm/apicnum.h | 12 ++++++++++++ arch/x86/include/asm/irq_vectors.h | 16 +++++++++++----- include/linux/irqnr.h | 7 +++++++ kernel/irq/handle.c | 3 +++ 5 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 arch/x86/include/asm/apicnum.h diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 63134e31e8b..1a6454ef7f6 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,12 +132,8 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif +/* get MAX_IO_APICS */ +#include /* * All x86-64 systems are xAPIC compatible. diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h new file mode 100644 index 00000000000..82f613c607c --- /dev/null +++ b/arch/x86/include/asm/apicnum.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_APICNUM_H +#define _ASM_X86_APICNUM_H + +/* define MAX_IO_APICS */ +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif + +#endif /* _ASM_X86_APICNUM_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index f7ff65032b9..602361ad0e7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -105,6 +105,8 @@ #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#include /* need MAX_IO_APICS */ + #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) @@ -112,11 +114,15 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else -# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) -# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif + +/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ +# define max_nr_irqs(nr_cpus) \ + ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ + (NR_VECTORS + (8 * NR_CPUS)) : \ + (NR_VECTORS + (32 * MAX_IO_APICS))) \ + +# define NR_IRQS max_nr_irqs(NR_CPUS) + #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 86af92e9e84..de66e4e1040 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -20,11 +20,18 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) + #else /* CONFIG_GENERIC_HARDIRQS */ +#include /* need possible max_nr_irqs() */ + extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +# ifndef max_nr_irqs +# define max_nr_irqs(nr_cpus) NR_IRQS +# endif + # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d0b8f7e7279..ebba7a116f1 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -133,6 +133,9 @@ int __init early_irq_init(void) int legacy_count; int i; + /* initialize nr_irqs based on nr_cpu_ids */ + nr_irqs = max_nr_irqs(nr_cpu_ids); + printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); desc = irq_desc_legacy; From 542d865bbed4ce1f050f586e53cf1cfadda93766 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: [PATCH 065/682] kstat: modify kstat_irqs_legacy to be variable sized Impact: reduce memory usage. Allocate kstat_irqs_legacy based on nr_cpu_ids to deal with this memory usage bump when NR_CPUS bumped from 128 to 4096: 8192 +253952 262144 +3100% kstat_irqs_legacy(.bss) This is only when CONFIG_SPARSE_IRQS=y. Signed-off-by: Mike Travis --- kernel/irq/handle.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index ebba7a116f1..b39f32ac8f8 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -124,8 +124,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm } }; -/* FIXME: use bootmem alloc ...*/ -static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; +static unsigned int *kstat_irqs_legacy; int __init early_irq_init(void) { @@ -144,9 +143,14 @@ int __init early_irq_init(void) /* allocate irq_desc_ptrs array based on nr_irqs */ irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); + /* allocate based on nr_cpu_ids */ + /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ + kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * + sizeof(int)); + for (i = 0; i < legacy_count; i++) { desc[i].irq = i; - desc[i].kstat_irqs = kstat_irqs_legacy[i]; + desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); init_alloc_desc_masks(&desc[i], 0, true); irq_desc_ptrs[i] = desc + i; From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 11 Jan 2009 09:22:58 -0800 Subject: [PATCH 066/682] cpumask, irq: non-x86 build failures Ingo Molnar wrote: > All non-x86 architectures fail to build: > > In file included from /home/mingo/tip/include/linux/random.h:11, > from /home/mingo/tip/include/linux/stackprotector.h:6, > from /home/mingo/tip/init/main.c:17: > /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory Do not include asm/irq_vectors.h in generic code - it's not available on all architectures. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apicdef.h | 8 ++++++-- include/linux/irqnr.h | 6 ------ kernel/irq/handle.c | 5 +++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 1a6454ef7f6..63134e31e8b 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -132,8 +132,12 @@ #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) -/* get MAX_IO_APICS */ -#include +#ifdef CONFIG_X86_32 +# define MAX_IO_APICS 64 +#else +# define MAX_IO_APICS 128 +# define MAX_LOCAL_APIC 32768 +#endif /* * All x86-64 systems are xAPIC compatible. diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index de66e4e1040..887477bc2ab 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,15 +23,9 @@ #else /* CONFIG_GENERIC_HARDIRQS */ -#include /* need possible max_nr_irqs() */ - extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); -# ifndef max_nr_irqs -# define max_nr_irqs(nr_cpus) NR_IRQS -# endif - # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b39f32ac8f8..04d3e46031e 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -58,6 +58,11 @@ int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); #ifdef CONFIG_SPARSE_IRQ + +#ifndef max_nr_irqs +#define max_nr_irqs(nr_cpus) NR_IRQS +#endif + static struct irq_desc irq_desc_init = { .irq = -1, .status = IRQ_DISABLED, From 28e08861b9afab4168b758fb7b95aa7a4da0f668 Mon Sep 17 00:00:00 2001 From: Christophe Saout Date: Sun, 11 Jan 2009 11:46:23 -0800 Subject: [PATCH 067/682] xen: fix too early kmalloc call Impact: fix bootup crash on xen guests SLAB is not yet up, with earlyprintk it is giving me an Oops in __kmalloc. Replace call to kmalloc() with alloc_bootmem(). Reported-by: Christophe Saout Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index ed7527b3745..3141e149d59 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -831,8 +832,8 @@ void __init xen_init_IRQ(void) int i; size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s); - cpu_evtchn_mask_p = kmalloc(size, GFP_KERNEL); - BUG_ON(cpu_evtchn_mask == NULL); + cpu_evtchn_mask_p = alloc_bootmem(size); + BUG_ON(cpu_evtchn_mask_p == NULL); init_evtchn_cpu_bindings(); From dd3feda7748b4c2739de47daaaa387fb01926c15 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:44:29 +0530 Subject: [PATCH 068/682] x86: microcode_intel.c fix style problems Impact: cleanup Fix: WARNING: Use #include instead of ERROR: trailing whitespace ERROR: "(foo*)" should be "(foo *)" total: 3 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/microcode_intel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index b7f4c929e61..5e9f4fc5138 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -87,9 +87,9 @@ #include #include #include +#include #include -#include #include #include @@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; } -static inline int +static inline int update_match_revision(struct microcode_header_intel *mc_header, int rev) { return (mc_header->rev <= rev) ? 0 : 1; @@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device) return ret; } - ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, - &get_ucode_fw); + ret = generic_load_microcode(cpu, (void *)firmware->data, + firmware->size, &get_ucode_fw); release_firmware(firmware); @@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size) /* We should bind the task to the CPU */ BUG_ON(cpu != raw_smp_processor_id()); - return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } static void microcode_fini_cpu(int cpu) From 448dd2fa3ec915ad4325868ef8bb9b9490d9f6a9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:45:14 +0530 Subject: [PATCH 069/682] x86: msr.c fix style problems Impact: cleanup Fix: WARNING: Use #include instead of total: 0 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 726266695b2..3cf3413ec62 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -35,10 +35,10 @@ #include #include #include +#include #include #include -#include #include static struct class *msr_class; From e17029ad69c7b1518413c00f793d89bb77b8527b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:46:03 +0530 Subject: [PATCH 070/682] x86: module_32.c fix style problems Impact: cleanup Fix: ERROR: code indent should use tabs where possible ERROR: trailing whitespace ERROR: spaces required around that '=' (ctx:VxW) total: 3 errors, 0 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c index 3db0a5442eb..0edd819050e 100644 --- a/arch/x86/kernel/module_32.c +++ b/arch/x86/kernel/module_32.c @@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } /* We don't need anything special. */ @@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { if (!strcmp(".text", secstrings + s->sh_name)) text = s; if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } From 3b9dc9f2f123286aaade54c45fef6723d587c664 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 14:46:48 +0530 Subject: [PATCH 071/682] x86: module_64.c fix style problems Impact: cleanup Fix: ERROR: trailing whitespace ERROR: code indent should use tabs where possible WARNING: %Ld/%Lu are not-standard C, use %lld/%llu WARNING: printk() should include KERN_ facility level ERROR: spaces required around that '=' (ctx:VxW) total: 13 errors, 2 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/module_64.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 6ba87830d4b..c23880b90b5 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c @@ -30,14 +30,14 @@ #include #include -#define DEBUGP(fmt...) +#define DEBUGP(fmt...) #ifndef CONFIG_UML void module_free(struct module *mod, void *module_region) { vfree(module_region); /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ + table entries. */ } void *module_alloc(unsigned long size) @@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; Elf64_Sym *sym; void *loc; - u64 val; + u64 val; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + ELF64_R_SYM(rel[i].r_info); - DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", - (int)ELF64_R_TYPE(rel[i].r_info), - sym->st_value, rel[i].r_addend, (u64)loc); + DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), + sym->st_value, rel[i].r_addend, (u64)loc); - val = sym->st_value + rel[i].r_addend; + val = sym->st_value + rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: @@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, if ((s64)val != *(s32 *)loc) goto overflow; break; - case R_X86_64_PC32: + case R_X86_64_PC32: val -= (u64)loc; *(u32 *)loc = val; #if 0 if ((s64)val != *(s32 *)loc) - goto overflow; + goto overflow; #endif break; default: - printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", + printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", me->name, ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } @@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return 0; overflow: - printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + printk(KERN_ERR "overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", me->name); @@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, unsigned int relsec, struct module *me) { - printk("non add relocation not supported\n"); + printk(KERN_ERR "non add relocation not supported\n"); return -ENOSYS; -} +} int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) + const Elf_Shdr *sechdrs, + struct module *me) { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL; @@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secstrings + s->sh_name)) alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks= s; + locks = s; if (!strcmp(".parainstructions", secstrings + s->sh_name)) para = s; } From 23a22d57a8962479ca630c9542e62d6f86fdf927 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Jan 2009 14:24:04 -0800 Subject: [PATCH 072/682] bzip2/lzma: comprehensible error messages for missing decompressor Instead of failing to identify a compressed image with a decompressor that we don't have compiled in, identify it and fail with a comprehensible panic message. Signed-off-by: H. Peter Anvin --- init/do_mounts_rd.c | 5 ++++- init/initramfs.c | 12 +++++++++++- lib/decompress.c | 16 ++++++++++------ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a015e267fd1..91d0cfca507 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -79,9 +79,12 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) sys_read(fd, buf, size); *decompressor = decompress_method(buf, size, &compress_name); - if (*decompressor) { + if (compress_name) { printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", compress_name, start_block); + if (!*decompressor) + printk(KERN_CRIT "RAMDISK: %s decompressor not configured!\n", + compress_name); nblocks = 0; goto done; } diff --git a/init/initramfs.c b/init/initramfs.c index 76f4a012533..9a7290ec818 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -421,6 +421,8 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { int written; decompress_fn decompress; + const char *compress_name; + static __initdata char msg_buf[64]; dry_run = check_only; header_buf = kmalloc(110, GFP_KERNEL); @@ -449,10 +451,18 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) continue; } this_header = 0; - decompress = decompress_method(buf, len, NULL); + decompress = decompress_method(buf, len, &compress_name); if (decompress) decompress(buf, len, NULL, flush_buffer, NULL, &my_inptr, error); + else if (compress_name) { + if (!message) { + snprintf(msg_buf, sizeof msg_buf, + "compression method %s not configured", + compress_name); + message = msg_buf; + } + } if (state != Reset) error("junk in compressed archive"); this_header = saved_offset + my_inptr; diff --git a/lib/decompress.c b/lib/decompress.c index edac55cc782..961f367320f 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -13,21 +13,25 @@ #include #include +#ifndef CONFIG_DECOMPRESS_GZIP +# define gunzip NULL +#endif +#ifndef CONFIG_DECOMPRESS_BZIP2 +# define bunzip2 NULL +#endif +#ifndef CONFIG_DECOMPRESS_LZMA +# define unlzma NULL +#endif + static const struct compress_format { unsigned char magic[2]; const char *name; decompress_fn decompressor; } compressed_formats[] = { -#ifdef CONFIG_DECOMPRESS_GZIP { {037, 0213}, "gzip", gunzip }, { {037, 0236}, "gzip", gunzip }, -#endif -#ifdef CONFIG_DECOMPRESS_BZIP2 { {0x42, 0x5a}, "bzip2", bunzip2 }, -#endif -#ifdef CONFIG_DECOMPRESS_LZMA { {0x5d, 0x00}, "lzma", unlzma }, -#endif { {0, 0}, NULL, NULL } }; From e65e49d0f3714f4a6a42f6f6a19926ba33fcda75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 Jan 2009 15:27:13 -0800 Subject: [PATCH 073/682] irq: update all arches for new irq_desc Impact: cleanup, update to new cpumask API Irq_desc.affinity and irq_desc.pending_mask are now cpumask_var_t's so access to them should be using the new cpumask API. Signed-off-by: Mike Travis --- arch/alpha/kernel/irq.c | 2 +- arch/arm/kernel/irq.c | 18 ++++++++++++------ arch/arm/oprofile/op_model_mpcore.c | 2 +- arch/blackfin/kernel/irqchip.c | 5 +++++ arch/ia64/kernel/iosapic.c | 2 +- arch/ia64/kernel/irq.c | 4 ++-- arch/ia64/kernel/msi_ia64.c | 4 ++-- arch/ia64/sn/kernel/msi_sn.c | 2 +- arch/mips/include/asm/irq.h | 2 +- arch/mips/kernel/irq-gic.c | 2 +- arch/mips/kernel/smtc.c | 2 +- arch/mips/mti-malta/malta-smtc.c | 5 +++-- arch/parisc/kernel/irq.c | 8 ++++---- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/platforms/pseries/xics.c | 5 +++-- arch/powerpc/sysdev/mpic.c | 3 ++- arch/sparc/kernel/irq_64.c | 5 +++-- 17 files changed, 44 insertions(+), 29 deletions(-) diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 703731accda..7bc7489223f 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); return 0; } diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 7141cee1fab..4bb723eadad 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -104,6 +104,11 @@ static struct irq_desc bad_irq_desc = { .lock = SPIN_LOCK_UNLOCKED }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating bad_irq_desc.affinity or .pending_mask */ +#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + /* * do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. Instead, they should provide their @@ -161,7 +166,7 @@ void __init init_IRQ(void) irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; #ifdef CONFIG_SMP - bad_irq_desc.affinity = CPU_MASK_ALL; + cpumask_setall(bad_irq_desc.affinity); bad_irq_desc.cpu = smp_processor_id(); #endif init_arch_irq(); @@ -191,15 +196,16 @@ void migrate_irqs(void) struct irq_desc *desc = irq_desc + i; if (desc->cpu == cpu) { - unsigned int newcpu = any_online_cpu(desc->affinity); - - if (newcpu == NR_CPUS) { + unsigned int newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); + if (newcpu >= nr_cpu_ids) { if (printk_ratelimit()) printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", i, cpu); - cpus_setall(desc->affinity); - newcpu = any_online_cpu(desc->affinity); + cpumask_setall(desc->affinity); + newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); } route_irq(desc, i, newcpu); diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 6d6bd589924..853d42bb868 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -263,7 +263,7 @@ static void em_route_irq(int irq, unsigned int cpu) const struct cpumask *mask = cpumask_of(cpu); spin_lock_irq(&desc->lock); - desc->affinity = *mask; + cpumask_copy(desc->affinity, mask); desc->chip->set_affinity(irq, mask); spin_unlock_irq(&desc->lock); } diff --git a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index ab8209cbbad..5780d6df154 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -69,6 +69,11 @@ static struct irq_desc bad_irq_desc = { #endif }; +#ifdef CONFIG_CPUMASK_OFFSTACK +/* We are not allocating a variable-sized bad_irq_desc.affinity */ +#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK." +#endif + int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, j; diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5cfd3d91001..006ad366a45 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -880,7 +880,7 @@ iosapic_unregister_intr (unsigned int gsi) if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP /* Clear affinity */ - cpus_setall(idesc->affinity); + cpumask_setall(idesc->affinity); #endif /* Clear the interrupt information */ iosapic_intr_info[irq].dest = 0; diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a58f64ca9f0..226233a6fa1 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -103,7 +103,7 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { if (irq < NR_IRQS) { - cpumask_copy(&irq_desc[irq].affinity, + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu_logical_id(hwid))); irq_redir[irq] = (char) (redir & 0xff); } @@ -148,7 +148,7 @@ static void migrate_irqs(void) if (desc->status == IRQ_PER_CPU) continue; - if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + if (cpumask_any_and(irq_desc[irq].affinity, cpu_online_mask) >= nr_cpu_ids) { /* * Save it for phase 2 processing diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 89033933903..dcb6b7c51ea 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -75,7 +75,7 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, msg.data = data; write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); } #endif /* CONFIG_SMP */ @@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = *mask; + cpumask_copy(irq_desc[irq].affinity, mask); } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index ca553b0429c..81e428943d7 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -205,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = *cpu_mask; + cpumask_copy(irq_desc[irq].affinity, cpu_mask); } #endif /* CONFIG_SMP */ diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index abc62aa744a..3214ade02d1 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -66,7 +66,7 @@ extern void smtc_forward_irq(unsigned int irq); */ #define IRQ_AFFINITY_HOOK(irq) \ do { \ - if (!cpu_isset(smp_processor_id(), irq_desc[irq].affinity)) { \ + if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\ smtc_forward_irq(irq); \ irq_exit(); \ return; \ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 494a49a317e..87deb8f6c45 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = *cpumask; + cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index b6cca01ff82..d2c1ab12425 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -686,7 +686,7 @@ void smtc_forward_irq(unsigned int irq) * and efficiency, we just pick the easiest one to find. */ - target = first_cpu(irq_desc[irq].affinity); + target = cpumask_first(irq_desc[irq].affinity); /* * We depend on the platform code to have correctly processed diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index aabd7274507..5ba31888fef 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -116,7 +116,7 @@ struct plat_smp_ops msmtc_smp_ops = { void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { - cpumask_t tmask = *affinity; + cpumask_t tmask; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,11 +139,12 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) * be made to forward to an offline "CPU". */ + cpumask_copy(&tmask, affinity); for_each_cpu(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } - irq_desc[irq].affinity = tmask; + cpumask_copy(irq_desc[irq].affinity, &tmask); if (cpus_empty(tmask)) /* diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index ac2c822928c..49482806863 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -120,7 +120,7 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) if (CHECK_IRQ_PER_CPU(irq)) { /* Bad linux design decision. The mask has already * been set; we must reset it */ - irq_desc[irq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[irq].affinity); return -EINVAL; } @@ -136,7 +136,7 @@ static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) if (cpu_check_affinity(irq, dest)) return; - irq_desc[irq].affinity = *dest; + cpumask_copy(irq_desc[irq].affinity, dest); } #endif @@ -295,7 +295,7 @@ int txn_alloc_irq(unsigned int bits_wide) unsigned long txn_affinity_addr(unsigned int irq, int cpu) { #ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); #endif return per_cpu(cpu_data, cpu).txn_addr; @@ -352,7 +352,7 @@ void do_cpu_irq_mask(struct pt_regs *regs) irq = eirr_to_irq(eirr_val); #ifdef CONFIG_SMP - dest = irq_desc[irq].affinity; + cpumask_copy(&dest, irq_desc[irq].affinity); if (CHECK_IRQ_PER_CPU(irq_desc[irq].status) && !cpu_isset(smp_processor_id(), dest)) { int cpu = first_cpu(dest); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 23b8b5e36f9..ad1e5ac721d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -231,7 +231,7 @@ void fixup_irqs(cpumask_t map) if (irq_desc[irq].status & IRQ_PER_CPU) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + cpumask_and(&mask, irq_desc[irq].affinity, &map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 84e058f1e1c..80b513449f4 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -153,9 +153,10 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check) { int server; /* For the moment only implement delivery to all cpus or one cpu */ - cpumask_t cpumask = irq_desc[virq].affinity; + cpumask_t cpumask; cpumask_t tmp = CPU_MASK_NONE; + cpumask_copy(&cpumask, irq_desc[virq].affinity); if (!distribute_irqs) return default_server; @@ -869,7 +870,7 @@ void xics_migrate_irqs_away(void) virq, cpu); /* Reset affinity to all cpus */ - irq_desc[virq].affinity = CPU_MASK_ALL; + cpumask_setall(irq_desc[virq].affinity); desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3e0d89dcdba..0afd21f9a22 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -566,9 +566,10 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic) #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index cab8e028687..4ac5c651e00 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -247,9 +247,10 @@ struct irq_handler_data { #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int virt_irq) { - cpumask_t mask = irq_desc[virt_irq].affinity; + cpumask_t mask; int cpuid; + cpumask_copy(&mask, irq_desc[virt_irq].affinity); if (cpus_equal(mask, CPU_MASK_ALL)) { static int irq_rover; static DEFINE_SPINLOCK(irq_rover_lock); @@ -854,7 +855,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - &irq_desc[irq].affinity); + irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: [PATCH 074/682] x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- arch/x86/include/asm/irq_vectors.h | 7 ++----- arch/x86/kernel/io_apic.c | 16 ++++++++++++++++ include/linux/interrupt.h | 1 + kernel/irq/handle.c | 9 ++------- kernel/softirq.c | 5 +++++ 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 602361ad0e7..a16a2ab2b42 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -115,14 +115,11 @@ # endif #else -/* defined as a macro so nr_irqs = max_nr_irqs(nr_cpu_ids) can be used */ -# define max_nr_irqs(nr_cpus) \ - ((8 * nr_cpus) > (32 * MAX_IO_APICS) ? \ +# define NR_IRQS \ + ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ (NR_VECTORS + (32 * MAX_IO_APICS))) \ -# define NR_IRQS max_nr_irqs(NR_CPUS) - #endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ae80638012d..157986916cd 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3850,6 +3850,22 @@ void __init probe_nr_irqs_gsi(void) nr_irqs_gsi = nr; } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? + (NR_VECTORS + (8 * nr_cpu_ids)) : + (NR_VECTORS + (32 * nr_ioapics))); + + if (nr < nr_irqs && nr > nr_irqs_gsi) + nr_irqs = nr; + + return 0; +} +#endif + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9127f6b51a3..472f11765f6 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v); struct irq_desc; extern int early_irq_init(void); +extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); extern int arch_init_chip_data(struct irq_desc *desc, int cpu); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 04d3e46031e..375d68cd5bf 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -59,10 +59,6 @@ EXPORT_SYMBOL_GPL(nr_irqs); #ifdef CONFIG_SPARSE_IRQ -#ifndef max_nr_irqs -#define max_nr_irqs(nr_cpus) NR_IRQS -#endif - static struct irq_desc irq_desc_init = { .irq = -1, .status = IRQ_DISABLED, @@ -137,9 +133,8 @@ int __init early_irq_init(void) int legacy_count; int i; - /* initialize nr_irqs based on nr_cpu_ids */ - nr_irqs = max_nr_irqs(nr_cpu_ids); - + /* initialize nr_irqs based on nr_cpu_ids */ + arch_probe_nr_irqs(); printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); desc = irq_desc_legacy; diff --git a/kernel/softirq.c b/kernel/softirq.c index bdbe9de9cd8..0365b4899a3 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -795,6 +795,11 @@ int __init __weak early_irq_init(void) return 0; } +int __init __weak arch_probe_nr_irqs(void) +{ + return 0; +} + int __init __weak arch_early_irq_init(void) { return 0; From a4a0acf8e17e3d08e28b721ceceb898fbc959ceb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 13 Jan 2009 18:43:03 -0800 Subject: [PATCH 075/682] x86: fix broken flush_tlb_others_ipi() This commit broke flush_tlb_others_ipi() causing boot hangs on a 16 logical cpu system: > commit 4595f9620cda8a1e973588e743cf5f8436dd20c6 > Author: Rusty Russell > Date: Sat Jan 10 21:58:09 2009 -0800 > > x86: change flush_tlb_others to take a const struct cpumask This change resulted in sending the invalidate tlb vector to the sender itself causing the hang. flush_tlb_others_ipi() should exclude the sender itself from the destination list. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7a3f9891302..54ee2ecb5e2 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -188,7 +188,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); From b5ba7e6d1e7e2ac808afd21be1e56dc34caf20e6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:46:17 +0530 Subject: [PATCH 076/682] x86: replacing mp_config_ioapic with mpc_ioapic Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 10 +----- arch/x86/kernel/acpi/boot.c | 28 ++++++++-------- arch/x86/kernel/io_apic.c | 60 ++++++++++++++++------------------ arch/x86/kernel/mpparse.c | 12 +++---- 4 files changed, 50 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7a1f44ac1f1..5a56ae9b505 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -120,14 +120,6 @@ extern int nr_ioapic_registers[MAX_IO_APICS]; #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_ioapic { - unsigned long mp_apicaddr; - unsigned int mp_apicid; - unsigned char mp_type; - unsigned char mp_apicver; - unsigned char mp_flags; -}; - struct mp_config_intsrc { unsigned int mp_dstapic; unsigned char mp_type; @@ -139,7 +131,7 @@ struct mp_config_intsrc { }; /* I/O APIC entries */ -extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; /* # of MP IRQ source entries */ extern int mp_irq_entries; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f43..2b27019e64f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id) DECLARE_BITMAP(used, 256); bitmap_zero(used, 256); for (i = 0; i < nr_ioapics; i++) { - struct mp_config_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->mp_apicid, used); + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); } if (!test_bit(id, used)) return id; @@ -945,29 +945,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) idx = nr_ioapics; - mp_ioapics[idx].mp_type = MP_IOAPIC; - mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; - mp_ioapics[idx].mp_apicaddr = address; + mp_ioapics[idx].type = MP_IOAPIC; + mp_ioapics[idx].flags = MPC_APIC_USABLE; + mp_ioapics[idx].apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); + mp_ioapics[idx].apicid = uniq_ioapic_id(id); #ifdef CONFIG_X86_32 - mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); + mp_ioapics[idx].apicver = io_apic_get_version(idx); #else - mp_ioapics[idx].mp_apicver = 0; + mp_ioapics[idx].apicver = 0; #endif /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ - mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; + mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; mp_ioapic_routing[idx].gsi_base = gsi_base; mp_ioapic_routing[idx].gsi_end = gsi_base + io_apic_get_redir_entries(idx); - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " - "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, - mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, + printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, + mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); nr_ioapics++; @@ -1026,7 +1026,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) mp_irq.mp_irqflag = (trigger << 2) | polarity; mp_irq.mp_srcbus = MP_ISA_BUS; mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ + mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ mp_irq.mp_dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); @@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void) ioapic = mp_find_ioapic(0); if (ioapic < 0) return; - dstapic = mp_ioapics[ioapic].mp_apicid; + dstapic = mp_ioapics[ioapic].apicid; /* * Use the default configuration for the IRQs 0-15. Unless diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 109c91db202..6c51ecdfbf4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -83,7 +83,7 @@ static DEFINE_SPINLOCK(vector_lock); int nr_ioapic_registers[MAX_IO_APICS]; /* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ @@ -387,7 +387,7 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); + + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); } static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) @@ -946,7 +946,7 @@ static int find_irq_entry(int apic, int pin, int type) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid || mp_irqs[i].mp_dstapic == MP_APIC_ALL) && mp_irqs[i].mp_dstirq == pin) return i; @@ -988,7 +988,7 @@ static int __init find_isa_irq_apic(int irq, int type) if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic) return apic; } } @@ -1016,7 +1016,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) int lbus = mp_irqs[i].mp_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic || mp_irqs[i].mp_dstapic == MP_APIC_ALL) break; @@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + apic, mp_ioapics[apic].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); + mp_ioapics[apic].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1605,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void) notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + mp_ioapics[apic].apicid, pin); continue; } if (notcon) { @@ -1700,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + mp_ioapics[i].apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -1720,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); @@ -2122,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].mp_apicid; + old_id = mp_ioapics[apic].apicid; - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + mp_ioapics[apic].apicid = reg_00.bits.ID; } /* @@ -2138,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { + mp_ioapics[apic].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); + apic, mp_ioapics[apic].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2149,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; + mp_ioapics[apic].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2164,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].mp_apicid) + if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].mp_dstapic == old_id) mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; + = mp_ioapics[apic].apicid; /* * Read the right value from the MPC table and @@ -2176,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); + mp_ioapics[apic].apicid); - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + reg_00.bits.ID = mp_ioapics[apic].apicid; spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2189,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + if (reg_00.bits.ID != mp_ioapics[apic].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); @@ -3118,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev) spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -4101,7 +4099,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; + ioapic_phys = mp_ioapics[i].apicaddr; #ifdef CONFIG_X86_32 if (!ioapic_phys) { printk(KERN_ERR diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 8385d4e7e15..a86a6553743 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -143,11 +143,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) if (bad_ioapic(m->apicaddr)) return; - mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; - mp_ioapics[nr_ioapics].mp_apicid = m->apicid; - mp_ioapics[nr_ioapics].mp_type = m->type; - mp_ioapics[nr_ioapics].mp_apicver = m->apicver; - mp_ioapics[nr_ioapics].mp_flags = m->flags; + mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; + mp_ioapics[nr_ioapics].apicid = m->apicid; + mp_ioapics[nr_ioapics].type = m->type; + mp_ioapics[nr_ioapics].apicver = m->apicver; + mp_ioapics[nr_ioapics].flags = m->flags; nr_ioapics++; } @@ -416,7 +416,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) intsrc.type = MP_INTSRC; intsrc.irqflag = 0; /* conforming */ intsrc.srcbus = 0; - intsrc.dstapic = mp_ioapics[0].mp_apicid; + intsrc.dstapic = mp_ioapics[0].apicid; intsrc.irqtype = mp_INT; From c2c21745ecba23c74690a124bcd371f83bd71e45 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 12 Jan 2009 17:47:22 +0530 Subject: [PATCH 077/682] x86: replacing mp_config_intsrc with mpc_intsrc Impact: cleanup, solve 80 columns wrap problems Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 16 +------- arch/x86/kernel/acpi/boot.c | 70 +++++++++++++++++----------------- arch/x86/kernel/io_apic.c | 64 +++++++++++++++---------------- arch/x86/kernel/mpparse.c | 68 ++++++++++++++++----------------- 4 files changed, 101 insertions(+), 117 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 5a56ae9b505..08ec793aa04 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -114,22 +114,8 @@ struct IR_IO_APIC_route_entry { extern int nr_ioapics; extern int nr_ioapic_registers[MAX_IO_APICS]; -/* - * MP-BIOS irq configuration table structures: - */ - #define MP_MAX_IOAPIC_PIN 127 -struct mp_config_intsrc { - unsigned int mp_dstapic; - unsigned char mp_type; - unsigned char mp_irqtype; - unsigned short mp_irqflag; - unsigned char mp_srcbus; - unsigned char mp_srcbusirq; - unsigned char mp_dstirq; -}; - /* I/O APIC entries */ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; @@ -137,7 +123,7 @@ extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; extern int mp_irq_entries; /* MP IRQ source entries */ -extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2b27019e64f..4cb5964f149 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,19 +973,19 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } -static void assign_to_mp_irq(struct mp_config_intsrc *m, - struct mp_config_intsrc *mp_irq) +static void assign_to_mp_irq(struct mpc_intsrc *m, + struct mpc_intsrc *mp_irq) { - memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); + memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); } -static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, - struct mp_config_intsrc *m) +static int mp_irq_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) { - return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); + return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); } -static void save_mp_irq(struct mp_config_intsrc *m) +static void save_mp_irq(struct mpc_intsrc *m) { int i; @@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { int ioapic; int pin; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; /* * Convert 'gsi' to 'ioapic.pin'. @@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (trigger << 2) | polarity; - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ - mp_irq.mp_dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ - mp_irq.mp_dstirq = pin; /* INTIN# */ + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; /* IRQ */ + mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ + mp_irq.dstirq = pin; /* INTIN# */ save_mp_irq(&mp_irq); } @@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void) int i; int ioapic; unsigned int dstapic; - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; #if defined (CONFIG_MCA) || defined (CONFIG_EISA) /* @@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void) int idx; for (idx = 0; idx < mp_irq_entries; idx++) { - struct mp_config_intsrc *irq = mp_irqs + idx; + struct mpc_intsrc *irq = mp_irqs + idx; /* Do we already have a mapping for this ISA IRQ? */ - if (irq->mp_srcbus == MP_ISA_BUS - && irq->mp_srcbusirq == i) + if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) break; /* Do we already have a mapping for this IOAPIC pin */ - if (irq->mp_dstapic == dstapic && - irq->mp_dstirq == i) + if (irq->dstapic == dstapic && irq->dstirq == i) break; } @@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void) continue; /* IRQ already used */ } - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqflag = 0; /* Conforming */ - mp_irq.mp_srcbus = MP_ISA_BUS; - mp_irq.mp_dstapic = dstapic; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_srcbusirq = i; /* Identity mapped */ - mp_irq.mp_dstirq = i; + mp_irq.type = MP_INTSRC; + mp_irq.irqflag = 0; /* Conforming */ + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.dstapic = dstapic; + mp_irq.irqtype = mp_INT; + mp_irq.srcbusirq = i; /* Identity mapped */ + mp_irq.dstirq = i; save_mp_irq(&mp_irq); } @@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity) { #ifdef CONFIG_X86_MPPARSE - struct mp_config_intsrc mp_irq; + struct mpc_intsrc mp_irq; int ioapic; if (!acpi_ioapic) return 0; /* print the entry should happen on mptable identically */ - mp_irq.mp_type = MP_INTSRC; - mp_irq.mp_irqtype = mp_INT; - mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); - mp_irq.mp_srcbus = number; - mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + mp_irq.srcbus = number; + mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); - mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; + mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; save_mp_irq(&mp_irq); #endif diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6c51ecdfbf4..79b8c0c72d3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -87,7 +87,7 @@ struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; int nr_ioapics; /* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; @@ -945,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type) int i; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) + if (mp_irqs[i].irqtype == type && + (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || + mp_irqs[i].dstapic == MP_APIC_ALL) && + mp_irqs[i].dstirq == pin) return i; return -1; @@ -962,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) - return mp_irqs[i].mp_dstirq; + return mp_irqs[i].dstirq; } return -1; } @@ -978,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type) int i; for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) + (mp_irqs[i].irqtype == type) && + (mp_irqs[i].srcbusirq == irq)) break; } if (i < mp_irq_entries) { int apic; for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) return apic; } } @@ -1013,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return -1; } for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; + int lbus = mp_irqs[i].srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) + if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + mp_irqs[i].dstapic == MP_APIC_ALL) break; if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && + !mp_irqs[i].irqtype && (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + if (pin == (mp_irqs[i].srcbusirq & 3)) return irq; /* * Use the first all-but-pin matching entry as a @@ -1072,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq) * EISA conforming in the MP table, that means its trigger type must * be read in from the ELCR */ -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) #define default_EISA_polarity(idx) default_ISA_polarity(idx) /* PCI interrupts are always polarity one level triggered, @@ -1089,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq) static int MPBIOS_polarity(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int polarity; /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) + switch (mp_irqs[idx].irqflag & 3) { case 0: /* conforms, ie. bus-type dependent polarity */ if (test_bit(bus, mp_bus_not_pci)) @@ -1131,13 +1131,13 @@ static int MPBIOS_polarity(int idx) static int MPBIOS_trigger(int idx) { - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; int trigger; /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + switch ((mp_irqs[idx].irqflag>>2) & 3) { case 0: /* conforms, ie. bus-type dependent */ if (test_bit(bus, mp_bus_not_pci)) @@ -1215,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; - int bus = mp_irqs[idx].mp_srcbus; + int bus = mp_irqs[idx].srcbus; /* * Debugging check, we are in big trouble if this message pops up! */ - if (mp_irqs[idx].mp_dstirq != pin) + if (mp_irqs[idx].dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; + irq = mp_irqs[idx].srcbusirq; } else { /* * PCI IRQs are mapped in order @@ -2164,8 +2164,8 @@ static void __init setup_ioapic_ids_from_mpc(void) */ if (old_id != mp_ioapics[apic].apicid) for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic + if (mp_irqs[i].dstapic == old_id) + mp_irqs[i].dstapic = mp_ioapics[apic].apicid; /* @@ -3983,8 +3983,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) return -1; for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) + if (mp_irqs[i].irqtype == mp_INT && + mp_irqs[i].srcbusirq == bus_irq) break; if (i >= mp_irq_entries) return -1; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a86a6553743..ad36377dc93 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -159,55 +159,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m) m->srcbusirq, m->dstapic, m->dstirq); } -static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) +static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) { apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," " IRQ %02x, APIC ID %x, APIC INT %02x\n", - mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, - (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, - mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); + mp_irq->irqtype, mp_irq->irqflag & 3, + (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, + mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); } static void __init assign_to_mp_irq(struct mpc_intsrc *m, - struct mp_config_intsrc *mp_irq) + struct mpc_intsrc *mp_irq) { - mp_irq->mp_dstapic = m->dstapic; - mp_irq->mp_type = m->type; - mp_irq->mp_irqtype = m->irqtype; - mp_irq->mp_irqflag = m->irqflag; - mp_irq->mp_srcbus = m->srcbus; - mp_irq->mp_srcbusirq = m->srcbusirq; - mp_irq->mp_dstirq = m->dstirq; + mp_irq->dstapic = m->dstapic; + mp_irq->type = m->type; + mp_irq->irqtype = m->irqtype; + mp_irq->irqflag = m->irqflag; + mp_irq->srcbus = m->srcbus; + mp_irq->srcbusirq = m->srcbusirq; + mp_irq->dstirq = m->dstirq; } -static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, +static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - m->dstapic = mp_irq->mp_dstapic; - m->type = mp_irq->mp_type; - m->irqtype = mp_irq->mp_irqtype; - m->irqflag = mp_irq->mp_irqflag; - m->srcbus = mp_irq->mp_srcbus; - m->srcbusirq = mp_irq->mp_srcbusirq; - m->dstirq = mp_irq->mp_dstirq; + m->dstapic = mp_irq->dstapic; + m->type = mp_irq->type; + m->irqtype = mp_irq->irqtype; + m->irqflag = mp_irq->irqflag; + m->srcbus = mp_irq->srcbus; + m->srcbusirq = mp_irq->srcbusirq; + m->dstirq = mp_irq->dstirq; } -static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, +static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, struct mpc_intsrc *m) { - if (mp_irq->mp_dstapic != m->dstapic) + if (mp_irq->dstapic != m->dstapic) return 1; - if (mp_irq->mp_type != m->type) + if (mp_irq->type != m->type) return 2; - if (mp_irq->mp_irqtype != m->irqtype) + if (mp_irq->irqtype != m->irqtype) return 3; - if (mp_irq->mp_irqflag != m->irqflag) + if (mp_irq->irqflag != m->irqflag) return 4; - if (mp_irq->mp_srcbus != m->srcbus) + if (mp_irq->srcbus != m->srcbus) return 5; - if (mp_irq->mp_srcbusirq != m->srcbusirq) + if (mp_irq->srcbusirq != m->srcbusirq) return 6; - if (mp_irq->mp_dstirq != m->dstirq) + if (mp_irq->dstirq != m->dstirq) return 7; return 0; @@ -808,15 +808,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m) /* not legacy */ for (i = 0; i < mp_irq_entries; i++) { - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; - if (mp_irqs[i].mp_srcbus != m->srcbus) + if (mp_irqs[i].srcbus != m->srcbus) continue; - if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) + if (mp_irqs[i].srcbusirq != m->srcbusirq) continue; if (irq_used[i]) { /* already claimed */ @@ -921,10 +921,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, if (irq_used[i]) continue; - if (mp_irqs[i].mp_irqtype != mp_INT) + if (mp_irqs[i].irqtype != mp_INT) continue; - if (mp_irqs[i].mp_irqflag != 0x0f) + if (mp_irqs[i].irqflag != 0x0f) continue; if (nr_m_spare > 0) { From 09b3ec7315a18d885127544204f1e389d41058d0 Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Mon, 12 Jan 2009 22:35:42 +0100 Subject: [PATCH 078/682] x86, tlb flush_data: replace per_cpu with an array Impact: micro-optimization, memory reduction On x86_64 flush tlb data is stored in per_cpu variables. This is unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries are accessed. This patch aims at making the code less confusing (there's nothing really "per_cpu") by using a plain array. It also would save some memory on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default). [ Ravikiran G Thirumalai also pointed out that the correct alignment is ____cacheline_internodealigned_in_smp, so that there's no bouncing on vsmp. ] Signed-off-by: Frederik Deweerdt Acked-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e4..8cfea5d1451 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -33,7 +33,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -48,13 +48,13 @@ union smp_flush_state { unsigned long flush_va; spinlock_t tlbstate_lock; }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. */ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; if (!cpu_isset(cpu, f->flush_cpumask)) goto out; @@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } From 0a2a18b721abc960fbcada406746877d22340a60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 23:37:16 +0100 Subject: [PATCH 079/682] x86: change the default cache size to 64 bytes Right now the generic cacheline size is 128 bytes - that is wasteful when structures are aligned, as all modern x86 CPUs have an (effective) cacheline sizes of 64 bytes. It was set to 128 bytes due to some cacheline aliasing problems on older P4 systems, but those are many years old and we dont optimize for them anymore. (They'll still get the 128 bytes cacheline size if the kernel is specifically built for Pentium 4) Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven --- arch/x86/Kconfig.cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845a..cdf4a962323 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -307,10 +307,10 @@ config X86_CMPXCHG config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC + default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y From b665967979d0e990f196e7c4ba88e17c9ed9b781 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 11:54:27 -0800 Subject: [PATCH 080/682] x86: make 32bit MAX_HARDIRQS_PER_CPU to be NR_VECTORS Impact: clean up to be same as 64bit 32-bit is using per-cpu vector too, so don't use default NR_IRQS. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq_32.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index cf7954d1405..d4b5d731073 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -19,6 +19,9 @@ typedef struct { DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) From 73310a169aebe257efdd35a763cce1c7658f40c9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 14 Jan 2009 11:28:35 -0800 Subject: [PATCH 081/682] init: make initrd/initramfs decompression failure a KERN_EMERG event Impact: More consistent behaviour, avoid policy in the kernel Upgrade/downgrade initrd/initramfs decompression failure from inconsistently a panic or a KERN_ALERT message to a KERN_EMERG event. It is, however, possible do design a system which can recover from this (using the kernel builtin code and/or the internal initramfs), which means this is policy, not a technical necessity. A good way to handle this would be to have a panic-level=X option, to force a panic on a printk above a certain level. That is a separate patch, however. Signed-off-by: H. Peter Anvin --- init/do_mounts_rd.c | 3 ++- init/initramfs.c | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index 91d0cfca507..027a402708d 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -83,7 +83,8 @@ identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", compress_name, start_block); if (!*decompressor) - printk(KERN_CRIT "RAMDISK: %s decompressor not configured!\n", + printk(KERN_EMERG + "RAMDISK: %s decompressor not configured!\n", compress_name); nblocks = 0; goto done; diff --git a/init/initramfs.c b/init/initramfs.c index 9a7290ec818..7dcde7ea660 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -528,7 +528,7 @@ static int __init populate_rootfs(void) char *err = unpack_to_rootfs(__initramfs_start, __initramfs_end - __initramfs_start, 0); if (err) - panic(err); + panic(err); /* Failed to decompress INTERNAL initramfs */ if (initrd_start) { #ifdef CONFIG_BLK_DEV_RAM int fd; @@ -554,9 +554,12 @@ static int __init populate_rootfs(void) printk(KERN_INFO "Unpacking initramfs..."); err = unpack_to_rootfs((char *)initrd_start, initrd_end - initrd_start, 0); - if (err) - panic(err); - printk(" done\n"); + if (err) { + printk(" failed!\n"); + printk(KERN_EMERG "%s\n", err); + } else { + printk(" done\n"); + } free_initrd(); #endif } From 444027031cd069ea7e48b016cb33bbf201c8a9f0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:47 +0300 Subject: [PATCH 082/682] x86: headers cleanup - prctl.h Impact: cleanup (internal kernel function exported) 'make headers_check' warn us about leaking of kernel private (mostly compile time vars) data to userspace in headers. Fix it. sys_arch_prctl is completely removed from header since frankly I don't even understand why we describe it here. It is described like __SYSCALL(__NR_arch_prctl, sys_arch_prctl) in unistd_64.h and implemented in process_64.c. User-mode linux involved? So this one in fact is suspicious. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/prctl.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index a8894647dd9..3ac5032fae0 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h @@ -6,8 +6,4 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 -#ifdef CONFIG_X86_64 -extern long sys_arch_prctl(int, unsigned long); -#endif /* CONFIG_X86_64 */ - #endif /* _ASM_X86_PRCTL_H */ From a7c4e68615e20771f279c51a2bec8980675c78c7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:49 +0300 Subject: [PATCH 083/682] x86: headers cleanup - sigcontext32.h Impact: cleanup 'make headers_check' warn us about lack of linux/types.h here. Lets add it. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/sigcontext32.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/asm/sigcontext32.h index 6126188cf3a..ad1478c4ae1 100644 --- a/arch/x86/include/asm/sigcontext32.h +++ b/arch/x86/include/asm/sigcontext32.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SIGCONTEXT32_H #define _ASM_X86_SIGCONTEXT32_H +#include + /* signal context for 32bit programs. */ #define X86_FXSR_MAGIC 0x0000 From dbca1df48e89d8aa59254fdc10ef16c16e73d94e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:50 +0300 Subject: [PATCH 084/682] x86: headers cleanup - setup.h Impact: cleanup 'make headers_check' warn us about leaking of kernel private (mostly compile time vars) data to userspace in headers. Fix it. Guard this one by __KERNEL__. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/setup.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a..29d31c0d13d 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SETUP_H #define _ASM_X86_SETUP_H +#ifdef __KERNEL__ + #define COMMAND_LINE_SIZE 2048 #ifndef __ASSEMBLY__ @@ -8,10 +10,8 @@ /* Interrupt control for vSMPowered x86_64 systems */ void vsmp_init(void); - void setup_bios_corruption_check(void); - #ifdef CONFIG_X86_VISWS extern void visws_early_detect(void); extern int is_visws_box(void); @@ -43,7 +43,7 @@ struct x86_quirks { void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); void (*mpc_oem_pci_bus)(struct mpc_bus *m); void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, - unsigned short oemsize); + unsigned short oemsize); int (*setup_ioapic_ids)(void); int (*update_genapic)(void); }; @@ -56,8 +56,6 @@ extern unsigned long saved_video_mode; #endif #endif /* __ASSEMBLY__ */ -#ifdef __KERNEL__ - #ifdef __i386__ #include From 95c4bff0308eb0819436b730a836846d3e784657 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Jan 2009 23:37:46 +0300 Subject: [PATCH 085/682] x86: headers cleanup - boot.h Impact: cleanup 'make headers_check' warn us about leaking of kernel private (mostly compile time vars) data to userspace in headers. Fix it. Neither BOOT_HEAP_SIZE, BOOT_STACK_SIZE refs was found by searching thru net (ie in user-space area) so fence this all by __KERNEL__ guard. Signed-off-by: Cyrill Gorcunov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/boot.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index c0e8e68a31f..6526cf08b0e 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -10,14 +10,16 @@ #define EXTENDED_VGA 0xfffe /* 80x50 mode */ #define ASK_VGA 0xfffd /* ask for it at bootup */ +#ifdef __KERNEL__ + /* Physical address where kernel should be loaded. */ #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) -#if (defined CONFIG_KERNEL_BZIP2) +#ifdef CONFIG_KERNEL_BZIP2 #define BOOT_HEAP_SIZE 0x400000 -#else +#else /* !CONFIG_KERNEL_BZIP2 */ #ifdef CONFIG_X86_64 #define BOOT_HEAP_SIZE 0x7000 @@ -25,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x4000 #endif -#endif +#endif /* !CONFIG_KERNEL_BZIP2 */ #ifdef CONFIG_X86_64 #define BOOT_STACK_SIZE 0x4000 @@ -33,4 +35,6 @@ #define BOOT_STACK_SIZE 0x1000 #endif +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_BOOT_H */ From ee287587dafd77fd211e50637561224605c214b4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 14 Jan 2009 16:07:38 -0800 Subject: [PATCH 086/682] bzip2/lzma: update boot protocol specification Impact: documentation Update the boot protocol specification to include the currently supported file formats and their magic numbers. Signed-off-by: H. Peter Anvin --- Documentation/x86/boot.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index fcdc62b3c3d..9ac70ffe2b7 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -542,7 +542,10 @@ Protocol: 2.08+ The payload may be compressed. The format of both the compressed and uncompressed data should be determined using the standard magic - numbers. Currently only gzip compressed ELF is used. + numbers. The currently supported compression formats are gzip + (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A) and LZMA + (magic number 5D 00). The uncompressed payload is currently always ELF + (magic number 7F 45 4C 46). Field name: payload_length Type: read From d2287f5ebea9ff2487d614719775f0b03fce15f6 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 14 Jan 2009 15:43:54 -0800 Subject: [PATCH 087/682] irq: update all arches for new irq_desc, fix Impact: fix build errors Since the SPARSE IRQS changes redefined how the kstat irqs are organized, arch's must use the new accessor function: kstat_incr_irqs_this_cpu(irq, DESC); If CONFIG_SPARSE_IRQS is set, then DESC is a pointer to the irq_desc which has a pointer to the kstat_irqs. If not, then the .irqs field of struct kernel_stat is used instead. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/ia64/kernel/irq_ia64.c | 12 ++++++++---- arch/mips/kernel/smtc.c | 4 +++- arch/mips/sgi-ip22/ip22-int.c | 2 +- arch/mips/sgi-ip22/ip22-time.c | 2 +- arch/mips/sibyte/bcm1480/smp.c | 3 ++- arch/mips/sibyte/sb1250/smp.c | 3 ++- arch/mn10300/kernel/mn10300-watchdog.c | 3 ++- arch/sparc/kernel/time_64.c | 2 +- 8 files changed, 20 insertions(+), 11 deletions(-) diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 28d3d483db9..927ad027820 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -493,11 +493,13 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { + struct irq_desc *desc = irq_to_desc(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); else { int irq = local_vector_to_irq(vector); @@ -551,11 +553,13 @@ void ia64_process_pending_intr(void) * Perform normal interrupt style processing */ while (vector != IA64_SPURIOUS_INT_VECTOR) { + struct irq_desc *desc = irq_to_desc(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + kstat_incr_irqs_this_cpu(vector, desc); else { struct pt_regs *old_regs = set_irq_regs(NULL); int irq = local_vector_to_irq(vector); diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index d2c1ab12425..5f5af7d4c89 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -921,11 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi) struct clock_event_device *cd; void *arg_copy = pipi->arg; int type_copy = pipi->type; + int irq = MIPS_CPU_IRQ_BASE + 1; + smtc_ipi_nq(&freeIPIq, pipi); switch (type_copy) { case SMTC_CLOCK_TICK: irq_enter(); - kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); cd = &per_cpu(mips_clockevent_device, cpu); cd->event_handler(cd); irq_exit(); diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c index f8b18af141a..0ecd5fe9486 100644 --- a/arch/mips/sgi-ip22/ip22-int.c +++ b/arch/mips/sgi-ip22/ip22-int.c @@ -155,7 +155,7 @@ static void indy_buserror_irq(void) int irq = SGI_BUSERR_IRQ; irq_enter(); - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); ip22_be_interrupt(irq); irq_exit(); } diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 3dcb27ec0c5..c8f7d2328b2 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -122,7 +122,7 @@ void indy_8254timer_irq(void) char c; irq_enter(); - kstat_this_cpu.irqs[irq]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); printk(KERN_ALERT "Oops, got 8254 interrupt.\n"); ArcRead(0, &c, 1, &cnt); ArcEnterInteractiveMode(); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index dddfda8e829..314691648c9 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -178,9 +178,10 @@ struct plat_smp_ops bcm1480_smp_ops = { void bcm1480_mailbox_interrupt(void) { int cpu = smp_processor_id(); + int irq = K_BCM1480_INT_MBOX_0_0; unsigned int action; - kstat_this_cpu.irqs[K_BCM1480_INT_MBOX_0_0]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); /* Load the mailbox register to figure out what we're supposed to do */ action = (__raw_readq(mailbox_0_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 5950a288a7d..cad14003b84 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -166,9 +166,10 @@ struct plat_smp_ops sb_smp_ops = { void sb1250_mailbox_interrupt(void) { int cpu = smp_processor_id(); + int irq = K_INT_MBOX_0; unsigned int action; - kstat_this_cpu.irqs[K_INT_MBOX_0]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); /* Load the mailbox register to figure out what we're supposed to do */ action = (____raw_readq(mailbox_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c index 10811e981d2..2e370d88a87 100644 --- a/arch/mn10300/kernel/mn10300-watchdog.c +++ b/arch/mn10300/kernel/mn10300-watchdog.c @@ -130,6 +130,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) * the stack NMI-atomically, it's safe to use smp_processor_id(). */ int sum, cpu = smp_processor_id(); + int irq = NMIIRQ; u8 wdt, tmp; wdt = WDCTR & ~WDCTR_WDCNE; @@ -138,7 +139,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) NMICR = NMICR_WDIF; nmi_count(cpu)++; - kstat_this_cpu.irqs[NMIIRQ]++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); sum = irq_stat[cpu].__irq_count; if (last_irq_sums[cpu] == sum) { diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 54405d36214..28b48f3eb25 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -727,7 +727,7 @@ void timer_interrupt(int irq, struct pt_regs *regs) irq_enter(); - kstat_this_cpu.irqs[0]++; + kstat_incr_irqs_this_cpu(0, irq_to_desc(0)); if (unlikely(!evt->event_handler)) { printk(KERN_WARNING From f11826385b63566d98c02d35f592232ee77cd791 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:27:35 +0000 Subject: [PATCH 088/682] x86: fully honor "nolapic" Impact: widen the effect of the 'nolapic' boot parameter "nolapic" should not only suppress SMP and use of the LAPIC, but it also ought to have the effect of disabling all IO-APIC related activity as well as PCI MSI and HT-IRQs. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 7 ++++++- arch/x86/kernel/io_apic.c | 6 ++++++ arch/x86/kernel/smpboot.c | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f567..c3dd64fabcf 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1126,6 +1126,11 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; + if (disable_apic) { + disable_ioapic_setup(); + return; + } + #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (lapic_is_integrated() && esr_disable) { @@ -1566,11 +1571,11 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { -#ifdef CONFIG_X86_64 if (disable_apic) { pr_info("Apic disabled\n"); return -1; } +#ifdef CONFIG_X86_64 if (!cpu_has_apic) { disable_apic = 1; pr_info("Apic disabled by BIOS\n"); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..40747e58f30 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3258,6 +3258,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms int err; unsigned dest; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) @@ -3726,6 +3729,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct irq_cfg *cfg; int err; + if (disable_apic) + return -ENXIO; + cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87..31f99ec2e0f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1125,6 +1125,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); + disable_ioapic_setup(); return -1; } From a08c4743ed5b861c4fa3d75be00da7106c926296 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 14 Jan 2009 12:28:51 +0000 Subject: [PATCH 089/682] x86: avoid early crash in disable_local_APIC() E.g. when called due to an early panic. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c3dd64fabcf..38d6aab2358 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -895,6 +895,10 @@ void disable_local_APIC(void) { unsigned int value; + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + clear_local_APIC(); /* From 54da5b3d44238eeb7417bacf792fb416d473bf4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 13:04:58 +0100 Subject: [PATCH 090/682] x86: fix broken flush_tlb_others_ipi(), fix Impact: cleanup Use the proper type. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 54ee2ecb5e2..7f4141d3b66 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -188,7 +188,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(to_cpumask(f->flush_cpumask), + INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) cpu_relax(); From e56d0cfe7790fd3218ae4f6aae1335547fea8763 Mon Sep 17 00:00:00 2001 From: Baodong Chen Date: Thu, 8 Jan 2009 19:24:29 +0800 Subject: [PATCH 091/682] Documentation/x86/boot.txt: modify fieldname Modify field names to the right ones: - start_sys was changed to start_sys_seg - iinitrd_addr_max was changed to ramdisk_max - pad2 was changed to pad2 and pad3 - readmode_swtch was changed to realmode_swtch Signed-off-by: Baodong Chen <[email]chenbdchenbd@gmail.com[email]> Acked-by: Jiri Kosina Signed-off-by: Ingo Molnar --- Documentation/x86/boot.txt | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 7b4596ac412..12299697b7c 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -158,7 +158,7 @@ Offset Proto Name Meaning 0202/4 2.00+ header Magic signature "HdrS" 0206/2 2.00+ version Boot protocol version supported 0208/4 2.00+ realmode_swtch Boot loader hook (see below) -020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) +020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete) 020E/2 2.00+ kernel_version Pointer to kernel version string 0210/1 2.00+ type_of_loader Boot loader identifier 0211/1 2.00+ loadflags Boot protocol option flags @@ -170,10 +170,11 @@ Offset Proto Name Meaning 0224/2 2.01+ heap_end_ptr Free memory after setup end 0226/2 N/A pad1 Unused 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line -022C/4 2.03+ initrd_addr_max Highest legal initrd address +022C/4 2.03+ ramdisk_max Highest legal initrd address 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not -0235/3 N/A pad2 Unused +0235/1 N/A pad2 Unused +0236/2 N/A pad3 Unused 0238/4 2.06+ cmdline_size Maximum size of the kernel command line 023C/4 2.07+ hardware_subarch Hardware subarchitecture 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data @@ -299,14 +300,14 @@ Protocol: 2.00+ e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version 10.17. -Field name: readmode_swtch +Field name: realmode_swtch Type: modify (optional) Offset/size: 0x208/4 Protocol: 2.00+ Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) -Field name: start_sys +Field name: start_sys_seg Type: read Offset/size: 0x20c/2 Protocol: 2.00+ @@ -468,7 +469,7 @@ Protocol: 2.02+ zero, the kernel will assume that your boot loader does not support the 2.02+ protocol. -Field name: initrd_addr_max +Field name: ramdisk_max Type: read Offset/size: 0x22c/4 Protocol: 2.03+ From 5cd7376200be7b8bab085557ff5876b04bd84191 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 15:46:08 +0100 Subject: [PATCH 092/682] fix: crash: IP: __bitmap_intersects+0x48/0x73 -tip testing found this crash: > [ 35.258515] calling acpi_cpufreq_init+0x0/0x127 @ 1 > [ 35.264127] BUG: unable to handle kernel NULL pointer dereference at (null) > [ 35.267554] IP: [] __bitmap_intersects+0x48/0x73 > [ 35.267554] PGD 0 > [ 35.267554] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c is still broken: there's no allocation of the variable mask, so we pass in an uninitialized cmd.mask field to drv_read(), which then passes it to the scheduler which then crashes ... Switch it over to the much simpler constant-cpumask-pointers approach. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 6f11e029e8c..019276717a7 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -145,7 +145,7 @@ typedef union { struct drv_cmd { unsigned int type; - cpumask_var_t mask; + const struct cpumask *mask; drv_addr_union addr; u32 val; }; @@ -235,8 +235,7 @@ static u32 get_cur_val(const struct cpumask *mask) return 0; } - cpumask_copy(cmd.mask, mask); - + cmd.mask = mask; drv_read(&cmd); dprintk("get_cur_val = %u\n", cmd.val); @@ -403,9 +402,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return -ENODEV; } - if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) - return -ENOMEM; - perf = data->acpi_data; result = cpufreq_frequency_table_target(policy, data->freq_table, @@ -450,9 +446,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, /* cpufreq holds the hotplug lock, so we are safe from here on */ if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) - cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); + cmd.mask = policy->cpus; else - cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); + cmd.mask = cpumask_of(policy->cpu); freqs.old = perf->states[perf->state].core_frequency * 1000; freqs.new = data->freq_table[next_state].frequency; @@ -479,7 +475,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, perf->state = next_perf_state; out: - free_cpumask_var(cmd.mask); return result; } From f2a082711905312dc7b6675e913fee0c4689f7ae Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 09:19:32 -0800 Subject: [PATCH 093/682] x86: fix build warning when CONFIG_NUMA not defined. Impact: fix build warning The macro cpu_to_node did not reference it's argument, and instead simply returned a 0. This causes a "unused variable" warning if it's the only reference in a function (show_cache_disable). Replace it with the more correct inline function. Signed-off-by: Mike Travis --- arch/x86/include/asm/topology.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab2..d0c68e29163 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -192,9 +192,20 @@ extern int __node_distance(int, int); #else /* !CONFIG_NUMA */ -#define numa_node_id() 0 -#define cpu_to_node(cpu) 0 -#define early_cpu_to_node(cpu) 0 +static inline int numa_node_id(void) +{ + return 0; +} + +static inline int cpu_to_node(int cpu) +{ + return 0; +} + +static inline int early_cpu_to_node(int cpu) +{ + return 0; +} static inline const cpumask_t *cpumask_of_node(int node) { From c99dbbe9f8f6b3e9383e64710217e873431d1c31 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Thu, 15 Jan 2009 12:09:44 -0800 Subject: [PATCH 094/682] sched: fix warning on ia64 Andrew Morton reported this warning on ia64: kernel/sched.c: In function `sd_init_NODE': kernel/sched.c:7449: warning: comparison of distinct pointer types lacks a cast Using the untyped min() function produces such warnings. Fix: type the constant 32 as unsigned int to match typeof(num_online_cpus). Reported-by: Andrew Morton Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 32f3af1641c..3193f4417e1 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -84,7 +84,7 @@ void build_cpu_to_node_map(void); .child = NULL, \ .groups = NULL, \ .min_interval = 8, \ - .max_interval = 8*(min(num_online_cpus(), 32)), \ + .max_interval = 8*(min(num_online_cpus(), 32U)), \ .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_nice_tries = 2, \ From 7de6883faad71e3a253d55b9e1a47b89ebce0a31 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: [PATCH 095/682] x86: fix pda_to_op() There's no instruction to move a 64bit immediate into memory location. Drop "i". Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2fbfff88df3..cbd3f48a832 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -78,7 +78,7 @@ do { \ case 8: \ asm(op "q %1,%%gs:%c2": \ "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ + "r" ((T__)val), \ "i"(pda_offset(field))); \ break; \ default: \ From f10fcd47120e80f66665567dbe17f5071c7aef52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: [PATCH 096/682] x86: make early_per_cpu() a lvalue and use it Make early_per_cpu() a lvalue as per_cpu() is and use it where applicable. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 6 +++--- arch/x86/include/asm/topology.h | 5 +---- arch/x86/kernel/apic.c | 13 ++----------- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ece72053ba6..df644f3e53e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -195,9 +195,9 @@ do { \ #define early_per_cpu_ptr(_name) (_name##_early_ptr) #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) #define early_per_cpu(_name, _cpu) \ - (early_per_cpu_ptr(_name) ? \ - early_per_cpu_ptr(_name)[_cpu] : \ - per_cpu(_name, _cpu)) + *(early_per_cpu_ptr(_name) ? \ + &early_per_cpu_ptr(_name)[_cpu] : \ + &per_cpu(_name, _cpu)) #else /* !CONFIG_SMP */ #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4e2f2e0aab2..87ca3fd86e8 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -102,10 +102,7 @@ static inline int cpu_to_node(int cpu) /* Same function but used if called before per_cpu areas are setup */ static inline int early_cpu_to_node(int cpu) { - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - return per_cpu(x86_cpu_to_node_map, cpu); + return early_per_cpu(x86_cpu_to_node_map, cpu); } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 38d6aab2358..48578795583 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1877,17 +1877,8 @@ void __cpuinit generic_processor_info(int apicid, int version) #endif #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); From c90aa894f0240084f2c6e42e2333b211d6cfe2b2 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 13 Jan 2009 20:41:34 +0900 Subject: [PATCH 097/682] x86: cleanup early setup_percpu references [ Based on original patch from Christoph Lameter and Mike Travis. ] * Ruggedize some calls in setup_percpu.c to prevent mishaps in early calls, particularly for non-critical functions. * Cleanup DEBUG_PER_CPU_MAPS usages and some comments. Signed-off-by: Mike Travis Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 56 ++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bf63de72b64..56c63ac62b1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -27,31 +33,39 @@ unsigned int max_physical_apicid; physid_mask_t phys_cpu_present_map; #endif -/* map cpu index to physical APIC ID */ +/* + * Map cpu index to physical APIC ID + */ DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 +#define X86_64_NUMA 1 /* (used later) */ -/* map cpu index to node index */ +/* + * Map cpu index to node index + */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); -/* which logical CPUs are on which nodes */ +/* + * Which logical CPUs are on which nodes + */ cpumask_t *node_to_cpumask_map; EXPORT_SYMBOL(node_to_cpumask_map); -/* setup node_to_cpumask_map */ +/* + * Setup node_to_cpumask_map + */ static void __init setup_node_to_cpumask_map(void); #else static inline void setup_node_to_cpumask_map(void) { } #endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the @@ -200,6 +214,8 @@ void __init setup_per_cpu_areas(void) #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } /* Setup percpu data maps */ @@ -221,6 +237,7 @@ void __init setup_per_cpu_areas(void) * Requires node_possible_map to be valid. * * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ static void __init setup_node_to_cpumask_map(void) { @@ -236,6 +253,7 @@ static void __init setup_node_to_cpumask_map(void) /* allocate the map */ map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); pr_debug("Node to cpumask map at %p for %d nodes\n", map, nr_node_ids); @@ -248,17 +266,23 @@ void __cpuinit numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - if (cpu_pda(cpu) && node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; - - if (cpu_to_node_map) + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { cpu_to_node_map[cpu] = node; + return; + } - else if (per_cpu_offset(cpu)) - per_cpu(x86_cpu_to_node_map, cpu) = node; +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; - else - pr_debug("Setting node for non-present cpu %d\n", cpu); + if (node != NUMA_NO_NODE) + cpu_pda(cpu)->nodenumber = node; } void __cpuinit numa_clear_node(int cpu) @@ -275,7 +299,7 @@ void __cpuinit numa_add_cpu(int cpu) void __cpuinit numa_remove_cpu(int cpu) { - cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } #else /* CONFIG_DEBUG_PER_CPU_MAPS */ @@ -285,7 +309,7 @@ void __cpuinit numa_remove_cpu(int cpu) */ static void __cpuinit numa_set_cpumask(int cpu, int enable) { - int node = cpu_to_node(cpu); + int node = early_cpu_to_node(cpu); cpumask_t *mask; char buf[64]; From a698c823e15149941b0f0281527d0c0d1daf2639 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 098/682] x86: make vmlinux_32.lds.S use PERCPU() macro Make vmlinux_32.lds.S use the generic PERCPU() macro instead of open coding it. This will ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_32.lds.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 82c67559dde..3eba7f7bac0 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -178,14 +178,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); /* freed after init ends here */ From 3e5d8f978435bb9ba4dfe3f4514e65e7885db1a9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 099/682] x86: make percpu symbols zerobased on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] This patch makes percpu symbols zerobased on x86_64 SMP by adding PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on the percpu output section and using it in vmlinux_64.lds.S. A new PHDR is added as existing ones cannot contain sections near address zero. PERCPU_VADDR() also adds a new symbol __per_cpu_load which always points to the vaddr of the loaded percpu data.init region. The following adjustments have been made to accomodate the address change. * code to locate percpu gdt_page in head_64.S is updated to add the load address to the gdt_page offset. * __per_cpu_load is used in places where access to the init data area is necessary. * pda->data_offset is initialized soon after C code is entered as zero value doesn't work anymore. This patch is mostly taken from Mike Travis' "x86_64: Base percpu variables at zero" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/head_64.S | 24 ++++++++++++++- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/vmlinux_64.lds.S | 17 ++++++++++- include/asm-generic/sections.h | 2 +- include/asm-generic/vmlinux.lds.h | 51 +++++++++++++++++++++++++++---- 6 files changed, 88 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b9a4d8c4b93..bc2900ca82c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,6 +44,8 @@ void __init x86_64_init_pda(void) { _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; + cpu_pda(0)->data_offset = + (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0e275d49556..7ee0363871e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -204,6 +204,23 @@ ENTRY(secondary_startup_64) pushq $0 popfq +#ifdef CONFIG_SMP + /* + * early_gdt_base should point to the gdt_page in static percpu init + * data area. Computing this requires two symbols - __per_cpu_load + * and per_cpu__gdt_page. As linker can't do no such relocation, do + * it by hand. As early_gdt_descr is manipulated by C code for + * secondary CPUs, this should be done only once for the boot CPU + * when early_gdt_descr_base contains zero. + */ + movq early_gdt_descr_base(%rip), %rax + testq %rax, %rax + jnz 1f + movq $__per_cpu_load, %rax + addq $per_cpu__gdt_page, %rax + movq %rax, early_gdt_descr_base(%rip) +1: +#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 - .quad per_cpu__gdt_page +#ifdef CONFIG_SMP +early_gdt_descr_base: + .quad 0x0000000000000000 +#else + .quad per_cpu__gdt_page +#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 56c63ac62b1..44845842e72 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void) } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1a614c0e6be..f50280db0df 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -19,6 +19,9 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -208,14 +211,26 @@ SECTIONS __initramfs_end = .; #endif +#ifdef CONFIG_SMP + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * switch it back to data.init. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else PERCPU(PAGE_SIZE) +#endif . = ALIGN(PAGE_SIZE); __init_end = .; . = ALIGN(PAGE_SIZE); __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + *(.data.nosave) + } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 79a7ff925bf..4ce48e87853 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; extern char _end[]; -extern char __per_cpu_start[], __per_cpu_end[]; +extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c61fab1dd2f..fc2f55f2dcd 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,12 +430,51 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU(align) \ - . = ALIGN(align); \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ +#define PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; + +#define PERCPU_EPILOG(phdr) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . = __per_cpu_load + SIZEOF(.data.percpu); + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. If @vaddr + * is not blank, it specifies explicit base address and all percpu + * symbols will be offset from the given address. If blank, @vaddr + * always equals @laddr + LOAD_OFFSET. + * + * @phdr defines the output PHDR to use if not blank. Be warned that + * output PHDR is sticky. If @phdr is specified, the next output + * section in the linker script will go there too. @phdr should have + * a leading colon. + * + * This macro defines three symbols, __per_cpu_load, __per_cpu_start + * and __per_cpu_end. The first one is the vaddr of loaded percpu + * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the + * end offset. + */ +#define PERCPU_VADDR(vaddr, phdr) \ + PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - } \ - VMLINUX_SYMBOL(__per_cpu_end) = .; + PERCPU_EPILOG(phdr) + +/** + * PERCPU - define output section for percpu area, simple version + * @align: required alignment + * + * Align to @align and outputs output section for percpu area. This + * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and + * __per_cpu_start will be identical. + */ +#define PERCPU(align) \ + . = ALIGN(align); \ + PERCPU_VADDR( , ) From f32ff5388d86518c0375ccdb330d3b459b9c405e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 100/682] x86: load pointer to pda into %gs while brining up a CPU [ Based on original patch from Christoph Lameter and Mike Travis. ] CPU startup code in head_64.S loaded address of a zero page into %gs for temporary use till pda is loaded but address to the actual pda is available at the point. Load the real address directly instead. This will help unifying percpu and pda handling later on. This patch is mostly taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo --- arch/x86/include/asm/trampoline.h | 1 + arch/x86/kernel/acpi/sleep.c | 1 + arch/x86/kernel/head64.c | 4 ++-- arch/x86/kernel/head_64.S | 15 ++++++++++----- arch/x86/kernel/smpboot.c | 1 + 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 780ba0ab94f..90f06c25221 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -13,6 +13,7 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) #define TRAMPOLINE_BASE 0x6000 diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95f..9ff67f8dc2c 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,6 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + initial_gs = (unsigned long)cpu_pda(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bc2900ca82c..76ffba2aa66 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,8 +26,8 @@ #include #include -/* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda; +/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +struct x8664_pda _boot_cpu_pda; #ifdef CONFIG_SMP /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7ee0363871e..2f0ab008988 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -243,12 +243,15 @@ ENTRY(secondary_startup_64) movl %eax,%fs movl %eax,%gs - /* - * Setup up a dummy PDA. this is just for some early bootup code - * that does in_interrupt() - */ + /* Set up %gs. + * + * %gs should point to the pda. For initial boot, make %gs point + * to the _boot_cpu_pda in data section. For a secondary CPU, + * initial_gs should be set to its pda address before the CPU runs + * this code. + */ movl $MSR_GS_BASE,%ecx - movq $empty_zero_page,%rax + movq initial_gs(%rip),%rax movq %rax,%rdx shrq $32,%rdx wrmsr @@ -274,6 +277,8 @@ ENTRY(secondary_startup_64) .align 8 ENTRY(initial_code) .quad x86_64_start_kernel + ENTRY(initial_gs) + .quad _boot_cpu_pda __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1a712da1dfa..70d846628bb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -854,6 +854,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); + initial_gs = (unsigned long)cpu_pda(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; From c8f3329a0ddd751241e96b4100df7eda14b2cbc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 101/682] x86: use static _cpu_pda array _cpu_pda array first uses statically allocated storage in data.init and then switches to allocated bootmem to conserve space. However, after folding pda area into percpu area, _cpu_pda array will be removed completely. Drop the reallocation part to simplify the code for soon-to-follow changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 3 ++- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head64.c | 12 ------------ arch/x86/kernel/setup_percpu.c | 14 +++----------- 4 files changed, 6 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index cbd3f48a832..2d5b49c3248 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* Per processor datastructure. %gs points to it while the kernel runs */ @@ -39,7 +40,7 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda **_cpu_pda; +extern struct x8664_pda *_cpu_pda[NR_CPUS]; extern void pda_init(int); #define cpu_pda(i) (_cpu_pda[i]) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0025846244..c116c599326 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,7 +879,7 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda **_cpu_pda __read_mostly; +struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; EXPORT_SYMBOL(_cpu_pda); struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 76ffba2aa66..462d0beccb6 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -29,20 +29,8 @@ /* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ struct x8664_pda _boot_cpu_pda; -#ifdef CONFIG_SMP -/* - * We install an empty cpu_pda pointer table to indicate to early users - * (numa_set_node) that the cpu_pda pointer table for cpus other than - * the boot cpu is not yet setup. - */ -static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; -#else -static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; -#endif - void __init x86_64_init_pda(void) { - _cpu_pda = __cpu_pda; cpu_pda(0) = &_boot_cpu_pda; cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 44845842e72..73ab01b297c 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -114,7 +114,6 @@ static inline void setup_cpu_pda_map(void) { } static void __init setup_cpu_pda_map(void) { char *pda; - struct x8664_pda **new_cpu_pda; unsigned long size; int cpu; @@ -122,28 +121,21 @@ static void __init setup_cpu_pda_map(void) /* allocate cpu_pda array and pointer table */ { - unsigned long tsize = nr_cpu_ids * sizeof(void *); unsigned long asize = size * (nr_cpu_ids - 1); - tsize = roundup(tsize, cache_line_size()); - new_cpu_pda = alloc_bootmem(tsize + asize); - pda = (char *)new_cpu_pda + tsize; + pda = alloc_bootmem(asize); } /* initialize pointer table to static pda's */ for_each_possible_cpu(cpu) { if (cpu == 0) { /* leave boot cpu pda in place */ - new_cpu_pda[0] = cpu_pda(0); continue; } - new_cpu_pda[cpu] = (struct x8664_pda *)pda; - new_cpu_pda[cpu]->in_bootmem = 1; + cpu_pda(cpu) = (struct x8664_pda *)pda; + cpu_pda(cpu)->in_bootmem = 1; pda += size; } - - /* point to new pointer table */ - _cpu_pda = new_cpu_pda; } #endif /* CONFIG_SMP && CONFIG_X86_64 */ From 1a51e3a0aed18767cf2762e95456ecfeb0bca5e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 102/682] x86: fold pda into percpu area on SMP [ Based on original patch from Christoph Lameter and Mike Travis. ] Currently pdas and percpu areas are allocated separately. %gs points to local pda and percpu area can be reached using pda->data_offset. This patch folds pda into percpu area. Due to strange gcc requirement, pda needs to be at the beginning of the percpu area so that pda->stack_canary is at %gs:40. To achieve this, a new percpu output section macro - PERCPU_VADDR_PREALLOC() - is added and used to reserve pda sized chunk at the start of the percpu area. After this change, for boot cpu, %gs first points to pda in the data.init area and later during setup_per_cpu_areas() gets updated to point to the actual pda. This means that setup_per_cpu_areas() need to reload %gs for CPU0 while clearing pda area for other cpus as cpu0 already has modified it when control reaches setup_per_cpu_areas(). This patch also removes now unnecessary get_local_pda() and its call sites. A lot of this patch is taken from Mike Travis' "x86_64: Fold pda into per cpu area" patch. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 8 +++ arch/x86/include/asm/smp.h | 2 - arch/x86/kernel/asm-offsets_64.c | 1 + arch/x86/kernel/cpu/common.c | 6 +- arch/x86/kernel/head64.c | 8 ++- arch/x86/kernel/head_64.S | 15 +++-- arch/x86/kernel/setup_percpu.c | 107 +++++++++++++----------------- arch/x86/kernel/smpboot.c | 60 +---------------- arch/x86/kernel/vmlinux_64.lds.S | 6 +- arch/x86/xen/smp.c | 10 --- include/asm-generic/vmlinux.lds.h | 25 ++++++- 11 files changed, 104 insertions(+), 144 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index df644f3e53e..0ed77cf33f7 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,6 +1,14 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H +#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif +#endif + #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a8cea7b0943..127415402ea 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -19,8 +19,6 @@ #include #include -extern int __cpuinit get_local_pda(int cpu); - extern int smp_num_siblings; extern unsigned int num_processors; diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 1d41d3f1edb..f8d1b047ef4 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -56,6 +56,7 @@ int main(void) ENTRY(cpunumber); ENTRY(irqstackptr); ENTRY(data_offset); + DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c116c599326..7041acdf557 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,10 +893,8 @@ void __cpuinit pda_init(int cpu) /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); - /* Memory clobbers used to order PDA accessed */ - mb(); - wrmsrl(MSR_GS_BASE, pda); - mb(); + + load_pda_offset(cpu); pda->cpunumber = cpu; pda->irqcount = -1; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 462d0beccb6..1a311293f73 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,12 +26,18 @@ #include #include -/* boot cpu pda, referenced by head_64.S to initialize %gs for boot CPU */ +#ifndef CONFIG_SMP +/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ struct x8664_pda _boot_cpu_pda; +#endif void __init x86_64_init_pda(void) { +#ifdef CONFIG_SMP + cpu_pda(0) = (void *)__per_cpu_load; +#else cpu_pda(0) = &_boot_cpu_pda; +#endif cpu_pda(0)->data_offset = (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2f0ab008988..7a995d0e9f7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -245,10 +245,13 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * %gs should point to the pda. For initial boot, make %gs point - * to the _boot_cpu_pda in data section. For a secondary CPU, - * initial_gs should be set to its pda address before the CPU runs - * this code. + * On SMP, %gs should point to the per-cpu area. For initial + * boot, make %gs point to the init data section. For a + * secondary CPU,initial_gs should be set to its pda address + * before the CPU runs this code. + * + * On UP, initial_gs points to _boot_cpu_pda and doesn't + * change. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -278,7 +281,11 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) +#ifdef CONFIG_SMP + .quad __per_cpu_load +#else .quad _boot_cpu_pda +#endif __FINITDATA ENTRY(stack_start) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 73ab01b297c..63d46280227 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef CONFIG_DEBUG_PER_CPU_MAPS @@ -65,6 +66,36 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +#ifdef CONFIG_X86_64 +void __cpuinit load_pda_offset(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); + mb(); +} + +#endif /* CONFIG_SMP && CONFIG_X86_64 */ + +#ifdef CONFIG_X86_64 + +/* correctly size the local cpu masks */ +static void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + +#else /* CONFIG_X86_32 */ + +static inline void setup_cpu_local_masks(void) +{ +} + +#endif /* CONFIG_X86_32 */ + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA /* * Copy data used in early init routines from the initial arrays to the @@ -101,63 +132,7 @@ static void __init setup_per_cpu_maps(void) */ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); -static inline void setup_cpu_pda_map(void) { } - -#elif !defined(CONFIG_SMP) -static inline void setup_cpu_pda_map(void) { } - -#else /* CONFIG_SMP && CONFIG_X86_64 */ - -/* - * Allocate cpu_pda pointer table and array via alloc_bootmem. - */ -static void __init setup_cpu_pda_map(void) -{ - char *pda; - unsigned long size; - int cpu; - - size = roundup(sizeof(struct x8664_pda), cache_line_size()); - - /* allocate cpu_pda array and pointer table */ - { - unsigned long asize = size * (nr_cpu_ids - 1); - - pda = alloc_bootmem(asize); - } - - /* initialize pointer table to static pda's */ - for_each_possible_cpu(cpu) { - if (cpu == 0) { - /* leave boot cpu pda in place */ - continue; - } - cpu_pda(cpu) = (struct x8664_pda *)pda; - cpu_pda(cpu)->in_bootmem = 1; - pda += size; - } -} - -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ +#endif /* * Great future plan: @@ -171,9 +146,6 @@ void __init setup_per_cpu_areas(void) int cpu; unsigned long align = 1; - /* Setup cpu_pda map */ - setup_cpu_pda_map(); - /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); @@ -204,8 +176,21 @@ void __init setup_per_cpu_areas(void) cpu, node, __pa(ptr)); } #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); +#ifdef CONFIG_X86_64 + cpu_pda(cpu) = (void *)ptr; + + /* + * CPU0 modified pda in the init data area, reload pda + * offset for CPU0 and clear the area for others. + */ + if (cpu == 0) + load_pda_offset(0); + else + memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); +#endif + per_cpu_offset(cpu) = ptr - __per_cpu_start; DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 70d846628bb..f2f77ca494d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -744,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -#ifdef CONFIG_X86_64 - -/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ -static void __ref free_bootmem_pda(struct x8664_pda *oldpda) -{ - if (!after_bootmem) - free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); -} - -/* - * Allocate node local memory for the AP pda. - * - * Must be called after the _cpu_pda pointer table is initialized. - */ -int __cpuinit get_local_pda(int cpu) -{ - struct x8664_pda *oldpda, *newpda; - unsigned long size = sizeof(struct x8664_pda); - int node = cpu_to_node(cpu); - - if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) - return 0; - - oldpda = cpu_pda(cpu); - newpda = kmalloc_node(size, GFP_ATOMIC, node); - if (!newpda) { - printk(KERN_ERR "Could not allocate node local PDA " - "for CPU %d on node %d\n", cpu, node); - - if (oldpda) - return 0; /* have a usable pda */ - else - return -1; - } - - if (oldpda) { - memcpy(newpda, oldpda, size); - free_bootmem_pda(oldpda); - } - - newpda->in_bootmem = 0; - cpu_pda(cpu) = newpda; - return 0; -} -#endif /* CONFIG_X86_64 */ - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad @@ -807,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) }; INIT_WORK(&c_idle.work, do_fork_idle); -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - if (cpu > 0) { - boot_error = get_local_pda(cpu); - if (boot_error) - goto restore_state; - /* if can't get pda memory, can't start cpu */ - } -#endif - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -931,9 +875,7 @@ do_rest: inquire_remote_apic(apicid); } } -#ifdef CONFIG_X86_64 -restore_state: -#endif + if (boot_error) { /* Try to put things back the way they were before ... */ numa_remove_cpu(cpu); /* was set by numa_add_cpu */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index f50280db0df..962f21f1d4d 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -5,6 +5,7 @@ #define LOAD_OFFSET __START_KERNEL_map #include +#include #include #undef i386 /* in case the preprocessor is a 32bit one */ @@ -215,10 +216,11 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. + * switch it back to data.init. Also, pda should be at the head of + * percpu area. Preallocate it. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) + PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) #else PERCPU(PAGE_SIZE) #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c44e2069c7c..83fa4236477 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -283,16 +283,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; -#ifdef CONFIG_X86_64 - /* Allocate node local memory for AP pdas */ - WARN_ON(cpu == 0); - if (cpu > 0) { - rc = get_local_pda(cpu); - if (rc) - return rc; - } -#endif - #ifdef CONFIG_X86_32 init_gdt(cpu); per_cpu(current_task, cpu) = idle; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index fc2f55f2dcd..e53319cf29c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -441,9 +441,10 @@ . = __per_cpu_load + SIZEOF(.data.percpu); /** - * PERCPU_VADDR - define output section for percpu area + * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) + * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -455,11 +456,33 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * + * If @prealloc is non-zero, the specified number of bytes will be + * reserved at the start of percpu area. As the prealloc area is + * likely to break alignment, this macro puts areas in increasing + * alignment order. + * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ +#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ + PERCPU_PROLOG(vaddr) \ + . += prealloc; \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + *(.data.percpu.page_aligned) \ + PERCPU_EPILOG(segment) + +/** + * PERCPU_VADDR - define output section for percpu area + * @vaddr: explicit base address (optional) + * @phdr: destination PHDR (optional) + * + * Macro which expands to output section for percpu area. Mostly + * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than + * using slighly different layout. + */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ *(.data.percpu.page_aligned) \ From 9939ddaff52787b2a7c1adf1b2afc95421aa0884 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 103/682] x86: merge 64 and 32 SMP percpu handling Now that pda is allocated as part of percpu, percpu doesn't need to be accessed through pda. Unify x86_64 SMP percpu access with x86_32 SMP one. Other than the segment register, operand size and the base of percpu symbols, they behave identical now. This patch replaces now unnecessary pda->data_offset with a dummy field which is necessary to keep stack_canary at its place. This patch also moves per_cpu_offset initialization out of init_gdt() into setup_per_cpu_areas(). Note that this change also necessitates explicit per_cpu_offset initializations in voyager_smp.c. With this change, x86_OP_percpu()'s are as efficient on x86_64 as on x86_32 and also x86_64 can use assembly PER_CPU macros. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 3 +- arch/x86/include/asm/percpu.h | 127 +++++++++------------------- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 7 +- arch/x86/kernel/head64.c | 2 - arch/x86/kernel/setup_percpu.c | 15 ++-- arch/x86/kernel/smpcommon.c | 3 +- arch/x86/mach-voyager/voyager_smp.c | 2 + 8 files changed, 55 insertions(+), 105 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2d5b49c3248..e91558e3785 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,7 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { struct task_struct *pcurrent; /* 0 Current process */ - unsigned long data_offset; /* 8 Per cpu data offset from linker - address */ + unsigned long dummy; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0ed77cf33f7..556f84b9ea9 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -1,62 +1,13 @@ #ifndef _ASM_X86_PERCPU_H #define _ASM_X86_PERCPU_H -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_64 -extern void load_pda_offset(int cpu); +#define __percpu_seg gs +#define __percpu_mov_op movq #else -static inline void load_pda_offset(int cpu) { } +#define __percpu_seg fs +#define __percpu_mov_op movl #endif -#endif - -#ifdef CONFIG_X86_64 -#include - -/* Same as asm-generic/percpu.h, except that we store the per cpu offset - in the PDA. Longer term the PDA and every per cpu variable - should be just put into a single section and referenced directly - from %gs */ - -#ifdef CONFIG_SMP -#include - -#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - -#endif -#include - -DECLARE_PER_CPU(struct x8664_pda, pda); - -/* - * These are supposed to be implemented as a single instruction which - * operates on the per-cpu data base segment. x86-64 doesn't have - * that yet, so this is a fairly inefficient workaround for the - * meantime. The single instruction is atomic with respect to - * preemption and interrupts, so we need to explicitly disable - * interrupts here to achieve the same effect. However, because it - * can be used from within interrupt-disable/enable, we can't actually - * disable interrupts; disabling preemption is enough. - */ -#define x86_read_percpu(var) \ - ({ \ - typeof(per_cpu_var(var)) __tmp; \ - preempt_disable(); \ - __tmp = __get_cpu_var(var); \ - preempt_enable(); \ - __tmp; \ - }) - -#define x86_write_percpu(var, val) \ - do { \ - preempt_disable(); \ - __get_cpu_var(var) = (val); \ - preempt_enable(); \ - } while(0) - -#else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -73,42 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda); * PER_CPU(cpu_gdt_descr, %ebx) */ #ifdef CONFIG_SMP -#define PER_CPU(var, reg) \ - movl %fs:per_cpu__##this_cpu_off, reg; \ +#define PER_CPU(var, reg) \ + __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ lea per_cpu__##var(reg), reg -#define PER_CPU_VAR(var) %fs:per_cpu__##var +#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var #else /* ! SMP */ -#define PER_CPU(var, reg) \ - movl $per_cpu__##var, reg +#define PER_CPU(var, reg) \ + __percpu_mov_op $per_cpu__##var, reg #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ #else /* ...!ASSEMBLY */ -/* - * PER_CPU finds an address of a per-cpu variable. - * - * Args: - * var - variable name - * cpu - 32bit register containing the current CPU number - * - * The resulting address is stored in the "cpu" argument. - * - * Example: - * PER_CPU(cpu_gdt_descr, %ebx) - */ +#include + #ifdef CONFIG_SMP - -#define __my_cpu_offset x86_read_percpu(this_cpu_off) - -/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ -#define __percpu_seg "%%fs:" - -#else /* !SMP */ - -#define __percpu_seg "" - -#endif /* SMP */ +#define __percpu_seg_str "%%"__stringify(__percpu_seg)":" +#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#else +#define __percpu_seg_str +#endif #include @@ -128,20 +63,25 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg"%0" \ + asm(op "b %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg"%0" \ + asm(op "w %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg"%0" \ + asm(op "l %1,"__percpu_seg_str"%0" \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ + case 8: \ + asm(op "q %1,"__percpu_seg_str"%0" \ + : "+m" (var) \ + : "r" ((T__)val)); \ + break; \ default: __bad_percpu_size(); \ } \ } while (0) @@ -151,17 +91,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg"%1,%0" \ + asm(op "b "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg"%1,%0" \ + asm(op "w "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg"%1,%0" \ + asm(op "l "__percpu_seg_str"%1,%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 8: \ + asm(op "q "__percpu_seg_str"%1,%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -175,8 +120,14 @@ do { \ #define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) + +#ifdef CONFIG_X86_64 +extern void load_pda_offset(int cpu); +#else +static inline void load_pda_offset(int cpu) { } +#endif + #endif /* !__ASSEMBLY__ */ -#endif /* !CONFIG_X86_64 */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f8d1b047ef4..f4cc81bfbf8 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -55,7 +55,6 @@ int main(void) ENTRY(irqcount); ENTRY(cpunumber); ENTRY(irqstackptr); - ENTRY(data_offset); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a98779..4833f3a1965 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -52,6 +52,7 @@ #include #include #include +#include /* Avoid __ASSEMBLER__'ifying just for this. */ #include @@ -1072,10 +1073,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - movq %gs:pda_data_offset, %rbp - subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %rbp) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) call \do_sym - addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1a311293f73..e99b661a97f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -38,8 +38,6 @@ void __init x86_64_init_pda(void) #else cpu_pda(0) = &_boot_cpu_pda; #endif - cpu_pda(0)->data_offset = - (unsigned long)(__per_cpu_load - __per_cpu_start); pda_init(0); } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 63d46280227..be1ff34db11 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -125,14 +125,14 @@ static void __init setup_per_cpu_maps(void) #endif } -#ifdef CONFIG_X86_32 -/* - * Great future not-so-futuristic plan: make i386 and x86_64 do it - * the same way - */ +#ifdef CONFIG_X86_64 +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { + [0] = (unsigned long)__per_cpu_load, +}; +#else unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); #endif +EXPORT_SYMBOL(__per_cpu_offset); /* * Great future plan: @@ -178,6 +178,7 @@ void __init setup_per_cpu_areas(void) #endif memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); + per_cpu_offset(cpu) = ptr - __per_cpu_start; #ifdef CONFIG_X86_64 cpu_pda(cpu) = (void *)ptr; @@ -190,7 +191,7 @@ void __init setup_per_cpu_areas(void) else memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); #endif - per_cpu_offset(cpu) = ptr - __per_cpu_start; + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 397e309839d..84395fabc41 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -4,10 +4,10 @@ #include #include -#ifdef CONFIG_X86_32 DEFINE_PER_CPU(unsigned long, this_cpu_off); EXPORT_PER_CPU_SYMBOL(this_cpu_off); +#ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself * (still using the master per-cpu area), or a CPU doing it for a @@ -24,7 +24,6 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(cpu_number, cpu) = cpu; } #endif diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9840b7ec749..1a48368acb0 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -531,6 +531,7 @@ static void __init do_boot_cpu(__u8 cpu) stack_start.sp = (void *)idle->thread.sp; init_gdt(cpu); + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1748,6 +1749,7 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { init_gdt(smp_processor_id()); + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); From b12d8db8fbfaed1e8222a15333a3645599636854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 104/682] x86: make pda a percpu variable [ Based on original patch from Christoph Lameter and Mike Travis. ] As pda is now allocated in percpu area, it can easily be made a proper percpu variable. Make it so by defining per cpu symbol from linker script and declaring it in C code for SMP and simply defining it for UP. This change cleans up code and brings SMP and UP closer a bit. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 5 +++-- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/head64.c | 10 ---------- arch/x86/kernel/head_64.S | 5 +++-- arch/x86/kernel/setup_percpu.c | 16 ++++++++++++++-- arch/x86/kernel/vmlinux_64.lds.S | 4 +++- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e91558e3785..66ae1043393 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -7,6 +7,7 @@ #include #include #include +#include /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { @@ -39,10 +40,10 @@ struct x8664_pda { unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; -extern struct x8664_pda *_cpu_pda[NR_CPUS]; +DECLARE_PER_CPU(struct x8664_pda, __pda); extern void pda_init(int); -#define cpu_pda(i) (_cpu_pda[i]) +#define cpu_pda(cpu) (&per_cpu(__pda, cpu)) /* * There is no fast way to get the base address of the PDA, all the accesses diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7041acdf557..c49498d4083 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -879,9 +879,6 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 -struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(_cpu_pda); - struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index e99b661a97f..71b6f6ec96a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,18 +26,8 @@ #include #include -#ifndef CONFIG_SMP -/* boot cpu pda, referenced by head_64.S to initialize %gs on UP */ -struct x8664_pda _boot_cpu_pda; -#endif - void __init x86_64_init_pda(void) { -#ifdef CONFIG_SMP - cpu_pda(0) = (void *)__per_cpu_load; -#else - cpu_pda(0) = &_boot_cpu_pda; -#endif pda_init(0); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 7a995d0e9f7..c8ace880661 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_PARAVIRT #include @@ -250,7 +251,7 @@ ENTRY(secondary_startup_64) * secondary CPU,initial_gs should be set to its pda address * before the CPU runs this code. * - * On UP, initial_gs points to _boot_cpu_pda and doesn't + * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't * change. */ movl $MSR_GS_BASE,%ecx @@ -284,7 +285,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad _boot_cpu_pda + .quad PER_CPU_VAR(__pda) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index be1ff34db11..daeedf82c15 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -66,6 +66,16 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif +/* + * Define load_pda_offset() and per-cpu __pda for x86_64. + * load_pda_offset() is responsible for loading the offset of pda into + * %gs. + * + * On SMP, pda offset also duals as percpu base address and thus it + * should be at the start of per-cpu area. To achieve this, it's + * preallocated in vmlinux_64.lds.S directly instead of using + * DEFINE_PER_CPU(). + */ #ifdef CONFIG_X86_64 void __cpuinit load_pda_offset(int cpu) { @@ -74,6 +84,10 @@ void __cpuinit load_pda_offset(int cpu) wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); mb(); } +#ifndef CONFIG_SMP +DEFINE_PER_CPU(struct x8664_pda, __pda); +EXPORT_PER_CPU_SYMBOL(__pda); +#endif #endif /* CONFIG_SMP && CONFIG_X86_64 */ @@ -180,8 +194,6 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; #ifdef CONFIG_X86_64 - cpu_pda(cpu) = (void *)ptr; - /* * CPU0 modified pda in the init data area, reload pda * offset for CPU0 and clear the area for others. diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 962f21f1d4d..d2a0baa87d1 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -217,10 +217,12 @@ SECTIONS * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should * switch it back to data.init. Also, pda should be at the head of - * percpu area. Preallocate it. + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) + per_cpu____pda = __per_cpu_start; #else PERCPU(PAGE_SIZE) #endif From 49357d19e4fb31e28796eaff83499e7584c26878 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 105/682] x86: convert pda ops to wrappers around x86 percpu accessors pda is now a percpu variable and there's no reason it can't use plain x86 percpu accessors. Add x86_test_and_clear_bit_percpu() and replace pda op implementations with wrappers around x86 percpu accessors. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pda.h | 88 +++----------------------------- arch/x86/include/asm/percpu.h | 10 ++++ arch/x86/kernel/vmlinux_64.lds.S | 1 - arch/x86/kernel/x8664_ksyms_64.c | 2 - 4 files changed, 16 insertions(+), 85 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 66ae1043393..e3d3a081d79 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,91 +45,15 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -/* - * There is no fast way to get the base address of the PDA, all the accesses - * have to mention %fs/%gs. So it needs to be done this Torvaldian way. - */ -extern void __bad_pda_field(void) __attribute__((noreturn)); - -/* - * proxy_pda doesn't actually exist, but tell gcc it is accessed for - * all PDA accesses so it gets read/write dependencies right. - */ -extern struct x8664_pda _proxy_pda; - -#define pda_offset(field) offsetof(struct x8664_pda, field) - -#define pda_to_op(op, field, val) \ -do { \ - typedef typeof(_proxy_pda.field) T__; \ - if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - case 4: \ - asm(op "l %1,%%gs:%c2" : \ - "+m" (_proxy_pda.field) : \ - "ri" ((T__)val), \ - "i" (pda_offset(field))); \ - break; \ - case 8: \ - asm(op "q %1,%%gs:%c2": \ - "+m" (_proxy_pda.field) : \ - "r" ((T__)val), \ - "i"(pda_offset(field))); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ -} while (0) - -#define pda_from_op(op, field) \ -({ \ - typeof(_proxy_pda.field) ret__; \ - switch (sizeof(_proxy_pda.field)) { \ - case 2: \ - asm(op "w %%gs:%c1,%0" : \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 4: \ - asm(op "l %%gs:%c1,%0": \ - "=r" (ret__): \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - case 8: \ - asm(op "q %%gs:%c1,%0": \ - "=r" (ret__) : \ - "i" (pda_offset(field)), \ - "m" (_proxy_pda.field)); \ - break; \ - default: \ - __bad_pda_field(); \ - } \ - ret__; \ -}) - -#define read_pda(field) pda_from_op("mov", field) -#define write_pda(field, val) pda_to_op("mov", field, val) -#define add_pda(field, val) pda_to_op("add", field, val) -#define sub_pda(field, val) pda_to_op("sub", field, val) -#define or_pda(field, val) pda_to_op("or", field, val) +#define read_pda(field) x86_read_percpu(__pda.field) +#define write_pda(field, val) x86_write_percpu(__pda.field, val) +#define add_pda(field, val) x86_add_percpu(__pda.field, val) +#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) +#define or_pda(field, val) x86_or_percpu(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ -({ \ - int old__; \ - asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (_proxy_pda.field) \ - : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ - old__; \ -}) + x86_test_and_clear_bit_percpu(bit, __pda.field) #endif diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 556f84b9ea9..328b31a429d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -121,6 +121,16 @@ do { \ #define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) #define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define x86_test_and_clear_bit_percpu(bit, var) \ +({ \ + int old__; \ + asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \ + : "=r" (old__) \ + : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \ + old__; \ +}) + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index d2a0baa87d1..a09abb8fb97 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -14,7 +14,6 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; -_proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 695e426aa35..3909e3ba5ce 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(init_level4_pgt); EXPORT_SYMBOL(load_gs_index); - -EXPORT_SYMBOL(_proxy_pda); From 004aa322f855a765741d9437a98dd8fe2e4f32a6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 13 Jan 2009 20:41:35 +0900 Subject: [PATCH 106/682] x86: misc clean up after the percpu update Do the following cleanups: * kill x86_64_init_pda() which now is equivalent to pda_init() * use per_cpu_offset() instead of cpu_pda() when initializing initial_gs Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/setup.h | 1 - arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/head64.c | 7 +------ arch/x86/kernel/smpboot.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 5 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ebe858cdc8a..536949749bc 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else -void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 9ff67f8dc2c..4abff454c55 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,7 +101,7 @@ int acpi_save_state_mem(void) stack_start.sp = temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); - initial_gs = (unsigned long)cpu_pda(smp_processor_id()); + initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 71b6f6ec96a..af67d3227ea 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,11 +26,6 @@ #include #include -void __init x86_64_init_pda(void) -{ - pda_init(0); -} - static void __init zap_identity_mappings(void) { pgd_t *pgd = pgd_offset_k(0UL); @@ -96,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - x86_64_init_pda(); + pda_init(0); x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f2f77ca494d..2f0e0f1090f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,7 +798,7 @@ do_rest: #else cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); - initial_gs = (unsigned long)cpu_pda(cpu); + initial_gs = per_cpu_offset(cpu); #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 965539ec425..312414ef936 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1645,7 +1645,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - x86_64_init_pda(); + pda_init(0); #endif xen_smp_init(); From 6dbde3530850d4d8bfc1b6bd4006d92786a2787f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Jan 2009 22:15:53 +0900 Subject: [PATCH 107/682] percpu: add optimized generic percpu accessors It is an optimization and a cleanup, and adds the following new generic percpu methods: percpu_read() percpu_write() percpu_add() percpu_sub() percpu_and() percpu_or() percpu_xor() and implements support for them on x86. (other architectures will fall back to a default implementation) The advantage is that for example to read a local percpu variable, instead of this sequence: return __get_cpu_var(var); ffffffff8102ca2b: 48 8b 14 fd 80 09 74 mov -0x7e8bf680(,%rdi,8),%rdx ffffffff8102ca32: 81 ffffffff8102ca33: 48 c7 c0 d8 59 00 00 mov $0x59d8,%rax ffffffff8102ca3a: 48 8b 04 10 mov (%rax,%rdx,1),%rax We can get a single instruction by using the optimized variants: return percpu_read(var); ffffffff8102ca3f: 65 48 8b 05 91 8f fd mov %gs:0x7efd8f91(%rip),%rax I also cleaned up the x86-specific APIs and made the x86 code use these new generic percpu primitives. tj: * fixed generic percpu_sub() definition as Roel Kluin pointed out * added percpu_and() for completeness's sake * made generic percpu ops atomic against preemption Signed-off-by: Ingo Molnar Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 2 +- arch/x86/include/asm/irq_regs_32.h | 4 +-- arch/x86/include/asm/mmu_context_32.h | 12 +++---- arch/x86/include/asm/pda.h | 10 +++--- arch/x86/include/asm/percpu.h | 24 +++++++------ arch/x86/include/asm/smp.h | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/tlb_32.c | 10 +++--- arch/x86/mach-voyager/voyager_smp.c | 4 +-- arch/x86/xen/enlighten.c | 14 ++++---- arch/x86/xen/irq.c | 8 ++--- arch/x86/xen/mmu.c | 2 +- arch/x86/xen/multicalls.h | 2 +- arch/x86/xen/smp.c | 2 +- include/asm-generic/percpu.h | 52 +++++++++++++++++++++++++++ 15 files changed, 102 insertions(+), 48 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0930b4f8d67..0728480f5c5 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -10,7 +10,7 @@ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); static __always_inline struct task_struct *get_current(void) { - return x86_read_percpu(current_task); + return percpu_read(current_task); } #else /* X86_32 */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index 86afd747345..d7ed33ee94e 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h @@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return x86_read_percpu(irq_regs); + return percpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - x86_write_percpu(irq_regs, new_regs); + percpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 7e98ce1d2c0..08b53454f83 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h @@ -4,8 +4,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - x86_write_percpu(cpu_tlbstate.active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); @@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, } #ifdef CONFIG_SMP else { - x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index e3d3a081d79..47f274fe695 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -45,11 +45,11 @@ extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) -#define read_pda(field) x86_read_percpu(__pda.field) -#define write_pda(field, val) x86_write_percpu(__pda.field, val) -#define add_pda(field, val) x86_add_percpu(__pda.field, val) -#define sub_pda(field, val) x86_sub_percpu(__pda.field, val) -#define or_pda(field, val) x86_or_percpu(__pda.field, val) +#define read_pda(field) percpu_read(__pda.field) +#define write_pda(field, val) percpu_write(__pda.field, val) +#define add_pda(field, val) percpu_add(__pda.field, val) +#define sub_pda(field, val) percpu_sub(__pda.field, val) +#define or_pda(field, val) percpu_or(__pda.field, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define test_and_clear_bit_pda(bit, field) \ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 328b31a429d..03aa4b00a1c 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -40,16 +40,11 @@ #ifdef CONFIG_SMP #define __percpu_seg_str "%%"__stringify(__percpu_seg)":" -#define __my_cpu_offset x86_read_percpu(this_cpu_off) +#define __my_cpu_offset percpu_read(this_cpu_off) #else #define __percpu_seg_str #endif -#include - -/* We can use this directly for local CPU (faster). */ -DECLARE_PER_CPU(unsigned long, this_cpu_off); - /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); @@ -115,11 +110,13 @@ do { \ ret__; \ }) -#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) -#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) -#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) -#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) -#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) +#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) +#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) +#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) +#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) +#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) +#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ @@ -131,6 +128,11 @@ do { \ old__; \ }) +#include + +/* We can use this directly for local CPU (faster). */ +DECLARE_PER_CPU(unsigned long, this_cpu_off); + #ifdef CONFIG_X86_64 extern void load_pda_offset(int cpu); #else diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 127415402ea..c7bbbbe65d3 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -160,7 +160,7 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) +#define raw_smp_processor_id() (percpu_read(cpu_number)) extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a546f55c77b..77d546817d9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -591,7 +591,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - x86_write_percpu(current_task, next_p); + percpu_write(current_task, next_p); return prev_p; } diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e3..e65449d0f7d 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); */ void leave_mm(int cpu) { - BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); + BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { + if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -222,7 +222,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 1a48368acb0..96f15b09a4c 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -402,7 +402,7 @@ void __init find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; - x86_write_percpu(cpu_number, boot_cpu_id); + percpu_write(cpu_number, boot_cpu_id); } /* @@ -1782,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - x86_write_percpu(cpu_number, hard_smp_processor_id()); + percpu_write(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(cpumask_t callmask) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 312414ef936..75b94139e1f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0) static void xen_write_cr2(unsigned long cr2) { - x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; + percpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return x86_read_percpu(xen_vcpu)->arch.cr2; + return percpu_read(xen_vcpu)->arch.cr2; } static unsigned long xen_read_cr2_direct(void) { - return x86_read_percpu(xen_vcpu_info.arch.cr2); + return percpu_read(xen_vcpu_info.arch.cr2); } static void xen_write_cr4(unsigned long cr4) @@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4) static unsigned long xen_read_cr3(void) { - return x86_read_percpu(xen_cr3); + return percpu_read(xen_cr3); } static void set_current_cr3(void *v) { - x86_write_percpu(xen_current_cr3, (unsigned long)v); + percpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); if (kernel) { - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - x86_write_percpu(xen_cr3, cr3); + percpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index bb042608c60..2e8271431e1 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -83,7 +83,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } @@ -96,7 +96,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = x86_read_percpu(xen_vcpu); + vcpu = percpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 503c240e26c..7bc7852cc5c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1074,7 +1074,7 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { load_cr3(swapper_pg_dir); arch_flush_lazy_cpu_mode(); } diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 85893824161..e786fa7f261 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); + local_irq_restore(percpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 83fa4236477..3bfd6dd0b47 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -78,7 +78,7 @@ static __cpuinit void cpu_bringup(void) xen_setup_cpu_clockevents(); cpu_set(cpu, cpu_online_map); - x86_write_percpu(cpu_state, CPU_ONLINE); + percpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b0e63c672eb..00f45ff081a 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void); #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ __typeof__(type) per_cpu_var(name) +/* + * Optional methods for optimized non-lvalue per-cpu variable access. + * + * @var can be a percpu variable or a field of it and its size should + * equal char, int or long. percpu_read() evaluates to a lvalue and + * all others to void. + * + * These operations are guaranteed to be atomic w.r.t. preemption. + * The generic versions use plain get/put_cpu_var(). Archs are + * encouraged to implement single-instruction alternatives which don't + * require preemption protection. + */ +#ifndef percpu_read +# define percpu_read(var) \ + ({ \ + typeof(per_cpu_var(var)) __tmp_var__; \ + __tmp_var__ = get_cpu_var(var); \ + put_cpu_var(var); \ + __tmp_var__; \ + }) +#endif + +#define __percpu_generic_to_op(var, val, op) \ +do { \ + get_cpu_var(var) op val; \ + put_cpu_var(var); \ +} while (0) + +#ifndef percpu_write +# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =) +#endif + +#ifndef percpu_add +# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=) +#endif + +#ifndef percpu_sub +# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=) +#endif + +#ifndef percpu_and +# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=) +#endif + +#ifndef percpu_or +# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=) +#endif + +#ifndef percpu_xor +# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=) +#endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ From a338af2c648f5e07c582154745a6c60cd2d8bf12 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 11:19:03 +0900 Subject: [PATCH 108/682] x86: fix build bug introduced during merge EXPORT_PER_CPU_SYMBOL() got misplaced during merge leading to build failure. Fix it. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index daeedf82c15..b5c35af2011 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -86,9 +86,8 @@ void __cpuinit load_pda_offset(int cpu) } #ifndef CONFIG_SMP DEFINE_PER_CPU(struct x8664_pda, __pda); -EXPORT_PER_CPU_SYMBOL(__pda); #endif - +EXPORT_PER_CPU_SYMBOL(__pda); #endif /* CONFIG_SMP && CONFIG_X86_64 */ #ifdef CONFIG_X86_64 From cd3adf52309867955d6e2175246b526442235805 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 12:11:43 +0900 Subject: [PATCH 109/682] x86_64: initialize this_cpu_off to __per_cpu_load On x86_64, if get_per_cpu_var() is used before per cpu area is setup (if lockdep is turned on, it happens), it needs this_cpu_off to point to __per_cpu_load. Initialize accordingly. Signed-off-by: Tejun Heo --- arch/x86/kernel/smpcommon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 84395fabc41..7e157810062 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -3,8 +3,13 @@ */ #include #include +#include +#ifdef CONFIG_X86_64 +DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; +#else DEFINE_PER_CPU(unsigned long, this_cpu_off); +#endif EXPORT_PER_CPU_SYMBOL(this_cpu_off); #ifdef CONFIG_X86_32 From 68564a46976017496c2227660930d81240f82355 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: [PATCH 110/682] work_on_cpu: don't try to get_online_cpus() in work_on_cpu. Impact: remove potential circular lock dependency with cpu hotplug lock This has caused more problems than it solved, with a pile of cpu hotplug locking issues. Followup patches will get_online_cpus() in callers that need it, but if they don't do it they're no worse than before when they were using set_cpus_allowed without locking. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- kernel/workqueue.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2f445833ae3..a35afdbc016 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -991,8 +991,8 @@ static void do_work_for_cpu(struct work_struct *w) * @fn: the function to run * @arg: the function arg * - * This will return -EINVAL in the cpu is not online, or the return value - * of @fn otherwise. + * This will return the value @fn returns. + * It is up to the caller to ensure that the cpu doesn't go offline. */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { @@ -1001,14 +1001,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) INIT_WORK(&wfc.work, do_work_for_cpu); wfc.fn = fn; wfc.arg = arg; - get_online_cpus(); - if (unlikely(!cpu_online(cpu))) - wfc.ret = -EINVAL; - else { - schedule_work_on(cpu, &wfc.work); - flush_work(&wfc.work); - } - put_online_cpus(); + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); return wfc.ret; } From e1d9ec6246a2668a5d037f529877efb7cf176af8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: [PATCH 111/682] work_on_cpu: Use our own workqueue. Impact: remove potential clashes with generic kevent workqueue Annoyingly, some places we want to use work_on_cpu are already in workqueues. As per Ingo's suggestion, we create a different workqueue for work_on_cpu. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- kernel/workqueue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a35afdbc016..1f0c509b40d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -971,6 +971,8 @@ undo: } #ifdef CONFIG_SMP +static struct workqueue_struct *work_on_cpu_wq __read_mostly; + struct work_for_cpu { struct work_struct work; long (*fn)(void *); @@ -1001,7 +1003,7 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) INIT_WORK(&wfc.work, do_work_for_cpu); wfc.fn = fn; wfc.arg = arg; - schedule_work_on(cpu, &wfc.work); + queue_work_on(cpu, work_on_cpu_wq, &wfc.work); flush_work(&wfc.work); return wfc.ret; @@ -1019,4 +1021,8 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); +#ifdef CONFIG_SMP + work_on_cpu_wq = create_workqueue("work_on_cpu"); + BUG_ON(!work_on_cpu_wq); +#endif } From 6eb714c63ed5bd663627f7dda8c4d5258f3b64ef Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:31:15 -0800 Subject: [PATCH 112/682] cpufreq: use work_on_cpu in acpi-cpufreq.c for drv_read and drv_write Impact: use new work_on_cpu function to reduce stack usage Replace the saving of current->cpus_allowed and set_cpus_allowed_ptr() with a work_on_cpu function for drv_read() and drv_write(). Basically converts do_drv_{read,write} into "work_on_cpu" functions that are now called by drv_read and drv_write. Note: This patch basically reverts 50c668d6 which reverted 7503bfba, now that the work_on_cpu() function is more stable. Signed-off-by: Mike Travis Acked-by: Rusty Russell Tested-by: Dieter Ries Tested-by: Maciej Rutecki Cc: Dave Jones Cc: --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 019276717a7..4b1c319d30c 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -150,8 +150,9 @@ struct drv_cmd { u32 val; }; -static void do_drv_read(struct drv_cmd *cmd) +static long do_drv_read(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 h; switch (cmd->type) { @@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd) default: break; } + return 0; } -static void do_drv_write(struct drv_cmd *cmd) +static long do_drv_write(void *_cmd) { + struct drv_cmd *cmd = _cmd; u32 lo, hi; switch (cmd->type) { @@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd) default: break; } + return 0; } static void drv_read(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; cmd->val = 0; - set_cpus_allowed_ptr(current, cmd->mask); - do_drv_read(cmd); - set_cpus_allowed_ptr(current, &saved_mask); + work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); } static void drv_write(struct drv_cmd *cmd) { - cpumask_t saved_mask = current->cpus_allowed; unsigned int i; for_each_cpu(i, cmd->mask) { - set_cpus_allowed_ptr(current, cpumask_of(i)); - do_drv_write(cmd); + work_on_cpu(i, do_drv_write, cmd); } - - set_cpus_allowed_ptr(current, &saved_mask); - return; } static u32 get_cur_val(const struct cpumask *mask) @@ -367,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) return freq; } -static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, +static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, struct acpi_cpufreq_data *data) { unsigned int cur_freq; From cef30b3a84e1c7cbd49987d83c5863c001445842 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 16 Jan 2009 15:58:13 -0800 Subject: [PATCH 113/682] x86: put trigger in to detect mismatched apic versions. Fire off one message if two apic's discovered with different apic versions. Signed-off-by: Mike Travis --- arch/x86/kernel/apic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0f830e4f567..db0998641c5 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1833,6 +1833,11 @@ void __cpuinit generic_processor_info(int apicid, int version) num_processors++; cpu = cpumask_next_zero(-1, cpu_present_mask); + if (version != apic_version[boot_cpu_physical_apicid]) + WARN_ONCE(1, + "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* From 145cd30bac885dffad9db9d487baad07b68a3d04 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 17 Jan 2009 14:42:50 +0900 Subject: [PATCH 114/682] linker script: add missing VMLINUX_SYMBOL The newly added PERCPU_*() macros define and use __per_cpu_load but VMLINUX_SYMBOL() was missing from usages causing build failures on archs where linker visible symbol is different from C symbols (e.g. blackfin). Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e53319cf29c..aa6b9b1b30b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -432,13 +432,14 @@ #define PERCPU_PROLOG(vaddr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) { \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; #define PERCPU_EPILOG(phdr) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = __per_cpu_load + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc From 74e7904559a10cbb9fbf9139c5c42fc87c0f62a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 17 Jan 2009 15:26:32 +0900 Subject: [PATCH 115/682] linker script: add missing .data.percpu.page_aligned arm, arm/mach-integrator and powerpc were missing .data.percpu.page_aligned in their percpu output section definitions. Add it. Signed-off-by: Tejun Heo --- arch/arm/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 00216071eaf..85598f7da40 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -65,6 +65,7 @@ SECTIONS #endif . = ALIGN(4096); __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 10a7d47e851..f45e4e508ec 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -219,6 +219,7 @@ SECTIONS .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 47bf15cd2c9..04e8ecea9b4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -182,6 +182,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { __per_cpu_start = .; + *(.data.percpu.page_aligned) *(.data.percpu) *(.data.percpu.shared_aligned) __per_cpu_end = .; From 1b437c8c73a36daa471dd54a63c426d72af5723d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: [PATCH 116/682] x86-64: Move irq stats from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_64.h | 24 +++++++++++++++++++----- arch/x86/include/asm/pda.h | 10 ---------- arch/x86/kernel/irq.c | 6 +----- arch/x86/kernel/irq_64.c | 3 +++ arch/x86/kernel/nmi.c | 10 +--------- arch/x86/xen/smp.c | 18 +++--------------- 6 files changed, 27 insertions(+), 44 deletions(-) diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index b5a6b5d5670..a65bab20f6c 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,22 +3,36 @@ #include #include -#include #include +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT 1 -#define inc_irq_stat(member) add_pda(member, 1) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() read_pda(__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 -#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) -#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 47f274fe695..69a40757e21 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,19 +25,9 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - unsigned int __softirq_pending; - unsigned int __nmi_count; /* number of NMI on this CPUs */ short mmu_state; short isidle; struct mm_struct *active_mm; - unsigned apic_timer_irqs; - unsigned irq0_irqs; - unsigned irq_resched_count; - unsigned irq_call_count; - unsigned irq_tlb_count; - unsigned irq_thermal_count; - unsigned irq_threshold_count; - unsigned irq_spurious_count; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3973e2df7f8..8b30d0c2512 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq) #endif } -#ifdef CONFIG_X86_32 -# define irq_stats(x) (&per_cpu(irq_stat, x)) -#else -# define irq_stats(x) cpu_pda(x) -#endif +#define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing: */ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b21cb1ea11..1db05247b47 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -19,6 +19,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); +EXPORT_PER_CPU_SYMBOL(irq_stat); + /* * Probabilistic stack overflow check: * diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 7228979f1e7..23b6d9e6e4f 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -61,11 +61,7 @@ static int endflag __initdata; static inline unsigned int get_nmi_count(int cpu) { -#ifdef CONFIG_X86_64 - return cpu_pda(cpu)->__nmi_count; -#else - return nmi_count(cpu); -#endif + return per_cpu(irq_stat, cpu).__nmi_count; } static inline int mce_in_progress(void) @@ -82,12 +78,8 @@ static inline int mce_in_progress(void) */ static inline unsigned int get_timer_irqs(int cpu) { -#ifdef CONFIG_X86_64 - return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); -#else return per_cpu(irq_stat, cpu).apic_timer_irqs + per_cpu(irq_stat, cpu).irq0_irqs; -#endif } #ifdef CONFIG_SMP diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 3bfd6dd0b47..9ff3b0999cf 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); */ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) { -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); return IRQ_HANDLED; } @@ -435,11 +431,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; @@ -449,11 +441,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) { irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); return IRQ_HANDLED; From 9eb912d1aa6b8106e06a73ea6702ec3dab0d6a1a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:57 +0900 Subject: [PATCH 117/682] x86-64: Move TLB state from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/mmu_context_64.h | 16 +++++++--------- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/tlbflush.h | 7 ++----- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/tlb_32.c | 12 ++---------- arch/x86/kernel/tlb_64.c | 13 ++++++++----- arch/x86/xen/mmu.c | 6 +----- 7 files changed, 20 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h index 677d36e9540..c4572505ab3 100644 --- a/arch/x86/include/asm/mmu_context_64.h +++ b/arch/x86/include/asm/mmu_context_64.h @@ -1,13 +1,11 @@ #ifndef _ASM_X86_MMU_CONTEXT_64_H #define _ASM_X86_MMU_CONTEXT_64_H -#include - static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - if (read_pda(mmu_state) == TLBSTATE_OK) - write_pda(mmu_state, TLBSTATE_LAZY); + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif } @@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, /* stop flush ipis for the previous mm */ cpu_clear(cpu, prev->cpu_vm_mask); #ifdef CONFIG_SMP - write_pda(mmu_state, TLBSTATE_OK); - write_pda(active_mm, next); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); #endif cpu_set(cpu, next->cpu_vm_mask); load_cr3(next->pgd); @@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, } #ifdef CONFIG_SMP else { - write_pda(mmu_state, TLBSTATE_OK); - if (read_pda(active_mm) != next) - BUG(); + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 69a40757e21..8ee835ed10e 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -25,9 +25,7 @@ struct x8664_pda { char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ - short mmu_state; short isidle; - struct mm_struct *active_mm; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 17feaa9c7e7..d3539f998f8 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask, #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -#ifdef CONFIG_X86_32 struct tlb_state { struct mm_struct *active_mm; int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; }; DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); -void reset_lazy_tlbstate(void); -#else static inline void reset_lazy_tlbstate(void) { + percpu_write(cpu_tlbstate.state, 0); + percpu_write(cpu_tlbstate.active_mm, &init_mm); } -#endif #endif /* SMP */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c49498d4083..3d0cc6f1711 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,8 +897,6 @@ void __cpuinit pda_init(int cpu) pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - pda->active_mm = &init_mm; - pda->mmu_state = 0; if (cpu == 0) { /* others are initialized in smpboot.c */ diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e65449d0f7d..abf0808d6fc 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -4,8 +4,8 @@ #include -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) - ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; /* must come after the send_IPI functions above for inlining */ #include @@ -231,14 +231,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1); } -void reset_lazy_tlbstate(void) -{ - int cpu = raw_smp_processor_id(); - - per_cpu(cpu_tlbstate, cpu).state = 0; - per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; -} - static int init_flush_cpumask(void) { alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 7f4141d3b66..e64a32c4882 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -18,6 +18,9 @@ #include #include +DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) + = { &init_mm, 0, }; + #include /* * Smarter SMP flushing macros. @@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state); */ void leave_mm(int cpu) { - if (read_pda(mmu_state) == TLBSTATE_OK) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) BUG(); - cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } EXPORT_SYMBOL_GPL(leave_mm); @@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * BUG(); */ - if (f->flush_mm == read_pda(active_mm)) { - if (read_pda(mmu_state) == TLBSTATE_OK) { + if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_va == TLB_FLUSH_ALL) local_flush_tlb(); else @@ -281,7 +284,7 @@ static void do_flush_tlb_all(void *info) unsigned long cpu = smp_processor_id(); __flush_tlb_all(); - if (read_pda(mmu_state) == TLBSTATE_LAZY) + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(cpu); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7bc7852cc5c..98cb9869eb2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1063,11 +1063,7 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; -#ifdef CONFIG_X86_64 - active_mm = read_pda(active_mm); -#else - active_mm = __get_cpu_var(cpu_tlbstate).active_mm; -#endif + active_mm = percpu_read(cpu_tlbstate.active_mm); if (active_mm == mm) leave_mm(smp_processor_id()); From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 118/682] x86-64: Convert irqstacks to per-cpu Move the irqstackptr variable from the PDA to per-cpu. Make the stacks themselves per-cpu, removing some specific allocation code. Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot adjustments. tj: * sprinkle some underbars around. * irq_stack_ptr is not used till traps_init(), no reason to initialize it early. On SMP, just leaving it NULL till proper initialization in setup_per_cpu_areas() works. Dropped is_boot_cpu and early irq_stack_ptr initialization. * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) instead of (char, irq_stack[IRQ_STACK_SIZE]). Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/page_64.h | 4 ++-- arch/x86/include/asm/pda.h | 1 - arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 19 +++++++----------- arch/x86/kernel/dumpstack_64.c | 33 ++++++++++++++++---------------- arch/x86/kernel/entry_64.S | 6 +++--- arch/x86/kernel/setup_percpu.c | 4 +++- 8 files changed, 35 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 5ebca29f44f..e27fdbe5f9e 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -13,8 +13,8 @@ #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQSTACK_ORDER 2 -#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define STACKFAULT_STACK 1 #define DOUBLEFAULT_STACK 2 diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 8ee835ed10e..09965f7a216 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -22,7 +22,6 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - char *irqstackptr; short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ short isidle; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..f511246fa6c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,9 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index f4cc81bfbf8..5b821fbdaf7 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -54,7 +54,6 @@ int main(void) ENTRY(pcurrent); ENTRY(irqcount); ENTRY(cpunumber); - ENTRY(irqstackptr); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3d0cc6f1711..496f0a01919 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,7 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; +DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +#ifdef CONFIG_SMP +DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ +#else +DEFINE_PER_CPU(char *, irq_stack_ptr) = + per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; +#endif void __cpuinit pda_init(int cpu) { @@ -901,18 +907,7 @@ void __cpuinit pda_init(int cpu) if (cpu == 0) { /* others are initialized in smpboot.c */ pda->pcurrent = &init_task; - pda->irqstackptr = boot_cpu_stack; - pda->irqstackptr += IRQSTACKSIZE - 64; } else { - if (!pda->irqstackptr) { - pda->irqstackptr = (char *) - __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); - if (!pda->irqstackptr) - panic("cannot allocate irqstack for cpu %d", - cpu); - pda->irqstackptr += IRQSTACKSIZE - 64; - } - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index c302d070704..28e26a4315d 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned long *irq_stack_end = + (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned used = 0; struct thread_info *tinfo; int graph = 0; @@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, stack = (unsigned long *) estack_end[-2]; continue; } - if (irqstack_end) { - unsigned long *irqstack; - irqstack = irqstack_end - - (IRQSTACKSIZE - 64) / sizeof(*irqstack); + if (irq_stack_end) { + unsigned long *irq_stack; + irq_stack = irq_stack_end - + (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irqstack && stack < irqstack_end) { + if (stack >= irq_stack && stack < irq_stack_end) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end, &graph); + ops, data, irq_stack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last * pointer (index -1 to end) in the IRQ stack: */ - stack = (unsigned long *) (irqstack_end[-1]); - irqstack_end = NULL; + stack = (unsigned long *) (irq_stack_end[-1]); + irq_stack_end = NULL; ops->stack(data, "EOI"); continue; } @@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = - (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = - (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irq_stack_end = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + unsigned long *irq_stack = + (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* * debugging aid: "show_stack(NULL, NULL);" prints the @@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, stack = sp; for (i = 0; i < kstack_depth_to_print; i++) { - if (stack >= irqstack && stack <= irqstack_end) { - if (stack == irqstack_end) { - stack = (unsigned long *) (irqstack_end[-1]); + if (stack >= irq_stack && stack <= irq_stack_end) { + if (stack == irq_stack_end) { + stack = (unsigned long *) (irq_stack_end[-1]); printk(" "); } } else { diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4833f3a1965..d22677a6643 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -345,7 +345,7 @@ ENTRY(save_args) 1: incl %gs:pda_irqcount jne 2f popq_cfi %rax /* move return address... */ - mov %gs:pda_irqstackptr,%rsp + mov PER_CPU_VAR(irq_stack_ptr),%rsp EMPTY_FRAME 0 pushq_cfi %rax /* ... to the new stack */ /* @@ -1261,7 +1261,7 @@ ENTRY(call_softirq) mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp incl %gs:pda_irqcount - cmove %gs:pda_irqstackptr,%rsp + cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq @@ -1300,7 +1300,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 11: incl %gs:pda_irqcount movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - cmovzq %gs:pda_irqstackptr,%rsp + cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp pushq %rbp # backlink for old unwinder call xen_evtchn_do_upcall popq %rsp diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b5c35af2011..8b53ef83c61 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -192,7 +192,10 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; + per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); #ifdef CONFIG_X86_64 + per_cpu(irq_stack_ptr, cpu) = + (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* * CPU0 modified pda in the init data area, reload pda * offset for CPU0 and clear the area for others. @@ -202,7 +205,6 @@ void __init setup_per_cpu_areas(void) else memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); #endif - per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } From 92d65b2371d86d40807e1dbfdccadc4d501edcde Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 119/682] x86-64: Convert exception stacks to per-cpu Move the exception stacks to per-cpu, removing specific allocation code. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 496f0a01919..b6d7eec0be7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -913,8 +913,9 @@ void __cpuinit pda_init(int cpu) } } -static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + - DEBUG_STKSZ] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) + __aligned(PAGE_SIZE); extern asmlinkage void ignore_sysret(void); @@ -972,15 +973,12 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); unsigned long v; - char *estacks = NULL; struct task_struct *me; int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) pda_init(cpu); - else - estacks = boot_exception_stacks; me = current; @@ -1014,18 +1012,13 @@ void __cpuinit cpu_init(void) * set up and load the per-CPU TSS */ if (!orig_ist->ist[0]) { - static const unsigned int order[N_EXCEPTION_STACKS] = { - [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, - [DEBUG_STACK - 1] = DEBUG_STACK_ORDER + static const unsigned int sizes[N_EXCEPTION_STACKS] = { + [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, + [DEBUG_STACK - 1] = DEBUG_STKSZ }; + char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { - if (cpu) { - estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); - if (!estacks) - panic("Cannot allocate exception " - "stack %ld %d\n", v, cpu); - } - estacks += PAGE_SIZE << order[v]; + estacks += sizes[v]; orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } From ea9279066de44053d0c20ea855bc9f4706652d84 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 120/682] x86-64: Move cpu number from PDA to per-cpu and consolidate with 32-bit. tj: moved cpu_number definition out of CONFIG_HAVE_SETUP_PER_CPU_AREA for voyager. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/include/asm/smp.h | 4 +--- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/process_32.c | 3 --- arch/x86/kernel/setup_percpu.c | 10 ++++++++++ arch/x86/kernel/smpcommon.c | 2 -- 7 files changed, 12 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 09965f7a216..668d5a5b6f7 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -16,7 +16,7 @@ struct x8664_pda { unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ - unsigned int cpunumber; /* 36 Logical CPU number */ + unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ /* gcc-ABI: this canary MUST be at diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c7bbbbe65d3..68636e767a9 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -25,9 +25,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); -#ifdef CONFIG_X86_32 DECLARE_PER_CPU(int, cpu_number); -#endif static inline struct cpumask *cpu_sibling_mask(int cpu) { @@ -164,7 +162,7 @@ extern unsigned disabled_cpus __cpuinitdata; extern int safe_smp_processor_id(void); #elif defined(CONFIG_X86_64_SMP) -#define raw_smp_processor_id() read_pda(cpunumber) +#define raw_smp_processor_id() (percpu_read(cpu_number)) #define stack_smp_processor_id() \ ({ \ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 5b821fbdaf7..cae6697c099 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -53,7 +53,6 @@ int main(void) ENTRY(oldrsp); ENTRY(pcurrent); ENTRY(irqcount); - ENTRY(cpunumber); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b6d7eec0be7..4221e920886 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -899,7 +899,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); - pda->cpunumber = cpu; pda->irqcount = -1; pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 77d546817d9..2c00a57ccb9 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - /* * Return saved PC of a blocked thread. */ diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8b53ef83c61..258497f93f4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -22,6 +22,15 @@ # define DBG(x...) #endif +/* + * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but + * voyager wants cpu_number too. + */ +#ifdef CONFIG_SMP +DEFINE_PER_CPU(int, cpu_number); +EXPORT_PER_CPU_SYMBOL(cpu_number); +#endif + #ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; @@ -193,6 +202,7 @@ void __init setup_per_cpu_areas(void) memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); + per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 7e157810062..add36b4e37c 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -28,7 +28,5 @@ __cpuinit void init_gdt(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); - - per_cpu(cpu_number, cpu) = cpu; } #endif From c6f5e0acd5d12ee23f701f15889872e67b47caa6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 121/682] x86-64: Move current task from PDA to per-cpu and consolidate with 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/current.h | 24 +++--------------------- arch/x86/include/asm/pda.h | 4 ++-- arch/x86/include/asm/system.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 5 +---- arch/x86/kernel/dumpstack_64.c | 2 +- arch/x86/kernel/process_64.c | 5 ++++- arch/x86/kernel/smpboot.c | 3 +-- arch/x86/xen/smp.c | 3 +-- 9 files changed, 15 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 0728480f5c5..c68c361697e 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -1,39 +1,21 @@ #ifndef _ASM_X86_CURRENT_H #define _ASM_X86_CURRENT_H -#ifdef CONFIG_X86_32 #include #include +#ifndef __ASSEMBLY__ struct task_struct; DECLARE_PER_CPU(struct task_struct *, current_task); + static __always_inline struct task_struct *get_current(void) { return percpu_read(current_task); } -#else /* X86_32 */ - -#ifndef __ASSEMBLY__ -#include - -struct task_struct; - -static __always_inline struct task_struct *get_current(void) -{ - return read_pda(pcurrent); -} - -#else /* __ASSEMBLY__ */ - -#include -#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg +#define current get_current() #endif /* __ASSEMBLY__ */ -#endif /* X86_32 */ - -#define current get_current() - #endif /* _ASM_X86_CURRENT_H */ diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 668d5a5b6f7..7209302d922 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -11,8 +11,8 @@ /* Per processor datastructure. %gs points to it while the kernel runs */ struct x8664_pda { - struct task_struct *pcurrent; /* 0 Current process */ - unsigned long dummy; + unsigned long unused1; + unsigned long unused2; unsigned long kernelstack; /* 16 top of kernel stack for current */ unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8e626ea33a1..4399aac680e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ + "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -106,7 +106,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ + [current_task] "m" (per_cpu_var(current_task)) \ : "memory", "cc" __EXTRA_CLOBBER) #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cae6697c099..4f7a210e1e5 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -51,7 +51,6 @@ int main(void) #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) ENTRY(kernelstack); ENTRY(oldrsp); - ENTRY(pcurrent); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4221e920886..b50e38d1688 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -903,10 +903,7 @@ void __cpuinit pda_init(int cpu) pda->kernelstack = (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE; - if (cpu == 0) { - /* others are initialized in smpboot.c */ - pda->pcurrent = &init_task; - } else { + if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); } diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 28e26a4315d..d35db5993fd 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -242,7 +242,7 @@ void show_registers(struct pt_regs *regs) int i; unsigned long sp; const int cpu = smp_processor_id(); - struct task_struct *cur = cpu_pda(cpu)->pcurrent; + struct task_struct *cur = current; sp = regs->sp; printk("CPU %d ", cpu); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb9282f4..e00c31a4b3c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -57,6 +57,9 @@ asmlinkage extern void ret_from_fork(void); +DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; +EXPORT_PER_CPU_SYMBOL(current_task); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -615,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + percpu_write(current_task, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2f0e0f1090f..5854be0fb80 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -790,13 +790,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) set_idle_for_cpu(cpu, c_idle.idle); do_rest: -#ifdef CONFIG_X86_32 per_cpu(current_task, cpu) = c_idle.idle; +#ifdef CONFIG_X86_32 init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = c_idle.idle; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 9ff3b0999cf..72c2eb9b64c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -279,12 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) struct task_struct *idle = idle_task(cpu); int rc; + per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 init_gdt(cpu); - per_cpu(current_task, cpu) = idle; irq_ctx_init(cpu); #else - cpu_pda(cpu)->pcurrent = idle; clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); From 9af45651f1f7c89942e016a1a00a7ebddfa727f8 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 122/682] x86-64: Move kernelstack from PDA to per-cpu. Also clean up PER_CPU_VAR usage in xen-asm_64.S tj: * remove now unused stack_thread_info() * s/kernelstack/kernel_stack/ * added FIXME comment in xen-asm_64.S Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/ia32/ia32entry.S | 8 ++++---- arch/x86/include/asm/pda.h | 4 +--- arch/x86/include/asm/thread_info.h | 20 ++++++++------------ arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 6 ++++-- arch/x86/kernel/entry_64.S | 4 ++-- arch/x86/kernel/process_64.c | 4 ++-- arch/x86/kernel/smpboot.c | 3 +++ arch/x86/xen/xen-asm_64.S | 23 +++++++++++------------ 9 files changed, 35 insertions(+), 38 deletions(-) diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b6189..9c79b247700 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target) CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp SWAPGS_UNSAFE_STACK - movq %gs:pda_kernelstack, %rsp - addq $(PDA_STACKOFFSET),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp + addq $(KERNEL_STACK_OFFSET),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs, here we enable it straight after entry: @@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target) ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 7209302d922..4d28ffba6e1 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -13,7 +13,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; - unsigned long kernelstack; /* 16 top of kernel stack for current */ + unsigned long unused3; unsigned long oldrsp; /* 24 user rsp for system call */ int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ @@ -44,6 +44,4 @@ extern void pda_init(int); #endif -#define PDA_STACKOFFSET (5*8) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa..b46f8ca007b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void) #else /* X86_32 */ -#include +#include +#define KERNEL_STACK_OFFSET (5*8) /* * macros/functions for gaining access to the thread information structure * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ +DECLARE_PER_CPU(unsigned long, kernel_stack); + static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); - return ti; -} - -/* do not use in interrupt context */ -static inline struct thread_info *stack_thread_info(void) -{ - struct thread_info *ti; - asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); + ti = (void *)(percpu_read(kernel_stack) + + KERNEL_STACK_OFFSET - THREAD_SIZE); return ti; } @@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq %gs:pda_kernelstack,reg ; \ - subq $(THREAD_SIZE-PDA_STACKOFFSET),reg + movq PER_CPU_VAR(kernel_stack),reg ; \ + subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4f7a210e1e5..cafff5f4a03 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(kernelstack); ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b50e38d1688..06b6290088f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -889,6 +889,10 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; #endif +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -900,8 +904,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); pda->irqcount = -1; - pda->kernelstack = (unsigned long)stack_thread_info() - - PDA_STACKOFFSET + THREAD_SIZE; if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d22677a6643..0dd45859a7a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -468,7 +468,7 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ SWAPGS_UNSAFE_STACK @@ -480,7 +480,7 @@ ENTRY(system_call) ENTRY(system_call_after_swapgs) movq %rsp,%gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight * and short: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e00c31a4b3c..6c5f5760210 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -620,9 +620,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) write_pda(oldrsp, next->usersp); percpu_write(current_task, next_p); - write_pda(kernelstack, + percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - PDA_STACKOFFSET); + THREAD_SIZE - KERNEL_STACK_OFFSET); #ifdef CONFIG_CC_STACKPROTECTOR write_pda(stack_canary, next_p->stack_canary); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5854be0fb80..869b98840fd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -798,6 +798,9 @@ do_rest: #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(c_idle.idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 05794c566e8..5a23e899367 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -28,12 +29,10 @@ #if 1 /* - x86-64 does not yet support direct access to percpu variables - via a segment override, so we just need to make sure this code - never gets used + FIXME: x86_64 now can support direct access to percpu variables + via a segment override. Update xen accordingly. */ #define BUG ud2a -#define PER_CPU_VAR(var, off) 0xdeadbeef #endif /* @@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct) BUG /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events @@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct) ENTRY(xen_irq_disable_direct) BUG - movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ENDPATCH(xen_irq_disable_direct) ret ENDPROC(xen_irq_disable_direct) @@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct) ENTRY(xen_save_fl_direct) BUG - testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah,%ah ENDPATCH(xen_save_fl_direct) @@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct) BUG testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask /* Preempt here doesn't matter because that will deal with any pending interrupts. The pending check may end up being run on the wrong CPU, but that doesn't hurt. */ /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f 2: call check_events 1: @@ -196,7 +195,7 @@ ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack,%rsp + movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS pushq %gs:pda_oldrsp @@ -213,7 +212,7 @@ ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ movq %rsp, %gs:pda_oldrsp - movq %gs:pda_kernelstack, %rsp + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS pushq %gs:pda_oldrsp From 3d1e42a7cf945e289d6ba26159aa0e2b0645401b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 123/682] x86-64: Move oldrsp from PDA to per-cpu. tj: * in asm-offsets_64.c, pda.h inclusion shouldn't be removed as pda is still referenced in the file * s/oldrsp/old_rsp/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/entry_64.S | 10 +++++----- arch/x86/kernel/process_64.c | 8 +++++--- arch/x86/xen/xen-asm_64.S | 8 ++++---- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4d28ffba6e1..ae23deb9955 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -14,7 +14,7 @@ struct x8664_pda { unsigned long unused1; unsigned long unused2; unsigned long unused3; - unsigned long oldrsp; /* 24 user rsp for system call */ + unsigned long unused4; int irqcount; /* 32 Irq nesting counter. Starts -1 */ unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cafff5f4a03..afda6deb851 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(oldrsp); ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0dd45859a7a..7c27da407da 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -210,7 +210,7 @@ ENTRY(native_usergs_sysret64) /* %rsp:at FRAMEEND */ .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq %gs:pda_oldrsp,\tmp + movq PER_CPU_VAR(old_rsp),\tmp movq \tmp,RSP+\offset(%rsp) movq $__USER_DS,SS+\offset(%rsp) movq $__USER_CS,CS+\offset(%rsp) @@ -221,7 +221,7 @@ ENTRY(native_usergs_sysret64) .macro RESTORE_TOP_OF_STACK tmp offset=0 movq RSP+\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp + movq \tmp,PER_CPU_VAR(old_rsp) movq EFLAGS+\offset(%rsp),\tmp movq \tmp,R11+\offset(%rsp) .endm @@ -479,7 +479,7 @@ ENTRY(system_call) */ ENTRY(system_call_after_swapgs) - movq %rsp,%gs:pda_oldrsp + movq %rsp,PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp /* * No need to follow this irqs off/on section - it's straight @@ -523,7 +523,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp, %rsp + movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 CFI_RESTORE_STATE @@ -833,7 +833,7 @@ common_interrupt: XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): oldrsp-ARGOFFSET */ + /* 0(%rsp): old_rsp-ARGOFFSET */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6c5f5760210..48012891892 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(unsigned long, old_rsp); + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; static ATOMIC_NOTIFIER_HEAD(idle_notifier); @@ -395,7 +397,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) load_gs_index(0); regs->ip = new_ip; regs->sp = new_sp; - write_pda(oldrsp, new_sp); + percpu_write(old_rsp, new_sp); regs->cs = __USER_CS; regs->ss = __USER_DS; regs->flags = 0x200; @@ -616,8 +618,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = read_pda(oldrsp); - write_pda(oldrsp, next->usersp); + prev->usersp = percpu_read(old_rsp); + percpu_write(old_rsp, next->usersp); percpu_write(current_task, next_p); percpu_write(kernel_stack, diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 5a23e899367..d6fc51f4ce8 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -194,11 +194,11 @@ RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp + movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack),%rsp pushq $__USER_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER_CS pushq %rcx @@ -211,11 +211,11 @@ RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) /* We're already on the usermode stack at this point, but still with the kernel gs, so we can easily switch back */ - movq %rsp, %gs:pda_oldrsp + movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS - pushq %gs:pda_oldrsp + pushq PER_CPU_VAR(old_rsp) pushq %r11 pushq $__USER32_CS pushq %rcx From 5689553076c4a67b83426b076082c63085b7567a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: [PATCH 124/682] x86-64: Move irqcount from PDA to per-cpu. tj: s/irqcount/irq_count/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 +- arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/entry_64.S | 14 +++++++------- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index ae23deb9955..4527d70314d 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -15,7 +15,7 @@ struct x8664_pda { unsigned long unused2; unsigned long unused3; unsigned long unused4; - int irqcount; /* 32 Irq nesting counter. Starts -1 */ + int unused5; unsigned int unused6; /* 36 was cpunumber */ #ifdef CONFIG_CC_STACKPROTECTOR unsigned long stack_canary; /* 40 stack canary value */ diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index afda6deb851..64c834a39aa 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -49,7 +49,6 @@ int main(void) BLANK(); #undef ENTRY #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - ENTRY(irqcount); DEFINE(pda_size, sizeof(struct x8664_pda)); BLANK(); #undef ENTRY diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 06b6290088f..e2323ecce1d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -893,6 +893,8 @@ DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); +DEFINE_PER_CPU(unsigned int, irq_count) = -1; + void __cpuinit pda_init(int cpu) { struct x8664_pda *pda = cpu_pda(cpu); @@ -903,8 +905,6 @@ void __cpuinit pda_init(int cpu) load_pda_offset(cpu); - pda->irqcount = -1; - if (cpu != 0) { if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) pda->nodenumber = cpu_to_node(cpu); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7c27da407da..c52b6091916 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -337,12 +337,12 @@ ENTRY(save_args) je 1f SWAPGS /* - * irqcount is used to check if a CPU is already on an interrupt stack + * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl %gs:pda_irqcount +1: incl PER_CPU_VAR(irq_count) jne 2f popq_cfi %rax /* move return address... */ mov PER_CPU_VAR(irq_stack_ptr),%rsp @@ -837,7 +837,7 @@ common_interrupt: ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 @@ -1260,14 +1260,14 @@ ENTRY(call_softirq) CFI_REL_OFFSET rbp,0 mov %rsp,%rbp CFI_DEF_CFA_REGISTER rbp - incl %gs:pda_irqcount + incl PER_CPU_VAR(irq_count) cmove PER_CPU_VAR(irq_stack_ptr),%rsp push %rbp # backlink for old unwinder call __do_softirq leaveq CFI_DEF_CFA_REGISTER rsp CFI_ADJUST_CFA_OFFSET -8 - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) ret CFI_ENDPROC END(call_softirq) @@ -1297,7 +1297,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC DEFAULT_FRAME -11: incl %gs:pda_irqcount +11: incl PER_CPU_VAR(irq_count) movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp @@ -1305,7 +1305,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) call xen_evtchn_do_upcall popq %rsp CFI_DEF_CFA_REGISTER rsp - decl %gs:pda_irqcount + decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC END(do_hypervisor_callback) From e7a22c1ebcc1caa8178df1819d05128bb5b45ab9 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: [PATCH 125/682] x86-64: Move nodenumber from PDA to per-cpu. tj: * s/nodenumber/node_number/ * removed now unused pda variable from pda_init() Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/include/asm/topology.h | 3 ++- arch/x86/kernel/cpu/common.c | 13 ++++++------- arch/x86/kernel/setup_percpu.c | 4 +++- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4527d70314d..b30ef6bddc4 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -22,7 +22,6 @@ struct x8664_pda { /* gcc-ABI: this canary MUST be at offset 40!!! */ #endif - short nodenumber; /* number of current node (32k max) */ short in_bootmem; /* pda lives in bootmem */ short isidle; } ____cacheline_aligned_in_smp; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 87ca3fd86e8..ffea1fe03a9 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map; DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); /* Returns the number of the current Node. */ -#define numa_node_id() read_pda(nodenumber) +DECLARE_PER_CPU(int, node_number); +#define numa_node_id() percpu_read(node_number) #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2323ecce1d..7976a6a0f65 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -897,18 +897,11 @@ DEFINE_PER_CPU(unsigned int, irq_count) = -1; void __cpuinit pda_init(int cpu) { - struct x8664_pda *pda = cpu_pda(cpu); - /* Setup up data that may be needed in __get_free_pages early */ loadsegment(fs, 0); loadsegment(gs, 0); load_pda_offset(cpu); - - if (cpu != 0) { - if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) - pda->nodenumber = cpu_to_node(cpu); - } } static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks @@ -978,6 +971,12 @@ void __cpuinit cpu_init(void) if (cpu != 0) pda_init(cpu); +#ifdef CONFIG_NUMA + if (cpu != 0 && percpu_read(node_number) == 0 && + cpu_to_node(cpu) != NUMA_NO_NODE) + percpu_write(node_number, cpu_to_node(cpu)); +#endif + me = current; if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 258497f93f4..efbafbbff58 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -53,6 +53,8 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) #define X86_64_NUMA 1 /* (used later) */ +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); /* * Map cpu index to node index @@ -283,7 +285,7 @@ void __cpuinit numa_set_node(int cpu, int node) per_cpu(x86_cpu_to_node_map, cpu) = node; if (node != NUMA_NO_NODE) - cpu_pda(cpu)->nodenumber = node; + per_cpu(node_number, cpu) = node; } void __cpuinit numa_clear_node(int cpu) From c2558e0eba66b49993e619da66c95a50a97830a3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: [PATCH 126/682] x86-64: Move isidle from PDA to per-cpu. tj: s/isidle/is_idle/ Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/kernel/process_64.c | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index b30ef6bddc4..c31ca048a90 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -23,7 +23,6 @@ struct x8664_pda { offset 40!!! */ #endif short in_bootmem; /* pda lives in bootmem */ - short isidle; } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 48012891892..4523ff88a69 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -61,6 +61,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(unsigned long, old_rsp); +static DEFINE_PER_CPU(unsigned char, is_idle); unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; @@ -80,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); void enter_idle(void) { - write_pda(isidle, 1); + percpu_write(is_idle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - if (test_and_clear_bit_pda(0, isidle) == 0) + if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) return; atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } From 87b264065880fa696c121dad8498a60524e0f6de Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:59 +0900 Subject: [PATCH 127/682] x86-64: Use absolute displacements for per-cpu accesses. Accessing memory through %gs should not use rip-relative addressing. Adding a P prefix for the argument tells gcc to not add (%rip) to the memory references. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 26 +++++++++++++------------- arch/x86/include/asm/system.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 03aa4b00a1c..165d5272ece 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -39,10 +39,10 @@ #include #ifdef CONFIG_SMP -#define __percpu_seg_str "%%"__stringify(__percpu_seg)":" +#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x #define __my_cpu_offset percpu_read(this_cpu_off) #else -#define __percpu_seg_str +#define __percpu_arg(x) "%" #x #endif /* For arch-specific code, we can use direct single-insn ops (they @@ -58,22 +58,22 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_seg_str"%0" \ + asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 2: \ - asm(op "w %1,"__percpu_seg_str"%0" \ + asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 4: \ - asm(op "l %1,"__percpu_seg_str"%0" \ + asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((T__)val)); \ break; \ case 8: \ - asm(op "q %1,"__percpu_seg_str"%0" \ + asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ : "r" ((T__)val)); \ break; \ @@ -86,22 +86,22 @@ do { \ typeof(var) ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_seg_str"%1,%0" \ + asm(op "b "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_seg_str"%1,%0" \ + asm(op "w "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_seg_str"%1,%0" \ + asm(op "l "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_seg_str"%1,%0" \ + asm(op "q "__percpu_arg(1)",%0" \ : "=r" (ret__) \ : "m" (var)); \ break; \ @@ -122,9 +122,9 @@ do { \ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ int old__; \ - asm volatile("btr %1,"__percpu_seg_str"%c2\n\tsbbl %0,%0" \ - : "=r" (old__) \ - : "dIr" (bit), "i" (&per_cpu__##var) : "memory"); \ + asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (per_cpu__##var) \ + : "dIr" (bit)); \ old__; \ }) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 4399aac680e..d1dc27dba36 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -94,7 +94,7 @@ do { \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ - "movq "__percpu_seg_str"%P[current_task],%%rsi\n\t" \ + "movq "__percpu_arg([current_task])",%%rsi\n\t" \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ From 5662a2f8e7313f78d6b17ab383f3e4f04971c335 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 18 Jan 2009 19:37:21 +0100 Subject: [PATCH 128/682] x86, rdc321x: remove/move leftover files Impact: cleanup Move/remove leftover RDC321 files. Now that it's not a subarch anymore, arch/x86/mach-rdc321x and arch/x86/include/asm/mach-rdc321x/ are not needed. One include file was still in use: rdc321x_defs.h, move that to the generic x86 asm header directory. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-rdc321x/gpio.h | 60 ------ .../asm/{mach-rdc321x => }/rdc321x_defs.h | 0 arch/x86/mach-rdc321x/Makefile | 5 - arch/x86/mach-rdc321x/gpio.c | 194 ------------------ arch/x86/mach-rdc321x/platform.c | 69 ------- drivers/watchdog/rdc321x_wdt.c | 2 +- 6 files changed, 1 insertion(+), 329 deletions(-) delete mode 100644 arch/x86/include/asm/mach-rdc321x/gpio.h rename arch/x86/include/asm/{mach-rdc321x => }/rdc321x_defs.h (100%) delete mode 100644 arch/x86/mach-rdc321x/Makefile delete mode 100644 arch/x86/mach-rdc321x/gpio.c delete mode 100644 arch/x86/mach-rdc321x/platform.c diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h deleted file mode 100644 index c210ab5788b..00000000000 --- a/arch/x86/include/asm/mach-rdc321x/gpio.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _ASM_X86_MACH_RDC321X_GPIO_H -#define _ASM_X86_MACH_RDC321X_GPIO_H - -#include - -extern int rdc_gpio_get_value(unsigned gpio); -extern void rdc_gpio_set_value(unsigned gpio, int value); -extern int rdc_gpio_direction_input(unsigned gpio); -extern int rdc_gpio_direction_output(unsigned gpio, int value); -extern int rdc_gpio_request(unsigned gpio, const char *label); -extern void rdc_gpio_free(unsigned gpio); -extern void __init rdc321x_gpio_setup(void); - -/* Wrappers for the arch-neutral GPIO API */ - -static inline int gpio_request(unsigned gpio, const char *label) -{ - return rdc_gpio_request(gpio, label); -} - -static inline void gpio_free(unsigned gpio) -{ - might_sleep(); - rdc_gpio_free(gpio); -} - -static inline int gpio_direction_input(unsigned gpio) -{ - return rdc_gpio_direction_input(gpio); -} - -static inline int gpio_direction_output(unsigned gpio, int value) -{ - return rdc_gpio_direction_output(gpio, value); -} - -static inline int gpio_get_value(unsigned gpio) -{ - return rdc_gpio_get_value(gpio); -} - -static inline void gpio_set_value(unsigned gpio, int value) -{ - rdc_gpio_set_value(gpio, value); -} - -static inline int gpio_to_irq(unsigned gpio) -{ - return gpio; -} - -static inline int irq_to_gpio(unsigned irq) -{ - return irq; -} - -/* For cansleep */ -#include - -#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */ diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h similarity index 100% rename from arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h rename to arch/x86/include/asm/rdc321x_defs.h diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile deleted file mode 100644 index 8325b4ca431..00000000000 --- a/arch/x86/mach-rdc321x/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the RDC321x specific parts of the kernel -# -obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o - diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c deleted file mode 100644 index 247f33d3a40..00000000000 --- a/arch/x86/mach-rdc321x/gpio.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * GPIO support for RDC SoC R3210/R8610 - * - * Copyright (C) 2007, Florian Fainelli - * Copyright (C) 2008, Volker Weiss - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - - -#include -#include -#include -#include - -#include -#include - - -/* spin lock to protect our private copy of GPIO data register plus - the access to PCI conf registers. */ -static DEFINE_SPINLOCK(gpio_lock); - -/* copy of GPIO data registers */ -static u32 gpio_data_reg1; -static u32 gpio_data_reg2; - -static u32 gpio_request_data[2]; - - -static inline void rdc321x_conf_write(unsigned addr, u32 value) -{ - outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); - outl(value, RDC3210_CFGREG_DATA); -} - -static inline void rdc321x_conf_or(unsigned addr, u32 value) -{ - outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); - value |= inl(RDC3210_CFGREG_DATA); - outl(value, RDC3210_CFGREG_DATA); -} - -static inline u32 rdc321x_conf_read(unsigned addr) -{ - outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); - - return inl(RDC3210_CFGREG_DATA); -} - -/* configure pin as GPIO */ -static void rdc321x_configure_gpio(unsigned gpio) -{ - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - rdc321x_conf_or(gpio < 32 - ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2, - 1 << (gpio & 0x1f)); - spin_unlock_irqrestore(&gpio_lock, flags); -} - -/* initially setup the 2 copies of the gpio data registers. - This function must be called by the platform setup code. */ -void __init rdc321x_gpio_setup() -{ - /* this might not be, what others (BIOS, bootloader, etc.) - wrote to these registers before, but it's a good guess. Still - better than just using 0xffffffff. */ - - gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1); - gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2); -} - -/* determine, if gpio number is valid */ -static inline int rdc321x_is_gpio(unsigned gpio) -{ - return gpio <= RDC321X_MAX_GPIO; -} - -/* request GPIO */ -int rdc_gpio_request(unsigned gpio, const char *label) -{ - unsigned long flags; - - if (!rdc321x_is_gpio(gpio)) - return -EINVAL; - - spin_lock_irqsave(&gpio_lock, flags); - if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f))) - goto inuse; - gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f)); - spin_unlock_irqrestore(&gpio_lock, flags); - - return 0; -inuse: - spin_unlock_irqrestore(&gpio_lock, flags); - return -EINVAL; -} -EXPORT_SYMBOL(rdc_gpio_request); - -/* release previously-claimed GPIO */ -void rdc_gpio_free(unsigned gpio) -{ - unsigned long flags; - - if (!rdc321x_is_gpio(gpio)) - return; - - spin_lock_irqsave(&gpio_lock, flags); - gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f)); - spin_unlock_irqrestore(&gpio_lock, flags); -} -EXPORT_SYMBOL(rdc_gpio_free); - -/* read GPIO pin */ -int rdc_gpio_get_value(unsigned gpio) -{ - u32 reg; - unsigned long flags; - - spin_lock_irqsave(&gpio_lock, flags); - reg = rdc321x_conf_read(gpio < 32 - ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2); - spin_unlock_irqrestore(&gpio_lock, flags); - - return (1 << (gpio & 0x1f)) & reg ? 1 : 0; -} -EXPORT_SYMBOL(rdc_gpio_get_value); - -/* set GPIO pin to value */ -void rdc_gpio_set_value(unsigned gpio, int value) -{ - unsigned long flags; - u32 reg; - - reg = 1 << (gpio & 0x1f); - if (gpio < 32) { - spin_lock_irqsave(&gpio_lock, flags); - if (value) - gpio_data_reg1 |= reg; - else - gpio_data_reg1 &= ~reg; - rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1); - spin_unlock_irqrestore(&gpio_lock, flags); - } else { - spin_lock_irqsave(&gpio_lock, flags); - if (value) - gpio_data_reg2 |= reg; - else - gpio_data_reg2 &= ~reg; - rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2); - spin_unlock_irqrestore(&gpio_lock, flags); - } -} -EXPORT_SYMBOL(rdc_gpio_set_value); - -/* configure GPIO pin as input */ -int rdc_gpio_direction_input(unsigned gpio) -{ - if (!rdc321x_is_gpio(gpio)) - return -EINVAL; - - rdc321x_configure_gpio(gpio); - - return 0; -} -EXPORT_SYMBOL(rdc_gpio_direction_input); - -/* configure GPIO pin as output and set value */ -int rdc_gpio_direction_output(unsigned gpio, int value) -{ - if (!rdc321x_is_gpio(gpio)) - return -EINVAL; - - gpio_set_value(gpio, value); - rdc321x_configure_gpio(gpio); - - return 0; -} -EXPORT_SYMBOL(rdc_gpio_direction_output); diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c deleted file mode 100644 index 4f4e50c3ad3..00000000000 --- a/arch/x86/mach-rdc321x/platform.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Generic RDC321x platform devices - * - * Copyright (C) 2007 Florian Fainelli - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#include -#include -#include -#include -#include -#include - -#include - -/* LEDS */ -static struct gpio_led default_leds[] = { - { .name = "rdc:dmz", .gpio = 1, }, -}; - -static struct gpio_led_platform_data rdc321x_led_data = { - .num_leds = ARRAY_SIZE(default_leds), - .leds = default_leds, -}; - -static struct platform_device rdc321x_leds = { - .name = "leds-gpio", - .id = -1, - .dev = { - .platform_data = &rdc321x_led_data, - } -}; - -/* Watchdog */ -static struct platform_device rdc321x_wdt = { - .name = "rdc321x-wdt", - .id = -1, - .num_resources = 0, -}; - -static struct platform_device *rdc321x_devs[] = { - &rdc321x_leds, - &rdc321x_wdt -}; - -static int __init rdc_board_setup(void) -{ - rdc321x_gpio_setup(); - - return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); -} - -arch_initcall(rdc_board_setup); diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index bf92802f2bb..36e221beedc 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -37,7 +37,7 @@ #include #include -#include +#include #define RDC_WDT_MASK 0x80000000 /* Mask */ #define RDC_WDT_EN 0x00800000 /* Enable bit */ From 422e79a8b39d9ac73e410dc3cd099aecea82afd2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Jan 2009 17:06:42 +1100 Subject: [PATCH 129/682] x86: Remove never-called arch_setup_msi_irq() Since commit 75c46fa, "x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure", x86 has had an implementation of arch_setup_msi_irqs(). That implementation does not call arch_setup_msi_irq(), instead it calls setup_irq(). No other x86 code calls arch_setup_msi_irq(). That leaves only arch_setup_msi_irqs() in drivers/pci/msi.c, but that routine is overridden by the x86 version of arch_setup_msi_irqs(). So arch_setup_msi_irq() is dead code, remove it. Signed-off-by: Michael Ellerman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/io_apic.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 79b8c0c72d3..157aafa4558 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3462,40 +3462,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, msidesc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; From 5cdc5e9e69d4dc3a3630ae1fa666401b2a8dcde6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Jan 2009 20:49:37 +0100 Subject: [PATCH 130/682] x86: fully honor "nolapic", fix Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 48578795583..9ca12af6c87 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1131,7 +1131,9 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { +#ifdef CONFIG_X86_IO_APIC disable_ioapic_setup(); +#endif return; } From c6e50f93db5bd0895ec7c7d1b6f3886c6e1f11b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: [PATCH 131/682] x86: cleanup stack protector Impact: cleanup Make the following cleanups. * remove duplicate comment from boot_init_stack_canary() which fits better in the other place - cpu_idle(). * move stack_canary offset check from __switch_to() to boot_init_stack_canary(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 2 -- arch/x86/include/asm/stackprotector.h | 13 ++++++------- arch/x86/kernel/process_64.c | 7 ------- 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 5976cd803e9..4a8c9d382c9 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -40,6 +40,4 @@ extern void pda_init(int); #endif -#define refresh_stack_canary() write_pda(stack_canary, current->stack_canary) - #endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index c7f0d10bae7..2383e5bb475 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -16,13 +16,12 @@ static __always_inline void boot_init_stack_canary(void) u64 tsc; /* - * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us - and if we are the boot CPU we have - * a 0 stack canary. This is a good place for updating - * it, as we wont ever return from this function (so the - * invalid canaries already on the stack wont ever - * trigger). - * + * Build time only check to make sure the stack_canary is at + * offset 40 in the pda; this is a gcc ABI requirement + */ + BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); + + /* * We both use the random pool and the current TSC as a source * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index aa89eabf09e..088bc9a0f82 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -638,13 +638,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) percpu_write(kernel_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE - KERNEL_STACK_OFFSET); -#ifdef CONFIG_CC_STACKPROTECTOR - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); -#endif /* * Now maybe reload the debug registers and handle I/O bitmaps From b4a8f7a262e79ecb0b39beb1449af524a78887f8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 Jan 2009 12:29:19 +0900 Subject: [PATCH 132/682] x86: conditionalize stack canary handling in hot path Impact: no unnecessary stack canary swapping during context switch There's no point in moving stack_canary around during context switch if it's not enabled. Conditionalize it. Signed-off-by: Tejun Heo --- arch/x86/include/asm/system.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 8cadfe9b119..b77bd8bd3cc 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -86,17 +86,28 @@ do { \ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ "r12", "r13", "r14", "r15" +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movq %P[task_canary](%%rsi),%%r8\n\t" \ + "movq %%r8,%%gs:%P[pda_canary]\n\t" +#define __switch_canary_param \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ + , [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_param +#endif /* CC_STACKPROTECTOR */ + /* Save restore flags to clear handle leaking NT */ #define switch_to(prev, next, last) \ - asm volatile(SAVE_CONTEXT \ + asm volatile(SAVE_CONTEXT \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ ".globl thread_return\n" \ "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ - "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[pda_canary]\n\t" \ + __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ "movq %%rax,%%rdi\n\t" \ @@ -108,9 +119,8 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ - [task_canary] "i" (offsetof(struct task_struct, stack_canary)),\ - [current_task] "m" (per_cpu_var(current_task)), \ - [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary))\ + [current_task] "m" (per_cpu_var(current_task)) \ + __switch_canary_param \ : "memory", "cc" __EXTRA_CLOBBER) #endif From 8ce031972b40da58c268caba8c5ea3c0856d7131 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: [PATCH 133/682] x86: remove pda_init() Impact: cleanup Copy the code to cpu_init() to satisfy the requirement that the cpu be reinitialized. Remove all other calls, since the segments are already initialized in head_64.S. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 1 - arch/x86/kernel/cpu/common.c | 15 +++------------ arch/x86/kernel/head64.c | 2 -- arch/x86/xen/enlighten.c | 1 - 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 4a8c9d382c9..b473e952439 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -24,7 +24,6 @@ struct x8664_pda { } ____cacheline_aligned_in_smp; DECLARE_PER_CPU(struct x8664_pda, __pda); -extern void pda_init(int); #define cpu_pda(cpu) (&per_cpu(__pda, cpu)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7976a6a0f65..f83a4d6160f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -895,15 +895,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack); DEFINE_PER_CPU(unsigned int, irq_count) = -1; -void __cpuinit pda_init(int cpu) -{ - /* Setup up data that may be needed in __get_free_pages early */ - loadsegment(fs, 0); - loadsegment(gs, 0); - - load_pda_offset(cpu); -} - static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) __aligned(PAGE_SIZE); @@ -967,9 +958,9 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - /* CPU 0 is initialised in head64.c */ - if (cpu != 0) - pda_init(cpu); + loadsegment(fs, 0); + loadsegment(gs, 0); + load_pda_offset(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index af67d3227ea..f5b27224769 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); - pda_init(0); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 75b94139e1f..bef941f6145 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_64 /* Disable until direct per-cpu data access. */ have_vcpu_info_placement = 0; - pda_init(0); #endif xen_smp_init(); From 0bd74fa8e29dcad98f7e8ffe01ec05fb3326abaf Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: [PATCH 134/682] percpu: refactor percpu.h Impact: cleanup Refactor the DEFINE_PER_CPU_* macros and add .data.percpu.first section. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/percpu.h | 43 +++++++++++++++++-------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index aa6b9b1b30b..32bbf50d305 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -486,6 +486,7 @@ */ #define PERCPU_VADDR(vaddr, phdr) \ PERCPU_PROLOG(vaddr) \ + *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f2a3751873..0e24202b5a4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,34 +9,39 @@ #include #ifdef CONFIG_SMP -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define PER_CPU_BASE_SECTION ".data.percpu" #ifdef MODULE -#define SHARED_ALIGNED_SECTION ".data.percpu" +#define PER_CPU_SHARED_ALIGNED_SECTION "" #else -#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" #endif +#define PER_CPU_FIRST_SECTION ".first" + +#else + +#define PER_CPU_BASE_SECTION ".data" +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif + +#define DEFINE_PER_CPU_SECTION(type, name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "") #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ____cacheline_aligned_in_smp -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.page_aligned"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name -#else -#define DEFINE_PER_CPU(type, name) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) - -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) -#endif +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) From 8c7e58e690ae60ab4215b025f433ed4af261e103 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: [PATCH 135/682] x86: rework __per_cpu_load adjustments Impact: cleanup Use cpu_number to determine if the adjustment is necessary. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/head_64.S | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c8ace880661..98ea26a2fca 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -207,19 +207,15 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP /* - * early_gdt_base should point to the gdt_page in static percpu init - * data area. Computing this requires two symbols - __per_cpu_load - * and per_cpu__gdt_page. As linker can't do no such relocation, do - * it by hand. As early_gdt_descr is manipulated by C code for - * secondary CPUs, this should be done only once for the boot CPU - * when early_gdt_descr_base contains zero. + * Fix up static pointers that need __per_cpu_load added. The assembler + * is unable to do this directly. This is only needed for the boot cpu. + * These values are set up with the correct base addresses by C code for + * secondary cpus. */ - movq early_gdt_descr_base(%rip), %rax - testq %rax, %rax - jnz 1f - movq $__per_cpu_load, %rax - addq $per_cpu__gdt_page, %rax - movq %rax, early_gdt_descr_base(%rip) + movq initial_gs(%rip), %rax + cmpl $0, per_cpu__cpu_number(%rax) + jne 1f + addq %rax, early_gdt_descr_base(%rip) 1: #endif /* @@ -431,12 +427,8 @@ NEXT_PAGE(level2_spare_pgt) .globl early_gdt_descr early_gdt_descr: .word GDT_ENTRIES*8-1 -#ifdef CONFIG_SMP early_gdt_descr_base: - .quad 0x0000000000000000 -#else .quad per_cpu__gdt_page -#endif ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ From 947e76cdc34c782fc947313d4331380686eebbad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: [PATCH 136/682] x86: move stack_canary into irq_stack Impact: x86_64 percpu area layout change, irq_stack now at the beginning Now that the PDA is empty except for the stack canary, it can be removed. The irqstack is moved to the start of the per-cpu section. If the stack protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack. tj: * updated subject * dropped asm relocation of irq_stack_ptr * updated comments a bit * rebased on top of stack canary changes Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/pda.h | 3 --- arch/x86/include/asm/percpu.h | 6 ----- arch/x86/include/asm/processor.h | 23 +++++++++++++++++- arch/x86/include/asm/stackprotector.h | 6 ++--- arch/x86/include/asm/system.h | 4 ++-- arch/x86/kernel/asm-offsets_64.c | 4 ---- arch/x86/kernel/cpu/common.c | 7 +++--- arch/x86/kernel/head_64.S | 13 ++++------ arch/x86/kernel/setup_percpu.c | 34 ++++----------------------- arch/x86/kernel/vmlinux_64.lds.S | 8 +++++-- 10 files changed, 46 insertions(+), 62 deletions(-) diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index b473e952439..ba46416634f 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -17,9 +17,6 @@ struct x8664_pda { unsigned long unused4; int unused5; unsigned int unused6; /* 36 was cpunumber */ - unsigned long stack_canary; /* 40 stack canary value */ - /* gcc-ABI: this canary MUST be at - offset 40!!! */ short in_bootmem; /* pda lives in bootmem */ } ____cacheline_aligned_in_smp; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 165d5272ece..ce980db5e59 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -133,12 +133,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_X86_64 -extern void load_pda_offset(int cpu); -#else -static inline void load_pda_offset(int cpu) { } -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f511246fa6c..48676b943b9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -379,8 +379,29 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); -DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +union irq_stack_union { + char irq_stack[IRQ_STACK_SIZE]; + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + */ + struct { + char gs_base[40]; + unsigned long stack_canary; + }; +}; + +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); + +static inline void load_gs_base(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + mb(); +} #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 2383e5bb475..36a700acaf2 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -2,7 +2,7 @@ #define _ASM_STACKPROTECTOR_H 1 #include -#include +#include /* * Initialize the stackprotector canary value. @@ -19,7 +19,7 @@ static __always_inline void boot_init_stack_canary(void) * Build time only check to make sure the stack_canary is at * offset 40 in the pda; this is a gcc ABI requirement */ - BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); + BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); /* * We both use the random pool and the current TSC as a source @@ -32,7 +32,7 @@ static __always_inline void boot_init_stack_canary(void) canary += tsc + (tsc << 32UL); current->stack_canary = canary; - write_pda(stack_canary, canary); + percpu_write(irq_stack_union.stack_canary, canary); } #endif diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index b77bd8bd3cc..52eb748a68a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -89,10 +89,10 @@ do { \ #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[pda_canary]\n\t" + "movq %%r8,%%gs:%P[gs_canary]\n\t" #define __switch_canary_param \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ - , [pda_canary] "i" (offsetof(struct x8664_pda, stack_canary)) + , [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary #define __switch_canary_param diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 64c834a39aa..94f9c8b39d2 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -48,10 +48,6 @@ int main(void) #endif BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) - DEFINE(pda_size, sizeof(struct x8664_pda)); - BLANK(); -#undef ENTRY #ifdef CONFIG_PARAVIRT BLANK(); OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f83a4d6160f..098934e72a1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -881,12 +881,13 @@ __setup("clearcpuid=", setup_disablecpuid); #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; -DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); +DEFINE_PER_CPU_FIRST(union irq_stack_union, + irq_stack_union) __aligned(PAGE_SIZE); #ifdef CONFIG_SMP DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ #else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; + per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; #endif DEFINE_PER_CPU(unsigned long, kernel_stack) = @@ -960,7 +961,7 @@ void __cpuinit cpu_init(void) loadsegment(fs, 0); loadsegment(gs, 0); - load_pda_offset(cpu); + load_gs_base(cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 98ea26a2fca..a0a2b5ca9b7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -242,13 +242,10 @@ ENTRY(secondary_startup_64) /* Set up %gs. * - * On SMP, %gs should point to the per-cpu area. For initial - * boot, make %gs point to the init data section. For a - * secondary CPU,initial_gs should be set to its pda address - * before the CPU runs this code. - * - * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't - * change. + * The base of %gs always points to the bottom of the irqstack + * union. If the stack protector canary is enabled, it is + * located at %gs:40. Note that, on SMP, the boot cpu uses + * init data section till per cpu areas are set up. */ movl $MSR_GS_BASE,%ecx movq initial_gs(%rip),%rax @@ -281,7 +278,7 @@ ENTRY(secondary_startup_64) #ifdef CONFIG_SMP .quad __per_cpu_load #else - .quad PER_CPU_VAR(__pda) + .quad PER_CPU_VAR(irq_stack_union) #endif __FINITDATA diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index efbafbbff58..90b8e154bb5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void); static inline void setup_node_to_cpumask_map(void) { } #endif -/* - * Define load_pda_offset() and per-cpu __pda for x86_64. - * load_pda_offset() is responsible for loading the offset of pda into - * %gs. - * - * On SMP, pda offset also duals as percpu base address and thus it - * should be at the start of per-cpu area. To achieve this, it's - * preallocated in vmlinux_64.lds.S directly instead of using - * DEFINE_PER_CPU(). - */ -#ifdef CONFIG_X86_64 -void __cpuinit load_pda_offset(int cpu) -{ - /* Memory clobbers used to order pda/percpu accesses */ - mb(); - wrmsrl(MSR_GS_BASE, cpu_pda(cpu)); - mb(); -} -#ifndef CONFIG_SMP -DEFINE_PER_CPU(struct x8664_pda, __pda); -#endif -EXPORT_PER_CPU_SYMBOL(__pda); -#endif /* CONFIG_SMP && CONFIG_X86_64 */ - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; /* - * CPU0 modified pda in the init data area, reload pda - * offset for CPU0 and clear the area for others. + * Up to this point, CPU0 has been using .data.init + * area. Reload %gs offset for CPU0. */ if (cpu == 0) - load_pda_offset(0); - else - memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu))); + load_gs_base(cpu); #endif DBG("PERCPU: cpu %4d %p\n", cpu, ptr); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index a09abb8fb97..c9740996430 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -220,8 +220,7 @@ SECTIONS * so that it can be accessed as a percpu variable. */ . = ALIGN(PAGE_SIZE); - PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) - per_cpu____pda = __per_cpu_start; + PERCPU_VADDR(0, :percpu) #else PERCPU(PAGE_SIZE) #endif @@ -262,3 +261,8 @@ SECTIONS */ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif From 0d974d4592708f85044751817da4b7016e1b0602 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 18 Jan 2009 19:52:25 -0500 Subject: [PATCH 137/682] x86: remove pda.h Impact: cleanup Signed-off-by: Brian Gerst --- arch/x86/include/asm/pda.h | 39 ------------------------------- arch/x86/include/asm/pgtable_64.h | 1 - arch/x86/include/asm/smp.h | 1 - arch/x86/kernel/asm-offsets_64.c | 1 - arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/process_64.c | 1 - arch/x86/kernel/traps.c | 1 - 7 files changed, 45 deletions(-) delete mode 100644 arch/x86/include/asm/pda.h diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h deleted file mode 100644 index ba46416634f..00000000000 --- a/arch/x86/include/asm/pda.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef _ASM_X86_PDA_H -#define _ASM_X86_PDA_H - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#include -#include - -/* Per processor datastructure. %gs points to it while the kernel runs */ -struct x8664_pda { - unsigned long unused1; - unsigned long unused2; - unsigned long unused3; - unsigned long unused4; - int unused5; - unsigned int unused6; /* 36 was cpunumber */ - short in_bootmem; /* pda lives in bootmem */ -} ____cacheline_aligned_in_smp; - -DECLARE_PER_CPU(struct x8664_pda, __pda); - -#define cpu_pda(cpu) (&per_cpu(__pda, cpu)) - -#define read_pda(field) percpu_read(__pda.field) -#define write_pda(field, val) percpu_write(__pda.field, val) -#define add_pda(field, val) percpu_add(__pda.field, val) -#define sub_pda(field, val) percpu_sub(__pda.field, val) -#define or_pda(field, val) percpu_or(__pda.field, val) - -/* This is not atomic against other CPUs -- CPU preemption needs to be off */ -#define test_and_clear_bit_pda(bit, field) \ - x86_test_and_clear_bit_percpu(bit, __pda.field) - -#endif - -#endif /* _ASM_X86_PDA_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ba09289acca..1df9637dfda 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -11,7 +11,6 @@ #include #include #include -#include extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 68636e767a9..45ef8a1b9d7 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -15,7 +15,6 @@ # include # endif #endif -#include #include #include diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 94f9c8b39d2..8793ab33e2c 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 098934e72a1..3887fcf6e51 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -30,7 +30,6 @@ #include #endif -#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 088bc9a0f82..c422eebb0c5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055284..ed5aee5f3fc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -59,7 +59,6 @@ #ifdef CONFIG_X86_64 #include #include -#include #else #include #include From 6b7c38d55587f43bcd2cbce3a98b1c0826982090 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: [PATCH 138/682] linker script: kill PERCPU_VADDR_PREALLOC() Impact: cleanup With .data.percpu.first in place, PERCPU_VADDR_PREALLOC() is no longer necessary. Kill it. Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 45 ++++++------------------------- 1 file changed, 8 insertions(+), 37 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32bbf50d305..53e21f36a80 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -430,22 +430,10 @@ *(.initcall7.init) \ *(.initcall7s.init) -#define PERCPU_PROLOG(vaddr) \ - VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__per_cpu_start) = .; - -#define PERCPU_EPILOG(phdr) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ - } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); - /** - * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc + * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) * @phdr: destination PHDR (optional) - * @prealloc: the size of prealloc area * * Macro which expands to output section for percpu area. If @vaddr * is not blank, it specifies explicit base address and all percpu @@ -457,40 +445,23 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * - * If @prealloc is non-zero, the specified number of bytes will be - * reserved at the start of percpu area. As the prealloc area is - * likely to break alignment, this macro puts areas in increasing - * alignment order. - * * This macro defines three symbols, __per_cpu_load, __per_cpu_start * and __per_cpu_end. The first one is the vaddr of loaded percpu * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the * end offset. */ -#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \ - PERCPU_PROLOG(vaddr) \ - . += prealloc; \ - *(.data.percpu) \ - *(.data.percpu.shared_aligned) \ - *(.data.percpu.page_aligned) \ - PERCPU_EPILOG(segment) - -/** - * PERCPU_VADDR - define output section for percpu area - * @vaddr: explicit base address (optional) - * @phdr: destination PHDR (optional) - * - * Macro which expands to output section for percpu area. Mostly - * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than - * using slighly different layout. - */ #define PERCPU_VADDR(vaddr, phdr) \ - PERCPU_PROLOG(vaddr) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ - PERCPU_EPILOG(phdr) + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } phdr \ + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version From 5766b842b23c6b40935a5f3bd435b2bcdaff2143 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Jan 2009 09:13:15 +0100 Subject: [PATCH 139/682] x86, cpumask: fix tlb flush race Impact: fix bootup crash The cpumask is now passed in as a reference to mm->cpu_vm_mask, not on the stack - hence it is not constant anymore during the TLB flush. That way it could race and some static sanity checks would trigger: [ 238.154287] ------------[ cut here ]------------ [ 238.156039] kernel BUG at arch/x86/kernel/tlb_32.c:130! [ 238.156039] invalid opcode: 0000 [#1] SMP [ 238.156039] last sysfs file: /sys/class/net/eth2/address [ 238.156039] Modules linked in: [ 238.156039] [ 238.156039] Pid: 6493, comm: ifup-eth Not tainted (2.6.29-rc2-tip #1) P4DC6 [ 238.156039] EIP: 0060:[] EFLAGS: 00010202 CPU: 2 [ 238.156039] EIP is at native_flush_tlb_others+0x35/0x158 [ 238.156039] EAX: c0ef972c EBX: f6143301 ECX: 00000000 EDX: 00000000 [ 238.156039] ESI: f61433a8 EDI: f6143200 EBP: f34f3e00 ESP: f34f3df0 [ 238.156039] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 238.156039] Process ifup-eth (pid: 6493, ti=f34f2000 task=f399ab00 task.ti=f34f2000) [ 238.156039] Stack: [ 238.156039] ffffffff f61433a8 ffffffff f6143200 f34f3e18 c0118e9c 00000000 f6143200 [ 238.156039] f61433a8 f5bec738 f34f3e28 c0119435 c2b5b830 f6143200 f34f3e34 c01c2dc3 [ 238.156039] bffd9000 f34f3e60 c01c3051 00000000 ffffffff f34f3e4c 00000000 00000071 [ 238.156039] Call Trace: [ 238.156039] [] ? flush_tlb_others+0x52/0x5b [ 238.156039] [] ? flush_tlb_mm+0x7f/0x8b [ 238.156039] [] ? tlb_finish_mmu+0x2d/0x55 [ 238.156039] [] ? exit_mmap+0x124/0x170 [ 238.156039] [] ? mmput+0x40/0xf5 [ 238.156039] [] ? flush_old_exec+0x640/0x94b [ 238.156039] [] ? fsnotify_access+0x37/0x39 [ 238.156039] [] ? kernel_read+0x39/0x4b [ 238.156039] [] ? load_elf_binary+0x4a1/0x11bb [ 238.156039] [] ? might_fault+0x51/0x9c [ 238.156039] [] ? paravirt_read_tsc+0x20/0x4f [ 238.156039] [] ? native_sched_clock+0x5d/0x60 [ 238.156039] [] ? search_binary_handler+0xab/0x2c4 [ 238.156039] [] ? load_elf_binary+0x0/0x11bb [ 238.156039] [] ? _raw_read_unlock+0x21/0x46 [ 238.156039] [] ? load_elf_binary+0x0/0x11bb [ 238.156039] [] ? search_binary_handler+0xb2/0x2c4 [ 238.156039] [] ? do_execve+0x21c/0x2ee [ 238.156039] [] ? sys_execve+0x51/0x8c [ 238.156039] [] ? sysenter_do_call+0x12/0x43 Fix it by not assuming that the cpumask is constant. Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_32.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index ec53818f4e3..d37bbfcb813 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -125,9 +125,8 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { /* - * - mask must exist :) + * mm must exist :) */ - BUG_ON(cpumask_empty(cpumask)); BUG_ON(!mm); /* @@ -138,14 +137,18 @@ void native_flush_tlb_others(const struct cpumask *cpumask, spin_lock(&tlbstate_lock); cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); + + /* + * If a task whose mm mask we are looking at has descheduled and + * has cleared its presence from the mask, or if a CPU which we ran + * on has gone down then there might be no flush work left: + */ if (unlikely(cpumask_empty(flush_cpumask))) { spin_unlock(&tlbstate_lock); return; } -#endif + flush_mm = mm; flush_va = va; From 92181f190b649f7ef2b79cbf5c00f26ccc66da2a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 20 Jan 2009 04:24:26 +0100 Subject: [PATCH 140/682] x86: optimise x86's do_page_fault (C entry point for the page fault path) Impact: cleanup, restructure code to improve assembly gcc isn't _all_ that smart about spilling registers to stack or reusing stack slots, even with branch annotations. do_page_fault contained a lot of functionality, so split unlikely paths into their own functions, and mark them as noinline just to be sure. I consider this actually to be somewhat of a cleanup too: the main function now contains about half the number of lines so the normal path is easier to read, while the error cases are also nicely split away. Also, ensure the order of arguments to functions is always the same: regs, addr, error_code. This can reduce code size a tiny bit, and just looks neater too. And add a couple of branch annotations. Before: do_page_fault: subq $360, %rsp #, After: do_page_fault: subq $56, %rsp #, bloat-o-meter: add/remove: 8/0 grow/shrink: 0/1 up/down: 2222/-1680 (542) function old new delta __bad_area_nosemaphore - 506 +506 no_context - 474 +474 vmalloc_fault - 424 +424 spurious_fault - 358 +358 mm_fault_error - 272 +272 bad_area_access_error - 89 +89 bad_area - 89 +89 bad_area_nosemaphore - 10 +10 do_page_fault 2464 784 -1680 Yes, the total size increases by 542 bytes, due to the extra function calls. But these will very rarely be called (except for vmalloc_fault) in a normal workload. Importantly, do_page_fault is less than 1/3rd it's original size, and touches far less stack. Existing gotos and branch hints did move a lot of the infrequently used text out of the fastpath, but that's even further improved after this patch. Signed-off-by: Nick Piggin Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 438 ++++++++++++++++++++++++++------------------ 1 file changed, 256 insertions(+), 182 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 90dfae511a4..033292dc9e2 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -91,8 +91,8 @@ static inline int notify_page_fault(struct pt_regs *regs) * * Opcode checker based on code by Richard Brunner */ -static int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) +static int is_prefetch(struct pt_regs *regs, unsigned long error_code, + unsigned long addr) { unsigned char *instr; int scan_more = 1; @@ -409,15 +409,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, } #ifdef CONFIG_X86_64 -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static noinline void pgtable_bad(struct pt_regs *regs, + unsigned long error_code, unsigned long address) { unsigned long flags = oops_begin(); int sig = SIGKILL; - struct task_struct *tsk; + struct task_struct *tsk = current; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", - current->comm, address); + tsk->comm, address); dump_pagetable(address); tsk = current; tsk->thread.cr2 = address; @@ -429,6 +429,190 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, } #endif +static noinline void no_context(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; +#ifdef CONFIG_X86_64 + unsigned long flags; + int sig; +#endif + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * X86_32 + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * X86_64 + * Hall of shame of CPU/BIOS bugs. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata93(regs, address)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ +#ifdef CONFIG_X86_32 + bust_spinlocks(1); +#else + flags = oops_begin(); +#endif + + show_fault_oops(regs, error_code, address); + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + +#ifdef CONFIG_X86_32 + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); +#else + sig = SIGKILL; + if (__die("Oops", regs, error_code)) + sig = 0; + /* Executive summary in case the body of the oops scrolled away */ + printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, sig); +#endif +} + +static void __bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct task_struct *tsk = current; + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + /* + * Valid to do another page fault here because this one came + * from user space. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata100(regs, address)) + return; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk( + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } + + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; + } + + if (is_f00f_bug(regs, address)) + return; + + no_context(regs, error_code, address); +} + +static noinline void bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); +} + +static void __bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct mm_struct *mm = current->mm; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ + up_read(&mm->mmap_sem); + + __bad_area_nosemaphore(regs, error_code, address, si_code); +} + +static noinline void bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_MAPERR); +} + +static noinline void bad_area_access_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_ACCERR); +} + +/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ +static void out_of_memory(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(¤t->mm->mmap_sem); + pagefault_out_of_memory(); +} + +static void do_sigbus(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); +#ifdef CONFIG_X86_32 + /* User space => ok to do another page fault */ + if (is_prefetch(regs, error_code, address)) + return; +#endif + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +static noinline void mm_fault_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address, unsigned int fault) +{ + if (fault & VM_FAULT_OOM) + out_of_memory(regs, error_code, address); + else if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); +} + static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) @@ -448,8 +632,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static int spurious_fault(unsigned long address, - unsigned long error_code) +static noinline int spurious_fault(unsigned long error_code, + unsigned long address) { pgd_t *pgd; pud_t *pud; @@ -494,7 +678,7 @@ static int spurious_fault(unsigned long address, * * This assumes no large pages in there. */ -static int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { #ifdef CONFIG_X86_32 unsigned long pgd_paddr; @@ -573,6 +757,25 @@ static int vmalloc_fault(unsigned long address) int show_unhandled_signals = 1; +static inline int access_error(unsigned long error_code, int write, + struct vm_area_struct *vma) +{ + if (write) { + /* write, present and write, not present */ + if (unlikely(!(vma->vm_flags & VM_WRITE))) + return 1; + } else if (unlikely(error_code & PF_PROT)) { + /* read, present */ + return 1; + } else { + /* read, not present */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + } + + return 0; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -583,16 +786,12 @@ asmlinkage #endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address; struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; - unsigned long address; - int write, si_code; + int write; int fault; -#ifdef CONFIG_X86_64 - unsigned long flags; - int sig; -#endif tsk = current; mm = tsk->mm; @@ -601,9 +800,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* get the address */ address = read_cr2(); - si_code = SEGV_MAPERR; - - if (notify_page_fault(regs)) + if (unlikely(notify_page_fault(regs))) return; if (unlikely(kmmio_fault(regs, address))) return; @@ -631,17 +828,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) return; /* Can handle a stale RO->RW TLB */ - if (spurious_fault(address, error_code)) + if (spurious_fault(error_code, address)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. */ - goto bad_area_nosemaphore; + bad_area_nosemaphore(regs, error_code, address); + return; } - /* * It's safe to allow irq's after cr2 has been saved and the * vmalloc fault has been handled. @@ -657,15 +854,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) #ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) - pgtable_bad(address, regs, error_code); + pgtable_bad(regs, error_code, address); #endif /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault. */ - if (unlikely(in_atomic() || !mm)) - goto bad_area_nosemaphore; + if (unlikely(in_atomic() || !mm)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } /* * When running in the kernel we expect faults to occur only to @@ -683,20 +882,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. */ - if (!down_read_trylock(&mm->mmap_sem)) { + if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && - !search_exception_tables(regs->ip)) - goto bad_area_nosemaphore; + !search_exception_tables(regs->ip)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } down_read(&mm->mmap_sem); } vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) + if (unlikely(!vma)) { + bad_area(regs, error_code, address); + return; + } + if (likely(vma->vm_start <= address)) goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, error_code, address); + return; + } if (error_code & PF_USER) { /* * Accessing the stack below %sp is always a bug. @@ -704,31 +909,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * and pusha to work. ("enter $65535,$31" pushes * 32 pointers and then decrements %sp by 65535.) */ - if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) - goto bad_area; + if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { + bad_area(regs, error_code, address); + return; + } } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ + if (unlikely(expand_stack(vma, address))) { + bad_area(regs, error_code, address); + return; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ good_area: - si_code = SEGV_ACCERR; - write = 0; - switch (error_code & (PF_PROT|PF_WRITE)) { - default: /* 3: write, present */ - /* fall through */ - case PF_WRITE: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case PF_PROT: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; + write = error_code & PF_WRITE; + if (unlikely(access_error(error_code, write, vma))) { + bad_area_access_error(regs, error_code, address); + return; } /* @@ -738,11 +937,8 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); + mm_fault_error(regs, error_code, address, fault); + return; } if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; @@ -760,128 +956,6 @@ good_area: } #endif up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { - /* - * It's possible to have interrupts off here. - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata100(regs, address)) - return; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk( - "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *) regs->ip, (void *) regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); - return; - } - - if (is_f00f_bug(regs, address)) - return; - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * X86_32 - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - * - * X86_64 - * Hall of shame of CPU/BIOS bugs. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata93(regs, address)) - return; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ -#ifdef CONFIG_X86_32 - bust_spinlocks(1); -#else - flags = oops_begin(); -#endif - - show_fault_oops(regs, error_code, address); - - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; - -#ifdef CONFIG_X86_32 - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); -#else - sig = SIGKILL; - if (__die("Oops", regs, error_code)) - sig = 0; - /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, sig); -#endif - -out_of_memory: - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ - up_read(&mm->mmap_sem); - pagefault_out_of_memory(); - return; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & PF_USER)) - goto no_context; -#ifdef CONFIG_X86_32 - /* User space => ok to do another page fault */ - if (is_prefetch(regs, address, error_code)) - return; -#endif - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } DEFINE_SPINLOCK(pgd_lock); From afb33f8c0d7dea8c48ae1c2e3af5b437aa8dd7bb Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 12 Jan 2009 12:53:45 +0100 Subject: [PATCH 141/682] x86: remove byte locks Impact: cleanup Remove byte locks implementation, which was introduced by Jeremy in 8efcbab6 ("paravirt: introduce a "lock-byte" spinlock implementation"), but turned out to be dead code that is not used by any in-kernel virtualization guest (Xen uses its own variant of spinlocks implementation and KVM is not planning to move to byte locks). Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 2 - arch/x86/include/asm/spinlock.h | 66 +--------------------------- arch/x86/kernel/paravirt-spinlocks.c | 10 ----- 3 files changed, 2 insertions(+), 76 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..32bc6c2c138 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1389,8 +1389,6 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) -void paravirt_use_bytelocks(void); - #ifdef CONFIG_SMP static inline int __raw_spin_is_locked(struct raw_spinlock *lock) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da..2bd6b111a41 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -172,70 +172,8 @@ static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; } -#ifdef CONFIG_PARAVIRT -/* - * Define virtualization-friendly old-style lock byte lock, for use in - * pv_lock_ops if desired. - * - * This differs from the pre-2.6.24 spinlock by always using xchgb - * rather than decb to take the lock; this allows it to use a - * zero-initialized lock structure. It also maintains a 1-byte - * contention counter, so that we can implement - * __byte_spin_is_contended. - */ -struct __byte_spinlock { - s8 lock; - s8 spinners; -}; +#ifndef CONFIG_PARAVIRT -static inline int __byte_spin_is_locked(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->lock != 0; -} - -static inline int __byte_spin_is_contended(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - return bl->spinners != 0; -} - -static inline void __byte_spin_lock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - s8 val = 1; - - asm("1: xchgb %1, %0\n" - " test %1,%1\n" - " jz 3f\n" - " " LOCK_PREFIX "incb %2\n" - "2: rep;nop\n" - " cmpb $1, %0\n" - " je 2b\n" - " " LOCK_PREFIX "decb %2\n" - " jmp 1b\n" - "3:" - : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); -} - -static inline int __byte_spin_trylock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - u8 old = 1; - - asm("xchgb %1,%0" - : "+m" (bl->lock), "+q" (old) : : "memory"); - - return old == 0; -} - -static inline void __byte_spin_unlock(raw_spinlock_t *lock) -{ - struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; - smp_wmb(); - bl->lock = 0; -} -#else /* !CONFIG_PARAVIRT */ static inline int __raw_spin_is_locked(raw_spinlock_t *lock) { return __ticket_spin_is_locked(lock); @@ -267,7 +205,7 @@ static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, __raw_spin_lock(lock); } -#endif /* CONFIG_PARAVIRT */ +#endif static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 95777b0faa7..3a7c5a44082 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { }; EXPORT_SYMBOL(pv_lock_ops); -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} From 67e68bde02fe783efc2ce2ca31bdb992f5235f8d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 142/682] x86: update canary handling during switch Impact: cleanup In switch_to(), instead of taking offset to irq_stack_union.stack, make it a proper percpu access using __percpu_arg() and per_cpu_var(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/system.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 52eb748a68a..2fcc70bc85f 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -89,13 +89,15 @@ do { \ #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ - "movq %%r8,%%gs:%P[gs_canary]\n\t" -#define __switch_canary_param \ - , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \ - , [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary)) + "movq %%r8,"__percpu_arg([gs_canary])"\n\t" +#define __switch_canary_oparam \ + , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) +#define __switch_canary_iparam \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary -#define __switch_canary_param +#define __switch_canary_oparam +#define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ /* Save restore flags to clear handle leaking NT */ @@ -114,13 +116,14 @@ do { \ "jc ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ + __switch_canary_oparam \ : [next] "S" (next), [prev] "D" (prev), \ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [tif_fork] "i" (TIF_FORK), \ [thread_info] "i" (offsetof(struct task_struct, stack)), \ [current_task] "m" (per_cpu_var(current_task)) \ - __switch_canary_param \ + __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif From 06deef892c7327992434917fb6592c233430803d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 143/682] x86: clean up gdt_page definition Impact: cleanup && more compact percpu area layout with future changes Move 64-bit GDT to page-aligned section and clean up comment formatting. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3887fcf6e51..a8f0dedcb0c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -63,23 +63,23 @@ cpumask_t cpu_sibling_setup_map; static struct cpu_dev *this_cpu __cpuinitdata; +DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 -/* We need valid kernel segments for data and code in long mode too - * IRET will check the segment types kkeil 2000/10/28 - * Also sysret mandates a special GDT layout - */ -/* The TLS descriptors are currently at a different place compared to i386. - Hopefully nobody expects them at a fixed place (Wine?) */ -DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { + /* + * We need valid kernel segments for data and code in long mode too + * IRET will check the segment types kkeil 2000/10/28 + * Also sysret mandates a special GDT layout + * + * The TLS descriptors are currently at a different place compared to i386. + * Hopefully nobody expects them at a fixed place (Wine?) + */ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, -} }; #else -DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, @@ -112,8 +112,8 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, -} }; #endif +} }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); #ifdef CONFIG_X86_32 From 299e26992a737804e13e74fdb97cdab470ed19ac Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 144/682] x86: fix percpu_write with 64-bit constants Impact: slightly better code generation for percpu_to_op() The processor will sign-extend 32-bit immediate values in 64-bit operations. Use the 'e' constraint ("32-bit signed integer constant, or a symbolic reference known to fit that range") for 64-bit constants. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ce980db5e59..0b64af4f13a 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -75,7 +75,7 @@ do { \ case 8: \ asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ - : "r" ((T__)val)); \ + : "re" ((T__)val)); \ break; \ default: __bad_percpu_size(); \ } \ From 0dd76d736eeb3e0ef86c5b103b47ae0e15edebad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:05 +0900 Subject: [PATCH 145/682] x86: set %fs to __KERNEL_PERCPU unconditionally for x86_32 Impact: cleanup %fs is currently set to __KERNEL_DS at boot, and conditionally switched to __KERNEL_PERCPU for secondary cpus. Instead, initialize GDT_ENTRY_PERCPU to the same attributes as GDT_ENTRY_KERNEL_DS and set %fs to __KERNEL_PERCPU unconditionally. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/head_32.S | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a8f0dedcb0c..fbebbcec001 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -111,7 +111,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, - [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, + [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70..24c0e5cd71e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - movl %eax,%fs # gets reset once there's real percpu movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es + movl $(__KERNEL_PERCPU), %eax + movl %eax,%fs # set this cpu's percpu + xorl %eax,%eax # Clear GS and LDT movl %eax,%gs lldt %ax @@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP movb $1, ready cmpb $0,%cl # the first CPU calls start_kernel je 1f - movl $(__KERNEL_PERCPU), %eax - movl %eax,%fs # set this cpu's percpu movl (stack_start), %esp 1: #endif /* CONFIG_SMP */ From 6826c8ff07b5f95df0473a748a9831707079b940 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 146/682] x86: merge mmu_context.h Impact: cleanup tj: * changed cpu to unsigned as was done on mmu_context_64.h as cpu id is officially unsigned int * added missing ';' to 32bit version of deactivate_mm() Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/mmu_context.h | 63 +++++++++++++++++++++++++-- arch/x86/include/asm/mmu_context_32.h | 55 ----------------------- arch/x86/include/asm/mmu_context_64.h | 52 ---------------------- 3 files changed, 59 insertions(+), 111 deletions(-) delete mode 100644 arch/x86/include/asm/mmu_context_32.h delete mode 100644 arch/x86/include/asm/mmu_context_64.h diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8aeeb3fd73d..52948df9cd1 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, int init_new_context(struct task_struct *tsk, struct mm_struct *mm); void destroy_context(struct mm_struct *mm); -#ifdef CONFIG_X86_32 -# include "mmu_context_32.h" -#else -# include "mmu_context_64.h" + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +#ifdef CONFIG_SMP + if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) + percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); #endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) +{ + unsigned cpu = smp_processor_id(); + + if (likely(prev != next)) { + /* stop flush ipis for the previous mm */ + cpu_clear(cpu, prev->cpu_vm_mask); +#ifdef CONFIG_SMP + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + percpu_write(cpu_tlbstate.active_mm, next); +#endif + cpu_set(cpu, next->cpu_vm_mask); + + /* Re-load page tables */ + load_cr3(next->pgd); + + /* + * load the LDT, if the LDT is different: + */ + if (unlikely(prev->context.ldt != next->context.ldt)) + load_LDT_nolock(&next->context); + } +#ifdef CONFIG_SMP + else { + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); + + if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { + /* We were in lazy tlb mode and leave_mm disabled + * tlb flush IPI delivery. We must reload CR3 + * to make sure to use no freed page tables. + */ + load_cr3(next->pgd); + load_LDT_nolock(&next->context); + } + } +#endif +} #define activate_mm(prev, next) \ do { \ @@ -33,5 +76,17 @@ do { \ switch_mm((prev), (next), NULL); \ } while (0); +#ifdef CONFIG_X86_32 +#define deactivate_mm(tsk, mm) \ +do { \ + loadsegment(gs, 0); \ +} while (0) +#else +#define deactivate_mm(tsk, mm) \ +do { \ + load_gs_index(0); \ + loadsegment(fs, 0); \ +} while (0) +#endif #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h deleted file mode 100644 index 08b53454f83..00000000000 --- a/arch/x86/include/asm/mmu_context_32.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_32_H -#define _ASM_X86_MMU_CONTEXT_32_H - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, - struct mm_struct *next, - struct task_struct *tsk) -{ - int cpu = smp_processor_id(); - - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - percpu_write(cpu_tlbstate.active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - - /* Re-load page tables */ - load_cr3(next->pgd); - - /* - * load the LDT, if the LDT is different: - */ - if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); - - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload %cr3. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%gs": :"r" (0)); - -#endif /* _ASM_X86_MMU_CONTEXT_32_H */ diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h deleted file mode 100644 index c4572505ab3..00000000000 --- a/arch/x86/include/asm/mmu_context_64.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef _ASM_X86_MMU_CONTEXT_64_H -#define _ASM_X86_MMU_CONTEXT_64_H - -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) -{ -#ifdef CONFIG_SMP - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) - percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif -} - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned cpu = smp_processor_id(); - if (likely(prev != next)) { - /* stop flush ipis for the previous mm */ - cpu_clear(cpu, prev->cpu_vm_mask); -#ifdef CONFIG_SMP - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - percpu_write(cpu_tlbstate.active_mm, next); -#endif - cpu_set(cpu, next->cpu_vm_mask); - load_cr3(next->pgd); - - if (unlikely(next->context.ldt != prev->context.ldt)) - load_LDT_nolock(&next->context); - } -#ifdef CONFIG_SMP - else { - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); - BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); - - if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { - /* We were in lazy tlb mode and leave_mm disabled - * tlb flush IPI delivery. We must reload CR3 - * to make sure to use no freed page tables. - */ - load_cr3(next->pgd); - load_LDT_nolock(&next->context); - } - } -#endif -} - -#define deactivate_mm(tsk, mm) \ -do { \ - load_gs_index(0); \ - asm volatile("movl %0,%%fs"::"r"(0)); \ -} while (0) - -#endif /* _ASM_X86_MMU_CONTEXT_64_H */ From d650a5148593b65a3c3f9a344f46b91b7dfe7713 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 147/682] x86: merge irq_regs.h Impact: cleanup, better irq_regs code generation for x86_64 Make 64-bit use the same optimizations as 32-bit. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/irq_regs.h | 36 +++++++++++++++++++++++++----- arch/x86/include/asm/irq_regs_32.h | 31 ------------------------- arch/x86/include/asm/irq_regs_64.h | 1 - arch/x86/kernel/irq_64.c | 3 +++ 4 files changed, 34 insertions(+), 37 deletions(-) delete mode 100644 arch/x86/include/asm/irq_regs_32.h delete mode 100644 arch/x86/include/asm/irq_regs_64.h diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 89c898ab298..77843225b7e 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -1,5 +1,31 @@ -#ifdef CONFIG_X86_32 -# include "irq_regs_32.h" -#else -# include "irq_regs_64.h" -#endif +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the per-cpu area. + * + * Jeremy Fitzhardinge + */ +#ifndef _ASM_X86_IRQ_REGS_H +#define _ASM_X86_IRQ_REGS_H + +#include + +#define ARCH_HAS_OWN_IRQ_REGS + +DECLARE_PER_CPU(struct pt_regs *, irq_regs); + +static inline struct pt_regs *get_irq_regs(void) +{ + return percpu_read(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = get_irq_regs(); + percpu_write(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h deleted file mode 100644 index d7ed33ee94e..00000000000 --- a/arch/x86/include/asm/irq_regs_32.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Per-cpu current frame pointer - the location of the last exception frame on - * the stack, stored in the per-cpu area. - * - * Jeremy Fitzhardinge - */ -#ifndef _ASM_X86_IRQ_REGS_32_H -#define _ASM_X86_IRQ_REGS_32_H - -#include - -#define ARCH_HAS_OWN_IRQ_REGS - -DECLARE_PER_CPU(struct pt_regs *, irq_regs); - -static inline struct pt_regs *get_irq_regs(void) -{ - return percpu_read(irq_regs); -} - -static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) -{ - struct pt_regs *old_regs; - - old_regs = get_irq_regs(); - percpu_write(irq_regs, new_regs); - - return old_regs; -} - -#endif /* _ASM_X86_IRQ_REGS_32_H */ diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h deleted file mode 100644 index 3dd9c0b7027..00000000000 --- a/arch/x86/include/asm/irq_regs_64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1db05247b47..0b254de8408 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -22,6 +22,9 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); +DEFINE_PER_CPU(struct pt_regs *, irq_regs); +EXPORT_PER_CPU_SYMBOL(irq_regs); + /* * Probabilistic stack overflow check: * From bdbcdd48883940bbd8d17eb01172d58a261a413a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 148/682] x86: uv cleanup Impact: cleanup Make the following uv related cleanups. * collect visible uv related definitions and interfaces into uv/uv.h and use it. this cleans up the messy situation where on 64bit, uv is defined properly, on 32bit generic it's dummy and on the rest undefined. after this clean up, uv is defined on 64 and dummy on 32. * update uv_flush_tlb_others() such that it takes cpumask of to-be-flushed cpus as argument, instead of that minus self, and returns yet-to-be-flushed cpumask, instead of modifying the passed in parameter. this interface change will ease dummy implementation of uv_flush_tlb_others() and makes uv tlb flush related stuff defined in tlb_uv proper. Signed-off-by: Tejun Heo --- arch/x86/include/asm/genapic_32.h | 7 ---- arch/x86/include/asm/genapic_64.h | 6 --- arch/x86/include/asm/uv/uv.h | 33 +++++++++++++++ arch/x86/include/asm/uv/uv_bau.h | 2 - arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/genx2apic_uv_x.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/tlb_64.c | 18 ++++---- arch/x86/kernel/tlb_uv.c | 68 ++++++++++++++++++------------- 9 files changed, 83 insertions(+), 54 deletions(-) create mode 100644 arch/x86/include/asm/uv/uv.h diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 2c05b737ee2..4334502d366 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -138,11 +138,4 @@ struct genapic { extern struct genapic *genapic; extern void es7000_update_genapic_to_cluster(void); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -#define get_uv_system_type() UV_NONE -#define is_uv_system() 0 -#define uv_wakeup_secondary(a, b) 1 -#define uv_system_init() do {} while (0) - - #endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index adf32fb56aa..7bb092c5905 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys; extern int acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -extern enum uv_system_type get_uv_system_type(void); -extern int is_uv_system(void); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void uv_cpu_init(void); -extern void uv_system_init(void); -extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h new file mode 100644 index 00000000000..dce5fe35013 --- /dev/null +++ b/arch/x86/include/asm/uv/uv.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_UV_UV_H +#define _ASM_X86_UV_UV_H + +enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; + +#ifdef CONFIG_X86_64 + +extern enum uv_system_type get_uv_system_type(void); +extern int is_uv_system(void); +extern void uv_cpu_init(void); +extern void uv_system_init(void); +extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, + unsigned int cpu); + +#else /* X86_64 */ + +static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline int is_uv_system(void) { return 0; } +static inline void uv_cpu_init(void) { } +static inline void uv_system_init(void) { } +static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +{ return 1; } +static inline const struct cpumask * +uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, + unsigned long va, unsigned int cpu) +{ return cpumask; } + +#endif /* X86_64 */ + +#endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 74e6393bfdd..9b0e61bf7a8 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) #define cpubit_isset(cpu, bau_local_cpumask) \ test_bit((cpu), (bau_local_cpumask).bits) -extern int uv_flush_tlb_others(struct cpumask *, - struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fbebbcec001..99904f288d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -28,6 +28,7 @@ #include #include #include +#include #endif #include diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b193e082f6c..bfe36249145 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 869b98840fd..def770b57b5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index e64a32c4882..b8bed841ad6 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -15,8 +15,7 @@ #include #include #include -#include -#include +#include DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; @@ -206,16 +205,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { if (is_uv_system()) { - /* FIXME: could be an percpu_alloc'd thing */ - static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask); + unsigned int cpu; - cpumask_andnot(after_uv_flush, cpumask, - cpumask_of(smp_processor_id())); - if (!uv_flush_tlb_others(after_uv_flush, mm, va)) - flush_tlb_others_ipi(after_uv_flush, mm, va); - - put_cpu_var(flush_tlb_uv_cpumask); + cpu = get_cpu(); + cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); + if (cpumask) + flush_tlb_others_ipi(cpumask, mm, va); + put_cpu(); return; } flush_tlb_others_ipi(cpumask, mm, va); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 690dcf1a27d..aae15dd7260 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * * Send a broadcast and wait for a broadcast message to complete. * - * The cpumaskp mask contains the cpus the broadcast was sent to. + * The flush_mask contains the cpus the broadcast was sent to. * - * Returns 1 if all remote flushing was done. The mask is zeroed. - * Returns 0 if some remote flushing remains to be done. The mask will have - * some bits still set. + * Returns NULL if all remote flushing was done. The mask is zeroed. + * Returns @flush_mask if some remote flushing remains to be done. The + * mask will have some bits still set. */ -int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, - struct cpumask *cpumaskp) +const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, + struct bau_desc *bau_desc, + struct cpumask *flush_mask) { int completion_status = 0; int right_shift; @@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, * Success, so clear the remote cpu's from the mask so we don't * use the IPI method of shootdown on them. */ - for_each_cpu(bit, cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); if (blade == this_blade) continue; - cpumask_clear_cpu(bit, cpumaskp); + cpumask_clear_cpu(bit, flush_mask); } - if (!cpumask_empty(cpumaskp)) - return 0; - return 1; + if (!cpumask_empty(flush_mask)) + return flush_mask; + return NULL; } /** * uv_flush_tlb_others - globally purge translation cache of a virtual * address or all TLB's - * @cpumaskp: mask of all cpu's in which the address is to be removed + * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @cpu: the current cpu * * This is the entry point for initiating any UV global TLB shootdown. * * Purges the translation caches of all specified processors of the given * virtual address, or purges all TLB's on specified processors. * - * The caller has derived the cpumaskp from the mm_struct and has subtracted - * the local cpu from the mask. This function is called only if there - * are bits set in the mask. (e.g. flush_tlb_page()) + * The caller has derived the cpumask from the mm_struct. This function + * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) * - * The cpumaskp is converted into a nodemask of the nodes containing + * The cpumask is converted into a nodemask of the nodes containing * the cpus. * - * Returns 1 if all remote flushing was done. - * Returns 0 if some remote flushing remains to be done. + * Note that this function should be called with preemption disabled. + * + * Returns NULL if all remote flushing was done. + * Returns pointer to cpumask if some remote flushing remains to be + * done. The returned pointer is valid till preemption is re-enabled. */ -int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, - unsigned long va) +const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va, unsigned int cpu) { + static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); + struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask); int i; int bit; int blade; - int cpu; + int uv_cpu; int this_blade; int locals = 0; struct bau_desc *bau_desc; - cpu = uv_blade_processor_id(); + WARN_ON(!in_atomic()); + + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + + uv_cpu = uv_blade_processor_id(); this_blade = uv_numa_blade_id(); bau_desc = __get_cpu_var(bau_control).descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; + bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); i = 0; - for_each_cpu(bit, cpumaskp) { + for_each_cpu(bit, flush_mask) { blade = uv_cpu_to_blade_id(bit); BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); if (blade == this_blade) { @@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, * no off_node flushing; return status for local node */ if (locals) - return 0; + return flush_mask; else - return 1; + return NULL; } __get_cpu_var(ptcstats).requestor++; __get_cpu_var(ptcstats).ntargeted += i; bau_desc->payload.address = va; - bau_desc->payload.sending_cpu = smp_processor_id(); + bau_desc->payload.sending_cpu = cpu; - return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); + return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); } /* From 6dd01bedee6c3191643db303a1dc530bad56ec55 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 149/682] x86: prepare for tlb merge Impact: clean up, ipi vector number reordering for x86_32 Make the following changes to prepare for tlb merge. * reorder x86_32 ip vectors * adjust tlb_32.c and tlb_64.c such that their logics coincide exactly - on spurious invalidate ipi, tlb_32 acks the irq - tlb_64 now has proper memory barriers around clearing flush_cpumask (no change in generated code) * unexport flush_tlb_page from tlb_32.c, there's no user * use unsigned int for cpu id * drop unnecessary includes from tlb_64.c Signed-off-by: Tejun Heo --- arch/x86/include/asm/irq_vectors.h | 33 +++++++++++++++--------------- arch/x86/kernel/tlb_32.c | 10 ++++----- arch/x86/kernel/tlb_64.c | 18 +++++++--------- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index a16a2ab2b42..4ee8f800504 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -49,31 +49,30 @@ * some of the following vectors are 'rare', they are merged * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. - * - * Vectors 0xf0-0xfa are free (reserved for future Linux use). */ #ifdef CONFIG_X86_32 # define SPURIOUS_APIC_VECTOR 0xff # define ERROR_APIC_VECTOR 0xfe -# define INVALIDATE_TLB_VECTOR 0xfd -# define RESCHEDULE_VECTOR 0xfc -# define CALL_FUNCTION_VECTOR 0xfb -# define CALL_FUNCTION_SINGLE_VECTOR 0xfa -# define THERMAL_APIC_VECTOR 0xf0 +# define RESCHEDULE_VECTOR 0xfd +# define CALL_FUNCTION_VECTOR 0xfc +# define CALL_FUNCTION_SINGLE_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xfa +/* 0xf1 - 0xf9 : free */ +# define INVALIDATE_TLB_VECTOR 0xf0 #else -#define SPURIOUS_APIC_VECTOR 0xff -#define ERROR_APIC_VECTOR 0xfe -#define RESCHEDULE_VECTOR 0xfd -#define CALL_FUNCTION_VECTOR 0xfc -#define CALL_FUNCTION_SINGLE_VECTOR 0xfb -#define THERMAL_APIC_VECTOR 0xfa -#define THRESHOLD_APIC_VECTOR 0xf9 -#define UV_BAU_MESSAGE 0xf8 -#define INVALIDATE_TLB_VECTOR_END 0xf7 -#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ +# define SPURIOUS_APIC_VECTOR 0xff +# define ERROR_APIC_VECTOR 0xfe +# define RESCHEDULE_VECTOR 0xfd +# define CALL_FUNCTION_VECTOR 0xfc +# define CALL_FUNCTION_SINGLE_VECTOR 0xfb +# define THERMAL_APIC_VECTOR 0xfa +# define THRESHOLD_APIC_VECTOR 0xf9 +# define UV_BAU_MESSAGE 0xf8 +# define INVALIDATE_TLB_VECTOR_END 0xf7 +# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ #define NUM_INVALIDATE_TLB_VECTORS 8 diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index abf0808d6fc..93fcb05c7d4 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -84,13 +84,15 @@ EXPORT_SYMBOL_GPL(leave_mm); * * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. * 2) Leave the mm if we are in the lazy tlb mode. + * + * Interrupts are disabled. */ void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + unsigned int cpu; - cpu = get_cpu(); + cpu = smp_processor_id(); if (!cpumask_test_cpu(cpu, flush_cpumask)) goto out; @@ -112,12 +114,11 @@ void smp_invalidate_interrupt(struct pt_regs *regs) } else leave_mm(cpu); } +out: ack_APIC_irq(); smp_mb__before_clear_bit(); cpumask_clear_cpu(cpu, flush_cpumask); smp_mb__after_clear_bit(); -out: - put_cpu_no_resched(); inc_irq_stat(irq_tlb_count); } @@ -215,7 +216,6 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) flush_tlb_others(&mm->cpu_vm_mask, mm, va); preempt_enable(); } -EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void *info) { diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index b8bed841ad6..19ac661422f 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -1,20 +1,14 @@ #include #include -#include #include #include -#include -#include #include +#include -#include -#include #include #include -#include -#include -#include +#include #include DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) @@ -121,8 +115,8 @@ EXPORT_SYMBOL_GPL(leave_mm); asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) { - int cpu; - int sender; + unsigned int cpu; + unsigned int sender; union smp_flush_state *f; cpu = smp_processor_id(); @@ -155,14 +149,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) } out: ack_APIC_irq(); + smp_mb__before_clear_bit(); cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); + smp_mb__after_clear_bit(); inc_irq_stat(irq_tlb_count); } static void flush_tlb_others_ipi(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va) { - int sender; + unsigned int sender; union smp_flush_state *f; /* Caller has disabled preemption */ From 02cf94c370e0dc9bf408fe45eb86fe9ad58eaf7f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 150/682] x86: make x86_32 use tlb_64.c Impact: less contention when issuing invalidate IPI, cleanup Make x86_32 use the same tlb code as 64bit. The 64bit code uses multiple IPI vectors for tlb shootdown to reduce contention. This patch makes x86_32 allocate the same 8 IPIs as x86_64 and share the code paths. Note that the usage of asmlinkage is inconsistent for x86_32 and 64 and calls for further cleanup. This has been noted with a FIXME comment in tlb_64.c. Signed-off-by: Tejun Heo --- arch/x86/include/asm/irq_vectors.h | 7 +- .../x86/include/asm/mach-default/entry_arch.h | 18 +- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/entry_32.S | 6 +- arch/x86/kernel/irqinit_32.c | 11 +- arch/x86/kernel/tlb_32.c | 239 ------------------ arch/x86/kernel/tlb_64.c | 12 +- 7 files changed, 47 insertions(+), 248 deletions(-) delete mode 100644 arch/x86/kernel/tlb_32.c diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4ee8f800504..9a83a10a5d5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -58,8 +58,11 @@ # define CALL_FUNCTION_VECTOR 0xfc # define CALL_FUNCTION_SINGLE_VECTOR 0xfb # define THERMAL_APIC_VECTOR 0xfa -/* 0xf1 - 0xf9 : free */ -# define INVALIDATE_TLB_VECTOR 0xf0 +/* 0xf8 - 0xf9 : free */ +# define INVALIDATE_TLB_VECTOR_END 0xf7 +# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ + +# define NUM_INVALIDATE_TLB_VECTORS 8 #else diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h index 6b1add8e31d..6fa399ad1de 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/mach-default/entry_arch.h @@ -11,10 +11,26 @@ */ #ifdef CONFIG_X86_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) -BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) + +BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, + smp_invalidate_interrupt) +BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, + smp_invalidate_interrupt) #endif /* diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index eb074530c7d..a62a15c2222 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_64.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 46469029e9d..a0b91aac72a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -672,7 +672,7 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC -#define BUILD_INTERRUPT(name, nr) \ +#define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ pushl $~(nr); \ @@ -680,11 +680,13 @@ ENTRY(name) \ SAVE_ALL; \ TRACE_IRQS_OFF \ movl %esp,%eax; \ - call smp_##name; \ + call fn; \ jmp ret_from_intr; \ CFI_ENDPROC; \ ENDPROC(name) +#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) + /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1507ad4e674..bf629cadec1 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -149,8 +149,15 @@ void __init native_init_IRQ(void) */ alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - /* IPI for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c deleted file mode 100644 index 93fcb05c7d4..00000000000 --- a/arch/x86/kernel/tlb_32.c +++ /dev/null @@ -1,239 +0,0 @@ -#include -#include -#include - -#include - -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) - = { &init_mm, 0, }; - -/* must come after the send_IPI functions above for inlining */ -#include - -/* - * Smarter SMP flushing macros. - * c/o Linus Torvalds. - * - * These mean you can really definitely utterly forget about - * writing to user space from interrupts. (Its not allowed anyway). - * - * Optimizations Manfred Spraul - */ - -static cpumask_var_t flush_cpumask; -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -void leave_mm(int cpu) -{ - BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK); - cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} -EXPORT_SYMBOL_GPL(leave_mm); - -/* - * - * The flush IPI assumes that a thread switch happens in this order: - * [cpu0: the cpu that switches] - * 1) switch_mm() either 1a) or 1b) - * 1a) thread switch to a different mm - * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); - * Stop ipi delivery for the old mm. This is not synchronized with - * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superfluous - * tlb flush. - * 1a2) set cpu_tlbstate to TLBSTATE_OK - * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 - * was in lazy tlb mode. - * 1a3) update cpu_tlbstate[].active_mm - * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); - * Now the other cpus will send tlb flush ipis. - * 1a4) change cr3. - * 1b) thread switch without mm change - * cpu_tlbstate[].active_mm is correct, cpu0 already handles - * flush ipis. - * 1b1) set cpu_tlbstate to TLBSTATE_OK - * 1b2) test_and_set the cpu bit in cpu_vm_mask. - * Atomically set the bit [other cpus will start sending flush ipis], - * and test the bit. - * 1b3) if the bit was 0: leave_mm was called, flush the tlb. - * 2) switch %%esp, ie current - * - * The interrupt must handle 2 special cases: - * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. - * - the cpu performs speculative tlb reads, i.e. even if the cpu only - * runs in kernel space, the cpu could load tlb entries for user space - * pages. - * - * The good news is that cpu_tlbstate is local to each cpu, no - * write/read ordering problems. - */ - -/* - * TLB flush IPI: - * - * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. - * 2) Leave the mm if we are in the lazy tlb mode. - * - * Interrupts are disabled. - */ - -void smp_invalidate_interrupt(struct pt_regs *regs) -{ - unsigned int cpu; - - cpu = smp_processor_id(); - - if (!cpumask_test_cpu(cpu, flush_cpumask)) - goto out; - /* - * This was a BUG() but until someone can quote me the - * line from the intel manual that guarantees an IPI to - * multiple CPUs is retried _only_ on the erroring CPUs - * its staying as a return - * - * BUG(); - */ - - if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) { - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - leave_mm(cpu); - } -out: - ack_APIC_irq(); - smp_mb__before_clear_bit(); - cpumask_clear_cpu(cpu, flush_cpumask); - smp_mb__after_clear_bit(); - inc_irq_stat(irq_tlb_count); -} - -void native_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, unsigned long va) -{ - /* - * - mask must exist :) - */ - BUG_ON(cpumask_empty(cpumask)); - BUG_ON(!mm); - - /* - * i'm not happy about this global shared spinlock in the - * MM hot path, but we'll see how contended it is. - * AK: x86-64 has a faster method that could be ported. - */ - spin_lock(&tlbstate_lock); - - cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id())); -#ifdef CONFIG_HOTPLUG_CPU - /* If a CPU which we ran on has gone down, OK. */ - cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask); - if (unlikely(cpumask_empty(flush_cpumask))) { - spin_unlock(&tlbstate_lock); - return; - } -#endif - flush_mm = mm; - flush_va = va; - - /* - * Make the above memory operations globally visible before - * sending the IPI. - */ - smp_mb(); - /* - * We have to send the IPI only to - * CPUs affected. - */ - send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR); - - while (!cpumask_empty(flush_cpumask)) - /* nothing. lockup detection does not belong here */ - cpu_relax(); - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - - preempt_disable(); - - local_flush_tlb(); - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - leave_mm(smp_processor_id()); - } - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - - preempt_disable(); - - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - leave_mm(smp_processor_id()); - } - - if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(&mm->cpu_vm_mask, mm, va); - preempt_enable(); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) - leave_mm(cpu); -} - -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, NULL, 1); -} - -static int init_flush_cpumask(void) -{ - alloc_cpumask_var(&flush_cpumask, GFP_KERNEL); - return 0; -} -early_initcall(init_flush_cpumask); diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 19ac661422f..b3ca1b94065 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -113,7 +113,17 @@ EXPORT_SYMBOL_GPL(leave_mm); * Interrupts are disabled. */ -asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) +/* + * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop + * but still used for documentation purpose but the usage is slightly + * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt + * entry calls in with the first parameter in %eax. Maybe define + * intrlinkage? + */ +#ifdef CONFIG_X86_64 +asmlinkage +#endif +void smp_invalidate_interrupt(struct pt_regs *regs) { unsigned int cpu; unsigned int sender; From 16c2d3f895a3bc8d8e4c76c2646a6b750c181299 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 21 Jan 2009 17:26:06 +0900 Subject: [PATCH 151/682] x86: rename tlb_64.c to tlb.c Impact: file rename tlb_64.c is now the tlb code for both 32 and 64. Rename it to tlb.c. Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/{tlb_64.c => tlb.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename arch/x86/kernel/{tlb_64.c => tlb.c} (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a62a15c2222..0626a88fbb4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_64.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb.c similarity index 100% rename from arch/x86/kernel/tlb_64.c rename to arch/x86/kernel/tlb.c From 55f4949f5765e7a29863b6d17a774601810732f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:08:53 +0100 Subject: [PATCH 152/682] x86, mm: move tlb.c to arch/x86/mm/ Impact: cleanup Now that it's unified, move the (SMP) TLB flushing code from arch/x86/kernel/ to arch/x86/mm/, where it belongs logically. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/mm/Makefile | 2 ++ arch/x86/{kernel => mm}/tlb.c | 0 3 files changed, 3 insertions(+), 1 deletion(-) rename arch/x86/{kernel => mm}/tlb.c (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0626a88fbb4..0b3272f58bd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o +obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index d8cc96a2738..9f05157220f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,6 +1,8 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o +obj-$(CONFIG_X86_SMP) += tlb.o + obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/x86/kernel/tlb.c b/arch/x86/mm/tlb.c similarity index 100% rename from arch/x86/kernel/tlb.c rename to arch/x86/mm/tlb.c From 4ec71fa2d2c3f1040348f2604f4b8ccc833d1c2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:24:27 +0100 Subject: [PATCH 153/682] x86: uv cleanup, build fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: arch/x86/mm/srat_64.c: In function ‘acpi_numa_processor_affinity_init’: arch/x86/mm/srat_64.c:141: error: implicit declaration of function ‘get_uv_system_type’ arch/x86/mm/srat_64.c:141: error: ‘UV_X2APIC’ undeclared (first use in this function) arch/x86/mm/srat_64.c:141: error: (Each undeclared identifier is reported only once arch/x86/mm/srat_64.c:141: error: for each function it appears in.) A couple of UV definitions were moved to asm/uv/uv.h, but srat_64.c did not include that header. Add it. Signed-off-by: Ingo Molnar --- arch/x86/mm/srat_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 09737c8af07..15df1baee10 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -21,6 +21,7 @@ #include #include #include +#include int acpi_numa __initdata; From ace6c6c840878342f698f0da6588dd5ded755369 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 10:32:44 +0100 Subject: [PATCH 154/682] x86: make x86_32 use tlb_64.c, build fix, clean up X86_L1_CACHE_BYTES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: arch/x86/mm/tlb.c:47: error: ‘CONFIG_X86_INTERNODE_CACHE_BYTES’ undeclared here (not in a function) The CONFIG_X86_INTERNODE_CACHE_BYTES symbol is only defined on 64-bit, because vsmp support is 64-bit only. Define it on 32-bit too - where it will always be equal to X86_L1_CACHE_BYTES. Also move the default of X86_L1_CACHE_BYTES (which is separate from the more commonly used L1_CACHE_SHIFT kconfig symbol) from 128 bytes to 64 bytes. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index cdf4a962323..8eb50ba9161 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -292,15 +292,13 @@ config X86_CPU # Define implied options from the CPU selection here config X86_L1_CACHE_BYTES int - default "128" if GENERIC_CPU || MPSC - default "64" if MK8 || MCORE2 - depends on X86_64 + default "128" if MPSC + default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 config X86_INTERNODE_CACHE_BYTES int default "4096" if X86_VSMP default X86_L1_CACHE_BYTES if !X86_VSMP - depends on X86_64 config X86_CMPXCHG def_bool X86_64 || (X86_32 && !M386) From 5b221278d61e3907a5e4104a844b63bc8bb3d43a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Jan 2009 11:30:07 +0100 Subject: [PATCH 155/682] x86: uv cleanup, build fix #2 Fix more build-failure fallout from the UV cleanup - the UV drivers were not updated to include . Signed-off-by: Ingo Molnar --- drivers/misc/sgi-gru/gru.h | 2 ++ drivers/misc/sgi-xp/xp.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index f93f03a9e6e..1b5f579df15 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -19,6 +19,8 @@ #ifndef __GRU_H__ #define __GRU_H__ +#include + /* * GRU architectural definitions */ diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index 7b4cbd5e03e..069ad3a1c2a 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -15,6 +15,8 @@ #include +#include + #ifdef CONFIG_IA64 #include #include /* defines is_shub1() and is_shub2() */ From 4d5d783896fc8c37be88ee5837ca9b3c13fcd55b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 19 Jan 2009 16:34:26 -0800 Subject: [PATCH 156/682] x86: uaccess: fix style problems Impact: cleanup Fix coding style problems in arch/x86/include/asm/uaccess.h. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 4340055b755..aeb3c1b074c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -121,7 +121,7 @@ extern int __get_user_bad(void); #define __get_user_x(size, ret, x, ptr) \ asm volatile("call __get_user_" #size \ - : "=a" (ret),"=d" (x) \ + : "=a" (ret), "=d" (x) \ : "0" (ptr)) \ /* Careful: we have to cast the result to the type of the pointer @@ -181,7 +181,7 @@ extern int __get_user_bad(void); #define __put_user_x(size, x, ptr, __ret_pu) \ asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ - :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") @@ -276,7 +276,7 @@ do { \ __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ break; \ case 4: \ - __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ + __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ From cc86c9e0dc1a41451240b948bb39d46bb2536ae8 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 19 Jan 2009 16:37:41 -0800 Subject: [PATCH 157/682] x86: uaccess: rename __put_user_u64() to __put_user_asm_u64() Impact: cleanup rename __put_user_u64() to __put_user_asm_u64() like __get_user_asm_u64(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index aeb3c1b074c..69d2757cca9 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -186,7 +186,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 -#define __put_user_u64(x, addr, err) \ +#define __put_user_asm_u64(x, addr, err) \ asm volatile("1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ "3:\n" \ @@ -203,7 +203,7 @@ extern int __get_user_bad(void); asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else -#define __put_user_u64(x, ptr, retval) \ +#define __put_user_asm_u64(x, ptr, retval) \ __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -279,7 +279,7 @@ do { \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ + __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ break; \ default: \ __put_user_bad(); \ From 03b486322e994dde49e67aedb391867b7cf28822 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 20 Jan 2009 04:36:04 +0100 Subject: [PATCH 158/682] x86: make UV support configurable Make X86 SGI Ultraviolet support configurable. Saves about 13K of text size on my modest config. text data bss dec hex filename 6770537 1158680 694356 8623573 8395d5 vmlinux 6757492 1157664 694228 8609384 835e68 vmlinux.nouv Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 7 +++++++ arch/x86/include/asm/uv/uv.h | 6 +++--- arch/x86/kernel/Makefile | 5 +++-- arch/x86/kernel/efi.c | 2 ++ arch/x86/kernel/entry_64.S | 2 ++ arch/x86/kernel/genapic_64.c | 2 ++ arch/x86/kernel/io_apic.c | 2 +- drivers/misc/Kconfig | 4 ++-- 8 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ef27aed6ff7..5a29b792cb8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -391,6 +391,13 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. +config X86_UV + bool "SGI Ultraviolet" + depends on X86_64 + help + This option is needed in order to support SGI Ultraviolet systems. + If you don't have one of these, you should say N here. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index dce5fe35013..8ac1d7e312f 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -3,7 +3,7 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_UV extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); @@ -15,7 +15,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, unsigned long va, unsigned int cpu); -#else /* X86_64 */ +#else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } @@ -28,6 +28,6 @@ uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) { return cpumask; } -#endif /* X86_64 */ +#endif /* X86_UV */ #endif /* _ASM_X86_UV_UV_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0b3272f58bd..a99437c965c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -115,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o uv_irq.o uv_sysfs.o + obj-y += genapic_64.o genapic_flat_64.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o + obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o + obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe1..b205272ad39 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -366,10 +366,12 @@ void __init efi_init(void) SMBIOS_TABLE_GUID)) { efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); +#ifdef CONFIG_X86_UV } else if (!efi_guidcmp(config_tables[i].guid, UV_SYSTEM_TABLE_GUID)) { efi.uv_systab = config_tables[i].table; printk(" UVsystab=0x%lx ", config_tables[i].table); +#endif } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c52b6091916..a52703864a1 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt #endif +#ifdef CONFIG_X86_UV apicinterrupt UV_BAU_MESSAGE \ uv_bau_message_intr1 uv_bau_message_interrupt +#endif apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2bced78b0b8..e656c272115 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *genapic = &apic_flat; static struct genapic *apic_probe[] __initdata = { +#ifdef CONFIG_X86_UV &apic_x2apic_uv_x, +#endif &apic_x2apic_phys, &apic_x2apic_cluster, &apic_physflat, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f7966039072..e4d36bd56b6 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_UV /* * Re-target the irq to the specified CPU and enable the specified MMR located * on the specified blade to allow the sending of MSIs to the specified CPU. diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 419c378bd24..abcb8459254 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES config SGI_XP tristate "Support communication between SGI SSIs" depends on NET - depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP + depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP @@ -197,7 +197,7 @@ config HP_ILO config SGI_GRU tristate "SGI GRU driver" - depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP + depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP default n select MMU_NOTIFIER ---help--- From fb746d0e1365b7472ccc4c3d5b0672b34a092d0b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 21 Jan 2009 20:45:41 +0100 Subject: [PATCH 159/682] x86: optimise page fault entry, cleanup tsk is already assigned to current, drop the redundant second assignment. Signed-off-by: Johannes Weiner Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 033292dc9e2..8f4b859a04b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -419,7 +419,6 @@ static noinline void pgtable_bad(struct pt_regs *regs, printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", tsk->comm, address); dump_pagetable(address); - tsk = current; tsk->thread.cr2 = address; tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; From d639bab8da86d330493487e8c0fea8ca31f53427 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 9 Jan 2009 16:13:13 -0800 Subject: [PATCH 160/682] x86 PAT: ioremap_wc should take resource_size_t parameter Impact: fix/extend ioremap_wc() beyond 4GB aperture on 32-bit ioremap_wc() was taking in unsigned long parameter, where as it should take 64-bit resource_size_t parameter like other ioremap variants. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 2 +- arch/x86/mm/ioremap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 05cfed4485f..bdbb4b96160 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -91,7 +91,7 @@ extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); -extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); +extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); /* * early_ioremap() and early_iounmap() are for temporary early boot-time diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index bd85d42819e..2ddb1e79a19 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -367,7 +367,7 @@ EXPORT_SYMBOL(ioremap_nocache); * * Must be freed with iounmap. */ -void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, From ab897d2013128f470240a541b31cf5e636984e71 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jan 2009 14:24:16 -0800 Subject: [PATCH 161/682] x86/pvops: remove pte_flags pvop pte_flags() was introduced as a new pvop in order to extract just the flags portion of a pte, which is a potentially cheaper operation than extracting the page number as well. It turns out this operation is not needed, because simply using a mask to extract the flags from a pte is sufficient for all current users. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 3 +-- arch/x86/include/asm/paravirt.h | 18 ------------------ arch/x86/kernel/paravirt.c | 1 - arch/x86/xen/enlighten.c | 1 - 4 files changed, 1 insertion(+), 22 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..6b9810859da 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -147,7 +147,7 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } -static inline pteval_t native_pte_flags(pte_t pte) +static inline pteval_t pte_flags(pte_t pte) { return native_pte_val(pte) & PTE_FLAGS_MASK; } @@ -173,7 +173,6 @@ static inline pteval_t native_pte_flags(pte_t pte) #endif #define pte_val(x) native_pte_val(x) -#define pte_flags(x) native_pte_flags(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ba3e2ff6aed..e25c410f3d8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -279,7 +279,6 @@ struct pv_mmu_ops { pte_t *ptep, pte_t pte); pteval_t (*pte_val)(pte_t); - pteval_t (*pte_flags)(pte_t); pte_t (*make_pte)(pteval_t pte); pgdval_t (*pgd_val)(pgd_t); @@ -1084,23 +1083,6 @@ static inline pteval_t pte_val(pte_t pte) return ret; } -static inline pteval_t pte_flags(pte_t pte) -{ - pteval_t ret; - - if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, - pte.pte, (u64)pte.pte >> 32); - else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, - pte.pte); - -#ifdef CONFIG_PARAVIRT_DEBUG - BUG_ON(ret & PTE_PFN_MASK); -#endif - return ret; -} - static inline pgd_t __pgd(pgdval_t val) { pgdval_t ret; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e4c8fb60887..202514be592 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -435,7 +435,6 @@ struct pv_mmu_ops pv_mmu_ops = { #endif /* PAGETABLE_LEVELS >= 3 */ .pte_val = native_pte_val, - .pte_flags = native_pte_flags, .pgd_val = native_pgd_val, .make_pte = native_make_pte, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bea215230b2..6f1bb71aa13 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1314,7 +1314,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, - .pte_flags = native_pte_flags, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, From 6522869c34664dd5f05a0a327e93915b1281c90d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 22 Jan 2009 14:24:22 -0800 Subject: [PATCH 162/682] x86: add pte_set_flags/clear_flags for pte flag manipulation It's not necessary to deconstruct and reconstruct a pte every time its flags are being updated. Introduce pte_set_flags and pte_clear_flags to set and clear flags in a pte. This allows the flag manipulation code to be inlined, and avoids calls via paravirt-ops. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 38 +++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9..6ceaef08486 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -240,64 +240,78 @@ static inline int pmd_large(pmd_t pte) (_PAGE_PSE | _PAGE_PRESENT); } +static inline pte_t pte_set_flags(pte_t pte, pteval_t set) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v | set); +} + +static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) +{ + pteval_t v = native_pte_val(pte); + + return native_make_pte(v & ~clear); +} + static inline pte_t pte_mkclean(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_DIRTY); + return pte_clear_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkold(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_ACCESSED); + return pte_clear_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_wrprotect(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_RW); + return pte_clear_flags(pte, _PAGE_RW); } static inline pte_t pte_mkexec(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_NX); + return pte_clear_flags(pte, _PAGE_NX); } static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_ACCESSED); + return pte_set_flags(pte, _PAGE_ACCESSED); } static inline pte_t pte_mkwrite(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_RW); + return pte_set_flags(pte, _PAGE_RW); } static inline pte_t pte_mkhuge(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_PSE); + return pte_set_flags(pte, _PAGE_PSE); } static inline pte_t pte_clrhuge(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_PSE); + return pte_clear_flags(pte, _PAGE_PSE); } static inline pte_t pte_mkglobal(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_GLOBAL); + return pte_set_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_clrglobal(pte_t pte) { - return __pte(pte_val(pte) & ~_PAGE_GLOBAL); + return pte_clear_flags(pte, _PAGE_GLOBAL); } static inline pte_t pte_mkspecial(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_SPECIAL); + return pte_set_flags(pte, _PAGE_SPECIAL); } extern pteval_t __supported_pte_mask; From 03d2989df9c1c7df5b33c7a87e0790465461836a Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:28 +0900 Subject: [PATCH 163/682] x86: remove idle_timestamp from 32bit irq_cpustat_t Impact: bogus irq_cpustat field removed idle_timestamp is left over from the removed irqbalance code. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_32.h | 1 - arch/x86/kernel/process_32.c | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index d4b5d731073..a70ed050fde 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -6,7 +6,6 @@ typedef struct { unsigned int __softirq_pending; - unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq0_irqs; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2c00a57ccb9..1a1ae8edc40 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -108,7 +108,6 @@ void cpu_idle(void) play_dead(); local_irq_disable(); - __get_cpu_var(irq_stat).idle_timestamp = jiffies; /* Don't trace irqs off for idle */ stop_critical_timings(); pm_idle(); From 3819cd489ec5d18a4cbd2f05acdc516473caa105 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:29 +0900 Subject: [PATCH 164/682] x86: remove include of apic.h from hardirq_64.h Impact: cleanup APIC definitions aren't needed here. Remove the include and fix up the fallout. tj: added include to mce_intel_64.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_64.h | 1 - arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1 + arch/x86/kernel/efi_64.c | 1 + arch/x86/kernel/irq_64.c | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index a65bab20f6c..873c3c7bffc 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -3,7 +3,6 @@ #include #include -#include typedef struct { unsigned int __softirq_pending; diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 4b48f251fd3..5e8c79e748a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215..a4ee29127fd 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -36,6 +36,7 @@ #include #include #include +#include static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0b254de8408..018963aa6ee 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,6 +18,7 @@ #include #include #include +#include DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); From 658a9a2c34914e8114eea9c4d85d4ecd3ee33b98 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:31 +0900 Subject: [PATCH 165/682] x86: sync hardirq_{32,64}.h Impact: better code generation and removal of unused field for 32bit In general, use the 64-bit version. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq_32.h | 14 ++++++++++---- arch/x86/include/asm/hardirq_64.h | 10 +++++----- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index a70ed050fde..e5a332c28c9 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -14,6 +14,7 @@ typedef struct { unsigned int irq_tlb_count; unsigned int irq_thermal_count; unsigned int irq_spurious_count; + unsigned int irq_threshold_count; } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU(irq_cpustat_t, irq_stat); @@ -22,11 +23,16 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); #define MAX_HARDIRQS_PER_CPU NR_VECTORS #define __ARCH_IRQ_STAT -#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) -#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -void ack_bad_irq(unsigned int irq); -#include +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); #endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index 873c3c7bffc..392e7d61457 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -22,16 +22,16 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); /* We can have at most NR_VECTORS irqs routed to a cpu at a time */ #define MAX_HARDIRQS_PER_CPU NR_VECTORS -#define __ARCH_IRQ_STAT 1 +#define __ARCH_IRQ_STAT #define inc_irq_stat(member) percpu_add(irq_stat.member, 1) -#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) -#define __ARCH_SET_SOFTIRQ_PENDING 1 +#define __ARCH_SET_SOFTIRQ_PENDING -#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) extern void ack_bad_irq(unsigned int irq); From 22da7b3df3a2e26a87a8581575dbf26e465a6ac7 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:31 +0900 Subject: [PATCH 166/682] x86: merge hardirq_{32,64}.h into hardirq.h Impact: cleanup Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq.h | 43 +++++++++++++++++++++++++++---- arch/x86/include/asm/hardirq_32.h | 38 --------------------------- arch/x86/include/asm/hardirq_64.h | 38 --------------------------- 3 files changed, 38 insertions(+), 81 deletions(-) delete mode 100644 arch/x86/include/asm/hardirq_32.h delete mode 100644 arch/x86/include/asm/hardirq_64.h diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 000787df66e..f4a95f20f8e 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -1,11 +1,44 @@ -#ifdef CONFIG_X86_32 -# include "hardirq_32.h" -#else -# include "hardirq_64.h" -#endif +#ifndef _ASM_X86_HARDIRQ_H +#define _ASM_X86_HARDIRQ_H + +#include +#include + +typedef struct { + unsigned int __softirq_pending; + unsigned int __nmi_count; /* arch dependent */ + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq0_irqs; + unsigned int irq_resched_count; + unsigned int irq_call_count; + unsigned int irq_tlb_count; + unsigned int irq_thermal_count; + unsigned int irq_spurious_count; + unsigned int irq_threshold_count; +} ____cacheline_aligned irq_cpustat_t; + +DECLARE_PER_CPU(irq_cpustat_t, irq_stat); + +/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +#define MAX_HARDIRQS_PER_CPU NR_VECTORS + +#define __ARCH_IRQ_STAT + +#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) + +#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) + +#define __ARCH_SET_SOFTIRQ_PENDING + +#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) +#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) + +extern void ack_bad_irq(unsigned int irq); extern u64 arch_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu arch_irq_stat_cpu extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat + +#endif /* _ASM_X86_HARDIRQ_H */ diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h deleted file mode 100644 index e5a332c28c9..00000000000 --- a/arch/x86/include/asm/hardirq_32.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_32_H -#define _ASM_X86_HARDIRQ_32_H - -#include -#include - -typedef struct { - unsigned int __softirq_pending; - unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ - unsigned int irq0_irqs; - unsigned int irq_resched_count; - unsigned int irq_call_count; - unsigned int irq_tlb_count; - unsigned int irq_thermal_count; - unsigned int irq_spurious_count; - unsigned int irq_threshold_count; -} ____cacheline_aligned irq_cpustat_t; - -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); - -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - -#define __ARCH_IRQ_STAT - -#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) - -#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) - -extern void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_X86_HARDIRQ_32_H */ diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h deleted file mode 100644 index 392e7d61457..00000000000 --- a/arch/x86/include/asm/hardirq_64.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef _ASM_X86_HARDIRQ_64_H -#define _ASM_X86_HARDIRQ_64_H - -#include -#include - -typedef struct { - unsigned int __softirq_pending; - unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ - unsigned int irq0_irqs; - unsigned int irq_resched_count; - unsigned int irq_call_count; - unsigned int irq_tlb_count; - unsigned int irq_thermal_count; - unsigned int irq_spurious_count; - unsigned int irq_threshold_count; -} ____cacheline_aligned irq_cpustat_t; - -DECLARE_PER_CPU(irq_cpustat_t, irq_stat); - -/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ -#define MAX_HARDIRQS_PER_CPU NR_VECTORS - -#define __ARCH_IRQ_STAT - -#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) - -#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) - -extern void ack_bad_irq(unsigned int irq); - -#endif /* _ASM_X86_HARDIRQ_64_H */ From 2de3a5f7956eb81447feea3aec68193ddd8534bb Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jan 2009 11:03:32 +0900 Subject: [PATCH 167/682] x86: make irq_cpustat_t fields conditional Impact: shrink size of irq_cpustat_t when possible Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/hardirq.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index f4a95f20f8e..176f058e715 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -7,14 +7,22 @@ typedef struct { unsigned int __softirq_pending; unsigned int __nmi_count; /* arch dependent */ - unsigned int apic_timer_irqs; /* arch dependent */ unsigned int irq0_irqs; +#ifdef CONFIG_X86_LOCAL_APIC + unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int irq_spurious_count; +#endif +#ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; unsigned int irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE unsigned int irq_thermal_count; - unsigned int irq_spurious_count; +# ifdef CONFIG_X86_64 unsigned int irq_threshold_count; +# endif +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU(irq_cpustat_t, irq_stat); From 99d0000f710f3432182761f65f9658f1cf0bf455 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 23 Jan 2009 11:09:15 +0100 Subject: [PATCH 168/682] x86, xen: fix hardirq.h merge fallout Impact: build fix This build error: arch/x86/xen/suspend.c:22: error: implicit declaration of function 'fix_to_virt' arch/x86/xen/suspend.c:22: error: 'FIX_PARAVIRT_BOOTMAP' undeclared (first use in this function) arch/x86/xen/suspend.c:22: error: (Each undeclared identifier is reported only once arch/x86/xen/suspend.c:22: error: for each function it appears in.) triggers because the hardirq.h unification removed an implicit fixmap.h include - on which arch/x86/xen/suspend.c depended. Add the fixmap.h include explicitly. Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 212ffe012b7..95be7b43472 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -6,6 +6,7 @@ #include #include +#include #include "xen-ops.h" #include "mmu.h" From fe40c0af3cff3ea461cf25bddb979abc7279d4df Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:49:41 -0800 Subject: [PATCH 169/682] x86: uaccess: introduce try and catch framework Impact: introduce new uaccess exception handling framework Introduce {get|put}_user_try and {get|put}_user_catch as new uaccess exception handling framework. {get|put}_user_try begins exception block and {get|put}_user_catch(err) ends the block and gets err if an exception occured in {get|put}_user_ex() in the block. The exception is stored thread_info->uaccess_err. The example usage of this framework is below; int func() { int err = 0; get_user_try { get_user_ex(...); get_user_ex(...); : } get_user_catch(err); return err; } Note: get_user_ex() is not clear the value when an exception occurs, it's different from the behavior of __get_user(), but I think it doesn't matter. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 1 + arch/x86/include/asm/uaccess.h | 103 +++++++++++++++++++++++++++++ arch/x86/mm/extable.c | 6 ++ 3 files changed, 110 insertions(+) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 98789647baa..3f90aeb456b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif + int uaccess_err; }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 69d2757cca9..0ec6de4bcb0 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -199,12 +199,22 @@ extern int __get_user_bad(void); : "=r" (err) \ : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) +#define __put_user_asm_ex_u64(x, addr) \ + asm volatile("1: movl %%eax,0(%1)\n" \ + "2: movl %%edx,4(%1)\n" \ + "3:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + _ASM_EXTABLE(2b, 3b - 2b) \ + : : "A" (x), "r" (addr)) + #define __put_user_x8(x, ptr, __ret_pu) \ asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else #define __put_user_asm_u64(x, ptr, retval) \ __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_asm_ex_u64(x, addr) \ + __put_user_asm_ex(x, addr, "q", "", "Zr") #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) #endif @@ -286,6 +296,27 @@ do { \ } \ } while (0) +#define __put_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ + break; \ + case 2: \ + __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ + break; \ + case 4: \ + __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ + break; \ + case 8: \ + __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ + break; \ + default: \ + __put_user_bad(); \ + } \ +} while (0) + #else #define __put_user_size(x, ptr, size, retval, errret) \ @@ -311,9 +342,12 @@ do { \ #ifdef CONFIG_X86_32 #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() +#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() #else #define __get_user_asm_u64(x, ptr, retval, errret) \ __get_user_asm(x, ptr, retval, "q", "", "=r", errret) +#define __get_user_asm_ex_u64(x, ptr) \ + __get_user_asm_ex(x, ptr, "q", "", "=r") #endif #define __get_user_size(x, ptr, size, retval, errret) \ @@ -350,6 +384,33 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) +#define __get_user_size_ex(x, ptr, size) \ +do { \ + __chk_user_ptr(ptr); \ + switch (size) { \ + case 1: \ + __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ + break; \ + case 2: \ + __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ + break; \ + case 4: \ + __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ + break; \ + case 8: \ + __get_user_asm_ex_u64(x, ptr); \ + break; \ + default: \ + (x) = __get_user_bad(); \ + } \ +} while (0) + +#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %1,%"rtype"0\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : ltype(x) : "m" (__m(addr))) + #define __put_user_nocheck(x, ptr, size) \ ({ \ int __pu_err; \ @@ -385,6 +446,26 @@ struct __large_struct { unsigned long buf[100]; }; _ASM_EXTABLE(1b, 3b) \ : "=r"(err) \ : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) + +#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ + asm volatile("1: mov"itype" %"rtype"0,%1\n" \ + "2:\n" \ + _ASM_EXTABLE(1b, 2b - 1b) \ + : : ltype(x), "m" (__m(addr))) + +/* + * uaccess_try and catch + */ +#define uaccess_try do { \ + int prev_err = current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = 0; \ + barrier(); + +#define uaccess_catch(err) \ + (err) |= current_thread_info()->uaccess_err; \ + current_thread_info()->uaccess_err = prev_err; \ +} while (0) + /** * __get_user: - Get a simple variable from user space, with less checking. * @x: Variable to store result. @@ -408,6 +489,7 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user(x, ptr) \ __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + /** * __put_user: - Write a simple value into user space, with less checking. * @x: Value to copy to user space. @@ -434,6 +516,27 @@ struct __large_struct { unsigned long buf[100]; }; #define __get_user_unaligned __get_user #define __put_user_unaligned __put_user +/* + * {get|put}_user_try and catch + * + * get_user_try { + * get_user_ex(...); + * } get_user_catch(err) + */ +#define get_user_try uaccess_try +#define get_user_catch(err) uaccess_catch(err) +#define put_user_try uaccess_try +#define put_user_catch(err) uaccess_catch(err) + +#define get_user_ex(x, ptr) do { \ + unsigned long __gue_val; \ + __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ + (x) = (__force __typeof__(*(ptr)))__gue_val; \ +} while (0) + +#define put_user_ex(x, ptr) \ + __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + /* * movsl can be slow when source and dest are not both 8-byte aligned */ diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 7e8db53528a..61b41ca3b5a 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -23,6 +23,12 @@ int fixup_exception(struct pt_regs *regs) fixup = search_exception_tables(regs->ip); if (fixup) { + /* If fixup is less than 16, it means uaccess error */ + if (fixup->fixup < 16) { + current_thread_info()->uaccess_err = -EFAULT; + regs->ip += fixup->fixup; + return 1; + } regs->ip = fixup->fixup; return 1; } From 98e3d45edad207b4358948d6e2cac4e482c3bb5d Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:50:10 -0800 Subject: [PATCH 170/682] x86: signal: use {get|put}_user_try and catch Impact: use new framework Use {get|put}_user_try, catch, and _ex in arch/x86/kernel/signal.c. Note: this patch contains "WARNING: line over 80 characters", because when introducing new block I insert an indent to avoid mistakes by edit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/kernel/signal.c | 289 +++++++++++++++++++++------------------ 1 file changed, 153 insertions(+), 136 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 89bb7668041..cf34eb37fbe 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -51,24 +51,24 @@ #endif #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp; \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define GET_SEG(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ loadsegment(seg, tmp); \ } @@ -83,45 +83,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; + get_user_try { + #ifdef CONFIG_X86_32 - GET_SEG(gs); - COPY_SEG(fs); - COPY_SEG(es); - COPY_SEG(ds); + GET_SEG(gs); + COPY_SEG(fs); + COPY_SEG(es); + COPY_SEG(ds); #endif /* CONFIG_X86_32 */ - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); #ifdef CONFIG_X86_64 - COPY(r8); - COPY(r9); - COPY(r10); - COPY(r11); - COPY(r12); - COPY(r13); - COPY(r14); - COPY(r15); + COPY(r8); + COPY(r9); + COPY(r10); + COPY(r11); + COPY(r12); + COPY(r13); + COPY(r14); + COPY(r15); #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_32 - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); #else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); + /* Kernel saves and restores only the CS segment register on signals, + * which is the bare minimum needed to allow mixed 32/64-bit code. + * App's signal handler can save/restore other segments if needed. */ + COPY_SEG_CPL3(cs); #endif /* CONFIG_X86_32 */ - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - regs->orig_ax = -1; /* disable syscall checks */ + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + regs->orig_ax = -1; /* disable syscall checks */ - err |= __get_user(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); + get_user_ex(buf, &sc->fpstate); + err |= restore_i387_xstate(buf); + + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); - err |= __get_user(*pax, &sc->ax); return err; } @@ -131,57 +135,60 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, { int err = 0; -#ifdef CONFIG_X86_32 - { - unsigned int tmp; + put_user_try { - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - } - err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); - err |= __put_user(regs->es, (unsigned int __user *)&sc->es); - err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); +#ifdef CONFIG_X86_32 + { + unsigned int tmp; + + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + } + put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); + put_user_ex(regs->es, (unsigned int __user *)&sc->es); + put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); #endif /* CONFIG_X86_32 */ - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); #ifdef CONFIG_X86_64 - err |= __put_user(regs->r8, &sc->r8); - err |= __put_user(regs->r9, &sc->r9); - err |= __put_user(regs->r10, &sc->r10); - err |= __put_user(regs->r11, &sc->r11); - err |= __put_user(regs->r12, &sc->r12); - err |= __put_user(regs->r13, &sc->r13); - err |= __put_user(regs->r14, &sc->r14); - err |= __put_user(regs->r15, &sc->r15); + put_user_ex(regs->r8, &sc->r8); + put_user_ex(regs->r9, &sc->r9); + put_user_ex(regs->r10, &sc->r10); + put_user_ex(regs->r11, &sc->r11); + put_user_ex(regs->r12, &sc->r12); + put_user_ex(regs->r13, &sc->r13); + put_user_ex(regs->r14, &sc->r14); + put_user_ex(regs->r15, &sc->r15); #endif /* CONFIG_X86_64 */ - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); #ifdef CONFIG_X86_32 - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); #else /* !CONFIG_X86_32 */ - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->cs, &sc->cs); - err |= __put_user(0, &sc->gs); - err |= __put_user(0, &sc->fs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->cs, &sc->cs); + put_user_ex(0, &sc->gs); + put_user_ex(0, &sc->fs); #endif /* CONFIG_X86_32 */ - err |= __put_user(fpstate, &sc->fpstate); + put_user_ex(fpstate, &sc->fpstate); - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -336,43 +343,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(&frame->info, &frame->pinfo); + put_user_ex(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - /* Set up to return from userspace. */ - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - err |= __put_user(restorer, &frame->pretcode); + /* Set up to return from userspace. */ + restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + put_user_ex(restorer, &frame->pretcode); - /* - * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 - * - * WE DO NOT USE IT ANY MORE! It's only left here for historical - * reasons and because gdb uses it as a signature to notice - * signal handler stack frames. - */ - err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + /* + * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 + * + * WE DO NOT USE IT ANY MORE! It's only left here for historical + * reasons and because gdb uses it as a signature to notice + * signal handler stack frames. + */ + put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); + } put_user_catch(err); if (err) return -EFAULT; @@ -436,28 +441,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return -EFAULT; } - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - /* Set up to return from userspace. If provided, use a stub - already in userspace. */ - /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); - } else { - /* could use a vstub here */ - return -EFAULT; - } + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + /* x86-64 should always use SA_RESTORER. */ + if (ka->sa.sa_flags & SA_RESTORER) { + put_user_ex(ka->sa.sa_restorer, &frame->pretcode); + } else { + /* could use a vstub here */ + err |= -EFAULT; + } + } put_user_catch(err); if (err) return -EFAULT; @@ -509,31 +516,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) { struct k_sigaction new_ka, old_ka; - int ret; + int ret = 0; if (act) { old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + if (!access_ok(VERIFY_READ, act, sizeof(*act))) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); + get_user_try { + get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); + get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); + get_user_ex(mask, &act->sa_mask); + get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); + } get_user_catch(ret); + + if (ret) + return -EFAULT; siginitset(&new_ka.sa.sa_mask, mask); } ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_try { + put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); + put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); + put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); + } put_user_catch(ret); + + if (ret) + return -EFAULT; } return ret; From 3b4b75700a245d0d48fc52a4d2f67d3155812aba Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 23 Jan 2009 15:50:38 -0800 Subject: [PATCH 171/682] x86: ia32_signal: use {get|put}_user_try and catch Impact: use new framework Use {get|put}_user_try, catch, and _ex in arch/x86/ia32/ia32_signal.c. Note: this patch contains "WARNING: line over 80 characters", because when introducing new block I insert an indent to avoid mistakes by edit. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/ia32/ia32_signal.c | 343 +++++++++++++++++++----------------- 1 file changed, 184 insertions(+), 159 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 9dabd00e980..dd77ac0cac4 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -46,78 +46,83 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where); int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { - int err; + int err = 0; if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; - /* If you change siginfo_t structure, please make sure that - this code is fixed accordingly. - It should never copy any pad contained in the structure - to avoid security leaks, but must copy the generic - 3 ints plus the relevant union member. */ - err = __put_user(from->si_signo, &to->si_signo); - err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + put_user_try { + /* If you change siginfo_t structure, please make sure that + this code is fixed accordingly. + It should never copy any pad contained in the structure + to avoid security leaks, but must copy the generic + 3 ints plus the relevant union member. */ + put_user_ex(from->si_signo, &to->si_signo); + put_user_ex(from->si_errno, &to->si_errno); + put_user_ex((short)from->si_code, &to->si_code); - if (from->si_code < 0) { - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); - } else { - /* - * First 32bits of unions are always present: - * si_pid === si_band === si_tid === si_addr(LS half) - */ - err |= __put_user(from->_sifields._pad[0], - &to->_sifields._pad[0]); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: - break; - case __SI_CHLD >> 16: - err |= __put_user(from->si_utime, &to->si_utime); - err |= __put_user(from->si_stime, &to->si_stime); - err |= __put_user(from->si_status, &to->si_status); - /* FALL THROUGH */ - default: - case __SI_KILL >> 16: - err |= __put_user(from->si_uid, &to->si_uid); - break; - case __SI_POLL >> 16: - err |= __put_user(from->si_fd, &to->si_fd); - break; - case __SI_TIMER >> 16: - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(ptr_to_compat(from->si_ptr), - &to->si_ptr); - break; - /* This is not generated by the kernel as of now. */ - case __SI_RT >> 16: - case __SI_MESGQ >> 16: - err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); - break; + if (from->si_code < 0) { + put_user_ex(from->si_pid, &to->si_pid); + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); + } else { + /* + * First 32bits of unions are always present: + * si_pid === si_band === si_tid === si_addr(LS half) + */ + put_user_ex(from->_sifields._pad[0], + &to->_sifields._pad[0]); + switch (from->si_code >> 16) { + case __SI_FAULT >> 16: + break; + case __SI_CHLD >> 16: + put_user_ex(from->si_utime, &to->si_utime); + put_user_ex(from->si_stime, &to->si_stime); + put_user_ex(from->si_status, &to->si_status); + /* FALL THROUGH */ + default: + case __SI_KILL >> 16: + put_user_ex(from->si_uid, &to->si_uid); + break; + case __SI_POLL >> 16: + put_user_ex(from->si_fd, &to->si_fd); + break; + case __SI_TIMER >> 16: + put_user_ex(from->si_overrun, &to->si_overrun); + put_user_ex(ptr_to_compat(from->si_ptr), + &to->si_ptr); + break; + /* This is not generated by the kernel as of now. */ + case __SI_RT >> 16: + case __SI_MESGQ >> 16: + put_user_ex(from->si_uid, &to->si_uid); + put_user_ex(from->si_int, &to->si_int); + break; + } } - } + } put_user_catch(err); + return err; } int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { - int err; + int err = 0; u32 ptr32; if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) return -EFAULT; - err = __get_user(to->si_signo, &from->si_signo); - err |= __get_user(to->si_errno, &from->si_errno); - err |= __get_user(to->si_code, &from->si_code); + get_user_try { + get_user_ex(to->si_signo, &from->si_signo); + get_user_ex(to->si_errno, &from->si_errno); + get_user_ex(to->si_code, &from->si_code); - err |= __get_user(to->si_pid, &from->si_pid); - err |= __get_user(to->si_uid, &from->si_uid); - err |= __get_user(ptr32, &from->si_ptr); - to->si_ptr = compat_ptr(ptr32); + get_user_ex(to->si_pid, &from->si_pid); + get_user_ex(to->si_uid, &from->si_uid); + get_user_ex(ptr32, &from->si_ptr); + to->si_ptr = compat_ptr(ptr32); + } get_user_catch(err); return err; } @@ -142,17 +147,23 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, struct pt_regs *regs) { stack_t uss, uoss; - int ret; + int ret, err = 0; mm_segment_t seg; if (uss_ptr) { u32 ptr; memset(&uss, 0, sizeof(stack_t)); - if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || - __get_user(ptr, &uss_ptr->ss_sp) || - __get_user(uss.ss_flags, &uss_ptr->ss_flags) || - __get_user(uss.ss_size, &uss_ptr->ss_size)) + if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + get_user_try { + get_user_ex(ptr, &uss_ptr->ss_sp); + get_user_ex(uss.ss_flags, &uss_ptr->ss_flags); + get_user_ex(uss.ss_size, &uss_ptr->ss_size); + } get_user_catch(err); + + if (err) return -EFAULT; uss.ss_sp = compat_ptr(ptr); } @@ -161,10 +172,16 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { - if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || - __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || - __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || - __put_user(uoss.ss_size, &uoss_ptr->ss_size)) + if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) + return -EFAULT; + + put_user_try { + put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp); + put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags); + put_user_ex(uoss.ss_size, &uoss_ptr->ss_size); + } put_user_catch(err); + + if (err) ret = -EFAULT; } return ret; @@ -174,18 +191,18 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, * Do a signal return; undo the signal stack. */ #define COPY(x) { \ - err |= __get_user(regs->x, &sc->x); \ + get_user_ex(regs->x, &sc->x); \ } #define COPY_SEG_CPL3(seg) { \ unsigned short tmp; \ - err |= __get_user(tmp, &sc->seg); \ + get_user_ex(tmp, &sc->seg); \ regs->seg = tmp | 3; \ } #define RELOAD_SEG(seg) { \ unsigned int cur, pre; \ - err |= __get_user(pre, &sc->seg); \ + get_user_ex(pre, &sc->seg); \ savesegment(seg, cur); \ pre |= 3; \ if (pre != cur) \ @@ -209,39 +226,42 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, sc, sc->err, sc->ip, sc->cs, sc->flags); #endif - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ - err |= __get_user(gs, &sc->gs); - gs |= 3; - savesegment(gs, oldgs); - if (gs != oldgs) - load_gs_index(gs); + get_user_try { + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + get_user_ex(gs, &sc->gs); + gs |= 3; + savesegment(gs, oldgs); + if (gs != oldgs) + load_gs_index(gs); - RELOAD_SEG(fs); - RELOAD_SEG(ds); - RELOAD_SEG(es); + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); - COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); - /* Don't touch extended registers */ + COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); + COPY(dx); COPY(cx); COPY(ip); + /* Don't touch extended registers */ - COPY_SEG_CPL3(cs); - COPY_SEG_CPL3(ss); + COPY_SEG_CPL3(cs); + COPY_SEG_CPL3(ss); - err |= __get_user(tmpflags, &sc->flags); - regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); - /* disable syscall checks */ - regs->orig_ax = -1; + get_user_ex(tmpflags, &sc->flags); + regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); + /* disable syscall checks */ + regs->orig_ax = -1; - err |= __get_user(tmp, &sc->fpstate); - buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); + get_user_ex(tmp, &sc->fpstate); + buf = compat_ptr(tmp); + err |= restore_i387_xstate_ia32(buf); + + get_user_ex(*pax, &sc->ax); + } get_user_catch(err); - err |= __get_user(*pax, &sc->ax); return err; } @@ -319,36 +339,38 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, { int tmp, err = 0; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - savesegment(fs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->fs); - savesegment(ds, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->ds); - savesegment(es, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->es); + put_user_try { + savesegment(gs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->gs); + savesegment(fs, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->fs); + savesegment(ds, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->ds); + savesegment(es, tmp); + put_user_ex(tmp, (unsigned int __user *)&sc->es); - err |= __put_user(regs->di, &sc->di); - err |= __put_user(regs->si, &sc->si); - err |= __put_user(regs->bp, &sc->bp); - err |= __put_user(regs->sp, &sc->sp); - err |= __put_user(regs->bx, &sc->bx); - err |= __put_user(regs->dx, &sc->dx); - err |= __put_user(regs->cx, &sc->cx); - err |= __put_user(regs->ax, &sc->ax); - err |= __put_user(current->thread.trap_no, &sc->trapno); - err |= __put_user(current->thread.error_code, &sc->err); - err |= __put_user(regs->ip, &sc->ip); - err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); - err |= __put_user(regs->flags, &sc->flags); - err |= __put_user(regs->sp, &sc->sp_at_signal); - err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); + put_user_ex(regs->di, &sc->di); + put_user_ex(regs->si, &sc->si); + put_user_ex(regs->bp, &sc->bp); + put_user_ex(regs->sp, &sc->sp); + put_user_ex(regs->bx, &sc->bx); + put_user_ex(regs->dx, &sc->dx); + put_user_ex(regs->cx, &sc->cx); + put_user_ex(regs->ax, &sc->ax); + put_user_ex(current->thread.trap_no, &sc->trapno); + put_user_ex(current->thread.error_code, &sc->err); + put_user_ex(regs->ip, &sc->ip); + put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); + put_user_ex(regs->flags, &sc->flags); + put_user_ex(regs->sp, &sc->sp_at_signal); + put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); - err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate); + put_user_ex(ptr_to_compat(fpstate), &sc->fpstate); - /* non-iBCS2 extensions.. */ - err |= __put_user(mask, &sc->oldmask); - err |= __put_user(current->thread.cr2, &sc->cr2); + /* non-iBCS2 extensions.. */ + put_user_ex(mask, &sc->oldmask); + put_user_ex(current->thread.cr2, &sc->cr2); + } put_user_catch(err); return err; } @@ -437,13 +459,17 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, else restorer = &frame->retcode; } - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); - /* - * These are actually not used anymore, but left because some - * gdb versions depend on them as a marker. - */ - err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); + put_user_try { + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * These are actually not used anymore, but left because some + * gdb versions depend on them as a marker. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); + if (err) return -EFAULT; @@ -496,41 +522,40 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= __put_user(sig, &frame->sig); - err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); - err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); - err |= copy_siginfo_to_user32(&frame->info, info); - if (err) - return -EFAULT; + put_user_try { + put_user_ex(sig, &frame->sig); + put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); + put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); + err |= copy_siginfo_to_user32(&frame->info, info); - /* Create the ucontext. */ - if (cpu_has_xsave) - err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); - else - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) - return -EFAULT; + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; - else - restorer = VDSO32_SYMBOL(current->mm->context.vdso, - rt_sigreturn); - err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + if (ka->sa.sa_flags & SA_RESTORER) + restorer = ka->sa.sa_restorer; + else + restorer = VDSO32_SYMBOL(current->mm->context.vdso, + rt_sigreturn); + put_user_ex(ptr_to_compat(restorer), &frame->pretcode); + + /* + * Not actually used anymore, but left because some gdb + * versions need it. + */ + put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + } put_user_catch(err); - /* - * Not actually used anymore, but left because some gdb - * versions need it. - */ - err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); if (err) return -EFAULT; From b1882e68d17a93b523dce09c3a181319aace2f0e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:18:52 -0800 Subject: [PATCH 172/682] x86: clean up stray space in Impact: Whitespace cleanup only Clean up a stray space character in arch/x86/include/asm/processor.h. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..ac8fab3b868 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -73,7 +73,7 @@ struct cpuinfo_x86 { char pad0; #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ - int x86_tlbsize; + int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; #endif From 75a048119e76540d73132cfc8e0fa0c0a8bb6c83 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 22 Jan 2009 16:17:05 -0800 Subject: [PATCH 173/682] x86: handle PAT more like other CPU features Impact: Cleanup When PAT was originally introduced, it was handled specially for a few reasons: - PAT bugs are hard to track down, so we wanted to maintain a whitelist of CPUs. - The i386 and x86-64 CPUID code was not yet unified. Both of these are now obsolete, so handle PAT like any other features, including ordinary feature blacklisting due to known bugs. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pat.h | 4 --- arch/x86/kernel/cpu/addon_cpuid_features.c | 34 ---------------------- arch/x86/kernel/cpu/common.c | 2 -- arch/x86/kernel/cpu/intel.c | 12 ++++++++ arch/x86/mm/pat.c | 31 +++++++++++++------- 5 files changed, 32 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index b8493b3b989..9709fdff661 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -5,10 +5,8 @@ #ifdef CONFIG_X86_PAT extern int pat_enabled; -extern void validate_pat_support(struct cpuinfo_x86 *c); #else static const int pat_enabled; -static inline void validate_pat_support(struct cpuinfo_x86 *c) { } #endif extern void pat_init(void); @@ -17,6 +15,4 @@ extern int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *ret_type); extern int free_memtype(u64 start, u64 end); -extern void pat_disable(char *reason); - #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 2cf23634b6d..4e581fdc0a5 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -143,37 +143,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) return; #endif } - -#ifdef CONFIG_X86_PAT -void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) -{ - if (!cpu_has_pat) - pat_disable("PAT not supported by CPU."); - - switch (c->x86_vendor) { - case X86_VENDOR_INTEL: - /* - * There is a known erratum on Pentium III and Core Solo - * and Core Duo CPUs. - * " Page with PAT set to WC while associated MTRR is UC - * may consolidate to UC " - * Because of this erratum, it is better to stick with - * setting WC in MTRR rather than using PAT on these CPUs. - * - * Enable PAT WC only on P4, Core 2 or later CPUs. - */ - if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) - return; - - pat_disable("PAT WC disabled due to known CPU erratum."); - return; - - case X86_VENDOR_AMD: - case X86_VENDOR_CENTAUR: - case X86_VENDOR_TRANSMETA: - return; - } - - pat_disable("PAT disabled. Not yet verified on this CPU type."); -} -#endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 83492b1f93b..0f8656361e0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -570,8 +570,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_early_init) this_cpu->c_early_init(c); - validate_pat_support(c); - #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 8ea6929e974..20ce03acf04 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -50,6 +50,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 == 6 && c->x86_model < 15) + clear_cpu_cap(c, X86_FEATURE_PAT); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 8b08fb95527..430cb44dd3f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -30,7 +30,7 @@ #ifdef CONFIG_X86_PAT int __read_mostly pat_enabled = 1; -void __cpuinit pat_disable(char *reason) +void __cpuinit pat_disable(const char *reason) { pat_enabled = 0; printk(KERN_INFO "%s\n", reason); @@ -42,6 +42,11 @@ static int __init nopat(char *str) return 0; } early_param("nopat", nopat); +#else +static inline void pat_disable(const char *reason) +{ + (void)reason; +} #endif @@ -78,16 +83,20 @@ void pat_init(void) if (!pat_enabled) return; - /* Paranoia check. */ - if (!cpu_has_pat && boot_pat_state) { - /* - * If this happens we are on a secondary CPU, but - * switched to PAT on the boot CPU. We have no way to - * undo PAT. - */ - printk(KERN_ERR "PAT enabled, " - "but not supported by secondary CPU\n"); - BUG(); + if (!cpu_has_pat) { + if (!boot_pat_state) { + pat_disable("PAT not supported by CPU."); + return; + } else { + /* + * If this happens we are on a secondary CPU, but + * switched to PAT on the boot CPU. We have no way to + * undo PAT. + */ + printk(KERN_ERR "PAT enabled, " + "but not supported by secondary CPU\n"); + BUG(); + } } /* Set PWT to Write-Combining. All other bits stay the same */ From b38b0665905538e76e26f2a4c686179abb1f69f6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:20:50 -0800 Subject: [PATCH 174/682] x86: filter CPU features dependent on unavailable CPUID levels Impact: Fixes potential crashes on misconfigured systems. Some CPU features require specific CPUID levels to be available in order to function, as they contain information about the operation of a specific feature. However, some BIOSes and virtualization software provide the ability to mask CPUID levels in order to support legacy operating systems. We try to enable such CPUID levels when we know how to do it, but for the remaining cases, filter out such CPU features when there is no way for us to support them. Do this in one place, in the CPUID code, with a table-driven approach. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 47 ++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f8656361e0..21f086b4c1a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -212,6 +212,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif +/* + * Some CPU features depend on higher CPUID levels, which may not always + * be available due to CPUID level capping or broken virtualization + * software. Add those features to this table to auto-disable them. + */ +struct cpuid_dependent_feature { + u32 feature; + u32 level; +}; +static const struct cpuid_dependent_feature __cpuinitconst +cpuid_dependent_features[] = { + { X86_FEATURE_MWAIT, 0x00000005 }, + { X86_FEATURE_DCA, 0x00000009 }, + { X86_FEATURE_XSAVE, 0x0000000d }, + { 0, 0 } +}; + +static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) +{ + const struct cpuid_dependent_feature *df; + for (df = cpuid_dependent_features; df->feature; df++) { + /* + * Note: cpuid_level is set to -1 if unavailable, but + * extended_extended_level is set to 0 if unavailable + * and the legitimate extended levels are all negative + * when signed; hence the weird messing around with + * signs here... + */ + if (cpu_has(c, df->feature) && + ((s32)df->feature < 0 ? + (u32)df->feature > (u32)c->extended_cpuid_level : + (s32)df->feature > (s32)c->cpuid_level)) { + clear_cpu_cap(c, df->feature); + if (warn) + printk(KERN_WARNING + "CPU: CPU feature %s disabled " + "due to lack of CPUID level 0x%x\n", + x86_cap_flags[df->feature], + df->level); + } + } +} + /* * Naming convention should be: [()] * This table only is used unless init_() below doesn't set it; @@ -573,6 +616,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP c->cpu_index = boot_cpu_id; #endif + filter_cpuid_features(c, false); } void __init early_cpu_init(void) @@ -706,6 +750,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) * we do "generic changes." */ + /* Filter out anything that depends on CPUID levels we don't have */ + filter_cpuid_features(c, true); + /* If the model name is still unset, do table lookup. */ if (!c->x86_model_id[0]) { char *p; From 2d4d57db692ea790e185656516e6ebe8791f1788 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 25 Jan 2009 12:50:13 -0800 Subject: [PATCH 175/682] x86: micro-optimize __raw_read_trylock() The current version of __raw_read_trylock starts with decrementing the lock and read its new value as a separate operation after that. That makes 3 dereferences (read, write (after sub), read) whereas a single atomic_dec_return does only two pointers dereferences (read, write). Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/include/asm/spinlock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index d17c91981da..4d3dcc51cac 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -329,8 +329,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock) { atomic_t *count = (atomic_t *)lock; - atomic_dec(count); - if (atomic_read(count) >= 0) + if (atomic_dec_return(count) >= 0) return 1; atomic_inc(count); return 0; From 34707bcd0452aba644396767bc9fb61585bdab4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 14:18:05 +0100 Subject: [PATCH 176/682] x86, debug: remove early_printk() #ifdefs from head_32.S Impact: cleanup Remove such constructs: #ifdef CONFIG_EARLY_PRINTK call early_printk #else call printk #endif Not only are they ugly, they are also pointless: a call to printk() maps to early_printk during early bootup anyway, if CONFIG_EARLY_PRINTK is enabled. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e835b4eea70..9f141071160 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -548,11 +548,7 @@ early_fault: pushl %eax pushl %edx /* trapno */ pushl $fault_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else call printk -#endif #endif call dump_stack hlt_loop: @@ -580,11 +576,7 @@ ignore_int: pushl 32(%esp) pushl 40(%esp) pushl $int_msg -#ifdef CONFIG_EARLY_PRINTK - call early_printk -#else call printk -#endif addl $(5*4),%esp popl %ds popl %es From d5e397cb49b53381e4c99a064ca733c665646de8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Jan 2009 06:09:00 +0100 Subject: [PATCH 177/682] x86: improve early fault/irq printout Impact: add a stack dump to early IRQs/faults Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9f141071160..84d05a4d7fc 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -577,6 +577,9 @@ ignore_int: pushl 40(%esp) pushl $int_msg call printk + + call dump_stack + addl $(5*4),%esp popl %ds popl %es @@ -652,7 +655,7 @@ early_recursion_flag: .long 0 int_msg: - .asciz "Unknown interrupt or fault at EIP %p %p %p\n" + .asciz "Unknown interrupt or fault at: %p %p %p\n" fault_msg: /* fault info: */ From 5a611268b69f05262936dd177205acbce4471358 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 26 Jan 2009 08:44:05 -0500 Subject: [PATCH 178/682] generic, x86: fix __per_cpu_load relocation This patch fixes this linker error: WARNING: Absolute relocations present Offset Info Type Sym.Value Sym.Name c0a4e07d 00e78001 R_386_32 c0ab0000 __per_cpu_load Now, __per_cpu_load is a section-relative symbol: c0aa4000 D __per_cpu_load c0aa4000 A __per_cpu_load_abs Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 53e21f36a80..f3180a85c66 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,17 +451,18 @@ * end offset. */ #define PERCPU_VADDR(vaddr, phdr) \ - VMLINUX_SYMBOL(__per_cpu_load) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ + VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ + VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version From 0d77e7f04d5da160307f4f5c030a171e004f602b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 179/682] x86: merge setup_per_cpu_maps() into setup_per_cpu_areas() Impact: minor optimization Eliminates the need for two loops over possible cpus. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 48 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 90b8e154bb5..d0b1476490a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -97,33 +97,6 @@ static inline void setup_cpu_local_masks(void) #endif /* CONFIG_X86_32 */ #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -/* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. - */ -static void __init setup_per_cpu_maps(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); - per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); -#endif - } - - /* indicate the early static arrays will soon be gone */ - early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; - early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA - early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; -#endif -} #ifdef CONFIG_X86_64 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { @@ -181,6 +154,19 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + /* + * Copy data used in early init routines from the initial arrays to the + * per cpu data areas. These arrays then become expendable and the + * *_early_ptr's are zeroed indicating that the static arrays are gone. + */ + per_cpu(x86_cpu_to_apicid, cpu) = + early_per_cpu_map(x86_cpu_to_apicid, cpu); + per_cpu(x86_bios_cpu_apicid, cpu) = + early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#ifdef X86_64_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; @@ -195,8 +181,12 @@ void __init setup_per_cpu_areas(void) DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } - /* Setup percpu data maps */ - setup_per_cpu_maps(); + /* indicate the early static arrays will soon be gone */ + early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; + early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#ifdef X86_64_NUMA + early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; +#endif /* Setup node to cpumask map */ setup_node_to_cpumask_map(); From 6470aff619fbb9dff8dfe8afa5033084cd55ca20 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 180/682] x86: move 64-bit NUMA code Impact: Code movement, no functional change. Move the 64-bit NUMA code from setup_percpu.c to numa_64.c Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/topology.h | 6 + arch/x86/kernel/setup_percpu.c | 237 +------------------------------- arch/x86/mm/numa_64.c | 217 +++++++++++++++++++++++++++++ 3 files changed, 228 insertions(+), 232 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 10022ed3a4b..77cfb2cfb38 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node) return &node_to_cpumask_map[node]; } +static inline void setup_node_to_cpumask_map(void) { } + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -120,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ +extern void setup_node_to_cpumask_map(void); + /* * Replace default node_to_cpumask_ptr with optimized version * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" @@ -218,6 +222,8 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } +static inline void setup_node_to_cpumask_map(void) { } + /* * Replace default node_to_cpumask_ptr with optimized version * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d0b1476490a..cb6d622520b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -51,32 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) -#define X86_64_NUMA 1 /* (used later) */ -DEFINE_PER_CPU(int, node_number) = 0; -EXPORT_PER_CPU_SYMBOL(node_number); - -/* - * Map cpu index to node index - */ -DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); - -/* - * Which logical CPUs are on which nodes - */ -cpumask_t *node_to_cpumask_map; -EXPORT_SYMBOL(node_to_cpumask_map); - -/* - * Setup node_to_cpumask_map - */ -static void __init setup_node_to_cpumask_map(void); - -#else -static inline void setup_node_to_cpumask_map(void) { } -#endif - #ifdef CONFIG_X86_64 /* correctly size the local cpu masks */ @@ -163,13 +137,13 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); -#ifdef X86_64_NUMA - per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); -#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; +#ifdef CONFIG_NUMA + per_cpu(x86_cpu_to_node_map, cpu) = + early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif /* * Up to this point, CPU0 has been using .data.init * area. Reload %gs offset for CPU0. @@ -184,7 +158,7 @@ void __init setup_per_cpu_areas(void) /* indicate the early static arrays will soon be gone */ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; -#ifdef X86_64_NUMA +#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif @@ -197,204 +171,3 @@ void __init setup_per_cpu_areas(void) #endif -#ifdef X86_64_NUMA - -/* - * Allocate node_to_cpumask_map based on number of available nodes - * Requires node_possible_map to be valid. - * - * Note: node_to_cpumask() is not valid until after this is done. - * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) - */ -static void __init setup_node_to_cpumask_map(void) -{ - unsigned int node, num = 0; - cpumask_t *map; - - /* setup nr_node_ids if not done yet */ - if (nr_node_ids == MAX_NUMNODES) { - for_each_node_mask(node, node_possible_map) - num = node; - nr_node_ids = num + 1; - } - - /* allocate the map */ - map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); - DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); - - pr_debug("Node to cpumask map at %p for %d nodes\n", - map, nr_node_ids); - - /* node_to_cpumask() will now work */ - node_to_cpumask_map = map; -} - -void __cpuinit numa_set_node(int cpu, int node) -{ - int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); - - /* early setting, no percpu area yet */ - if (cpu_to_node_map) { - cpu_to_node_map[cpu] = node; - return; - } - -#ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { - printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); - dump_stack(); - return; - } -#endif - per_cpu(x86_cpu_to_node_map, cpu) = node; - - if (node != NUMA_NO_NODE) - per_cpu(node_number, cpu) = node; -} - -void __cpuinit numa_clear_node(int cpu) -{ - numa_set_node(cpu, NUMA_NO_NODE); -} - -#ifndef CONFIG_DEBUG_PER_CPU_MAPS - -void __cpuinit numa_add_cpu(int cpu) -{ - cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); -} - -#else /* CONFIG_DEBUG_PER_CPU_MAPS */ - -/* - * --------- debug versions of the numa functions --------- - */ -static void __cpuinit numa_set_cpumask(int cpu, int enable) -{ - int node = early_cpu_to_node(cpu); - cpumask_t *mask; - char buf[64]; - - if (node_to_cpumask_map == NULL) { - printk(KERN_ERR "node_to_cpumask_map NULL\n"); - dump_stack(); - return; - } - - mask = &node_to_cpumask_map[node]; - if (enable) - cpu_set(cpu, *mask); - else - cpu_clear(cpu, *mask); - - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", - enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); -} - -void __cpuinit numa_add_cpu(int cpu) -{ - numa_set_cpumask(cpu, 1); -} - -void __cpuinit numa_remove_cpu(int cpu) -{ - numa_set_cpumask(cpu, 0); -} - -int cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) { - printk(KERN_WARNING - "cpu_to_node(%d): usage too early!\n", cpu); - dump_stack(); - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} -EXPORT_SYMBOL(cpu_to_node); - -/* - * Same function as cpu_to_node() but used if called before the - * per_cpu areas are setup. - */ -int early_cpu_to_node(int cpu) -{ - if (early_per_cpu_ptr(x86_cpu_to_node_map)) - return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - - if (!per_cpu_offset(cpu)) { - printk(KERN_WARNING - "early_cpu_to_node(%d): no per_cpu area!\n", cpu); - dump_stack(); - return NUMA_NO_NODE; - } - return per_cpu(x86_cpu_to_node_map, cpu); -} - - -/* empty cpumask */ -static const cpumask_t cpu_mask_none; - -/* - * Returns a pointer to the bitmask of CPUs on Node 'node'. - */ -const cpumask_t *cpumask_of_node(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "cpumask_of_node(%d): no node_to_cpumask_map!\n", - node); - dump_stack(); - return (const cpumask_t *)&cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "cpumask_of_node(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return &cpu_mask_none; - } - return &node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(cpumask_of_node); - -/* - * Returns a bitmask of CPUs on Node 'node'. - * - * Side note: this function creates the returned cpumask on the stack - * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. - */ -cpumask_t node_to_cpumask(int node) -{ - if (node_to_cpumask_map == NULL) { - printk(KERN_WARNING - "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); - dump_stack(); - return cpu_online_map; - } - if (node >= nr_node_ids) { - printk(KERN_WARNING - "node_to_cpumask(%d): node > nr_node_ids(%d)\n", - node, nr_node_ids); - dump_stack(); - return cpu_mask_none; - } - return node_to_cpumask_map[node]; -} -EXPORT_SYMBOL(node_to_cpumask); - -/* - * --------- end of debug versions of the numa functions --------- - */ - -#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ - -#endif /* X86_64_NUMA */ - diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 71a14f89f89..08d140fbc31 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -20,6 +20,12 @@ #include #include +#ifdef CONFIG_DEBUG_PER_CPU_MAPS +# define DBG(x...) printk(KERN_DEBUG x) +#else +# define DBG(x...) +#endif + struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); @@ -33,6 +39,21 @@ int numa_off __initdata; static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; +DEFINE_PER_CPU(int, node_number) = 0; +EXPORT_PER_CPU_SYMBOL(node_number); + +/* + * Map cpu index to node index + */ +DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); + +/* + * Which logical CPUs are on which nodes + */ +cpumask_t *node_to_cpumask_map; +EXPORT_SYMBOL(node_to_cpumask_map); + /* * Given a shift value, try to populate memnodemap[] * Returns : @@ -640,3 +661,199 @@ void __init init_cpu_to_node(void) #endif +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) + */ +void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + cpumask_t *map; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); + DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids); + + pr_debug("Node to cpumask map at %p for %d nodes\n", + map, nr_node_ids); + + /* node_to_cpumask() will now work */ + node_to_cpumask_map = map; +} + +void __cpuinit numa_set_node(int cpu, int node) +{ + int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); + + /* early setting, no percpu area yet */ + if (cpu_to_node_map) { + cpu_to_node_map[cpu] = node; + return; + } + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); + dump_stack(); + return; + } +#endif + per_cpu(x86_cpu_to_node_map, cpu) = node; + + if (node != NUMA_NO_NODE) + per_cpu(node_number, cpu) = node; +} + +void __cpuinit numa_clear_node(int cpu) +{ + numa_set_node(cpu, NUMA_NO_NODE); +} + +#ifndef CONFIG_DEBUG_PER_CPU_MAPS + +void __cpuinit numa_add_cpu(int cpu) +{ + cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); +} + +#else /* CONFIG_DEBUG_PER_CPU_MAPS */ + +/* + * --------- debug versions of the numa functions --------- + */ +static void __cpuinit numa_set_cpumask(int cpu, int enable) +{ + int node = early_cpu_to_node(cpu); + cpumask_t *mask; + char buf[64]; + + if (node_to_cpumask_map == NULL) { + printk(KERN_ERR "node_to_cpumask_map NULL\n"); + dump_stack(); + return; + } + + mask = &node_to_cpumask_map[node]; + if (enable) + cpu_set(cpu, *mask); + else + cpu_clear(cpu, *mask); + + cpulist_scnprintf(buf, sizeof(buf), mask); + printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", + enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); +} + +void __cpuinit numa_add_cpu(int cpu) +{ + numa_set_cpumask(cpu, 1); +} + +void __cpuinit numa_remove_cpu(int cpu) +{ + numa_set_cpumask(cpu, 0); +} + +int cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) { + printk(KERN_WARNING + "cpu_to_node(%d): usage too early!\n", cpu); + dump_stack(); + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} +EXPORT_SYMBOL(cpu_to_node); + +/* + * Same function as cpu_to_node() but used if called before the + * per_cpu areas are setup. + */ +int early_cpu_to_node(int cpu) +{ + if (early_per_cpu_ptr(x86_cpu_to_node_map)) + return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; + + if (!per_cpu_offset(cpu)) { + printk(KERN_WARNING + "early_cpu_to_node(%d): no per_cpu area!\n", cpu); + dump_stack(); + return NUMA_NO_NODE; + } + return per_cpu(x86_cpu_to_node_map, cpu); +} + + +/* empty cpumask */ +static const cpumask_t cpu_mask_none; + +/* + * Returns a pointer to the bitmask of CPUs on Node 'node'. + */ +const cpumask_t *cpumask_of_node(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "cpumask_of_node(%d): no node_to_cpumask_map!\n", + node); + dump_stack(); + return (const cpumask_t *)&cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "cpumask_of_node(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return &cpu_mask_none; + } + return &node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(cpumask_of_node); + +/* + * Returns a bitmask of CPUs on Node 'node'. + * + * Side note: this function creates the returned cpumask on the stack + * so with a high NR_CPUS count, excessive stack space is used. The + * node_to_cpumask_ptr function should be used whenever possible. + */ +cpumask_t node_to_cpumask(int node) +{ + if (node_to_cpumask_map == NULL) { + printk(KERN_WARNING + "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); + dump_stack(); + return cpu_online_map; + } + if (node >= nr_node_ids) { + printk(KERN_WARNING + "node_to_cpumask(%d): node > nr_node_ids(%d)\n", + node, nr_node_ids); + dump_stack(); + return cpu_mask_none; + } + return node_to_cpumask_map[node]; +} +EXPORT_SYMBOL(node_to_cpumask); + +/* + * --------- end of debug versions of the numa functions --------- + */ + +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ From 2f2f52bad72f5e1ca5d1b9ad00a7b57a8cbd9159 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 181/682] x86: move setup_cpu_local_masks() Impact: Code movement, no functional change. Move setup_cpu_local_masks() to kernel/cpu/common.c, where the masks are defined. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/cpumask.h | 4 ++++ arch/x86/kernel/cpu/common.c | 9 +++++++++ arch/x86/kernel/setup_percpu.c | 19 ------------------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 26c6dad9047..a7f3c75f8ad 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -10,6 +10,8 @@ extern cpumask_var_t cpu_callout_mask; extern cpumask_var_t cpu_initialized_mask; extern cpumask_var_t cpu_sibling_setup_mask; +extern void setup_cpu_local_masks(void); + #else /* CONFIG_X86_32 */ extern cpumask_t cpu_callin_map; @@ -22,6 +24,8 @@ extern cpumask_t cpu_sibling_setup_map; #define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) #define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) +static inline void setup_cpu_local_masks(void) { } + #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 99904f288d6..67e30c8a282 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -52,6 +52,15 @@ cpumask_var_t cpu_initialized_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* correctly size the local cpu masks */ +void setup_cpu_local_masks(void) +{ + alloc_bootmem_cpumask_var(&cpu_initialized_mask); + alloc_bootmem_cpumask_var(&cpu_callin_mask); + alloc_bootmem_cpumask_var(&cpu_callout_mask); + alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +} + #else /* CONFIG_X86_32 */ cpumask_t cpu_callin_map; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cb6d622520b..7bebdba8eb8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -51,25 +51,6 @@ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); -#ifdef CONFIG_X86_64 - -/* correctly size the local cpu masks */ -static void setup_cpu_local_masks(void) -{ - alloc_bootmem_cpumask_var(&cpu_initialized_mask); - alloc_bootmem_cpumask_var(&cpu_callin_mask); - alloc_bootmem_cpumask_var(&cpu_callout_mask); - alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); -} - -#else /* CONFIG_X86_32 */ - -static inline void setup_cpu_local_masks(void) -{ -} - -#endif /* CONFIG_X86_32 */ - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA #ifdef CONFIG_X86_64 From 74631a248dc2c2129a96f6b8b706ed54bb5c3d3c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 182/682] x86: always page-align per-cpu area start and size Impact: cleanup The way the code is written, align is always PAGE_SIZE. Simplify the code by removing the align variable. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 7bebdba8eb8..5d4a4964a8b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -69,15 +69,12 @@ EXPORT_SYMBOL(__per_cpu_offset); */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size; + ssize_t size; char *ptr; int cpu; - unsigned long align = 1; /* Copy section for each CPU (we discard the original) */ - old_size = PERCPU_ENOUGH_ROOM; - align = max_t(unsigned long, PAGE_SIZE, align); - size = roundup(old_size, align); + size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -86,20 +83,17 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = __alloc_bootmem(size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages(size); pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); pr_debug("per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { - ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, - __pa(MAX_DMA_ADDRESS)); + ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); pr_debug("per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); } From ec70de8b04bf37213982a5e8f303bc38679f3f8e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 183/682] x86: move apic variables to apic.c Impact: Code movement Move the variable definitions to apic.c. Ifdef the copying of the two early per-cpu variables, since Voyager doesn't use them. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/apic.c | 18 ++++++++++++++++++ arch/x86/kernel/setup_percpu.c | 22 ++-------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 1df341a528a..c6f15647eba 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -60,6 +60,24 @@ # error SPURIOUS_APIC_VECTOR definition error #endif +unsigned int num_processors; +unsigned disabled_cpus __cpuinitdata; +/* Processor that is doing the boot up */ +unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL(boot_cpu_physical_apicid); +unsigned int max_physical_apicid; + +/* Bitmask of physically existing CPUs */ +physid_mask_t phys_cpu_present_map; + +/* + * Map cpu index to physical APIC ID + */ +DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); + #ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5d4a4964a8b..d367996693e 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -31,26 +31,6 @@ DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); #endif -#ifdef CONFIG_X86_LOCAL_APIC -unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; -/* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); -unsigned int max_physical_apicid; - -/* Bitmask of physically existing CPUs */ -physid_mask_t phys_cpu_present_map; -#endif - -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA #ifdef CONFIG_X86_64 @@ -108,10 +88,12 @@ void __init setup_per_cpu_areas(void) * per cpu data areas. These arrays then become expendable and the * *_early_ptr's are zeroed indicating that the static arrays are gone. */ +#ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); +#endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; From 996db817e3d1529d711e55b938d72ae4060b39fd Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:47 +0900 Subject: [PATCH 184/682] x86: only compile setup_percpu.o on SMP Impact: Minor build optimization Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/Makefile | 3 ++- arch/x86/kernel/setup_percpu.c | 6 ------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a99437c965c..73de055c29c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o +obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o @@ -59,6 +59,7 @@ apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d367996693e..f30ff691c34 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -22,14 +22,8 @@ # define DBG(x...) #endif -/* - * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but - * voyager wants cpu_number too. - */ -#ifdef CONFIG_SMP DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); -#endif #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA From 1688401a0fddba8991aa5c0943b8ae9583998d60 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 185/682] x86: move this_cpu_offset Impact: Small cleanup Define BOOT_PERCPU_OFFSET and use it for this_cpu_offset and __per_cpu_offset initializers. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 15 ++++++++++----- arch/x86/kernel/smpcommon.c | 7 ------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f30ff691c34..36c2e81dfc3 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -25,15 +25,20 @@ DEFINE_PER_CPU(int, cpu_number); EXPORT_PER_CPU_SYMBOL(cpu_number); +#ifdef CONFIG_X86_64 +#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) +#else +#define BOOT_PERCPU_OFFSET 0 +#endif + +DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA -#ifdef CONFIG_X86_64 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0] = (unsigned long)__per_cpu_load, + [0] = BOOT_PERCPU_OFFSET, }; -#else -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -#endif EXPORT_SYMBOL(__per_cpu_offset); /* diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index add36b4e37c..5ec29a1a846 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c @@ -5,13 +5,6 @@ #include #include -#ifdef CONFIG_X86_64 -DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load; -#else -DEFINE_PER_CPU(unsigned long, this_cpu_off); -#endif -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - #ifdef CONFIG_X86_32 /* * Initialize the CPU's GDT. This is either the boot CPU doing itself From 34019be1cd2941128b5de6d7c0fbdb51f967d268 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 186/682] x86: don't assume boot cpu is #0 Impact: minor cleanup Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 36c2e81dfc3..be77f1a1231 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -37,7 +38,7 @@ EXPORT_PER_CPU_SYMBOL(this_cpu_off); #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { - [0] = BOOT_PERCPU_OFFSET, + [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); @@ -101,10 +102,10 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif /* - * Up to this point, CPU0 has been using .data.init - * area. Reload %gs offset for CPU0. + * Up to this point, the boot CPU has been using .data.init + * area. Reload %gs offset for the boot CPU. */ - if (cpu == 0) + if (cpu == boot_cpu_id) load_gs_base(cpu); #endif From 89c9c4c58ee86e6e8802597271f23679e0c46647 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 187/682] x86: make Voyager use x86 per-cpu setup. Impact: standardize all x86 platforms on same setup code With the preceding changes, Voyager can use the same per-cpu setup code as all the other x86 platforms. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 2 +- arch/x86/kernel/setup_percpu.c | 5 ----- arch/x86/mach-voyager/voyager_smp.c | 3 --- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5a29b792cb8..d6218e6c982 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -133,7 +133,7 @@ config ARCH_HAS_CACHE_LINE_SIZE def_bool y config HAVE_SETUP_PER_CPU_AREA - def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) + def_bool y config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index be77f1a1231..599dc1cc1da 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -35,8 +35,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA - unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; @@ -125,6 +123,3 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); } - -#endif - diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 96f15b09a4c..dd82f2052f3 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -531,7 +531,6 @@ static void __init do_boot_cpu(__u8 cpu) stack_start.sp = (void *)idle->thread.sp; init_gdt(cpu); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1749,7 +1748,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { init_gdt(smp_processor_id()); - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); @@ -1782,7 +1780,6 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - percpu_write(cpu_number, hard_smp_processor_id()); } static void voyager_send_call_func(cpumask_t callmask) From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 188/682] x86: initialize per-cpu GDT segment in per-cpu setup Impact: cleanup Rename init_gdt() to setup_percpu_segment(), and move it to setup_percpu.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 - arch/x86/kernel/Makefile | 3 +-- arch/x86/kernel/setup_percpu.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 4 ---- arch/x86/kernel/smpcommon.c | 25 ------------------------- arch/x86/mach-voyager/voyager_smp.c | 2 -- arch/x86/xen/smp.c | 1 - 7 files changed, 15 insertions(+), 35 deletions(-) delete mode 100644 arch/x86/kernel/smpcommon.c diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 48676b943b9..32c30b02b51 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -778,7 +778,6 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(void); extern void cpu_init(void); -extern void init_gdt(int cpu); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 73de055c29c..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,8 +60,7 @@ obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o -obj-$(CONFIG_X86_32_SMP) += smpcommon.o -obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o +obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 599dc1cc1da..bcca3a7b374 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -40,6 +40,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static inline void setup_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + struct desc_struct gdt; + + pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, + 0x2 | DESCTYPE_S, 0x8); + gdt.s = 1; + write_gdt_entry(get_cpu_gdt_table(cpu), + GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +#endif +} + /* * Great future plan: * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. @@ -81,6 +94,7 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; + setup_percpu_segment(cpu); /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index def770b57b5..f9dbcff4354 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -793,7 +793,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) do_rest: per_cpu(current_task, cpu) = c_idle.idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); #else @@ -1186,9 +1185,6 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); -#ifdef CONFIG_X86_32 - init_gdt(me); -#endif switch_to_new_gdt(); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c deleted file mode 100644 index 5ec29a1a846..00000000000 --- a/arch/x86/kernel/smpcommon.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include -#include -#include - -#ifdef CONFIG_X86_32 -/* - * Initialize the CPU's GDT. This is either the boot CPU doing itself - * (still using the master per-cpu area), or a CPU doing it for a - * secondary which will soon come up. - */ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct gdt; - - pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - - write_gdt_entry(get_cpu_gdt_table(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); -} -#endif diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index dd82f2052f3..331cd6d5648 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.sp = (void *)idle->thread.sp; - init_gdt(cpu); per_cpu(current_task, cpu) = idle; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); @@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - init_gdt(smp_processor_id()); switch_to_new_gdt(); cpu_set(smp_processor_id(), cpu_online_map); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72c2eb9b64c..7735e3dd359 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -281,7 +281,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) per_cpu(current_task, cpu) = idle; #ifdef CONFIG_X86_32 - init_gdt(cpu); irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); From 1825b8edc2034c012ae48f797d74efd1bd9d4f72 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 189/682] x86: remove extra barriers from load_gs_base() Impact: optimization mb() generates an mfence instruction, which is not needed here. Only a compiler barrier is needed, and that is handled by the memory clobber in the wrmsrl function. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 32c30b02b51..794234eba31 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -397,10 +397,7 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); static inline void load_gs_base(int cpu) { - /* Memory clobbers used to order pda/percpu accesses */ - mb(); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); - mb(); } #endif From 2697fbd5faf19c84c17441b1752bdcbdcfd1248c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: [PATCH 190/682] x86: load new GDT after setting up boot cpu per-cpu area Impact: sync 32 and 64-bit code Merge load_gs_base() into switch_to_new_gdt(). Load the GDT and per-cpu state for the boot cpu when its new area is set up. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 5 ----- arch/x86/kernel/cpu/common.c | 15 +++++++++------ arch/x86/kernel/setup_percpu.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 794234eba31..befa20b4a68 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -394,11 +394,6 @@ union irq_stack_union { DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); - -static inline void load_gs_base(int cpu) -{ - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -} #endif extern void print_cpu_info(struct cpuinfo_x86 *); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e30c8a282..0c766b80d91 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -258,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; void switch_to_new_gdt(void) { struct desc_ptr gdt_descr; + int cpu = smp_processor_id(); - gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); + gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); + /* Reload the per-cpu base */ #ifdef CONFIG_X86_32 - asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif } @@ -968,10 +973,6 @@ void __cpuinit cpu_init(void) struct task_struct *me; int i; - loadsegment(fs, 0); - loadsegment(gs, 0); - load_gs_base(cpu); - #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) @@ -993,6 +994,8 @@ void __cpuinit cpu_init(void) */ switch_to_new_gdt(); + loadsegment(fs, 0); + load_idt((const struct desc_ptr *)&idt_descr); memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index bcca3a7b374..4caa78d7cb1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -112,14 +112,14 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); +#endif #endif /* * Up to this point, the boot CPU has been using .data.init - * area. Reload %gs offset for the boot CPU. + * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - load_gs_base(cpu); -#endif + switch_to_new_gdt(); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } From 22f25138c345ec46a13744c93c093ff822cd98d1 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 27 Jan 2009 14:21:37 +0900 Subject: [PATCH 191/682] x86: fix build breakage on voyage Impact: build fix x86_cpu_to_apicid and x86_bios_cpu_apicid aren't defined for voyage. Earlier patch forgot to conditionalize early percpu clearing. Fix it. Signed-off-by: James Bottomley Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4caa78d7cb1..c7458ead22d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -125,8 +125,10 @@ void __init setup_per_cpu_areas(void) } /* indicate the early static arrays will soon be gone */ +#ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +#endif #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif From cf3997f507624757f149fcc42b76fb03c151fb65 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jan 2009 14:25:05 +0900 Subject: [PATCH 192/682] x86: clean up indentation in setup_per_cpu_areas() Impact: cosmetic cleanup Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c7458ead22d..0d1e7ac439f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -96,22 +96,25 @@ void __init setup_per_cpu_areas(void) per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* - * Copy data used in early init routines from the initial arrays to the - * per cpu data areas. These arrays then become expendable and the - * *_early_ptr's are zeroed indicating that the static arrays are gone. + * Copy data used in early init routines from the + * initial arrays to the per cpu data areas. These + * arrays then become expendable and the *_early_ptr's + * are zeroed indicating that the static arrays are + * gone. */ #ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = - early_per_cpu_map(x86_cpu_to_apicid, cpu); + early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = - early_per_cpu_map(x86_bios_cpu_apicid, cpu); + early_per_cpu_map(x86_bios_cpu_apicid, cpu); #endif #ifdef CONFIG_X86_64 per_cpu(irq_stack_ptr, cpu) = - per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; + per_cpu(irq_stack_union.irq_stack, cpu) + + IRQ_STACK_SIZE - 64; #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = - early_per_cpu_map(x86_cpu_to_node_map, cpu); + early_per_cpu_map(x86_cpu_to_node_map, cpu); #endif #endif /* From 042cbaf88ab48e11afb725541e3c2cbf5b483680 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 27 Jan 2009 21:45:57 +0100 Subject: [PATCH 193/682] x86 setup: fix asm constraints in vesa_store_edid Impact: fix potential miscompile (currently believed non-manifest) As the comment explains, the VBE DDC call can clobber any register. Tell the compiler about that fact. Signed-off-by: Andreas Schwab Signed-off-by: H. Peter Anvin --- arch/x86/boot/video-vesa.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 75115849af3..4a58c8ce3f6 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -269,9 +269,8 @@ void vesa_store_edid(void) we genuinely have to assume all registers are destroyed here. */ asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" - : "+a" (ax), "+b" (bx) - : "c" (cx), "D" (di) - : "esi"); + : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) + : : "esi", "edx"); if (ax != 0x004f) return; /* No EDID */ @@ -285,9 +284,9 @@ void vesa_store_edid(void) dx = 0; /* EDID block number */ di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ asm(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) - : "c" (cx), "D" (di) - : "esi"); + : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), + "+c" (cx), "+D" (di) + : : "esi"); #endif /* CONFIG_FIRMWARE_EDID */ } From 8f6d86dc4178957d9814b1784848012a927a3898 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Jan 2009 21:41:34 +0100 Subject: [PATCH 194/682] x86: cpu_init(): remove ugly #ifdef construct around debug register clear Impact: Cleanup While I was looking through the new and improved bootstrap code - great work that, thanks! I found the below a slight improvement. Remove unnecessary ugly #ifdef construct around debug register clear. Signed-off-by: Peter Zijlstra Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f0025846244..3f272d42d09 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1071,22 +1071,19 @@ void __cpuinit cpu_init(void) */ if (kgdb_connected && arch_kgdb_ops.correct_hw_break) arch_kgdb_ops.correct_hw_break(); - else { + else #endif - /* - * Clear all 6 debug registers: - */ - - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); - set_debugreg(0UL, 6); - set_debugreg(0UL, 7); -#ifdef CONFIG_KGDB - /* If the kgdb is connected no debug regs should be altered. */ + { + /* + * Clear all 6 debug registers: + */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); } -#endif fpu_init(); From 6e7a59944a2971c4fb400bfbecb2f68570086b05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:07:42 +0100 Subject: [PATCH 195/682] x86, genapic: refactor genapic_64.h Impact: pre unification cleanup Make genapic_64.h similar to genapic_32.h: reorder fields, unify types and bring in new entries. No existing functionality is affected. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic_64.h | 68 +++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 7bb092c5905..0a5f7122d9f 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -16,13 +16,62 @@ struct genapic { char *name; + + int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + u32 int_delivery_mode; u32 int_dest_mode; - int (*apic_id_registered)(void); + const struct cpumask *(*target_cpus)(void); + + int ESR_DISABLE; + + int apic_destination_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + int no_balance_irq; + int no_ioapic_check; + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); + + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); +#ifdef CONFIG_X86_32 + u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +#else + unsigned int (*phys_pkg_id)(int index_msb); +#endif + + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, + char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + +#ifdef CONFIG_SMP /* ipi */ void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *mask, @@ -30,16 +79,17 @@ struct genapic { void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); - /* */ - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - unsigned int (*phys_pkg_id)(int index_msb); - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; +#endif /* wakeup_secondary_cpu */ int (*wakeup_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); }; extern struct genapic *genapic; From 943d0f74d47724d0e33083674c16a834f080af2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:07:42 +0100 Subject: [PATCH 196/682] x86, genapic: refactor genapic_32.h Impact: pre unification cleanup Make genapic_32.h similar to genapic_64.h: reorder fields, unify types and bring in new entries. No existing functionality is affected. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic_32.h | 37 ++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 4334502d366..5808b7daf0a 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -21,19 +21,28 @@ struct mpc_cpu; struct genapic { char *name; - int (*probe)(void); + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); int (*apic_id_registered)(void); + + u32 int_delivery_mode; + u32 int_dest_mode; + const struct cpumask *(*target_cpus)(void); - int int_delivery_mode; - int int_dest_mode; + int ESR_DISABLE; + int apic_destination_logical; unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); + int no_balance_irq; int no_ioapic_check; + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); void (*setup_apic_routing)(void); @@ -45,22 +54,27 @@ struct genapic { void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); void (*enable_apic_mode)(void); +#ifdef CONFIG_X86_32 u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +#else + unsigned int (*phys_pkg_id)(int index_msb); +#endif - /* mpparse */ - /* When one of the next two hooks returns 1 the genapic - is switched to this. Essentially they are additional probe - functions. */ + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - unsigned (*get_apic_id)(unsigned long x); + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask); - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); #ifdef CONFIG_SMP /* ipi */ @@ -69,8 +83,11 @@ struct genapic { int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); #endif + /* wakeup_secondary_cpu */ int (*wakeup_cpu)(int apicid, unsigned long start_eip); + int trampoline_phys_low; int trampoline_phys_high; void (*wait_for_init_deassert)(atomic_t *deassert); From ef7471b13f3ef81074af1972b97355df9df3cdf3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:12:02 +0100 Subject: [PATCH 197/682] x86, genapic: unify struct genapic Move over the (now identical) struct genapic definitions from genapic_32/64.h to genapic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 96 +++++++++++++++++++++++++++++++ arch/x86/include/asm/genapic_32.h | 93 ------------------------------ arch/x86/include/asm/genapic_64.h | 91 ----------------------------- 3 files changed, 96 insertions(+), 184 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index d48bee663a6..3dea66a328e 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -1,5 +1,101 @@ +#ifndef _ASM_X86_GENAPIC_H +#define _ASM_X86_GENAPIC_H + +#include + +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ + +struct genapic { + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + + u32 int_delivery_mode; + u32 int_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int ESR_DISABLE; + + int apic_destination_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + int no_balance_irq; + int no_ioapic_check; + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*init_apic_ldr)(void); + + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); +#ifdef CONFIG_X86_32 + u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +#else + unsigned int (*phys_pkg_id)(int index_msb); +#endif + + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, + char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + +#ifdef CONFIG_SMP + /* ipi */ + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); +#endif + /* wakeup_secondary_cpu */ + int (*wakeup_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); +}; + #ifdef CONFIG_X86_32 # include "genapic_32.h" #else # include "genapic_64.h" #endif + +#endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 5808b7daf0a..a56785ed12a 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -4,99 +4,6 @@ #include #include -/* - * Generic APIC driver interface. - * - * An straight forward mapping of the APIC related parts of the - * x86 subarchitecture interface to a dynamic object. - * - * This is used by the "generic" x86 subarchitecture. - * - * Copyright 2003 Andi Kleen, SuSE Labs. - */ - -struct mpc_bus; -struct mpc_table; -struct mpc_cpu; - -struct genapic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_registered)(void); - - u32 int_delivery_mode; - u32 int_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int ESR_DISABLE; - - int apic_destination_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - - int no_balance_irq; - int no_ioapic_check; - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); - void (*init_apic_ldr)(void); - - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); -#ifdef CONFIG_X86_32 - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); -#else - unsigned int (*phys_pkg_id)(int index_msb); -#endif - - /* - * When one of the next two hooks returns 1 the genapic - * is switched to this. Essentially they are additional - * probe functions: - */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, - char *productid); - - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; - - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - -#ifdef CONFIG_SMP - /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); -#endif - /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); - - int trampoline_phys_low; - int trampoline_phys_high; - void (*wait_for_init_deassert)(atomic_t *deassert); - void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); - void (*inquire_remote_apic)(int apicid); -}; - #define APICFUNC(x) .x = x, /* More functions could be probably marked IPIFUNC and save some space diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 0a5f7122d9f..c70ca0d50eb 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -1,97 +1,6 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H -#include - -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC sub-arch data struct. - * - * Hacked for x86-64 by James Cleverdon from i386 architecture code by - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ - -struct genapic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_registered)(void); - - u32 int_delivery_mode; - u32 int_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int ESR_DISABLE; - - int apic_destination_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - - int no_balance_irq; - int no_ioapic_check; - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); - void (*init_apic_ldr)(void); - - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); -#ifdef CONFIG_X86_32 - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); -#else - unsigned int (*phys_pkg_id)(int index_msb); -#endif - - /* - * When one of the next two hooks returns 1 the genapic - * is switched to this. Essentially they are additional - * probe functions: - */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, - char *productid); - - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; - - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - -#ifdef CONFIG_SMP - /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); -#endif - /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); - - int trampoline_phys_low; - int trampoline_phys_high; - void (*wait_for_init_deassert)(atomic_t *deassert); - void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); - void (*inquire_remote_apic)(int apicid); -}; - extern struct genapic *genapic; extern struct genapic apic_flat; From ced733ec0bfe9a8a5140a7aefdfe802598e4b8c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:15:06 +0100 Subject: [PATCH 198/682] x86, genapic: finish unification Unify remaining bits of genapic_32/64.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 80 ++++++++++++++++++++++++++++++- arch/x86/include/asm/genapic_32.h | 65 ------------------------- arch/x86/include/asm/genapic_64.h | 19 -------- 3 files changed, 78 insertions(+), 86 deletions(-) delete mode 100644 arch/x86/include/asm/genapic_32.h delete mode 100644 arch/x86/include/asm/genapic_64.h diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 3dea66a328e..7df1b48fa35 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -93,9 +93,85 @@ struct genapic { }; #ifdef CONFIG_X86_32 -# include "genapic_32.h" + +#include +#include + +#define APICFUNC(x) .x = x, + +/* More functions could be probably marked IPIFUNC and save some space + in UP GENERICARCH kernels, but I don't have the nerve right now + to untangle this mess. -AK */ +#ifdef CONFIG_SMP +#define IPIFUNC(x) APICFUNC(x) #else -# include "genapic_64.h" +#define IPIFUNC(x) #endif +#define APIC_INIT(aname, aprobe) \ +{ \ + .name = aname, \ + .probe = aprobe, \ + .int_delivery_mode = INT_DELIVERY_MODE, \ + .int_dest_mode = INT_DEST_MODE, \ + .no_balance_irq = NO_BALANCE_IRQ, \ + .ESR_DISABLE = esr_disable, \ + .apic_destination_logical = APIC_DEST_LOGICAL, \ + APICFUNC(apic_id_registered) \ + APICFUNC(target_cpus) \ + APICFUNC(check_apicid_used) \ + APICFUNC(check_apicid_present) \ + APICFUNC(init_apic_ldr) \ + APICFUNC(ioapic_phys_id_map) \ + APICFUNC(setup_apic_routing) \ + APICFUNC(multi_timer_check) \ + APICFUNC(apicid_to_node) \ + APICFUNC(cpu_to_logical_apicid) \ + APICFUNC(cpu_present_to_apicid) \ + APICFUNC(apicid_to_cpu_present) \ + APICFUNC(setup_portio_remap) \ + APICFUNC(check_phys_apicid_present) \ + APICFUNC(mps_oem_check) \ + APICFUNC(get_apic_id) \ + .apic_id_mask = APIC_ID_MASK, \ + APICFUNC(cpu_mask_to_apicid) \ + APICFUNC(cpu_mask_to_apicid_and) \ + APICFUNC(vector_allocation_domain) \ + APICFUNC(acpi_madt_oem_check) \ + IPIFUNC(send_IPI_mask) \ + IPIFUNC(send_IPI_allbutself) \ + IPIFUNC(send_IPI_all) \ + APICFUNC(enable_apic_mode) \ + APICFUNC(phys_pkg_id) \ + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ + APICFUNC(wait_for_init_deassert) \ + APICFUNC(smp_callin_clear_local_apic) \ + APICFUNC(store_NMI_vector) \ + APICFUNC(restore_NMI_vector) \ + APICFUNC(inquire_remote_apic) \ +} + +extern struct genapic *genapic; +extern void es7000_update_genapic_to_cluster(void); + +#else /* CONFIG_X86_64: */ + +extern struct genapic *genapic; + +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; +extern int acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); + +extern struct genapic apic_x2apic_uv_x; +DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern void setup_apic_routing(void); + +#endif /* CONFIG_X86_64 */ + #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h deleted file mode 100644 index a56785ed12a..00000000000 --- a/arch/x86/include/asm/genapic_32.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _ASM_X86_GENAPIC_32_H -#define _ASM_X86_GENAPIC_32_H - -#include -#include - -#define APICFUNC(x) .x = x, - -/* More functions could be probably marked IPIFUNC and save some space - in UP GENERICARCH kernels, but I don't have the nerve right now - to untangle this mess. -AK */ -#ifdef CONFIG_SMP -#define IPIFUNC(x) APICFUNC(x) -#else -#define IPIFUNC(x) -#endif - -#define APIC_INIT(aname, aprobe) \ -{ \ - .name = aname, \ - .probe = aprobe, \ - .int_delivery_mode = INT_DELIVERY_MODE, \ - .int_dest_mode = INT_DEST_MODE, \ - .no_balance_irq = NO_BALANCE_IRQ, \ - .ESR_DISABLE = esr_disable, \ - .apic_destination_logical = APIC_DEST_LOGICAL, \ - APICFUNC(apic_id_registered) \ - APICFUNC(target_cpus) \ - APICFUNC(check_apicid_used) \ - APICFUNC(check_apicid_present) \ - APICFUNC(init_apic_ldr) \ - APICFUNC(ioapic_phys_id_map) \ - APICFUNC(setup_apic_routing) \ - APICFUNC(multi_timer_check) \ - APICFUNC(apicid_to_node) \ - APICFUNC(cpu_to_logical_apicid) \ - APICFUNC(cpu_present_to_apicid) \ - APICFUNC(apicid_to_cpu_present) \ - APICFUNC(setup_portio_remap) \ - APICFUNC(check_phys_apicid_present) \ - APICFUNC(mps_oem_check) \ - APICFUNC(get_apic_id) \ - .apic_id_mask = APIC_ID_MASK, \ - APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ - APICFUNC(vector_allocation_domain) \ - APICFUNC(acpi_madt_oem_check) \ - IPIFUNC(send_IPI_mask) \ - IPIFUNC(send_IPI_allbutself) \ - IPIFUNC(send_IPI_all) \ - APICFUNC(enable_apic_mode) \ - APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ - APICFUNC(wait_for_init_deassert) \ - APICFUNC(smp_callin_clear_local_apic) \ - APICFUNC(store_NMI_vector) \ - APICFUNC(restore_NMI_vector) \ - APICFUNC(inquire_remote_apic) \ -} - -extern struct genapic *genapic; -extern void es7000_update_genapic_to_cluster(void); - -#endif /* _ASM_X86_GENAPIC_32_H */ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h deleted file mode 100644 index c70ca0d50eb..00000000000 --- a/arch/x86/include/asm/genapic_64.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _ASM_X86_GENAPIC_64_H -#define _ASM_X86_GENAPIC_64_H - -extern struct genapic *genapic; - -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2apic_cluster; -extern struct genapic apic_x2apic_phys; -extern int acpi_madt_oem_check(char *, char *); - -extern void apic_send_IPI_self(int vector); - -extern struct genapic apic_x2apic_uv_x; -DECLARE_PER_CPU(int, x2apic_extra_bits); - -extern void setup_apic_routing(void); - -#endif /* _ASM_X86_GENAPIC_64_H */ From 505deeb1a228e5b0bf6ac5d0d78f4a4253a9efe9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:23:22 +0100 Subject: [PATCH 199/682] x86, genapic: cleanups Unify genapic.h some more. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 7df1b48fa35..19a5193e965 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -3,6 +3,9 @@ #include +#include +#include + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -13,7 +16,6 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ - struct genapic { char *name; @@ -85,6 +87,7 @@ struct genapic { int trampoline_phys_low; int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); void (*smp_callin_clear_local_apic)(void); void (*store_NMI_vector)(unsigned short *high, unsigned short *low); @@ -92,10 +95,9 @@ struct genapic { void (*inquire_remote_apic)(int apicid); }; -#ifdef CONFIG_X86_32 +extern struct genapic *genapic; -#include -#include +#ifdef CONFIG_X86_32 #define APICFUNC(x) .x = x, @@ -143,8 +145,8 @@ struct genapic { IPIFUNC(send_IPI_all) \ APICFUNC(enable_apic_mode) \ APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ APICFUNC(wait_for_init_deassert) \ APICFUNC(smp_callin_clear_local_apic) \ APICFUNC(store_NMI_vector) \ @@ -152,13 +154,10 @@ struct genapic { APICFUNC(inquire_remote_apic) \ } -extern struct genapic *genapic; extern void es7000_update_genapic_to_cluster(void); #else /* CONFIG_X86_64: */ -extern struct genapic *genapic; - extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; From 6781d948cc05b02df915650f2eb49550a1631df9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 23:54:23 +0100 Subject: [PATCH 200/682] x86, genapic: provide IPI callbacks unconditionally 64-bit x86 uses the IPI callbacks even on UP - so provide them generally. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 19a5193e965..c27efde0523 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -73,7 +73,6 @@ struct genapic { unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, const struct cpumask *andmask); -#ifdef CONFIG_SMP /* ipi */ void (*send_IPI_mask)(const struct cpumask *mask, int vector); void (*send_IPI_mask_allbutself)(const struct cpumask *mask, @@ -81,7 +80,7 @@ struct genapic { void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); -#endif + /* wakeup_secondary_cpu */ int (*wakeup_cpu)(int apicid, unsigned long start_eip); From c8d46cf06dc2e3a8f57a350eb9f9b19fd7f2ffe5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 00:14:11 +0100 Subject: [PATCH 201/682] x86: rename 'genapic' to 'apic' Rename genapic-> to apic-> references because in a future chagne we'll open-code all the indirect calls (instead of obscuring them via macros), so we want this reference to be as short as possible. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 22 ++--- .../include/asm/mach-default/mach_apicdef.h | 6 +- arch/x86/include/asm/mach-default/mach_ipi.h | 8 +- arch/x86/include/asm/mach-generic/mach_apic.h | 50 ++++++------ .../include/asm/mach-generic/mach_apicdef.h | 4 +- arch/x86/include/asm/mach-generic/mach_ipi.h | 6 +- .../include/asm/mach-generic/mach_wakecpu.h | 14 ++-- arch/x86/kernel/es7000_32.c | 6 +- arch/x86/kernel/genapic_64.c | 16 ++-- arch/x86/kernel/io_apic.c | 80 +++++++++---------- arch/x86/kernel/numaq_32.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/mach-generic/es7000.c | 12 +-- arch/x86/mach-generic/probe.c | 24 +++--- 15 files changed, 127 insertions(+), 127 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index c27efde0523..3970da3245c 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -94,7 +94,7 @@ struct genapic { void (*inquire_remote_apic)(int apicid); }; -extern struct genapic *genapic; +extern struct genapic *apic; #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index cc09cbbee27..2448b927b64 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,18 +22,18 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define INT_DELIVERY_MODE (genapic->int_delivery_mode) -#define INT_DEST_MODE (genapic->int_dest_mode) -#define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) -#define init_apic_ldr (genapic->init_apic_ldr) -#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) -#define phys_pkg_id (genapic->phys_pkg_id) -#define vector_allocation_domain (genapic->vector_allocation_domain) +#define INT_DELIVERY_MODE (apic->int_delivery_mode) +#define INT_DEST_MODE (apic->int_dest_mode) +#define TARGET_CPUS (apic->target_cpus()) +#define apic_id_registered (apic->apic_id_registered) +#define init_apic_ldr (apic->init_apic_ldr) +#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) +#define phys_pkg_id (apic->phys_pkg_id) +#define vector_allocation_domain (apic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) -#define send_IPI_self (genapic->send_IPI_self) -#define wakeup_secondary_cpu (genapic->wakeup_cpu) +#define send_IPI_self (apic->send_IPI_self) +#define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index 53179936d6c..b4dcc0971c7 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -4,9 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (genapic->apic_id_mask) -#define GET_APIC_ID(x) (genapic->get_apic_id(x)) -#define SET_APIC_ID(x) (genapic->set_apic_id(x)) +#define APIC_ID_MASK (apic->apic_id_mask) +#define GET_APIC_ID(x) (apic->get_apic_id(x)) +#define SET_APIC_ID(x) (apic->set_apic_id(x)) #else #define APIC_ID_MASK (0xF<<24) static inline unsigned get_apic_id(unsigned long x) diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index 191312d155d..089399643df 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -12,8 +12,8 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include -#define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) +#define send_IPI_mask (apic->send_IPI_mask) +#define send_IPI_mask_allbutself (apic->send_IPI_mask_allbutself) #else static inline void send_IPI_mask(const struct cpumask *mask, int vector) { @@ -39,8 +39,8 @@ static inline void __local_send_IPI_all(int vector) } #ifdef CONFIG_X86_64 -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) +#define send_IPI_allbutself (apic->send_IPI_allbutself) +#define send_IPI_all (apic->send_IPI_all) #else static inline void send_IPI_allbutself(int vector) { diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 48553e958ad..59972d94ff1 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,32 +3,32 @@ #include -#define esr_disable (genapic->ESR_DISABLE) -#define NO_BALANCE_IRQ (genapic->no_balance_irq) -#define INT_DELIVERY_MODE (genapic->int_delivery_mode) -#define INT_DEST_MODE (genapic->int_dest_mode) +#define esr_disable (apic->ESR_DISABLE) +#define NO_BALANCE_IRQ (apic->no_balance_irq) +#define INT_DELIVERY_MODE (apic->int_delivery_mode) +#define INT_DEST_MODE (apic->int_dest_mode) #undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL (genapic->apic_destination_logical) -#define TARGET_CPUS (genapic->target_cpus()) -#define apic_id_registered (genapic->apic_id_registered) -#define init_apic_ldr (genapic->init_apic_ldr) -#define ioapic_phys_id_map (genapic->ioapic_phys_id_map) -#define setup_apic_routing (genapic->setup_apic_routing) -#define multi_timer_check (genapic->multi_timer_check) -#define apicid_to_node (genapic->apicid_to_node) -#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) -#define cpu_present_to_apicid (genapic->cpu_present_to_apicid) -#define apicid_to_cpu_present (genapic->apicid_to_cpu_present) -#define setup_portio_remap (genapic->setup_portio_remap) -#define check_apicid_present (genapic->check_apicid_present) -#define check_phys_apicid_present (genapic->check_phys_apicid_present) -#define check_apicid_used (genapic->check_apicid_used) -#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) -#define vector_allocation_domain (genapic->vector_allocation_domain) -#define enable_apic_mode (genapic->enable_apic_mode) -#define phys_pkg_id (genapic->phys_pkg_id) -#define wakeup_secondary_cpu (genapic->wakeup_cpu) +#define APIC_DEST_LOGICAL (apic->apic_destination_logical) +#define TARGET_CPUS (apic->target_cpus()) +#define apic_id_registered (apic->apic_id_registered) +#define init_apic_ldr (apic->init_apic_ldr) +#define ioapic_phys_id_map (apic->ioapic_phys_id_map) +#define setup_apic_routing (apic->setup_apic_routing) +#define multi_timer_check (apic->multi_timer_check) +#define apicid_to_node (apic->apicid_to_node) +#define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) +#define cpu_present_to_apicid (apic->cpu_present_to_apicid) +#define apicid_to_cpu_present (apic->apicid_to_cpu_present) +#define setup_portio_remap (apic->setup_portio_remap) +#define check_apicid_present (apic->check_apicid_present) +#define check_phys_apicid_present (apic->check_phys_apicid_present) +#define check_apicid_used (apic->check_apicid_used) +#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) +#define vector_allocation_domain (apic->vector_allocation_domain) +#define enable_apic_mode (apic->enable_apic_mode) +#define phys_pkg_id (apic->phys_pkg_id) +#define wakeup_secondary_cpu (apic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h index 68041f3802f..acc9adddb34 100644 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -4,8 +4,8 @@ #ifndef APIC_DEFINITION #include -#define GET_APIC_ID (genapic->get_apic_id) -#define APIC_ID_MASK (genapic->apic_id_mask) +#define GET_APIC_ID (apic->get_apic_id) +#define APIC_ID_MASK (apic->apic_id_mask) #endif #endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h index ffd637e3c3d..75e54bd6cbd 100644 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ b/arch/x86/include/asm/mach-generic/mach_ipi.h @@ -3,8 +3,8 @@ #include -#define send_IPI_mask (genapic->send_IPI_mask) -#define send_IPI_allbutself (genapic->send_IPI_allbutself) -#define send_IPI_all (genapic->send_IPI_all) +#define send_IPI_mask (apic->send_IPI_mask) +#define send_IPI_allbutself (apic->send_IPI_allbutself) +#define send_IPI_all (apic->send_IPI_all) #endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 1ab16b168c8..22006bbee61 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,12 +1,12 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) -#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) -#define wait_for_init_deassert (genapic->wait_for_init_deassert) -#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) -#define store_NMI_vector (genapic->store_NMI_vector) -#define restore_NMI_vector (genapic->restore_NMI_vector) -#define inquire_remote_apic (genapic->inquire_remote_apic) +#define TRAMPOLINE_PHYS_LOW (apic->trampoline_phys_low) +#define TRAMPOLINE_PHYS_HIGH (apic->trampoline_phys_high) +#define wait_for_init_deassert (apic->wait_for_init_deassert) +#define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) +#define store_NMI_vector (apic->store_NMI_vector) +#define restore_NMI_vector (apic->restore_NMI_vector) +#define inquire_remote_apic (apic->inquire_remote_apic) #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 53699c931ad..20a2a43c2a9 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -188,14 +188,14 @@ static void noop_wait_for_deassert(atomic_t *deassert_not_used) static int __init es7000_update_genapic(void) { - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { es7000_update_genapic_to_cluster(); - genapic->wait_for_init_deassert = noop_wait_for_deassert; - genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + apic->wait_for_init_deassert = noop_wait_for_deassert; + apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } return 0; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index e656c272115..2b986389a24 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -29,7 +29,7 @@ extern struct genapic apic_x2xpic_uv_x; extern struct genapic apic_x2apic_phys; extern struct genapic apic_x2apic_cluster; -struct genapic __read_mostly *genapic = &apic_flat; +struct genapic __read_mostly *apic = &apic_flat; static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_UV @@ -46,15 +46,15 @@ static struct genapic *apic_probe[] __initdata = { */ void __init setup_apic_routing(void) { - if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { + if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) - genapic = &apic_flat; + apic = &apic_flat; } - if (genapic == &apic_flat) { + if (apic == &apic_flat) { if (max_physical_apicid >= 8) - genapic = &apic_physflat; - printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); + apic = &apic_physflat; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } if (x86_quirks->update_genapic) @@ -74,9 +74,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - genapic = apic_probe[i]; + apic = apic_probe[i]; printk(KERN_INFO "Setting APIC routing to %s.\n", - genapic->name); + apic->name); return 1; } } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bfb7d734062..7283234229f 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1486,7 +1486,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); } -static int setup_ioapic_entry(int apic, int irq, +static int setup_ioapic_entry(int apic_id, int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int trigger, int polarity, int vector) @@ -1498,18 +1498,18 @@ static int setup_ioapic_entry(int apic, int irq, #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); struct irte irte; struct IR_IO_APIC_route_entry *ir_entry = (struct IR_IO_APIC_route_entry *) entry; int index; if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); + panic("No mapping iommu for ioapic %d\n", apic_id); index = alloc_irte(iommu, irq, 1); if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); + panic("Failed to allocate IRTE for ioapic %d\n", apic_id); memset(&irte, 0, sizeof(irte)); @@ -1547,7 +1547,7 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, +static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@ -1567,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].apicid, pin, cfg->vector, + apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, irq, trigger, polarity); - if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, + if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); __clear_irq_vector(irq, cfg); return; } @@ -1583,12 +1583,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); - ioapic_write_entry(apic, pin, entry); + ioapic_write_entry(apic_id, pin, entry); } static void __init setup_IO_APIC_irqs(void) { - int apic, pin, idx, irq; + int apic_id, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; @@ -1596,19 +1596,19 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { + for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); + idx = find_irq_entry(apic_id, pin, mp_INT); if (idx == -1) { if (!notcon) { notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG " %d-%d", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); } else apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic].apicid, pin); + mp_ioapics[apic_id].apicid, pin); continue; } if (notcon) { @@ -1617,9 +1617,9 @@ static void __init setup_IO_APIC_irqs(void) notcon = 0; } - irq = pin_2_irq(idx, apic, pin); + irq = pin_2_irq(idx, apic_id, pin); #ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) + if (multi_timer_check(apic_id, irq)) continue; #endif desc = irq_to_desc_alloc_cpu(irq, cpu); @@ -1628,9 +1628,9 @@ static void __init setup_IO_APIC_irqs(void) continue; } cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic, pin); + add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); - setup_IO_APIC_irq(apic, pin, irq, desc, + setup_IO_APIC_irq(apic_id, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@ -1643,7 +1643,7 @@ static void __init setup_IO_APIC_irqs(void) /* * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, +static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; @@ -1676,7 +1676,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, /* * Add it to the IO-APIC irq-routing table: */ - ioapic_write_entry(apic, pin, entry); + ioapic_write_entry(apic_id, pin, entry); } @@ -2089,7 +2089,7 @@ static void __init setup_ioapic_ids_from_mpc(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int apic; + int apic_id; int i; unsigned char old_id; unsigned long flags; @@ -2113,21 +2113,21 @@ static void __init setup_ioapic_ids_from_mpc(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (apic = 0; apic < nr_ioapics; apic++) { + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(apic_id, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic].apicid; + old_id = mp_ioapics[apic_id].apicid; - if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { + if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].apicid); + apic_id, mp_ioapics[apic_id].apicid); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic].apicid = reg_00.bits.ID; + mp_ioapics[apic_id].apicid = reg_00.bits.ID; } /* @@ -2136,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].apicid)) { + mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].apicid); + apic_id, mp_ioapics[apic_id].apicid); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -2147,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic].apicid = i; + mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); + tmp = apicid_to_cpu_present(mp_ioapics[apic_id].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic].apicid); + mp_ioapics[apic_id].apicid); physids_or(phys_id_present_map, phys_id_present_map, tmp); } @@ -2162,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void) * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic].apicid) + if (old_id != mp_ioapics[apic_id].apicid) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].dstapic == old_id) mp_irqs[i].dstapic - = mp_ioapics[apic].apicid; + = mp_ioapics[apic_id].apicid; /* * Read the right value from the MPC table and @@ -2174,20 +2174,20 @@ static void __init setup_ioapic_ids_from_mpc(void) */ apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].apicid); + mp_ioapics[apic_id].apicid); - reg_00.bits.ID = mp_ioapics[apic].apicid; + reg_00.bits.ID = mp_ioapics[apic_id].apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); + io_apic_write(apic_id, 0, reg_00.raw); spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); + reg_00.raw = io_apic_read(apic_id, 0); spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].apicid) + if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index f2191d4f271..3928280278f 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -236,7 +236,7 @@ static int __init numaq_setup_ioapic_ids(void) static int __init numaq_update_genapic(void) { - genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; + apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; return 0; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f41c4486c27..a58e9f5e603 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -590,7 +590,7 @@ static int __init default_update_genapic(void) { #ifdef CONFIG_X86_SMP # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; + apic->wakeup_cpu = wakeup_secondary_cpu_via_init; # endif #endif diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index c2ded144802..2f4f4a6e39b 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -20,14 +20,14 @@ void __init es7000_update_genapic_to_cluster(void) { - genapic->target_cpus = target_cpus_cluster; - genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; - genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; + apic->target_cpus = target_cpus_cluster; + apic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->int_dest_mode = INT_DEST_MODE_CLUSTER; + apic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; - genapic->init_apic_ldr = init_apic_ldr_cluster; + apic->init_apic_ldr = init_apic_ldr_cluster; - genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; + apic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; } static int probe_es7000(void) diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 15a38daef1a..82bf0f520fb 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -23,7 +23,7 @@ extern struct genapic apic_bigsmp; extern struct genapic apic_es7000; extern struct genapic apic_default; -struct genapic *genapic = &apic_default; +struct genapic *apic = &apic_default; static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_NUMAQ @@ -52,7 +52,7 @@ static int __init parse_apic(char *arg) for (i = 0; apic_probe[i]; i++) { if (!strcmp(apic_probe[i]->name, arg)) { - genapic = apic_probe[i]; + apic = apic_probe[i]; cmdline_apic = 1; return 0; } @@ -76,13 +76,13 @@ void __init generic_bigsmp_probe(void) * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && genapic == &apic_default) { + if (!cmdline_apic && apic == &apic_default) { if (apic_bigsmp.probe()) { - genapic = &apic_bigsmp; + apic = &apic_bigsmp; if (x86_quirks->update_genapic) x86_quirks->update_genapic(); printk(KERN_INFO "Overriding APIC driver with %s\n", - genapic->name); + apic->name); } } #endif @@ -94,7 +94,7 @@ void __init generic_apic_probe(void) int i; for (i = 0; apic_probe[i]; i++) { if (apic_probe[i]->probe()) { - genapic = apic_probe[i]; + apic = apic_probe[i]; break; } } @@ -105,7 +105,7 @@ void __init generic_apic_probe(void) if (x86_quirks->update_genapic) x86_quirks->update_genapic(); } - printk(KERN_INFO "Using APIC driver %s\n", genapic->name); + printk(KERN_INFO "Using APIC driver %s\n", apic->name); } /* These functions can switch the APIC even after the initial ->probe() */ @@ -116,11 +116,11 @@ int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { if (!cmdline_apic) { - genapic = apic_probe[i]; + apic = apic_probe[i]; if (x86_quirks->update_genapic) x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); + apic->name); } return 1; } @@ -134,11 +134,11 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) for (i = 0; apic_probe[i]; ++i) { if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { if (!cmdline_apic) { - genapic = apic_probe[i]; + apic = apic_probe[i]; if (x86_quirks->update_genapic) x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", - genapic->name); + apic->name); } return 1; } @@ -148,5 +148,5 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) int hard_smp_processor_id(void) { - return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); + return apic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); } From f2f05ee8b8d346d4edee766384a5fedafdd4f9f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 202/682] x86: clean up genapic_flat - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 73 +++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 34185488e4f..f1bfdd3608a 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -175,25 +175,60 @@ static unsigned int phys_pkg_id(int index_msb) } struct genapic apic_flat = { - .name = "flat", - .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = flat_target_cpus, - .vector_allocation_domain = flat_vector_allocation_domain, - .apic_id_registered = flat_apic_id_registered, - .init_apic_ldr = flat_init_apic_ldr, - .send_IPI_all = flat_send_IPI_all, - .send_IPI_allbutself = flat_send_IPI_allbutself, - .send_IPI_mask = flat_send_IPI_mask, - .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, - .send_IPI_self = apic_send_IPI_self, - .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), + .name = "flat", + .probe = NULL, + .acpi_madt_oem_check = flat_acpi_madt_oem_check, + .apic_id_registered = flat_apic_id_registered, + + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + + .target_cpus = flat_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = flat_vector_allocation_domain, + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFu << 24, + + .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + + .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, + .send_IPI_allbutself = flat_send_IPI_allbutself, + .send_IPI_all = flat_send_IPI_all, + .send_IPI_self = apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; /* From 4c3e51e05a7eefead4033b187394458ff8626497 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 203/682] x86: clean up genapic_phys_flat - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 75 +++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index f1bfdd3608a..e9233374cef 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -320,23 +320,60 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, } struct genapic apic_physflat = { - .name = "physical flat", - .acpi_madt_oem_check = physflat_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = physflat_target_cpus, - .vector_allocation_domain = physflat_vector_allocation_domain, - .apic_id_registered = flat_apic_id_registered, - .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ - .send_IPI_all = physflat_send_IPI_all, - .send_IPI_allbutself = physflat_send_IPI_allbutself, - .send_IPI_mask = physflat_send_IPI_mask, - .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, - .send_IPI_self = apic_send_IPI_self, - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFu<<24), + + .name = "physical flat", + .probe = NULL, + .acpi_madt_oem_check = physflat_acpi_madt_oem_check, + .apic_id_registered = flat_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = physflat_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = physflat_vector_allocation_domain, + /* not needed, but shouldn't hurt: */ + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFu<<24, + + .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, + + .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, + .send_IPI_allbutself = physflat_send_IPI_allbutself, + .send_IPI_all = physflat_send_IPI_all, + .send_IPI_self = apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; From c7967329911013a05920ef12832935c541bb8c9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 204/682] x86: clean up apic_x2apic_uv_x - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 74 ++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index bfe36249145..94f606f204a 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -237,25 +237,61 @@ static void uv_send_IPI_self(int vector) } struct genapic apic_x2apic_uv_x = { - .name = "UV large system", - .acpi_madt_oem_check = uv_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = uv_target_cpus, - .vector_allocation_domain = uv_vector_allocation_domain, - .apic_id_registered = uv_apic_id_registered, - .init_apic_ldr = uv_init_apic_ldr, - .send_IPI_all = uv_send_IPI_all, - .send_IPI_allbutself = uv_send_IPI_allbutself, - .send_IPI_mask = uv_send_IPI_mask, - .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, - .send_IPI_self = uv_send_IPI_self, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "UV large system", + .probe = NULL, + .acpi_madt_oem_check = uv_acpi_madt_oem_check, + .apic_id_registered = uv_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = uv_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = uv_vector_allocation_domain, + .init_apic_ldr = uv_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, + + .send_IPI_mask = uv_send_IPI_mask, + .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, + .send_IPI_allbutself = uv_send_IPI_allbutself, + .send_IPI_all = uv_send_IPI_all, + .send_IPI_self = uv_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; static __cpuinit void set_x2apic_extra_bits(int pnode) From 05c155c235c757329ec89ad591516538ed8352c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 205/682] x86: clean up apic_x2apic_phys - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_phys.c | 74 ++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 21bcc0e098b..c98361fb7ee 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -172,23 +172,59 @@ static void init_x2apic_ldr(void) } struct genapic apic_x2apic_phys = { - .name = "physical x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "physical x2apic", + .probe = NULL, + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_registered = x2apic_apic_id_registered, + + .int_delivery_mode = dest_Fixed, + .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + + .target_cpus = x2apic_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = x2apic_vector_allocation_domain, + .init_apic_ldr = init_x2apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; From 504a3c3ad45d200a6ac8be5aa019c8fa05e26dc8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 02:37:01 +0100 Subject: [PATCH 206/682] x86: clean up apic_x2apic_cluster - reorder fields so that they appear in struct genapic field ordering - add zero-initialized fields too so that it's apparent which functionality is default / missing. Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_cluster.c | 74 +++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 6ce497cc372..fc855e503ac 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -176,23 +176,59 @@ static void init_x2apic_ldr(void) } struct genapic apic_x2apic_cluster = { - .name = "cluster x2apic", - .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), - .target_cpus = x2apic_target_cpus, - .vector_allocation_domain = x2apic_vector_allocation_domain, - .apic_id_registered = x2apic_apic_id_registered, - .init_apic_ldr = init_x2apic_ldr, - .send_IPI_all = x2apic_send_IPI_all, - .send_IPI_allbutself = x2apic_send_IPI_allbutself, - .send_IPI_mask = x2apic_send_IPI_mask, - .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, - .send_IPI_self = x2apic_send_IPI_self, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, - .phys_pkg_id = phys_pkg_id, - .get_apic_id = get_apic_id, - .set_apic_id = set_apic_id, - .apic_id_mask = (0xFFFFFFFFu), + + .name = "cluster x2apic", + .probe = NULL, + .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_registered = x2apic_apic_id_registered, + + .int_delivery_mode = dest_LowestPrio, + .int_dest_mode = (APIC_DEST_LOGICAL != 0), + + .target_cpus = x2apic_target_cpus, + .ESR_DISABLE = 0, + .apic_destination_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .no_balance_irq = 0, + .no_ioapic_check = 0, + + .vector_allocation_domain = x2apic_vector_allocation_domain, + .init_apic_ldr = init_x2apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = NULL, + .cpu_to_logical_apicid = NULL, + .cpu_present_to_apicid = NULL, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = NULL, + .enable_apic_mode = NULL, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xFFFFFFFFu, + + .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + + .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, + .send_IPI_allbutself = x2apic_send_IPI_allbutself, + .send_IPI_all = x2apic_send_IPI_all, + .send_IPI_self = x2apic_send_IPI_self, + + .wakeup_cpu = NULL, + .trampoline_phys_low = 0, + .trampoline_phys_high = 0, + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, + .inquire_remote_apic = NULL, }; From 0a7e8c64142b2ae5aacdc509ed112b8e362ac8a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 207/682] x86, genapic: cleanup 32-bit apic_default template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/default.c | 58 ++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e63a4a76d8c..d5fec764fb4 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -24,4 +24,60 @@ static int probe_default(void) return 1; } -struct genapic apic_default = APIC_INIT("default", probe_default); +struct genapic apic_default = { + + .name = "default", + .probe = probe_default, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From d26b6d6660d704ffa59f22ad57c9103e3fba289f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 208/682] x86, genapic: cleanup 32-bit apic_bigsmp template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/bigsmp.c | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index bc4c7840b2a..13e82bc4dae 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -57,4 +57,60 @@ static int probe_bigsmp(void) return dmi_bigsmp; } -struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); +struct genapic apic_bigsmp = { + + .name = "bigsmp", + .probe = probe_bigsmp, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From fea3437adf778cfe69b7f8cff0afb8060d84b647 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 209/682] x86, genapic: cleanup 32-bit apic_numaq template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/numaq.c | 58 ++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 3679e225564..fa486ca49c0 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -50,4 +50,60 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } -struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); +struct genapic apic_numaq = { + + .name = "NUMAQ", + .probe = probe_numaq, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From fed53ebf3c4e233e085c453a27ae287ccbf149fb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 210/682] x86, genapic: cleanup 32-bit apic_es7000 template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/es7000.c | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 2f4f4a6e39b..4a404ea5f92 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -100,4 +100,60 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } -struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); +struct genapic apic_es7000 = { + + .name = "es7000", + .probe = probe_es7000, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From 491a50c4fbcf6cc39a702a16a2dfaf42f0eb8058 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:32:03 +0100 Subject: [PATCH 211/682] x86, genapic: cleanup 32-bit apic_summit template Clean up the APIC driver template: - order fields properly - use the macro names explicitly (so that they can be renamed later) - fill in NULL entries as well Signed-off-by: Ingo Molnar --- arch/x86/mach-generic/summit.c | 58 +++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2821ffc188b..479c1d40977 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -37,4 +37,60 @@ static void vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } -struct genapic apic_summit = APIC_INIT("summit", probe_summit); +struct genapic apic_summit = { + + .name = "summit", + .probe = probe_summit, + .acpi_madt_oem_check = acpi_madt_oem_check, + .apic_id_registered = apic_id_registered, + + .int_delivery_mode = INT_DELIVERY_MODE, + .int_dest_mode = INT_DEST_MODE, + + .target_cpus = target_cpus, + .ESR_DISABLE = esr_disable, + .apic_destination_logical = APIC_DEST_LOGICAL, + .check_apicid_used = check_apicid_used, + .check_apicid_present = check_apicid_present, + + .no_balance_irq = NO_BALANCE_IRQ, + .no_ioapic_check = 0, + + .vector_allocation_domain = vector_allocation_domain, + .init_apic_ldr = init_apic_ldr, + + .ioapic_phys_id_map = ioapic_phys_id_map, + .setup_apic_routing = setup_apic_routing, + .multi_timer_check = multi_timer_check, + .apicid_to_node = apicid_to_node, + .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_present_to_apicid = cpu_present_to_apicid, + .apicid_to_cpu_present = apicid_to_cpu_present, + .setup_portio_remap = setup_portio_remap, + .check_phys_apicid_present = check_phys_apicid_present, + .enable_apic_mode = enable_apic_mode, + .phys_pkg_id = phys_pkg_id, + .mps_oem_check = mps_oem_check, + + .get_apic_id = get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = APIC_ID_MASK, + + .cpu_mask_to_apicid = cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + + .send_IPI_mask = send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = send_IPI_allbutself, + .send_IPI_all = send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .store_NMI_vector = store_NMI_vector, + .restore_NMI_vector = restore_NMI_vector, + .inquire_remote_apic = inquire_remote_apic, +}; From 9a6801da55e4a4492e8f666ac272efe8186682c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:18:13 +0100 Subject: [PATCH 212/682] x86: remove APIC_INIT / APICFUNC / IPIFUNC The APIC_INIT() / APICFUNC / IPIFUNC macros were ugly and obfuscated the true identity of various APIC driver methods. Now that they are not used anymore, remove them. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 63 ++-------------------------------- 1 file changed, 2 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 3970da3245c..26c5e824a71 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -97,66 +97,8 @@ struct genapic { extern struct genapic *apic; #ifdef CONFIG_X86_32 - -#define APICFUNC(x) .x = x, - -/* More functions could be probably marked IPIFUNC and save some space - in UP GENERICARCH kernels, but I don't have the nerve right now - to untangle this mess. -AK */ -#ifdef CONFIG_SMP -#define IPIFUNC(x) APICFUNC(x) -#else -#define IPIFUNC(x) -#endif - -#define APIC_INIT(aname, aprobe) \ -{ \ - .name = aname, \ - .probe = aprobe, \ - .int_delivery_mode = INT_DELIVERY_MODE, \ - .int_dest_mode = INT_DEST_MODE, \ - .no_balance_irq = NO_BALANCE_IRQ, \ - .ESR_DISABLE = esr_disable, \ - .apic_destination_logical = APIC_DEST_LOGICAL, \ - APICFUNC(apic_id_registered) \ - APICFUNC(target_cpus) \ - APICFUNC(check_apicid_used) \ - APICFUNC(check_apicid_present) \ - APICFUNC(init_apic_ldr) \ - APICFUNC(ioapic_phys_id_map) \ - APICFUNC(setup_apic_routing) \ - APICFUNC(multi_timer_check) \ - APICFUNC(apicid_to_node) \ - APICFUNC(cpu_to_logical_apicid) \ - APICFUNC(cpu_present_to_apicid) \ - APICFUNC(apicid_to_cpu_present) \ - APICFUNC(setup_portio_remap) \ - APICFUNC(check_phys_apicid_present) \ - APICFUNC(mps_oem_check) \ - APICFUNC(get_apic_id) \ - .apic_id_mask = APIC_ID_MASK, \ - APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(cpu_mask_to_apicid_and) \ - APICFUNC(vector_allocation_domain) \ - APICFUNC(acpi_madt_oem_check) \ - IPIFUNC(send_IPI_mask) \ - IPIFUNC(send_IPI_allbutself) \ - IPIFUNC(send_IPI_all) \ - APICFUNC(enable_apic_mode) \ - APICFUNC(phys_pkg_id) \ - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ - APICFUNC(wait_for_init_deassert) \ - APICFUNC(smp_callin_clear_local_apic) \ - APICFUNC(store_NMI_vector) \ - APICFUNC(restore_NMI_vector) \ - APICFUNC(inquire_remote_apic) \ -} - extern void es7000_update_genapic_to_cluster(void); - -#else /* CONFIG_X86_64: */ - +#else extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; @@ -169,7 +111,6 @@ extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern void setup_apic_routing(void); - -#endif /* CONFIG_X86_64 */ +#endif #endif /* _ASM_X86_GENAPIC_64_H */ From 306db03b0d71bf9c94155c0c4771a79fc70b4b27 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:43:47 +0100 Subject: [PATCH 213/682] x86: clean up apic->acpi_madt_oem_check methods Impact: refactor code x86 subarchitectures each defined a "acpi_madt_oem_check()" method, which could be an inline function, or an extern, or a static function, and which was also the name of a genapic field. Untangle this namespace spaghetti by setting ->acpi_madt_oem_check() to NULL on those subarchitectures that have no detection quirks, and rename the other ones (summit, es7000) that do. Also change default_acpi_madt_oem_check() to handle NULL entries, and clean its control flow up as well. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/mpparse.h | 2 +- arch/x86/include/asm/genapic.h | 2 +- .../include/asm/mach-default/mach_mpparse.h | 2 +- .../include/asm/mach-generic/mach_mpparse.h | 2 +- arch/x86/include/asm/summit/mpparse.h | 2 +- arch/x86/kernel/acpi/boot.c | 3 ++- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 6 ++--- arch/x86/mach-generic/numaq.c | 8 +------ arch/x86/mach-generic/probe.c | 24 +++++++++++-------- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h index c1629b090ec..30692c4ae85 100644 --- a/arch/x86/include/asm/es7000/mpparse.h +++ b/arch/x86/include/asm/es7000/mpparse.h @@ -9,7 +9,7 @@ extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); extern void setup_unisys(void); #ifndef CONFIG_X86_GENERICARCH -extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); +extern int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id); extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid); #endif diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 26c5e824a71..108abdf6953 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -103,7 +103,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; extern struct genapic apic_x2apic_cluster; extern struct genapic apic_x2apic_phys; -extern int acpi_madt_oem_check(char *, char *); +extern int default_acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h index c70a263d68c..8fa01770ba6 100644 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ b/arch/x86/include/asm/mach-default/mach_mpparse.h @@ -8,7 +8,7 @@ mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) } /* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static inline int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h index 9444ab8dca9..f497d96c76b 100644 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ b/arch/x86/include/asm/mach-generic/mach_mpparse.h @@ -4,6 +4,6 @@ extern int mps_oem_check(struct mpc_table *, char *, char *); -extern int acpi_madt_oem_check(char *, char *); +extern int default_acpi_madt_oem_check(char *, char *); #endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h index 380e86c0236..555ed8238e9 100644 --- a/arch/x86/include/asm/summit/mpparse.h +++ b/arch/x86/include/asm/summit/mpparse.h @@ -27,7 +27,7 @@ static inline int mps_oem_check(struct mpc_table *mpc, char *oem, } /* Hook from generic ACPI tables.c */ -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4cb5964f149..314fe0dddef 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -239,7 +239,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) madt->address); } - acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); + default_acpi_madt_oem_check(madt->header.oem_id, + madt->header.oem_table_id); return 0; } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 2b986389a24..060945b8eec 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -68,7 +68,7 @@ void apic_send_IPI_self(int vector) __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int i; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 13e82bc4dae..22c3608b80d 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -61,7 +61,7 @@ struct genapic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = NULL, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d5fec764fb4..cfec3494a96 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -28,7 +28,7 @@ struct genapic apic_default = { .name = "default", .probe = probe_default, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = NULL, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4a404ea5f92..23fe6f1c969 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -57,7 +57,7 @@ mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) #ifdef CONFIG_ACPI /* Hook from generic ACPI tables.c */ -static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { unsigned long oem_addr = 0; int check_dsdt; @@ -81,7 +81,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return ret; } #else -static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } @@ -104,7 +104,7 @@ struct genapic apic_es7000 = { .name = "es7000", .probe = probe_es7000, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index fa486ca49c0..9691b4e1654 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -31,12 +31,6 @@ static int probe_numaq(void) return found_numaq; } -/* Hook from generic ACPI tables.c */ -static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus @@ -54,7 +48,7 @@ struct genapic apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = NULL, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 82bf0f520fb..a21e2b1a701 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -128,20 +128,24 @@ int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) return 0; } -int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { int i; + for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - if (!cmdline_apic) { - apic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; + if (!apic_probe[i]->acpi_madt_oem_check) + continue; + if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) + continue; + + if (!cmdline_apic) { + apic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + apic->name); } + return 1; } return 0; } diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 479c1d40977..0eea9fbb2a5 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -41,7 +41,7 @@ struct genapic apic_summit = { .name = "summit", .probe = probe_summit, - .acpi_madt_oem_check = acpi_madt_oem_check, + .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, From 7ed248daa56156f2fd7175f90b62fc6397b0c7b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 03:43:47 +0100 Subject: [PATCH 214/682] x86: clean up apic->apic_id_registered() methods Impact: cleanup x86 subarchitectures each defined a "apic_id_registered()" method, which could be an inline function depending on which subarch we build for, and which was also the name of a genapic field. Untangle this namespace spaghetti by giving each of the instances a separate name. Also remove wrapper macro obfuscation. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 ++-- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/apic.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index d8dd9f53791..42c56df3ff3 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -4,9 +4,9 @@ #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) #define esr_disable (1) -static inline int apic_id_registered(void) +static inline int bigsmp_apic_id_registered(void) { - return (1); + return 1; } static inline const cpumask_t *target_cpus(void) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index c58b9cc7446..a1819b510de 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -6,9 +6,9 @@ #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) #define esr_disable (1) -static inline int apic_id_registered(void) +static inline int es7000_apic_id_registered(void) { - return (1); + return 1; } static inline const cpumask_t *target_cpus_cluster(void) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 2448b927b64..6a454fa0b43 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -25,7 +25,6 @@ static inline const struct cpumask *target_cpus(void) #define INT_DELIVERY_MODE (apic->int_delivery_mode) #define INT_DEST_MODE (apic->int_dest_mode) #define TARGET_CPUS (apic->target_cpus()) -#define apic_id_registered (apic->apic_id_registered) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) @@ -57,7 +56,7 @@ static inline void init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int apic_id_registered(void) +static inline int default_apic_id_registered(void) { return physid_isset(read_apic_id(), phys_cpu_present_map); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 59972d94ff1..cc6e9d70f06 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -10,7 +10,6 @@ #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define TARGET_CPUS (apic->target_cpus()) -#define apic_id_registered (apic->apic_id_registered) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index bf37bc49bd8..59b62b19d02 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -28,7 +28,7 @@ static inline unsigned long check_apicid_present(int bit) } #define apicid_cluster(apicid) (apicid & 0xF0) -static inline int apic_id_registered(void) +static inline int numaq_apic_id_registered(void) { return 1; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 93d2c8667cf..a36ef6e4b1f 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -74,7 +74,7 @@ static inline int multi_timer_check(int apic, int irq) return 0; } -static inline int apic_id_registered(void) +static inline int summit_apic_id_registered(void) { return 1; } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c6f15647eba..b6740de18fb 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1171,7 +1171,7 @@ void __cpuinit setup_local_APIC(void) * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. */ - if (!apic_id_registered()) + if (!apic->apic_id_registered()) BUG(); /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 22c3608b80d..17abf5c6242 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -62,7 +62,7 @@ struct genapic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, .acpi_madt_oem_check = NULL, - .apic_id_registered = apic_id_registered, + .apic_id_registered = bigsmp_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index cfec3494a96..1f30559e9d8 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -29,7 +29,7 @@ struct genapic apic_default = { .name = "default", .probe = probe_default, .acpi_madt_oem_check = NULL, - .apic_id_registered = apic_id_registered, + .apic_id_registered = default_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 23fe6f1c969..d68ca0bce67 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -105,7 +105,7 @@ struct genapic apic_es7000 = { .name = "es7000", .probe = probe_es7000, .acpi_madt_oem_check = es7000_acpi_madt_oem_check, - .apic_id_registered = apic_id_registered, + .apic_id_registered = es7000_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 9691b4e1654..b22a79b15b1 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -49,7 +49,7 @@ struct genapic apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, .acpi_madt_oem_check = NULL, - .apic_id_registered = apic_id_registered, + .apic_id_registered = numaq_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 0eea9fbb2a5..744fa1b86ef 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -42,7 +42,7 @@ struct genapic apic_summit = { .name = "summit", .probe = probe_summit, .acpi_madt_oem_check = summit_acpi_madt_oem_check, - .apic_id_registered = apic_id_registered, + .apic_id_registered = summit_apic_id_registered, .int_delivery_mode = INT_DELIVERY_MODE, .int_dest_mode = INT_DEST_MODE, From f8987a1093cc7a896137e264c24e04d4048e9f95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:02:31 +0100 Subject: [PATCH 215/682] x86, genapic: rename int_delivery_mode, et. al. int_delivery_mode is supposed to mean 'interrupt delivery mode', but it's quite a misnomer as 'int' we usually think of as an integer type ... The standard naming for such attributes is 'irq' - so rename the following fields and macros: int_delivery_mode => irq_delivery_mode INT_DELIVERY_MODE => IRQ_DELIVERY_MODE int_dest_mode => irq_dest_mode INT_DEST_MODE => IRQ_DEST_MODE Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 +-- arch/x86/include/asm/es7000/apic.h | 4 +-- arch/x86/include/asm/genapic.h | 4 +-- arch/x86/include/asm/mach-default/mach_apic.h | 8 ++--- arch/x86/include/asm/mach-generic/mach_apic.h | 4 +-- arch/x86/include/asm/numaq/apic.h | 4 +-- arch/x86/include/asm/summit/apic.h | 4 +-- arch/x86/kernel/genapic_flat_64.c | 8 ++--- arch/x86/kernel/genx2apic_cluster.c | 4 +-- arch/x86/kernel/genx2apic_phys.c | 4 +-- arch/x86/kernel/genx2apic_uv_x.c | 4 +-- arch/x86/kernel/io_apic.c | 30 +++++++++---------- arch/x86/mach-generic/bigsmp.c | 4 +-- arch/x86/mach-generic/default.c | 4 +-- arch/x86/mach-generic/es7000.c | 8 ++--- arch/x86/mach-generic/numaq.c | 4 +-- arch/x86/mach-generic/summit.c | 4 +-- 17 files changed, 53 insertions(+), 53 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 42c56df3ff3..8ff8bba8833 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -21,8 +21,8 @@ static inline const cpumask_t *target_cpus(void) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target proc */ +#define IRQ_DELIVERY_MODE (dest_Fixed) +#define IRQ_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index a1819b510de..830e8731cc0 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define INT_DELIVERY_MODE (dest_Fixed) -#define INT_DEST_MODE (0) /* phys delivery to target procs */ +#define IRQ_DELIVERY_MODE (dest_Fixed) +#define IRQ_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 108abdf6953..e998e3df5d2 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -23,8 +23,8 @@ struct genapic { int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); int (*apic_id_registered)(void); - u32 int_delivery_mode; - u32 int_dest_mode; + u32 irq_delivery_mode; + u32 irq_dest_mode; const struct cpumask *(*target_cpus)(void); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 6a454fa0b43..b5364793262 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,8 +22,8 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define INT_DELIVERY_MODE (apic->int_delivery_mode) -#define INT_DEST_MODE (apic->int_dest_mode) +#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) +#define IRQ_DEST_MODE (apic->irq_dest_mode) #define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) @@ -35,8 +35,8 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define IRQ_DELIVERY_MODE dest_LowestPrio +#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index cc6e9d70f06..03492f2219e 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,8 +5,8 @@ #define esr_disable (apic->ESR_DISABLE) #define NO_BALANCE_IRQ (apic->no_balance_irq) -#define INT_DELIVERY_MODE (apic->int_delivery_mode) -#define INT_DEST_MODE (apic->int_dest_mode) +#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) +#define IRQ_DEST_MODE (apic->irq_dest_mode) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define TARGET_CPUS (apic->target_cpus()) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 59b62b19d02..d885e35df18 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -15,8 +15,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define INT_DELIVERY_MODE dest_LowestPrio -#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ +#define IRQ_DELIVERY_MODE dest_LowestPrio +#define IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index a36ef6e4b1f..0b7d0d14e56 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -24,8 +24,8 @@ static inline const cpumask_t *target_cpus(void) return &cpumask_of_cpu(0); } -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define IRQ_DELIVERY_MODE (dest_LowestPrio) +#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index e9233374cef..0a263d6bb5e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -180,8 +180,8 @@ struct genapic apic_flat = { .acpi_madt_oem_check = flat_acpi_madt_oem_check, .apic_id_registered = flat_apic_id_registered, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_delivery_mode = dest_LowestPrio, + .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, .ESR_DISABLE = 0, @@ -326,8 +326,8 @@ struct genapic apic_physflat = { .acpi_madt_oem_check = physflat_acpi_madt_oem_check, .apic_id_registered = flat_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index fc855e503ac..e9ff7dc9a0f 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -182,8 +182,8 @@ struct genapic apic_x2apic_cluster = { .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .apic_id_registered = x2apic_apic_id_registered, - .int_delivery_mode = dest_LowestPrio, - .int_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_delivery_mode = dest_LowestPrio, + .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index c98361fb7ee..8141b5a88f6 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -178,8 +178,8 @@ struct genapic apic_x2apic_phys = { .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, .apic_id_registered = x2apic_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 94f606f204a..6a73cad0d3e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -243,8 +243,8 @@ struct genapic apic_x2apic_uv_x = { .acpi_madt_oem_check = uv_acpi_madt_oem_check, .apic_id_registered = uv_apic_id_registered, - .int_delivery_mode = dest_Fixed, - .int_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, .ESR_DISABLE = 0, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7283234229f..5f967b9c9af 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1514,9 +1514,9 @@ static int setup_ioapic_entry(int apic_id, int irq, memset(&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = INT_DEST_MODE; + irte.dst_mode = IRQ_DEST_MODE; irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; + irte.dlvry_mode = IRQ_DELIVERY_MODE; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1529,8 +1529,8 @@ static int setup_ioapic_entry(int apic_id, int irq, } else #endif { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; + entry->delivery_mode = IRQ_DELIVERY_MODE; + entry->dest_mode = IRQ_DEST_MODE; entry->dest = destination; } @@ -1659,10 +1659,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = INT_DEST_MODE; + entry.dest_mode = IRQ_DEST_MODE; entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; + entry.delivery_mode = IRQ_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -3279,9 +3279,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms memset (&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = INT_DEST_MODE; + irte.dst_mode = IRQ_DEST_MODE; irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; + irte.dlvry_mode = IRQ_DELIVERY_MODE; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3299,10 +3299,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? + ((IRQ_DEST_MODE == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: MSI_ADDR_REDIRECTION_LOWPRI) | MSI_ADDR_DEST_ID(dest); @@ -3310,7 +3310,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); @@ -3711,11 +3711,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? + ((IRQ_DEST_MODE == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? + ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; @@ -3763,8 +3763,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); entry->vector = cfg->vector; - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; + entry->delivery_mode = IRQ_DELIVERY_MODE; + entry->dest_mode = IRQ_DEST_MODE; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 17abf5c6242..c15c1aa2dc7 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -64,8 +64,8 @@ struct genapic apic_bigsmp = { .acpi_madt_oem_check = NULL, .apic_id_registered = bigsmp_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 1f30559e9d8..d32b175eff8 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -31,8 +31,8 @@ struct genapic apic_default = { .acpi_madt_oem_check = NULL, .apic_id_registered = default_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index d68ca0bce67..06653892953 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -21,8 +21,8 @@ void __init es7000_update_genapic_to_cluster(void) { apic->target_cpus = target_cpus_cluster; - apic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - apic->int_dest_mode = INT_DEST_MODE_CLUSTER; + apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; apic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; apic->init_apic_ldr = init_apic_ldr_cluster; @@ -107,8 +107,8 @@ struct genapic apic_es7000 = { .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = es7000_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index b22a79b15b1..401957142fd 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -51,8 +51,8 @@ struct genapic apic_numaq = { .acpi_madt_oem_check = NULL, .apic_id_registered = numaq_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 744fa1b86ef..946da7aa762 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -44,8 +44,8 @@ struct genapic apic_summit = { .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = summit_apic_id_registered, - .int_delivery_mode = INT_DELIVERY_MODE, - .int_dest_mode = INT_DEST_MODE, + .irq_delivery_mode = IRQ_DELIVERY_MODE, + .irq_dest_mode = IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 9b5bc8dc12421a4b17047061f473d85c1797d543 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:09:58 +0100 Subject: [PATCH 216/682] x86, apic: remove IRQ_DEST_MODE / IRQ_DELIVERY_MODE Remove the wrapper macros IRQ_DEST_MODE and IRQ_DELIVERY_MODE. The typical 32-bit and the 64-bit build all dereference via the genapic, so it's pointless to hide that indirection via these ugly macros. Furthermore, it also obscures subarchitecture details. So replace it with apic->irq_dest_mode / etc. accesses. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 +-- arch/x86/include/asm/es7000/apic.h | 4 +-- arch/x86/include/asm/mach-default/mach_apic.h | 5 ++-- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/include/asm/numaq/apic.h | 4 +-- arch/x86/include/asm/summit/apic.h | 4 +-- arch/x86/kernel/io_apic.c | 30 +++++++++---------- arch/x86/mach-generic/bigsmp.c | 4 +-- arch/x86/mach-generic/default.c | 4 +-- arch/x86/mach-generic/es7000.c | 4 +-- arch/x86/mach-generic/numaq.c | 4 +-- arch/x86/mach-generic/summit.c | 4 +-- 12 files changed, 35 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 8ff8bba8833..293551b0e61 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -21,8 +21,8 @@ static inline const cpumask_t *target_cpus(void) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define IRQ_DELIVERY_MODE (dest_Fixed) -#define IRQ_DEST_MODE (0) /* phys delivery to target proc */ +#define BIGSMP_IRQ_DELIVERY_MODE (dest_Fixed) +#define BIGSMP_IRQ_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 830e8731cc0..690016683f2 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define IRQ_DELIVERY_MODE (dest_Fixed) -#define IRQ_DEST_MODE (0) /* phys delivery to target procs */ +#define ES7000_IRQ_DELIVERY_MODE (dest_Fixed) +#define ES7000_IRQ_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index b5364793262..eafbf4f2038 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,7 +22,6 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) #define IRQ_DEST_MODE (apic->irq_dest_mode) #define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) @@ -35,8 +34,8 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define IRQ_DELIVERY_MODE dest_LowestPrio -#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define DEFAULT_IRQ_DELIVERY_MODE dest_LowestPrio +#define DEFAULT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 03492f2219e..387a5d00c43 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,8 +5,6 @@ #define esr_disable (apic->ESR_DISABLE) #define NO_BALANCE_IRQ (apic->no_balance_irq) -#define IRQ_DELIVERY_MODE (apic->irq_delivery_mode) -#define IRQ_DEST_MODE (apic->irq_dest_mode) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define TARGET_CPUS (apic->target_cpus()) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index d885e35df18..7746035c591 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -15,8 +15,8 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define IRQ_DELIVERY_MODE dest_LowestPrio -#define IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ +#define NUMAQ_IRQ_DELIVERY_MODE dest_LowestPrio +#define NUMAQ_IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 0b7d0d14e56..ea2abe9b597 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -24,8 +24,8 @@ static inline const cpumask_t *target_cpus(void) return &cpumask_of_cpu(0); } -#define IRQ_DELIVERY_MODE (dest_LowestPrio) -#define IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ +#define SUMMIT_IRQ_DELIVERY_MODE (dest_LowestPrio) +#define SUMMIT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 5f967b9c9af..301b6571d70 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1514,9 +1514,9 @@ static int setup_ioapic_entry(int apic_id, int irq, memset(&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = IRQ_DEST_MODE; + irte.dst_mode = apic->irq_dest_mode; irte.trigger_mode = trigger; - irte.dlvry_mode = IRQ_DELIVERY_MODE; + irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = vector; irte.dest_id = IRTE_DEST(destination); @@ -1529,8 +1529,8 @@ static int setup_ioapic_entry(int apic_id, int irq, } else #endif { - entry->delivery_mode = IRQ_DELIVERY_MODE; - entry->dest_mode = IRQ_DEST_MODE; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; entry->dest = destination; } @@ -1659,10 +1659,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ - entry.dest_mode = IRQ_DEST_MODE; + entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = IRQ_DELIVERY_MODE; + entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; entry.vector = vector; @@ -3279,9 +3279,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms memset (&irte, 0, sizeof(irte)); irte.present = 1; - irte.dst_mode = IRQ_DEST_MODE; + irte.dst_mode = apic->irq_dest_mode; irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = IRQ_DELIVERY_MODE; + irte.dlvry_mode = apic->irq_delivery_mode; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3299,10 +3299,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->address_hi = MSI_ADDR_BASE_HI; msg->address_lo = MSI_ADDR_BASE_LO | - ((IRQ_DEST_MODE == 0) ? + ((apic->irq_dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL: MSI_ADDR_DEST_MODE_LOGICAL) | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU: MSI_ADDR_REDIRECTION_LOWPRI) | MSI_ADDR_DEST_ID(dest); @@ -3310,7 +3310,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); @@ -3711,11 +3711,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(cfg->vector) | - ((IRQ_DEST_MODE == 0) ? + ((apic->irq_dest_mode == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | HT_IRQ_LOW_RQEOI_EDGE | - ((IRQ_DELIVERY_MODE != dest_LowestPrio) ? + ((apic->irq_delivery_mode != dest_LowestPrio) ? HT_IRQ_LOW_MT_FIXED : HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; @@ -3763,8 +3763,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); entry->vector = cfg->vector; - entry->delivery_mode = IRQ_DELIVERY_MODE; - entry->dest_mode = IRQ_DEST_MODE; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; entry->polarity = 0; entry->trigger = 0; entry->mask = 0; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index c15c1aa2dc7..e8c1ceca7c9 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -64,8 +64,8 @@ struct genapic apic_bigsmp = { .acpi_madt_oem_check = NULL, .apic_id_registered = bigsmp_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = BIGSMP_IRQ_DELIVERY_MODE, + .irq_dest_mode = BIGSMP_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d32b175eff8..0482106f0e1 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -31,8 +31,8 @@ struct genapic apic_default = { .acpi_madt_oem_check = NULL, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = DEFAULT_IRQ_DELIVERY_MODE, + .irq_dest_mode = DEFAULT_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 06653892953..5d97408919b 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -107,8 +107,8 @@ struct genapic apic_es7000 = { .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = es7000_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = ES7000_IRQ_DELIVERY_MODE, + .irq_dest_mode = ES7000_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 401957142fd..77ac66935fd 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -51,8 +51,8 @@ struct genapic apic_numaq = { .acpi_madt_oem_check = NULL, .apic_id_registered = numaq_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = NUMAQ_IRQ_DELIVERY_MODE, + .irq_dest_mode = NUMAQ_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 946da7aa762..7b3f43caf2a 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -44,8 +44,8 @@ struct genapic apic_summit = { .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = summit_apic_id_registered, - .irq_delivery_mode = IRQ_DELIVERY_MODE, - .irq_dest_mode = IRQ_DEST_MODE, + .irq_delivery_mode = SUMMIT_IRQ_DELIVERY_MODE, + .irq_dest_mode = SUMMIT_IRQ_DEST_MODE, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From dcafa4a8c95ce063cbae0a5e61632bc3c4924e66 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 217/682] x86, apic: remove DEFAULT_IRQ_DELIVERY_MODE and DEFAULT_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_default template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/mach-generic/default.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index eafbf4f2038..f3b2cd42388 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -34,8 +34,6 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define DEFAULT_IRQ_DELIVERY_MODE dest_LowestPrio -#define DEFAULT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 0482106f0e1..fe97b0114a0 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -31,8 +31,9 @@ struct genapic apic_default = { .acpi_madt_oem_check = NULL, .apic_id_registered = default_apic_id_registered, - .irq_delivery_mode = DEFAULT_IRQ_DELIVERY_MODE, - .irq_dest_mode = DEFAULT_IRQ_DEST_MODE, + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 82daea6b0890f739be1ad4ab1c1b922b1555582e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 218/682] x86, apic: remove SUMMIT_IRQ_DELIVERY_MODE and SUMMIT_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_summit template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/summit/apic.h | 3 --- arch/x86/mach-generic/summit.c | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index ea2abe9b597..427d0889f6f 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -24,9 +24,6 @@ static inline const cpumask_t *target_cpus(void) return &cpumask_of_cpu(0); } -#define SUMMIT_IRQ_DELIVERY_MODE (dest_LowestPrio) -#define SUMMIT_IRQ_DEST_MODE 1 /* logical delivery broadcast to all procs */ - static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 7b3f43caf2a..1b9164b92b0 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -44,8 +44,9 @@ struct genapic apic_summit = { .acpi_madt_oem_check = summit_acpi_madt_oem_check, .apic_id_registered = summit_apic_id_registered, - .irq_delivery_mode = SUMMIT_IRQ_DELIVERY_MODE, - .irq_dest_mode = SUMMIT_IRQ_DEST_MODE, + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 1b1bcb3ff4e4934d949574cec90679219ace5412 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 219/682] x86, apic: remove NUMAQ_IRQ_DELIVERY_MODE and NUMAQ_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_numaq template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numaq/apic.h | 3 --- arch/x86/mach-generic/numaq.c | 5 +++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 7746035c591..a9d846769a0 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -15,9 +15,6 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define NUMAQ_IRQ_DELIVERY_MODE dest_LowestPrio -#define NUMAQ_IRQ_DEST_MODE 0 /* physical delivery on LOCAL quad */ - static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 77ac66935fd..6daddb6949d 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -51,8 +51,9 @@ struct genapic apic_numaq = { .acpi_madt_oem_check = NULL, .apic_id_registered = numaq_apic_id_registered, - .irq_delivery_mode = NUMAQ_IRQ_DELIVERY_MODE, - .irq_dest_mode = NUMAQ_IRQ_DEST_MODE, + .irq_delivery_mode = dest_LowestPrio, + /* physical delivery on LOCAL quad: */ + .irq_dest_mode = 0, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From d8a3539e64f8e27b0ab5bb7e7ba3b8f34b739224 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 220/682] x86, apic: remove BIGSMP_IRQ_DELIVERY_MODE and BIGSMP_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_bigsmp driver template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 -- arch/x86/mach-generic/bigsmp.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 293551b0e61..dca2d5b01da 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -21,8 +21,6 @@ static inline const cpumask_t *target_cpus(void) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define BIGSMP_IRQ_DELIVERY_MODE (dest_Fixed) -#define BIGSMP_IRQ_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index e8c1ceca7c9..06be776067a 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -64,8 +64,9 @@ struct genapic apic_bigsmp = { .acpi_madt_oem_check = NULL, .apic_id_registered = bigsmp_apic_id_registered, - .irq_delivery_mode = BIGSMP_IRQ_DELIVERY_MODE, - .irq_dest_mode = BIGSMP_IRQ_DEST_MODE, + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPU: */ + .irq_dest_mode = 0, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 38bd77a6c35168b03b65f7438cdcc1257d550924 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:16:01 +0100 Subject: [PATCH 221/682] x86, apic: remove ES7000_IRQ_DELIVERY_MODE and ES7000_IRQ_DEST_MODE Impact: cleanup They were only used in a single place and obscured the apic_es7000 driver template. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 2 -- arch/x86/mach-generic/es7000.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 690016683f2..342416b3fad 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,6 @@ static inline const cpumask_t *target_cpus(void) #define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define ES7000_IRQ_DELIVERY_MODE (dest_Fixed) -#define ES7000_IRQ_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 5d97408919b..269a97aef43 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -107,8 +107,9 @@ struct genapic apic_es7000 = { .acpi_madt_oem_check = es7000_acpi_madt_oem_check, .apic_id_registered = es7000_apic_id_registered, - .irq_delivery_mode = ES7000_IRQ_DELIVERY_MODE, - .irq_dest_mode = ES7000_IRQ_DEST_MODE, + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPUs: */ + .irq_dest_mode = 0, .target_cpus = target_cpus, .ESR_DISABLE = esr_disable, From 7fe732862d9697cc1863286fbcace9a67f231b4c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:24:16 +0100 Subject: [PATCH 222/682] x86, apic: remove IRQ_DEST_MODE Remove leftover definition. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index f3b2cd42388..ce3bc4845b9 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,7 +22,6 @@ static inline const struct cpumask *target_cpus(void) #ifdef CONFIG_X86_64 #include -#define IRQ_DEST_MODE (apic->irq_dest_mode) #define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) From 0a9cc20b9c18372ba5a9fea990f5812f3ee01e32 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:30:40 +0100 Subject: [PATCH 223/682] x86, apic: clean up target_cpus methods Impact: cleanup Clean up all the target_cpus() namespace overlap that exists between bigsmp, es7000, mach-default, numaq and summit - by separating the different functions into different names. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 4 ++-- arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index dca2d5b01da..d6aeca3c5a8 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -9,7 +9,7 @@ static inline int bigsmp_apic_id_registered(void) return 1; } -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *bigsmp_target_cpus(void) { #ifdef CONFIG_SMP return &cpu_online_map; diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 342416b3fad..7e5c31a4f8d 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -16,7 +16,7 @@ static inline const cpumask_t *target_cpus_cluster(void) return &CPU_MASK_ALL; } -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *es7000_target_cpus(void) { return &cpumask_of_cpu(smp_processor_id()); } @@ -83,7 +83,7 @@ static inline void setup_apic_routing(void) printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*target_cpus())[0]); + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index ce3bc4845b9..af1607ddd2a 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,7 +8,7 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline const struct cpumask *target_cpus(void) +static inline const struct cpumask *default_target_cpus(void) { #ifdef CONFIG_SMP return cpu_online_mask; @@ -33,7 +33,7 @@ static inline const struct cpumask *target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define TARGET_CPUS (target_cpus()) +#define TARGET_CPUS (default_target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index a9d846769a0..1111ff9e41d 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -7,7 +7,7 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *numaq_target_cpus(void) { return &CPU_MASK_ALL; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 427d0889f6f..7c1f9151429 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -15,7 +15,7 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *target_cpus(void) +static inline const cpumask_t *summit_target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 06be776067a..d3cead2d2fc 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -68,7 +68,7 @@ struct genapic apic_bigsmp = { /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = target_cpus, + .target_cpus = bigsmp_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index fe97b0114a0..a483e22273e 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -35,7 +35,7 @@ struct genapic apic_default = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = target_cpus, + .target_cpus = default_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 269a97aef43..e31f0c35470 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -111,7 +111,7 @@ struct genapic apic_es7000 = { /* phys delivery to target CPUs: */ .irq_dest_mode = 0, - .target_cpus = target_cpus, + .target_cpus = es7000_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 6daddb6949d..4b84b5970fb 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -55,7 +55,7 @@ struct genapic apic_numaq = { /* physical delivery on LOCAL quad: */ .irq_dest_mode = 0, - .target_cpus = target_cpus, + .target_cpus = numaq_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 1b9164b92b0..e6b956a0848 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -48,7 +48,7 @@ struct genapic apic_summit = { /* logical delivery broadcast to all CPUs: */ .irq_dest_mode = 1, - .target_cpus = target_cpus, + .target_cpus = summit_target_cpus, .ESR_DISABLE = esr_disable, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, From fe402e1f2b67a63f1e53ab2a316fc20f7ca4ec91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 04:32:51 +0100 Subject: [PATCH 224/682] x86, apic: clean up / remove TARGET_CPUS Impact: cleanup use apic->target_cpus() directly instead of the TARGET_CPUS wrapper. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/io_apic.c | 22 +++++++++---------- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 7e5c31a4f8d..53adda099c9 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -161,7 +161,7 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + * on the same apicid cluster return default value of target_cpus(): */ cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); @@ -194,7 +194,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return cpu_to_logical_apicid(0); /* * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index af1607ddd2a..77a97247587 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -22,7 +22,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include -#define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) @@ -33,7 +32,6 @@ static inline const struct cpumask *default_target_cpus(void) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void setup_apic_routing(void); #else -#define TARGET_CPUS (default_target_cpus()) #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 387a5d00c43..da2d7780cb5 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -7,7 +7,6 @@ #define NO_BALANCE_IRQ (apic->no_balance_irq) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) -#define TARGET_CPUS (apic->target_cpus()) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 7c1f9151429..cf5036f1ce6 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -148,7 +148,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return (int) 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. + * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 301b6571d70..7503285e180 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1559,10 +1559,10 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, TARGET_CPUS)) + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1661,7 +1661,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, */ entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest = cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2877,7 +2877,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, cfg, TARGET_CPUS); + assign_irq_vector(0, cfg, apic->target_cpus()); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -3195,7 +3195,7 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; - if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; } @@ -3261,11 +3261,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3698,12 +3698,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) return -ENXIO; cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, TARGET_CPUS); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); if (!err) { struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); + dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3987,7 +3987,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) /* * This function currently is only a helper for the i386 smp boot process where * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS + * so mask in all cases should simply be apic->target_cpus() */ #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) @@ -4028,7 +4028,7 @@ void __init setup_ioapic_dest(void) (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; else - mask = TARGET_CPUS; + mask = apic->target_cpus(); #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) From f6f52baf2613dd319e9ba3f3319bf1f1c442e4b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:01:41 +0100 Subject: [PATCH 225/682] x86: clean up esr_disable() methods Impact: cleanup Most subarchitectures want to disable the APIC ESR (Error Status Register), because they generally have hardware hacks that wrap standard CPUs into a bigger system and hence the APIC bus is quite non-standard and weirdnesses (lockups) have been seen with ESR reporting. Remove the esr_disable macros and put the desired flag into each subarchitecture's genapic template directly. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 1 - arch/x86/include/asm/es7000/apic.h | 1 - arch/x86/include/asm/mach-default/mach_apic.h | 1 - arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 1 - arch/x86/include/asm/summit/apic.h | 1 - arch/x86/kernel/apic.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index d6aeca3c5a8..b550cb11102 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -2,7 +2,6 @@ #define __ASM_MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) -#define esr_disable (1) static inline int bigsmp_apic_id_registered(void) { diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 53adda099c9..aa11c768bed 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -4,7 +4,6 @@ #include #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) -#define esr_disable (1) static inline int es7000_apic_id_registered(void) { diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 77a97247587..5f8d17fdc96 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -18,7 +18,6 @@ static inline const struct cpumask *default_target_cpus(void) } #define NO_BALANCE_IRQ (0) -#define esr_disable (0) #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index da2d7780cb5..63fe985219f 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define esr_disable (apic->ESR_DISABLE) #define NO_BALANCE_IRQ (apic->no_balance_irq) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL (apic->apic_destination_logical) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 1111ff9e41d..8ecb3b45c6c 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -13,7 +13,6 @@ static inline const cpumask_t *numaq_target_cpus(void) } #define NO_BALANCE_IRQ (1) -#define esr_disable (1) static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index cf5036f1ce6..84679e687ad 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -4,7 +4,6 @@ #include #include -#define esr_disable (1) #define NO_BALANCE_IRQ (0) /* In clustered mode, the high nibble of APIC ID is a cluster number. diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b6740de18fb..69d8c30d571 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1107,7 +1107,7 @@ static void __cpuinit lapic_setup_esr(void) return; } - if (esr_disable) { + if (apic->ESR_DISABLE) { /* * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the @@ -1157,7 +1157,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && esr_disable) { + if (lapic_is_integrated() && apic->ESR_DISABLE) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d3cead2d2fc..f0bb72674f7 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -69,7 +69,7 @@ struct genapic apic_bigsmp = { .irq_dest_mode = 0, .target_cpus = bigsmp_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index a483e22273e..c30141a9aca 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -36,7 +36,7 @@ struct genapic apic_default = { .irq_dest_mode = 1, .target_cpus = default_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 0, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index e31f0c35470..e8aa8fd4f49 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -112,7 +112,7 @@ struct genapic apic_es7000 = { .irq_dest_mode = 0, .target_cpus = es7000_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 4b84b5970fb..860edc8bd90 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -56,7 +56,7 @@ struct genapic apic_numaq = { .irq_dest_mode = 0, .target_cpus = numaq_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index e6b956a0848..cd5ef115a4e 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -49,7 +49,7 @@ struct genapic apic_summit = { .irq_dest_mode = 1, .target_cpus = summit_target_cpus, - .ESR_DISABLE = esr_disable, + .ESR_DISABLE = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From 08125d3edab90644724652eedec3e219e3e0f2e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:08:44 +0100 Subject: [PATCH 226/682] x86: rename ->ESR_DISABLE to ->disable_esr the ->ESR_DISABLE shouting variant was used to enable the esr_disable macro wrappers. Those ugly macros are removed now so we can rename ->ESR_DISABLE to ->disable_esr Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index e998e3df5d2..7aee4a4c524 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -28,7 +28,7 @@ struct genapic { const struct cpumask *(*target_cpus)(void); - int ESR_DISABLE; + int disable_esr; int apic_destination_logical; unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 69d8c30d571..3853ed76c88 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1107,7 +1107,7 @@ static void __cpuinit lapic_setup_esr(void) return; } - if (apic->ESR_DISABLE) { + if (apic->disable_esr) { /* * Something untraceable is creating bad interrupts on * secondary quads ... for the moment, just leave the @@ -1157,7 +1157,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && apic->ESR_DISABLE) { + if (lapic_is_integrated() && apic->disable_esr) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 0a263d6bb5e..d437a60cc58 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -184,7 +184,7 @@ struct genapic apic_flat = { .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = flat_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -330,7 +330,7 @@ struct genapic apic_physflat = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = physflat_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index e9ff7dc9a0f..c1cffae4a4c 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -186,7 +186,7 @@ struct genapic apic_x2apic_cluster = { .irq_dest_mode = (APIC_DEST_LOGICAL != 0), .target_cpus = x2apic_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 8141b5a88f6..c59602be035 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -182,7 +182,7 @@ struct genapic apic_x2apic_phys = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = x2apic_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 6a73cad0d3e..525b4e480a7 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -247,7 +247,7 @@ struct genapic apic_x2apic_uv_x = { .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), .target_cpus = uv_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index f0bb72674f7..fe9bf252c06 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -69,7 +69,7 @@ struct genapic apic_bigsmp = { .irq_dest_mode = 0, .target_cpus = bigsmp_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c30141a9aca..d3fe8017e94 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -36,7 +36,7 @@ struct genapic apic_default = { .irq_dest_mode = 1, .target_cpus = default_target_cpus, - .ESR_DISABLE = 0, + .disable_esr = 0, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index e8aa8fd4f49..b4f8abfb714 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -112,7 +112,7 @@ struct genapic apic_es7000 = { .irq_dest_mode = 0, .target_cpus = es7000_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 860edc8bd90..f3b7840d227 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -56,7 +56,7 @@ struct genapic apic_numaq = { .irq_dest_mode = 0, .target_cpus = numaq_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index cd5ef115a4e..95e075b61bd 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -49,7 +49,7 @@ struct genapic apic_summit = { .irq_dest_mode = 1, .target_cpus = summit_target_cpus, - .ESR_DISABLE = 1, + .disable_esr = 1, .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From 0b06e734bff7554c31eac4aad2fc9be4adb7c1c1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:13:04 +0100 Subject: [PATCH 227/682] x86: clean up the APIC_DEST_LOGICAL logic Impact: cleanup The bigsmp and es7000 subarchitectures un-defined APIC_DEST_LOGICAL in a rather nasty way by re-defining it to zero. That is infinitely fragile and makes it very hard to see what to code really does in a given context. The very same constant has different meanings and values - depending on which subarch is enabled. Untangle this mess by never undefining the constant, but instead propagating the right values into the genapic driver templates. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 -- arch/x86/include/asm/es7000/apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/kernel/genapic_flat_64.c | 12 ++++++------ arch/x86/kernel/genx2apic_cluster.c | 10 +++++----- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- 12 files changed, 19 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index b550cb11102..7e6e33a6db0 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -17,8 +17,6 @@ static inline const cpumask_t *bigsmp_target_cpus(void) #endif } -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0 #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index aa11c768bed..0d770fce4b2 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -27,8 +27,6 @@ static inline const cpumask_t *es7000_target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL 0x0 static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 63fe985219f..00d5fe6e676 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -4,8 +4,6 @@ #include #define NO_BALANCE_IRQ (apic->no_balance_irq) -#undef APIC_DEST_LOGICAL -#define APIC_DEST_LOGICAL (apic->apic_destination_logical) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index d437a60cc58..fd242c6b3ba 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); + __send_IPI_dest_field(mask, vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -114,7 +114,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->apic_destination_logical); } } @@ -123,7 +123,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) flat_send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->apic_destination_logical); } static unsigned int get_apic_id(unsigned long x) @@ -181,11 +181,11 @@ struct genapic apic_flat = { .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = flat_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -327,7 +327,7 @@ struct genapic apic_physflat = { .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 0, /* physical */ .target_cpus = physflat_target_cpus, .disable_esr = 0, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index c1cffae4a4c..a76e75ecc20 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -64,7 +64,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -80,7 +80,7 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -95,7 +95,7 @@ static void x2apic_send_IPI_allbutself(int vector) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); + vector, apic->apic_destination_logical); local_irq_restore(flags); } @@ -183,11 +183,11 @@ struct genapic apic_x2apic_cluster = { .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, - .irq_dest_mode = (APIC_DEST_LOGICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index c59602be035..9b6d68deb14 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -179,7 +179,7 @@ struct genapic apic_x2apic_phys = { .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 0, /* physical */ .target_cpus = x2apic_target_cpus, .disable_esr = 0, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 525b4e480a7..0a756800c11 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -244,11 +244,11 @@ struct genapic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = (APIC_DEST_PHYSICAL != 0), + .irq_dest_mode = 1, /* logical */ .target_cpus = uv_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .apic_destination_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7503285e180..17526d7a8ab 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -806,7 +806,7 @@ void send_IPI_self(int vector) * Wait for idle. */ apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->apic_destination_logical; /* * Send the IPI. The write to APIC_ICR fires this off. */ diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 285bbf8831f..400b7bd48f6 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -30,7 +30,7 @@ static inline int __prepare_ICR(unsigned int shortcut, int vector) { - unsigned int icr = shortcut | APIC_DEST_LOGICAL; + unsigned int icr = shortcut | apic->apic_destination_logical; switch (vector) { default: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff4354..f0a173718d9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); + apic_icr_write(APIC_DM_NMI | apic->apic_destination_logical, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index fe9bf252c06..13ee7dc9b95 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -70,7 +70,7 @@ struct genapic apic_bigsmp = { .target_cpus = bigsmp_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .apic_destination_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index b4f8abfb714..61b5da213ce 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -113,7 +113,7 @@ struct genapic apic_es7000 = { .target_cpus = es7000_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .apic_destination_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From bdb1a9b62fc182d4da3143e346f7a0925d243352 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:29:25 +0100 Subject: [PATCH 228/682] x86, apic: rename genapic::apic_destination_logical to genapic::dest_logical This field name was unreasonably long - shorten it. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- arch/x86/kernel/genapic_flat_64.c | 10 +++++----- arch/x86/kernel/genx2apic_cluster.c | 8 ++++---- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 7aee4a4c524..f9d1ec018fd 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -30,7 +30,7 @@ struct genapic { int disable_esr; - int apic_destination_logical; + int dest_logical; unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index fd242c6b3ba..d22cbdaee20 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, apic->apic_destination_logical); + __send_IPI_dest_field(mask, vector, apic->dest_logical); local_irq_restore(flags); } @@ -114,7 +114,7 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->apic_destination_logical); + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); } } @@ -123,7 +123,7 @@ static void flat_send_IPI_all(int vector) if (vector == NMI_VECTOR) flat_send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->apic_destination_logical); + __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } static unsigned int get_apic_id(unsigned long x) @@ -185,7 +185,7 @@ struct genapic apic_flat = { .target_cpus = flat_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, @@ -331,7 +331,7 @@ struct genapic apic_physflat = { .target_cpus = physflat_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index a76e75ecc20..b91a48eae52 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -64,7 +64,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) for_each_cpu(query_cpu, mask) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -80,7 +80,7 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -95,7 +95,7 @@ static void x2apic_send_IPI_allbutself(int vector) if (query_cpu != this_cpu) __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->apic_destination_logical); + vector, apic->dest_logical); local_irq_restore(flags); } @@ -187,7 +187,7 @@ struct genapic apic_x2apic_cluster = { .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 9b6d68deb14..f070e86af0f 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -183,7 +183,7 @@ struct genapic apic_x2apic_phys = { .target_cpus = x2apic_target_cpus, .disable_esr = 0, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 0a756800c11..c8a89158679 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -248,7 +248,7 @@ struct genapic apic_x2apic_uv_x = { .target_cpus = uv_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 17526d7a8ab..7f8b32b2089 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -806,7 +806,7 @@ void send_IPI_self(int vector) * Wait for idle. */ apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->apic_destination_logical; + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->dest_logical; /* * Send the IPI. The write to APIC_ICR fires this off. */ diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 400b7bd48f6..e2e4895ca69 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -30,7 +30,7 @@ static inline int __prepare_ICR(unsigned int shortcut, int vector) { - unsigned int icr = shortcut | apic->apic_destination_logical; + unsigned int icr = shortcut | apic->dest_logical; switch (vector) { default: diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f0a173718d9..45c096f605f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) /* Target chip */ /* Boot on the stack */ /* Kick the second */ - apic_icr_write(APIC_DM_NMI | apic->apic_destination_logical, logical_apicid); + apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); pr_debug("Waiting for send to finish...\n"); send_status = safe_apic_wait_icr_idle(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 13ee7dc9b95..7c52840f205 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -70,7 +70,7 @@ struct genapic apic_bigsmp = { .target_cpus = bigsmp_target_cpus, .disable_esr = 1, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d3fe8017e94..53fa1ad8318 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -37,7 +37,7 @@ struct genapic apic_default = { .target_cpus = default_target_cpus, .disable_esr = 0, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 61b5da213ce..50fed0225cd 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -113,7 +113,7 @@ struct genapic apic_es7000 = { .target_cpus = es7000_target_cpus, .disable_esr = 1, - .apic_destination_logical = 0, + .dest_logical = 0, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index f3b7840d227..1fb1b1a4aa0 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -57,7 +57,7 @@ struct genapic apic_numaq = { .target_cpus = numaq_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 95e075b61bd..5c27d4d824e 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -50,7 +50,7 @@ struct genapic apic_summit = { .target_cpus = summit_target_cpus, .disable_esr = 1, - .apic_destination_logical = APIC_DEST_LOGICAL, + .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = check_apicid_used, .check_apicid_present = check_apicid_present, From d1d7cae8fd54a301a0de531b48451649933ffdcf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:41:42 +0100 Subject: [PATCH 229/682] x86, apic: clean up check_apicid*() callbacks Clean up these methods - to make it clearer which function is used in which case. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 9 +++++---- arch/x86/include/asm/es7000/apic.h | 5 +++-- arch/x86/include/asm/mach-default/mach_apic.h | 4 ++-- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/include/asm/numaq/apic.h | 5 +++-- arch/x86/include/asm/summit/apic.h | 5 +++-- arch/x86/kernel/io_apic.c | 6 +++--- arch/x86/mach-generic/bigsmp.c | 4 ++-- arch/x86/mach-generic/default.c | 4 ++-- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 4 ++-- 12 files changed, 29 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 7e6e33a6db0..bd52d4d86f0 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -20,14 +20,15 @@ static inline const cpumask_t *bigsmp_target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) { - return (0); + return 0; } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long bigsmp_check_apicid_present(int bit) { - return (1); + return 1; } static inline unsigned long calculate_ldr(int cpu) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 0d770fce4b2..cd888daa193 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -28,11 +28,12 @@ static inline const cpumask_t *es7000_target_cpus(void) #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define NO_BALANCE_IRQ (0) -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 5f8d17fdc96..064bc11a991 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -105,12 +105,12 @@ static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) } #endif -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long default_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 00d5fe6e676..e035f88dfcd 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -13,9 +13,7 @@ #define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) -#define check_apicid_present (apic->check_apicid_present) #define check_phys_apicid_present (apic->check_phys_apicid_present) -#define check_apicid_used (apic->check_apicid_used) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define vector_allocation_domain (apic->vector_allocation_domain) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 8ecb3b45c6c..571fdaeafaa 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -14,11 +14,12 @@ static inline const cpumask_t *numaq_target_cpus(void) #define NO_BALANCE_IRQ (1) -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) { return physid_isset(apicid, bitmap); } -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long numaq_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 84679e687ad..482038b244b 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -23,13 +23,14 @@ static inline const cpumask_t *summit_target_cpus(void) return &cpumask_of_cpu(0); } -static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long +summit_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } /* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long check_apicid_present(int bit) +static inline unsigned long summit_check_apicid_present(int bit) { return 1; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7f8b32b2089..733ecf17272 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2135,7 +2135,7 @@ static void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -3878,10 +3878,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(apic_id_map, i)) break; } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 7c52840f205..aa8443f6c0f 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -71,8 +71,8 @@ struct genapic apic_bigsmp = { .target_cpus = bigsmp_target_cpus, .disable_esr = 1, .dest_logical = 0, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = bigsmp_check_apicid_used, + .check_apicid_present = bigsmp_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 53fa1ad8318..47f6b5b06ba 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -38,8 +38,8 @@ struct genapic apic_default = { .target_cpus = default_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = default_check_apicid_used, + .check_apicid_present = default_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 50fed0225cd..5633f3296e1 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -114,8 +114,8 @@ struct genapic apic_es7000 = { .target_cpus = es7000_target_cpus, .disable_esr = 1, .dest_logical = 0, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = es7000_check_apicid_used, + .check_apicid_present = es7000_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 1fb1b1a4aa0..d85206d8e4a 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -58,8 +58,8 @@ struct genapic apic_numaq = { .target_cpus = numaq_target_cpus, .disable_esr = 1, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = numaq_check_apicid_used, + .check_apicid_present = numaq_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 5c27d4d824e..f54cf73d3ed 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -51,8 +51,8 @@ struct genapic apic_summit = { .target_cpus = summit_target_cpus, .disable_esr = 1, .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = check_apicid_used, - .check_apicid_present = check_apicid_present, + .check_apicid_used = summit_check_apicid_used, + .check_apicid_present = summit_check_apicid_present, .no_balance_irq = NO_BALANCE_IRQ, .no_ioapic_check = 0, From 2e867b17cc02e1799f18126af0ddd7b63dd8f6f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 05:57:56 +0100 Subject: [PATCH 230/682] x86, apic: remove no_balance_irq and no_ioapic_check flags These flags are completely unused. (the in-kernel IRQ balancer has been removed from the upstream kernel.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 1 - arch/x86/include/asm/es7000/apic.h | 2 -- arch/x86/include/asm/genapic.h | 3 --- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 -- arch/x86/include/asm/summit/apic.h | 2 -- arch/x86/kernel/genapic_flat_64.c | 6 ------ arch/x86/kernel/genx2apic_cluster.c | 3 --- arch/x86/kernel/genx2apic_phys.c | 3 --- arch/x86/kernel/genx2apic_uv_x.c | 3 --- arch/x86/mach-generic/bigsmp.c | 3 --- arch/x86/mach-generic/default.c | 3 --- arch/x86/mach-generic/es7000.c | 4 ---- arch/x86/mach-generic/numaq.c | 3 --- arch/x86/mach-generic/summit.c | 3 --- 16 files changed, 44 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index bd52d4d86f0..916451252b3 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -18,7 +18,6 @@ static inline const cpumask_t *bigsmp_target_cpus(void) } #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define NO_BALANCE_IRQ (0) static inline unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index cd888daa193..847008a7702 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -23,10 +23,8 @@ static inline const cpumask_t *es7000_target_cpus(void) #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) #define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ_CLUSTER (1) #define APIC_DFR_VALUE (APIC_DFR_FLAT) -#define NO_BALANCE_IRQ (0) static inline unsigned long es7000_check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index f9d1ec018fd..661898c2229 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -34,9 +34,6 @@ struct genapic { unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); unsigned long (*check_apicid_present)(int apicid); - int no_balance_irq; - int no_ioapic_check; - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 064bc11a991..8adccf8ee47 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -17,8 +17,6 @@ static inline const struct cpumask *default_target_cpus(void) #endif } -#define NO_BALANCE_IRQ (0) - #ifdef CONFIG_X86_64 #include #define init_apic_ldr (apic->init_apic_ldr) diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e035f88dfcd..4cb9e2b99e3 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define NO_BALANCE_IRQ (apic->no_balance_irq) #define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 571fdaeafaa..defee3496ad 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -12,8 +12,6 @@ static inline const cpumask_t *numaq_target_cpus(void) return &CPU_MASK_ALL; } -#define NO_BALANCE_IRQ (1) - static inline unsigned long numaq_check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 482038b244b..51df002ecf4 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -4,8 +4,6 @@ #include #include -#define NO_BALANCE_IRQ (0) - /* In clustered mode, the high nibble of APIC ID is a cluster number. * The low nibble is a 4-bit bitmap. */ #define XAPIC_DEST_CPUS_SHIFT 4 diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index d22cbdaee20..9446f372a16 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -189,9 +189,6 @@ struct genapic apic_flat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = flat_init_apic_ldr, @@ -335,9 +332,6 @@ struct genapic apic_physflat = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = physflat_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index b91a48eae52..2eeca6e744a 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -191,9 +191,6 @@ struct genapic apic_x2apic_cluster = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = x2apic_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index f070e86af0f..be0ee3e56ef 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -187,9 +187,6 @@ struct genapic apic_x2apic_phys = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = x2apic_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c8a89158679..68b423f3da9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -252,9 +252,6 @@ struct genapic apic_x2apic_uv_x = { .check_apicid_used = NULL, .check_apicid_present = NULL, - .no_balance_irq = 0, - .no_ioapic_check = 0, - .vector_allocation_domain = uv_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index aa8443f6c0f..6da251aa9f4 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -74,9 +74,6 @@ struct genapic apic_bigsmp = { .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 47f6b5b06ba..e89e8c9dd68 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -41,9 +41,6 @@ struct genapic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 5633f3296e1..8e9eeecf7e2 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -23,7 +23,6 @@ void __init es7000_update_genapic_to_cluster(void) apic->target_cpus = target_cpus_cluster; apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; - apic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; apic->init_apic_ldr = init_apic_ldr_cluster; @@ -117,9 +116,6 @@ struct genapic apic_es7000 = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index d85206d8e4a..f909189fee3 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -61,9 +61,6 @@ struct genapic apic_numaq = { .check_apicid_used = numaq_check_apicid_used, .check_apicid_present = numaq_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index f54cf73d3ed..99a9bea8d14 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -54,9 +54,6 @@ struct genapic apic_summit = { .check_apicid_used = summit_check_apicid_used, .check_apicid_present = summit_check_apicid_present, - .no_balance_irq = NO_BALANCE_IRQ, - .no_ioapic_check = 0, - .vector_allocation_domain = vector_allocation_domain, .init_apic_ldr = init_apic_ldr, From e2d40b1878bd13ca1028ddd299c48e4821ac3535 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 231/682] x86, apic: clean up ->vector_allocation_domain() - separate the namespace - remove macros - move the default vector-allocation-domain to mach-generic - fix whitespace damage Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 13 ------------- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/kernel/io_apic.c | 2 +- arch/x86/mach-generic/bigsmp.c | 4 ++-- arch/x86/mach-generic/default.c | 16 +++++++++++++++- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 4 ++-- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8adccf8ee47..9c56542644c 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -23,7 +23,6 @@ static inline const struct cpumask *default_target_cpus(void) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define phys_pkg_id (apic->phys_pkg_id) -#define vector_allocation_domain (apic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) @@ -89,18 +88,6 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; -} #endif static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 4cb9e2b99e3..e94881af962 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -15,7 +15,6 @@ #define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define vector_allocation_domain (apic->vector_allocation_domain) #define enable_apic_mode (apic->enable_apic_mode) #define phys_pkg_id (apic->phys_pkg_id) #define wakeup_secondary_cpu (apic->wakeup_cpu) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 733ecf17272..49899e06624 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1316,7 +1316,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int new_cpu; int vector, offset; - vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask); vector = current_vector; offset = current_offset; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 6da251aa9f4..391cc99cd21 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -42,7 +42,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) { cpus_clear(*retmask); cpu_set(cpu, *retmask); @@ -74,7 +74,7 @@ struct genapic apic_bigsmp = { .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = bigsmp_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e89e8c9dd68..6adc3c69a3c 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -18,6 +18,20 @@ #include #include +static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + /* + * Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; +} + /* should be called last. */ static int probe_default(void) { @@ -41,7 +55,7 @@ struct genapic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 8e9eeecf7e2..bc1f21cd6a4 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -86,7 +86,7 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -116,7 +116,7 @@ struct genapic apic_es7000 = { .check_apicid_used = es7000_check_apicid_used, .check_apicid_present = es7000_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = es7000_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index f909189fee3..712882f48c4 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -31,7 +31,7 @@ static int probe_numaq(void) return found_numaq; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -61,7 +61,7 @@ struct genapic apic_numaq = { .check_apicid_used = numaq_check_apicid_used, .check_apicid_present = numaq_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = numaq_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 99a9bea8d14..1834887b94a 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static void vector_allocation_domain(int cpu, cpumask_t *retmask) +static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -54,7 +54,7 @@ struct genapic apic_summit = { .check_apicid_used = summit_check_apicid_used, .check_apicid_present = summit_check_apicid_present, - .vector_allocation_domain = vector_allocation_domain, + .vector_allocation_domain = summit_vector_allocation_domain, .init_apic_ldr = init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, From a5c4329622a3437adef4b2a4288d127957743c97 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 232/682] x86, apic: clean up ->init_apic_ldr() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/mach-default/mach_apic.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/apic.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 916451252b3..81941308299 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -46,7 +46,7 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr(void) +static inline void bigsmp_init_apic_ldr(void) { unsigned long val; int cpu = smp_processor_id(); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 847008a7702..06f5757bf7a 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -52,7 +52,7 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr_cluster(void) +static inline void es7000_init_apic_ldr_cluster(void) { unsigned long val; int cpu = smp_processor_id(); @@ -62,7 +62,7 @@ static inline void init_apic_ldr_cluster(void) apic_write(APIC_LDR, val); } -static inline void init_apic_ldr(void) +static inline void es7000_init_apic_ldr(void) { unsigned long val; int cpu = smp_processor_id(); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 9c56542644c..23e0a2da3a9 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -19,7 +19,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include -#define init_apic_ldr (apic->init_apic_ldr) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define phys_pkg_id (apic->phys_pkg_id) @@ -36,7 +35,7 @@ extern void setup_apic_routing(void); * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void init_apic_ldr(void) +static inline void default_init_apic_ldr(void) { unsigned long val; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e94881af962..8e51f416394 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define init_apic_ldr (apic->init_apic_ldr) #define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) #define multi_timer_check (apic->multi_timer_check) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index defee3496ad..802297489a3 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -28,7 +28,7 @@ static inline int numaq_apic_id_registered(void) return 1; } -static inline void init_apic_ldr(void) +static inline void numaq_init_apic_ldr(void) { /* Already done in NUMA-Q firmware */ } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 51df002ecf4..9108c89fe88 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -37,7 +37,7 @@ static inline unsigned long summit_check_apicid_present(int bit) extern u8 cpu_2_logical_apicid[]; -static inline void init_apic_ldr(void) +static inline void summit_init_apic_ldr(void) { unsigned long val, id; int count = 0; diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 3853ed76c88..b7077936ac0 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1179,7 +1179,7 @@ void __cpuinit setup_local_APIC(void) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ - init_apic_ldr(); + apic->init_apic_ldr(); /* * Set Task Priority to 'accept all'. We never change this diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 391cc99cd21..7b7fc471a3f 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -75,7 +75,7 @@ struct genapic apic_bigsmp = { .check_apicid_present = bigsmp_check_apicid_present, .vector_allocation_domain = bigsmp_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 6adc3c69a3c..633e8482af2 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -56,7 +56,7 @@ struct genapic apic_default = { .check_apicid_present = default_check_apicid_present, .vector_allocation_domain = default_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index bc1f21cd6a4..b70833e3597 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -24,7 +24,7 @@ void __init es7000_update_genapic_to_cluster(void) apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; - apic->init_apic_ldr = init_apic_ldr_cluster; + apic->init_apic_ldr = es7000_init_apic_ldr_cluster; apic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; } @@ -117,7 +117,7 @@ struct genapic apic_es7000 = { .check_apicid_present = es7000_check_apicid_present, .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = es7000_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 712882f48c4..a06fda57928 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -62,7 +62,7 @@ struct genapic apic_numaq = { .check_apicid_present = numaq_check_apicid_present, .vector_allocation_domain = numaq_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = numaq_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 1834887b94a..36c552fa427 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -55,7 +55,7 @@ struct genapic apic_summit = { .check_apicid_present = summit_check_apicid_present, .vector_allocation_domain = summit_vector_allocation_domain, - .init_apic_ldr = init_apic_ldr, + .init_apic_ldr = summit_init_apic_ldr, .ioapic_phys_id_map = ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, From d190cb87c4503014353f2310c4bfa2268fa7111d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 233/682] x86, apic: clean up ->ioapic_phys_id_map() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 3 ++- arch/x86/kernel/io_apic.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 81941308299..05116d5487d 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -94,7 +94,7 @@ static inline int cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0xFFL); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 06f5757bf7a..db3e652f0f7 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -125,7 +125,7 @@ static inline int cpu_to_logical_apicid(int cpu) #endif } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0xff); diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 23e0a2da3a9..7abdaae06f2 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -99,7 +99,7 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) { return phys_map; } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 8e51f416394..c1c96e6bb18 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define ioapic_phys_id_map (apic->ioapic_phys_id_map) #define setup_apic_routing (apic->setup_apic_routing) #define multi_timer_check (apic->multi_timer_check) #define apicid_to_node (apic->apicid_to_node) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 802297489a3..dc7499b9262 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -48,7 +48,7 @@ static inline int multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) { /* We don't have a good way to do this yet - hack */ return physids_promote(0xFUL); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9108c89fe88..4dafb58f930 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -109,7 +109,8 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) +static inline physid_mask_t + summit_ioapic_phys_id_map(physid_mask_t phys_id_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0x0F); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 49899e06624..db79ad9a764 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2108,7 +2108,7 @@ static void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -3862,7 +3862,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 7b7fc471a3f..f2a3418d0cc 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -77,7 +77,7 @@ struct genapic apic_bigsmp = { .vector_allocation_domain = bigsmp_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 633e8482af2..c403f3d9300 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -58,7 +58,7 @@ struct genapic apic_default = { .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index b70833e3597..ce09baf0872 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -119,7 +119,7 @@ struct genapic apic_es7000 = { .vector_allocation_domain = es7000_vector_allocation_domain, .init_apic_ldr = es7000_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = es7000_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index a06fda57928..5d98f18a0bd 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -64,7 +64,7 @@ struct genapic apic_numaq = { .vector_allocation_domain = numaq_vector_allocation_domain, .init_apic_ldr = numaq_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = numaq_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 36c552fa427..6abdd53a01c 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -57,7 +57,7 @@ struct genapic apic_summit = { .vector_allocation_domain = summit_vector_allocation_domain, .init_apic_ldr = summit_init_apic_ldr, - .ioapic_phys_id_map = ioapic_phys_id_map, + .ioapic_phys_id_map = summit_ioapic_phys_id_map, .setup_apic_routing = setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, From 72ce016583916fb7ffcbaa6a3e1f8f731b79a865 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 234/682] x86, apic: clean up ->setup_apic_routing() - separate the namespace - remove macros - remove namespace clash on 64-bit Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/genapic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 4 ++-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/acpi/boot.c | 5 ++--- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/mpparse.c | 6 +++--- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 20 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 05116d5487d..321ea47b5dd 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -56,7 +56,7 @@ static inline void bigsmp_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void setup_apic_routing(void) +static inline void bigsmp_setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "Physflat", nr_ioapics); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index db3e652f0f7..f1183000a94 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -73,7 +73,7 @@ static inline void es7000_init_apic_ldr(void) } extern int apic_version [MAX_APICS]; -static inline void setup_apic_routing(void) +static inline void es7000_setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 661898c2229..38b1202316f 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -107,7 +107,7 @@ extern void apic_send_IPI_self(int vector); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void setup_apic_routing(void); +extern void default_setup_apic_routing(void); #endif #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 7abdaae06f2..d4467746304 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -25,7 +25,7 @@ static inline const struct cpumask *default_target_cpus(void) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) -extern void setup_apic_routing(void); +extern void default_setup_apic_routing(void); #else #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* @@ -70,7 +70,7 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -static inline void setup_apic_routing(void) +static inline void default_setup_apic_routing(void) { #ifdef CONFIG_X86_IO_APIC printk("Enabling APIC mode: %s. Using %d I/O APICs\n", diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index c1c96e6bb18..ddf369248ab 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define setup_apic_routing (apic->setup_apic_routing) #define multi_timer_check (apic->multi_timer_check) #define apicid_to_node (apic->apicid_to_node) #define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index dc7499b9262..2feb7e72e9e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -33,7 +33,7 @@ static inline void numaq_init_apic_ldr(void) /* Already done in NUMA-Q firmware */ } -static inline void setup_apic_routing(void) +static inline void numaq_setup_apic_routing(void) { printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "NUMA-Q", nr_ioapics); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 4dafb58f930..7ec2696bc9a 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -74,7 +74,7 @@ static inline int summit_apic_id_registered(void) return 1; } -static inline void setup_apic_routing(void) +static inline void summit_setup_apic_routing(void) { printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", nr_ioapics); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 314fe0dddef..539163161a4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1360,9 +1360,8 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; -#ifdef CONFIG_X86_32 - setup_apic_routing(); -#endif + if (apic->setup_apic_routing) + apic->setup_apic_routing(); } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b7077936ac0..fcbcc03cd4b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1625,7 +1625,7 @@ int __init APIC_init_uniprocessor(void) enable_IR_x2apic(); #endif #ifdef CONFIG_X86_64 - setup_apic_routing(); + default_setup_apic_routing(); #endif verify_local_APIC(); diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 060945b8eec..d57d2138f07 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -44,7 +44,7 @@ static struct genapic *apic_probe[] __initdata = { /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ -void __init setup_apic_routing(void) +void __init default_setup_apic_routing(void) { if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index fa6bb263892..c8a534a16d9 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -390,9 +390,9 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) generic_bigsmp_probe(); #endif -#ifdef CONFIG_X86_32 - setup_apic_routing(); -#endif + if (apic->setup_apic_routing) + apic->setup_apic_routing(); + if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 45c096f605f..3791b4ae567 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1128,7 +1128,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) #ifdef CONFIG_X86_64 enable_IR_x2apic(); - setup_apic_routing(); + default_setup_apic_routing(); #endif if (smp_sanity_check(max_cpus) < 0) { diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index f2a3418d0cc..ad3837a59bd 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -78,7 +78,7 @@ struct genapic apic_bigsmp = { .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c403f3d9300..67f287fc12d 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -59,7 +59,7 @@ struct genapic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = default_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index ce09baf0872..f6117293946 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -120,7 +120,7 @@ struct genapic apic_es7000 = { .init_apic_ldr = es7000_init_apic_ldr, .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = es7000_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 5d98f18a0bd..8c137f41348 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -65,7 +65,7 @@ struct genapic apic_numaq = { .init_apic_ldr = numaq_init_apic_ldr, .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = numaq_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6abdd53a01c..0698566dc7b 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -58,7 +58,7 @@ struct genapic apic_summit = { .init_apic_ldr = summit_init_apic_ldr, .ioapic_phys_id_map = summit_ioapic_phys_id_map, - .setup_apic_routing = setup_apic_routing, + .setup_apic_routing = summit_setup_apic_routing, .multi_timer_check = multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, From 33a201fac698a93d9d1ffa77030ba2ff38d1a3d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 07:17:26 +0100 Subject: [PATCH 235/682] x86, apic: streamline the ->multi_timer_check() quirk only NUMAQ uses this quirk: to prevent the timer IRQ from being added on secondary nodes. All other genapic templates can have a NULL ->multi_timer_check() callback. Also, extend the generic code to treat a NULL pointer accordingly. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 5 ----- arch/x86/include/asm/es7000/apic.h | 5 ----- arch/x86/include/asm/mach-default/mach_apic.h | 5 ----- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 5 ----- arch/x86/kernel/io_apic.c | 11 ++++++++--- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 14 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 321ea47b5dd..df59298086c 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -62,11 +62,6 @@ static inline void bigsmp_setup_apic_routing(void) "Physflat", nr_ioapics); } -static inline int multi_timer_check(int apic, int irq) -{ - return (0); -} - static inline int apicid_to_node(int logical_apicid) { return apicid_2_node[hard_smp_processor_id()]; diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index f1183000a94..632e4cd3f4f 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -82,11 +82,6 @@ static inline void es7000_setup_apic_routing(void) nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - static inline int apicid_to_node(int logical_apicid) { return 0; diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index d4467746304..f418d470cf4 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -104,11 +104,6 @@ static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) return phys_map; } -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index ddf369248ab..bdea0a759e8 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define multi_timer_check (apic->multi_timer_check) #define apicid_to_node (apic->apicid_to_node) #define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) #define cpu_present_to_apicid (apic->cpu_present_to_apicid) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 2feb7e72e9e..22bdf3d4c0e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -43,7 +43,7 @@ static inline void numaq_setup_apic_routing(void) * Skip adding the timer int on secondary nodes, which causes * a small but painful rift in the time-space continuum. */ -static inline int multi_timer_check(int apic, int irq) +static inline int numaq_multi_timer_check(int apic, int irq) { return apic != 0 && irq == 0; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 7ec2696bc9a..acb7bd1de84 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -64,11 +64,6 @@ static inline void summit_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int multi_timer_check(int apic, int irq) -{ - return 0; -} - static inline int summit_apic_id_registered(void) { return 1; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index db79ad9a764..282ea112f3c 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1618,10 +1618,15 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic_id, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic_id, irq)) + + /* + * Skip the timer IRQ if there's a quirk handler + * installed and if it returns 1: + */ + if (apic->multi_timer_check && + apic->multi_timer_check(apic_id, irq)) continue; -#endif + desc = irq_to_desc_alloc_cpu(irq, cpu); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index ad3837a59bd..d0749569cdf 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -79,7 +79,7 @@ struct genapic apic_bigsmp = { .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 67f287fc12d..6a21aa7c0c6 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -60,7 +60,7 @@ struct genapic apic_default = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = default_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index f6117293946..0be59a51df2 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -121,7 +121,7 @@ struct genapic apic_es7000 = { .ioapic_phys_id_map = es7000_ioapic_phys_id_map, .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8c137f41348..da4ed653506 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -66,7 +66,7 @@ struct genapic apic_numaq = { .ioapic_phys_id_map = numaq_ioapic_phys_id_map, .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = numaq_multi_timer_check, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 0698566dc7b..b618a186f1e 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -59,7 +59,7 @@ struct genapic apic_summit = { .ioapic_phys_id_map = summit_ioapic_phys_id_map, .setup_apic_routing = summit_setup_apic_routing, - .multi_timer_check = multi_timer_check, + .multi_timer_check = NULL, .apicid_to_node = apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, From 3f57a318c36e1f24070a18df8c4971ca08d33142 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 236/682] x86, apic: clean up ->apicid_to_node() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 4 ++-- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index df59298086c..77f0b734875 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -62,7 +62,7 @@ static inline void bigsmp_setup_apic_routing(void) "Physflat", nr_ioapics); } -static inline int apicid_to_node(int logical_apicid) +static inline int bigsmp_apicid_to_node(int logical_apicid) { return apicid_2_node[hard_smp_processor_id()]; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 632e4cd3f4f..bcdf31400df 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -82,7 +82,7 @@ static inline void es7000_setup_apic_routing(void) nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } -static inline int apicid_to_node(int logical_apicid) +static inline int es7000_apicid_to_node(int logical_apicid) { return 0; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index f418d470cf4..2f78209d972 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -78,7 +78,7 @@ static inline void default_setup_apic_routing(void) #endif } -static inline int apicid_to_node(int logical_apicid) +static inline int default_apicid_to_node(int logical_apicid) { #ifdef CONFIG_SMP return apicid_2_node[hard_smp_processor_id()]; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index bdea0a759e8..b585a8e5f81 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define apicid_to_node (apic->apicid_to_node) #define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) #define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 22bdf3d4c0e..a0e3b437118 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -76,14 +76,14 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline int apicid_to_node(int logical_apicid) +static inline int numaq_apicid_to_node(int logical_apicid) { return logical_apicid >> 4; } static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) { - int node = apicid_to_node(logical_apicid); + int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); return physid_mask_of_physid(cpu + 4*node); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index acb7bd1de84..cfff2760e60 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -75,7 +75,7 @@ static inline void summit_setup_apic_routing(void) nr_ioapics); } -static inline int apicid_to_node(int logical_apicid) +static inline int summit_apicid_to_node(int logical_apicid) { #ifdef CONFIG_SMP return apicid_2_node[hard_smp_processor_id()]; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3791b4ae567..1dd4cecd4bc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -163,7 +163,7 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); - int node = apicid_to_node(apicid); + int node = apic->apicid_to_node(apicid); if (!node_online(node)) node = first_online_node; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d0749569cdf..2f4121499e5 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -80,7 +80,7 @@ struct genapic apic_bigsmp = { .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 6a21aa7c0c6..d391c2dc819 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -61,7 +61,7 @@ struct genapic apic_default = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = default_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 0be59a51df2..933f2a38599 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -122,7 +122,7 @@ struct genapic apic_es7000 = { .ioapic_phys_id_map = es7000_ioapic_phys_id_map, .setup_apic_routing = es7000_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = es7000_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index da4ed653506..38344fb9979 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -67,7 +67,7 @@ struct genapic apic_numaq = { .ioapic_phys_id_map = numaq_ioapic_phys_id_map, .setup_apic_routing = numaq_setup_apic_routing, .multi_timer_check = numaq_multi_timer_check, - .apicid_to_node = apicid_to_node, + .apicid_to_node = numaq_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index b618a186f1e..6150604deaa 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -60,7 +60,7 @@ struct genapic apic_summit = { .ioapic_phys_id_map = summit_ioapic_phys_id_map, .setup_apic_routing = summit_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = apicid_to_node, + .apicid_to_node = summit_apicid_to_node, .cpu_to_logical_apicid = cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, From 5257c5111ca21c8e857b65a79ab986b313e1c362 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 237/682] x86, apic: clean up ->cpu_to_logical_apicid() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 6 +++--- arch/x86/include/asm/es7000/apic.h | 16 ++++++++-------- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 3 ++- arch/x86/include/asm/summit/apic.h | 8 ++++---- arch/x86/kernel/ipi.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 77f0b734875..d0d894ff7d3 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -82,7 +82,7 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int bigsmp_cpu_to_logical_apicid(int cpu) { if (cpu >= nr_cpu_ids) return BAD_APICID; @@ -115,7 +115,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) int apicid; cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = bigsmp_cpu_to_logical_apicid(cpu); return apicid; } @@ -132,7 +132,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; if (cpu < nr_cpu_ids) - return cpu_to_logical_apicid(cpu); + return bigsmp_cpu_to_logical_apicid(cpu); return BAD_APICID; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index bcdf31400df..e0cd07e74f9 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -109,7 +109,7 @@ static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int es7000_cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP if (cpu >= nr_cpu_ids) @@ -155,10 +155,10 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) * on the same apicid cluster return default value of target_cpus(): */ cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = es7000_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); + int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); @@ -182,20 +182,20 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set == nr_cpu_ids) - return cpu_to_logical_apicid(0); + return es7000_cpu_to_logical_apicid(0); /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = es7000_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); + int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); + return es7000_cpu_to_logical_apicid(0); } apicid = new_apicid; cpus_found++; @@ -209,7 +209,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { - int apicid = cpu_to_logical_apicid(0); + int apicid = es7000_cpu_to_logical_apicid(0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 2f78209d972..eae3e4b6ed0 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -105,7 +105,7 @@ static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) } /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int default_cpu_to_logical_apicid(int cpu) { return 1 << cpu; } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index b585a8e5f81..2ea913e8e0d 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define cpu_to_logical_apicid (apic->cpu_to_logical_apicid) #define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index a0e3b437118..6989abd3485 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -56,7 +56,8 @@ static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) /* Mapping from cpu number to logical apicid */ extern u8 cpu_2_logical_apicid[]; -static inline int cpu_to_logical_apicid(int cpu) + +static inline int numaq_cpu_to_logical_apicid(int cpu) { if (cpu >= nr_cpu_ids) return BAD_APICID; diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index cfff2760e60..d564d7ee3f6 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -85,7 +85,7 @@ static inline int summit_apicid_to_node(int logical_apicid) } /* Mapping from cpu number to logical apicid */ -static inline int cpu_to_logical_apicid(int cpu) +static inline int summit_cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP if (cpu >= nr_cpu_ids) @@ -145,10 +145,10 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); - apicid = cpu_to_logical_apicid(cpu); + apicid = summit_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); + int new_apicid = summit_cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); @@ -165,7 +165,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { - int apicid = cpu_to_logical_apicid(0); + int apicid = summit_cpu_to_logical_apicid(0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index e2e4895ca69..367c5e684fa 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -140,7 +140,7 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } @@ -155,7 +155,7 @@ void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) if (query_cpu != this_cpu) - __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), + __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 2f4121499e5..cd6f02ba88e 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -81,7 +81,7 @@ struct genapic apic_bigsmp = { .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = bigsmp_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d391c2dc819..ef9b936c41a 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -62,7 +62,7 @@ struct genapic apic_default = { .setup_apic_routing = default_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 933f2a38599..74bf2b6b751 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -123,7 +123,7 @@ struct genapic apic_es7000 = { .setup_apic_routing = es7000_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = es7000_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 38344fb9979..461f5beedb2 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -68,7 +68,7 @@ struct genapic apic_numaq = { .setup_apic_routing = numaq_setup_apic_routing, .multi_timer_check = numaq_multi_timer_check, .apicid_to_node = numaq_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6150604deaa..d99be2d4efc 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -61,7 +61,7 @@ struct genapic apic_summit = { .setup_apic_routing = summit_setup_apic_routing, .multi_timer_check = NULL, .apicid_to_node = summit_apicid_to_node, - .cpu_to_logical_apicid = cpu_to_logical_apicid, + .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, .cpu_present_to_apicid = cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, From a21769a4461801454930a06bc18bd8249cd9e993 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 238/682] x86, apic: clean up ->cpu_present_to_apicid() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/genapic.h | 2 ++ arch/x86/include/asm/mach-default/mach_apic.h | 11 ++++++++++- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 9 ++++++++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 34 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index d0d894ff7d3..eea5e9788dd 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -67,7 +67,7 @@ static inline int bigsmp_apicid_to_node(int logical_apicid) return apicid_2_node[hard_smp_processor_id()]; } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index e0cd07e74f9..7cdde3d9c5f 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -88,7 +88,7 @@ static inline int es7000_apicid_to_node(int logical_apicid) } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int es7000_cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 38b1202316f..2cb14d51e45 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -108,6 +108,8 @@ extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern void default_setup_apic_routing(void); + +extern int default_cpu_present_to_apicid(int mps_cpu); #endif #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index eae3e4b6ed0..15d5627a9d6 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -110,7 +110,7 @@ static inline int default_cpu_to_logical_apicid(int cpu) return 1 << cpu; } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int __default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); @@ -118,6 +118,15 @@ static inline int cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +#endif + static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 2ea913e8e0d..332fe93ab41 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define cpu_present_to_apicid (apic->cpu_present_to_apicid) #define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) #define check_phys_apicid_present (apic->check_phys_apicid_present) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 6989abd3485..f482b063447 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -69,7 +69,7 @@ static inline int numaq_cpu_to_logical_apicid(int cpu) * cpu to APIC ID relation to properly interact with the intelligent * mode of the cluster controller. */ -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int numaq_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < 60) return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index d564d7ee3f6..fc127369188 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -96,7 +96,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu) #endif } -static inline int cpu_present_to_apicid(int mps_cpu) +static inline int summit_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 9446f372a16..f4a2c1c0a1a 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -197,7 +197,7 @@ struct genapic apic_flat = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, @@ -341,7 +341,7 @@ struct genapic apic_physflat = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 2eeca6e744a..710d612a964 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -199,7 +199,7 @@ struct genapic apic_x2apic_cluster = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index be0ee3e56ef..49a449178c3 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -195,7 +195,7 @@ struct genapic apic_x2apic_phys = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 68b423f3da9..a08a6359186 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -260,7 +260,7 @@ struct genapic apic_x2apic_uv_x = { .multi_timer_check = NULL, .apicid_to_node = NULL, .cpu_to_logical_apicid = NULL, - .cpu_present_to_apicid = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1dd4cecd4bc..812bf39de35 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -903,9 +903,16 @@ do_rest: return boot_error; } +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} +#endif + int __cpuinit native_cpu_up(unsigned int cpu) { - int apicid = cpu_present_to_apicid(cpu); + int apicid = apic->cpu_present_to_apicid(cpu); unsigned long flags; int err; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index cd6f02ba88e..1eaf18c801d 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -82,7 +82,7 @@ struct genapic apic_bigsmp = { .multi_timer_check = NULL, .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index ef9b936c41a..2903657f420 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -63,7 +63,7 @@ struct genapic apic_default = { .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 74bf2b6b751..5a3a8ab4f8a 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -124,7 +124,7 @@ struct genapic apic_es7000 = { .multi_timer_check = NULL, .apicid_to_node = es7000_apicid_to_node, .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = es7000_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 461f5beedb2..d928cae211c 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -69,7 +69,7 @@ struct genapic apic_numaq = { .multi_timer_check = numaq_multi_timer_check, .apicid_to_node = numaq_apicid_to_node, .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = numaq_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index d99be2d4efc..e6bb34ee580 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -62,7 +62,7 @@ struct genapic apic_summit = { .multi_timer_check = NULL, .apicid_to_node = summit_apicid_to_node, .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, - .cpu_present_to_apicid = cpu_present_to_apicid, + .cpu_present_to_apicid = summit_cpu_present_to_apicid, .apicid_to_cpu_present = apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, From 8058714a41afc4c983acb274b1adf7bd3cfe7f6e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 06:50:47 +0100 Subject: [PATCH 239/682] x86, apic: clean up ->apicid_to_cpu_present() - separate the namespace - remove macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 4 ++-- arch/x86/kernel/io_apic.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index eea5e9788dd..080457450b2 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -75,7 +75,7 @@ static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 7cdde3d9c5f..a09e1133ced 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -98,7 +98,7 @@ static inline int es7000_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +static inline physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) { static int id = 0; physid_mask_t mask; diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 15d5627a9d6..22683e5b82b 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -127,7 +127,7 @@ static inline int default_cpu_present_to_apicid(int mps_cpu) extern int default_cpu_present_to_apicid(int mps_cpu); #endif -static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 332fe93ab41..997618f2eb5 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define apicid_to_cpu_present (apic->apicid_to_cpu_present) #define setup_portio_remap (apic->setup_portio_remap) #define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index f482b063447..8ac000f9928 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -82,7 +82,7 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index fc127369188..79c1a45f886 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -105,13 +105,13 @@ static inline int summit_cpu_present_to_apicid(int mps_cpu) } static inline physid_mask_t - summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +summit_ioapic_phys_id_map(physid_mask_t phys_id_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0x0F); } -static inline physid_mask_t apicid_to_cpu_present(int apicid) +static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) { return physid_mask_of_physid(0); } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 282ea112f3c..3d85d3d810b 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2155,7 +2155,7 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -3899,7 +3899,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apicid_to_cpu_present(apic_id); + tmp = apic->apicid_to_cpu_present(apic_id); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index d801d06af06..2ed5bdf15c9 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -200,7 +200,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apicid_to_cpu_present(m->apicid); + apic_cpus = apic->apicid_to_cpu_present(m->apicid); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 1eaf18c801d..61396523074 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -83,7 +83,7 @@ struct genapic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 2903657f420..8fc704a3db7 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -64,7 +64,7 @@ struct genapic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = default_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 5a3a8ab4f8a..1e0e1627455 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -125,7 +125,7 @@ struct genapic apic_es7000 = { .apicid_to_node = es7000_apicid_to_node, .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = es7000_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index d928cae211c..839b86b765a 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -70,7 +70,7 @@ struct genapic apic_numaq = { .apicid_to_node = numaq_apicid_to_node, .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = numaq_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index e6bb34ee580..b6e37607a52 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -63,7 +63,7 @@ struct genapic apic_summit = { .apicid_to_node = summit_apicid_to_node, .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, .cpu_present_to_apicid = summit_cpu_present_to_apicid, - .apicid_to_cpu_present = apicid_to_cpu_present, + .apicid_to_cpu_present = summit_apicid_to_cpu_present, .setup_portio_remap = setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, From d83093b50416f4ca59d3a84b2ddc217748214d64 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: [PATCH 240/682] x86: refactor ->setup_portio_remap() subarch methods Only NUMAQ has a real ->setup_portio_remap() method, the other subarchitectures define it but keep it empty. So mark the vector as NULL, extend the generic code to handle NULL -setup_portio_remap() entries and remove all the empty handlers. Also move the NUMAQ method from the header file into the apic driver .c file. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 4 ---- arch/x86/include/asm/mach-default/mach_apic.h | 4 ---- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 13 ------------- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 15 ++++++++++++++- arch/x86/mach-generic/summit.c | 2 +- 12 files changed, 22 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 080457450b2..2fa70032e3b 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -95,7 +95,7 @@ static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) return physids_promote(0xFFL); } -static inline void setup_portio_remap(void) +static inline void bigsmp_setup_portio_remap(void) { } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index a09e1133ced..c5b0eb51e53 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -127,10 +127,6 @@ static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) } -static inline void setup_portio_remap(void) -{ -} - extern unsigned int boot_cpu_physical_apicid; static inline int check_phys_apicid_present(int cpu_physical_apicid) { diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 22683e5b82b..54c20e19e28 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -132,10 +132,6 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline void setup_portio_remap(void) -{ -} - static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) { return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 997618f2eb5..393a97c5685 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define setup_portio_remap (apic->setup_portio_remap) #define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 8ac000f9928..6b626519cd7 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -92,19 +92,6 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) extern void *xquad_portio; -static inline void setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) { return (1); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 79c1a45f886..131343b0b75 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -116,7 +116,7 @@ static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) return physid_mask_of_physid(0); } -static inline void setup_portio_remap(void) +static inline void summit_setup_portio_remap(void) { } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 812bf39de35..0e7d26c01f9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1170,7 +1170,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); - setup_portio_remap(); + if (apic->setup_portio_remap) + apic->setup_portio_remap(); smpboot_setup_io_apic(); /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 61396523074..424740554a3 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -84,7 +84,7 @@ struct genapic apic_bigsmp = { .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 8fc704a3db7..b48a58daf71 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -65,7 +65,7 @@ struct genapic apic_default = { .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = default_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 1e0e1627455..449eca5b604 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -126,7 +126,7 @@ struct genapic apic_es7000 = { .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, .cpu_present_to_apicid = es7000_cpu_present_to_apicid, .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 839b86b765a..e60361b0bf1 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -44,6 +44,19 @@ static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } +static void numaq_setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + struct genapic apic_numaq = { .name = "NUMAQ", @@ -71,7 +84,7 @@ struct genapic apic_numaq = { .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, .cpu_present_to_apicid = numaq_cpu_present_to_apicid, .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = numaq_setup_portio_remap, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index b6e37607a52..ffcf7ca2e8c 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -64,7 +64,7 @@ struct genapic apic_summit = { .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, .cpu_present_to_apicid = summit_cpu_present_to_apicid, .apicid_to_cpu_present = summit_apicid_to_cpu_present, - .setup_portio_remap = setup_portio_remap, + .setup_portio_remap = NULL, .check_phys_apicid_present = check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, From a27a621001f4c3e57caf47feff4b014577fd01c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: [PATCH 241/682] x86: refactor ->check_phys_apicid_present() subarch methods - spread out the namespace to per driver methods - extend it to 64-bit as well so that we can use apic->check_phys_apicid_present() unconditionally Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 6 +++--- arch/x86/include/asm/es7000/apic.h | 4 ++-- arch/x86/include/asm/genapic.h | 1 + arch/x86/include/asm/mach-default/mach_apic.h | 18 +++++++++++++----- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 4 ++-- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 7 ++++++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 38 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 2fa70032e3b..5ba4118fcdf 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -99,13 +99,13 @@ static inline void bigsmp_setup_portio_remap(void) { } -static inline void enable_apic_mode(void) +static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) { + return 1; } -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline void enable_apic_mode(void) { - return (1); } /* As we are using single CPU as destination, pick only one CPU here */ diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index c5b0eb51e53..717c27f8da6 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -126,9 +126,9 @@ static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) return physids_promote(0xff); } - extern unsigned int boot_cpu_physical_apicid; -static inline int check_phys_apicid_present(int cpu_physical_apicid) + +static inline int es7000_check_phys_apicid_present(int cpu_physical_apicid) { boot_cpu_physical_apicid = read_apic_id(); return (1); diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 2cb14d51e45..f292fd02eba 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -110,6 +110,7 @@ DECLARE_PER_CPU(int, x2apic_extra_bits); extern void default_setup_apic_routing(void); extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 54c20e19e28..0a824d3a223 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -118,13 +118,26 @@ static inline int __default_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } +static inline int +__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + #ifdef CONFIG_X86_32 static inline int default_cpu_present_to_apicid(int mps_cpu) { return __default_cpu_present_to_apicid(mps_cpu); } + +static inline int +default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} #else extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) @@ -132,11 +145,6 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - static inline void enable_apic_mode(void) { } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 393a97c5685..efd762d951a 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,7 +3,6 @@ #include -#define check_phys_apicid_present (apic->check_phys_apicid_present) #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define enable_apic_mode (apic->enable_apic_mode) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 6b626519cd7..3be735e2ab9 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -92,9 +92,9 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) extern void *xquad_portio; -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) { - return (1); + return 1; } static inline void enable_apic_mode(void) diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 131343b0b75..fe578f6df78 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -120,7 +120,7 @@ static inline void summit_setup_portio_remap(void) { } -static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) { return 1; } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index f4a2c1c0a1a..78adf71f7e5 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -200,7 +200,7 @@ struct genapic apic_flat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, @@ -344,7 +344,7 @@ struct genapic apic_physflat = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 710d612a964..7062e24b18f 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -202,7 +202,7 @@ struct genapic apic_x2apic_cluster = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 49a449178c3..7177a1110f0 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -198,7 +198,7 @@ struct genapic apic_x2apic_phys = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index a08a6359186..debd721f0b4 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -263,7 +263,7 @@ struct genapic apic_x2apic_uv_x = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, - .check_phys_apicid_present = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0e7d26c01f9..ab83be2f8e0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -908,6 +908,11 @@ int default_cpu_present_to_apicid(int mps_cpu) { return __default_cpu_present_to_apicid(mps_cpu); } + +int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} #endif int __cpuinit native_cpu_up(unsigned int cpu) @@ -1058,7 +1063,7 @@ static int __init smp_sanity_check(unsigned max_cpus) * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", boot_cpu_physical_apicid); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 424740554a3..82743d16c23 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -85,7 +85,7 @@ struct genapic apic_bigsmp = { .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index b48a58daf71..d0374c69ad0 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -66,7 +66,7 @@ struct genapic apic_default = { .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = default_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 449eca5b604..52b3eb5e645 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -127,7 +127,7 @@ struct genapic apic_es7000 = { .cpu_present_to_apicid = es7000_cpu_present_to_apicid, .apicid_to_cpu_present = es7000_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = es7000_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index e60361b0bf1..7ec2ca43ca2 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -85,7 +85,7 @@ struct genapic apic_numaq = { .cpu_present_to_apicid = numaq_cpu_present_to_apicid, .apicid_to_cpu_present = numaq_apicid_to_cpu_present, .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index ffcf7ca2e8c..acf12de8916 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -65,7 +65,7 @@ struct genapic apic_summit = { .cpu_present_to_apicid = summit_cpu_present_to_apicid, .apicid_to_cpu_present = summit_apicid_to_cpu_present, .setup_portio_remap = NULL, - .check_phys_apicid_present = check_phys_apicid_present, + .check_phys_apicid_present = summit_check_phys_apicid_present, .enable_apic_mode = enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, From 4904033302c745342e3b3a611881cdee57fbe06a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 12:43:18 +0100 Subject: [PATCH 242/682] x86: refactor ->enable_apic_mode() subarch methods Only ES7000 has a real ->enable_apic_mode() method, the other subarchitectures define it but keep it empty. So mark the vector as NULL, extend the generic code to handle NULL -setup_portio_remap() entries and remove all the empty handlers. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 4 ---- arch/x86/include/asm/mach-default/mach_apic.h | 3 --- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 4 ---- arch/x86/include/asm/summit/apic.h | 4 ---- arch/x86/kernel/apic.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 6 +++--- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 11 files changed, 9 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 5ba4118fcdf..f49d440862f 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -104,10 +104,6 @@ static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline void enable_apic_mode(void) -{ -} - /* As we are using single CPU as destination, pick only one CPU here */ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 0a824d3a223..3647c92d45e 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -145,8 +145,5 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -static inline void enable_apic_mode(void) -{ -} #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index efd762d951a..6fed521585c 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,7 +5,6 @@ #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define enable_apic_mode (apic->enable_apic_mode) #define phys_pkg_id (apic->phys_pkg_id) #define wakeup_secondary_cpu (apic->wakeup_cpu) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 3be735e2ab9..dc93c30972e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -97,10 +97,6 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline void enable_apic_mode(void) -{ -} - /* * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index fe578f6df78..526d19e7944 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -125,10 +125,6 @@ static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline void enable_apic_mode(void) -{ -} - static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index fcbcc03cd4b..9d6374da478 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1763,7 +1763,8 @@ void __init connect_bsp_APIC(void) outb(0x01, 0x23); } #endif - enable_apic_mode(); + if (apic->enable_apic_mode) + apic->enable_apic_mode(); } /** diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 82743d16c23..e151b472456 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -86,7 +86,7 @@ struct genapic apic_bigsmp = { .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d0374c69ad0..ac6be195b97 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -67,7 +67,7 @@ struct genapic apic_default = { .apicid_to_cpu_present = default_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 52b3eb5e645..9acb71120ef 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -36,10 +36,10 @@ static int probe_es7000(void) } extern void es7000_sw_apic(void); -static void __init enable_apic_mode(void) + +static void __init es7000_enable_apic_mode(void) { es7000_sw_apic(); - return; } static __init int @@ -128,7 +128,7 @@ struct genapic apic_es7000 = { .apicid_to_cpu_present = es7000_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = es7000_enable_apic_mode, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 7ec2ca43ca2..8d3358de3fe 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -86,7 +86,7 @@ struct genapic apic_numaq = { .apicid_to_cpu_present = numaq_apicid_to_cpu_present, .setup_portio_remap = numaq_setup_portio_remap, .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index acf12de8916..cb83bcbb2de 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -66,7 +66,7 @@ struct genapic apic_summit = { .apicid_to_cpu_present = summit_apicid_to_cpu_present, .setup_portio_remap = NULL, .check_phys_apicid_present = summit_check_phys_apicid_present, - .enable_apic_mode = enable_apic_mode, + .enable_apic_mode = NULL, .phys_pkg_id = phys_pkg_id, .mps_oem_check = mps_oem_check, From b0b20e5a3a6615ae750804523aeedd32911bb9d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:15:06 +0100 Subject: [PATCH 243/682] x86, es7000: clean up es7000_enable_apic_mode() - eliminate the needless es7000_enable_apic_mode() complication which was not apparent prior the namespace cleanups - clean up the control flow in es7000_enable_apic_mode() - other cleanups Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 2 ++ arch/x86/kernel/es7000_32.c | 25 +++++++++++++------------ arch/x86/mach-generic/es7000.c | 7 ------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 717c27f8da6..038c4f0e480 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -36,6 +36,8 @@ static inline unsigned long es7000_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } +extern void es7000_enable_apic_mode(void); + #define apicid_cluster(apicid) (apicid & 0xF0) static inline unsigned long calculate_ldr(int cpu) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 20a2a43c2a9..e73fe18488a 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -359,20 +359,21 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } -void __init -es7000_sw_apic(void) +void __init es7000_enable_apic_mode(void) { - if (es7000_plat) { - int mip_status; - struct mip_reg es7000_mip_reg; + struct mip_reg es7000_mip_reg; + int mip_status; - printk("ES7000: Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0 = MIP_SW_APIC; - es7000_mip_reg.off_38 = (MIP_VALID); - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - printk("es7000_sw_apic: command failed, status = %x\n", - mip_status); + if (!es7000_plat) return; + + printk("ES7000: Enabling APIC mode.\n"); + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0 = MIP_SW_APIC; + es7000_mip_reg.off_38 = MIP_VALID; + + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { + printk("es7000_enable_apic_mode: command failed, status = %x\n", + mip_status); } } diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 9acb71120ef..1185964b7a3 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -35,13 +35,6 @@ static int probe_es7000(void) return 0; } -extern void es7000_sw_apic(void); - -static void __init es7000_enable_apic_mode(void) -{ - es7000_sw_apic(); -} - static __init int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { From d4c9a9f3d416cfa1f5ffbe09d864d069467fe693 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:31:22 +0100 Subject: [PATCH 244/682] x86, apic: unify phys_pkg_id() - unify the call signature of 64-bit to that of 32-bit - clean up the types all around - clean up namespace contamination Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/genapic.h | 6 +----- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 5 +++-- arch/x86/kernel/cpu/addon_cpuid_features.c | 10 +--------- arch/x86/kernel/cpu/common.c | 11 +---------- arch/x86/kernel/genapic_flat_64.c | 6 +++--- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- 12 files changed, 19 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index f49d440862f..b7cba5b5635 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -133,7 +133,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 038c4f0e480..d2c6c202e8b 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -221,7 +221,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, return apicid; } -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index f292fd02eba..14b19de8cd0 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -48,11 +48,7 @@ struct genapic { void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); void (*enable_apic_mode)(void); -#ifdef CONFIG_X86_32 - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); -#else - unsigned int (*phys_pkg_id)(int index_msb); -#endif + int (*phys_pkg_id)(int cpuid_apic, int index_msb); /* * When one of the next two hooks returns 1 the genapic diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 3647c92d45e..55797a35150 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -65,7 +65,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index dc93c30972e..bc2c8a425c0 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -113,7 +113,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 526d19e7944..64cd441ae00 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -175,13 +175,14 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, return apicid; } -/* cpuid returns the value latched in the HW at reset, not the APIC ID +/* + * cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. * * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. */ -static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +static inline int phys_pkg_id(int cpuid_apic, int index_msb) { return hard_smp_processor_id() >> index_msb; } diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 4e581fdc0a5..84f8e4a5aef 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -116,7 +116,6 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; -#ifdef CONFIG_X86_32 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); @@ -124,14 +123,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) * Reinit the apicid, now that we have extended initial_apicid. */ c->apicid = phys_pkg_id(c->initial_apicid, 0); -#else - c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(core_plus_mask_width); - /* - * Reinit the apicid, now that we have extended initial_apicid. - */ - c->apicid = phys_pkg_id(0); -#endif + c->x86_max_cores = (core_level_siblings / smp_num_siblings); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 275e2cb43b9..93c491c4fe7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,11 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); -#ifdef CONFIG_X86_64 - c->phys_proc_id = phys_pkg_id(index_msb); -#else c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); -#endif smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -454,13 +450,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); -#ifdef CONFIG_X86_64 - c->cpu_core_id = phys_pkg_id(index_msb) & - ((1 << core_bits) - 1); -#else c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); -#endif } out: @@ -742,7 +733,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(0); + c->apicid = phys_pkg_id(c->initial_apicid, 0); #endif /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 78adf71f7e5..cc9e07b9609 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -169,7 +169,7 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return mask1 & mask2; } -static unsigned int phys_pkg_id(int index_msb) +static int flat_phys_pkg_id(int initial_apic_id, int index_msb) { return hard_smp_processor_id() >> index_msb; } @@ -202,7 +202,7 @@ struct genapic apic_flat = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, @@ -346,7 +346,7 @@ struct genapic apic_physflat = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 7062e24b18f..18b6f14376e 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -157,7 +157,7 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int phys_pkg_id(int index_msb) +static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { return current_cpu_data.initial_apicid >> index_msb; } @@ -204,7 +204,7 @@ struct genapic apic_x2apic_cluster = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = x2apic_cluster_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 7177a1110f0..2cb6f49e4c5 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -156,7 +156,7 @@ static unsigned long set_apic_id(unsigned int id) return x; } -static unsigned int phys_pkg_id(int index_msb) +static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { return current_cpu_data.initial_apicid >> index_msb; } @@ -200,7 +200,7 @@ struct genapic apic_x2apic_phys = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index debd721f0b4..67e7658775e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -226,7 +226,7 @@ static unsigned int uv_read_apic_id(void) return get_apic_id(apic_read(APIC_ID)); } -static unsigned int phys_pkg_id(int index_msb) +static int uv_phys_pkg_id(int initial_apicid, int index_msb) { return uv_read_apic_id() >> index_msb; } @@ -265,7 +265,7 @@ struct genapic apic_x2apic_uv_x = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = uv_phys_pkg_id, .mps_oem_check = NULL, .get_apic_id = get_apic_id, From cb8cc442dc7e07cb5438b357843ab4095ad73933 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:24:54 +0100 Subject: [PATCH 245/682] x86, apic: refactor ->phys_pkg_id() Refactor the ->phys_pkg_id() methods: - namespace separation - macro wrapper removal - open-coded calls to the methods in the generic code Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 2 +- arch/x86/include/asm/es7000/apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apic.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apic.h | 1 - arch/x86/include/asm/numaq/apic.h | 2 +- arch/x86/include/asm/summit/apic.h | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 6 +++--- arch/x86/kernel/cpu/common.c | 8 ++++---- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 17 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index b7cba5b5635..1230f5d7a38 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -133,7 +133,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index d2c6c202e8b..f183dfb4de4 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -221,7 +221,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, return apicid; } -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int es7000_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 55797a35150..d0605281a6b 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -21,7 +21,6 @@ static inline const struct cpumask *default_target_cpus(void) #include #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define phys_pkg_id (apic->phys_pkg_id) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) @@ -65,7 +64,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 6fed521585c..1eeb5b61e48 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -5,7 +5,6 @@ #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define phys_pkg_id (apic->phys_pkg_id) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index bc2c8a425c0..765c4d5124c 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -113,7 +113,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 64cd441ae00..fa6b3b45290 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -182,7 +182,7 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, * * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. */ -static inline int phys_pkg_id(int cpuid_apic, int index_msb) +static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) { return hard_smp_processor_id() >> index_msb; } diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84f8e4a5aef..e8bb892c09f 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -116,13 +116,13 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; - c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. */ - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 93c491c4fe7..055b9c3a660 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,7 +442,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) } index_msb = get_count_order(smp_num_siblings); - c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); smp_num_siblings = smp_num_siblings / c->x86_max_cores; @@ -450,7 +450,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) core_bits = get_count_order(c->x86_max_cores); - c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ((1 << core_bits) - 1); } @@ -686,7 +686,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_HT - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); # else c->apicid = c->initial_apicid; # endif @@ -733,7 +733,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); #ifdef CONFIG_X86_64 - c->apicid = phys_pkg_id(c->initial_apicid, 0); + c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); #endif /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index e151b472456..d04b38954df 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -87,7 +87,7 @@ struct genapic apic_bigsmp = { .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = bigsmp_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index ac6be195b97..5c9266f756e 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -68,7 +68,7 @@ struct genapic apic_default = { .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = default_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 1185964b7a3..52787e34c9c 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -122,7 +122,7 @@ struct genapic apic_es7000 = { .setup_portio_remap = NULL, .check_phys_apicid_present = es7000_check_phys_apicid_present, .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = es7000_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8d3358de3fe..6a1134e6d72 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -87,7 +87,7 @@ struct genapic apic_numaq = { .setup_portio_remap = numaq_setup_portio_remap, .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = numaq_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index cb83bcbb2de..2d6843a61d9 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -67,7 +67,7 @@ struct genapic apic_summit = { .setup_portio_remap = NULL, .check_phys_apicid_present = summit_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = phys_pkg_id, + .phys_pkg_id = summit_phys_pkg_id, .mps_oem_check = mps_oem_check, .get_apic_id = get_apic_id, From 5f836405ef632ba82f4a5261ff2be4198e53b51b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:47:42 +0100 Subject: [PATCH 246/682] x86, smp: clean up mps_oem_check() Impact: cleanup - allow NULL ->mps_oem_check() entries - clean up the code flow Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 3 +-- arch/x86/mach-generic/probe.c | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 14b19de8cd0..8bb1c73c55b 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -55,8 +55,7 @@ struct genapic { * is switched to this. Essentially they are additional * probe functions: */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, - char *productid); + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index a21e2b1a701..799a70f4d90 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -113,17 +113,21 @@ void __init generic_apic_probe(void) int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { int i; + for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { - if (!cmdline_apic) { - apic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); - printk(KERN_INFO "Switched to APIC driver `%s'.\n", - apic->name); - } - return 1; + if (!apic_probe[i]->mps_oem_check) + continue; + if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) + continue; + + if (!cmdline_apic) { + apic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + printk(KERN_INFO "Switched to APIC driver `%s'.\n", + apic->name); } + return 1; } return 0; } From 1322a2e2db87c938d8381f8501af9a4d0eab8bc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:54:56 +0100 Subject: [PATCH 247/682] x86, mpparse: call the generic quirk handlers early Call all the registered MPS quirk handlers early. These methods scan low RAM typically for specific signatures so are safe to be called early. Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c8a534a16d9..f6fb1928439 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -292,16 +292,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - /* - * need to make sure summit and es7000's mps_oem_check is safe to be - * called early via genericarch 's mps_oem_check - */ - if (early) { -#ifdef CONFIG_X86_NUMAQ - numaq_mps_oem_check(mpc, oem, str); -#endif - } else - mps_oem_check(mpc, oem, str); + mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ if (!acpi_lapic) From 9c7642470ecf03d8b4946a2addc8fe631b8426dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 13:44:32 +0100 Subject: [PATCH 248/682] x86: consolidate the ->mps_oem_check() code - spread out the mps_oem_check() namespace on a per APIC driver basis Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/mpparse.h | 6 ------ arch/x86/include/asm/mach-default/mach_mpparse.h | 2 +- arch/x86/include/asm/mach-generic/mach_mpparse.h | 3 +-- arch/x86/include/asm/summit/mpparse.h | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 5 +++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/probe.c | 3 ++- arch/x86/mach-generic/summit.c | 2 +- 11 files changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h index 30692c4ae85..662eb1e574d 100644 --- a/arch/x86/include/asm/es7000/mpparse.h +++ b/arch/x86/include/asm/es7000/mpparse.h @@ -8,13 +8,7 @@ extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); extern void setup_unisys(void); -#ifndef CONFIG_X86_GENERICARCH -extern int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id); -extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid); -#endif - #ifdef CONFIG_ACPI - static inline int es7000_check_dsdt(void) { struct acpi_table_header header; diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h index 8fa01770ba6..af0da140df9 100644 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ b/arch/x86/include/asm/mach-default/mach_mpparse.h @@ -2,7 +2,7 @@ #define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H static inline int -mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { return 0; } diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h index f497d96c76b..22bfb56f8fb 100644 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ b/arch/x86/include/asm/mach-generic/mach_mpparse.h @@ -1,8 +1,7 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H #define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H - -extern int mps_oem_check(struct mpc_table *, char *, char *); +extern int generic_mps_oem_check(struct mpc_table *, char *, char *); extern int default_acpi_madt_oem_check(char *, char *); diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h index 555ed8238e9..4bbcce39acb 100644 --- a/arch/x86/include/asm/summit/mpparse.h +++ b/arch/x86/include/asm/summit/mpparse.h @@ -11,8 +11,8 @@ extern void setup_summit(void); #define setup_summit() {} #endif -static inline int mps_oem_check(struct mpc_table *mpc, char *oem, - char *productid) +static inline int +summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { if (!strncmp(oem, "IBM ENSW", 8) && (!strncmp(productid, "VIGIL SMP", 9) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f6fb1928439..b12fa5ce6f5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -292,7 +292,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) return 0; #ifdef CONFIG_X86_32 - mps_oem_check(mpc, oem, str); + generic_mps_oem_check(mpc, oem, str); #endif /* save the local APIC address, it might be non-default */ if (!acpi_lapic) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d04b38954df..6bf6aafeb2c 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -88,7 +88,7 @@ struct genapic apic_bigsmp = { .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = bigsmp_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = NULL, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 5c9266f756e..e5f85cd75b4 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -69,7 +69,7 @@ struct genapic apic_default = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = NULL, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 52787e34c9c..f861163cd39 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -36,11 +36,12 @@ static int probe_es7000(void) } static __init int -mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { if (mpc->oemptr) { struct mpc_oemtable *oem_table = (struct mpc_oemtable *)mpc->oemptr; + if (!strncmp(oem, "UNISYS", 6)) return parse_unisys_oem((char *)oem_table); } @@ -123,7 +124,7 @@ struct genapic apic_es7000 = { .check_phys_apicid_present = es7000_check_phys_apicid_present, .enable_apic_mode = es7000_enable_apic_mode, .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = es7000_mps_oem_check, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 6a1134e6d72..517882c9c15 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -19,7 +19,7 @@ #include #include -static int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { numaq_mps_oem_check(mpc, oem, productid); return found_numaq; @@ -88,7 +88,7 @@ struct genapic apic_numaq = { .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = __numaq_mps_oem_check, .get_apic_id = get_apic_id, .set_apic_id = NULL, diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 799a70f4d90..ab68c6e5c48 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -110,7 +110,8 @@ void __init generic_apic_probe(void) /* These functions can switch the APIC even after the initial ->probe() */ -int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +int __init +generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { int i; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2d6843a61d9..719e944ff30 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -68,7 +68,7 @@ struct genapic apic_summit = { .check_phys_apicid_present = summit_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = summit_phys_pkg_id, - .mps_oem_check = mps_oem_check, + .mps_oem_check = summit_mps_oem_check, .get_apic_id = get_apic_id, .set_apic_id = NULL, From ca6c8ed4646f8ccaa4f7db618bf69b8b8fb49767 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 14:08:38 +0100 Subject: [PATCH 249/682] x86, apic: refactor ->get_apic_id() & GET_APIC_ID() - spread out the namespace on a per driver basis - get rid of macro wrappers - small cleanups Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apicdef.h | 6 ++---- arch/x86/include/asm/es7000/apicdef.h | 6 ++---- arch/x86/include/asm/mach-default/mach_apic.h | 2 +- arch/x86/include/asm/mach-default/mach_apicdef.h | 10 +++++----- arch/x86/include/asm/mach-generic/mach_apicdef.h | 1 - arch/x86/include/asm/numaq/apicdef.h | 7 ++----- arch/x86/include/asm/smp.h | 2 +- arch/x86/include/asm/summit/apicdef.h | 6 ++---- arch/x86/kernel/genapic_flat_64.c | 9 +++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 6 +++--- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 17 files changed, 32 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h index 392c3f5ef2f..ed25dd6503b 100644 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -3,11 +3,9 @@ #define APIC_ID_MASK (0xFF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned bigsmp_get_apic_id(unsigned long x) { - return (((x)>>24)&0xFF); + return (x >> 24) & 0xFF; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h index 8b234a3cb85..e23791762a1 100644 --- a/arch/x86/include/asm/es7000/apicdef.h +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -3,11 +3,9 @@ #define APIC_ID_MASK (0xFF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned int es7000_get_apic_id(unsigned long x) { - return (((x)>>24)&0xFF); + return (x >> 24) & 0xFF; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index d0605281a6b..8719208f273 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -21,7 +21,7 @@ static inline const struct cpumask *default_target_cpus(void) #include #define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) #define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) -#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) +#define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void default_setup_apic_routing(void); diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index b4dcc0971c7..e84d437ba2b 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -5,20 +5,20 @@ #ifdef CONFIG_X86_64 #define APIC_ID_MASK (apic->apic_id_mask) -#define GET_APIC_ID(x) (apic->get_apic_id(x)) #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else #define APIC_ID_MASK (0xF<<24) -static inline unsigned get_apic_id(unsigned long x) + +static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + if (APIC_XAPIC(ver)) - return (((x)>>24)&0xFF); + return (x >> 24) & 0xFF; else - return (((x)>>24)&0xF); + return (x >> 24) & 0x0F; } -#define GET_APIC_ID(x) get_apic_id(x) #endif #endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h index acc9adddb34..645520bcd2c 100644 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -4,7 +4,6 @@ #ifndef APIC_DEFINITION #include -#define GET_APIC_ID (apic->get_apic_id) #define APIC_ID_MASK (apic->apic_id_mask) #endif diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h index e012a46cc22..29f5e3d34e5 100644 --- a/arch/x86/include/asm/numaq/apicdef.h +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -1,14 +1,11 @@ #ifndef __ASM_NUMAQ_APICDEF_H #define __ASM_NUMAQ_APICDEF_H - #define APIC_ID_MASK (0xF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned int numaq_get_apic_id(unsigned long x) { - return (((x)>>24)&0x0F); + return (x >> 24) & 0x0F; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 45ef8a1b9d7..c63d480802a 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -189,7 +189,7 @@ static inline unsigned int read_apic_id(void) reg = *(u32 *)(APIC_BASE + APIC_ID); - return GET_APIC_ID(reg); + return apic->get_apic_id(reg); } #endif diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h index f3fbca1f61c..4286528af7c 100644 --- a/arch/x86/include/asm/summit/apicdef.h +++ b/arch/x86/include/asm/summit/apicdef.h @@ -3,11 +3,9 @@ #define APIC_ID_MASK (0xFF<<24) -static inline unsigned get_apic_id(unsigned long x) +static inline unsigned summit_get_apic_id(unsigned long x) { - return (x>>24)&0xFF; + return (x >> 24) & 0xFF; } -#define GET_APIC_ID(x) get_apic_id(x) - #endif diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index cc9e07b9609..ab47091dac2 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -126,11 +126,12 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); } -static unsigned int get_apic_id(unsigned long x) +static unsigned int flat_get_apic_id(unsigned long x) { unsigned int id; id = (((x)>>24) & 0xFFu); + return id; } @@ -146,7 +147,7 @@ static unsigned int read_xapic_id(void) { unsigned int id; - id = get_apic_id(apic_read(APIC_ID)); + id = flat_get_apic_id(apic_read(APIC_ID)); return id; } @@ -205,7 +206,7 @@ struct genapic apic_flat = { .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, @@ -349,7 +350,7 @@ struct genapic apic_physflat = { .phys_pkg_id = flat_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu<<24, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 18b6f14376e..c7557e05184 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -141,7 +141,7 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) { unsigned int id; @@ -207,7 +207,7 @@ struct genapic apic_x2apic_cluster = { .phys_pkg_id = x2apic_cluster_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_cluster_phys_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 2cb6f49e4c5..80cba49cfd8 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -140,7 +140,7 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_phys_get_apic_id(unsigned long x) { unsigned int id; @@ -203,7 +203,7 @@ struct genapic apic_x2apic_phys = { .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_phys_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 67e7658775e..50310b96adc 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -201,7 +201,7 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static unsigned int get_apic_id(unsigned long x) +static unsigned int x2apic_get_apic_id(unsigned long x) { unsigned int id; @@ -223,7 +223,7 @@ static unsigned long set_apic_id(unsigned int id) static unsigned int uv_read_apic_id(void) { - return get_apic_id(apic_read(APIC_ID)); + return x2apic_get_apic_id(apic_read(APIC_ID)); } static int uv_phys_pkg_id(int initial_apicid, int index_msb) @@ -268,7 +268,7 @@ struct genapic apic_x2apic_uv_x = { .phys_pkg_id = uv_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 6bf6aafeb2c..9eca977227c 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -90,7 +90,7 @@ struct genapic apic_bigsmp = { .phys_pkg_id = bigsmp_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e5f85cd75b4..d51a3f0335a 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -71,7 +71,7 @@ struct genapic apic_default = { .phys_pkg_id = default_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = get_apic_id, + .get_apic_id = default_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index f861163cd39..1944675db62 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -126,7 +126,7 @@ struct genapic apic_es7000 = { .phys_pkg_id = es7000_phys_pkg_id, .mps_oem_check = es7000_mps_oem_check, - .get_apic_id = get_apic_id, + .get_apic_id = es7000_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 517882c9c15..fcbba84c090 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -90,7 +90,7 @@ struct genapic apic_numaq = { .phys_pkg_id = numaq_phys_pkg_id, .mps_oem_check = __numaq_mps_oem_check, - .get_apic_id = get_apic_id, + .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 719e944ff30..5650eaf9061 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -70,7 +70,7 @@ struct genapic apic_summit = { .phys_pkg_id = summit_phys_pkg_id, .mps_oem_check = summit_mps_oem_check, - .get_apic_id = get_apic_id, + .get_apic_id = summit_get_apic_id, .set_apic_id = NULL, .apic_id_mask = APIC_ID_MASK, From 5b8127277bc4cdca78eda5ee900a314642822ace Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 14:59:17 +0100 Subject: [PATCH 250/682] x86, apic: refactor ->apic_id_mask & APIC_ID_MASK - spread out the namespace on a per driver basis - get rid of wrapper macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apicdef.h | 2 +- arch/x86/include/asm/es7000/apicdef.h | 2 +- arch/x86/include/asm/mach-default/mach_apicdef.h | 3 +-- arch/x86/include/asm/mach-generic/mach_apicdef.h | 2 -- arch/x86/include/asm/numaq/apicdef.h | 2 +- arch/x86/include/asm/summit/apicdef.h | 2 +- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 13 files changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h index ed25dd6503b..6e587818c7e 100644 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_MACH_APICDEF_H #define __ASM_MACH_APICDEF_H -#define APIC_ID_MASK (0xFF<<24) +#define BIGSMP_APIC_ID_MASK (0xFF<<24) static inline unsigned bigsmp_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h index e23791762a1..476da0c7f5c 100644 --- a/arch/x86/include/asm/es7000/apicdef.h +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_ES7000_APICDEF_H #define __ASM_ES7000_APICDEF_H -#define APIC_ID_MASK (0xFF<<24) +#define ES7000_APIC_ID_MASK (0xFF<<24) static inline unsigned int es7000_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index e84d437ba2b..8318d121ea6 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -4,10 +4,9 @@ #include #ifdef CONFIG_X86_64 -#define APIC_ID_MASK (apic->apic_id_mask) #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -#define APIC_ID_MASK (0xF<<24) +#define DEFAULT_APIC_ID_MASK (0x0F<<24) static inline unsigned default_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h index 645520bcd2c..61caa65b13f 100644 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ b/arch/x86/include/asm/mach-generic/mach_apicdef.h @@ -3,8 +3,6 @@ #ifndef APIC_DEFINITION #include - -#define APIC_ID_MASK (apic->apic_id_mask) #endif #endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h index 29f5e3d34e5..6f2cc5df0b1 100644 --- a/arch/x86/include/asm/numaq/apicdef.h +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_NUMAQ_APICDEF_H #define __ASM_NUMAQ_APICDEF_H -#define APIC_ID_MASK (0xF<<24) +#define NUMAQ_APIC_ID_MASK (0xF<<24) static inline unsigned int numaq_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h index 4286528af7c..0373f0c7b5d 100644 --- a/arch/x86/include/asm/summit/apicdef.h +++ b/arch/x86/include/asm/summit/apicdef.h @@ -1,7 +1,7 @@ #ifndef __ASM_SUMMIT_APICDEF_H #define __ASM_SUMMIT_APICDEF_H -#define APIC_ID_MASK (0xFF<<24) +#define SUMMIT_APIC_ID_MASK (0xFF<<24) static inline unsigned summit_get_apic_id(unsigned long x) { diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 9d6374da478..5f7f3a9a47a 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1009,11 +1009,11 @@ int __init verify_local_APIC(void) */ reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) + if (reg1 != (reg0 ^ apic->apic_id_mask)) return 0; /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index ab47091dac2..78baa55cd0e 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -352,7 +352,7 @@ struct genapic apic_physflat = { .get_apic_id = flat_get_apic_id, .set_apic_id = set_apic_id, - .apic_id_mask = 0xFFu<<24, + .apic_id_mask = 0xFFu << 24, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 9eca977227c..1f4ad4f7702 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -92,7 +92,7 @@ struct genapic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = BIGSMP_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index d51a3f0335a..239af25615b 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -73,7 +73,7 @@ struct genapic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = DEFAULT_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 1944675db62..21fb33eea91 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -128,7 +128,7 @@ struct genapic apic_es7000 = { .get_apic_id = es7000_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = ES7000_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index fcbba84c090..27d2d1f2d6f 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -92,7 +92,7 @@ struct genapic apic_numaq = { .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = NUMAQ_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 5650eaf9061..f24cba1b29d 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -72,7 +72,7 @@ struct genapic apic_summit = { .get_apic_id = summit_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = APIC_ID_MASK, + .apic_id_mask = SUMMIT_APIC_ID_MASK, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, From 94af18755266edf46803564414d74f9621aaded8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:08:53 +0100 Subject: [PATCH 251/682] x86, apic: get rid of *_APIC_ID_MASK definitions Impact: cleanup Remove the *_APIC_ID_MASK subarch definitions and move them straight to the genapic driver initialization code. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apicdef.h | 2 -- arch/x86/include/asm/es7000/apicdef.h | 2 -- arch/x86/include/asm/mach-default/mach_apicdef.h | 1 - arch/x86/include/asm/numaq/apicdef.h | 2 -- arch/x86/include/asm/summit/apicdef.h | 2 -- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 10 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h index 6e587818c7e..e58dee84757 100644 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ b/arch/x86/include/asm/bigsmp/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_MACH_APICDEF_H #define __ASM_MACH_APICDEF_H -#define BIGSMP_APIC_ID_MASK (0xFF<<24) - static inline unsigned bigsmp_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h index 476da0c7f5c..c74881a7b3d 100644 --- a/arch/x86/include/asm/es7000/apicdef.h +++ b/arch/x86/include/asm/es7000/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_ES7000_APICDEF_H #define __ASM_ES7000_APICDEF_H -#define ES7000_APIC_ID_MASK (0xFF<<24) - static inline unsigned int es7000_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h index 8318d121ea6..5141085962d 100644 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ b/arch/x86/include/asm/mach-default/mach_apicdef.h @@ -6,7 +6,6 @@ #ifdef CONFIG_X86_64 #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -#define DEFAULT_APIC_ID_MASK (0x0F<<24) static inline unsigned default_get_apic_id(unsigned long x) { diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h index 6f2cc5df0b1..cd927d5bd50 100644 --- a/arch/x86/include/asm/numaq/apicdef.h +++ b/arch/x86/include/asm/numaq/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_NUMAQ_APICDEF_H #define __ASM_NUMAQ_APICDEF_H -#define NUMAQ_APIC_ID_MASK (0xF<<24) - static inline unsigned int numaq_get_apic_id(unsigned long x) { return (x >> 24) & 0x0F; diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h index 0373f0c7b5d..c24b0df2dec 100644 --- a/arch/x86/include/asm/summit/apicdef.h +++ b/arch/x86/include/asm/summit/apicdef.h @@ -1,8 +1,6 @@ #ifndef __ASM_SUMMIT_APICDEF_H #define __ASM_SUMMIT_APICDEF_H -#define SUMMIT_APIC_ID_MASK (0xFF<<24) - static inline unsigned summit_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 1f4ad4f7702..ee52c59aa3a 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -92,7 +92,7 @@ struct genapic apic_bigsmp = { .get_apic_id = bigsmp_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = BIGSMP_APIC_ID_MASK, + .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 239af25615b..e4ed7e6d626 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -73,7 +73,7 @@ struct genapic apic_default = { .get_apic_id = default_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = DEFAULT_APIC_ID_MASK, + .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 21fb33eea91..3d046dec0c9 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -128,7 +128,7 @@ struct genapic apic_es7000 = { .get_apic_id = es7000_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = ES7000_APIC_ID_MASK, + .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 27d2d1f2d6f..a7bf1aa02e1 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -92,7 +92,7 @@ struct genapic apic_numaq = { .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = NUMAQ_APIC_ID_MASK, + .apic_id_mask = 0x0F << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index f24cba1b29d..a0ae6b91048 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -72,7 +72,7 @@ struct genapic apic_summit = { .get_apic_id = summit_get_apic_id, .set_apic_id = NULL, - .apic_id_mask = SUMMIT_APIC_ID_MASK, + .apic_id_mask = 0xFF << 24, .cpu_mask_to_apicid = cpu_mask_to_apicid, .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, From debccb3e77be52cfc26c5a99e123c114c5c72aeb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:20:18 +0100 Subject: [PATCH 252/682] x86, apic: refactor ->cpu_mask_to_apicid*() - spread out the namespace on a per driver basis - clean up the functions - get rid of macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 17 +++++------ arch/x86/include/asm/es7000/apic.h | 29 ++++++++++++------- arch/x86/include/asm/mach-default/mach_apic.h | 10 +++---- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/include/asm/numaq/apic.h | 11 +++---- arch/x86/include/asm/summit/apic.h | 21 +++++++++----- arch/x86/kernel/genapic_flat_64.c | 4 ++- arch/x86/kernel/genx2apic_cluster.c | 15 ++++++---- arch/x86/kernel/genx2apic_phys.c | 15 ++++++---- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++---- arch/x86/kernel/io_apic.c | 21 ++++++++------ arch/x86/mach-generic/bigsmp.c | 4 +-- arch/x86/mach-generic/default.c | 4 +-- arch/x86/mach-generic/es7000.c | 6 ++-- arch/x86/mach-generic/numaq.c | 4 +-- arch/x86/mach-generic/summit.c | 4 +-- 16 files changed, 101 insertions(+), 80 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 1230f5d7a38..ee29d66cd30 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -105,18 +105,14 @@ static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int cpu; - int apicid; - - cpu = first_cpu(*cpumask); - apicid = bigsmp_cpu_to_logical_apicid(cpu); - return apicid; + return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int +bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -124,9 +120,10 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return bigsmp_cpu_to_logical_apicid(cpu); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index f183dfb4de4..b89b45db735 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -137,12 +137,12 @@ static inline int es7000_check_phys_apicid_present(int cpu_physical_apicid) } static inline unsigned int -cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) { - int num_bits_set; int cpus_found = 0; - int cpu; + int num_bits_set; int apicid; + int cpu; num_bits_set = cpumask_weight(cpumask); /* Return id to all */ @@ -154,12 +154,15 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) */ cpu = cpumask_first(cpumask); apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ + apicid_cluster(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); + return 0xFF; } apicid = new_apicid; @@ -170,12 +173,12 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int num_bits_set; int cpus_found = 0; - int cpu; + int num_bits_set; int apicid; + int cpu; num_bits_set = cpus_weight(*cpumask); /* Return id to all */ @@ -190,9 +193,11 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ + apicid_cluster(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); + return es7000_cpu_to_logical_apicid(0); } apicid = new_apicid; @@ -204,8 +209,9 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) +static inline unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) { int apicid = es7000_cpu_to_logical_apicid(0); cpumask_var_t cpumask; @@ -215,9 +221,10 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = cpu_mask_to_apicid(cpumask); + apicid = es7000_cpu_mask_to_apicid(cpumask); free_cpumask_var(cpumask); + return apicid; } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8719208f273..8972f843414 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -19,8 +19,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include -#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) #define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) @@ -49,13 +47,15 @@ static inline int default_apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) { return cpumask_bits(cpumask)[0]; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { unsigned long mask1 = cpumask_bits(cpumask)[0]; unsigned long mask2 = cpumask_bits(andmask)[0]; diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 1eeb5b61e48..ca460e45991 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,8 +3,6 @@ #include -#define cpu_mask_to_apicid (apic->cpu_mask_to_apicid) -#define cpu_mask_to_apicid_and (apic->cpu_mask_to_apicid_and) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 765c4d5124c..ce95e79f723 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -101,15 +101,16 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) { - return (int) 0xF; + return 0x0F; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { - return (int) 0xF; + return 0x0F; } /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index fa6b3b45290..15b8dbd19e1 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -125,29 +125,32 @@ static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) return 1; } -static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int num_bits_set; int cpus_found = 0; - int cpu; + int num_bits_set; int apicid; + int cpu; num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set >= nr_cpu_ids) - return (int) 0xFF; + return 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of target_cpus(): */ cpu = first_cpu(*cpumask); apicid = summit_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { if (cpu_isset(cpu, *cpumask)) { int new_apicid = summit_cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ + apicid_cluster(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); + return 0xFF; } apicid = apicid | new_apicid; @@ -158,8 +161,9 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) +static inline unsigned int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) { int apicid = summit_cpu_to_logical_apicid(0); cpumask_var_t cpumask; @@ -169,9 +173,10 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = cpu_mask_to_apicid(cpumask); + apicid = summit_cpu_mask_to_apicid(cpumask); free_cpumask_var(cpumask); + return apicid; } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 78baa55cd0e..b941b112caf 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -309,11 +309,13 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index c7557e05184..62f9fccf01d 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -111,21 +111,21 @@ static int x2apic_apic_id_registered(void) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -133,11 +133,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } + if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 80cba49cfd8..3da1675b260 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -110,21 +110,21 @@ static int x2apic_apic_id_registered(void) static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -132,11 +132,14 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } + if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 50310b96adc..f957878c21e 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -171,21 +171,21 @@ static void uv_init_apic_ldr(void) static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) { - int cpu; - /* * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = cpumask_first(cpumask); + int cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } -static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static unsigned int +uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { int cpu; @@ -193,11 +193,13 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - for_each_cpu_and(cpu, cpumask, andmask) + for_each_cpu_and(cpu, cpumask, andmask) { if (cpumask_test_cpu(cpu, cpu_online_mask)) break; + } if (cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3d85d3d810b..01a2505d727 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -563,8 +563,9 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); /* - * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid - * of that, or returns BAD_APICID and leaves desc->affinity untouched. + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. */ static unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) @@ -582,7 +583,8 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) cpumask_and(desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); - return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); } static void @@ -1562,7 +1564,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1666,7 +1668,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, */ entry.dest_mode = apic->irq_dest_mode; entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(apic->target_cpus()); + entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -2367,7 +2369,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) set_extra_move_desc(desc, mask); - dest = cpu_mask_to_apicid_and(cfg->domain, mask); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -3270,7 +3272,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms if (err) return err; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3708,7 +3710,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct ht_irq_msg msg; unsigned dest; - dest = cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + dest = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus()); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3773,7 +3776,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(eligible_cpu); + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index ee52c59aa3a..22c2c7b8e4a 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -94,8 +94,8 @@ struct genapic apic_bigsmp = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e4ed7e6d626..477ebec1674 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -75,8 +75,8 @@ struct genapic apic_default = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 3d046dec0c9..d758cf65d73 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -26,7 +26,7 @@ void __init es7000_update_genapic_to_cluster(void) apic->init_apic_ldr = es7000_init_apic_ldr_cluster; - apic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; + apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; } static int probe_es7000(void) @@ -130,8 +130,8 @@ struct genapic apic_es7000 = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index a7bf1aa02e1..eb7d56a521d 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -94,8 +94,8 @@ struct genapic apic_numaq = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index a0ae6b91048..8c293055bdf 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -74,8 +74,8 @@ struct genapic apic_summit = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = cpu_mask_to_apicid_and, + .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, .send_IPI_mask = send_IPI_mask, .send_IPI_mask_allbutself = NULL, From dac5f4121df3c39fdb2ea57acd669a0ae19e46f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 15:42:24 +0100 Subject: [PATCH 253/682] x86, apic: untangle the send_IPI_*() jungle Our send_IPI_*() methods and definitions are a twisted mess: the same symbol is defined to different things depending on .config details, in a non-transparent way. - spread out the quirks into separately named per apic driver methods - prefix the standard PC methods with default_ - get rid of wrapper macro obfuscation - clean up various details Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/ipi.h | 16 ++++---- arch/x86/include/asm/es7000/ipi.h | 16 ++++---- arch/x86/include/asm/hw_irq.h | 4 +- arch/x86/include/asm/ipi.h | 38 +++++++++--------- arch/x86/include/asm/mach-default/mach_apic.h | 1 - arch/x86/include/asm/mach-default/mach_ipi.h | 40 ++++++++----------- arch/x86/include/asm/mach-generic/mach_ipi.h | 4 -- arch/x86/include/asm/numaq/ipi.h | 16 ++++---- arch/x86/include/asm/summit/ipi.h | 16 ++++---- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 24 ++++++----- arch/x86/kernel/genx2apic_cluster.c | 38 ++++++++++-------- arch/x86/kernel/genx2apic_phys.c | 36 +++++++---------- arch/x86/kernel/genx2apic_uv_x.c | 21 +++++----- arch/x86/kernel/io_apic.c | 10 ++--- arch/x86/kernel/ipi.c | 28 +++++++------ arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smp.c | 10 ++--- arch/x86/mach-generic/bigsmp.c | 6 +-- arch/x86/mach-generic/default.c | 6 +-- arch/x86/mach-generic/es7000.c | 6 +-- arch/x86/mach-generic/numaq.c | 6 +-- arch/x86/mach-generic/summit.c | 6 +-- arch/x86/mm/tlb.c | 2 +- 26 files changed, 178 insertions(+), 180 deletions(-) diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 27fcd01b3ae..a91db69cda6 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -1,22 +1,22 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void bigsmp_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } -static inline void send_IPI_all(int vector) +static inline void bigsmp_send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + default_send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 7e8ed24d4b8..81e77c812ba 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h @@ -1,22 +1,22 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void es7000_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void es7000_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } -static inline void send_IPI_all(int vector) +static inline void es7000_send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + es7000_send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8de644b6b95..bfa921fad13 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -73,9 +73,9 @@ extern void enable_IO_APIC(void); /* IPI functions */ #ifdef CONFIG_X86_32 -extern void send_IPI_self(int vector); +extern void default_send_IPI_self(int vector); #endif -extern void send_IPI(int dest, int vector); +extern void default_send_IPI(int dest, int vector); /* Statistics */ extern atomic_t irq_err_count; diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index c745a306f7d..a8d717f2c7e 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -55,8 +55,9 @@ static inline void __xapic_wait_icr_idle(void) cpu_relax(); } -static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, - unsigned int dest) +static inline void +__default_send_IPI_shortcut(unsigned int shortcut, + int vector, unsigned int dest) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -87,8 +88,8 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void __send_IPI_dest_field(unsigned int mask, int vector, - unsigned int dest) +static inline void + __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) { unsigned long cfg; @@ -117,11 +118,11 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(const struct cpumask *mask, - int vector) +static inline void +default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; /* * Hack. The clustered APIC addressing mode doesn't allow us to send @@ -130,27 +131,28 @@ static inline void send_IPI_mask_sequence(const struct cpumask *mask, */ local_irq_save(flags); for_each_cpu(query_cpu, mask) { - __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static inline void send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static inline void +default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned int query_cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int query_cpu; + unsigned long flags; /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } local_irq_restore(flags); } diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 8972f843414..2e4104cf348 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -20,7 +20,6 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include #define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) -#define send_IPI_self (apic->send_IPI_self) #define wakeup_secondary_cpu (apic->wakeup_cpu) extern void default_setup_apic_routing(void); #else diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index 089399643df..85dec630c69 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,45 +4,40 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -void __send_IPI_shortcut(unsigned int shortcut, int vector); +void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void __default_send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; #ifdef CONFIG_X86_64 #include -#define send_IPI_mask (apic->send_IPI_mask) -#define send_IPI_mask_allbutself (apic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_bitmask(mask, vector); + default_send_IPI_mask_bitmask(mask, vector); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif -static inline void __local_send_IPI_allbutself(int vector) +static inline void __default_local_send_IPI_allbutself(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask_allbutself(cpu_online_mask, vector); + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } -static inline void __local_send_IPI_all(int vector) +static inline void __default_local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_mask, vector); + apic->send_IPI_mask(cpu_online_mask, vector); else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector); + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); } -#ifdef CONFIG_X86_64 -#define send_IPI_allbutself (apic->send_IPI_allbutself) -#define send_IPI_all (apic->send_IPI_all) -#else -static inline void send_IPI_allbutself(int vector) +#ifdef CONFIG_X86_32 +static inline void default_send_IPI_allbutself(int vector) { /* * if there are no other CPUs in the system then we get an APIC send @@ -51,13 +46,12 @@ static inline void send_IPI_allbutself(int vector) if (!(num_online_cpus() > 1)) return; - __local_send_IPI_allbutself(vector); - return; + __default_local_send_IPI_allbutself(vector); } -static inline void send_IPI_all(int vector) +static inline void default_send_IPI_all(int vector) { - __local_send_IPI_all(vector); + __default_local_send_IPI_all(vector); } #endif diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h index 75e54bd6cbd..5691c09645c 100644 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ b/arch/x86/include/asm/mach-generic/mach_ipi.h @@ -3,8 +3,4 @@ #include -#define send_IPI_mask (apic->send_IPI_mask) -#define send_IPI_allbutself (apic->send_IPI_allbutself) -#define send_IPI_all (apic->send_IPI_all) - #endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index a8374c65277..5dbc4b4cd5e 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h @@ -1,22 +1,22 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(const struct cpumask *mask, int vector) +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void numaq_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } -static inline void send_IPI_all(int vector) +static inline void numaq_send_IPI_all(int vector) { - send_IPI_mask(cpu_online_mask, vector); + numaq_send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index a8a2c24f50c..f87a43fe0ae 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h @@ -1,26 +1,26 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(const cpumask_t *mask, int vector) +static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) { - send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence(mask, vector); } -static inline void send_IPI_allbutself(int vector) +static inline void summit_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(&mask, vector); + summit_send_IPI_mask(&mask, vector); } -static inline void send_IPI_all(int vector) +static inline void summit_send_IPI_all(int vector) { - send_IPI_mask(&cpu_online_map, vector); + summit_send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 5f7f3a9a47a..84b8e7c57ab 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -475,7 +475,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); #endif } diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index d57d2138f07..820dea5d0eb 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -65,7 +65,7 @@ void __init default_setup_apic_routing(void) void apic_send_IPI_self(int vector) { - __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index b941b112caf..7c648ccea51 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -74,7 +74,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) unsigned long flags; local_irq_save(flags); - __send_IPI_dest_field(mask, vector, apic->dest_logical); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); local_irq_restore(flags); } @@ -85,14 +85,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) _flat_send_IPI_mask(mask, vector); } -static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, - int vector) +static void + flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); if (cpu < BITS_PER_LONG) clear_bit(cpu, &mask); + _flat_send_IPI_mask(mask, vector); } @@ -114,16 +115,19 @@ static void flat_send_IPI_allbutself(int vector) _flat_send_IPI_mask(mask, vector); } } else if (num_online_cpus() > 1) { - __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, + vector, apic->dest_logical); } } static void flat_send_IPI_all(int vector) { - if (vector == NMI_VECTOR) + if (vector == NMI_VECTOR) { flat_send_IPI_mask(cpu_online_mask, vector); - else - __send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); + } else { + __default_send_IPI_shortcut(APIC_DEST_ALLINC, + vector, apic->dest_logical); + } } static unsigned int flat_get_apic_id(unsigned long x) @@ -265,18 +269,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - send_IPI_mask_sequence(cpumask, vector); + default_send_IPI_mask_sequence(cpumask, vector); } static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { - send_IPI_mask_allbutself(cpumask, vector); + default_send_IPI_mask_allbutself(cpumask, vector); } static void physflat_send_IPI_allbutself(int vector) { - send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 62f9fccf01d..2d97726a973 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -36,8 +36,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, - unsigned int dest) +static void + __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) { unsigned long cfg; @@ -57,45 +57,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, */ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_cpu(query_cpu, mask) + for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest( per_cpu(x86_cpu_to_logical_apicid, query_cpu), vector, apic->dest_logical); + } local_irq_restore(flags); } @@ -175,7 +180,6 @@ static void init_x2apic_ldr(void) int cpu = smp_processor_id(); per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); - return; } struct genapic apic_x2apic_cluster = { diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 3da1675b260..74777c27669 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -55,8 +55,8 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - unsigned long flags; unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); for_each_cpu(query_cpu, mask) { @@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) local_irq_restore(flags); } -static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, - int vector) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); for_each_cpu(query_cpu, mask) { @@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, static void x2apic_send_IPI_allbutself(int vector) { - unsigned long flags; - unsigned long query_cpu; unsigned long this_cpu = smp_processor_id(); + unsigned long query_cpu; + unsigned long flags; local_irq_save(flags); - for_each_online_cpu(query_cpu) - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); + for_each_online_cpu(query_cpu) { + if (query_cpu == this_cpu) + continue; + __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } local_irq_restore(flags); } @@ -145,18 +146,12 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, static unsigned int x2apic_phys_get_apic_id(unsigned long x) { - unsigned int id; - - id = x; - return id; + return x; } static unsigned long set_apic_id(unsigned int id) { - unsigned long x; - - x = id; - return x; + return id; } static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) @@ -171,7 +166,6 @@ static void x2apic_send_IPI_self(int vector) static void init_x2apic_ldr(void) { - return; } struct genapic apic_x2apic_phys = { diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index f957878c21e..24b9f42db9b 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -118,12 +118,13 @@ static void uv_send_IPI_one(int cpu, int vector) int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); - lapicid = apicid & 0x3f; /* ZZZ macro needed */ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ pnode = uv_apicid_to_pnode(apicid); - val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << - UVH_IPI_INT_APIC_ID_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); + + val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) | + ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) | + ( vector << UVH_IPI_INT_VECTOR_SHFT ); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } @@ -137,22 +138,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector) static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - unsigned int cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - for_each_cpu(cpu, mask) + for_each_cpu(cpu, mask) { if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); + } } static void uv_send_IPI_allbutself(int vector) { - unsigned int cpu; unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); + } } static void uv_send_IPI_all(int vector) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 01a2505d727..e90970ce21a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -514,11 +514,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg) for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } cfg->move_in_progress = 0; @@ -800,7 +800,7 @@ static void clear_IO_APIC (void) } #if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) +void default_send_IPI_self(int vector) { unsigned int cfg; @@ -2297,7 +2297,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2305,7 +2305,7 @@ static int ioapic_retrigger_irq(unsigned int irq) #else static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + apic->send_IPI_self(irq_cfg(irq)->vector); return 1; } diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 367c5e684fa..e16c41b2e4e 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -48,7 +48,7 @@ static inline int __prepare_ICR2(unsigned int mask) return SET_APIC_DEST_FIELD(mask); } -void __send_IPI_shortcut(unsigned int shortcut, int vector) +void __default_send_IPI_shortcut(unsigned int shortcut, int vector) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -75,16 +75,16 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) apic_write(APIC_ICR, cfg); } -void send_IPI_self(int vector) +void default_send_IPI_self(int vector) { - __send_IPI_shortcut(APIC_DEST_SELF, vector); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector); } /* * This is used to send an IPI with no shorthand notation (the destination is * specified in bits 56 to 63 of the ICR). */ -static inline void __send_IPI_dest_field(unsigned long mask, int vector) +static inline void __default_send_IPI_dest_field(unsigned long mask, int vector) { unsigned long cfg; @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. */ -void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +void default_send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __send_IPI_dest_field(mask, vector); + __default_send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -140,11 +140,11 @@ void send_IPI_mask_sequence(const struct cpumask *mask, int vector) local_irq_save(flags); for_each_cpu(query_cpu, mask) - __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); + __default_send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); local_irq_restore(flags); } -void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -153,10 +153,12 @@ void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) /* See Hack comment above */ local_irq_save(flags); - for_each_cpu(query_cpu, mask) - if (query_cpu != this_cpu) - __send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), - vector); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector); + } local_irq_restore(flags); } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 10435a120d2..b62a3811e01 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code) */ void kgdb_roundup_cpus(unsigned long flags) { - send_IPI_allbutself(APIC_DM_NMI); + apic->send_IPI_allbutself(APIC_DM_NMI); } #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8536fee5c1..38dace28d62 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -651,7 +651,7 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - send_IPI_allbutself(NMI_VECTOR); + apic->send_IPI_allbutself(NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index e6faa3316bd..c48ba6cc32a 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,12 +118,12 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); + apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); } void native_send_call_func_ipi(const struct cpumask *mask) @@ -131,7 +131,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) cpumask_var_t allbutself; if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); return; } @@ -140,9 +140,9 @@ void native_send_call_func_ipi(const struct cpumask *mask) if (cpumask_equal(mask, allbutself) && cpumask_equal(cpu_online_mask, cpu_callout_mask)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); + apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); free_cpumask_var(allbutself); } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 22c2c7b8e4a..4782b554788 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -97,10 +97,10 @@ struct genapic apic_bigsmp = { .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = default_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = bigsmp_send_IPI_allbutself, + .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 477ebec1674..bf4670db25f 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -78,10 +78,10 @@ struct genapic apic_default = { .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = default_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index d758cf65d73..d36642e6d90 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -133,10 +133,10 @@ struct genapic apic_es7000 = { .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = es7000_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = es7000_send_IPI_allbutself, + .send_IPI_all = es7000_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index eb7d56a521d..135b1832ad8 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -97,10 +97,10 @@ struct genapic apic_numaq = { .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = numaq_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = numaq_send_IPI_allbutself, + .send_IPI_all = numaq_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 8c293055bdf..77196a4a9d2 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -77,10 +77,10 @@ struct genapic apic_summit = { .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, - .send_IPI_mask = send_IPI_mask, + .send_IPI_mask = summit_send_IPI_mask, .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = send_IPI_allbutself, - .send_IPI_all = send_IPI_all, + .send_IPI_allbutself = summit_send_IPI_allbutself, + .send_IPI_all = summit_send_IPI_all, .send_IPI_self = NULL, .wakeup_cpu = NULL, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 72a6d4ebe34..6348e114692 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -196,7 +196,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(to_cpumask(f->flush_cpumask), + apic->send_IPI_mask(to_cpumask(f->flush_cpumask), INVALIDATE_TLB_VECTOR_START + sender); while (!cpumask_empty(to_cpumask(f->flush_cpumask))) From 6f177c01db6b865181fbc6c948381b290ee09718 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:09:23 +0100 Subject: [PATCH 254/682] x86, smp: clean up ->trampoline_phys_low/high handling - spread out the namespace on a per apic driver basis - remove wrapper macros Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 4 ++-- arch/x86/include/asm/mach-default/mach_wakecpu.h | 4 ++-- arch/x86/include/asm/mach-default/smpboot_hooks.h | 6 +++--- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 2 -- arch/x86/include/asm/numaq/wakecpu.h | 12 ++++++------ arch/x86/mach-generic/bigsmp.c | 4 ++-- arch/x86/mach-generic/default.c | 4 ++-- arch/x86/mach-generic/es7000.c | 4 ++-- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 4 ++-- 10 files changed, 23 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 78f0daaee43..4c01be6ff80 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -1,8 +1,8 @@ #ifndef __ASM_ES7000_WAKECPU_H #define __ASM_ES7000_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW 0x467 -#define TRAMPOLINE_PHYS_HIGH 0x469 +#define ES7000_TRAMPOLINE_PHYS_LOW 0x467 +#define ES7000_TRAMPOLINE_PHYS_HIGH 0x469 static inline void wait_for_init_deassert(atomic_t *deassert) { diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 89897a6a65b..0a8d7860e44 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,8 +1,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (0x467) -#define TRAMPOLINE_PHYS_HIGH (0x469) +#define DEFAULT_TRAMPOLINE_PHYS_LOW (0x467) +#define DEFAULT_TRAMPOLINE_PHYS_HIGH (0x469) static inline void wait_for_init_deassert(atomic_t *deassert) { diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h index 23bf52103b8..1def6011490 100644 --- a/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h @@ -13,10 +13,10 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) CMOS_WRITE(0xa, 0xf); local_flush_tlb(); pr_debug("1.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = start_eip >> 4; pr_debug("2.\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) = start_eip & 0xf; pr_debug("3.\n"); } @@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void) */ CMOS_WRITE(0, 0xf); - *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; + *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0; } static inline void __init smpboot_setup_io_apic(void) diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 22006bbee61..2031377a954 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define TRAMPOLINE_PHYS_LOW (apic->trampoline_phys_low) -#define TRAMPOLINE_PHYS_HIGH (apic->trampoline_phys_high) #define wait_for_init_deassert (apic->wait_for_init_deassert) #define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) #define store_NMI_vector (apic->store_NMI_vector) diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 6f499df8edd..8b6c16d8558 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -3,8 +3,8 @@ /* This file copes with machines that wakeup secondary CPUs by NMIs */ -#define TRAMPOLINE_PHYS_LOW (0x8) -#define TRAMPOLINE_PHYS_HIGH (0xa) +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) /* We don't do anything here because we use NMI's to boot instead */ static inline void wait_for_init_deassert(atomic_t *deassert) @@ -24,17 +24,17 @@ static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { printk("Storing NMI vector\n"); *high = - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); *low = - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { printk("Restoring NMI vector\n"); - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)) = *high; - *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)) = *low; } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 4782b554788..a317fbe07fd 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -104,8 +104,8 @@ struct genapic apic_bigsmp = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index bf4670db25f..17d8f9c2218 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -85,8 +85,8 @@ struct genapic apic_default = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index d36642e6d90..871e85445e2 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -140,8 +140,8 @@ struct genapic apic_es7000 = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 135b1832ad8..0b496ab5450 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -104,8 +104,8 @@ struct genapic apic_numaq = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 77196a4a9d2..c4799cd3459 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -84,8 +84,8 @@ struct genapic apic_summit = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = wait_for_init_deassert, .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, From abfa584c8df8b691cf18f51c7d4af27e5b32be4a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:15:16 +0100 Subject: [PATCH 255/682] x86: set ->trampoline_phys_low/high on 64-bit too 64-bit x86 has zero for ->trampoline_phys_low/high, but the smpboot code can use these values - so it's better to set them up to their correct values. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 6 ++++++ arch/x86/include/asm/mach-default/mach_wakecpu.h | 3 --- arch/x86/kernel/genapic_flat_64.c | 8 ++++---- arch/x86/kernel/genx2apic_cluster.c | 4 ++-- arch/x86/kernel/genx2apic_phys.c | 4 ++-- arch/x86/kernel/genx2apic_uv_x.c | 4 ++-- 6 files changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 8bb1c73c55b..90e83a769a1 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -88,6 +88,12 @@ struct genapic { extern struct genapic *apic; +/* + * Warm reset vector default position: + */ +#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 +#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 + #ifdef CONFIG_X86_32 extern void es7000_update_genapic_to_cluster(void); #else diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 0a8d7860e44..a327a675e47 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,9 +1,6 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define DEFAULT_TRAMPOLINE_PHYS_LOW (0x467) -#define DEFAULT_TRAMPOLINE_PHYS_HIGH (0x469) - static inline void wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 7c648ccea51..3a28d6a8c49 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -224,8 +224,8 @@ struct genapic apic_flat = { .send_IPI_self = apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, @@ -370,8 +370,8 @@ struct genapic apic_physflat = { .send_IPI_self = apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 2d97726a973..abc5ee329f2 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -228,8 +228,8 @@ struct genapic apic_x2apic_cluster = { .send_IPI_self = x2apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 74777c27669..dc815ef22d8 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -214,8 +214,8 @@ struct genapic apic_x2apic_phys = { .send_IPI_self = x2apic_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 24b9f42db9b..b5908735ca5 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -287,8 +287,8 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_self = uv_send_IPI_self, .wakeup_cpu = NULL, - .trampoline_phys_low = 0, - .trampoline_phys_high = 0, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, From a965936643e28af8152d9e960b966baa1a5588a2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:21:32 +0100 Subject: [PATCH 256/682] x86, smp: refactor ->wait_for_init_deassert() - spread out the namespace on a per APIC driver basis - handle a NULL ->wait_for_init_deassert() as a 'dont wait' default method - remove NUMAQ and Summit handlers Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 2 +- arch/x86/include/asm/mach-default/mach_wakecpu.h | 2 +- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 1 - arch/x86/include/asm/numaq/wakecpu.h | 5 ----- arch/x86/kernel/es7000_32.c | 6 +----- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 4 +++- arch/x86/mach-generic/default.c | 4 +++- arch/x86/mach-generic/es7000.c | 4 +++- arch/x86/mach-generic/numaq.c | 5 ++++- arch/x86/mach-generic/summit.c | 4 +++- 11 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 4c01be6ff80..5c4d05f46d2 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -4,7 +4,7 @@ #define ES7000_TRAMPOLINE_PHYS_LOW 0x467 #define ES7000_TRAMPOLINE_PHYS_HIGH 0x469 -static inline void wait_for_init_deassert(atomic_t *deassert) +static inline void es7000_wait_for_init_deassert(atomic_t *deassert) { #ifndef CONFIG_ES7000_CLUSTERED_APIC while (!atomic_read(deassert)) diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index a327a675e47..1d34c69a758 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -static inline void wait_for_init_deassert(atomic_t *deassert) +static inline void default_wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) cpu_relax(); diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 2031377a954..58e54122f73 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define wait_for_init_deassert (apic->wait_for_init_deassert) #define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) #define store_NMI_vector (apic->store_NMI_vector) #define restore_NMI_vector (apic->restore_NMI_vector) diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 8b6c16d8558..884b95cf584 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -6,11 +6,6 @@ #define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) #define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) -/* We don't do anything here because we use NMI's to boot instead */ -static inline void wait_for_init_deassert(atomic_t *deassert) -{ -} - /* * Because we use NMIs rather than the INIT-STARTUP sequence to * bootstrap the CPUs, the APIC may be in a weird state. Kick it. diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index e73fe18488a..d7f433ee602 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -182,10 +182,6 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) return 0; } -static void noop_wait_for_deassert(atomic_t *deassert_not_used) -{ -} - static int __init es7000_update_genapic(void) { apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; @@ -194,7 +190,7 @@ static int __init es7000_update_genapic(void) if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { es7000_update_genapic_to_cluster(); - apic->wait_for_init_deassert = noop_wait_for_deassert; + apic->wait_for_init_deassert = NULL; apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ab83be2f8e0..558af378a61 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -196,7 +196,8 @@ static void __cpuinit smp_callin(void) * our local APIC. We have to wait for the IPI or we'll * lock up on an APIC access. */ - wait_for_init_deassert(&init_deasserted); + if (apic->wait_for_init_deassert) + apic->wait_for_init_deassert(&init_deasserted); /* * (This works even if the APIC is not enabled.) diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index a317fbe07fd..40910bfd1b4 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -106,7 +106,9 @@ struct genapic apic_bigsmp = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 17d8f9c2218..c2464843df9 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -87,7 +87,9 @@ struct genapic apic_default = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 871e85445e2..4cb3984834e 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -142,7 +142,9 @@ struct genapic apic_es7000 = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 0b496ab5450..fb03867e7c0 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -106,7 +106,10 @@ struct genapic apic_numaq = { .wakeup_cpu = NULL, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + /* We don't do anything here because we use NMI's to boot instead */ + .wait_for_init_deassert = NULL, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index c4799cd3459..fdca78b96b6 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -86,7 +86,9 @@ struct genapic apic_summit = { .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = wait_for_init_deassert, + + .wait_for_init_deassert = default_wait_for_init_deassert, + .smp_callin_clear_local_apic = smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, From 333344d94300500e401cffb4eea10a5ab6e5a41d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: [PATCH 257/682] x86, smp: refactor ->smp_callin_clear_local_apic() methods Only NUMAQ does something substantial here, because it initializes via NMIs (not via INIT as standard SMP startup) - so it needs to reset the APIC. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 5 ----- arch/x86/include/asm/mach-default/mach_wakecpu.h | 5 ----- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 1 - arch/x86/include/asm/numaq/wakecpu.h | 4 ++-- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 3 ++- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 4 +++- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 3 ++- 10 files changed, 13 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 5c4d05f46d2..e8e03962633 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -13,11 +13,6 @@ static inline void es7000_wait_for_init_deassert(atomic_t *deassert) return; } -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { } diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 1d34c69a758..d059807cd7e 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -8,11 +8,6 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } -/* Nothing to do for most platforms, since cleared by the INIT cycle */ -static inline void smp_callin_clear_local_apic(void) -{ -} - static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { } diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 58e54122f73..30515a154d8 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define smp_callin_clear_local_apic (apic->smp_callin_clear_local_apic) #define store_NMI_vector (apic->store_NMI_vector) #define restore_NMI_vector (apic->restore_NMI_vector) #define inquire_remote_apic (apic->inquire_remote_apic) diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 884b95cf584..61d0a7d9613 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -8,9 +8,9 @@ /* * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it. + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: */ -static inline void smp_callin_clear_local_apic(void) +static inline void numaq_smp_callin_clear_local_apic(void) { clear_local_APIC(); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 558af378a61..10873a46b29 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -244,7 +244,8 @@ static void __cpuinit smp_callin(void) */ pr_debug("CALLIN, before setup_local_APIC().\n"); - smp_callin_clear_local_apic(); + if (apic->smp_callin_clear_local_apic) + apic->smp_callin_clear_local_apic(); setup_local_APIC(); end_local_APIC_setup(); map_cpu_to_logical_apicid(); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 40910bfd1b4..bd069e7b521 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -109,7 +109,8 @@ struct genapic apic_bigsmp = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c2464843df9..a25e6eff048 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -90,7 +90,7 @@ struct genapic apic_default = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = NULL, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 4cb3984834e..ab41b543914 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -145,7 +145,9 @@ struct genapic apic_es7000 = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + /* Nothing to do for most platforms, since cleared by the INIT cycle: */ + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index fb03867e7c0..4d3924f8cd0 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -110,7 +110,7 @@ struct genapic apic_numaq = { /* We don't do anything here because we use NMI's to boot instead */ .wait_for_init_deassert = NULL, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index fdca78b96b6..2595baa7997 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -89,7 +89,8 @@ struct genapic apic_summit = { .wait_for_init_deassert = default_wait_for_init_deassert, - .smp_callin_clear_local_apic = smp_callin_clear_local_apic, + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = store_NMI_vector, .restore_NMI_vector = restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, From 7bd06ec63a1204ca44b9f1dc487b8632016162d1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: [PATCH 258/682] x86, smp: refactor ->store/restore_NMI_vector() methods Only NUMAQ does something substantial here, because it initializes via NMIs (not via INIT as standard SMP startup) - so it needs to store and restore the NMI vector. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 8 -------- arch/x86/include/asm/mach-default/mach_wakecpu.h | 8 -------- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 2 -- arch/x86/include/asm/numaq/wakecpu.h | 6 ++++-- arch/x86/kernel/smpboot.c | 3 ++- arch/x86/mach-generic/bigsmp.c | 5 ++--- arch/x86/mach-generic/default.c | 4 ++-- arch/x86/mach-generic/es7000.c | 5 ++--- arch/x86/mach-generic/numaq.c | 4 ++-- arch/x86/mach-generic/summit.c | 5 ++--- 10 files changed, 16 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index e8e03962633..71a3a412d0e 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -13,14 +13,6 @@ static inline void es7000_wait_for_init_deassert(atomic_t *deassert) return; } -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - extern void __inquire_remote_apic(int apicid); static inline void inquire_remote_apic(int apicid) diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index d059807cd7e..656bb5e52bc 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -8,14 +8,6 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) -{ -} - #ifdef CONFIG_SMP extern void __inquire_remote_apic(int apicid); #else /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 30515a154d8..93207dfe8f5 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define store_NMI_vector (apic->store_NMI_vector) -#define restore_NMI_vector (apic->restore_NMI_vector) #define inquire_remote_apic (apic->inquire_remote_apic) #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 61d0a7d9613..123201712a9 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -15,7 +15,8 @@ static inline void numaq_smp_callin_clear_local_apic(void) clear_local_APIC(); } -static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) { printk("Storing NMI vector\n"); *high = @@ -24,7 +25,8 @@ static inline void store_NMI_vector(unsigned short *high, unsigned short *low) *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } -static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +static inline void +numaq_restore_NMI_vector(unsigned short *high, unsigned short *low) { printk("Restoring NMI vector\n"); *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)) = diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 10873a46b29..1492024592f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -826,7 +826,8 @@ do_rest: pr_debug("Setting warm reset code and vector.\n"); - store_NMI_vector(&nmi_high, &nmi_low); + if (apic->store_NMI_vector) + apic->store_NMI_vector(&nmi_high, &nmi_low); smpboot_setup_warm_reset_vector(start_ip); /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index bd069e7b521..ecdb230d0f2 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -110,8 +110,7 @@ struct genapic apic_bigsmp = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index a25e6eff048..950925615a9 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -91,7 +91,7 @@ struct genapic apic_default = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index ab41b543914..13190709138 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -147,8 +147,7 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, - - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 4d3924f8cd0..d7f7fcf21c3 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -111,7 +111,7 @@ struct genapic apic_numaq = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = numaq_store_NMI_vector, + .restore_NMI_vector = numaq_restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2595baa7997..46fca79f831 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -90,8 +90,7 @@ struct genapic apic_summit = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - - .store_NMI_vector = store_NMI_vector, - .restore_NMI_vector = restore_NMI_vector, + .store_NMI_vector = NULL, + .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; From 3d5f597e938c425554cb7668fd3c9d6a536a984a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:43:47 +0100 Subject: [PATCH 259/682] x86, smp: remove ->restore_NMI_vector() Nothing actually restores the NMI vector - so remove this logic altogether. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 1 - arch/x86/include/asm/numaq/wakecpu.h | 10 ---------- arch/x86/kernel/genapic_flat_64.c | 2 -- arch/x86/kernel/genx2apic_cluster.c | 1 - arch/x86/kernel/genx2apic_phys.c | 1 - arch/x86/kernel/genx2apic_uv_x.c | 1 - arch/x86/mach-generic/bigsmp.c | 1 - arch/x86/mach-generic/default.c | 1 - arch/x86/mach-generic/es7000.c | 1 - arch/x86/mach-generic/numaq.c | 1 - arch/x86/mach-generic/summit.c | 1 - 11 files changed, 21 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 90e83a769a1..e5f9c5696fb 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -82,7 +82,6 @@ struct genapic { void (*wait_for_init_deassert)(atomic_t *deassert); void (*smp_callin_clear_local_apic)(void); void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); void (*inquire_remote_apic)(int apicid); }; diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 123201712a9..920dcfefa83 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -25,16 +25,6 @@ numaq_store_NMI_vector(unsigned short *high, unsigned short *low) *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } -static inline void -numaq_restore_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Restoring NMI vector\n"); - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)) = - *high; - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)) = - *low; -} - static inline void inquire_remote_apic(int apicid) { } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 3a28d6a8c49..e9237f59928 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -229,7 +229,6 @@ struct genapic apic_flat = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; @@ -375,6 +374,5 @@ struct genapic apic_physflat = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index abc5ee329f2..7c87156b641 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -233,6 +233,5 @@ struct genapic apic_x2apic_cluster = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index dc815ef22d8..5cbae8aa040 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -219,6 +219,5 @@ struct genapic apic_x2apic_phys = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b5908735ca5..6adb5e6f4d9 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -292,7 +292,6 @@ struct genapic apic_x2apic_uv_x = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = NULL, }; diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index ecdb230d0f2..d9377af88cb 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -111,6 +111,5 @@ struct genapic apic_bigsmp = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 950925615a9..b004257035c 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -92,6 +92,5 @@ struct genapic apic_default = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 13190709138..62673a8002f 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -148,6 +148,5 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index d7f7fcf21c3..2c3341564d1 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -112,6 +112,5 @@ struct genapic apic_numaq = { .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = numaq_store_NMI_vector, - .restore_NMI_vector = numaq_restore_NMI_vector, .inquire_remote_apic = inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 46fca79f831..c2471a9fa8f 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -91,6 +91,5 @@ struct genapic apic_summit = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .restore_NMI_vector = NULL, .inquire_remote_apic = inquire_remote_apic, }; From 25dc004903a38f0b6f6626dbbab058c8709c5398 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 16:31:52 +0100 Subject: [PATCH 260/682] x86, smp: refactor ->inquire_remote_apic() methods Nothing exciting - a few subarches dont want APIC remote reads to be performed - the others are content with the default method. - extend the generic code to handle NULL methods - clear out dummy methods and replace them with NULL - clean up: remove wrapper macros, etc. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/wakecpu.h | 8 -------- arch/x86/include/asm/mach-default/mach_wakecpu.h | 2 +- arch/x86/include/asm/mach-generic/mach_wakecpu.h | 2 -- arch/x86/include/asm/numaq/wakecpu.h | 4 ---- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/mach-generic/bigsmp.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/es7000.c | 2 +- arch/x86/mach-generic/numaq.c | 2 +- arch/x86/mach-generic/summit.c | 2 +- 10 files changed, 8 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 71a3a412d0e..99c72be1840 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -13,12 +13,4 @@ static inline void es7000_wait_for_init_deassert(atomic_t *deassert) return; } -extern void __inquire_remote_apic(int apicid); - -static inline void inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - #endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 656bb5e52bc..b1cde560e4c 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -16,7 +16,7 @@ static inline void __inquire_remote_apic(int apicid) } #endif /* CONFIG_SMP */ -static inline void inquire_remote_apic(int apicid) +static inline void default_inquire_remote_apic(int apicid) { if (apic_verbosity >= APIC_DEBUG) __inquire_remote_apic(apicid); diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h index 93207dfe8f5..0b884c03a3f 100644 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -1,6 +1,4 @@ #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define inquire_remote_apic (apic->inquire_remote_apic) - #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index 920dcfefa83..afe81439c7d 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -25,8 +25,4 @@ numaq_store_NMI_vector(unsigned short *high, unsigned short *low) *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); } -static inline void inquire_remote_apic(int apicid) -{ -} - #endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1492024592f..170adc5b6cb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -876,8 +876,8 @@ do_rest: else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - if (get_uv_system_type() != UV_NON_UNIQUE_APIC) - inquire_remote_apic(apicid); + if (apic->inquire_remote_apic) + apic->inquire_remote_apic(apicid); } } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index d9377af88cb..4d8b2d442ba 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -111,5 +111,5 @@ struct genapic apic_bigsmp = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index b004257035c..c12dd2300a5 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -92,5 +92,5 @@ struct genapic apic_default = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 62673a8002f..be090b2037c 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -148,5 +148,5 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 2c3341564d1..ddb50fba286 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -112,5 +112,5 @@ struct genapic apic_numaq = { .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = numaq_store_NMI_vector, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = NULL, }; diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index c2471a9fa8f..d5db3045437 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -91,5 +91,5 @@ struct genapic apic_summit = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, - .inquire_remote_apic = inquire_remote_apic, + .inquire_remote_apic = default_inquire_remote_apic, }; From 018e047f3a98bd8d9e9d78b19bc38415f0c34dd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:09:58 +0100 Subject: [PATCH 261/682] x86, ES7000: consolidate the APIC code Consolidate all the ES7000 APIC code into arch/x86/mach-generic/es7000.c. With this ES7000 ceases to rely on any subarchitecture include files. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 236 ------------------- arch/x86/include/asm/es7000/apicdef.h | 9 - arch/x86/include/asm/es7000/ipi.h | 22 -- arch/x86/include/asm/es7000/mpparse.h | 23 -- arch/x86/include/asm/es7000/wakecpu.h | 16 -- arch/x86/mach-generic/es7000.c | 314 ++++++++++++++++++++++++-- 6 files changed, 295 insertions(+), 325 deletions(-) delete mode 100644 arch/x86/include/asm/es7000/apic.h delete mode 100644 arch/x86/include/asm/es7000/apicdef.h delete mode 100644 arch/x86/include/asm/es7000/ipi.h delete mode 100644 arch/x86/include/asm/es7000/mpparse.h delete mode 100644 arch/x86/include/asm/es7000/wakecpu.h diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h deleted file mode 100644 index b89b45db735..00000000000 --- a/arch/x86/include/asm/es7000/apic.h +++ /dev/null @@ -1,236 +0,0 @@ -#ifndef __ASM_ES7000_APIC_H -#define __ASM_ES7000_APIC_H - -#include - -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) - -static inline int es7000_apic_id_registered(void) -{ - return 1; -} - -static inline const cpumask_t *target_cpus_cluster(void) -{ - return &CPU_MASK_ALL; -} - -static inline const cpumask_t *es7000_target_cpus(void) -{ - return &cpumask_of_cpu(smp_processor_id()); -} - -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static inline unsigned long es7000_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -extern void es7000_enable_apic_mode(void); - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long id; - id = xapic_phys_to_log_apicid(cpu); - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void es7000_init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static inline void es7000_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -extern int apic_version [MAX_APICS]; -static inline void es7000_setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); -} - -static inline int es7000_apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static inline int es7000_cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - mask = physid_mask_of_physid(id); - ++id; - return mask; -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int es7000_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - -extern unsigned int boot_cpu_physical_apicid; - -static inline int es7000_check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return (1); -} - -static inline unsigned int -es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = cpumask_first(cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return 0xFF; - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return es7000_cpu_to_logical_apicid(0); - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = first_cpu(*cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return es7000_cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - - -static inline unsigned int -es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = es7000_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -static inline int es7000_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_ES7000_APIC_H */ diff --git a/arch/x86/include/asm/es7000/apicdef.h b/arch/x86/include/asm/es7000/apicdef.h deleted file mode 100644 index c74881a7b3d..00000000000 --- a/arch/x86/include/asm/es7000/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_ES7000_APICDEF_H -#define __ASM_ES7000_APICDEF_H - -static inline unsigned int es7000_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#endif diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h deleted file mode 100644 index 81e77c812ba..00000000000 --- a/arch/x86/include/asm/es7000/ipi.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_ES7000_IPI_H -#define __ASM_ES7000_IPI_H - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void es7000_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void es7000_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void es7000_send_IPI_all(int vector) -{ - es7000_send_IPI_mask(cpu_online_mask, vector); -} - -#endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/es7000/mpparse.h b/arch/x86/include/asm/es7000/mpparse.h deleted file mode 100644 index 662eb1e574d..00000000000 --- a/arch/x86/include/asm/es7000/mpparse.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __ASM_ES7000_MPPARSE_H -#define __ASM_ES7000_MPPARSE_H - -#include - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - -#ifdef CONFIG_ACPI -static inline int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - -#endif /* __ASM_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h deleted file mode 100644 index 99c72be1840..00000000000 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __ASM_ES7000_WAKECPU_H -#define __ASM_ES7000_WAKECPU_H - -#define ES7000_TRAMPOLINE_PHYS_LOW 0x467 -#define ES7000_TRAMPOLINE_PHYS_HIGH 0x469 - -static inline void es7000_wait_for_init_deassert(atomic_t *deassert) -{ -#ifndef CONFIG_ES7000_CLUSTERED_APIC - while (!atomic_read(deassert)) - cpu_relax(); -#endif - return; -} - -#endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index be090b2037c..8b6113ec380 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -11,13 +11,300 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include #include +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +extern void es7000_enable_apic_mode(void); +extern int apic_version [MAX_APICS]; +extern u8 cpu_2_logical_apicid[]; +extern unsigned int boot_cpu_physical_apicid; + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#define apicid_cluster(apicid) (apicid & 0xF0) +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) + +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + + +static void es7000_wait_for_init_deassert(atomic_t *deassert) +{ +#ifndef CONFIG_ES7000_CLUSTERED_APIC + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +static unsigned int es7000_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#ifdef CONFIG_ACPI +static int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static void es7000_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static void es7000_send_IPI_all(int vector) +{ + es7000_send_IPI_mask(cpu_online_mask, vector); +} + +static int es7000_apic_id_registered(void) +{ + return 1; +} + +static const cpumask_t *target_cpus_cluster(void) +{ + return &CPU_MASK_ALL; +} + +static const cpumask_t *es7000_target_cpus(void) +{ + return &cpumask_of_cpu(smp_processor_id()); +} + +static unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static unsigned long es7000_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static unsigned long calculate_ldr(int cpu) +{ + unsigned long id = xapic_phys_to_log_apicid(cpu); + + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void es7000_init_apic_ldr_cluster(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); +} + +static int es7000_apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static int es7000_cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + + mask = physid_mask_of_physid(id); + ++id; + + return mask; +} + +/* Mapping from cpu number to logical apicid */ +static int es7000_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + +static int es7000_check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static unsigned int +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = cpumask_first(cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpumask_test_cpu(cpu, cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return es7000_cpu_to_logical_apicid(0); + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return es7000_cpu_to_logical_apicid(0); + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = es7000_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = es7000_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + void __init es7000_update_genapic_to_cluster(void) { apic->target_cpus = target_cpus_cluster; @@ -80,18 +367,6 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} struct genapic apic_es7000 = { @@ -140,10 +415,11 @@ struct genapic apic_es7000 = { .send_IPI_self = NULL, .wakeup_cpu = NULL, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, + .trampoline_phys_low = 0x467, + .trampoline_phys_high = 0x469, + + .wait_for_init_deassert = es7000_wait_for_init_deassert, /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, From 0939e4fd351c58d08d25650797749f18904461af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:16:25 +0100 Subject: [PATCH 262/682] x86, smp: eliminate asm/mach-default/mach_wakecpu.h Spread mach_wakecpu.h's definitions into apic.h and genapic.h and remove mach_wakecpu.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 15 +++++++++++ arch/x86/include/asm/genapic.h | 7 ++++++ .../include/asm/mach-default/mach_wakecpu.h | 25 ------------------- arch/x86/kernel/smpboot.c | 1 - arch/x86/mach-generic/bigsmp.c | 1 - arch/x86/mach-generic/default.c | 1 - arch/x86/mach-generic/es7000.c | 1 - arch/x86/mach-generic/summit.c | 1 - 8 files changed, 22 insertions(+), 30 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_wakecpu.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ab1d51a8855..e8f030440bc 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -41,6 +41,21 @@ extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; + +#ifdef CONFIG_SMP +extern void __inquire_remote_apic(int apicid); +#else /* CONFIG_SMP */ +static inline void __inquire_remote_apic(int apicid) +{ +} +#endif /* CONFIG_SMP */ + +static inline void default_inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} + /* * Basic functions accessing APICs. */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index e5f9c5696fb..1772dad01b1 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -113,4 +113,11 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif +static inline void default_wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h deleted file mode 100644 index b1cde560e4c..00000000000 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H - -static inline void default_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); - return; -} - -#ifdef CONFIG_SMP -extern void __inquire_remote_apic(int apicid); -#else /* CONFIG_SMP */ -static inline void __inquire_remote_apic(int apicid) -{ -} -#endif /* CONFIG_SMP */ - -static inline void default_inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 170adc5b6cb..1fdc1a7e7b5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -66,7 +66,6 @@ #include #include -#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 4d8b2d442ba..6fcccfb5918 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -17,7 +17,6 @@ #include #include #include -#include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index c12dd2300a5..e3c5114fd91 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -16,7 +16,6 @@ #include #include #include -#include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 8b6113ec380..bb11166b7c3 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -14,7 +14,6 @@ #include #include #include -#include #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index d5db3045437..673a64f8b46 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -16,7 +16,6 @@ #include #include #include -#include static int probe_summit(void) { From fb5b33c9f62ca9222c11841d61ddb7dc1a6552e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:29:27 +0100 Subject: [PATCH 263/682] x86: eliminate asm/mach-*/mach_mpparse.h Move the definition to mpparse.h. Signed-off-by: Ingo Molnar --- .../x86/include/asm/mach-default/mach_mpparse.h | 17 ----------------- .../x86/include/asm/mach-generic/mach_mpparse.h | 8 -------- arch/x86/include/asm/mpspec.h | 4 ++++ arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/es7000_32.c | 1 - arch/x86/kernel/mpparse.c | 1 - arch/x86/mach-generic/bigsmp.c | 1 - arch/x86/mach-generic/default.c | 1 - 8 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_mpparse.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_mpparse.h diff --git a/arch/x86/include/asm/mach-default/mach_mpparse.h b/arch/x86/include/asm/mach-default/mach_mpparse.h deleted file mode 100644 index af0da140df9..00000000000 --- a/arch/x86/include/asm/mach-default/mach_mpparse.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H -#define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H - -static inline int -generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpparse.h b/arch/x86/include/asm/mach-generic/mach_mpparse.h deleted file mode 100644 index 22bfb56f8fb..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_mpparse.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H -#define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H - -extern int generic_mps_oem_check(struct mpc_table *, char *, char *); - -extern int default_acpi_madt_oem_check(char *, char *); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 62d14ce3cd0..432e9cbc607 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -142,4 +142,8 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) extern physid_mask_t phys_cpu_present_map; +extern int generic_mps_oem_check(struct mpc_table *, char *, char *); + +extern int default_acpi_madt_oem_check(char *, char *); + #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 539163161a4..7b02a1cedca 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,7 +63,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_LOCAL_APIC #include -#include #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* X86 */ diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index d7f433ee602..8faea13c8fa 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b12fa5ce6f5..c6930162b3b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -32,7 +32,6 @@ #include #ifdef CONFIG_X86_32 #include -#include #endif /* diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 6fcccfb5918..626f45ca4e7 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -16,7 +16,6 @@ #include #include #include -#include static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index e3c5114fd91..6485e57e29b 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -15,7 +15,6 @@ #include #include #include -#include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { From b2af018ff26f1a2a026f548f7f0e552589905689 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:36:56 +0100 Subject: [PATCH 264/682] x86: remove mach_mpspec.h Move its definitions into mpspec.h. Signed-off-by: Ingo Molnar --- .../include/asm/mach-default/mach_mpspec.h | 12 --------- .../include/asm/mach-generic/mach_mpspec.h | 12 --------- arch/x86/include/asm/mpspec.h | 25 ++++++++++++++----- 3 files changed, 19 insertions(+), 30 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_mpspec.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_mpspec.h diff --git a/arch/x86/include/asm/mach-default/mach_mpspec.h b/arch/x86/include/asm/mach-default/mach_mpspec.h deleted file mode 100644 index e85ede686be..00000000000 --- a/arch/x86/include/asm/mach-default/mach_mpspec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H -#define _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -#if CONFIG_BASE_SMALL == 0 -#define MAX_MP_BUSSES 256 -#else -#define MAX_MP_BUSSES 32 -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_mpspec.h b/arch/x86/include/asm/mach-generic/mach_mpspec.h deleted file mode 100644 index 3bc40722657..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_mpspec.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H -#define _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H - -#define MAX_IRQ_SOURCES 256 - -/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ -/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ -#define MAX_MP_BUSSES 260 - -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */ diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 432e9cbc607..03fb0d39654 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -9,7 +9,18 @@ extern int apic_version[MAX_APICS]; extern int pic_mode; #ifdef CONFIG_X86_32 -#include + +/* + * Summit or generic (i.e. installer) kernels need lots of bus entries. + * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. + */ +#if CONFIG_BASE_SMALL == 0 +# define MAX_MP_BUSSES 260 +#else +# define MAX_MP_BUSSES 32 +#endif + +#define MAX_IRQ_SOURCES 256 extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; @@ -20,15 +31,15 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES]; extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; #endif -#define MAX_APICID 256 +#define MAX_APICID 256 -#else +#else /* CONFIG_X86_64: */ -#define MAX_MP_BUSSES 256 +#define MAX_MP_BUSSES 256 /* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ -#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) -#endif +#endif /* CONFIG_X86_64 */ extern void early_find_smp_config(void); extern void early_get_smp_config(void); @@ -146,4 +157,6 @@ extern int generic_mps_oem_check(struct mpc_table *, char *, char *); extern int default_acpi_madt_oem_check(char *, char *); +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + #endif /* _ASM_X86_MPSPEC_H */ From 1f75ed0c1311a50ed393bcac258de65680d360e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:36:56 +0100 Subject: [PATCH 265/682] x86: remove mach_apicdef.h Move its definitions into apic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 16 ++++++++++++++ arch/x86/include/asm/mach-default/mach_apic.h | 1 - .../include/asm/mach-default/mach_apicdef.h | 22 ------------------- .../include/asm/mach-generic/mach_apicdef.h | 8 ------- arch/x86/include/asm/smp.h | 4 ++-- arch/x86/kernel/apic.c | 1 - arch/x86/kernel/genapic_flat_64.c | 1 - arch/x86/kernel/io_apic.c | 1 - arch/x86/kernel/mpparse.c | 3 --- arch/x86/mach-generic/default.c | 1 - 10 files changed, 18 insertions(+), 40 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_apicdef.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_apicdef.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e8f030440bc..3a3202074c6 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -211,4 +211,20 @@ static inline void disable_local_APIC(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_X86_64 +#define SET_APIC_ID(x) (apic->set_apic_id(x)) +#else + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + +#endif + #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 2e4104cf348..b60b767d5be 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -3,7 +3,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -#include #include #define APIC_DFR_VALUE (APIC_DFR_FLAT) diff --git a/arch/x86/include/asm/mach-default/mach_apicdef.h b/arch/x86/include/asm/mach-default/mach_apicdef.h deleted file mode 100644 index 5141085962d..00000000000 --- a/arch/x86/include/asm/mach-default/mach_apicdef.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H -#define _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H - -#include - -#ifdef CONFIG_X86_64 -#define SET_APIC_ID(x) (apic->set_apic_id(x)) -#else - -static inline unsigned default_get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - - if (APIC_XAPIC(ver)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; -} - -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apicdef.h b/arch/x86/include/asm/mach-generic/mach_apicdef.h deleted file mode 100644 index 61caa65b13f..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_apicdef.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_APICDEF_H -#define _ASM_X86_MACH_GENERIC_MACH_APICDEF_H - -#ifndef APIC_DEFINITION -#include -#endif - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c63d480802a..d4ac4de4bce 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,6 +173,8 @@ extern int safe_smp_processor_id(void); #endif +#include + #ifdef CONFIG_X86_LOCAL_APIC #ifndef CONFIG_X86_64 @@ -182,7 +184,6 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -#include static inline unsigned int read_apic_id(void) { unsigned int reg; @@ -197,7 +198,6 @@ static inline unsigned int read_apic_id(void) # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); # else -#include static inline int hard_smp_processor_id(void) { /* we don't want to mark this access volatile - bad code generation */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 84b8e7c57ab..e6220809ca1 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -50,7 +50,6 @@ #include #include -#include #include /* diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index e9237f59928..19bffb3f732 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -19,7 +19,6 @@ #include #include #include -#include #ifdef CONFIG_ACPI #include diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e90970ce21a..abae81989c2 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -64,7 +64,6 @@ #include #include -#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index c6930162b3b..a1452a53d14 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -30,9 +30,6 @@ #include #include -#ifdef CONFIG_X86_32 -#include -#endif /* * Checksum an MP configuration block. diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 6485e57e29b..07817b2a787 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include From 328386d7ab600aa0993a1226f5817ac30a735724 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:50:18 +0100 Subject: [PATCH 266/682] x86, smp: refactor ->wake_cpu - remove macro wrappers Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mach-default/mach_apic.h | 2 -- arch/x86/include/asm/mach-generic/mach_apic.h | 2 -- arch/x86/kernel/setup.c | 5 ++--- arch/x86/kernel/smpboot.c | 4 ++-- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index b60b767d5be..bae053cdcde 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -19,10 +19,8 @@ static inline const struct cpumask *default_target_cpus(void) #ifdef CONFIG_X86_64 #include #define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) -#define wakeup_secondary_cpu (apic->wakeup_cpu) extern void default_setup_apic_routing(void); #else -#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. * diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index ca460e45991..96f217f819e 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -3,8 +3,6 @@ #include -#define wakeup_secondary_cpu (apic->wakeup_cpu) - extern void generic_bigsmp_probe(void); #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a58e9f5e603..6b27f6dc7bf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -589,9 +589,8 @@ early_param("elfcorehdr", setup_elfcorehdr); static int __init default_update_genapic(void) { #ifdef CONFIG_X86_SMP -# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) - apic->wakeup_cpu = wakeup_secondary_cpu_via_init; -# endif + if (!apic->wakeup_cpu) + apic->wakeup_cpu = wakeup_secondary_cpu_via_init; #endif return 0; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1fdc1a7e7b5..3fed177f345 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -750,7 +750,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. + * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. */ { unsigned long boot_error = 0; @@ -841,7 +841,7 @@ do_rest: /* * Starting actual IPI sequence... */ - boot_error = wakeup_secondary_cpu(apicid, start_ip); + boot_error = apic->wakeup_cpu(apicid, start_ip); if (!boot_error) { /* From 5a44632f77a9c867621f7bf80c233eac75fea672 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 18:47:24 +0100 Subject: [PATCH 267/682] x86, numaq: consolidate code Move all the NUMAQ subarch definitions into numaq.c. With this it ceases to depend on build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numaq/apic.h | 122 ------------------- arch/x86/include/asm/numaq/apicdef.h | 9 -- arch/x86/include/asm/numaq/ipi.h | 22 ---- arch/x86/include/asm/numaq/mpparse.h | 6 - arch/x86/include/asm/numaq/wakecpu.h | 28 ----- arch/x86/mach-generic/numaq.c | 171 ++++++++++++++++++++++++++- arch/x86/pci/numaq_32.c | 2 +- 7 files changed, 167 insertions(+), 193 deletions(-) delete mode 100644 arch/x86/include/asm/numaq/apic.h delete mode 100644 arch/x86/include/asm/numaq/apicdef.h delete mode 100644 arch/x86/include/asm/numaq/ipi.h delete mode 100644 arch/x86/include/asm/numaq/mpparse.h delete mode 100644 arch/x86/include/asm/numaq/wakecpu.h diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h deleted file mode 100644 index ce95e79f723..00000000000 --- a/arch/x86/include/asm/numaq/apic.h +++ /dev/null @@ -1,122 +0,0 @@ -#ifndef __ASM_NUMAQ_APIC_H -#define __ASM_NUMAQ_APIC_H - -#include -#include -#include - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline const cpumask_t *numaq_target_cpus(void) -{ - return &CPU_MASK_ALL; -} - -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); -} - -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; - -static inline int numaq_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - return physid_mask_of_physid(cpu + 4*node); -} - -extern void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return 0x0F; -} - -static inline unsigned int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - return 0x0F; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_NUMAQ_APIC_H */ diff --git a/arch/x86/include/asm/numaq/apicdef.h b/arch/x86/include/asm/numaq/apicdef.h deleted file mode 100644 index cd927d5bd50..00000000000 --- a/arch/x86/include/asm/numaq/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_NUMAQ_APICDEF_H -#define __ASM_NUMAQ_APICDEF_H - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -#endif diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h deleted file mode 100644 index 5dbc4b4cd5e..00000000000 --- a/arch/x86/include/asm/numaq/ipi.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_NUMAQ_IPI_H -#define __ASM_NUMAQ_IPI_H - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -#endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/include/asm/numaq/mpparse.h b/arch/x86/include/asm/numaq/mpparse.h deleted file mode 100644 index a2eeefcd1cc..00000000000 --- a/arch/x86/include/asm/numaq/mpparse.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_NUMAQ_MPPARSE_H -#define __ASM_NUMAQ_MPPARSE_H - -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - -#endif /* __ASM_NUMAQ_MPPARSE_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h deleted file mode 100644 index afe81439c7d..00000000000 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __ASM_NUMAQ_WAKECPU_H -#define __ASM_NUMAQ_WAKECPU_H - -/* This file copes with machines that wakeup secondary CPUs by NMIs */ - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline void -numaq_store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Storing NMI vector\n"); - *high = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); - *low = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); -} - -#endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index ddb50fba286..c221cfb2c2d 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -11,14 +11,175 @@ #include #include #include -#include +#include #include -#include -#include -#include -#include #include +#include +#include +#include +#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline unsigned int numaq_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0x0F; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void numaq_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void numaq_send_IPI_all(int vector) +{ + numaq_send_IPI_mask(cpu_online_mask, vector); +} + +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: + */ +static inline void numaq_smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); + *low = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); +} + +static inline const cpumask_t *numaq_target_cpus(void) +{ + return &CPU_MASK_ALL; +} + +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long numaq_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int numaq_apic_id_registered(void) +{ + return 1; +} + +static inline void numaq_init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void numaq_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int numaq_multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; + +static inline int numaq_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int numaq_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int numaq_apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +{ + int node = numaq_apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return 0x0F; +} + +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + return 0x0F; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { numaq_mps_oem_check(mpc, oem, productid); diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 2089354968a..1b2d773612e 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include From b11b867f78910192fc54bd0d09148cf768c7aaad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 18:49:31 +0100 Subject: [PATCH 268/682] x86, summit: consolidate code Consolidate all the Summit code into a single file: arch/x86/kernel/summit_32.c. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/summit/apic.h | 195 ------------ arch/x86/include/asm/summit/apicdef.h | 9 - arch/x86/include/asm/summit/ipi.h | 26 -- arch/x86/include/asm/summit/mpparse.h | 109 ------- arch/x86/kernel/summit_32.c | 417 +++++++++++++++++++++++++- arch/x86/mach-generic/Makefile | 1 - arch/x86/mach-generic/summit.c | 94 ------ 7 files changed, 416 insertions(+), 435 deletions(-) delete mode 100644 arch/x86/include/asm/summit/apic.h delete mode 100644 arch/x86/include/asm/summit/apicdef.h delete mode 100644 arch/x86/include/asm/summit/ipi.h delete mode 100644 arch/x86/include/asm/summit/mpparse.h delete mode 100644 arch/x86/mach-generic/summit.c diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h deleted file mode 100644 index 15b8dbd19e1..00000000000 --- a/arch/x86/include/asm/summit/apic.h +++ /dev/null @@ -1,195 +0,0 @@ -#ifndef __ASM_SUMMIT_APIC_H -#define __ASM_SUMMIT_APIC_H - -#include -#include - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline const cpumask_t *summit_target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return &cpumask_of_cpu(0); -} - -static inline unsigned long -summit_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long summit_check_apicid_present(int bit) -{ - return 1; -} - -#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) - -extern u8 cpu_2_logical_apicid[]; - -static inline void summit_init_apic_ldr(void) -{ - unsigned long val, id; - int count = 0; - u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = (u8)apicid_cluster(my_id); -#ifdef CONFIG_SMP - u8 lid; - int i; - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = nr_cpu_ids; --i >= 0; ) { - lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) - ++count; - } -#endif - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - id = my_cluster | (1UL << count); - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static inline int summit_apic_id_registered(void) -{ - return 1; -} - -static inline void summit_setup_apic_routing(void) -{ - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); -} - -static inline int summit_apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -/* Mapping from cpu number to logical apicid */ -static inline int summit_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static inline int summit_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline physid_mask_t -summit_ioapic_phys_id_map(physid_mask_t phys_id_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); -} - -static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) -{ - return physid_mask_of_physid(0); -} - -static inline void summit_setup_portio_remap(void) -{ -} - -static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set >= nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = first_cpu(*cpumask); - apicid = summit_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = summit_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static inline unsigned int -summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = summit_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = summit_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -/* - * cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -#endif /* __ASM_SUMMIT_APIC_H */ diff --git a/arch/x86/include/asm/summit/apicdef.h b/arch/x86/include/asm/summit/apicdef.h deleted file mode 100644 index c24b0df2dec..00000000000 --- a/arch/x86/include/asm/summit/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_SUMMIT_APICDEF_H -#define __ASM_SUMMIT_APICDEF_H - -static inline unsigned summit_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#endif diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h deleted file mode 100644 index f87a43fe0ae..00000000000 --- a/arch/x86/include/asm/summit/ipi.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __ASM_SUMMIT_IPI_H -#define __ASM_SUMMIT_IPI_H - -void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); - -static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void summit_send_IPI_allbutself(int vector) -{ - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - summit_send_IPI_mask(&mask, vector); -} - -static inline void summit_send_IPI_all(int vector) -{ - summit_send_IPI_mask(&cpu_online_map, vector); -} - -#endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/include/asm/summit/mpparse.h b/arch/x86/include/asm/summit/mpparse.h deleted file mode 100644 index 4bbcce39acb..00000000000 --- a/arch/x86/include/asm/summit/mpparse.h +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef __ASM_SUMMIT_MPPARSE_H -#define __ASM_SUMMIT_MPPARSE_H - -#include - -extern int use_cyclone; - -#ifdef CONFIG_X86_SUMMIT_NUMA -extern void setup_summit(void); -#else -#define setup_summit() {} -#endif - -static inline int -summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -/* Hook from generic ACPI tables.c */ -static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERVIGIL", 8) - || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -struct rio_table_hdr { - unsigned char version; /* Version number of this data structure */ - /* Version 3 adds chassis_num & WP_index */ - unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ - unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ -} __attribute__((packed)); - -struct scal_detail { - unsigned char node_id; /* Scalability Node ID */ - unsigned long CBAR; /* Address of 1MB register space */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF = None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port2node; /* Node ID port connected to: 0xFF = None */ - unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - unsigned char node_id; /* RIO Node ID */ - unsigned long BBAR; /* Address of 1MB register space */ - unsigned char type; /* Type of device */ - unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ - /* For CYC: Node ID of Twister that owns this CYC */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF=None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ - /* For CYC: 0 */ - unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie:*/ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - /* For CYC: Bits0:7 Reserved */ - unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - /* For CYC: No meaning */ - unsigned char chassis_num; /* 1 based Chassis number */ - /* For LookOut WPEGs this field indicates the */ - /* Expansion Chassis #, enumerated from Boot */ - /* Node WPEG external port, then Boot Node CYC */ - /* external port, then Next Vigil chassis WPEG */ - /* external port, etc. */ - /* Shared Lookouts have only 1 chassis number (the */ - /* first one assigned) */ -} __attribute__((packed)); - - -typedef enum { - CompatTwister = 0, /* Compatibility Twister */ - AltTwister = 1, /* Alternate Twister of internal 8-way */ - CompatCyclone = 2, /* Compatibility Cyclone */ - AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ - CompatWPEG = 4, /* Compatibility WPEG */ - AltWPEG = 5, /* Second Planar WPEG */ - LookOutAWPEG = 6, /* LookOut WPEG */ - LookOutBWPEG = 7, /* LookOut WPEG */ -} node_type; - -static inline int is_WPEG(struct rio_detail *rio){ - return (rio->type == CompatWPEG || rio->type == AltWPEG || - rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); -} - -#endif /* __ASM_SUMMIT_MPPARSE_H */ diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 7b987852e87..3b60dd5e57f 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -30,7 +30,364 @@ #include #include #include -#include + +/* + * APIC driver for the IBM "Summit" chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline unsigned summit_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); + +static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void summit_send_IPI_allbutself(int vector) +{ + cpumask_t mask = cpu_online_map; + cpu_clear(smp_processor_id(), mask); + + if (!cpus_empty(mask)) + summit_send_IPI_mask(&mask, vector); +} + +static inline void summit_send_IPI_all(int vector) +{ + summit_send_IPI_mask(&cpu_online_map, vector); +} + +#include + +extern int use_cyclone; + +#ifdef CONFIG_X86_SUMMIT_NUMA +extern void setup_summit(void); +#else +#define setup_summit() {} +#endif + +static inline int +summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) + || !strncmp(productid, "EXA", 3) + || !strncmp(productid, "RUTHLESS SMP", 12))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +/* Hook from generic ACPI tables.c */ +static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERVIGIL", 8) + || !strncmp(oem_table_id, "EXA", 3))){ + mark_tsc_unstable("Summit based system"); + use_cyclone = 1; /*enable cyclone-timer*/ + setup_summit(); + return 1; + } + return 0; +} + +struct rio_table_hdr { + unsigned char version; /* Version number of this data structure */ + /* Version 3 adds chassis_num & WP_index */ + unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ + unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ +} __attribute__((packed)); + +struct scal_detail { + unsigned char node_id; /* Scalability Node ID */ + unsigned long CBAR; /* Address of 1MB register space */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF = None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port2node; /* Node ID port connected to: 0xFF = None */ + unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + unsigned char node_id; /* RIO Node ID */ + unsigned long BBAR; /* Address of 1MB register space */ + unsigned char type; /* Type of device */ + unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ + /* For CYC: Node ID of Twister that owns this CYC */ + unsigned char port0node; /* Node ID port connected to: 0xFF=None */ + unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char port1node; /* Node ID port connected to: 0xFF=None */ + unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ + unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ + /* For CYC: 0 */ + unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie:*/ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + /* For CYC: Bits0:7 Reserved */ + unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + /* For CYC: No meaning */ + unsigned char chassis_num; /* 1 based Chassis number */ + /* For LookOut WPEGs this field indicates the */ + /* Expansion Chassis #, enumerated from Boot */ + /* Node WPEG external port, then Boot Node CYC */ + /* external port, then Next Vigil chassis WPEG */ + /* external port, etc. */ + /* Shared Lookouts have only 1 chassis number (the */ + /* first one assigned) */ +} __attribute__((packed)); + + +typedef enum { + CompatTwister = 0, /* Compatibility Twister */ + AltTwister = 1, /* Alternate Twister of internal 8-way */ + CompatCyclone = 2, /* Compatibility Cyclone */ + AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ + CompatWPEG = 4, /* Compatibility WPEG */ + AltWPEG = 5, /* Second Planar WPEG */ + LookOutAWPEG = 6, /* LookOut WPEG */ + LookOutBWPEG = 7, /* LookOut WPEG */ +} node_type; + +static inline int is_WPEG(struct rio_detail *rio){ + return (rio->type == CompatWPEG || rio->type == AltWPEG || + rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); +} + + +/* In clustered mode, the high nibble of APIC ID is a cluster number. + * The low nibble is a 4-bit bitmap. */ +#define XAPIC_DEST_CPUS_SHIFT 4 +#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) + +#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline const cpumask_t *summit_target_cpus(void) +{ + /* CPU_MASK_ALL (0xff) has undefined behaviour with + * dest_LowestPrio mode logical clustered apic interrupt routing + * Just start on cpu 0. IRQ balancing will spread load + */ + return &cpumask_of_cpu(0); +} + +static inline unsigned long +summit_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +/* we don't use the phys_cpu_present_map to indicate apicid presence */ +static inline unsigned long summit_check_apicid_present(int bit) +{ + return 1; +} + +#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) + +extern u8 cpu_2_logical_apicid[]; + +static inline void summit_init_apic_ldr(void) +{ + unsigned long val, id; + int count = 0; + u8 my_id = (u8)hard_smp_processor_id(); + u8 my_cluster = (u8)apicid_cluster(my_id); +#ifdef CONFIG_SMP + u8 lid; + int i; + + /* Create logical APIC IDs by counting CPUs already in cluster. */ + for (count = 0, i = nr_cpu_ids; --i >= 0; ) { + lid = cpu_2_logical_apicid[i]; + if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) + ++count; + } +#endif + /* We only have a 4 wide bitmap in cluster mode. If a deranged + * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ + BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); + id = my_cluster | (1UL << count); + apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(id); + apic_write(APIC_LDR, val); +} + +static inline int summit_apic_id_registered(void) +{ + return 1; +} + +static inline void summit_setup_apic_routing(void) +{ + printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", + nr_ioapics); +} + +static inline int summit_apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} + +/* Mapping from cpu number to logical apicid */ +static inline int summit_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static inline int summit_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline physid_mask_t +summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0x0F); +} + +static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) +{ + return physid_mask_of_physid(0); +} + +static inline void summit_setup_portio_remap(void) +{ +} + +static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set >= nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = summit_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = summit_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static inline unsigned int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = summit_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = summit_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +/* + * cpuid returns the value latched in the HW at reset, not the APIC ID + * register's value. For any box whose BIOS changes APIC IDs, like + * clustered APIC systems, we must use hard_smp_processor_id. + * + * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. + */ +static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + +static int probe_summit(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; @@ -186,3 +543,61 @@ void __init setup_summit(void) next_wpeg = 0; } while (next_wpeg != 0); } + + +struct genapic apic_summit = { + + .name = "summit", + .probe = probe_summit, + .acpi_madt_oem_check = summit_acpi_madt_oem_check, + .apic_id_registered = summit_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = summit_target_cpus, + .disable_esr = 1, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = summit_check_apicid_used, + .check_apicid_present = summit_check_apicid_present, + + .vector_allocation_domain = summit_vector_allocation_domain, + .init_apic_ldr = summit_init_apic_ldr, + + .ioapic_phys_id_map = summit_ioapic_phys_id_map, + .setup_apic_routing = summit_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = summit_apicid_to_node, + .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, + .cpu_present_to_apicid = summit_cpu_present_to_apicid, + .apicid_to_cpu_present = summit_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = summit_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = summit_phys_pkg_id, + .mps_oem_check = summit_mps_oem_check, + + .get_apic_id = summit_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, + + .send_IPI_mask = summit_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = summit_send_IPI_allbutself, + .send_IPI_all = summit_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 6730f4e7c74..78ab5735cb8 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -6,6 +6,5 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o obj-$(CONFIG_X86_NUMAQ) += numaq.o -obj-$(CONFIG_X86_SUMMIT) += summit.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o obj-$(CONFIG_X86_ES7000) += es7000.o diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c deleted file mode 100644 index 673a64f8b46..00000000000 --- a/arch/x86/mach-generic/summit.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * APIC driver for the IBM "Summit" chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int probe_summit(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} - -struct genapic apic_summit = { - - .name = "summit", - .probe = probe_summit, - .acpi_madt_oem_check = summit_acpi_madt_oem_check, - .apic_id_registered = summit_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = summit_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = summit_check_apicid_used, - .check_apicid_present = summit_check_apicid_present, - - .vector_allocation_domain = summit_vector_allocation_domain, - .init_apic_ldr = summit_init_apic_ldr, - - .ioapic_phys_id_map = summit_ioapic_phys_id_map, - .setup_apic_routing = summit_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = summit_apicid_to_node, - .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, - .cpu_present_to_apicid = summit_cpu_present_to_apicid, - .apicid_to_cpu_present = summit_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = summit_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = summit_phys_pkg_id, - .mps_oem_check = summit_mps_oem_check, - - .get_apic_id = summit_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, - - .send_IPI_mask = summit_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = summit_send_IPI_allbutself, - .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, - .inquire_remote_apic = default_inquire_remote_apic, -}; From a448720ca3248e8a7a426336885549d6e923fd8e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 15:42:23 -0800 Subject: [PATCH 269/682] x86: unify asm/io.h: IO_SPACE_LIMIT Impact: Cleanup (trivial unification) Move common define IO_SPACE_LIMIT from to . Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io.h | 1 + arch/x86/include/asm/io_32.h | 2 -- arch/x86/include/asm/io_64.h | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 05cfed4485f..975207c08b3 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -106,5 +106,6 @@ extern void __iomem *early_memremap(unsigned long offset, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); +#define IO_SPACE_LIMIT 0xffff #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h index d8e242e1b39..e08d8ed05a1 100644 --- a/arch/x86/include/asm/io_32.h +++ b/arch/x86/include/asm/io_32.h @@ -37,8 +37,6 @@ * - Arnaldo Carvalho de Melo */ -#define IO_SPACE_LIMIT 0xffff - #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index 563c16270ba..1131d8ea2c6 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h @@ -136,8 +136,6 @@ __OUTS(b) __OUTS(w) __OUTS(l) -#define IO_SPACE_LIMIT 0xffff - #if defined(__KERNEL__) && defined(__x86_64__) #include From 7c20dcc545d78946e40e8fab99637fe815b1d211 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 11:29:22 +0100 Subject: [PATCH 270/682] x86, summit: consolidate code, fix Build fix for !NUMA Summit. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/summit_32.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada1..a382ca6f6a1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o -obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o +obj-$(CONFIG_X86_SUMMIT) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 3b60dd5e57f..84ff9ebbcc9 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -389,6 +389,7 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } +#ifdef CONFIG_X86_SUMMIT_NUMA static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; @@ -543,7 +544,7 @@ void __init setup_summit(void) next_wpeg = 0; } while (next_wpeg != 0); } - +#endif struct genapic apic_summit = { From 1dcdd3d15ecea0c22a09d4d001a39d425fceff2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 17:55:37 +0100 Subject: [PATCH 271/682] x86: remove mach_apic.h Spread mach_apic.h definitions into genapic.h. (with some knock-on effects on smp.h and apic.h.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/genapic.h | 139 +++++++++++++++++ arch/x86/include/asm/mach-default/mach_apic.h | 144 ------------------ arch/x86/include/asm/mach-generic/mach_apic.h | 8 - arch/x86/include/asm/smp.h | 19 --- arch/x86/kernel/acpi/boot.c | 14 +- arch/x86/kernel/apic.c | 22 ++- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/mpparse.c | 3 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tlb_uv.c | 2 +- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mach-generic/probe.c | 5 - 22 files changed, 175 insertions(+), 207 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_apic.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_apic.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3a3202074c6..6a77068e261 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -215,7 +215,7 @@ static inline void disable_local_APIC(void) { } #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -static inline unsigned default_get_apic_id(unsigned long x) +static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 1772dad01b1..ce3655a4948 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -120,4 +120,143 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) return; } +extern void generic_bigsmp_probe(void); + + +#ifdef CONFIG_X86_LOCAL_APIC + +#include + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = *(u32 *)(APIC_BASE + APIC_ID); + + return apic->get_apic_id(reg); +} + +#ifdef CONFIG_X86_64 +extern void default_setup_apic_routing(void); +#else + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +extern void default_init_apic_ldr(void); + +static inline int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0]; +} + +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); +} + +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +static inline void default_setup_apic_routing(void) +{ +#ifdef CONFIG_X86_IO_APIC + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Flat", nr_ioapics); +#endif +} + +extern int default_apicid_to_node(int logical_apicid); + +#endif + +static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long default_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +/* Mapping from cpu number to logical apicid */ +static inline int default_cpu_to_logical_apicid(int cpu) +{ + return 1 << cpu; +} + +static inline int __default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline int +__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +static inline int +default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +#endif + +static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +#endif /* CONFIG_X86_LOCAL_APIC */ #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h deleted file mode 100644 index bae053cdcde..00000000000 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_APIC_H -#define _ASM_X86_MACH_DEFAULT_MACH_APIC_H - -#ifdef CONFIG_X86_LOCAL_APIC - -#include - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -#ifdef CONFIG_X86_64 -#include -#define read_apic_id() (apic->get_apic_id(apic_read(APIC_ID))) -extern void default_setup_apic_routing(void); -#else -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void default_init_apic_ldr(void) -{ - unsigned long val; - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write(APIC_LDR, val); -} - -static inline int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0]; -} - -static inline unsigned int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - - return (unsigned int)(mask1 & mask2 & mask3); -} - -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static inline void default_setup_apic_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Flat", nr_ioapics); -#endif -} - -static inline int default_apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -#endif - -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long default_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - -/* Mapping from cpu number to logical apicid */ -static inline int default_cpu_to_logical_apicid(int cpu) -{ - return 1 << cpu; -} - -static inline int __default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline int -__default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - -#ifdef CONFIG_X86_32 -static inline int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -static inline int -default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); -} -#else -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); -#endif - -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -#endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h deleted file mode 100644 index 96f217f819e..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_APIC_H -#define _ASM_X86_MACH_GENERIC_MACH_APIC_H - -#include - -extern void generic_bigsmp_probe(void); - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d4ac4de4bce..47d0e21f2b9 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -173,8 +173,6 @@ extern int safe_smp_processor_id(void); #endif -#include - #ifdef CONFIG_X86_LOCAL_APIC #ifndef CONFIG_X86_64 @@ -184,26 +182,9 @@ static inline int logical_smp_processor_id(void) return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); } -static inline unsigned int read_apic_id(void) -{ - unsigned int reg; - - reg = *(u32 *)(APIC_BASE + APIC_ID); - - return apic->get_apic_id(reg); -} #endif - -# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) extern int hard_smp_processor_id(void); -# else -static inline int hard_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return read_apic_id(); -} -# endif /* APIC_DEFINITION */ #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7b02a1cedca..cb8b52785e3 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -42,10 +42,6 @@ #include #include -#ifdef CONFIG_X86_LOCAL_APIC -# include -#endif - static int __initdata acpi_force = 0; u32 acpi_rsdt_forced; #ifdef CONFIG_ACPI @@ -56,15 +52,7 @@ int acpi_disabled = 1; EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 - -#include - -#else /* X86 */ - -#ifdef CONFIG_X86_LOCAL_APIC -#include -#endif /* CONFIG_X86_LOCAL_APIC */ - +# include #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index e6220809ca1..41a0ba34d6b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,7 +49,6 @@ #include #include -#include #include /* @@ -1910,11 +1909,30 @@ void __cpuinit generic_processor_info(int apicid, int version) set_cpu_present(cpu, true); } -#ifdef CONFIG_X86_64 int hard_smp_processor_id(void) { return read_apic_id(); } + +void default_init_apic_ldr(void) +{ + unsigned long val; + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); + apic_write(APIC_LDR, val); +} + +#ifdef CONFIG_X86_32 +int default_apicid_to_node(int logical_apicid) +{ +#ifdef CONFIG_SMP + return apicid_2_node[hard_smp_processor_id()]; +#else + return 0; +#endif +} #endif /* diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index e8bb892c09f..4a48bb40974 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,7 +7,7 @@ #include #include -#include +#include struct cpuid_bit { u16 feature; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c878f6aa91..ff4d7b9e32e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,7 +12,7 @@ # include #endif -#include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 055b9c3a660..c4bdc7f0020 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,7 +26,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #include #include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5deefae9064..1cef0aa5e5d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,7 +24,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #endif static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index abae81989c2..e0744ea6d0f 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -63,7 +63,7 @@ #include #include -#include +#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index e16c41b2e4e..50076d92fbc 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -19,7 +19,7 @@ #include #ifdef CONFIG_X86_32 -#include +#include #include /* diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e0f29be8ab0..d802c844991 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -231,7 +231,7 @@ unsigned int do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -#include +#include /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a1452a53d14..94fe71029c3 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -29,8 +29,7 @@ #include #include -#include - +#include /* * Checksum an MP configuration block. */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6b27f6dc7bf..92e42939fb0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -97,7 +97,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index c48ba6cc32a..892e7c389be 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3fed177f345..489fde9d947 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 89fce1b6d01..755ede02b13 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -20,7 +20,7 @@ #include #include -#include +#include static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 2ed5bdf15c9..3bd7f47a91b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -34,7 +34,7 @@ #include -#include "mach_apic.h" +#include #include diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 07817b2a787..7d5123e474e 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index ab68c6e5c48..c03c7222132 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -154,8 +154,3 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } return 0; } - -int hard_smp_processor_id(void) -{ - return apic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); -} From 83d7aeabe4cf20e59b5d7fd56a75cfd0e0b6b880 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 28 Jan 2009 17:52:57 -0800 Subject: [PATCH 272/682] x86: remove mach_apic.h, fix Use apic_read() instead of open-coded mmio. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index ce3655a4948..ccfcd19d5b7 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -145,7 +145,7 @@ static inline unsigned int read_apic_id(void) { unsigned int reg; - reg = *(u32 *)(APIC_BASE + APIC_ID); + reg = apic_read(APIC_ID); return apic->get_apic_id(reg); } From 2e096df8edefad78155bb406a5a86c182b17786e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:01:05 +0100 Subject: [PATCH 273/682] x86, ES7000: Consolidate code Move all ES7000 code into arch/x86/kernel/es7000_32.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 428 +++++++++++++++++++++++++++++++++ arch/x86/mach-generic/Makefile | 1 - arch/x86/mach-generic/es7000.c | 427 -------------------------------- 3 files changed, 428 insertions(+), 428 deletions(-) delete mode 100644 arch/x86/mach-generic/es7000.c diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 8faea13c8fa..078364ccfa0 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -372,3 +372,431 @@ void __init es7000_enable_apic_mode(void) mip_status); } } + +/* + * APIC driver for the Unisys ES7000 chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +extern void es7000_enable_apic_mode(void); +extern int apic_version [MAX_APICS]; +extern u8 cpu_2_logical_apicid[]; +extern unsigned int boot_cpu_physical_apicid; + +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#define apicid_cluster(apicid) (apicid & 0xF0) +#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) + +static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + + +static void es7000_wait_for_init_deassert(atomic_t *deassert) +{ +#ifndef CONFIG_ES7000_CLUSTERED_APIC + while (!atomic_read(deassert)) + cpu_relax(); +#endif + return; +} + +static unsigned int es7000_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#ifdef CONFIG_ACPI +static int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} +#endif + +static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static void es7000_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static void es7000_send_IPI_all(int vector) +{ + es7000_send_IPI_mask(cpu_online_mask, vector); +} + +static int es7000_apic_id_registered(void) +{ + return 1; +} + +static const cpumask_t *target_cpus_cluster(void) +{ + return &CPU_MASK_ALL; +} + +static const cpumask_t *es7000_target_cpus(void) +{ + return &cpumask_of_cpu(smp_processor_id()); +} + +static unsigned long +es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} +static unsigned long es7000_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static unsigned long calculate_ldr(int cpu) +{ + unsigned long id = xapic_phys_to_log_apicid(cpu); + + return (SET_APIC_LOGICAL_ID(id)); +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LdR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static void es7000_init_apic_ldr_cluster(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static void es7000_setup_apic_routing(void) +{ + int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + (apic_version[apic] == 0x14) ? + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); +} + +static int es7000_apicid_to_node(int logical_apicid) +{ + return 0; +} + + +static int es7000_cpu_present_to_apicid(int mps_cpu) +{ + if (!mps_cpu) + return boot_cpu_physical_apicid; + else if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +{ + static int id = 0; + physid_mask_t mask; + + mask = physid_mask_of_physid(id); + ++id; + + return mask; +} + +/* Mapping from cpu number to logical apicid */ +static int es7000_cpu_to_logical_apicid(int cpu) +{ +#ifdef CONFIG_SMP + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +#else + return logical_smp_processor_id(); +#endif +} + +static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xff); +} + +static int es7000_check_phys_apicid_present(int cpu_physical_apicid) +{ + boot_cpu_physical_apicid = read_apic_id(); + return (1); +} + +static unsigned int +es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return 0xFF; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = cpumask_first(cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + + while (cpus_found < num_bits_set) { + if (cpumask_test_cpu(cpu, cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return 0xFF; + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + int cpus_found = 0; + int num_bits_set; + int apicid; + int cpu; + + num_bits_set = cpus_weight(*cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + return es7000_cpu_to_logical_apicid(0); + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of target_cpus(): + */ + cpu = first_cpu(*cpumask); + apicid = es7000_cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); + + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)) { + printk ("%s: Not a valid mask!\n", __func__); + + return es7000_cpu_to_logical_apicid(0); + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static unsigned int +es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int apicid = es7000_cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = es7000_cpu_mask_to_apicid(cpumask); + + free_cpumask_var(cpumask); + + return apicid; +} + +static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +void __init es7000_update_genapic_to_cluster(void) +{ + apic->target_cpus = target_cpus_cluster; + apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; + + apic->init_apic_ldr = es7000_init_apic_ldr_cluster; + + apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; +} + +static int probe_es7000(void) +{ + /* probed later in mptable/ACPI hooks */ + return 0; +} + +static __init int +es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + if (mpc->oemptr) { + struct mpc_oemtable *oem_table = + (struct mpc_oemtable *)mpc->oemptr; + + if (!strncmp(oem, "UNISYS", 6)) + return parse_unisys_oem((char *)oem_table); + } + return 0; +} + +#ifdef CONFIG_ACPI +/* Hook from generic ACPI tables.c */ +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + unsigned long oem_addr = 0; + int check_dsdt; + int ret = 0; + + /* check dsdt at first to avoid clear fix_map for oem_addr */ + check_dsdt = es7000_check_dsdt(); + + if (!find_unisys_acpi_oem_table(&oem_addr)) { + if (check_dsdt) + ret = parse_unisys_oem((char *)oem_addr); + else { + setup_unisys(); + ret = 1; + } + /* + * we need to unmap it + */ + unmap_unisys_acpi_oem_table(oem_addr); + } + return ret; +} +#else +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} +#endif + + +struct genapic apic_es7000 = { + + .name = "es7000", + .probe = probe_es7000, + .acpi_madt_oem_check = es7000_acpi_madt_oem_check, + .apic_id_registered = es7000_apic_id_registered, + + .irq_delivery_mode = dest_Fixed, + /* phys delivery to target CPUs: */ + .irq_dest_mode = 0, + + .target_cpus = es7000_target_cpus, + .disable_esr = 1, + .dest_logical = 0, + .check_apicid_used = es7000_check_apicid_used, + .check_apicid_present = es7000_check_apicid_present, + + .vector_allocation_domain = es7000_vector_allocation_domain, + .init_apic_ldr = es7000_init_apic_ldr, + + .ioapic_phys_id_map = es7000_ioapic_phys_id_map, + .setup_apic_routing = es7000_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = es7000_apicid_to_node, + .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, + .cpu_present_to_apicid = es7000_cpu_present_to_apicid, + .apicid_to_cpu_present = es7000_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = es7000_check_phys_apicid_present, + .enable_apic_mode = es7000_enable_apic_mode, + .phys_pkg_id = es7000_phys_pkg_id, + .mps_oem_check = es7000_mps_oem_check, + + .get_apic_id = es7000_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, + + .send_IPI_mask = es7000_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = es7000_send_IPI_allbutself, + .send_IPI_all = es7000_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + + .trampoline_phys_low = 0x467, + .trampoline_phys_high = 0x469, + + .wait_for_init_deassert = es7000_wait_for_init_deassert, + + /* Nothing to do for most platforms, since cleared by the INIT cycle: */ + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 78ab5735cb8..05e47acfd66 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -7,4 +7,3 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o -obj-$(CONFIG_X86_ES7000) += es7000.o diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c deleted file mode 100644 index bb11166b7c3..00000000000 --- a/arch/x86/mach-generic/es7000.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * APIC driver for the Unisys ES7000 chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -extern void es7000_enable_apic_mode(void); -extern int apic_version [MAX_APICS]; -extern u8 cpu_2_logical_apicid[]; -extern unsigned int boot_cpu_physical_apicid; - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - -#define apicid_cluster(apicid) (apicid & 0xF0) -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) - -static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} - - -static void es7000_wait_for_init_deassert(atomic_t *deassert) -{ -#ifndef CONFIG_ES7000_CLUSTERED_APIC - while (!atomic_read(deassert)) - cpu_relax(); -#endif - return; -} - -static unsigned int es7000_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#ifdef CONFIG_ACPI -static int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - -static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static void es7000_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static void es7000_send_IPI_all(int vector) -{ - es7000_send_IPI_mask(cpu_online_mask, vector); -} - -static int es7000_apic_id_registered(void) -{ - return 1; -} - -static const cpumask_t *target_cpus_cluster(void) -{ - return &CPU_MASK_ALL; -} - -static const cpumask_t *es7000_target_cpus(void) -{ - return &cpumask_of_cpu(smp_processor_id()); -} - -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} -static unsigned long es7000_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static unsigned long calculate_ldr(int cpu) -{ - unsigned long id = xapic_phys_to_log_apicid(cpu); - - return (SET_APIC_LOGICAL_ID(id)); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static void es7000_init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); -} - -static int es7000_apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static int es7000_cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) -{ - static int id = 0; - physid_mask_t mask; - - mask = physid_mask_of_physid(id); - ++id; - - return mask; -} - -/* Mapping from cpu number to logical apicid */ -static int es7000_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); -} - -static int es7000_check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return (1); -} - -static unsigned int -es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = cpumask_first(cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return 0xFF; - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return es7000_cpu_to_logical_apicid(0); - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = first_cpu(*cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); - - return es7000_cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - -static unsigned int -es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = es7000_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -void __init es7000_update_genapic_to_cluster(void) -{ - apic->target_cpus = target_cpus_cluster; - apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; - - apic->init_apic_ldr = es7000_init_apic_ldr_cluster; - - apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; -} - -static int probe_es7000(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static __init int -es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (mpc->oemptr) { - struct mpc_oemtable *oem_table = - (struct mpc_oemtable *)mpc->oemptr; - - if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); - } - return 0; -} - -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr = 0; - int check_dsdt; - int ret = 0; - - /* check dsdt at first to avoid clear fix_map for oem_addr */ - check_dsdt = es7000_check_dsdt(); - - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (check_dsdt) - ret = parse_unisys_oem((char *)oem_addr); - else { - setup_unisys(); - ret = 1; - } - /* - * we need to unmap it - */ - unmap_unisys_acpi_oem_table(oem_addr); - } - return ret; -} -#else -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif - - -struct genapic apic_es7000 = { - - .name = "es7000", - .probe = probe_es7000, - .acpi_madt_oem_check = es7000_acpi_madt_oem_check, - .apic_id_registered = es7000_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - /* phys delivery to target CPUs: */ - .irq_dest_mode = 0, - - .target_cpus = es7000_target_cpus, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = es7000_check_apicid_used, - .check_apicid_present = es7000_check_apicid_present, - - .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = es7000_init_apic_ldr, - - .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = es7000_apicid_to_node, - .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, - .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = es7000_mps_oem_check, - - .get_apic_id = es7000_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - - .send_IPI_mask = es7000_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = es7000_send_IPI_allbutself, - .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - - .trampoline_phys_low = 0x467, - .trampoline_phys_high = 0x469, - - .wait_for_init_deassert = es7000_wait_for_init_deassert, - - /* Nothing to do for most platforms, since cleared by the INIT cycle: */ - .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, - .inquire_remote_apic = default_inquire_remote_apic, -}; From 61b90b7ca10cc65d8b850ab542859dc593e5a381 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:01:05 +0100 Subject: [PATCH 274/682] x86, NUMAQ: Consolidate code Move all NUMAQ code into arch/x86/kernel/numaq.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 278 +++++++++++++++++++++++++++++++++ arch/x86/mach-generic/Makefile | 1 - arch/x86/mach-generic/numaq.c | 277 -------------------------------- 3 files changed, 278 insertions(+), 278 deletions(-) delete mode 100644 arch/x86/mach-generic/numaq.c diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 3928280278f..83bb05524f4 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -291,3 +291,281 @@ int __init get_memcfg_numaq(void) smp_dump_qct(); return 1; } + +/* + * APIC driver for the IBM NUMAQ chipset. + */ +#define APIC_DEFINITION 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) + +static inline unsigned int numaq_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0x0F; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void numaq_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void numaq_send_IPI_all(int vector) +{ + numaq_send_IPI_mask(cpu_online_mask, vector); +} + +extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); + +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) + +/* + * Because we use NMIs rather than the INIT-STARTUP sequence to + * bootstrap the CPUs, the APIC may be in a weird state. Kick it: + */ +static inline void numaq_smp_callin_clear_local_apic(void) +{ + clear_local_APIC(); +} + +static inline void +numaq_store_NMI_vector(unsigned short *high, unsigned short *low) +{ + printk("Storing NMI vector\n"); + *high = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); + *low = + *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); +} + +static inline const cpumask_t *numaq_target_cpus(void) +{ + return &CPU_MASK_ALL; +} + +static inline unsigned long +numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long numaq_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +#define apicid_cluster(apicid) (apicid & 0xF0) + +static inline int numaq_apic_id_registered(void) +{ + return 1; +} + +static inline void numaq_init_apic_ldr(void) +{ + /* Already done in NUMA-Q firmware */ +} + +static inline void numaq_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "NUMA-Q", nr_ioapics); +} + +/* + * Skip adding the timer int on secondary nodes, which causes + * a small but painful rift in the time-space continuum. + */ +static inline int numaq_multi_timer_check(int apic, int irq) +{ + return apic != 0 && irq == 0; +} + +static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* We don't have a good way to do this yet - hack */ + return physids_promote(0xFUL); +} + +/* Mapping from cpu number to logical apicid */ +extern u8 cpu_2_logical_apicid[]; + +static inline int numaq_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; +} + +/* + * Supporting over 60 cpus on NUMA-Q requires a locality-dependent + * cpu to APIC ID relation to properly interact with the intelligent + * mode of the cluster controller. + */ +static inline int numaq_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < 60) + return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); + else + return BAD_APICID; +} + +static inline int numaq_apicid_to_node(int logical_apicid) +{ + return logical_apicid >> 4; +} + +static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +{ + int node = numaq_apicid_to_node(logical_apicid); + int cpu = __ffs(logical_apicid & 0xf); + + return physid_mask_of_physid(cpu + 4*node); +} + +extern void *xquad_portio; + +static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* + * We use physical apicids here, not logical, so just return the default + * physical broadcast to stop people from breaking us + */ +static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return 0x0F; +} + +static inline unsigned int +numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + return 0x0F; +} + +/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ +static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} +static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +{ + numaq_mps_oem_check(mpc, oem, productid); + return found_numaq; +} + +static int probe_numaq(void) +{ + /* already know from get_memcfg_numaq() */ + return found_numaq; +} + +static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +} + +static void numaq_setup_portio_remap(void) +{ + int num_quads = num_online_nodes(); + + if (num_quads <= 1) + return; + + printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); + printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +} + +struct genapic apic_numaq = { + + .name = "NUMAQ", + .probe = probe_numaq, + .acpi_madt_oem_check = NULL, + .apic_id_registered = numaq_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* physical delivery on LOCAL quad: */ + .irq_dest_mode = 0, + + .target_cpus = numaq_target_cpus, + .disable_esr = 1, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = numaq_check_apicid_used, + .check_apicid_present = numaq_check_apicid_present, + + .vector_allocation_domain = numaq_vector_allocation_domain, + .init_apic_ldr = numaq_init_apic_ldr, + + .ioapic_phys_id_map = numaq_ioapic_phys_id_map, + .setup_apic_routing = numaq_setup_apic_routing, + .multi_timer_check = numaq_multi_timer_check, + .apicid_to_node = numaq_apicid_to_node, + .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, + .cpu_present_to_apicid = numaq_cpu_present_to_apicid, + .apicid_to_cpu_present = numaq_apicid_to_cpu_present, + .setup_portio_remap = numaq_setup_portio_remap, + .check_phys_apicid_present = numaq_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = numaq_phys_pkg_id, + .mps_oem_check = __numaq_mps_oem_check, + + .get_apic_id = numaq_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, + + .send_IPI_mask = numaq_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = numaq_send_IPI_allbutself, + .send_IPI_all = numaq_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, + + /* We don't do anything here because we use NMI's to boot instead */ + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, + .store_NMI_vector = numaq_store_NMI_vector, + .inquire_remote_apic = NULL, +}; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 05e47acfd66..4ede08d309e 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -5,5 +5,4 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o -obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_X86_BIGSMP) += bigsmp.o diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c deleted file mode 100644 index c221cfb2c2d..00000000000 --- a/arch/x86/mach-generic/numaq.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * APIC driver for the IBM NUMAQ chipset. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline void -numaq_store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk("Storing NMI vector\n"); - *high = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); - *low = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); -} - -static inline const cpumask_t *numaq_target_cpus(void) -{ - return &CPU_MASK_ALL; -} - -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -#define apicid_cluster(apicid) (apicid & 0xF0) - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); -} - -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; - -static inline int numaq_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - return physid_mask_of_physid(cpu + 4*node); -} - -extern void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return 0x0F; -} - -static inline unsigned int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - return 0x0F; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} -static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - numaq_mps_oem_check(mpc, oem, productid); - return found_numaq; -} - -static int probe_numaq(void) -{ - /* already know from get_memcfg_numaq() */ - return found_numaq; -} - -static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; -} - -static void numaq_setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -struct genapic apic_numaq = { - - .name = "NUMAQ", - .probe = probe_numaq, - .acpi_madt_oem_check = NULL, - .apic_id_registered = numaq_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* physical delivery on LOCAL quad: */ - .irq_dest_mode = 0, - - .target_cpus = numaq_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = numaq_check_apicid_used, - .check_apicid_present = numaq_check_apicid_present, - - .vector_allocation_domain = numaq_vector_allocation_domain, - .init_apic_ldr = numaq_init_apic_ldr, - - .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = numaq_multi_timer_check, - .apicid_to_node = numaq_apicid_to_node, - .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, - .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = __numaq_mps_oem_check, - - .get_apic_id = numaq_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - - .send_IPI_mask = numaq_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = numaq_send_IPI_allbutself, - .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - - /* We don't do anything here because we use NMI's to boot instead */ - .wait_for_init_deassert = NULL, - - .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .store_NMI_vector = numaq_store_NMI_vector, - .inquire_remote_apic = NULL, -}; From b3daa3a1a56cf09fb91773f3658692fd02d08bb1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:04:37 +0100 Subject: [PATCH 275/682] x86, bigsmp: consolidate code Move all code to arch/x86/kernel/bigsmp_32.c. With this it ceases to rely on any build-time subarch features. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 + arch/x86/{mach-generic/bigsmp.c => kernel/bigsmp_32.c} | 0 arch/x86/mach-generic/Makefile | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename arch/x86/{mach-generic/bigsmp.c => kernel/bigsmp_32.c} (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a382ca6f6a1..4239847906a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -71,6 +71,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o +obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT) += summit_32.o diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/kernel/bigsmp_32.c similarity index 100% rename from arch/x86/mach-generic/bigsmp.c rename to arch/x86/kernel/bigsmp_32.c diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile index 4ede08d309e..05e4a7ca774 100644 --- a/arch/x86/mach-generic/Makefile +++ b/arch/x86/mach-generic/Makefile @@ -5,4 +5,3 @@ EXTRA_CFLAGS := -Iarch/x86/kernel obj-y := probe.o default.o -obj-$(CONFIG_X86_BIGSMP) += bigsmp.o From 9f4187f0a3b93fc215b4472063b6c0b44364e60c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:19:12 +0100 Subject: [PATCH 276/682] x86, bigsmp: consolidate header code Move all the asm/bigsmp/*.h definitions into bigsmp_32.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 163 +++++++++++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 626f45ca4e7..b1f91931003 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -12,10 +12,165 @@ #include #include #include -#include #include -#include -#include + + +static inline unsigned bigsmp_get_apic_id(unsigned long x) +{ + return (x >> 24) & 0xFF; +} + +#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) + +static inline int bigsmp_apic_id_registered(void) +{ + return 1; +} + +static inline const cpumask_t *bigsmp_target_cpus(void) +{ +#ifdef CONFIG_SMP + return &cpu_online_map; +#else + return &cpumask_of_cpu(0); +#endif +} + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline unsigned long +bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return 0; +} + +static inline unsigned long bigsmp_check_apicid_present(int bit) +{ + return 1; +} + +static inline unsigned long calculate_ldr(int cpu) +{ + unsigned long val, id; + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; + id = xapic_phys_to_log_apicid(cpu); + val |= SET_APIC_LOGICAL_ID(id); + return val; +} + +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +static inline void bigsmp_init_apic_ldr(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + +static inline void bigsmp_setup_apic_routing(void) +{ + printk("Enabling APIC mode: %s. Using %d I/O APICs\n", + "Physflat", nr_ioapics); +} + +static inline int bigsmp_apicid_to_node(int logical_apicid) +{ + return apicid_2_node[hard_smp_processor_id()]; +} + +static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids) + return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + + return BAD_APICID; +} + +static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +extern u8 cpu_2_logical_apicid[]; +/* Mapping from cpu number to logical apicid */ +static inline int bigsmp_cpu_to_logical_apicid(int cpu) +{ + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return cpu_physical_id(cpu); +} + +static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +{ + /* For clustered we don't have a good way to do this yet - hack */ + return physids_promote(0xFFL); +} + +static inline void bigsmp_setup_portio_remap(void) +{ +} + +static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return 1; +} + +/* As we are using single CPU as destination, pick only one CPU here */ +static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); +} + +static inline unsigned int +bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + } + if (cpu < nr_cpu_ids) + return bigsmp_cpu_to_logical_apicid(cpu); + + return BAD_APICID; +} + +static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_sequence(mask, vector); +} + +static inline void bigsmp_send_IPI_allbutself(int vector) +{ + default_send_IPI_mask_allbutself(cpu_online_mask, vector); +} + +static inline void bigsmp_send_IPI_all(int vector) +{ + bigsmp_send_IPI_mask(cpu_online_mask, vector); +} static int dmi_bigsmp; /* can be set by dmi scanners */ @@ -95,7 +250,7 @@ struct genapic apic_bigsmp = { .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, - .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask = bigsmp_send_IPI_mask, .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, From d53e2f2855f1c7c2725d550c1ae6b26f4d671c50 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:14:52 +0100 Subject: [PATCH 277/682] x86, smp: remove mach_ipi.h Move mach_ipi.h definitions into genapic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 1 + arch/x86/include/asm/ipi.h | 61 ++++++++++++++++++- arch/x86/include/asm/mach-default/mach_ipi.h | 58 ------------------ arch/x86/include/asm/mach-generic/gpio.h | 15 ----- arch/x86/include/asm/mach-generic/mach_ipi.h | 6 -- .../include/asm/mach-generic/mach_wakecpu.h | 4 -- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/io_apic.c | 1 - arch/x86/kernel/ipi.c | 1 - arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/smp.c | 1 - arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/mach-default/setup.c | 2 +- arch/x86/mach-generic/default.c | 2 +- arch/x86/mm/tlb.c | 2 +- 17 files changed, 68 insertions(+), 96 deletions(-) delete mode 100644 arch/x86/include/asm/mach-default/mach_ipi.h delete mode 100644 arch/x86/include/asm/mach-generic/gpio.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_ipi.h delete mode 100644 arch/x86/include/asm/mach-generic/mach_wakecpu.h diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index ccfcd19d5b7..273b99452ae 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -259,4 +259,5 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) } #endif /* CONFIG_X86_LOCAL_APIC */ + #endif /* _ASM_X86_GENAPIC_64_H */ diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index a8d717f2c7e..e2e8e4e0a65 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_IPI_H #define _ASM_X86_IPI_H +#ifdef CONFIG_X86_LOCAL_APIC + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -56,8 +58,7 @@ static inline void __xapic_wait_icr_idle(void) } static inline void -__default_send_IPI_shortcut(unsigned int shortcut, - int vector, unsigned int dest) +__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) { /* * Subtle. In the case of the 'never do double writes' workaround @@ -156,4 +157,60 @@ default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_restore(flags); } + +/* Avoid include hell */ +#define NMI_VECTOR 0x02 + +void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); + +extern int no_broadcast; + +#ifdef CONFIG_X86_64 +#include +#else +static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_bitmask(mask, vector); +} +void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +#endif + +static inline void __default_local_send_IPI_allbutself(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask_allbutself(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); +} + +static inline void __default_local_send_IPI_all(int vector) +{ + if (no_broadcast || vector == NMI_VECTOR) + apic->send_IPI_mask(cpu_online_mask, vector); + else + __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); +} + +#ifdef CONFIG_X86_32 +static inline void default_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then we get an APIC send + * error if we try to broadcast, thus avoid sending IPIs in this case. + */ + if (!(num_online_cpus() > 1)) + return; + + __default_local_send_IPI_allbutself(vector); +} + +static inline void default_send_IPI_all(int vector) +{ + __default_local_send_IPI_all(vector); +} +#endif + +#endif + #endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h deleted file mode 100644 index 85dec630c69..00000000000 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef _ASM_X86_MACH_DEFAULT_MACH_IPI_H -#define _ASM_X86_MACH_DEFAULT_MACH_IPI_H - -/* Avoid include hell */ -#define NMI_VECTOR 0x02 - -void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -void __default_send_IPI_shortcut(unsigned int shortcut, int vector); - -extern int no_broadcast; - -#ifdef CONFIG_X86_64 -#include -#else -static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_bitmask(mask, vector); -} -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -#endif - -static inline void __default_local_send_IPI_allbutself(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask_allbutself(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector); -} - -static inline void __default_local_send_IPI_all(int vector) -{ - if (no_broadcast || vector == NMI_VECTOR) - apic->send_IPI_mask(cpu_online_mask, vector); - else - __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector); -} - -#ifdef CONFIG_X86_32 -static inline void default_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __default_local_send_IPI_allbutself(vector); -} - -static inline void default_send_IPI_all(int vector) -{ - __default_local_send_IPI_all(vector); -} -#endif - -#endif /* _ASM_X86_MACH_DEFAULT_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/gpio.h b/arch/x86/include/asm/mach-generic/gpio.h deleted file mode 100644 index 995c45efdb3..00000000000 --- a/arch/x86/include/asm/mach-generic/gpio.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_GPIO_H -#define _ASM_X86_MACH_GENERIC_GPIO_H - -int gpio_request(unsigned gpio, const char *label); -void gpio_free(unsigned gpio); -int gpio_direction_input(unsigned gpio); -int gpio_direction_output(unsigned gpio, int value); -int gpio_get_value(unsigned gpio); -void gpio_set_value(unsigned gpio, int value); -int gpio_to_irq(unsigned gpio); -int irq_to_gpio(unsigned irq); - -#include /* cansleep wrappers */ - -#endif /* _ASM_X86_MACH_GENERIC_GPIO_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_ipi.h b/arch/x86/include/asm/mach-generic/mach_ipi.h deleted file mode 100644 index 5691c09645c..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_ipi.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_IPI_H -#define _ASM_X86_MACH_GENERIC_MACH_IPI_H - -#include - -#endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h deleted file mode 100644 index 0b884c03a3f..00000000000 --- a/arch/x86/include/asm/mach-generic/mach_wakecpu.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H -#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H - -#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 41a0ba34d6b..81efe86eca8 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,7 +49,7 @@ #include #include -#include +#include /* * Sanity check diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 11b93cabdf7..ad7f2a696f4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,7 +28,7 @@ #include #include -#include +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e0744ea6d0f..241a01d6fd4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -62,7 +62,6 @@ #include #include -#include #include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 50076d92fbc..0893fa14458 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,7 +20,6 @@ #ifdef CONFIG_X86_32 #include -#include /* * the following functions deal with sending IPIs between CPUs. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b62a3811e01..5c4f5548384 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,7 +46,7 @@ #include #include -#include +#include /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 38dace28d62..32e8f0af292 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,7 +24,7 @@ # include #endif -#include +#include /* * Power off function, if any diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 892e7c389be..0eb32ae9bf1 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -26,7 +26,6 @@ #include #include #include -#include #include /* * Some notes on x86 processor bugs affecting SMP operation: diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 3bd7f47a91b..4fd646e6dd4 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index df167f26562..b65ff0bf730 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,7 +10,7 @@ #include #include -#include +#include #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 7d5123e474e..d9d44c8c3db 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6348e114692..14c5af4d11e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; -#include +#include /* * Smarter SMP flushing macros. * c/o Linus Torvalds. From 7b38725318f4517af6168ccbff99060d67aba1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:11:44 +0100 Subject: [PATCH 278/682] x86: remove subarchitecture support code Remove remaining bits of the subarchitecture code. Now that all the special platforms are runtime probed and runtime handled, we can remove these facilities. Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 5 - arch/x86/kernel/Makefile | 2 +- .../probe.c => kernel/probe_32.c} | 92 +++++++++++++++++++ arch/x86/mach-generic/Makefile | 7 -- 4 files changed, 93 insertions(+), 13 deletions(-) rename arch/x86/{mach-generic/probe.c => kernel/probe_32.c} (54%) delete mode 100644 arch/x86/mach-generic/Makefile diff --git a/arch/x86/Makefile b/arch/x86/Makefile index cacee981d16..799a0d931c8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -113,11 +113,6 @@ mcore-y := arch/x86/mach-default/ mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ -# generic subarchitecture -mflags-$(CONFIG_X86_GENERICARCH):= -Iarch/x86/include/asm/mach-generic -fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ -mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ - # default subarch .h files mflags-y += -Iarch/x86/include/asm/mach-default diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4239847906a..d3f8f49aed6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -30,7 +30,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o -obj-$(CONFIG_X86_32) += probe_roms_32.o +obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/kernel/probe_32.c similarity index 54% rename from arch/x86/mach-generic/probe.c rename to arch/x86/kernel/probe_32.c index c03c7222132..a6fba691416 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/kernel/probe_32.c @@ -1,4 +1,6 @@ /* + * Default generic APIC driver. This handles up to 8 CPUs. + * * Copyright 2003 Andi Kleen, SuSE Labs. * Subject to the GNU Public License, v.2 * @@ -17,6 +19,96 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + /* + * Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; +} + +/* should be called last. */ +static int probe_default(void) +{ + return 1; +} + +struct genapic apic_default = { + + .name = "default", + .probe = probe_default, + .acpi_madt_oem_check = NULL, + .apic_id_registered = default_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = default_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = default_check_apicid_used, + .check_apicid_present = default_check_apicid_present, + + .vector_allocation_domain = default_vector_allocation_domain, + .init_apic_ldr = default_init_apic_ldr, + + .ioapic_phys_id_map = default_ioapic_phys_id_map, + .setup_apic_routing = default_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = default_apicid_to_node, + .cpu_to_logical_apicid = default_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = default_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = default_phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = default_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = default_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = default_send_IPI_allbutself, + .send_IPI_all = default_send_IPI_all, + .send_IPI_self = NULL, + + .wakeup_cpu = NULL, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = default_wait_for_init_deassert, + + .smp_callin_clear_local_apic = NULL, + .store_NMI_vector = NULL, + .inquire_remote_apic = default_inquire_remote_apic, +}; + extern struct genapic apic_numaq; extern struct genapic apic_summit; extern struct genapic apic_bigsmp; diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile deleted file mode 100644 index 05e4a7ca774..00000000000 --- a/arch/x86/mach-generic/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the generic architecture -# - -EXTRA_CFLAGS := -Iarch/x86/kernel - -obj-y := probe.o default.o From 1164dd0099c0d79146a55319670f57ab7ad1d352 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:34:09 +0100 Subject: [PATCH 279/682] x86: move mach-default/*.h files to asm/ We are getting rid of subarchitecture support - move the hook files to asm/. (These are now stale and should be replaced with more explicit runtime mechanisms - but the transition is simpler this way.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/{mach-default => }/apm.h | 0 arch/x86/include/asm/{mach-default => }/do_timer.h | 0 arch/x86/include/asm/{mach-default => }/entry_arch.h | 5 +++++ arch/x86/include/asm/{mach-default => }/mach_timer.h | 0 arch/x86/include/asm/{mach-default => }/mach_traps.h | 0 arch/x86/include/asm/{mach-default => }/pci-functions.h | 0 arch/x86/include/asm/{mach-default => }/setup_arch.h | 0 arch/x86/include/asm/{mach-default => }/smpboot_hooks.h | 0 arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/probe_roms_32.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/time_32.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/pci/pcbios.c | 2 +- drivers/clocksource/acpi_pm.c | 2 +- drivers/clocksource/cyclone.c | 2 +- 19 files changed, 16 insertions(+), 11 deletions(-) rename arch/x86/include/asm/{mach-default => }/apm.h (100%) rename arch/x86/include/asm/{mach-default => }/do_timer.h (100%) rename arch/x86/include/asm/{mach-default => }/entry_arch.h (95%) rename arch/x86/include/asm/{mach-default => }/mach_timer.h (100%) rename arch/x86/include/asm/{mach-default => }/mach_traps.h (100%) rename arch/x86/include/asm/{mach-default => }/pci-functions.h (100%) rename arch/x86/include/asm/{mach-default => }/setup_arch.h (100%) rename arch/x86/include/asm/{mach-default => }/smpboot_hooks.h (100%) diff --git a/arch/x86/include/asm/mach-default/apm.h b/arch/x86/include/asm/apm.h similarity index 100% rename from arch/x86/include/asm/mach-default/apm.h rename to arch/x86/include/asm/apm.h diff --git a/arch/x86/include/asm/mach-default/do_timer.h b/arch/x86/include/asm/do_timer.h similarity index 100% rename from arch/x86/include/asm/mach-default/do_timer.h rename to arch/x86/include/asm/do_timer.h diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/entry_arch.h similarity index 95% rename from arch/x86/include/asm/mach-default/entry_arch.h rename to arch/x86/include/asm/entry_arch.h index 6fa399ad1de..b87b077cc23 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -41,10 +41,15 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, * a much simpler SMP time architecture: */ #ifdef CONFIG_X86_LOCAL_APIC + BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#ifdef CONFIG_PERF_COUNTERS +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +#endif + #ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif diff --git a/arch/x86/include/asm/mach-default/mach_timer.h b/arch/x86/include/asm/mach_timer.h similarity index 100% rename from arch/x86/include/asm/mach-default/mach_timer.h rename to arch/x86/include/asm/mach_timer.h diff --git a/arch/x86/include/asm/mach-default/mach_traps.h b/arch/x86/include/asm/mach_traps.h similarity index 100% rename from arch/x86/include/asm/mach-default/mach_traps.h rename to arch/x86/include/asm/mach_traps.h diff --git a/arch/x86/include/asm/mach-default/pci-functions.h b/arch/x86/include/asm/pci-functions.h similarity index 100% rename from arch/x86/include/asm/mach-default/pci-functions.h rename to arch/x86/include/asm/pci-functions.h diff --git a/arch/x86/include/asm/mach-default/setup_arch.h b/arch/x86/include/asm/setup_arch.h similarity index 100% rename from arch/x86/include/asm/mach-default/setup_arch.h rename to arch/x86/include/asm/setup_arch.h diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h similarity index 100% rename from arch/x86/include/asm/mach-default/smpboot_hooks.h rename to arch/x86/include/asm/smpboot_hooks.h diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 98807bb095a..37ba5f85b71 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int); */ #define APM_ZERO_SEGS -#include "apm.h" +#include /* * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a0b91aac72a..65efd42454b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -688,7 +688,7 @@ ENDPROC(name) #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) /* The include is where all of the SMP etc. interrupts come from */ -#include "entry_arch.h" +#include ENTRY(coprocessor_error) RING0_INT_FRAME diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 23b6d9e6e4f..bdfad80c3cf 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -34,7 +34,7 @@ #include -#include +#include int unknown_nmi_panic; int nmi_watchdog_enabled; diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c index 675a48c404a..071e7fea42e 100644 --- a/arch/x86/kernel/probe_roms_32.c +++ b/arch/x86/kernel/probe_roms_32.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include static struct resource system_rom_resource = { .name = "System ROM", diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 92e42939fb0..e645d4793e4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -81,7 +81,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 489fde9d947..e90b3e50b54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -66,7 +66,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 3985cac0ed4..764c74e871f 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -38,7 +38,7 @@ #include #include -#include "do_timer.h" +#include int timer_ack; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ed5aee5f3fc..214bc327a0c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,7 +54,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index b82cae970df..1c975cc9839 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index e1129fad96d..ee19b6e8fcb 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -143,7 +143,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_LE, #endif #ifndef CONFIG_X86_64 -#include "mach_timer.h" +#include #define PMTMR_EXPECTED_RATE \ ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) /* diff --git a/drivers/clocksource/cyclone.c b/drivers/clocksource/cyclone.c index 1bde303b970..8615059a872 100644 --- a/drivers/clocksource/cyclone.c +++ b/drivers/clocksource/cyclone.c @@ -7,7 +7,7 @@ #include #include -#include "mach_timer.h" +#include #define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */ #define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */ From 6bda2c8b32febeb38ee128047253751e080bad52 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:32:55 +0100 Subject: [PATCH 280/682] x86: remove subarchitecture support Remove the 32-bit subarchitecture support code. All subarchitectures but Voyager have been converted. Voyager will be done later or will be removed. Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 21 ---- arch/x86/include/asm/apic.h | 2 + arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 17 ---- arch/x86/kernel/probe_32.c | 163 ++++++++++++++++++++++++++++++++ arch/x86/mach-default/setup.c | 162 ------------------------------- arch/x86/mach-generic/default.c | 93 ------------------ 7 files changed, 166 insertions(+), 294 deletions(-) delete mode 100644 arch/x86/mach-default/setup.c delete mode 100644 arch/x86/mach-generic/default.c diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 799a0d931c8..99550c40799 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -102,24 +102,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # prevent gcc from generating any FP code by mistake KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) -### -# Sub architecture support -# fcore-y is linked before mcore-y files. - -# Default subarch .c files -mcore-y := arch/x86/mach-default/ - -# Voyager subarch support -mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager -mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ - -# default subarch .h files -mflags-y += -Iarch/x86/include/asm/mach-default - -# 64 bit does not support subarch support - clear sub arch variables -fcore-$(CONFIG_X86_64) := -mcore-$(CONFIG_X86_64) := - KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) @@ -145,9 +127,6 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ core-y += arch/x86/kernel/ core-y += arch/x86/mm/ -# Remaining sub architecture files -core-y += $(mcore-y) - core-y += arch/x86/crypto/ core-y += arch/x86/vdso/ core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6a77068e261..b03711d7990 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -215,6 +215,7 @@ static inline void disable_local_APIC(void) { } #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else +#ifdef CONFIG_X86_LOCAL_APIC static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -224,6 +225,7 @@ static inline unsigned default_get_apic_id(unsigned long x) else return (x >> 24) & 0x0F; } +#endif #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d3f8f49aed6..1bc4cdc797d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -63,7 +63,7 @@ obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 241a01d6fd4..3378ffb2140 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -797,23 +797,6 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void default_send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | apic->dest_logical; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - #ifdef CONFIG_X86_32 /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index a6fba691416..61339a0d55c 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -32,6 +32,26 @@ #include #include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_HOTPLUG_CPU +#define DEFAULT_SEND_IPI (1) +#else +#define DEFAULT_SEND_IPI (0) +#endif + +int no_broadcast = DEFAULT_SEND_IPI; + +#ifdef CONFIG_X86_LOCAL_APIC + static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* @@ -246,3 +266,146 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ + +/** + * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +void __init pre_intr_init_hook(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} + +/** + * intr_init_hook - post gate setup interrupt initialisation + * + * Description: + * Fill in any interrupts that may have been left out by the general + * init_IRQ() routine. interrupts having to do with the machine rather + * than the devices on the I/O bus (like APIC interrupts in intel MP + * systems) are started here. + **/ +void __init intr_init_hook(void) +{ + if (x86_quirks->arch_intr_init) { + if (x86_quirks->arch_intr_init()) + return; + } +} + +/** + * pre_setup_arch_hook - hook called prior to any setup_arch() execution + * + * Description: + * generally used to activate any machine specific identification + * routines that may be needed before setup_arch() runs. On Voyager + * this is used to get the board revision and type. + **/ +void __init pre_setup_arch_hook(void) +{ +} + +/** + * trap_init_hook - initialise system specific traps + * + * Description: + * Called as the final act of trap_init(). Used in VISWS to initialise + * the various board specific APIC traps. + **/ +void __init trap_init_hook(void) +{ + if (x86_quirks->arch_trap_init) { + if (x86_quirks->arch_trap_init()) + return; + } +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .mask = CPU_MASK_NONE, + .name = "timer" +}; + +/** + * pre_time_init_hook - do any specific initialisations before. + * + **/ +void __init pre_time_init_hook(void) +{ + if (x86_quirks->arch_pre_time_init) + x86_quirks->arch_pre_time_init(); +} + +/** + * time_init_hook - do any specific initialisations for the system timer. + * + * Description: + * Must plug the system timer interrupt source at HZ into the IRQ listed + * in irq_vectors.h:TIMER_IRQ + **/ +void __init time_init_hook(void) +{ + if (x86_quirks->arch_time_init) { + /* + * A nonzero return code does not mean failure, it means + * that the architecture quirk does not want any + * generic (timer) setup to be performed after this: + */ + if (x86_quirks->arch_time_init()) + return; + } + + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +#ifdef CONFIG_MCA +/** + * mca_nmi_hook - hook into MCA specific NMI chain + * + * Description: + * The MCA (Microchannel Architecture) has an NMI chain for NMI sources + * along the MCA bus. Use this to hook into that chain if you will need + * it. + **/ +void mca_nmi_hook(void) +{ + /* + * If I recall correctly, there's a whole bunch of other things that + * we can do to check for NMI problems, but that's all I know about + * at the moment. + */ + pr_warning("NMI generated from unknown source!\n"); +} +#endif + +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); + return 1; +} +__setup("no_ipi_broadcast=", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); + return 0; +} + +late_initcall(print_ipi_mode); + diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c deleted file mode 100644 index b65ff0bf730..00000000000 --- a/arch/x86/mach-default/setup.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef CONFIG_HOTPLUG_CPU -#define DEFAULT_SEND_IPI (1) -#else -#define DEFAULT_SEND_IPI (0) -#endif - -int no_broadcast = DEFAULT_SEND_IPI; - -/** - * pre_intr_init_hook - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init pre_intr_init_hook(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - -/** - * intr_init_hook - post gate setup interrupt initialisation - * - * Description: - * Fill in any interrupts that may have been left out by the general - * init_IRQ() routine. interrupts having to do with the machine rather - * than the devices on the I/O bus (like APIC interrupts in intel MP - * systems) are started here. - **/ -void __init intr_init_hook(void) -{ - if (x86_quirks->arch_intr_init) { - if (x86_quirks->arch_intr_init()) - return; - } -} - -/** - * pre_setup_arch_hook - hook called prior to any setup_arch() execution - * - * Description: - * generally used to activate any machine specific identification - * routines that may be needed before setup_arch() runs. On Voyager - * this is used to get the board revision and type. - **/ -void __init pre_setup_arch_hook(void) -{ -} - -/** - * trap_init_hook - initialise system specific traps - * - * Description: - * Called as the final act of trap_init(). Used in VISWS to initialise - * the various board specific APIC traps. - **/ -void __init trap_init_hook(void) -{ - if (x86_quirks->arch_trap_init) { - if (x86_quirks->arch_trap_init()) - return; - } -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -/** - * pre_time_init_hook - do any specific initialisations before. - * - **/ -void __init pre_time_init_hook(void) -{ - if (x86_quirks->arch_pre_time_init) - x86_quirks->arch_pre_time_init(); -} - -/** - * time_init_hook - do any specific initialisations for the system timer. - * - * Description: - * Must plug the system timer interrupt source at HZ into the IRQ listed - * in irq_vectors.h:TIMER_IRQ - **/ -void __init time_init_hook(void) -{ - if (x86_quirks->arch_time_init) { - /* - * A nonzero return code does not mean failure, it means - * that the architecture quirk does not want any - * generic (timer) setup to be performed after this: - */ - if (x86_quirks->arch_time_init()) - return; - } - - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - -#ifdef CONFIG_MCA -/** - * mca_nmi_hook - hook into MCA specific NMI chain - * - * Description: - * The MCA (Microchannel Architecture) has an NMI chain for NMI sources - * along the MCA bus. Use this to hook into that chain if you will need - * it. - **/ -void mca_nmi_hook(void) -{ - /* - * If I recall correctly, there's a whole bunch of other things that - * we can do to check for NMI problems, but that's all I know about - * at the moment. - */ - pr_warning("NMI generated from unknown source!\n"); -} -#endif - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); - return 1; -} -__setup("no_ipi_broadcast=", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); - return 0; -} - -late_initcall(print_ipi_mode); - diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c deleted file mode 100644 index d9d44c8c3db..00000000000 --- a/arch/x86/mach-generic/default.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Default generic APIC driver. This handles up to 8 CPUs. - */ -#define APIC_DEFINITION 1 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; -} - -/* should be called last. */ -static int probe_default(void) -{ - return 1; -} - -struct genapic apic_default = { - - .name = "default", - .probe = probe_default, - .acpi_madt_oem_check = NULL, - .apic_id_registered = default_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = default_target_cpus, - .disable_esr = 0, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = default_check_apicid_used, - .check_apicid_present = default_check_apicid_present, - - .vector_allocation_domain = default_vector_allocation_domain, - .init_apic_ldr = default_init_apic_ldr, - - .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = default_apicid_to_node, - .cpu_to_logical_apicid = default_cpu_to_logical_apicid, - .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = default_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = default_phys_pkg_id, - .mps_oem_check = NULL, - - .get_apic_id = default_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - - .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = default_send_IPI_allbutself, - .send_IPI_all = default_send_IPI_all, - .send_IPI_self = NULL, - - .wakeup_cpu = NULL, - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, - .inquire_remote_apic = default_inquire_remote_apic, -}; From e7c64981949ec983ee41c2927c036fdb00d8e68b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:54:17 +0100 Subject: [PATCH 281/682] x86/Voyager: clean up BROKEN Kconfig reference CONFIG_BROKEN has been removed from the upstream kernel years ago, but X86_VOYAGER still had a stale reference to it - remove it. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6218e6c982..45c7bdbc156 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,7 +303,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && (SMP || BROKEN) && !PCI + depends on X86_32 && SMP && !PCI help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. From 61b8172e57c4b8db1fcb7f5fd8dfda85ffd8e052 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 Jan 2009 19:55:34 +0100 Subject: [PATCH 282/682] x86: disable Voyager temporarily x86/Voyager does not build right now and it's unclear whether it will be cleaned up and ported to the subarch-less 32-bit x86 code - so disable it for now. If it's fixed we'll re-enable it - or remove it after some time. There's a very low number of systems running development kernels on x86/Voyager currently. (one or two on the whole planet) Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 45c7bdbc156..f983f4055b1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -303,7 +303,7 @@ config X86_ELAN config X86_VOYAGER bool "Voyager (NCR)" - depends on X86_32 && SMP && !PCI + depends on X86_32 && SMP && !PCI && BROKEN help Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. From 72ee6ebbb3fe1ac23d1a669b177b858d2028bf09 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 16:57:49 +0100 Subject: [PATCH 283/682] x86/Voyager: remove MCA Kconfig quirk Remove Voyager Kconfig quirk: just like any other hardware platform users of Voyager systems can configure in the hardware drivers they need. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f983f4055b1..ae37a7504b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1873,8 +1873,7 @@ config EISA source "drivers/eisa/Kconfig" config MCA - bool "MCA support" if !X86_VOYAGER - default y if X86_VOYAGER + bool "MCA support" help MicroChannel Architecture is found in some IBM PS/2 machines and laptops. It is a bus system similar to PCI or ISA. See From 07ef83ae9e99377f38f4d0e472ba6ff09324e5e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 16:59:11 +0100 Subject: [PATCH 284/682] x86/Voyager: remove NATSEMI Kconfig quirk x86/Voyager has this quirk for SCx200 support: config SCx200 tristate "NatSemi SCx200 support" depends on !X86_VOYAGER Remove it - Voyager users can disable drivers they dont need. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ae37a7504b5..9727dd59f54 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1884,7 +1884,6 @@ source "drivers/mca/Kconfig" config SCx200 tristate "NatSemi SCx200 support" - depends on !X86_VOYAGER help This provides basic support for National Semiconductor's (now AMD's) Geode processors. The driver probes for the From e0ec9483dbe8f534e0f3ef413f9eba9a5ff78050 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:01:14 +0100 Subject: [PATCH 285/682] x86/Voyager: remove KVM Kconfig quirk Voyager and other subarchitectures have this Kconfig quirk: select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) This is unnecessary, as KVM cleanly detects based on CPUID capabilities. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9727dd59f54..a783fe7759c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,7 +34,7 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST - select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) + select HAVE_KVM select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 From 49793b0341a802cf5ee4179e837a2eb20f12c9fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:02:29 +0100 Subject: [PATCH 286/682] x86/Voyager: remove KGDB Kconfig quirk x86/Voyager has this KGDB quirk: select HAVE_ARCH_KGDB if !X86_VOYAGER This is completely pointless - there's nothing in KGDB that cannot work on Voyager. Remove it. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a783fe7759c..12b433aaaa6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -35,7 +35,7 @@ config X86 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM - select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS From aced3cee555f0b2fd58501e9b8a8a1295011e134 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:03:24 +0100 Subject: [PATCH 287/682] x86/Voyager: remove HIBERNATION Kconfig quirk Voyager has this hibernation quirk: config ARCH_HIBERNATION_POSSIBLE def_bool y depends on !SMP || !X86_VOYAGER Hibernation is a generic facility provided on all x86 platforms. If it is buggy on Voyager then that bug should be fixed - not worked around. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 12b433aaaa6..df952e788a6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -140,7 +140,7 @@ config HAVE_CPUMASK_OF_CPU_MAP config ARCH_HIBERNATION_POSSIBLE def_bool y - depends on !SMP || !X86_VOYAGER + depends on !SMP config ARCH_SUSPEND_POSSIBLE def_bool y From f2fc0e3071230bb9ea9f64a08c3e619ad1357cfb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:04:39 +0100 Subject: [PATCH 288/682] x86/Voyager: remove ARCH_SUSPEND_POSSIBLE Kconfig quirk Voyager has this Kconfig quirk for suspend/resume: config ARCH_SUSPEND_POSSIBLE def_bool y depends on !X86_VOYAGER The proper mechanism to not suspend on a piece of hardware to disable CONFIG_SUSPEND. Remove the quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df952e788a6..8e6413e0e7c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -144,7 +144,6 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y - depends on !X86_VOYAGER config ZONE_DMA32 bool From 3e5095d15276efd14a45393666b1bb7536bf179f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:07:08 +0100 Subject: [PATCH 289/682] x86: replace CONFIG_X86_SMP with CONFIG_SMP The x86/Voyager subarch used to have this distinction between 'x86 SMP support' and 'Voyager SMP support': config X86_SMP bool depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) This is a pointless distinction - Voyager can (and already does) use smp_ops to implement various SMP quirks it has - and it can be extended more to cover all the specialities of Voyager. So remove this complication in the Kconfig space. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 7 +------ arch/x86/Kconfig.debug | 2 +- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/tsc.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/mm/Makefile | 2 +- 12 files changed, 13 insertions(+), 18 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8e6413e0e7c..3671506fb8d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -173,11 +173,6 @@ config GENERIC_PENDING_IRQ depends on GENERIC_HARDIRQS && SMP default y -config X86_SMP - bool - depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) - default y - config USE_GENERIC_SMP_HELPERS def_bool y depends on SMP @@ -203,7 +198,7 @@ config X86_BIOS_REBOOT config X86_TRAMPOLINE bool - depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP) + depends on SMP || (64BIT && ACPI_SLEEP) default y config KTIME_SCALAR diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 28f111461ca..a38dd6064f1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -83,7 +83,7 @@ config DEBUG_PAGEALLOC config DEBUG_PER_CPU_MAPS bool "Debug access to per_cpu maps" depends on DEBUG_KERNEL - depends on X86_SMP + depends on SMP default n help Say Y to verify that the per_cpu map being accessed has diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index b87b077cc23..854d538ae85 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -9,7 +9,7 @@ * is no hardware IRQ pin equivalent for them, they are triggered * through the ICC by us (IPIs) */ -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index bfa921fad13..41550797396 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -98,7 +98,7 @@ extern asmlinkage void qic_call_function_interrupt(void); extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); extern void smp_call_function_single_interrupt(struct pt_regs *); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1bc4cdc797d..dc05a57a474 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,8 +57,8 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o -obj-$(CONFIG_X86_SMP) += smp.o -obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 81efe86eca8..968c817762a 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1900,7 +1900,7 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 4a48bb40974..e48640cfac0 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e3086..89537f678b2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -344,7 +344,7 @@ static void c1e_idle(void) void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { printk(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e645d4793e4..eeb180b1d7a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -588,7 +588,7 @@ early_param("elfcorehdr", setup_elfcorehdr); static int __init default_update_genapic(void) { -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (!apic->wakeup_cpu) apic->wakeup_cpu = wakeup_secondary_cpu_via_init; #endif diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 599e5816863..83d53ce5d4c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -773,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void) if (!cpu_has_tsc || tsc_unstable) return 1; -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP if (apic_is_clustered_box()) return 1; #endif diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index c4c1f9e0940..a4791ef412d 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -256,7 +256,7 @@ void __devinit vmi_time_bsp_init(void) */ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); local_irq_disable(); -#ifdef CONFIG_X86_SMP +#ifdef CONFIG_SMP /* * XXX handle_percpu_irq only defined for SMP; we need to switch over * to using it, since this is a local interrupt, which each CPU must diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9f05157220f..2b938a38491 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,7 +1,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o -obj-$(CONFIG_X86_SMP) += tlb.o +obj-$(CONFIG_SMP) += tlb.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o From c0b5842a457d44c8788b3fd0c64969be7ef673cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:13:05 +0100 Subject: [PATCH 290/682] x86: generalize boot_cpu_id x86/Voyager can boot on non-zero processors. While that can probably be fixed by properly remapping the physical CPU IDs, keep boot_cpu_id for now for easier transition - and expand it to all of x86. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/include/asm/cpu.h | 6 +----- arch/x86/kernel/setup.c | 14 ++++++++++++++ arch/x86/kernel/smpboot.c | 12 ------------ 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3671506fb8d..f4dd851384d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -238,10 +238,6 @@ config SMP If you don't know what to do here, say N. -config X86_HAS_BOOT_CPU_ID - def_bool y - depends on X86_VOYAGER - config SPARSE_IRQ bool "Support sparse irq numbering" depends on PCI_MSI || HT_IRQ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index f03b23e3286..b185091bf19 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -32,10 +32,6 @@ extern void arch_unregister_cpu(int); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_X86_HAS_BOOT_CPU_ID -extern unsigned char boot_cpu_id; -#else -#define boot_cpu_id 0 -#endif +extern unsigned int boot_cpu_id; #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index eeb180b1d7a..609e5af6028 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,20 @@ #define ARCH_SETUP #endif +unsigned int boot_cpu_id __read_mostly; + +#ifdef CONFIG_X86_64 +int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +int default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#endif + #ifndef CONFIG_DEBUG_BOOT_PARAMS struct boot_params __initdata boot_params; #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e90b3e50b54..bc7e220ba0b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -905,18 +905,6 @@ do_rest: return boot_error; } -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); -} -#endif - int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = apic->cpu_present_to_apicid(cpu); From 23394d1c9346d9c0aabbc1b6fca52a9c0aa1c297 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:16:16 +0100 Subject: [PATCH 291/682] x86/Voyager: remove X86_HT Kconfig quirk Voyager has this Kconfig quirk: depends on (X86_32 && !X86_VOYAGER) || X86_64 That is unnecessary as HT support is CPUID driven and explicitly enumerated. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f4dd851384d..1d50c9de4d2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -188,7 +188,6 @@ config X86_64_SMP config X86_HT bool depends on SMP - depends on (X86_32 && !X86_VOYAGER) || X86_64 default y config X86_BIOS_REBOOT From f095df0a0cb35a52605541f619d038339b90d7cc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:17:55 +0100 Subject: [PATCH 292/682] x86/Voyager: remove X86_BIOS_REBOOT Kconfig quirk Voyager has this Kconfig quirk: config X86_BIOS_REBOOT bool depends on !X86_VOYAGER default y Voyager should use the existing machine_ops.emergency_restart reboot quirk mechanism instead of a build-time quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 ------ arch/x86/include/asm/proto.h | 4 ---- arch/x86/kernel/Makefile | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d50c9de4d2..c0d79ab366d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -190,11 +190,6 @@ config X86_HT depends on SMP default y -config X86_BIOS_REBOOT - bool - depends on !X86_VOYAGER - default y - config X86_TRAMPOLINE bool depends on SMP || (64BIT && ACPI_SLEEP) @@ -1361,7 +1356,6 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" - depends on X86_BIOS_REBOOT help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index d6a22f92ba7..49fb3ecf3bb 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -18,11 +18,7 @@ extern void syscall32_cpu_init(void); extern void check_efer(void); -#ifdef CONFIG_X86_BIOS_REBOOT extern int reboot_force; -#else -static const int reboot_force = 0; -#endif long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index dc05a57a474..24f357e7557 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -50,7 +50,7 @@ obj-y += step.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ -obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o +obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o From 550fe4f198558c147c6b8273a709568222a1668a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:28:08 +0100 Subject: [PATCH 293/682] x86/Voyager: remove X86_FIND_SMP_CONFIG Kconfig quirk x86/Voyager had this Kconfig quirk: config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Which splits off the find_smp_config() callback into a build-time quirk. Voyager should use the existing x86_quirks.mach_find_smp_config() callback to introduce SMP-config quirks. NUMAQ-32 and VISWS already use this. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- arch/x86/include/asm/mpspec.h | 4 +++- arch/x86/kernel/setup.c | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c0d79ab366d..df7cb8d68e2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -254,10 +254,6 @@ config NUMA_MIGRATE_IRQ_DESC If you don't know what to do here, say N. -config X86_FIND_SMP_CONFIG - def_bool y - depends on X86_MPPARSE || X86_VOYAGER - config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 03fb0d39654..d22f732eab8 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -56,11 +56,13 @@ extern int smp_found_config; extern int mpc_default_type; extern unsigned long mp_lapic_addr; -extern void find_smp_config(void); extern void get_smp_config(void); + #ifdef CONFIG_X86_MPPARSE +extern void find_smp_config(void); extern void early_reserve_e820_mpc_new(void); #else +static inline void find_smp_config(void) { } static inline void early_reserve_e820_mpc_new(void) { } #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 609e5af6028..6abce6703c5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -905,12 +905,11 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_X86_FIND_SMP_CONFIG /* * Find and reserve possible boot-time SMP configuration: */ find_smp_config(); -#endif + reserve_crashkernel(); #ifdef CONFIG_X86_64 From 36619a8a80793a803588a17f772313d5c948357d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:33:34 +0100 Subject: [PATCH 294/682] x86/VisWS: remove Kconfig quirk VisWS has this quirk currently: config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT The !Voyager quirk is unnecessary. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df7cb8d68e2..f59d2920198 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -351,7 +351,7 @@ endchoice config X86_VISWS bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT + depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. From f154f47d5180c2012bf97999e6c600d45db8af2d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:34:56 +0100 Subject: [PATCH 295/682] x86/Voyager: remove VMI Kconfig quirk x86/Voyager has this build-time quirk: bool "VMI Guest support" select PARAVIRT depends on X86_32 depends on !X86_VOYAGER Since VMI is auto-detected (and Voyager will be auto-detected) there's no reason for this quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f59d2920198..4bd0c8febae 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -406,7 +406,6 @@ config VMI bool "VMI Guest support" select PARAVIRT depends on X86_32 - depends on !X86_VOYAGER help VMI provides a paravirtualized interface to the VMware ESX server (it could be used by other hypervisors in theory too, but is not From e084e531000a488d2d27864266c13ac824575a8b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:36:31 +0100 Subject: [PATCH 296/682] x86/Voyager: remove KVM_CLOCK quirk Voyager has this build-time quirk to exclude KVM_CLOCK: bool "KVM paravirtualized clock" select PARAVIRT select PARAVIRT_CLOCK depends on !X86_VOYAGER Voyager support built into a kernel image does not exclude KVM paravirt clock support - so remove this quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4bd0c8febae..0bf0653969d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -416,7 +416,6 @@ config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT select PARAVIRT_CLOCK - depends on !X86_VOYAGER help Turning on this option will allow you to run a paravirtualized clock when running over the KVM hypervisor. Instead of relying on a PIT From 54523edd237b9e792a3b76988fde23a91d739f43 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:37:33 +0100 Subject: [PATCH 297/682] x86/Voyager: remove KVM_GUEST quirk Voyager has this quirk currently: config KVM_GUEST bool "KVM Guest support" select PARAVIRT depends on !X86_VOYAGER Voyager support built into a kernel image does not exclude KVM paravirt guest support - so remove this quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0bf0653969d..363111f40f3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -426,7 +426,6 @@ config KVM_CLOCK config KVM_GUEST bool "KVM Guest support" select PARAVIRT - depends on !X86_VOYAGER help This option enables various optimizations for running under the KVM hypervisor. From c3e6a2042fef33b747d2ae3961f5312af801973d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:38:46 +0100 Subject: [PATCH 298/682] x86/Voyager: remove PARAVIRT Kconfig quirk Remove this Kconfig quirk: config PARAVIRT bool "Enable paravirtualization code" depends on !X86_VOYAGER help Voyager support built into a kernel does not preclude paravirt support. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 363111f40f3..82105349612 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -434,7 +434,6 @@ source "arch/x86/lguest/Kconfig" config PARAVIRT bool "Enable paravirtualization code" - depends on !X86_VOYAGER help This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly From 7cd92366a593246650cc7d6198e2c7d3af8c1d8a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:40:48 +0100 Subject: [PATCH 299/682] x86/Voyager: remove APIC/IO-APIC Kbuild quirk The lapic/ioapic code properly auto-detects and is safe to run on CPUs that have no local APIC. (or which have their lapic turned off in the hardware) Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 82105349612..6fbb3639d7b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -642,7 +642,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH) + depends on X86_32 && !SMP && !X86_GENERICARCH help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -667,11 +667,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) + depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC config X86_IO_APIC def_bool y - depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) + depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC config X86_VISWS_APIC def_bool y From e006235e5b9cfb785ecbc05551788e33f96ea0ce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:45:17 +0100 Subject: [PATCH 300/682] x86/Voyager: remove MCE quirk If no MCE code is desired on Voyager hw then the solution is to turn them off in the .config - and to extend the MCE code to not initialize on Voyager. Remove the build-time quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6fbb3639d7b..7cf9f0a8c8c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -703,7 +703,6 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check Exception" - depends on !X86_VOYAGER ---help--- Machine Check Exception support allows the processor to notify the kernel if it detects a problem (e.g. overheating, component failure). From 4b19ed915576e8034c3653b4b10b79bde10f69fa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:47:24 +0100 Subject: [PATCH 301/682] x86/Voyager: remove HOTPLUG_CPU Kconfig quirk Voyager has this Kconfig quirk: config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SMP && HOTPLUG && !X86_VOYAGER But this exception will be moot once Voyager starts using the generic x86 code. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7cf9f0a8c8c..206645ac6f6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1470,7 +1470,7 @@ config PHYSICAL_ALIGN config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" - depends on SMP && HOTPLUG && !X86_VOYAGER + depends on SMP && HOTPLUG ---help--- Say Y here to allow turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. From 1c61d8c309a4080980474de8c6689527be180782 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:49:22 +0100 Subject: [PATCH 302/682] x86/Voyager: remove power management Kconfig quirk Voyager has this PM/ACPI Kconfig quirk: menu "Power management and ACPI options" depends on !X86_VOYAGER Most of the PM features are auto-detect so they should be safe to run on just about any hardware. (If not, those instances need fixing.) In any case, if a kernel is built for Voyager, the power management options can be disabled. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 206645ac6f6..f95c3b40e45 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1551,7 +1551,6 @@ config HAVE_ARCH_EARLY_PFN_TO_NID depends on NUMA menu "Power management and ACPI options" - depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER def_bool y From 1ec2dafd937c0f6fed46cbd8f6878f2c1db4a623 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 17:51:37 +0100 Subject: [PATCH 303/682] x86/Voyager: remove ISA quirk Voyager has this ISA quirk (because Voyager has no ISA support): config ISA bool "ISA support" depends on !X86_VOYAGER There's a ton of x86 hardware that does not support ISA, and because most ISA drivers cannot auto-detect in a safe way, the convention in the kernel has always been to not enable ISA drivers if they are not needed. Voyager users can do likewise - no need for a Kconfig quirk. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f95c3b40e45..38ed1a6c6d8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1819,7 +1819,6 @@ if X86_32 config ISA bool "ISA support" - depends on !X86_VOYAGER help Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff From 06ac8346af04f6a972072f6c5780ba734832ad13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:11:43 +0100 Subject: [PATCH 304/682] x86: cleanup, introduce CONFIG_NON_STANDARD_PLATFORMS Introduce a Y/N Kconfig option for non-PC x86 platforms. Make VisWS, RDC321 and SGI/UV depend on this. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 38ed1a6c6d8..b090e5a27da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -349,9 +349,23 @@ config X86_VSMP endchoice +config X86_NON_STANDARD + bool "Support for non-standard x86 platforms" + help + If you disable this option then the kernel will only support + standard PC platforms. (which covers the vast majority of + systems out there.) + + If you enable this option then you'll be able to select a number + of less common non-PC x86 platforms: VisWS, RDC321, SGI/UV. + + If you have one of these systems, or if you want to build a + generic distribution kernel, say Y here - otherwise say N. + config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT + depends on X86_NON_STANDARD help The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -364,6 +378,7 @@ config X86_VISWS config X86_RDC321X bool "RDC R-321x SoC" depends on X86_32 + depends on X86_NON_STANDARD select M486 select X86_REBOOTFIXUPS help @@ -374,6 +389,7 @@ config X86_RDC321X config X86_UV bool "SGI Ultraviolet" depends on X86_64 + depends on X86_NON_STANDARD help This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. From 9e111f3e167a14dd6252cff14fc7dd2ba4c650c6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:18:25 +0100 Subject: [PATCH 305/682] x86: move ELAN to the NON_STANDARD_PLATFORM section Move X86_ELAN (old, AMD based web-boxes) from the subarchitecture menu to the non-standard-platform section. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b090e5a27da..b7617e3781a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,16 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_ELAN - bool "AMD Elan" - depends on X86_32 - help - Select this for an AMD Elan processor. - - Do not use this option for K6/Athlon/Opteron processors! - - If unsure, choose "PC-compatible" instead. - config X86_VOYAGER bool "Voyager (NCR)" depends on X86_32 && SMP && !PCI && BROKEN @@ -394,6 +384,17 @@ config X86_UV This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. +config X86_ELAN + bool "AMD Elan" + depends on X86_32 + depends on X86_NON_STANDARD + help + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" From f67ae5c9e52e385492b94c14376e322004701555 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:20:09 +0100 Subject: [PATCH 306/682] x86: move VOYAGER to the NON_STANDARD_PLATFORM section Move X86_ELAN (old, NCR hw platform built on Intel CPUs) from the subarchitecture menu to the non-standard-platform section. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b7617e3781a..207dc9592d5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,18 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on X86_32 && SMP && !PCI && BROKEN - help - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config X86_GENERICARCH bool "Generic architecture" depends on X86_32 @@ -395,6 +383,19 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. +config X86_VOYAGER + bool "Voyager (NCR)" + depends on X86_32 && SMP && !PCI && BROKEN + depends on X86_NON_STANDARD + help + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" From 9c39801763ed6e08ea8bc694c5ab936643a2b763 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:24:57 +0100 Subject: [PATCH 307/682] x86: move non-standard 32-bit platform Kconfig entries - make X86_GENERICARCH depend X86_NON_STANDARD - move X86_SUMMIT, X86_ES7000 and X86_BIGSMP out of the subarchitecture menu and under this option Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 88 +++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 207dc9592d5..2fe298297ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,51 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_GENERICARCH - bool "Generic architecture" - depends on X86_32 - help - This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default - subarchitectures. It is intended for a generic binary kernel. - if you select them all, kernel will probe it one by one. and will - fallback to default. - -if X86_GENERICARCH - -config X86_NUMAQ - bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 && PCI && X86_MPPARSE - select NUMA - help - This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) - NUMA multiquad box. This changes the way that processors are - bootstrapped, and uses Clustered Logical APIC addressing mode instead - of Flat Logical. You will need a new lynxer.elf file to flash your - firmware with - send email to . - -config X86_SUMMIT - bool "Summit/EXA (IBM x440)" - depends on X86_32 && SMP - help - This option is needed for IBM systems that use the Summit/EXA chipset. - In particular, it is needed for the x440. - -config X86_ES7000 - bool "Support for Unisys ES7000 IA32 series" - depends on X86_32 && SMP - help - Support for Unisys ES7000 systems. Say 'Y' here if this kernel is - supposed to run on an IA32-based Unisys ES7000 system. - -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_32 && SMP - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - -endif - config X86_VSMP bool "Support for ScaleMP vSMP" select PARAVIRT @@ -396,6 +351,49 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. +config X86_GENERICARCH + bool "Support non-standard 32-bit SMP architectures" + depends on X86_32 && SMP + depends on X86_NON_STANDARD + help + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + +config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" + depends on X86_GENERICARCH + select NUMA + select X86_MPPARSE + help + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to . + +config X86_SUMMIT + bool "Summit/EXA (IBM x440)" + depends on X86_GENERICARCH + help + This option is needed for IBM systems that use the Summit/EXA chipset. + In particular, it is needed for the x440. + +config X86_ES7000 + bool "Support for Unisys ES7000 IA32 series" + depends on X86_GENERICARCH + help + Support for Unisys ES7000 systems. Say 'Y' here if this kernel is + supposed to run on an IA32-based Unisys ES7000 system. + +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_GENERICARCH + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" From 6a48565ed6ac76f351def25cd5e9f181331065f6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:29:13 +0100 Subject: [PATCH 308/682] x86: move X86_VSMP from subarch menu Move X86_VSMP out of the subarch menu - this way it can be enabled together with standard PC support as well, in the same kernel. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fe298297ce..ddddfb8fe09 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -271,15 +271,6 @@ config X86_PC help Choose this option if your computer is a standard PC or compatible. -config X86_VSMP - bool "Support for ScaleMP vSMP" - select PARAVIRT - depends on X86_64 && PCI - help - Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is - supposed to run on these EM64T-based machines. Only choose this option - if you have one of these machines. - endchoice config X86_NON_STANDARD @@ -327,6 +318,16 @@ config X86_UV This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. +config X86_VSMP + bool "Support for ScaleMP vSMP" + select PARAVIRT + depends on X86_64 && PCI + depends on X86_NON_STANDARD + help + Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is + supposed to run on these EM64T-based machines. Only choose this option + if you have one of these machines. + config X86_ELAN bool "AMD Elan" depends on X86_32 From e2c75d9f54334646b3dcdf1fea0d1afe7bfbf644 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:31:41 +0100 Subject: [PATCH 309/682] x86: remove the subarch menu Remove the subarch menu and standardize on X86_PC. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ddddfb8fe09..4773f1c54fb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -262,16 +262,8 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -choice - prompt "Subarchitecture Type" - default X86_PC - config X86_PC - bool "PC-compatible" - help - Choose this option if your computer is a standard PC or compatible. - -endchoice + def_bool y config X86_NON_STANDARD bool "Support for non-standard x86 platforms" From e0c7ae376a13fd79a4dad8becab51040d13dfa90 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:43:09 +0100 Subject: [PATCH 310/682] x86: rename X86_GENERICARCH to X86_32_NON_STANDARD X86_GENERICARCH is a misnomer - it contains non-PC 32-bit architectures that are not included in the default build. Rename it to X86_32_NON_STANDARD. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 22 +++++++++++----------- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- drivers/mtd/nand/Kconfig | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4773f1c54fb..1427cb1ccd9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -344,7 +344,7 @@ config X86_VOYAGER If you do not specifically know you have a Voyager based machine, say N here, otherwise the kernel you build will not be bootable. -config X86_GENERICARCH +config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP depends on X86_NON_STANDARD @@ -356,7 +356,7 @@ config X86_GENERICARCH config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD select NUMA select X86_MPPARSE help @@ -368,21 +368,21 @@ config X86_NUMAQ config X86_SUMMIT bool "Summit/EXA (IBM x440)" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD help This option is needed for IBM systems that use the Summit/EXA chipset. In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD help This option is needed for the systems that have more than 8 CPUs and if the system is not of any sub-arch type above. @@ -475,11 +475,11 @@ config MEMTEST config X86_SUMMIT_NUMA def_bool y - depends on X86_32 && NUMA && X86_GENERICARCH + depends on X86_32 && NUMA && X86_32_NON_STANDARD config X86_CYCLONE_TIMER def_bool y - depends on X86_GENERICARCH + depends on X86_32_NON_STANDARD source "arch/x86/Kconfig.cpu" @@ -651,7 +651,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_GENERICARCH + depends on X86_32 && !SMP && !X86_32_NON_STANDARD help A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -676,11 +676,11 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y - depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC config X86_IO_APIC def_bool y - depends on X86_64 || SMP || X86_GENERICARCH || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC config X86_VISWS_APIC def_bool y @@ -1122,7 +1122,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_32_NON_STANDARD select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cb8b52785e3..7352c60f29d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1335,7 +1335,7 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_bigsmp_probe(); #endif /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 94fe71029c3..89aaced51bd 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -372,7 +372,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) (*x86_quirks->mpc_record)++; } -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_bigsmp_probe(); #endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6abce6703c5..f64e1a487c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_GENERICARCH +#ifdef CONFIG_X86_32_NON_STANDARD generic_apic_probe(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bc7e220ba0b..fc80bc18943 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1007,7 +1007,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_WARNING "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); + "Use CONFIG_X86_32_NON_STANDARD and CONFIG_X86_BIGSMP.\n"); nr = 0; for_each_present_cpu(cpu) { diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 8b12e6e109d..928923665f6 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -273,7 +273,7 @@ config MTD_NAND_CAFE config MTD_NAND_CS553X tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)" - depends on X86_32 && (X86_PC || X86_GENERICARCH) + depends on X86_32 && (X86_PC || X86_32_NON_STANDARD) help The CS553x companion chips for the AMD Geode processor include NAND flash controllers with built-in hardware ECC From 3769e7b4d8ef113e08221a210f849ba57475aff5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 27 Jan 2009 18:46:23 +0100 Subject: [PATCH 311/682] x86/Voyager: move to the X86_32_NON_STANDARD code section Make Voyager depend on X86_32_NON_STANDARD - it is a non-standard 32-bit SMP architecture. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1427cb1ccd9..5bf0e0c5828 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -331,19 +331,6 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on X86_32 && SMP && !PCI && BROKEN - depends on X86_NON_STANDARD - help - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP @@ -354,6 +341,13 @@ config X86_32_NON_STANDARD if you select them all, kernel will probe it one by one. and will fallback to default. +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32_NON_STANDARD + help + This option is needed for the systems that have more than 8 CPUs + and if the system is not of any sub-arch type above. + config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD @@ -380,12 +374,18 @@ config X86_ES7000 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" +config X86_VOYAGER + bool "Voyager (NCR)" + depends on SMP && !PCI && BROKEN depends on X86_32_NON_STANDARD help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. + Voyager is an MCA-based 32-way capable SMP architecture proprietary + to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. + + *** WARNING *** + + If you do not specifically know you have a Voyager based machine, + say N here, otherwise the kernel you build will not be bootable. config SCHED_OMIT_FRAME_POINTER def_bool y From 0a1e8869f453ceda121a1ff8d6c4380832377be7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 28 Jan 2009 23:21:25 +0300 Subject: [PATCH 312/682] x86: trampoline_64.S - use predefined constants with simplification Impact: cleanup We may use macros from processor-flags.h instead of hardcoding bits. Actually it's not direct mapping of old instructions with new ones -- BTS does change CF flag while MOV does not. But i didn't find any dependency on CF in this code. Signed-off-by: Cyrill Gorcunov Acked-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/trampoline_64.S | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 894293c598d..95a012a4664 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -29,6 +29,7 @@ #include #include #include +#include .section .rodata, "a", @progbits @@ -37,7 +38,7 @@ ENTRY(trampoline_data) r_base = . cli # We should be safe anyway - wbinvd + wbinvd mov %cs, %ax # Code and data in the same place mov %ax, %ds mov %ax, %es @@ -73,9 +74,8 @@ r_base = . lidtl tidt - r_base # load idt with 0, 0 lgdtl tgdt - r_base # load gdt with whatever is appropriate - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode + mov $X86_CR0_PE, %ax # protected mode (PE) bit + lmsw %ax # into protected mode # flush prefetch and jump to startup_32 ljmpl *(startup_32_vector - r_base) @@ -86,9 +86,8 @@ startup_32: movl $__KERNEL_DS, %eax # Initialize the %ds segment register movl %eax, %ds - xorl %eax, %eax - btsl $5, %eax # Enable PAE mode - movl %eax, %cr4 + movl $X86_CR4_PAE, %eax + movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables leal (trampoline_level4_pgt - r_base)(%esi), %eax @@ -99,9 +98,9 @@ startup_32: xorl %edx, %edx wrmsr - xorl %eax, %eax - btsl $31, %eax # Enable paging and in turn activate Long Mode - btsl $0, %eax # Enable protected mode + # Enable paging and in turn activate Long Mode + # Enable protected mode + movl $(X86_CR0_PG | X86_CR0_PE), %eax movl %eax, %cr0 /* From 010060741ad35eacb504414bc6fb9bb575b15f62 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Jan 2009 16:02:12 +0100 Subject: [PATCH 313/682] x86: add might_sleep() to do_page_fault() Impact: widen debug checks VirtualBox calls do_page_fault() from an atomic context but runs into a might_sleep() way pas this point, cure that. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8f4b859a04b..eb4d7fe0593 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -888,6 +888,12 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) return; } down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in which + * case we'll have missed the might_sleep() from down_read(). + */ + might_sleep(); } vma = find_vma(mm, address); From dba3d36b2f0842ed7f25c33cd3a2ccdb3d0df9db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 Jan 2009 17:10:12 +0100 Subject: [PATCH 314/682] Revert "generic, x86: fix __per_cpu_load relocation" This reverts commit 5a611268b69f05262936dd177205acbce4471358. It is causing occasional boot crashes, caused by certain linker versions (GNU ld version 2.18.50.0.6-2 20080403) messing up: 82dcc000 D __per_cpu_load c16e6000 A __per_cpu_load_abs The __per_cpu_load value is out of whack. Hpa noticed the following detail: * (gdb) p/x -(0xc16e6000-0x82dcc000) * $2 = 0xc16e6000 * I.e. one is the other << 1 The two symbols should be equal. Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f3180a85c66..53e21f36a80 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,18 +451,17 @@ * end offset. */ #define PERCPU_VADDR(vaddr, phdr) \ - VMLINUX_SYMBOL(__per_cpu_load_abs) = .; \ - .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load_abs) \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ - VMLINUX_SYMBOL(__per_cpu_load) = LOADADDR(.data.percpu) + LOAD_OFFSET;\ *(.data.percpu.first) \ *(.data.percpu.page_aligned) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ VMLINUX_SYMBOL(__per_cpu_end) = .; \ } phdr \ - . = VMLINUX_SYMBOL(__per_cpu_load_abs) + SIZEOF(.data.percpu); + . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); /** * PERCPU - define output section for percpu area, simple version From fbeb2ca0224182033f196cf8f63989c3e6b90aba Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 29 Jan 2009 12:11:08 -0800 Subject: [PATCH 315/682] x86: unify genapic code, unify subarchitectures, remove old subarchitecture code, xapic fix xapic fix for 32bit platform with less than 8 cpu's. Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/probe_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 61339a0d55c..b5db26f8e06 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -113,7 +113,7 @@ struct genapic apic_default = { .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, From 019a1369667c3978f9644982ebe6d261dd2bbc40 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 29 Jan 2009 11:49:18 -0800 Subject: [PATCH 316/682] x86: uaccess: fix compilation error on CONFIG_M386 In case of !CONFIG_X86_WP_WORKS_OK, __put_user_size_ex() is not defined. Add macros for !CONFIG_X86_WP_WORKS_OK case. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 0ec6de4bcb0..b9a24155f7a 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -525,8 +525,6 @@ struct __large_struct { unsigned long buf[100]; }; */ #define get_user_try uaccess_try #define get_user_catch(err) uaccess_catch(err) -#define put_user_try uaccess_try -#define put_user_catch(err) uaccess_catch(err) #define get_user_ex(x, ptr) do { \ unsigned long __gue_val; \ @@ -534,9 +532,29 @@ struct __large_struct { unsigned long buf[100]; }; (x) = (__force __typeof__(*(ptr)))__gue_val; \ } while (0) +#ifdef CONFIG_X86_WP_WORKS_OK + +#define put_user_try uaccess_try +#define put_user_catch(err) uaccess_catch(err) + #define put_user_ex(x, ptr) \ __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) +#else /* !CONFIG_X86_WP_WORKS_OK */ + +#define put_user_try do { \ + int __uaccess_err = 0; + +#define put_user_catch(err) \ + (err) |= __uaccess_err; \ +} while (0) + +#define put_user_ex(x, ptr) do { \ + __uaccess_err |= __put_user(x, ptr); \ +} while (0) + +#endif /* CONFIG_X86_WP_WORKS_OK */ + /* * movsl can be slow when source and dest are not both 8-byte aligned */ From 4272ebfbefd0db40073f3ee5990bceaf2894f08b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 15:14:46 -0800 Subject: [PATCH 317/682] x86: allow more than 8 cpus to be used on 32-bit X86_PC is the only remaining 'sub' architecture, so we dont need it anymore. This also cleans up a few spurious references to X86_PC in the driver space - those certainly should be X86. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 6 +----- arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - arch/x86/kernel/smpboot.c | 2 +- drivers/eisa/Kconfig | 6 +++--- drivers/input/keyboard/Kconfig | 4 ++-- drivers/input/mouse/Kconfig | 2 +- drivers/mtd/nand/Kconfig | 2 +- sound/drivers/Kconfig | 2 +- 9 files changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bf0e0c5828..afaf2cb7c1a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -262,9 +262,6 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it -config X86_PC - def_bool y - config X86_NON_STANDARD bool "Support for non-standard x86 platforms" help @@ -1019,7 +1016,6 @@ config NUMA bool "Numa Memory Allocation and Scheduler Support" depends on SMP depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) - default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) help Enable NUMA (Non Uniform Memory Access) support. @@ -1122,7 +1118,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_32_NON_STANDARD + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edba00d98ac..739bce993b5 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -188,7 +188,6 @@ CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y -CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 322dd2748fc..02b514e8f4c 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -187,7 +187,6 @@ CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y -CONFIG_X86_PC=y # CONFIG_X86_ELAN is not set # CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc80bc18943..2912fa3a8ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1000,7 +1000,7 @@ static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); -#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) +#ifndef CONFIG_X86_BIGSMP if (def_to_bigsmp && nr_cpu_ids > 8) { unsigned int cpu; unsigned nr; diff --git a/drivers/eisa/Kconfig b/drivers/eisa/Kconfig index c0646576cf4..2705284f622 100644 --- a/drivers/eisa/Kconfig +++ b/drivers/eisa/Kconfig @@ -3,7 +3,7 @@ # config EISA_VLB_PRIMING bool "Vesa Local Bus priming" - depends on X86_PC && EISA + depends on X86 && EISA default n ---help--- Activate this option if your system contains a Vesa Local @@ -24,11 +24,11 @@ config EISA_PCI_EISA When in doubt, say Y. # Using EISA_VIRTUAL_ROOT on something other than an Alpha or -# an X86_PC may lead to crashes... +# an X86 may lead to crashes... config EISA_VIRTUAL_ROOT bool "EISA virtual root device" - depends on EISA && (ALPHA || X86_PC) + depends on EISA && (ALPHA || X86) default y ---help--- Activate this option if your system only have EISA bus diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 35561689ff3..ea2638b4198 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -13,11 +13,11 @@ menuconfig INPUT_KEYBOARD if INPUT_KEYBOARD config KEYBOARD_ATKBD - tristate "AT keyboard" if EMBEDDED || !X86_PC + tristate "AT keyboard" if EMBEDDED || !X86 default y select SERIO select SERIO_LIBPS2 - select SERIO_I8042 if X86_PC + select SERIO_I8042 if X86 select SERIO_GSCPS2 if GSC help Say Y here if you want to use a standard AT or PS/2 keyboard. Usually diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 093c8c1bca7..9bef935ef19 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -17,7 +17,7 @@ config MOUSE_PS2 default y select SERIO select SERIO_LIBPS2 - select SERIO_I8042 if X86_PC + select SERIO_I8042 if X86 select SERIO_GSCPS2 if GSC help Say Y here if you have a PS/2 mouse connected to your system. This diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 928923665f6..2ff88791ceb 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -273,7 +273,7 @@ config MTD_NAND_CAFE config MTD_NAND_CS553X tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)" - depends on X86_32 && (X86_PC || X86_32_NON_STANDARD) + depends on X86_32 help The CS553x companion chips for the AMD Geode processor include NAND flash controllers with built-in hardware ECC diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig index 0bcf14640fd..84714a65e5c 100644 --- a/sound/drivers/Kconfig +++ b/sound/drivers/Kconfig @@ -33,7 +33,7 @@ if SND_DRIVERS config SND_PCSP tristate "PC-Speaker support (READ HELP!)" - depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS + depends on PCSPKR_PLATFORM && X86 && HIGH_RES_TIMERS depends on INPUT depends on EXPERIMENTAL select SND_PCM From 754ef0cd65faac4840ada4362bda322d9a811fbf Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 28 Jan 2009 12:51:09 +0900 Subject: [PATCH 318/682] x86: fix debug message of CPU clock speed Impact: Fixes incorrect printk LOCAL APIC is corrected by PM-Timer, when SMI occurred while LOCAL APIC is calibrated. In this case, LOCAL APIC debug message(Boot with apic=debug) is displayed correctly, however, CPU clock speed debug message is displayed wrongly . When SMI occured on my machine, which has 1.6GHz CPU, CPU clock speed is displayed 3622.0205 MHz as follow. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773130 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773130) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 3622.0205 MHz. =====> here ..... host bus clock speed is 265.0987 MHz. This patch fixes to displaying CPU clock speed correctly as follow. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773131 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773131) TSC delta adjusted to PM-Timer: 159592409 (362220564) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 1595.0924 MHz. ..... host bus clock speed is 265.0987 MHz. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 47 +++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 4b6df2469fe..7bcd746d704 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -535,7 +535,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } -static int __init calibrate_by_pmtimer(long deltapm, long *delta) +static int __init +calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) { const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; const long pm_thresh = pm_100ms / 100; @@ -557,18 +558,29 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) if (deltapm > (pm_100ms - pm_thresh) && deltapm < (pm_100ms + pm_thresh)) { apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64)deltapm) * mult) >> 22; - do_div(res, 1000000); - pr_warning("APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64)(*delta)) * pm_100ms); + return 0; + } + + res = (((u64)deltapm) * mult) >> 22; + do_div(res, 1000000); + pr_warning("APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n",(long)res); + + /* Correct the lapic counter value */ + res = (((u64)(*delta)) * pm_100ms); + do_div(res, deltapm); + pr_info("APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long)res, *delta); + *delta = (long)res; + + /* Correct the tsc counter value */ + if (cpu_has_tsc) { + res = (((u64)(*deltatsc)) * pm_100ms); do_div(res, deltapm); - pr_info("APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long)res, *delta); - *delta = (long)res; + apic_printk(APIC_VERBOSE, "TSC delta adjusted to " + "PM-Timer: %lu (%ld) \n", + (unsigned long)res, *deltatsc); + *deltatsc = (long)res; } return 0; @@ -579,7 +591,7 @@ static int __init calibrate_APIC_clock(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; - long delta; + long delta, deltatsc; int pm_referenced = 0; local_irq_disable(); @@ -609,9 +621,11 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + /* we trust the PM based calibration if possible */ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, - &delta); + &delta, &deltatsc); /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -629,11 +643,10 @@ static int __init calibrate_APIC_clock(void) calibration_result); if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); } apic_printk(APIC_VERBOSE, "..... host bus clock speed is " From 39ba5d43fc9133696240fc8b6b13e7a41fea87cd Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 28 Jan 2009 12:52:24 +0900 Subject: [PATCH 319/682] x86: unify PM-Timer messages Impact: Cleans up printk formatting When LOCAL APIC was calibrated, the debug message is displayed as follows. CPU0: Intel(R) Xeon(R) CPU 5110 @ 1.60GHz stepping 06 Using local APIC timer interrupts. calibrating APIC timer ... ... lapic delta = 3773131 ... PM timer delta = 812434 APIC calibration not consistent with PM Timer: 226ms instead of 100ms APIC delta adjusted to PM-Timer: 1662420 (3773131) TSC delta adjusted to PM-Timer: 159592409 (362220564) ..... delta 1662420 ..... mult: 71411249 ..... calibration result: 265987 ..... CPU clock speed is 1595.0924 MHz. ..... host bus clock speed is 265.0987 MHz. There are three type of PM-Timer (PM-Timer, PM Timer, and PM timer), in this message. This patch unifies those messages to PM-Timer. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7bcd746d704..0d935d218d0 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -547,7 +547,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) return -1; #endif - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); /* Check, if the PM timer is available */ if (!deltapm) @@ -557,14 +557,14 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) if (deltapm > (pm_100ms - pm_thresh) && deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); return 0; } res = (((u64)deltapm) * mult) >> 22; do_div(res, 1000000); pr_warning("APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n",(long)res); + "with PM-Timer: %ldms instead of 100ms\n",(long)res); /* Correct the lapic counter value */ res = (((u64)(*delta)) * pm_100ms); From 36ef4944ee8118491631e317e406f9bd15e20e97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:29:24 -0800 Subject: [PATCH 320/682] x86, apic unification: remove left over files Impact: cleanup remove unused files Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bigsmp/apic.h | 138 -------------------------- arch/x86/include/asm/bigsmp/apicdef.h | 9 -- arch/x86/include/asm/bigsmp/ipi.h | 22 ---- arch/x86/mach-default/Makefile | 5 - 4 files changed, 174 deletions(-) delete mode 100644 arch/x86/include/asm/bigsmp/apic.h delete mode 100644 arch/x86/include/asm/bigsmp/apicdef.h delete mode 100644 arch/x86/include/asm/bigsmp/ipi.h delete mode 100644 arch/x86/mach-default/Makefile diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h deleted file mode 100644 index ee29d66cd30..00000000000 --- a/arch/x86/include/asm/bigsmp/apic.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H - -#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) - -static inline int bigsmp_apic_id_registered(void) -{ - return 1; -} - -static inline const cpumask_t *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP - return &cpu_online_map; -#else - return &cpumask_of_cpu(0); -#endif -} - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline unsigned long -bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return 0; -} - -static inline unsigned long bigsmp_check_apicid_present(int bit) -{ - return 1; -} - -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = xapic_phys_to_log_apicid(cpu); - val |= SET_APIC_LOGICAL_ID(id); - return val; -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static inline void bigsmp_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_VALUE); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static inline void bigsmp_setup_apic_routing(void) -{ - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Physflat", nr_ioapics); -} - -static inline int bigsmp_apicid_to_node(int logical_apicid) -{ - return apicid_2_node[hard_smp_processor_id()]; -} - -static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); - - return BAD_APICID; -} - -static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -extern u8 cpu_2_logical_apicid[]; -/* Mapping from cpu number to logical apicid */ -static inline int bigsmp_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return cpu_physical_id(cpu); -} - -static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) -{ - /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); -} - -static inline void bigsmp_setup_portio_remap(void) -{ -} - -static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return 1; -} - -/* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) -{ - return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); -} - -static inline unsigned int -bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - if (cpu < nr_cpu_ids) - return bigsmp_cpu_to_logical_apicid(cpu); - - return BAD_APICID; -} - -static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -#endif /* __ASM_MACH_APIC_H */ diff --git a/arch/x86/include/asm/bigsmp/apicdef.h b/arch/x86/include/asm/bigsmp/apicdef.h deleted file mode 100644 index e58dee84757..00000000000 --- a/arch/x86/include/asm/bigsmp/apicdef.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H - -static inline unsigned bigsmp_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -#endif diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h deleted file mode 100644 index a91db69cda6..00000000000 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - -static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence(mask, vector); -} - -static inline void bigsmp_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself(cpu_online_mask, vector); -} - -static inline void bigsmp_send_IPI_all(int vector) -{ - default_send_IPI_mask(cpu_online_mask, vector); -} - -#endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/mach-default/Makefile b/arch/x86/mach-default/Makefile deleted file mode 100644 index 012fe34459e..00000000000 --- a/arch/x86/mach-default/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for the linux kernel. -# - -obj-y := setup.o From 1ff2f20de354a621ef4b56b9cfe6f9139a7e493b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:30:04 -0800 Subject: [PATCH 321/682] x86: fix compiling with 64bit with def_to_bigsmp only need to do cut off with 32bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2912fa3a8ef..4c3cff57494 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1000,7 +1000,7 @@ static int __init smp_sanity_check(unsigned max_cpus) { preempt_disable(); -#ifndef CONFIG_X86_BIGSMP +#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) if (def_to_bigsmp && nr_cpu_ids > 8) { unsigned int cpu; unsigned nr; From 43f39890db2959b10891cf7bbf3f53fffc8ce3bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 19:31:49 -0800 Subject: [PATCH 322/682] x86: seperate default_send_IPI_mask_sequence/allbutself from logical Impact: 32-bit should use logical version there are two version: for default_send_IPI_mask_sequence/allbutself one in ipi.h and one in ipi.c for 32bit it seems .h version overwrote ipi.c for a while. restore it so 32 bit could use its old logical version. also remove dupicated functions in .c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ipi.h | 71 ++++++++++++--- arch/x86/kernel/bigsmp_32.c | 8 +- arch/x86/kernel/es7000_32.c | 4 +- arch/x86/kernel/genapic_flat_64.c | 6 +- arch/x86/kernel/ipi.c | 139 +----------------------------- arch/x86/kernel/numaq_32.c | 8 +- arch/x86/kernel/probe_32.c | 4 +- arch/x86/kernel/summit_32.c | 6 +- 8 files changed, 77 insertions(+), 169 deletions(-) diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index e2e8e4e0a65..aa79945445b 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -120,7 +120,7 @@ static inline void } static inline void -default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) +default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) { unsigned long query_cpu; unsigned long flags; @@ -139,7 +139,7 @@ default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) } static inline void -default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector) { unsigned int this_cpu = smp_processor_id(); unsigned int query_cpu; @@ -157,23 +157,72 @@ default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) local_irq_restore(flags); } +#include + +static inline void +default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + local_irq_restore(flags); +} + +static inline void +default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + } + local_irq_restore(flags); +} /* Avoid include hell */ #define NMI_VECTOR 0x02 -void default_send_IPI_mask_bitmask(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - extern int no_broadcast; -#ifdef CONFIG_X86_64 -#include -#else -static inline void default_send_IPI_mask(const struct cpumask *mask, int vector) +#ifndef CONFIG_X86_64 +/* + * This is only used on smaller machines. + */ +static inline void default_send_IPI_mask_bitmask_logical(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_bitmask(mask, vector); + unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long flags; + + local_irq_save(flags); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + local_irq_restore(flags); +} + +static inline void default_send_IPI_mask_logical(const struct cpumask *mask, int vector) +{ + default_send_IPI_mask_bitmask_logical(mask, vector); } -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __default_local_send_IPI_allbutself(int vector) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index b1f91931003..ab645c93a6e 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -154,17 +155,14 @@ static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_phys(mask, vector); } static inline void bigsmp_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static inline void bigsmp_send_IPI_all(int vector) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 078364ccfa0..b5b50e8b94a 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -451,12 +451,12 @@ static int es7000_check_dsdt(void) static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_phys(mask, vector); } static void es7000_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static void es7000_send_IPI_all(int vector) diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 19bffb3f732..249d2d3c034 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -267,18 +267,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_sequence(cpumask, vector); + default_send_IPI_mask_sequence_phys(cpumask, vector); } static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { - default_send_IPI_mask_allbutself(cpumask, vector); + default_send_IPI_mask_allbutself_phys(cpumask, vector); } static void physflat_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 0893fa14458..339f4f3feee 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -17,148 +17,13 @@ #include #include #include +#include #ifdef CONFIG_X86_32 -#include - -/* - * the following functions deal with sending IPIs between CPUs. - * - * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. - */ - -static inline int __prepare_ICR(unsigned int shortcut, int vector) -{ - unsigned int icr = shortcut | apic->dest_logical; - - switch (vector) { - default: - icr |= APIC_DM_FIXED | vector; - break; - case NMI_VECTOR: - icr |= APIC_DM_NMI; - break; - } - return icr; -} - -static inline int __prepare_ICR2(unsigned int mask) -{ - return SET_APIC_DEST_FIELD(mask); -} - -void __default_send_IPI_shortcut(unsigned int shortcut, int vector) -{ - /* - * Subtle. In the case of the 'never do double writes' workaround - * we have to lock out interrupts to be safe. As we don't care - * of the value read we use an atomic rmw access to avoid costly - * cli/sti. Otherwise we use an even cheaper single atomic write - * to the APIC. - */ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - /* - * No need to touch the target chip field - */ - cfg = __prepare_ICR(shortcut, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} void default_send_IPI_self(int vector) { - __default_send_IPI_shortcut(APIC_DEST_SELF, vector); -} - -/* - * This is used to send an IPI with no shorthand notation (the destination is - * specified in bits 56 to 63 of the ICR). - */ -static inline void __default_send_IPI_dest_field(unsigned long mask, int vector) -{ - unsigned long cfg; - - /* - * Wait for idle. - */ - if (unlikely(vector == NMI_VECTOR)) - safe_apic_wait_icr_idle(); - else - apic_wait_icr_idle(); - - /* - * prepare target chip field - */ - cfg = __prepare_ICR2(mask); - apic_write(APIC_ICR2, cfg); - - /* - * program the ICR - */ - cfg = __prepare_ICR(0, vector); - - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} - -/* - * This is only used on smaller machines. - */ -void default_send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - unsigned long flags; - - local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector); - local_irq_restore(flags); -} - -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field(apic->cpu_to_logical_apicid(query_cpu), vector); - local_irq_restore(flags); -} - -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector); - } - local_irq_restore(flags); + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); } /* must come after the send_IPI functions above for inlining */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 83bb05524f4..c143b329216 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -302,6 +302,7 @@ int __init get_memcfg_numaq(void) #include #include #include +#include #include #include #include @@ -319,17 +320,14 @@ static inline unsigned int numaq_get_apic_id(unsigned long x) return (x >> 24) & 0x0F; } -void default_send_IPI_mask_sequence(const struct cpumask *mask, int vector); -void default_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); - static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_logical(mask, vector); } static inline void numaq_send_IPI_allbutself(int vector) { - default_send_IPI_mask_allbutself(cpu_online_mask, vector); + default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); } static inline void numaq_send_IPI_all(int vector) diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index b5db26f8e06..3f12a401182 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -112,8 +112,8 @@ struct genapic apic_default = { .cpu_mask_to_apicid = default_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, - .send_IPI_mask = default_send_IPI_mask, - .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself, + .send_IPI_mask = default_send_IPI_mask_logical, + .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, .send_IPI_self = NULL, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 84ff9ebbcc9..ecb41b9d7aa 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -54,12 +55,9 @@ static inline unsigned summit_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -void default_send_IPI_mask_sequence(const cpumask_t *mask, int vector); -void default_send_IPI_mask_allbutself(const cpumask_t *mask, int vector); - static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) { - default_send_IPI_mask_sequence(mask, vector); + default_send_IPI_mask_sequence_logical(mask, vector); } static inline void summit_send_IPI_allbutself(int vector) From 26f7ef14a76b0e590a3797fd7b2f3cee868d9664 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 29 Jan 2009 14:19:22 -0800 Subject: [PATCH 323/682] x86: don't treat bigsmp as non-standard just like 64 bit switch from flat logical APIC messages to flat physical mode automatically. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 15 +++++++-------- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/mpparse.c | 4 ++-- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index afaf2cb7c1a..c6e567bb649 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -262,6 +262,12 @@ config X86_MPPARSE For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it +config X86_BIGSMP + bool "Support for big SMP systems with more than 8 CPUs" + depends on X86_32 && SMP + help + This option is needed for the systems that have more than 8 CPUs + config X86_NON_STANDARD bool "Support for non-standard x86 platforms" help @@ -338,13 +344,6 @@ config X86_32_NON_STANDARD if you select them all, kernel will probe it one by one. and will fallback to default. -config X86_BIGSMP - bool "Support for big SMP systems with more than 8 CPUs" - depends on X86_32_NON_STANDARD - help - This option is needed for the systems that have more than 8 CPUs - and if the system is not of any sub-arch type above. - config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD @@ -366,7 +365,7 @@ config X86_SUMMIT config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" - depends on X86_32_NON_STANDARD + depends on X86_32_NON_STANDARD && X86_BIGSMP help Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7352c60f29d..3efa996b036 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1335,7 +1335,7 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_32_NON_STANDARD +#ifdef CONFIG_X86_BIGSMP generic_bigsmp_probe(); #endif /* diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 89aaced51bd..b46ca7d31fe 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -372,8 +372,8 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) (*x86_quirks->mpc_record)++; } -#ifdef CONFIG_X86_32_NON_STANDARD - generic_bigsmp_probe(); +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); #endif if (apic->setup_apic_routing) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f64e1a487c9..df64afff580 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_32_NON_STANDARD +#if defined(CONFIG_X86_32_NON_STANDARD) || defined(CONFIG_X86_BIGSMP) generic_apic_probe(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4c3cff57494..1268a862abb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1007,7 +1007,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_WARNING "More than 8 CPUs detected - skipping them.\n" - "Use CONFIG_X86_32_NON_STANDARD and CONFIG_X86_BIGSMP.\n"); + "Use CONFIG_X86_BIGSMP.\n"); nr = 0; for_each_present_cpu(cpu) { From 3ac6cffea4aa18007a454a7442da2855882f403d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 30 Jan 2009 16:32:22 +0900 Subject: [PATCH 324/682] linker script: use separate simpler definition for PERCPU() Impact: fix linker screwup on x86_32 Recent x86_64 zerobased patches introduced PERCPU_VADDR() to put .data.percpu to a predefined address and re-defined PERCPU() in terms of it. The new macro defined one extra symbol, __per_cpu_load, for LMA of the section so that the init data could be accessed. This new symbol introduced the following problems to x86_32. 1. If __per_cpu_load is defined outside of .data.percpu as an absolute symbol, relocation generation for relocatable kernel fails due to absolute relocation. 2. If __per_cpu_load is put inside .data.percpu with absolute address assignment to work around #1, linker gets confused and under certain configurations ends up relocating the symbol against .data.percpu such that the load address gets added on top of already set load address. As x86_32 doesn't use predefined address for .data.percpu, there's no need for it to care about the possibility of __per_cpu_load being different from __per_cpu_start. This patch defines PERCPU() separately so that __per_cpu_load is defined inside .data.percpu so that everything is ordinary linking-wise. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 53e21f36a80..5406e70aba8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -445,10 +445,9 @@ * section in the linker script will go there too. @phdr should have * a leading colon. * - * This macro defines three symbols, __per_cpu_load, __per_cpu_start - * and __per_cpu_end. The first one is the vaddr of loaded percpu - * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the - * end offset. + * Note that this macros defines __per_cpu_load as an absolute symbol. + * If there is no need to put the percpu section at a predetermined + * address, use PERCPU(). */ #define PERCPU_VADDR(vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ @@ -470,7 +469,20 @@ * Align to @align and outputs output section for percpu area. This * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and * __per_cpu_start will be identical. + * + * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except + * that __per_cpu_load is defined as a relative symbol against + * .data.percpu which is required for relocatable x86_32 + * configuration. */ #define PERCPU(align) \ . = ALIGN(align); \ - PERCPU_VADDR( , ) + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__per_cpu_load) = .; \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ + *(.data.percpu.first) \ + *(.data.percpu.page_aligned) \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; \ + } From 6b64ee02da20d6c0d97115e0b1ab47f9fa2f0d8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 30 Jan 2009 23:42:18 +0100 Subject: [PATCH 325/682] x86, apic, 32-bit: add self-IPI methods Impact: fix rare crash on 32-bit The 32-bit APIC drivers had their send_IPI_self vectors set to NULL, but ioapic_retrigger_irq() depends on it being always set. Fix it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 2 +- arch/x86/kernel/es7000_32.c | 2 +- arch/x86/kernel/numaq_32.c | 2 +- arch/x86/kernel/probe_32.c | 2 +- arch/x86/kernel/summit_32.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index ab645c93a6e..47a62f46afd 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -252,7 +252,7 @@ struct genapic apic_bigsmp = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = bigsmp_send_IPI_allbutself, .send_IPI_all = bigsmp_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index b5b50e8b94a..d6184c12a18 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -786,7 +786,7 @@ struct genapic apic_es7000 = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = es7000_send_IPI_allbutself, .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index c143b329216..947748e1721 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -554,7 +554,7 @@ struct genapic apic_numaq = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = numaq_send_IPI_allbutself, .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 3f12a401182..22337b75de6 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -116,7 +116,7 @@ struct genapic apic_default = { .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, .send_IPI_allbutself = default_send_IPI_allbutself, .send_IPI_all = default_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index ecb41b9d7aa..1e733eff9b3 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -588,7 +588,7 @@ struct genapic apic_summit = { .send_IPI_mask_allbutself = NULL, .send_IPI_allbutself = summit_send_IPI_allbutself, .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = NULL, + .send_IPI_self = default_send_IPI_self, .wakeup_cpu = NULL, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, From 319f3ba52c71630865b10ac3b99dd020440d681d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:01 -0800 Subject: [PATCH 326/682] xen: move remaining mmu-related stuff into mmu.c Impact: Cleanup Move remaining mmu-related stuff into mmu.c. A general cleanup, and lay the groundwork for later patches. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 731 +-------------------------------------- arch/x86/xen/mmu.c | 700 +++++++++++++++++++++++++++++++++++++ arch/x86/xen/mmu.h | 3 + arch/x86/xen/xen-ops.h | 8 + 4 files changed, 712 insertions(+), 730 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6b3f7eef57e..0cd2a165f17 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -61,35 +61,6 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); -/* - * Identity map, in addition to plain kernel map. This needs to be - * large enough to allocate page table pages to allocate the rest. - * Each page can map 2MB. - */ -static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; - -#ifdef CONFIG_X86_64 -/* l3 pud for userspace vsyscall mapping */ -static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; -#endif /* CONFIG_X86_64 */ - -/* - * Note about cr3 (pagetable base) values: - * - * xen_cr3 contains the current logical cr3 value; it contains the - * last set cr3. This may not be the current effective cr3, because - * its update may be being lazily deferred. However, a vcpu looking - * at its own cr3 can use this value knowing that it everything will - * be self-consistent. - * - * xen_current_cr3 contains the actual vcpu cr3; it is set once the - * hypercall to set the vcpu cr3 is complete (so it may be a little - * out of date, but it will never be set early). If one vcpu is - * looking at another vcpu's cr3 value, it should use this variable. - */ -DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ -DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ - struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); @@ -237,7 +208,7 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy(void) +void xen_leave_lazy(void) { paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); @@ -598,76 +569,6 @@ static struct apic_ops xen_basic_apic_ops = { #endif -static void xen_flush_tlb(void) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = MMUEXT_TLB_FLUSH_LOCAL; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_single(unsigned long addr) -{ - struct mmuext_op *op; - struct multicall_space mcs; - - preempt_disable(); - - mcs = xen_mc_entry(sizeof(*op)); - op = mcs.args; - op->cmd = MMUEXT_INVLPG_LOCAL; - op->arg1.linear_addr = addr & PAGE_MASK; - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - preempt_enable(); -} - -static void xen_flush_tlb_others(const struct cpumask *cpus, - struct mm_struct *mm, unsigned long va) -{ - struct { - struct mmuext_op op; - DECLARE_BITMAP(mask, NR_CPUS); - } *args; - struct multicall_space mcs; - - BUG_ON(cpumask_empty(cpus)); - BUG_ON(!mm); - - mcs = xen_mc_entry(sizeof(*args)); - args = mcs.args; - args->op.arg2.vcpumask = to_cpumask(args->mask); - - /* Remove us, and any offline CPUS. */ - cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); - if (unlikely(cpumask_empty(to_cpumask(args->mask)))) - goto issue; - - if (va == TLB_FLUSH_ALL) { - args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; - } else { - args->op.cmd = MMUEXT_INVLPG_MULTI; - args->op.arg1.linear_addr = va; - } - - MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); - -issue: - xen_mc_issue(PARAVIRT_LAZY_MMU); -} static void xen_clts(void) { @@ -693,21 +594,6 @@ static void xen_write_cr0(unsigned long cr0) xen_mc_issue(PARAVIRT_LAZY_CPU); } -static void xen_write_cr2(unsigned long cr2) -{ - percpu_read(xen_vcpu)->arch.cr2 = cr2; -} - -static unsigned long xen_read_cr2(void) -{ - return percpu_read(xen_vcpu)->arch.cr2; -} - -static unsigned long xen_read_cr2_direct(void) -{ - return percpu_read(xen_vcpu_info.arch.cr2); -} - static void xen_write_cr4(unsigned long cr4) { cr4 &= ~X86_CR4_PGE; @@ -716,71 +602,6 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } -static unsigned long xen_read_cr3(void) -{ - return percpu_read(xen_cr3); -} - -static void set_current_cr3(void *v) -{ - percpu_write(xen_current_cr3, (unsigned long)v); -} - -static void __xen_write_cr3(bool kernel, unsigned long cr3) -{ - struct mmuext_op *op; - struct multicall_space mcs; - unsigned long mfn; - - if (cr3) - mfn = pfn_to_mfn(PFN_DOWN(cr3)); - else - mfn = 0; - - WARN_ON(mfn == 0 && kernel); - - mcs = __xen_mc_entry(sizeof(*op)); - - op = mcs.args; - op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; - op->arg1.mfn = mfn; - - MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); - - if (kernel) { - percpu_write(xen_cr3, cr3); - - /* Update xen_current_cr3 once the batch has actually - been submitted. */ - xen_mc_callback(set_current_cr3, (void *)cr3); - } -} - -static void xen_write_cr3(unsigned long cr3) -{ - BUG_ON(preemptible()); - - xen_mc_batch(); /* disables interrupts */ - - /* Update while interrupts are disabled, so its atomic with - respect to ipis */ - percpu_write(xen_cr3, cr3); - - __xen_write_cr3(true, cr3); - -#ifdef CONFIG_X86_64 - { - pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); - if (user_pgd) - __xen_write_cr3(false, __pa(user_pgd)); - else - __xen_write_cr3(false, 0); - } -#endif - - xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ -} - static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -822,185 +643,6 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) return ret; } -/* Early in boot, while setting up the initial pagetable, assume - everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) -{ -#ifdef CONFIG_FLATMEM - BUG_ON(mem_map); /* should only be used early */ -#endif - make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); -} - -/* Early release_pte assumes that all pts are pinned, since there's - only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) -{ - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); -} - -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) -{ - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); -} - -/* This needs to make sure the new pte page is pinned iff its being - attached to a pinned pagetable. */ -static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(virt_to_page(mm->pgd))) { - SetPagePinned(page); - - vm_unmap_aliases(); - if (!PageHighMem(page)) { - make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else { - /* make sure there are no stray mappings of - this page */ - kmap_flush_unused(); - } - } -} - -static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PTE); -} - -static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PMD); -} - -static int xen_pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = mm->pgd; - int ret = 0; - - BUG_ON(PagePinned(virt_to_page(pgd))); - -#ifdef CONFIG_X86_64 - { - struct page *page = virt_to_page(pgd); - pgd_t *user_pgd; - - BUG_ON(page->private != 0); - - ret = -ENOMEM; - - user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - page->private = (unsigned long)user_pgd; - - if (user_pgd != NULL) { - user_pgd[pgd_index(VSYSCALL_START)] = - __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); - ret = 0; - } - - BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); - } -#endif - - return ret; -} - -static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifdef CONFIG_X86_64 - pgd_t *user_pgd = xen_get_user_pgd(pgd); - - if (user_pgd) - free_page((unsigned long)user_pgd); -#endif -} - -/* This should never happen until we're OK to use struct page */ -static void xen_release_ptpage(unsigned long pfn, unsigned level) -{ - struct page *page = pfn_to_page(pfn); - - if (PagePinned(page)) { - if (!PageHighMem(page)) { - if (level == PT_PTE && USE_SPLIT_PTLOCKS) - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); - make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); - } - ClearPagePinned(page); - } -} - -static void xen_release_pte(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PTE); -} - -static void xen_release_pmd(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PMD); -} - -#if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) -{ - xen_alloc_ptpage(mm, pfn, PT_PUD); -} - -static void xen_release_pud(unsigned long pfn) -{ - xen_release_ptpage(pfn, PT_PUD); -} -#endif - -#ifdef CONFIG_HIGHPTE -static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) -{ - pgprot_t prot = PAGE_KERNEL; - - if (PagePinned(page)) - prot = PAGE_KERNEL_RO; - - if (0 && PageHighMem(page)) - printk("mapping highpte %lx type %d prot %s\n", - page_to_pfn(page), type, - (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); - - return kmap_atomic_prot(page, type, prot); -} -#endif - -#ifdef CONFIG_X86_32 -static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) -{ - /* If there's an existing pte, then don't allow _PAGE_RW to be set */ - if (pte_val_ma(*ptep) & _PAGE_PRESENT) - pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & - pte_val_ma(pte)); - - return pte; -} - -/* Init-time set_pte while constructing initial pagetables, which - doesn't allow RO pagetable pages to be remapped RW */ -static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) -{ - pte = mask_rw_pte(ptep, pte); - - xen_set_pte(ptep, pte); -} -#endif - -static __init void xen_pagetable_setup_start(pgd_t *base) -{ -} - void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { @@ -1021,37 +663,6 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -static __init void xen_pagetable_setup_done(pgd_t *base) -{ - xen_setup_shared_info(); -} - -static __init void xen_post_allocator_init(void) -{ - pv_mmu_ops.set_pte = xen_set_pte; - pv_mmu_ops.set_pmd = xen_set_pmd; - pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.set_pgd = xen_set_pgd; -#endif - - /* This will work as long as patching hasn't happened yet - (which it hasn't) */ - pv_mmu_ops.alloc_pte = xen_alloc_pte; - pv_mmu_ops.alloc_pmd = xen_alloc_pmd; - pv_mmu_ops.release_pte = xen_release_pte; - pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 - pv_mmu_ops.alloc_pud = xen_alloc_pud; - pv_mmu_ops.release_pud = xen_release_pud; -#endif - -#ifdef CONFIG_X86_64 - SetPagePinned(virt_to_page(level3_user_vsyscall)); -#endif - xen_mark_init_mm_pinned(); -} - /* This is called once we have the cpu_possible_map */ void xen_setup_vcpu_info_placement(void) { @@ -1126,49 +737,6 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, return ret; } -static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) -{ - pte_t pte; - - phys >>= PAGE_SHIFT; - - switch (idx) { - case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: -#ifdef CONFIG_X86_F00F_BUG - case FIX_F00F_IDT: -#endif -#ifdef CONFIG_X86_32 - case FIX_WP_TEST: - case FIX_VDSO: -# ifdef CONFIG_HIGHMEM - case FIX_KMAP_BEGIN ... FIX_KMAP_END: -# endif -#else - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: -#endif -#ifdef CONFIG_X86_LOCAL_APIC - case FIX_APIC_BASE: /* maps dummy local APIC */ -#endif - pte = pfn_pte(phys, prot); - break; - - default: - pte = mfn_pte(phys, prot); - break; - } - - __native_set_fixmap(idx, pte); - -#ifdef CONFIG_X86_64 - /* Replicate changes to map the vsyscall page into the user - pagetable vsyscall mapping. */ - if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { - unsigned long vaddr = __fix_to_virt(idx); - set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); - } -#endif -} - static const struct pv_info xen_info __initdata = { .paravirt_enabled = 1, .shared_kernel_pmd = 0, @@ -1264,86 +832,6 @@ static const struct pv_apic_ops xen_apic_ops __initdata = { #endif }; -static const struct pv_mmu_ops xen_mmu_ops __initdata = { - .pagetable_setup_start = xen_pagetable_setup_start, - .pagetable_setup_done = xen_pagetable_setup_done, - - .read_cr2 = xen_read_cr2, - .write_cr2 = xen_write_cr2, - - .read_cr3 = xen_read_cr3, - .write_cr3 = xen_write_cr3, - - .flush_tlb_user = xen_flush_tlb, - .flush_tlb_kernel = xen_flush_tlb, - .flush_tlb_single = xen_flush_tlb_single, - .flush_tlb_others = xen_flush_tlb_others, - - .pte_update = paravirt_nop, - .pte_update_defer = paravirt_nop, - - .pgd_alloc = xen_pgd_alloc, - .pgd_free = xen_pgd_free, - - .alloc_pte = xen_alloc_pte_init, - .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, - .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, - -#ifdef CONFIG_HIGHPTE - .kmap_atomic_pte = xen_kmap_atomic_pte, -#endif - -#ifdef CONFIG_X86_64 - .set_pte = xen_set_pte, -#else - .set_pte = xen_set_pte_init, -#endif - .set_pte_at = xen_set_pte_at, - .set_pmd = xen_set_pmd_hyper, - - .ptep_modify_prot_start = __ptep_modify_prot_start, - .ptep_modify_prot_commit = __ptep_modify_prot_commit, - - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, - - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, - -#ifdef CONFIG_X86_PAE - .set_pte_atomic = xen_set_pte_atomic, - .set_pte_present = xen_set_pte_at, - .pte_clear = xen_pte_clear, - .pmd_clear = xen_pmd_clear, -#endif /* CONFIG_X86_PAE */ - .set_pud = xen_set_pud_hyper, - - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, - -#if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, - .set_pgd = xen_set_pgd_hyper, - - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, -#endif /* PAGETABLE_LEVELS == 4 */ - - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, - .exit_mmap = xen_exit_mmap, - - .lazy_mode = { - .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, - }, - - .set_fixmap = xen_set_fixmap, -}; - static void xen_reboot(int reason) { struct sched_shutdown r = { .reason = reason }; @@ -1386,223 +874,6 @@ static const struct machine_ops __initdata xen_machine_ops = { }; -static void __init xen_reserve_top(void) -{ -#ifdef CONFIG_X86_32 - unsigned long top = HYPERVISOR_VIRT_START; - struct xen_platform_parameters pp; - - if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) - top = pp.virt_start; - - reserve_top_address(-top); -#endif /* CONFIG_X86_32 */ -} - -/* - * Like __va(), but returns address in the kernel mapping (which is - * all we have until the physical memory mapping has been set up. - */ -static void *__ka(phys_addr_t paddr) -{ -#ifdef CONFIG_X86_64 - return (void *)(paddr + __START_KERNEL_map); -#else - return __va(paddr); -#endif -} - -/* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) -{ - phys_addr_t paddr; - - maddr &= PTE_PFN_MASK; - paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; - - return paddr; -} - -/* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) -{ - return __ka(m2p(maddr)); -} - -static void set_page_prot(void *addr, pgprot_t prot) -{ - unsigned long pfn = __pa(addr) >> PAGE_SHIFT; - pte_t pte = pfn_pte(pfn, prot); - - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) - BUG(); -} - -static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) -{ - unsigned pmdidx, pteidx; - unsigned ident_pte; - unsigned long pfn; - - ident_pte = 0; - pfn = 0; - for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { - pte_t *pte_page; - - /* Reuse or allocate a page of ptes */ - if (pmd_present(pmd[pmdidx])) - pte_page = m2v(pmd[pmdidx].pmd); - else { - /* Check for free pte pages */ - if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) - break; - - pte_page = &level1_ident_pgt[ident_pte]; - ident_pte += PTRS_PER_PTE; - - pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); - } - - /* Install mappings */ - for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { - pte_t pte; - - if (pfn > max_pfn_mapped) - max_pfn_mapped = pfn; - - if (!pte_none(pte_page[pteidx])) - continue; - - pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); - pte_page[pteidx] = pte; - } - } - - for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) - set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); - - set_page_prot(pmd, PAGE_KERNEL_RO); -} - -#ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) -{ - pte_t *pte = v; - int i; - - /* All levels are converted the same way, so just treat them - as ptes. */ - for (i = 0; i < PTRS_PER_PTE; i++) - pte[i] = xen_make_pte(pte[i].pte); -} - -/* - * Set up the inital kernel pagetable. - * - * We can construct this by grafting the Xen provided pagetable into - * head_64.S's preconstructed pagetables. We copy the Xen L2's into - * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This - * means that only the kernel has a physical mapping to start with - - * but that's enough to get __va working. We need to fill in the rest - * of the physical mapping once some sort of allocator has been set - * up. - */ -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pud_t *l3; - pmd_t *l2; - - /* Zap identity mapping */ - init_level4_pgt[0] = __pgd(0); - - /* Pre-constructed entries are in pfn, so convert to mfn */ - convert_pfn_mfn(init_level4_pgt); - convert_pfn_mfn(level3_ident_pgt); - convert_pfn_mfn(level3_kernel_pgt); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); - l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); - - /* Make pagetable pieces RO */ - set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - - /* Pin down new L4 */ - pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, - PFN_DOWN(__pa_symbol(init_level4_pgt))); - - /* Unpin Xen-provided one */ - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - /* Switch over */ - pgd = init_level4_pgt; - - /* - * At this stage there can be no user pgd, and no page - * structure to attach it to, so make sure we just set kernel - * pgd. - */ - xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); - xen_mc_issue(PARAVIRT_LAZY_CPU); - - reserve_early(__pa(xen_start_info->pt_base), - __pa(xen_start_info->pt_base + - xen_start_info->nr_pt_frames * PAGE_SIZE), - "XEN PAGETABLES"); - - return pgd; -} -#else /* !CONFIG_X86_64 */ -static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; - -static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) -{ - pmd_t *kernel_pmd; - - init_pg_tables_start = __pa(pgd); - init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; - max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); - - kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); - - xen_map_identity_early(level2_kernel_pgt, max_pfn); - - memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); - set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], - __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); - - set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); - set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); - set_page_prot(empty_zero_page, PAGE_KERNEL_RO); - - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - - xen_write_cr3(__pa(swapper_pg_dir)); - - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); - - return swapper_pg_dir; -} -#endif /* CONFIG_X86_64 */ - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 98cb9869eb2..94e452c0b00 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,8 @@ #include #include +#include +#include #include "multicalls.h" #include "mmu.h" @@ -114,6 +117,37 @@ static inline void check_zero(void) #endif /* CONFIG_XEN_DEBUG_FS */ + +/* + * Identity map, in addition to plain kernel map. This needs to be + * large enough to allocate page table pages to allocate the rest. + * Each page can map 2MB. + */ +static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; + +#ifdef CONFIG_X86_64 +/* l3 pud for userspace vsyscall mapping */ +static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +#endif /* CONFIG_X86_64 */ + +/* + * Note about cr3 (pagetable base) values: + * + * xen_cr3 contains the current logical cr3 value; it contains the + * last set cr3. This may not be the current effective cr3, because + * its update may be being lazily deferred. However, a vcpu looking + * at its own cr3 can use this value knowing that it everything will + * be self-consistent. + * + * xen_current_cr3 contains the actual vcpu cr3; it is set once the + * hypercall to set the vcpu cr3 is complete (so it may be a little + * out of date, but it will never be set early). If one vcpu is + * looking at another vcpu's cr3 value, it should use this variable. + */ +DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ + + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. @@ -1152,6 +1186,672 @@ void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } +static __init void xen_pagetable_setup_start(pgd_t *base) +{ +} + +static __init void xen_pagetable_setup_done(pgd_t *base) +{ + xen_setup_shared_info(); +} + +static void xen_write_cr2(unsigned long cr2) +{ + percpu_read(xen_vcpu)->arch.cr2 = cr2; +} + +static unsigned long xen_read_cr2(void) +{ + return percpu_read(xen_vcpu)->arch.cr2; +} + +unsigned long xen_read_cr2_direct(void) +{ + return percpu_read(xen_vcpu_info.arch.cr2); +} + +static void xen_flush_tlb(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_LOCAL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_single(unsigned long addr) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + op = mcs.args; + op->cmd = MMUEXT_INVLPG_LOCAL; + op->arg1.linear_addr = addr & PAGE_MASK; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} + +static void xen_flush_tlb_others(const struct cpumask *cpus, + struct mm_struct *mm, unsigned long va) +{ + struct { + struct mmuext_op op; + DECLARE_BITMAP(mask, NR_CPUS); + } *args; + struct multicall_space mcs; + + BUG_ON(cpumask_empty(cpus)); + BUG_ON(!mm); + + mcs = xen_mc_entry(sizeof(*args)); + args = mcs.args; + args->op.arg2.vcpumask = to_cpumask(args->mask); + + /* Remove us, and any offline CPUS. */ + cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); + if (unlikely(cpumask_empty(to_cpumask(args->mask)))) + goto issue; + + if (va == TLB_FLUSH_ALL) { + args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; + } else { + args->op.cmd = MMUEXT_INVLPG_MULTI; + args->op.arg1.linear_addr = va; + } + + MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); + +issue: + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + +static unsigned long xen_read_cr3(void) +{ + return percpu_read(xen_cr3); +} + +static void set_current_cr3(void *v) +{ + percpu_write(xen_current_cr3, (unsigned long)v); +} + +static void __xen_write_cr3(bool kernel, unsigned long cr3) +{ + struct mmuext_op *op; + struct multicall_space mcs; + unsigned long mfn; + + if (cr3) + mfn = pfn_to_mfn(PFN_DOWN(cr3)); + else + mfn = 0; + + WARN_ON(mfn == 0 && kernel); + + mcs = __xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; + op->arg1.mfn = mfn; + + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + if (kernel) { + percpu_write(xen_cr3, cr3); + + /* Update xen_current_cr3 once the batch has actually + been submitted. */ + xen_mc_callback(set_current_cr3, (void *)cr3); + } +} + +static void xen_write_cr3(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + percpu_write(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + +#ifdef CONFIG_X86_64 + { + pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); + if (user_pgd) + __xen_write_cr3(false, __pa(user_pgd)); + else + __xen_write_cr3(false, 0); + } +#endif + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ +} + +static int xen_pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = mm->pgd; + int ret = 0; + + BUG_ON(PagePinned(virt_to_page(pgd))); + +#ifdef CONFIG_X86_64 + { + struct page *page = virt_to_page(pgd); + pgd_t *user_pgd; + + BUG_ON(page->private != 0); + + ret = -ENOMEM; + + user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + page->private = (unsigned long)user_pgd; + + if (user_pgd != NULL) { + user_pgd[pgd_index(VSYSCALL_START)] = + __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); + ret = 0; + } + + BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); + } +#endif + + return ret; +} + +static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_X86_64 + pgd_t *user_pgd = xen_get_user_pgd(pgd); + + if (user_pgd) + free_page((unsigned long)user_pgd); +#endif +} + + +/* Early in boot, while setting up the initial pagetable, assume + everything is pinned. */ +static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +} + +/* Early release_pte assumes that all pts are pinned, since there's + only init_mm and anything attached to that is pinned. */ +static void xen_release_pte_init(unsigned long pfn) +{ + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +} + +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + +/* This needs to make sure the new pte page is pinned iff its being + attached to a pinned pagetable. */ +static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(virt_to_page(mm->pgd))) { + SetPagePinned(page); + + vm_unmap_aliases(); + if (!PageHighMem(page)) { + make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); + } else { + /* make sure there are no stray mappings of + this page */ + kmap_flush_unused(); + } + } +} + +static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PTE); +} + +static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PMD); +} + +/* This should never happen until we're OK to use struct page */ +static void xen_release_ptpage(unsigned long pfn, unsigned level) +{ + struct page *page = pfn_to_page(pfn); + + if (PagePinned(page)) { + if (!PageHighMem(page)) { + if (level == PT_PTE && USE_SPLIT_PTLOCKS) + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); + } + ClearPagePinned(page); + } +} + +static void xen_release_pte(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PTE); +} + +static void xen_release_pmd(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PMD); +} + +#if PAGETABLE_LEVELS == 4 +static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) +{ + xen_alloc_ptpage(mm, pfn, PT_PUD); +} + +static void xen_release_pud(unsigned long pfn) +{ + xen_release_ptpage(pfn, PT_PUD); +} +#endif + +void __init xen_reserve_top(void) +{ +#ifdef CONFIG_X86_32 + unsigned long top = HYPERVISOR_VIRT_START; + struct xen_platform_parameters pp; + + if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) + top = pp.virt_start; + + reserve_top_address(-top); +#endif /* CONFIG_X86_32 */ +} + +/* + * Like __va(), but returns address in the kernel mapping (which is + * all we have until the physical memory mapping has been set up. + */ +static void *__ka(phys_addr_t paddr) +{ +#ifdef CONFIG_X86_64 + return (void *)(paddr + __START_KERNEL_map); +#else + return __va(paddr); +#endif +} + +/* Convert a machine address to physical address */ +static unsigned long m2p(phys_addr_t maddr) +{ + phys_addr_t paddr; + + maddr &= PTE_PFN_MASK; + paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; + + return paddr; +} + +/* Convert a machine address to kernel virtual */ +static void *m2v(phys_addr_t maddr) +{ + return __ka(m2p(maddr)); +} + +static void set_page_prot(void *addr, pgprot_t prot) +{ + unsigned long pfn = __pa(addr) >> PAGE_SHIFT; + pte_t pte = pfn_pte(pfn, prot); + + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + BUG(); +} + +static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +{ + unsigned pmdidx, pteidx; + unsigned ident_pte; + unsigned long pfn; + + ident_pte = 0; + pfn = 0; + for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { + pte_t *pte_page; + + /* Reuse or allocate a page of ptes */ + if (pmd_present(pmd[pmdidx])) + pte_page = m2v(pmd[pmdidx].pmd); + else { + /* Check for free pte pages */ + if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) + break; + + pte_page = &level1_ident_pgt[ident_pte]; + ident_pte += PTRS_PER_PTE; + + pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); + } + + /* Install mappings */ + for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { + pte_t pte; + + if (pfn > max_pfn_mapped) + max_pfn_mapped = pfn; + + if (!pte_none(pte_page[pteidx])) + continue; + + pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); + pte_page[pteidx] = pte; + } + } + + for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) + set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); + + set_page_prot(pmd, PAGE_KERNEL_RO); +} + +#ifdef CONFIG_X86_64 +static void convert_pfn_mfn(void *v) +{ + pte_t *pte = v; + int i; + + /* All levels are converted the same way, so just treat them + as ptes. */ + for (i = 0; i < PTRS_PER_PTE; i++) + pte[i] = xen_make_pte(pte[i].pte); +} + +/* + * Set up the inital kernel pagetable. + * + * We can construct this by grafting the Xen provided pagetable into + * head_64.S's preconstructed pagetables. We copy the Xen L2's into + * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This + * means that only the kernel has a physical mapping to start with - + * but that's enough to get __va working. We need to fill in the rest + * of the physical mapping once some sort of allocator has been set + * up. + */ +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pud_t *l3; + pmd_t *l2; + + /* Zap identity mapping */ + init_level4_pgt[0] = __pgd(0); + + /* Pre-constructed entries are in pfn, so convert to mfn */ + convert_pfn_mfn(init_level4_pgt); + convert_pfn_mfn(level3_ident_pgt); + convert_pfn_mfn(level3_kernel_pgt); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); + + memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); + l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); + memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); + + /* Set up identity map */ + xen_map_identity_early(level2_ident_pgt, max_pfn); + + /* Make pagetable pieces RO */ + set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); + + /* Pin down new L4 */ + pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, + PFN_DOWN(__pa_symbol(init_level4_pgt))); + + /* Unpin Xen-provided one */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + /* Switch over */ + pgd = init_level4_pgt; + + /* + * At this stage there can be no user pgd, and no page + * structure to attach it to, so make sure we just set kernel + * pgd. + */ + xen_mc_batch(); + __xen_write_cr3(true, __pa(pgd)); + xen_mc_issue(PARAVIRT_LAZY_CPU); + + reserve_early(__pa(xen_start_info->pt_base), + __pa(xen_start_info->pt_base + + xen_start_info->nr_pt_frames * PAGE_SIZE), + "XEN PAGETABLES"); + + return pgd; +} +#else /* !CONFIG_X86_64 */ +static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; + +__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, + unsigned long max_pfn) +{ + pmd_t *kernel_pmd; + + init_pg_tables_start = __pa(pgd); + init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; + max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); + + kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); + memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + + xen_map_identity_early(level2_kernel_pgt, max_pfn); + + memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], + __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); + + set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); + set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); + set_page_prot(empty_zero_page, PAGE_KERNEL_RO); + + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); + + xen_write_cr3(__pa(swapper_pg_dir)); + + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); + + return swapper_pg_dir; +} +#endif /* CONFIG_X86_64 */ + +static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +{ + pte_t pte; + + phys >>= PAGE_SHIFT; + + switch (idx) { + case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: +#ifdef CONFIG_X86_F00F_BUG + case FIX_F00F_IDT: +#endif +#ifdef CONFIG_X86_32 + case FIX_WP_TEST: + case FIX_VDSO: +# ifdef CONFIG_HIGHMEM + case FIX_KMAP_BEGIN ... FIX_KMAP_END: +# endif +#else + case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +#endif +#ifdef CONFIG_X86_LOCAL_APIC + case FIX_APIC_BASE: /* maps dummy local APIC */ +#endif + pte = pfn_pte(phys, prot); + break; + + default: + pte = mfn_pte(phys, prot); + break; + } + + __native_set_fixmap(idx, pte); + +#ifdef CONFIG_X86_64 + /* Replicate changes to map the vsyscall page into the user + pagetable vsyscall mapping. */ + if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { + unsigned long vaddr = __fix_to_virt(idx); + set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); + } +#endif +} + +__init void xen_post_allocator_init(void) +{ + pv_mmu_ops.set_pte = xen_set_pte; + pv_mmu_ops.set_pmd = xen_set_pmd; + pv_mmu_ops.set_pud = xen_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = xen_set_pgd; +#endif + + /* This will work as long as patching hasn't happened yet + (which it hasn't) */ + pv_mmu_ops.alloc_pte = xen_alloc_pte; + pv_mmu_ops.alloc_pmd = xen_alloc_pmd; + pv_mmu_ops.release_pte = xen_release_pte; + pv_mmu_ops.release_pmd = xen_release_pmd; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.alloc_pud = xen_alloc_pud; + pv_mmu_ops.release_pud = xen_release_pud; +#endif + +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif + xen_mark_init_mm_pinned(); +} + + +const struct pv_mmu_ops xen_mmu_ops __initdata = { + .pagetable_setup_start = xen_pagetable_setup_start, + .pagetable_setup_done = xen_pagetable_setup_done, + + .read_cr2 = xen_read_cr2, + .write_cr2 = xen_write_cr2, + + .read_cr3 = xen_read_cr3, + .write_cr3 = xen_write_cr3, + + .flush_tlb_user = xen_flush_tlb, + .flush_tlb_kernel = xen_flush_tlb, + .flush_tlb_single = xen_flush_tlb_single, + .flush_tlb_others = xen_flush_tlb_others, + + .pte_update = paravirt_nop, + .pte_update_defer = paravirt_nop, + + .pgd_alloc = xen_pgd_alloc, + .pgd_free = xen_pgd_free, + + .alloc_pte = xen_alloc_pte_init, + .release_pte = xen_release_pte_init, + .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd_clone = paravirt_nop, + .release_pmd = xen_release_pte_init, + +#ifdef CONFIG_HIGHPTE + .kmap_atomic_pte = xen_kmap_atomic_pte, +#endif + +#ifdef CONFIG_X86_64 + .set_pte = xen_set_pte, +#else + .set_pte = xen_set_pte_init, +#endif + .set_pte_at = xen_set_pte_at, + .set_pmd = xen_set_pmd_hyper, + + .ptep_modify_prot_start = __ptep_modify_prot_start, + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + + .pte_val = xen_pte_val, + .pgd_val = xen_pgd_val, + + .make_pte = xen_make_pte, + .make_pgd = xen_make_pgd, + +#ifdef CONFIG_X86_PAE + .set_pte_atomic = xen_set_pte_atomic, + .set_pte_present = xen_set_pte_at, + .pte_clear = xen_pte_clear, + .pmd_clear = xen_pmd_clear, +#endif /* CONFIG_X86_PAE */ + .set_pud = xen_set_pud_hyper, + + .make_pmd = xen_make_pmd, + .pmd_val = xen_pmd_val, + +#if PAGETABLE_LEVELS == 4 + .pud_val = xen_pud_val, + .make_pud = xen_make_pud, + .set_pgd = xen_set_pgd_hyper, + + .alloc_pud = xen_alloc_pte_init, + .release_pud = xen_release_pte_init, +#endif /* PAGETABLE_LEVELS == 4 */ + + .activate_mm = xen_activate_mm, + .dup_mmap = xen_dup_mmap, + .exit_mmap = xen_exit_mmap, + + .lazy_mode = { + .enter = paravirt_enter_lazy_mmu, + .leave = xen_leave_lazy, + }, + + .set_fixmap = xen_set_fixmap, +}; + + #ifdef CONFIG_XEN_DEBUG_FS static struct dentry *d_mmu_debug; diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 98d71659da5..24d1b44a337 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -54,4 +54,7 @@ pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +unsigned long xen_read_cr2_direct(void); + +extern const struct pv_mmu_ops xen_mmu_ops; #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index c1f8faf0a2c..11913fc94c1 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -13,6 +13,7 @@ extern const char xen_failsafe_callback[]; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); +DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); @@ -22,6 +23,13 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_setup_machphys_mapping(void); +pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_ident_map_ISA(void); +void xen_reserve_top(void); + +void xen_leave_lazy(void); +void xen_post_allocator_init(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); From 41edafdb78feac1d1f8823846209975fde990633 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:02 -0800 Subject: [PATCH 327/682] x86/pvops: add a paravirt_ident functions to allow special patching Impact: Optimization Several paravirt ops implementations simply return their arguments, the most obvious being the make_pte/pte_val class of operations on native. On 32-bit, the identity function is literally a no-op, as the calling convention uses the same registers for the first argument and return. On 64-bit, it can be implemented with a single "mov". This patch adds special identity functions for 32 and 64 bit argument, and machinery to recognize them and replace them with either nops or a mov as appropriate. At the moment, the only users for the identity functions are the pagetable entry conversion functions. The result is a measureable improvement on pagetable-heavy benchmarks (2-3%, reducing the pvops overhead from 5 to 2%). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 5 ++ arch/x86/kernel/paravirt.c | 75 +++++++++++++++++++++++++---- arch/x86/kernel/paravirt_patch_32.c | 12 +++++ arch/x86/kernel/paravirt_patch_64.c | 15 ++++++ 4 files changed, 98 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 17577888709..961d10c12f1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -388,6 +388,8 @@ extern struct pv_lock_ops pv_lock_ops; asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") unsigned paravirt_patch_nop(void); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); unsigned paravirt_patch_ignore(unsigned len); unsigned paravirt_patch_call(void *insnbuf, const void *target, u16 tgt_clobbers, @@ -1371,6 +1373,9 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, } void _paravirt_nop(void); +u32 _paravirt_ident_32(u32); +u64 _paravirt_ident_64(u64); + #define paravirt_nop ((void *)_paravirt_nop) void paravirt_use_bytelocks(void); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 202514be592..dd25e2b1593 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -44,6 +44,17 @@ void _paravirt_nop(void) { } +/* identity function, which can be inlined */ +u32 _paravirt_ident_32(u32 x) +{ + return x; +} + +u64 _paravirt_ident_64(u64 x) +{ + return x; +} + static void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", @@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); - else if (opfunc == paravirt_nop) + else if (opfunc == _paravirt_nop) /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); + + /* identity functions just return their single argument */ + else if (opfunc == _paravirt_ident_32) + ret = paravirt_patch_ident_32(insnbuf, len); + else if (opfunc == _paravirt_ident_64) + ret = paravirt_patch_ident_64(insnbuf, len); + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || @@ -373,6 +391,45 @@ struct pv_apic_ops pv_apic_ops = { #endif }; +typedef pte_t make_pte_t(pteval_t); +typedef pmd_t make_pmd_t(pmdval_t); +typedef pud_t make_pud_t(pudval_t); +typedef pgd_t make_pgd_t(pgdval_t); + +typedef pteval_t pte_val_t(pte_t); +typedef pmdval_t pmd_val_t(pmd_t); +typedef pudval_t pud_val_t(pud_t); +typedef pgdval_t pgd_val_t(pgd_t); + + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) +/* 32-bit pagetable entries */ +#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 +#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 +#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 +#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 + +#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 +#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#else +/* 64-bit pagetable entries */ +#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 +#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 +#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 +#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 + +#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 +#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#endif + struct pv_mmu_ops pv_mmu_ops = { #ifndef CONFIG_X86_64 .pagetable_setup_start = native_pagetable_setup_start, @@ -424,21 +481,21 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = native_pmd_val, - .make_pmd = native_make_pmd, + .pmd_val = paravirt_native_pmd_val, + .make_pmd = paravirt_native_make_pmd, #if PAGETABLE_LEVELS == 4 - .pud_val = native_pud_val, - .make_pud = native_make_pud, + .pud_val = paravirt_native_pud_val, + .make_pud = paravirt_native_make_pud, .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = native_pte_val, - .pgd_val = native_pgd_val, + .pte_val = paravirt_native_pte_val, + .pgd_val = paravirt_native_pgd_val, - .make_pte = native_make_pte, - .make_pgd = native_make_pgd, + .make_pte = paravirt_native_make_pte, + .make_pgd = paravirt_native_make_pgd, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 9fe644f4861..d9f32e6d6ab 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); DEF_NATIVE(pv_cpu_ops, clts, "clts"); DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + /* arg in %eax, return in %eax */ + return 0; +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + /* arg in %edx:%eax, return in %edx:%eax */ + return 0; +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 061d01df9ae..3f08f34f93e 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq"); DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); +DEF_NATIVE(, mov32, "mov %edi, %eax"); +DEF_NATIVE(, mov64, "mov %rdi, %rax"); + +unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov32, end__mov32); +} + +unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) +{ + return paravirt_patch_insns(insnbuf, len, + start__mov64, end__mov64); +} + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) { From b8aa287f77be943e37a84fa4657e27df95269bfb Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:03 -0800 Subject: [PATCH 328/682] x86: fix paravirt clobber in entry_64.S Impact: Fix latent bug The clobber is trying to say that anything except RDI is available for clobbering, but actually clobbers everything. This hasn't mattered because the clobbers were basically ignored, but subsequent patches will rely on them. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a52703864a1..e4c9710cae5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1140,7 +1140,7 @@ ENTRY(native_load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 - DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) + DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) SWAPGS gs_change: movl %edi,%gs From 9104a18dcdd8dfefdddca8ce44988563f13ed3c4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:04 -0800 Subject: [PATCH 329/682] x86/paravirt: selectively save/restore regs around pvops calls Impact: Optimization Each asm paravirt-ops call says what registers are available for clobbering. This patch makes use of this to selectively save/restore registers around each pvops call. In many cases this significantly shrinks code size. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 100 +++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 961d10c12f1..dcce961262b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -12,19 +12,29 @@ #define CLBR_EAX (1 << 0) #define CLBR_ECX (1 << 1) #define CLBR_EDX (1 << 2) +#define CLBR_EDI (1 << 3) -#ifdef CONFIG_X86_64 -#define CLBR_RSI (1 << 3) -#define CLBR_RDI (1 << 4) +#ifdef CONFIG_X86_32 +/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +#define CLBR_ANY ((1 << 4) - 1) +#else +#define CLBR_RAX CLBR_EAX +#define CLBR_RCX CLBR_ECX +#define CLBR_RDX CLBR_EDX +#define CLBR_RDI CLBR_EDI +#define CLBR_RSI (1 << 4) #define CLBR_R8 (1 << 5) #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) #define CLBR_ANY ((1 << 9) - 1) + +#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ + CLBR_RCX | CLBR_R8 | CLBR_R9) +#define CLBR_RET_REG (CLBR_RAX | CLBR_RDX) +#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) + #include -#else -/* CLBR_ANY should match all regs platform has. For i386, that's just it */ -#define CLBR_ANY ((1 << 3) - 1) #endif /* X86_64 */ #ifndef __ASSEMBLY__ @@ -1530,33 +1540,49 @@ static inline unsigned long __raw_local_irq_save(void) .popsection +#define COND_PUSH(set, mask, reg) \ + .if ((~set) & mask); push %reg; .endif +#define COND_POP(set, mask, reg) \ + .if ((~set) & mask); pop %reg; .endif + #ifdef CONFIG_X86_64 -#define PV_SAVE_REGS \ - push %rax; \ - push %rcx; \ - push %rdx; \ - push %rsi; \ - push %rdi; \ - push %r8; \ - push %r9; \ - push %r10; \ - push %r11 -#define PV_RESTORE_REGS \ - pop %r11; \ - pop %r10; \ - pop %r9; \ - pop %r8; \ - pop %rdi; \ - pop %rsi; \ - pop %rdx; \ - pop %rcx; \ - pop %rax + +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_RAX, rax); \ + COND_PUSH(set, CLBR_RCX, rcx); \ + COND_PUSH(set, CLBR_RDX, rdx); \ + COND_PUSH(set, CLBR_RSI, rsi); \ + COND_PUSH(set, CLBR_RDI, rdi); \ + COND_PUSH(set, CLBR_R8, r8); \ + COND_PUSH(set, CLBR_R9, r9); \ + COND_PUSH(set, CLBR_R10, r10); \ + COND_PUSH(set, CLBR_R11, r11) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_R11, r11); \ + COND_POP(set, CLBR_R10, r10); \ + COND_POP(set, CLBR_R9, r9); \ + COND_POP(set, CLBR_R8, r8); \ + COND_POP(set, CLBR_RDI, rdi); \ + COND_POP(set, CLBR_RSI, rsi); \ + COND_POP(set, CLBR_RDX, rdx); \ + COND_POP(set, CLBR_RCX, rcx); \ + COND_POP(set, CLBR_RAX, rax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) #else -#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx -#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax +#define PV_SAVE_REGS(set) \ + COND_PUSH(set, CLBR_EAX, eax); \ + COND_PUSH(set, CLBR_EDI, edi); \ + COND_PUSH(set, CLBR_ECX, ecx); \ + COND_PUSH(set, CLBR_EDX, edx) +#define PV_RESTORE_REGS(set) \ + COND_POP(set, CLBR_EDX, edx); \ + COND_POP(set, CLBR_ECX, ecx); \ + COND_POP(set, CLBR_EDI, edi); \ + COND_POP(set, CLBR_EAX, eax) + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) #define PARA_INDIRECT(addr) *%cs:addr @@ -1568,15 +1594,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS;) \ + PV_RESTORE_REGS(clobbers);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS; \ + PV_SAVE_REGS(clobbers); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS;) + PV_RESTORE_REGS(clobbers);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ @@ -1606,11 +1632,15 @@ static inline unsigned long __raw_local_irq_save(void) PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ swapgs) +/* + * Note: swapgs is very special, and in practise is either going to be + * implemented with a single "swapgs" instruction or something very + * special. Either way, we don't need to save any registers for + * it. + */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - PV_SAVE_REGS; \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ - PV_RESTORE_REGS \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ ) #define GET_CR2_INTO_RCX \ From ecb93d1ccd0aac63f03be2db3cac3fa974716f4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:05 -0800 Subject: [PATCH 330/682] x86/paravirt: add register-saving thunks to reduce caller register pressure Impact: Optimization One of the problems with inserting a pile of C calls where previously there were none is that the register pressure is greatly increased. The C calling convention says that the caller must expect a certain set of registers may be trashed by the callee, and that the callee can use those registers without restriction. This includes the function argument registers, and several others. This patch seeks to alleviate this pressure by introducing wrapper thunks that will do the register saving/restoring, so that the callsite doesn't need to worry about it, but the callee function can be conventional compiler-generated code. In many cases (particularly performance-sensitive cases) the callee will be in assembler anyway, and need not use the compiler's calling convention. Standard calling convention is: arguments return scratch x86-32 eax edx ecx eax ? x86-64 rdi rsi rdx rcx rax r8 r9 r10 r11 The thunk preserves all argument and scratch registers. The return register is not preserved, and is available as a scratch register for unwrapped callee code (and of course the return value). Wrapped function pointers are themselves wrapped in a struct paravirt_callee_save structure, in order to get some warning from the compiler when functions with mismatched calling conventions are used. The most common paravirt ops, both statically and dynamically, are interrupt enable/disable/save/restore, so handle them first. This is particularly easy since their calls are handled specially anyway. XXX Deal with VMI. What's their calling convention? Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 126 ++++++++++++++++++++++++-------- arch/x86/kernel/paravirt.c | 8 +- arch/x86/kernel/vsmp_64.c | 12 ++- arch/x86/lguest/boot.c | 13 +++- arch/x86/xen/enlighten.c | 8 +- arch/x86/xen/irq.c | 14 +++- 6 files changed, 132 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index dcce961262b..f9107b88631 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -17,6 +17,10 @@ #ifdef CONFIG_X86_32 /* CLBR_ANY should match all regs platform has. For i386, that's just it */ #define CLBR_ANY ((1 << 4) - 1) + +#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) +#define CLBR_RET_REG (CLBR_EAX) +#define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX #define CLBR_RCX CLBR_ECX @@ -27,16 +31,19 @@ #define CLBR_R9 (1 << 6) #define CLBR_R10 (1 << 7) #define CLBR_R11 (1 << 8) + #define CLBR_ANY ((1 << 9) - 1) #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ CLBR_RCX | CLBR_R8 | CLBR_R9) -#define CLBR_RET_REG (CLBR_RAX | CLBR_RDX) +#define CLBR_RET_REG (CLBR_RAX) #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) #include #endif /* X86_64 */ +#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) + #ifndef __ASSEMBLY__ #include #include @@ -50,6 +57,14 @@ struct tss_struct; struct mm_struct; struct desc_struct; +/* + * Wrapper type for pointers to code which uses the non-standard + * calling convention. See PV_CALL_SAVE_REGS_THUNK below. + */ +struct paravirt_callee_save { + void *func; +}; + /* general info */ struct pv_info { unsigned int kernel_rpl; @@ -199,11 +214,15 @@ struct pv_irq_ops { * expected to use X86_EFLAGS_IF; all other bits * returned from save_fl are undefined, and may be ignored by * restore_fl. + * + * NOTE: These functions callers expect the callee to preserve + * more registers than the standard C calling convention. */ - unsigned long (*save_fl)(void); - void (*restore_fl)(unsigned long); - void (*irq_disable)(void); - void (*irq_enable)(void); + struct paravirt_callee_save save_fl; + struct paravirt_callee_save restore_fl; + struct paravirt_callee_save irq_disable; + struct paravirt_callee_save irq_enable; + void (*safe_halt)(void); void (*halt)(void); @@ -1437,12 +1456,37 @@ extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #ifdef CONFIG_X86_32 -#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" -#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" +#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" +#define PV_RESTORE_REGS "popl %edx; popl %ecx;" + +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS +#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS + #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS #define PV_VEXTRA_CLOBBERS #else +/* save and restore all caller-save registers, except return value */ +#define PV_SAVE_ALL_CALLER_REGS \ + "push %rcx;" \ + "push %rdx;" \ + "push %rsi;" \ + "push %rdi;" \ + "push %r8;" \ + "push %r9;" \ + "push %r10;" \ + "push %r11;" +#define PV_RESTORE_ALL_CALLER_REGS \ + "pop %r11;" \ + "pop %r10;" \ + "pop %r9;" \ + "pop %r8;" \ + "pop %rdi;" \ + "pop %rsi;" \ + "pop %rdx;" \ + "pop %rcx;" + /* We save some registers, but all of them, that's too much. We clobber all * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" @@ -1452,52 +1496,76 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_FLAGS_ARG "D" #endif +/* + * Generate a thunk around a function which saves all caller-save + * registers except for the return value. This allows C functions to + * be called from assembler code where fewer than normal registers are + * available. It may also help code generation around calls from C + * code if the common case doesn't use many registers. + * + * When a callee is wrapped in a thunk, the caller can assume that all + * arg regs and all scratch registers are preserved across the + * call. The return value in rax/eax will not be saved, even for void + * functions. + */ +#define PV_CALLEE_SAVE_REGS_THUNK(func) \ + extern typeof(func) __raw_callee_save_##func; \ + static void *__##func##__ __used = func; \ + \ + asm(".pushsection .text;" \ + "__raw_callee_save_" #func ": " \ + PV_SAVE_ALL_CALLER_REGS \ + "call " #func ";" \ + PV_RESTORE_ALL_CALLER_REGS \ + "ret;" \ + ".popsection") + +/* Get a reference to a callee-save function */ +#define PV_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { __raw_callee_save_##func }) + +/* Promise that "func" already uses the right calling convention */ +#define __PV_IS_CALLEE_SAVE(func) \ + ((struct paravirt_callee_save) { func }) + static inline unsigned long __raw_local_save_flags(void) { unsigned long f; - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : paravirt_type(pv_irq_ops.save_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_VEXTRA_CLOBBERS); + : "memory", "cc"); return f; } static inline void raw_local_irq_restore(unsigned long f) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : "=a"(f) : PV_FLAGS_ARG(f), paravirt_type(pv_irq_ops.restore_fl), paravirt_clobber(CLBR_EAX) - : "memory", "cc" PV_EXTRA_CLOBBERS); + : "memory", "cc"); } static inline void raw_local_irq_disable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_disable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline void raw_local_irq_enable(void) { - asm volatile(paravirt_alt(PV_SAVE_REGS - PARAVIRT_CALL - PV_RESTORE_REGS) + asm volatile(paravirt_alt(PARAVIRT_CALL) : : paravirt_type(pv_irq_ops.irq_enable), paravirt_clobber(CLBR_EAX) - : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); + : "memory", "eax", "cc"); } static inline unsigned long __raw_local_irq_save(void) @@ -1541,9 +1609,9 @@ static inline unsigned long __raw_local_irq_save(void) #define COND_PUSH(set, mask, reg) \ - .if ((~set) & mask); push %reg; .endif + .if ((~(set)) & mask); push %reg; .endif #define COND_POP(set, mask, reg) \ - .if ((~set) & mask); pop %reg; .endif + .if ((~(set)) & mask); pop %reg; .endif #ifdef CONFIG_X86_64 @@ -1594,15 +1662,15 @@ static inline unsigned long __raw_local_irq_save(void) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ - PV_SAVE_REGS(clobbers); \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ - PV_RESTORE_REGS(clobbers);) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ - PV_SAVE_REGS(clobbers); \ + PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ - PV_RESTORE_REGS(clobbers);) + PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define USERGS_SYSRET32 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index dd25e2b1593..8adb6b5aa42 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -310,10 +310,10 @@ struct pv_time_ops pv_time_ops = { struct pv_irq_ops pv_irq_ops = { .init_IRQ = native_init_IRQ, - .save_fl = native_save_fl, - .restore_fl = native_restore_fl, - .irq_disable = native_irq_disable, - .irq_enable = native_irq_enable, + .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), + .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), + .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), .safe_halt = native_safe_halt, .halt = native_halt, #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a688f3bfaec..c609205df59 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) flags &= ~X86_EFLAGS_IF; return flags; } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); static void vsmp_restore_fl(unsigned long flags) { @@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags) flags |= X86_EFLAGS_AC; native_restore_fl(flags); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); static void vsmp_irq_disable(void) { @@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); static void vsmp_irq_enable(void) { @@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); } +PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) cap, ctl); if (cap & ctl & (1 << 4)) { /* Setup irq ops and turn on vSMP IRQ fastpath handling */ - pv_irq_ops.irq_disable = vsmp_irq_disable; - pv_irq_ops.irq_enable = vsmp_irq_enable; - pv_irq_ops.save_fl = vsmp_save_fl; - pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); + pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); pv_init_ops.patch = vsmp_patch; ctl &= ~(1 << 4); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92f1c6f3e19..19e33b6cd59 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -173,24 +173,29 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } +PV_CALLEE_SAVE_REGS_THUNK(save_fl); /* restore_flags() just sets the flags back to the value given. */ static void restore_fl(unsigned long flags) { lguest_data.irq_enabled = flags; } +PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } +PV_CALLEE_SAVE_REGS_THUNK(irq_disable); /* Interrupts go on... */ static void irq_enable(void) { lguest_data.irq_enabled = X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(irq_enable); + /*:*/ /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, @@ -984,10 +989,10 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; - pv_irq_ops.save_fl = save_fl; - pv_irq_ops.restore_fl = restore_fl; - pv_irq_ops.irq_disable = irq_disable; - pv_irq_ops.irq_enable = irq_enable; + pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); + pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); + pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0cd2a165f17..ff6d530ccc7 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -676,10 +676,10 @@ void xen_setup_vcpu_info_placement(void) if (have_vcpu_info_placement) { printk(KERN_INFO "Xen: using vcpu_info placement\n"); - pv_irq_ops.save_fl = xen_save_fl_direct; - pv_irq_ops.restore_fl = xen_restore_fl_direct; - pv_irq_ops.irq_disable = xen_irq_disable_direct; - pv_irq_ops.irq_enable = xen_irq_enable_direct; + pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); + pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); pv_mmu_ops.read_cr2 = xen_read_cr2_direct; } } diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 2e8271431e1..5a070900ad3 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -50,6 +50,7 @@ static unsigned long xen_save_fl(void) */ return (-flags) & X86_EFLAGS_IF; } +PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); static void xen_restore_fl(unsigned long flags) { @@ -76,6 +77,7 @@ static void xen_restore_fl(unsigned long flags) xen_force_evtchn_callback(); } } +PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); static void xen_irq_disable(void) { @@ -86,6 +88,7 @@ static void xen_irq_disable(void) percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); static void xen_irq_enable(void) { @@ -106,6 +109,7 @@ static void xen_irq_enable(void) if (unlikely(vcpu->evtchn_upcall_pending)) xen_force_evtchn_callback(); } +PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); static void xen_safe_halt(void) { @@ -124,10 +128,12 @@ static void xen_halt(void) static const struct pv_irq_ops xen_irq_ops __initdata = { .init_IRQ = __xen_init_IRQ, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, + + .save_fl = PV_CALLEE_SAVE(xen_save_fl), + .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), + .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), + .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), + .safe_halt = xen_safe_halt, .halt = xen_halt, #ifdef CONFIG_X86_64 From 791bad9d28d405d9397ea0c370ffb7c7bdd2aa6e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:06 -0800 Subject: [PATCH 331/682] x86/paravirt: implement PVOP_CALL macros for callee-save functions Impact: Optimization Functions with the callee save calling convention clobber many fewer registers than the normal C calling convention. Implement variants of PVOP_V?CALL* accordingly. This only bothers with functions up to 3 args, since functions with more args may as well use the normal calling convention. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 134 +++++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f9107b88631..beb10ecdbe6 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -510,25 +510,45 @@ int paravirt_disable_iospace(void); * makes sure the incoming and outgoing types are always correct. */ #ifdef CONFIG_X86_32 -#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx +#define PVOP_VCALL_ARGS \ + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS + +#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS + +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS #define VEXTRA_CLOBBERS -#else -#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx +#else /* CONFIG_X86_64 */ +#define PVOP_VCALL_ARGS \ + unsigned long __edi = __edi, __esi = __esi, \ + __edx = __edx, __ecx = __ecx #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax + +#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) +#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) +#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) +#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) + #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ "=S" (__esi), "=d" (__edx), \ "=c" (__ecx) - #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS + #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" -#endif +#endif /* CONFIG_X86_32 */ #ifdef CONFIG_PARAVIRT_DEBUG #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) @@ -536,10 +556,11 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)op) #endif -#define __PVOP_CALL(rettype, op, pre, post, ...) \ +#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ + pre, post, ...) \ ({ \ rettype __ret; \ - PVOP_CALL_ARGS; \ + PVOP_CALL_ARGS; \ PVOP_TEST_NULL(op); \ /* This is 32-bit specific, but is okay in 64-bit */ \ /* since this condition will never hold */ \ @@ -547,70 +568,113 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)((((u64)__edx) << 32) | __eax); \ } else { \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_CALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" EXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ __ret = (rettype)__eax; \ } \ __ret; \ }) -#define __PVOP_VCALL(op, pre, post, ...) \ + +#define __PVOP_CALL(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ + EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) + +#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_CALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + +#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ ({ \ PVOP_VCALL_ARGS; \ PVOP_TEST_NULL(op); \ asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : PVOP_VCALL_CLOBBERS \ + : call_clbr \ : paravirt_type(op), \ - paravirt_clobber(CLBR_ANY), \ + paravirt_clobber(clbr), \ ##__VA_ARGS__ \ - : "memory", "cc" VEXTRA_CLOBBERS); \ + : "memory", "cc" extra_clbr); \ }) +#define __PVOP_VCALL(op, pre, post, ...) \ + ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ + VEXTRA_CLOBBERS, \ + pre, post, ##__VA_ARGS__) + +#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ + ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ + PVOP_VCALLEE_CLOBBERS, , \ + pre, post, ##__VA_ARGS__) + + + #define PVOP_CALL0(rettype, op) \ __PVOP_CALL(rettype, op, "", "") #define PVOP_VCALL0(op) \ __PVOP_VCALL(op, "", "") +#define PVOP_CALLEE0(rettype, op) \ + __PVOP_CALLEESAVE(rettype, op, "", "") +#define PVOP_VCALLEE0(op) \ + __PVOP_VCALLEESAVE(op, "", "") + + #define PVOP_CALL1(rettype, op, arg1) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) #define PVOP_VCALL1(op, arg1) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) + +#define PVOP_CALLEE1(rettype, op, arg1) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) +#define PVOP_VCALLEE1(op, arg1) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) + #define PVOP_CALL2(rettype, op, arg1, arg2) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) #define PVOP_VCALL2(op, arg1, arg2) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1" ((unsigned long)(arg2))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + +#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ + __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) +#define PVOP_VCALLEE2(op, arg1, arg2) \ + __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2)) + #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) #define PVOP_VCALL3(op, arg1, arg2, arg3) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) + __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ + PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) /* This is the only difference in x86_64. We can make it much simpler */ #ifdef CONFIG_X86_32 #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ __PVOP_CALL(rettype, op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ - "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ - "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ __PVOP_VCALL(op, \ "push %[_arg4];", "lea 4(%%esp),%%esp;", \ @@ -618,13 +682,13 @@ int paravirt_disable_iospace(void); "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) #else #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ - __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_CALL(rettype, op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ - __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ - "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ - "3"((unsigned long)(arg4))) + __PVOP_VCALL(op, "", "", \ + PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ + PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) #endif static inline int paravirt_enabled(void) From da5de7c22eb705be709a57e486e7475a6969b994 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 Jan 2009 14:35:07 -0800 Subject: [PATCH 332/682] x86/paravirt: use callee-saved convention for pte_val/make_pte/etc Impact: Optimization In the native case, pte_val, make_pte, etc are all just identity functions, so there's no need to clobber a lot of registers over them. (This changes the 32-bit callee-save calling convention to return both EAX and EDX so functions can return 64-bit values.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 78 ++++++++++++++++----------------- arch/x86/kernel/paravirt.c | 53 +++++----------------- arch/x86/xen/mmu.c | 24 ++++++---- 3 files changed, 67 insertions(+), 88 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index beb10ecdbe6..2d098b78bc1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -19,7 +19,7 @@ #define CLBR_ANY ((1 << 4) - 1) #define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) -#define CLBR_RET_REG (CLBR_EAX) +#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) #define CLBR_SCRATCH (0) #else #define CLBR_RAX CLBR_EAX @@ -308,11 +308,11 @@ struct pv_mmu_ops { void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); - pteval_t (*pte_val)(pte_t); - pte_t (*make_pte)(pteval_t pte); + struct paravirt_callee_save pte_val; + struct paravirt_callee_save make_pte; - pgdval_t (*pgd_val)(pgd_t); - pgd_t (*make_pgd)(pgdval_t pgd); + struct paravirt_callee_save pgd_val; + struct paravirt_callee_save make_pgd; #if PAGETABLE_LEVELS >= 3 #ifdef CONFIG_X86_PAE @@ -327,12 +327,12 @@ struct pv_mmu_ops { void (*set_pud)(pud_t *pudp, pud_t pudval); - pmdval_t (*pmd_val)(pmd_t); - pmd_t (*make_pmd)(pmdval_t pmd); + struct paravirt_callee_save pmd_val; + struct paravirt_callee_save make_pmd; #if PAGETABLE_LEVELS == 4 - pudval_t (*pud_val)(pud_t); - pud_t (*make_pud)(pudval_t pud); + struct paravirt_callee_save pud_val; + struct paravirt_callee_save make_pud; void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); #endif /* PAGETABLE_LEVELS == 4 */ @@ -1155,13 +1155,13 @@ static inline pte_t __pte(pteval_t val) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, - pv_mmu_ops.make_pte, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pteval_t, + pv_mmu_ops.make_pte, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pteval_t, - pv_mmu_ops.make_pte, - val); + ret = PVOP_CALLEE1(pteval_t, + pv_mmu_ops.make_pte, + val); return (pte_t) { .pte = ret }; } @@ -1171,11 +1171,11 @@ static inline pteval_t pte_val(pte_t pte) pteval_t ret; if (sizeof(pteval_t) > sizeof(long)) - ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, - pte.pte, (u64)pte.pte >> 32); + ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, + pte.pte, (u64)pte.pte >> 32); else - ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, - pte.pte); + ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, + pte.pte); return ret; } @@ -1185,11 +1185,11 @@ static inline pgd_t __pgd(pgdval_t val) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, - val); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, + val); return (pgd_t) { ret }; } @@ -1199,11 +1199,11 @@ static inline pgdval_t pgd_val(pgd_t pgd) pgdval_t ret; if (sizeof(pgdval_t) > sizeof(long)) - ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd, (u64)pgd.pgd >> 32); + ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd, (u64)pgd.pgd >> 32); else - ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, - pgd.pgd); + ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, + pgd.pgd); return ret; } @@ -1267,11 +1267,11 @@ static inline pmd_t __pmd(pmdval_t val) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, - val); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, + val); return (pmd_t) { ret }; } @@ -1281,11 +1281,11 @@ static inline pmdval_t pmd_val(pmd_t pmd) pmdval_t ret; if (sizeof(pmdval_t) > sizeof(long)) - ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd, (u64)pmd.pmd >> 32); + ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd, (u64)pmd.pmd >> 32); else - ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, - pmd.pmd); + ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, + pmd.pmd); return ret; } @@ -1307,11 +1307,11 @@ static inline pud_t __pud(pudval_t val) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, - val, (u64)val >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, + val, (u64)val >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, - val); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, + val); return (pud_t) { ret }; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8adb6b5aa42..cea11c8e304 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -391,43 +391,12 @@ struct pv_apic_ops pv_apic_ops = { #endif }; -typedef pte_t make_pte_t(pteval_t); -typedef pmd_t make_pmd_t(pmdval_t); -typedef pud_t make_pud_t(pudval_t); -typedef pgd_t make_pgd_t(pgdval_t); - -typedef pteval_t pte_val_t(pte_t); -typedef pmdval_t pmd_val_t(pmd_t); -typedef pudval_t pud_val_t(pud_t); -typedef pgdval_t pgd_val_t(pgd_t); - - #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) /* 32-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_32 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_32 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_32 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_32 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_32 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_32 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) #else /* 64-bit pagetable entries */ -#define paravirt_native_make_pte (make_pte_t *)_paravirt_ident_64 -#define paravirt_native_pte_val (pte_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pmd (make_pmd_t *)_paravirt_ident_64 -#define paravirt_native_pmd_val (pmd_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pud (make_pud_t *)_paravirt_ident_64 -#define paravirt_native_pud_val (pud_val_t *)_paravirt_ident_64 - -#define paravirt_native_make_pgd (make_pgd_t *)_paravirt_ident_64 -#define paravirt_native_pgd_val (pgd_val_t *)_paravirt_ident_64 +#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) #endif struct pv_mmu_ops pv_mmu_ops = { @@ -481,21 +450,23 @@ struct pv_mmu_ops pv_mmu_ops = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, - .pmd_val = paravirt_native_pmd_val, - .make_pmd = paravirt_native_make_pmd, + + .pmd_val = PTE_IDENT, + .make_pmd = PTE_IDENT, #if PAGETABLE_LEVELS == 4 - .pud_val = paravirt_native_pud_val, - .make_pud = paravirt_native_make_pud, + .pud_val = PTE_IDENT, + .make_pud = PTE_IDENT, + .set_pgd = native_set_pgd, #endif #endif /* PAGETABLE_LEVELS >= 3 */ - .pte_val = paravirt_native_pte_val, - .pgd_val = paravirt_native_pgd_val, + .pte_val = PTE_IDENT, + .pgd_val = PTE_IDENT, - .make_pte = paravirt_native_make_pte, - .make_pgd = paravirt_native_make_pgd, + .make_pte = PTE_IDENT, + .make_pgd = PTE_IDENT, .dup_mmap = paravirt_nop, .exit_mmap = paravirt_nop, diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 94e452c0b00..5e41f7fc6cf 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -492,28 +492,33 @@ pteval_t xen_pte_val(pte_t pte) { return pte_mfn_to_pfn(pte.pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); pgdval_t xen_pgd_val(pgd_t pgd) { return pte_mfn_to_pfn(pgd.pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); pte_t xen_make_pte(pteval_t pte) { pte = pte_pfn_to_mfn(pte); return native_make_pte(pte); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); return native_make_pgd(pgd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); pmdval_t xen_pmd_val(pmd_t pmd) { return pte_mfn_to_pfn(pmd.pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); void xen_set_pud_hyper(pud_t *ptr, pud_t val) { @@ -590,12 +595,14 @@ pmd_t xen_make_pmd(pmdval_t pmd) pmd = pte_pfn_to_mfn(pmd); return native_make_pmd(pmd); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); #if PAGETABLE_LEVELS == 4 pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); pud_t xen_make_pud(pudval_t pud) { @@ -603,6 +610,7 @@ pud_t xen_make_pud(pudval_t pud) return native_make_pud(pud); } +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); pgd_t *xen_get_user_pgd(pgd_t *pgd) { @@ -1813,11 +1821,11 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_start = __ptep_modify_prot_start, .ptep_modify_prot_commit = __ptep_modify_prot_commit, - .pte_val = xen_pte_val, - .pgd_val = xen_pgd_val, + .pte_val = PV_CALLEE_SAVE(xen_pte_val), + .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), - .make_pte = xen_make_pte, - .make_pgd = xen_make_pgd, + .make_pte = PV_CALLEE_SAVE(xen_make_pte), + .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), #ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, @@ -1827,12 +1835,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { #endif /* CONFIG_X86_PAE */ .set_pud = xen_set_pud_hyper, - .make_pmd = xen_make_pmd, - .pmd_val = xen_pmd_val, + .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), + .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), #if PAGETABLE_LEVELS == 4 - .pud_val = xen_pud_val, - .make_pud = xen_make_pud, + .pud_val = PV_CALLEE_SAVE(xen_pud_val), + .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, .alloc_pud = xen_alloc_pte_init, From 4767afbf1f60f73997a7eb69a86d380f1fb27a92 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 29 Jan 2009 01:51:34 -0800 Subject: [PATCH 333/682] x86/paravirt: fix missing callee-save call on pud_val Impact: Fix build when CONFIG_PARAVIRT_DEBUG is enabled Fix missed convertion to using callee-saved calls for pud_val, which causes a compile error when CONFIG_PARAVIRT_DEBUG is enabled. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 2d098b78bc1..b17365c3974 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1321,11 +1321,11 @@ static inline pudval_t pud_val(pud_t pud) pudval_t ret; if (sizeof(pudval_t) > sizeof(long)) - ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, - pud.pud, (u64)pud.pud >> 32); + ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, + pud.pud, (u64)pud.pud >> 32); else - ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, - pud.pud); + ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, + pud.pud); return ret; } From 193c81b979adbc4a540bf89e75b9039fae75bf82 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:23:27 +0100 Subject: [PATCH 334/682] x86, irq: add LOCAL_PERF_VECTOR Add a slot for the performance monitoring interrupt. Not yet used by any subsystem - but the hardware has it. (This eases integration with performance monitoring code.) Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 9a83a10a5d5..0e2220bb314 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -88,6 +88,11 @@ */ #define LOCAL_TIMER_VECTOR 0xef +/* + * Performance monitoring interrupt vector: + */ +#define LOCAL_PERF_VECTOR 0xee + /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority From d1de36f5b5a30b8f9dae7142516fb122ce1e0661 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 01:59:14 +0100 Subject: [PATCH 335/682] x86, apic: clean up header section Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 48 ++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 968c817762a..29e7186b0e8 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -14,43 +14,41 @@ * Mikael Pettersson : PM converted to driver model. */ -#include - -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include #include -#include -#include -#include +#include +#include +#include #include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include -#include -#include -#include -#include #include -#include #include +#include +#include +#include #include -#include +#include #include #include -#include +#include +#include +#include +#include #include -#include - /* * Sanity check */ From 8f47e16348e8e25eedf639092a8a2f10a66aba34 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:03:42 +0100 Subject: [PATCH 336/682] x86: update copyrights Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/stacktrace.c | 2 +- arch/x86/mm/mmap.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 29e7186b0e8..d6da6dd2f60 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1,7 +1,7 @@ /* * Local APIC handling, local APIC timers * - * (c) 1999, 2000 Ingo Molnar + * (c) 1999, 2000, 2009 Ingo Molnar * * Fixes * Maciej W. Rozycki : Bits for genuine 82489DX APICs; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3378ffb2140..57d60c741e3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1,7 +1,7 @@ /* * Intel IO-APIC support for multi-Pentium hosts. * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b46ca7d31fe..66ebb823f39 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -3,7 +3,7 @@ * compliant MP-table parsing routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 1998, 1999, 2000, 2009 Ingo Molnar * (c) 2008 Alexey Starikovskiy */ diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 0eb32ae9bf1..eaaffae31cc 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -2,7 +2,7 @@ * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998-99, 2000 Ingo Molnar + * (c) 1998-99, 2000, 2009 Ingo Molnar * (c) 2002,2003 Andi Kleen, SuSE Labs. * * i386 and x86_64 integration by Glauber Costa diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1268a862abb..f40f86fec2f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -2,7 +2,7 @@ * x86 SMP booting functions * * (c) 1995 Alan Cox, Building #3 - * (c) 1998, 1999, 2000 Ingo Molnar + * (c) 1998, 1999, 2000, 2009 Ingo Molnar * Copyright 2001 Andi Kleen, SuSE Labs. * * Much of the core SMP work is based on previous work by Thomas Radke, to diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 10786af9554..f7bddc2e37d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -1,7 +1,7 @@ /* * Stack trace management functions * - * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar */ #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 56fe7124fbe..16582960056 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -4,7 +4,7 @@ * Based on code by Ingo Molnar and Andi Kleen, copyrighted * as follows: * - * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * Copyright 2003-2009 Red Hat Inc. * All Rights Reserved. * Copyright 2005 Andi Kleen, SUSE Labs. * Copyright 2007 Jiri Kosina, SUSE Labs. From 5da690d29f0de17cc1835dd3eb8f8bd0945521f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:10:03 +0100 Subject: [PATCH 337/682] x86, apic: unify the APIC vector enumeration Most of the vector layout on 32-bit and 64-bit is identical now, so eliminate the duplicated enumeration of the vectors. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 37 +++++++++++------------------- 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0e2220bb314..393f85ecdd8 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -50,37 +50,26 @@ * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * TLB, reschedule and local APIC vectors are performance-critical. */ + +#define SPURIOUS_APIC_VECTOR 0xff +#define ERROR_APIC_VECTOR 0xfe +#define RESCHEDULE_VECTOR 0xfd +#define CALL_FUNCTION_VECTOR 0xfc +#define CALL_FUNCTION_SINGLE_VECTOR 0xfb +#define THERMAL_APIC_VECTOR 0xfa + #ifdef CONFIG_X86_32 - -# define SPURIOUS_APIC_VECTOR 0xff -# define ERROR_APIC_VECTOR 0xfe -# define RESCHEDULE_VECTOR 0xfd -# define CALL_FUNCTION_VECTOR 0xfc -# define CALL_FUNCTION_SINGLE_VECTOR 0xfb -# define THERMAL_APIC_VECTOR 0xfa /* 0xf8 - 0xf9 : free */ -# define INVALIDATE_TLB_VECTOR_END 0xf7 -# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -# define NUM_INVALIDATE_TLB_VECTORS 8 - #else - -# define SPURIOUS_APIC_VECTOR 0xff -# define ERROR_APIC_VECTOR 0xfe -# define RESCHEDULE_VECTOR 0xfd -# define CALL_FUNCTION_VECTOR 0xfc -# define CALL_FUNCTION_SINGLE_VECTOR 0xfb -# define THERMAL_APIC_VECTOR 0xfa # define THRESHOLD_APIC_VECTOR 0xf9 # define UV_BAU_MESSAGE 0xf8 -# define INVALIDATE_TLB_VECTOR_END 0xf7 -# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ - -#define NUM_INVALIDATE_TLB_VECTORS 8 - #endif +/* f0-f7 used for spreading out TLB flushes: */ +#define INVALIDATE_TLB_VECTOR_END 0xf7 +#define INVALIDATE_TLB_VECTOR_START 0xf0 +#define NUM_INVALIDATE_TLB_VECTORS 8 + /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ From 647ad94fc0479e33958cb4d0e20e241c0bcf599c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:06:50 +0100 Subject: [PATCH 338/682] x86, apic: clean up spurious vector sanity check Move the spurious vector sanity check to the place where it's defined - out of a .c file. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 7 +++++++ arch/x86/kernel/apic.c | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 393f85ecdd8..2601fd108c7 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -52,6 +52,13 @@ */ #define SPURIOUS_APIC_VECTOR 0xff +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + #define ERROR_APIC_VECTOR 0xfe #define RESCHEDULE_VECTOR 0xfd #define CALL_FUNCTION_VECTOR 0xfc diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d6da6dd2f60..85d8b50d1af 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -49,13 +49,6 @@ #include #include -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; /* Processor that is doing the boot up */ From ed74ca6d5a3e57eb0969d4e14e46cf9f88d25d3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:16:04 +0100 Subject: [PATCH 339/682] x86, voyager: move Voyager-specific defines to voyager.h They dont belong into the generic headers. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 10 ------- arch/x86/include/asm/irq_vectors.h | 35 ------------------------- arch/x86/include/asm/voyager.h | 42 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 41550797396..3ef2bded97a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -84,16 +84,6 @@ extern atomic_t irq_mis_count; /* EISA */ extern void eisa_set_level_irq(unsigned int irq); -/* Voyager functions */ -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); - /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 2601fd108c7..067d22ffb3e 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -135,39 +135,4 @@ #endif -/* Voyager specific defines */ -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 -#define VIC_CALL_FUNCTION_SINGLE_CPI 7 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - - #endif /* _ASM_X86_IRQ_VECTORS_H */ diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index b3e64730762..c1635d43616 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -527,3 +527,45 @@ extern void voyager_smp_intr_init(void); #define VOYAGER_PSI_SUBREAD 2 #define VOYAGER_PSI_SUBWRITE 3 extern void voyager_cat_psi(__u8, __u16, __u8 *); + +/* These define the CPIs we use in linux */ +#define VIC_CPI_LEVEL0 0 +#define VIC_CPI_LEVEL1 1 +/* now the fake CPIs */ +#define VIC_TIMER_CPI 2 +#define VIC_INVALIDATE_CPI 3 +#define VIC_RESCHEDULE_CPI 4 +#define VIC_ENABLE_IRQ_CPI 5 +#define VIC_CALL_FUNCTION_CPI 6 +#define VIC_CALL_FUNCTION_SINGLE_CPI 7 + +/* Now the QIC CPIs: Since we don't need the two initial levels, + * these are 2 less than the VIC CPIs */ +#define QIC_CPI_OFFSET 1 +#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) +#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) + +#define VIC_START_FAKE_CPI VIC_TIMER_CPI +#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI + +/* this is the SYS_INT CPI. */ +#define VIC_SYS_INT 8 +#define VIC_CMN_INT 15 + +/* This is the boot CPI for alternate processors. It gets overwritten + * by the above once the system has activated all available processors */ +#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) + +extern asmlinkage void vic_cpi_interrupt(void); +extern asmlinkage void vic_sys_interrupt(void); +extern asmlinkage void vic_cmn_interrupt(void); +extern asmlinkage void qic_timer_interrupt(void); +extern asmlinkage void qic_invalidate_interrupt(void); +extern asmlinkage void qic_reschedule_interrupt(void); +extern asmlinkage void qic_enable_irq_interrupt(void); +extern asmlinkage void qic_call_function_interrupt(void); From 3e92ab3d7e2edef5dccd8b0db21528699c81d2c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:21:42 +0100 Subject: [PATCH 340/682] x86, irqs, voyager: remove Voyager quirk Remove a Voyager complication from the generic irq_vectors.h header. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 067d22ffb3e..81fc883b3c0 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,7 +106,7 @@ #define NR_IRQS_LEGACY 16 -#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +#ifdef CONFIG_X86_IO_APIC #include /* need MAX_IO_APICS */ @@ -117,22 +117,14 @@ # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else - # define NR_IRQS \ ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ - (NR_VECTORS + (32 * MAX_IO_APICS))) \ - + (NR_VECTORS + (32 * MAX_IO_APICS))) #endif -#elif defined(CONFIG_X86_VOYAGER) - -# define NR_IRQS 224 - -#else /* IO_APIC || VOYAGER */ - +#else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS 16 - #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ From 9fc2e79d4f239c1c1dfdab7b10854c7588b39d9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:48:17 +0100 Subject: [PATCH 341/682] x86, irq: add IRQ layout comments Describe the layout of x86 trap/exception/IRQ vectors and clean up indentation and other small details. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 92 +++++++++++++++++++----------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 81fc883b3c0..5f7d6a1e3d2 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -3,45 +3,69 @@ #include -#define NMI_VECTOR 0x02 +/* + * Linux IRQ vector layout. + * + * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can + * be defined by Linux. They are used as a jump table by the CPU when a + * given vector is triggered - by a CPU-external, CPU-internal or + * software-triggered event. + * + * Linux sets the kernel code address each entry jumps to early during + * bootup, and never changes them. This is the general layout of the + * IDT entries: + * + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... 237 : device interrupts + * Vectors 238 ... 255 : special interrupts + * + * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. + * + * This file enumerates the exact layout of them: + */ + +#define NMI_VECTOR 0x02 /* * IDT vectors usable for external interrupt sources start * at 0x20: */ -#define FIRST_EXTERNAL_VECTOR 0x20 +#define FIRST_EXTERNAL_VECTOR 0x20 #ifdef CONFIG_X86_32 -# define SYSCALL_VECTOR 0x80 +# define SYSCALL_VECTOR 0x80 #else -# define IA32_SYSCALL_VECTOR 0x80 +# define IA32_SYSCALL_VECTOR 0x80 #endif /* * Reserve the lowest usable priority level 0x20 - 0x2f for triggering * cleanup after irq migration. */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* * Vectors 0x30-0x3f are used for ISA interrupts. */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) -#define IRQ1_VECTOR (IRQ0_VECTOR + 1) -#define IRQ2_VECTOR (IRQ0_VECTOR + 2) -#define IRQ3_VECTOR (IRQ0_VECTOR + 3) -#define IRQ4_VECTOR (IRQ0_VECTOR + 4) -#define IRQ5_VECTOR (IRQ0_VECTOR + 5) -#define IRQ6_VECTOR (IRQ0_VECTOR + 6) -#define IRQ7_VECTOR (IRQ0_VECTOR + 7) -#define IRQ8_VECTOR (IRQ0_VECTOR + 8) -#define IRQ9_VECTOR (IRQ0_VECTOR + 9) -#define IRQ10_VECTOR (IRQ0_VECTOR + 10) -#define IRQ11_VECTOR (IRQ0_VECTOR + 11) -#define IRQ12_VECTOR (IRQ0_VECTOR + 12) -#define IRQ13_VECTOR (IRQ0_VECTOR + 13) -#define IRQ14_VECTOR (IRQ0_VECTOR + 14) -#define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) + +#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +#define IRQ15_VECTOR (IRQ0_VECTOR + 15) /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff @@ -75,36 +99,36 @@ /* f0-f7 used for spreading out TLB flushes: */ #define INVALIDATE_TLB_VECTOR_END 0xf7 #define INVALIDATE_TLB_VECTOR_START 0xf0 -#define NUM_INVALIDATE_TLB_VECTORS 8 +#define NUM_INVALIDATE_TLB_VECTORS 8 /* * Local APIC timer IRQ vector is on a different priority level, * to work around the 'lost local interrupt if more than 2 IRQ * sources per level' errata. */ -#define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_TIMER_VECTOR 0xef /* * Performance monitoring interrupt vector: */ -#define LOCAL_PERF_VECTOR 0xee +#define LOCAL_PERF_VECTOR 0xee /* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -#define NR_VECTORS 256 +#define NR_VECTORS 256 -#define FPU_IRQ 13 +#define FPU_IRQ 13 -#define FIRST_VM86_IRQ 3 -#define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +#define FIRST_VM86_IRQ 3 +#define LAST_VM86_IRQ 15 +#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#define NR_IRQS_LEGACY 16 +#define NR_IRQS_LEGACY 16 #ifdef CONFIG_X86_IO_APIC @@ -112,9 +136,9 @@ #ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif #else # define NR_IRQS \ @@ -124,7 +148,7 @@ #endif #else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS 16 +# define NR_IRQS 16 #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ From c379698fdac7cb65c96dec549850ce606dd6ceba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:50:46 +0100 Subject: [PATCH 342/682] x86, irq_vectors.h: remove needless includes Reduce include file dependencies a bit - remove the two headers that are included in irq_vectors.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 5f7d6a1e3d2..ec87910025d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_IRQ_VECTORS_H #define _ASM_X86_IRQ_VECTORS_H -#include - /* * Linux IRQ vector layout. * @@ -131,22 +129,18 @@ #define NR_IRQS_LEGACY 16 #ifdef CONFIG_X86_IO_APIC - -#include /* need MAX_IO_APICS */ - -#ifndef CONFIG_SPARSE_IRQ -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# ifndef CONFIG_SPARSE_IRQ +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif # else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif -#else -# define NR_IRQS \ +# define NR_IRQS \ ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ (NR_VECTORS + (8 * NR_CPUS)) : \ (NR_VECTORS + (32 * MAX_IO_APICS))) -#endif - +# endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS 16 #endif From 009eb3fe146aa6f1951f3c5235851bb8d1330dfb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 02:56:44 +0100 Subject: [PATCH 343/682] x86, irq: describe NR_IRQ sizing details, clean up Impact: cleanup Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 36 +++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index ec87910025d..41e2450e13b 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -126,23 +126,37 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) +/* + * Size the maximum number of interrupts. + * + * If the irq_desc[] array has a sparse layout, we can size things + * generously - it scales up linearly with the maximum number of CPUs, + * and the maximum number of IO-APICs, whichever is higher. + * + * In other cases we size more conservatively, to not create too large + * static arrays. + */ + #define NR_IRQS_LEGACY 16 +#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) +#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) + #ifdef CONFIG_X86_IO_APIC -# ifndef CONFIG_SPARSE_IRQ -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) -# else -# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) -# endif -# else +# ifdef CONFIG_SPARSE_IRQ # define NR_IRQS \ - ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \ - (NR_VECTORS + (8 * NR_CPUS)) : \ - (NR_VECTORS + (32 * MAX_IO_APICS))) + (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) +# else +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) +# else +# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +# endif # endif #else /* !CONFIG_X86_IO_APIC: */ -# define NR_IRQS 16 +# define NR_IRQS NR_IRQS_LEGACY #endif #endif /* _ASM_X86_IRQ_VECTORS_H */ From d8106d2e24d54497233ca9cd97fa9bec807de458 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:06:17 +0100 Subject: [PATCH 344/682] x86, vm86: clean up invalid_vm86_irq() Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 41e2450e13b..b07278c55e9 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -124,7 +124,13 @@ #define FIRST_VM86_IRQ 3 #define LAST_VM86_IRQ 15 -#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) + +#ifndef __ASSEMBLY__ +static inline int invalid_vm86_irq(int irq) +{ + return irq < 3 || irq > 15; +} +#endif /* * Size the maximum number of interrupts. From 2749ebe320ff9f77548d10fcc0a3464ac21c8e58 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Thu, 29 Jan 2009 15:35:26 -0600 Subject: [PATCH 345/682] x86: UV fix uv_flush_send_and_wait() Impact: fix possible tlb mis-flushing on UV uv_flush_send_and_wait() should return a pointer if the broadcast remote tlb shootdown requests fail. That causes the conventional IPI method of shootdown to be used. Signed-off-by: Cliff Wickman Signed-off-by: Tejun Heo --- arch/x86/kernel/tlb_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 89fce1b6d01..f4b2f27d19b 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -259,7 +259,7 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, * the cpu's, all of which are still in the mask. */ __get_cpu_var(ptcstats).ptc_i++; - return 0; + return flush_mask; } /* From 552be871e67ff577ed36beb2f53d078b42304739 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 30 Jan 2009 17:47:53 +0900 Subject: [PATCH 346/682] x86: pass in cpu number to switch_to_new_gdt() Impact: cleanup, prepare for xen boot fix. Xen needs to call this function very early to setup the GDT and per-cpu segments. Remove the call to smp_processor_id() and just pass in the cpu number. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 7 +++---- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 11 ++++++----- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index befa20b4a68..1c25eb69ea8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -768,7 +768,7 @@ extern int sysenter_setup(void); extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); +extern void switch_to_new_gdt(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 652fdc9a757..6eacd64b602 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -255,10 +255,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ -void switch_to_new_gdt(void) +void switch_to_new_gdt(int cpu) { struct desc_ptr gdt_descr; - int cpu = smp_processor_id(); gdt_descr.address = (long)get_cpu_gdt_table(cpu); gdt_descr.size = GDT_SIZE - 1; @@ -993,7 +992,7 @@ void __cpuinit cpu_init(void) * and set up the GDT descriptor: */ - switch_to_new_gdt(); + switch_to_new_gdt(cpu); loadsegment(fs, 0); load_idt((const struct desc_ptr *)&idt_descr); @@ -1098,7 +1097,7 @@ void __cpuinit cpu_init(void) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); load_idt(&idt_descr); - switch_to_new_gdt(); + switch_to_new_gdt(cpu); /* * Set up and load the per-CPU TSS and LDT diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0d1e7ac439f..ef91747bbed 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -122,7 +122,7 @@ void __init setup_per_cpu_areas(void) * area. Reload any changed state for the boot CPU. */ if (cpu == boot_cpu_id) - switch_to_new_gdt(); + switch_to_new_gdt(cpu); DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f9dbcff4354..612d3c74f6a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1185,7 +1185,7 @@ out: void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(); + switch_to_new_gdt(me); /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); per_cpu(cpu_state, me) = CPU_ONLINE; diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 331cd6d5648..58c7cac3440 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1746,12 +1746,13 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) static void __cpuinit voyager_smp_prepare_boot_cpu(void) { - switch_to_new_gdt(); + int cpu = smp_processor_id(); + switch_to_new_gdt(cpu); - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); - cpu_set(smp_processor_id(), cpu_possible_map); - cpu_set(smp_processor_id(), cpu_present_map); + cpu_set(cpu, cpu_online_map); + cpu_set(cpu, cpu_callout_map); + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); } static int __cpuinit voyager_cpu_up(unsigned int cpu) From 11e3a840cd5b731cdd8f6f956dfae78a8046d09c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: [PATCH 347/682] x86: split loading percpu segments from loading gdt Impact: split out a function, no functional change Xen needs to be able to access percpu data from very early on. For various reasons, it cannot also load the gdt at that time. It does, however, have a pefectly functional gdt at that point, so there's no pressing need to reload the gdt. Split the function to load the segment registers off, so Xen can call it directly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/common.c | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1c25eb69ea8..656d02ea509 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -769,6 +769,7 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); +extern void load_percpu_segment(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6eacd64b602..0f73ea42308 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -253,6 +253,16 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; +void load_percpu_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + loadsegment(fs, __KERNEL_PERCPU); +#else + loadsegment(gs, 0); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); +#endif +} + /* Current gdt points %fs at the "master" per-cpu area: after this, * it's on the real one. */ void switch_to_new_gdt(int cpu) @@ -263,12 +273,8 @@ void switch_to_new_gdt(int cpu) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); /* Reload the per-cpu base */ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - loadsegment(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -#endif + + load_percpu_segment(cpu); } static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; From 795f99b61d20c34cb04d17d8906b32f745a635ec Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: [PATCH 348/682] xen: setup percpu data pointers Impact: fix xen booting We need to access percpu data fairly early, so set up the percpu registers as soon as possible. We only need to load the appropriate segment register. We already have a GDT, but its hard to change it early because we need to manipulate the pagetable to do so, and that hasn't been set up yet. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/xen/enlighten.c | 3 +++ arch/x86/xen/smp.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bef941f6145..fe19c88a502 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1647,6 +1647,9 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif + /* setup percpu state */ + load_percpu_segment(0); + xen_smp_init(); /* Get mfn list */ diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 7735e3dd359..88d5d5ec6be 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -170,7 +170,8 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(__per_cpu_load + + (unsigned long)&per_cpu_var(gdt_page)); xen_setup_vcpu_info_placement(); } @@ -235,6 +236,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; +#else + ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ From e584f559c7b8711cccdf319400acd6294b2c074e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 23:17:23 -0800 Subject: [PATCH 349/682] x86/paravirt: don't restore second return reg Impact: bugfix In the 32-bit calling convention, %eax:%edx is used to return 64-bit values. Don't save and restore %edx around wrapped functions, or they can't return a full 64-bit result. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index b17365c3974..016dce31130 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1524,8 +1524,8 @@ extern struct paravirt_patch_site __parainstructions[], #define PV_RESTORE_REGS "popl %edx; popl %ecx;" /* save and restore all caller-save registers, except return value */ -#define PV_SAVE_ALL_CALLER_REGS PV_SAVE_REGS -#define PV_RESTORE_ALL_CALLER_REGS PV_RESTORE_REGS +#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" +#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" #define PV_FLAGS_ARG "0" #define PV_EXTRA_CLOBBERS From 664c7954721adfc9bd61de6ec78f89f482f1a802 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 23:18:41 -0800 Subject: [PATCH 350/682] x86/vmi: fix interrupt enable/disable/save/restore calling convention. Zach says: > Enable/Disable have no clobbers at all. > Save clobbers only return value, %eax > Restore also clobbers nothing. This is precisely compatible with the calling convention, so we can just call them directly without wrapping. (Compile tested only.) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmi_32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302cc2dd..eb9e7347928 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -670,10 +670,11 @@ static inline int __init activate_vmi(void) para_fill(pv_mmu_ops.write_cr2, SetCR2); para_fill(pv_mmu_ops.write_cr3, SetCR3); para_fill(pv_cpu_ops.write_cr4, SetCR4); - para_fill(pv_irq_ops.save_fl, GetInterruptMask); - para_fill(pv_irq_ops.restore_fl, SetInterruptMask); - para_fill(pv_irq_ops.irq_disable, DisableInterrupts); - para_fill(pv_irq_ops.irq_enable, EnableInterrupts); + + para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); + para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); + para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); + para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); para_fill(pv_cpu_ops.wbinvd, WBINVD); para_fill(pv_cpu_ops.read_tsc, RDTSC); From 3bd323a1da42525317e2ce6c93b97b5ba653bc9d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 2 Feb 2009 14:52:00 -0800 Subject: [PATCH 351/682] x86 setup: a20: early timeout for a nonexistent keyboard controller When probing the keyboard controller to enable A20, if we get FF back (which is *possible* as a valid status word, but is extremely unlikely) then bail after much fewer iterations than we otherwise would, and abort the attempt to access the KBC. This hopefully should make it work a lot better for embedded platforms which don't have a KBC and where the BIOS doesn't implement INT 15h AX=2401h (and doesn't boot with A20 already enabled.) If this works, it will be the one remaining use of CONFIG_X86_ELAN as anything other than a processor type optimization option. Signed-off-by: H. Peter Anvin --- arch/x86/boot/a20.c | 71 +++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 4063d630def..fba8e9c6a50 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -15,16 +16,23 @@ #include "boot.h" #define MAX_8042_LOOPS 100000 +#define MAX_8042_FF 32 static int empty_8042(void) { u8 status; int loops = MAX_8042_LOOPS; + int ffs = MAX_8042_FF; while (loops--) { io_delay(); status = inb(0x64); + if (status == 0xff) { + /* FF is a plausible, but very unlikely status */ + if (!--ffs) + return -1; /* Assume no KBC present */ + } if (status & 1) { /* Read and discard input data */ io_delay(); @@ -118,44 +126,43 @@ static void enable_a20_fast(void) int enable_a20(void) { -#if defined(CONFIG_X86_ELAN) - /* Elan croaks if we try to touch the KBC */ - enable_a20_fast(); - while (!a20_test_long()) - ; - return 0; -#elif defined(CONFIG_X86_VOYAGER) +#ifdef CONFIG_X86_VOYAGER /* On Voyager, a20_test() is unsafe? */ enable_a20_kbc(); return 0; #else int loops = A20_ENABLE_LOOPS; - while (loops--) { - /* First, check to see if A20 is already enabled - (legacy free, etc.) */ - if (a20_test_short()) - return 0; + int kbc_err; - /* Next, try the BIOS (INT 0x15, AX=0x2401) */ - enable_a20_bios(); - if (a20_test_short()) - return 0; + while (loops--) { + /* First, check to see if A20 is already enabled + (legacy free, etc.) */ + if (a20_test_short()) + return 0; + + /* Next, try the BIOS (INT 0x15, AX=0x2401) */ + enable_a20_bios(); + if (a20_test_short()) + return 0; + + /* Try enabling A20 through the keyboard controller */ + kbc_err = empty_8042(); - /* Try enabling A20 through the keyboard controller */ - empty_8042(); - if (a20_test_short()) - return 0; /* BIOS worked, but with delayed reaction */ - - enable_a20_kbc(); - if (a20_test_long()) - return 0; - - /* Finally, try enabling the "fast A20 gate" */ - enable_a20_fast(); - if (a20_test_long()) - return 0; - } - - return -1; + if (a20_test_short()) + return 0; /* BIOS worked, but with delayed reaction */ + + if (!kbc_err) { + enable_a20_kbc(); + if (a20_test_long()) + return 0; + } + + /* Finally, try enabling the "fast A20 gate" */ + enable_a20_fast(); + if (a20_test_long()) + return 0; + } + + return -1; #endif } From ef3892bd63420380d115f755d351d2071f1f805f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 2 Feb 2009 18:16:19 -0800 Subject: [PATCH 352/682] x86, percpu: fix kexec with vmlinux Impact: fix regression with kexec with vmlinux Split data.init into data.init, percpu, data.init2 sections instead of let data.init wrap percpu secion. Thus kexec loading will be happy, because sections will not overlap. Before the patch we have: Elf file type is EXEC (Executable file) Entry point 0x200000 There are 6 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff80200000 0x0000000000200000 0x0000000000ca6000 0x0000000000ca6000 R E 200000 LOAD 0x0000000000ea6000 0xffffffff80ea6000 0x0000000000ea6000 0x000000000014dfe0 0x000000000014dfe0 RWE 200000 LOAD 0x0000000001000000 0xffffffffff600000 0x0000000000ff4000 0x0000000000000888 0x0000000000000888 RWE 200000 LOAD 0x00000000011f6000 0xffffffff80ff6000 0x0000000000ff6000 0x0000000000073086 0x0000000000a2d938 RWE 200000 LOAD 0x0000000001400000 0x0000000000000000 0x000000000106a000 0x00000000001d2ce0 0x00000000001d2ce0 RWE 200000 NOTE 0x00000000009e2c1c 0xffffffff809e2c1c 0x00000000009e2c1c 0x0000000000000024 0x0000000000000024 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .builtin_fw __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param 01 .data .init.rodata .data.cacheline_aligned .data.read_mostly 02 .vsyscall_0 .vsyscall_fn .vsyscall_gtod_data .vsyscall_1 .vsyscall_2 .vgetcpu_mode .jiffies 03 .data.init_task .smp_locks .init.text .init.data .init.setup .initcall.init .con_initcall.init .x86_cpu_dev.init .altinstructions .altinstr_replacement .exit.text .init.ramfs .bss 04 .data.percpu 05 .notes After patch we've got: Elf file type is EXEC (Executable file) Entry point 0x200000 There are 7 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff80200000 0x0000000000200000 0x0000000000ca6000 0x0000000000ca6000 R E 200000 LOAD 0x0000000000ea6000 0xffffffff80ea6000 0x0000000000ea6000 0x000000000014dfe0 0x000000000014dfe0 RWE 200000 LOAD 0x0000000001000000 0xffffffffff600000 0x0000000000ff4000 0x0000000000000888 0x0000000000000888 RWE 200000 LOAD 0x00000000011f6000 0xffffffff80ff6000 0x0000000000ff6000 0x0000000000073086 0x0000000000073086 RWE 200000 LOAD 0x0000000001400000 0x0000000000000000 0x000000000106a000 0x00000000001d2ce0 0x00000000001d2ce0 RWE 200000 LOAD 0x000000000163d000 0xffffffff8123d000 0x000000000123d000 0x0000000000000000 0x00000000007e6938 RWE 200000 NOTE 0x00000000009e2c1c 0xffffffff809e2c1c 0x00000000009e2c1c 0x0000000000000024 0x0000000000000024 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .builtin_fw __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param 01 .data .init.rodata .data.cacheline_aligned .data.read_mostly 02 .vsyscall_0 .vsyscall_fn .vsyscall_gtod_data .vsyscall_1 .vsyscall_2 .vgetcpu_mode .jiffies 03 .data.init_task .smp_locks .init.text .init.data .init.setup .initcall.init .con_initcall.init .x86_cpu_dev.init .altinstructions .altinstr_replacement .exit.text .init.ramfs 04 .data.percpu 05 .bss 06 .notes Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index c9740996430..07f62d287ff 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -22,6 +22,7 @@ PHDRS { #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(7); /* RWE */ #endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS @@ -215,7 +216,7 @@ SECTIONS /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the * output PHDR, so the next output section - __data_nosave - should - * switch it back to data.init. Also, pda should be at the head of + * start another section data.init2. Also, pda should be at the head of * percpu area. Preallocate it and define the percpu offset symbol * so that it can be accessed as a percpu variable. */ @@ -232,7 +233,7 @@ SECTIONS __nosave_begin = .; .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) - } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ + } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ . = ALIGN(PAGE_SIZE); __nosave_end = .; From 063f8913afb48842b9329e195d90d2c28e58aacc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 3 Feb 2009 18:02:36 +0100 Subject: [PATCH 353/682] x86: document 64-bit and 32-bit function call convention ABI - also clean up the calling.h file a tiny bit Signed-off-by: Ingo Molnar --- arch/x86/include/asm/calling.h | 56 ++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 2bc162e0ec6..0e63c9a2a8d 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -1,5 +1,55 @@ /* - * Some macros to handle stack frames in assembly. + + x86 function call convention, 64-bit: + ------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + --------------------------------------------------------------------------- + rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] + + ( rsp is obviously invariant across normal function calls. (gcc can 'merge' + functions when it sees tail-call optimization possibilities) rflags is + clobbered. Leftover arguments are passed over the stack frame.) + + [*] In the frame-pointers case rbp is fixed to the stack frame. + + [**] for struct return values wider than 64 bits the return convention is a + bit more complex: up to 128 bits width we return small structures + straight in rax, rdx. For structures larger than that (3 words or + larger) the caller puts a pointer to an on-stack return struct + [allocated in the caller's stack frame] into the first argument - i.e. + into rdi. All other arguments shift up by one in this case. + Fortunately this case is rare in the kernel. + +For 32-bit we have the following conventions - kernel is built with +-mregparm=3 and -freg-struct-return: + + x86 function calling convention, 32-bit: + ---------------------------------------- + arguments | callee-saved | extra caller-saved | return + [callee-clobbered] | | [callee-clobbered] | + ------------------------------------------------------------------------- + eax edx ecx | ebx edi esi ebp [*] | | eax, edx [**] + + ( here too esp is obviously invariant across normal function calls. eflags + is clobbered. Leftover arguments are passed over the stack frame. ) + + [*] In the frame-pointers case ebp is fixed to the stack frame. + + [**] We build with -freg-struct-return, which on 32-bit means similar + semantics as on 64-bit: edx can be used for a second return value + (i.e. covering integer and structure sizes up to 64 bits) - after that + it gets more complex and more expensive: 3-word or larger struct returns + get done in the caller's frame and the pointer to the return struct goes + into regparm0, i.e. eax - the other arguments shift up and the + function's register parameters degenerate to regparm=2 in essence. + +*/ + + +/* + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ #define R15 0 @@ -9,7 +59,7 @@ #define RBP 32 #define RBX 40 -/* arguments: interrupts/non tracing syscalls only save upto here*/ +/* arguments: interrupts/non tracing syscalls only save up to here: */ #define R11 48 #define R10 56 #define R9 64 @@ -22,7 +72,7 @@ #define ORIG_RAX 120 /* + error_code */ /* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. */ +/* cpu exception frame or undefined in case of fast syscall: */ #define RIP 128 #define CS 136 #define EFLAGS 144 From 0eb592dbba40baebec9cdde3ff4574185de6cbcc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 3 Feb 2009 16:00:38 -0800 Subject: [PATCH 354/682] x86/paravirt: return full 64-bit result Impact: Bug fix A hunk went missing in the original patch, and callee-save callsites were not marked as returning the upper 32-bit of result, causing Badness. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 016dce31130..c85e7475e17 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -522,7 +522,7 @@ int paravirt_disable_iospace(void); "=c" (__ecx) #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS -#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) +#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) #define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS #define EXTRA_CLOBBERS From f5deb79679af6eb41b61112fadcda28b2a4cfb0d Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Tue, 3 Feb 2009 14:22:48 +0800 Subject: [PATCH 355/682] x86: kexec: Use one page table in x86_64 machine_kexec Impact: reduce kernel BSS size by 7 pages, improve code readability Two page tables are used in current x86_64 kexec implementation. One is used to jump from kernel virtual address to identity map address, the other is used to map all physical memory. In fact, on x86_64, there is no conflict between kernel virtual address space and physical memory space, so just one page table is sufficient. The page table pages used to map control page are dynamically allocated to save memory if kexec image is not loaded. ASM code used to map control page is replaced by C code too. Signed-off-by: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/kexec.h | 27 ++---- arch/x86/kernel/machine_kexec_64.c | 82 ++++++++++++------ arch/x86/kernel/relocate_kernel_64.S | 125 +-------------------------- 3 files changed, 67 insertions(+), 167 deletions(-) diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index c61d8b2ab8b..0ceb6d19ed3 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -9,23 +9,8 @@ # define PAGES_NR 4 #else # define PA_CONTROL_PAGE 0 -# define VA_CONTROL_PAGE 1 -# define PA_PGD 2 -# define VA_PGD 3 -# define PA_PUD_0 4 -# define VA_PUD_0 5 -# define PA_PMD_0 6 -# define VA_PMD_0 7 -# define PA_PTE_0 8 -# define VA_PTE_0 9 -# define PA_PUD_1 10 -# define VA_PUD_1 11 -# define PA_PMD_1 12 -# define VA_PMD_1 13 -# define PA_PTE_1 14 -# define VA_PTE_1 15 -# define PA_TABLE_PAGE 16 -# define PAGES_NR 17 +# define PA_TABLE_PAGE 1 +# define PAGES_NR 2 #endif #ifdef CONFIG_X86_32 @@ -157,9 +142,9 @@ relocate_kernel(unsigned long indirection_page, unsigned long start_address) ATTRIB_NORET; #endif -#ifdef CONFIG_X86_32 #define ARCH_HAS_KIMAGE_ARCH +#ifdef CONFIG_X86_32 struct kimage_arch { pgd_t *pgd; #ifdef CONFIG_X86_PAE @@ -169,6 +154,12 @@ struct kimage_arch { pte_t *pte0; pte_t *pte1; }; +#else +struct kimage_arch { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +}; #endif #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index c43caa3a91f..6993d51b7fd 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,15 +18,6 @@ #include #include -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -static u64 kexec_pgd[512] PAGE_ALIGNED; -static u64 kexec_pud0[512] PAGE_ALIGNED; -static u64 kexec_pmd0[512] PAGE_ALIGNED; -static u64 kexec_pte0[512] PAGE_ALIGNED; -static u64 kexec_pud1[512] PAGE_ALIGNED; -static u64 kexec_pmd1[512] PAGE_ALIGNED; -static u64 kexec_pte1[512] PAGE_ALIGNED; - static void init_level2_page(pmd_t *level2p, unsigned long addr) { unsigned long end_addr; @@ -107,12 +98,65 @@ out: return result; } +static void free_transition_pgtable(struct kimage *image) +{ + free_page((unsigned long)image->arch.pud); + free_page((unsigned long)image->arch.pmd); + free_page((unsigned long)image->arch.pte); +} + +static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long vaddr, paddr; + int result = -ENOMEM; + + vaddr = (unsigned long)relocate_kernel; + paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); + pgd += pgd_index(vaddr); + if (!pgd_present(*pgd)) { + pud = (pud_t *)get_zeroed_page(GFP_KERNEL); + if (!pud) + goto err; + image->arch.pud = pud; + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(pgd, vaddr); + if (!pud_present(*pud)) { + pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); + if (!pmd) + goto err; + image->arch.pmd = pmd; + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, vaddr); + if (!pmd_present(*pmd)) { + pte = (pte_t *)get_zeroed_page(GFP_KERNEL); + if (!pte) + goto err; + image->arch.pte = pte; + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + pte = pte_offset_kernel(pmd, vaddr); + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); + return 0; +err: + free_transition_pgtable(image); + return result; +} + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { pgd_t *level4p; + int result; level4p = (pgd_t *)__va(start_pgtable); - return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + if (result) + return result; + return init_transition_pgtable(image, level4p); } static void set_idt(void *newidt, u16 limit) @@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image) void machine_kexec_cleanup(struct kimage *image) { - return; + free_transition_pgtable(image); } /* @@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image) memcpy(control_page, relocate_kernel, PAGE_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); - page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; - page_list[PA_PGD] = virt_to_phys(&kexec_pgd); - page_list[VA_PGD] = (unsigned long)kexec_pgd; - page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); - page_list[VA_PUD_0] = (unsigned long)kexec_pud0; - page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); - page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); - page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); - page_list[VA_PUD_1] = (unsigned long)kexec_pud1; - page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); - page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; - page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index f5afe665a82..b0bbdd4829c 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -29,122 +29,6 @@ relocate_kernel: * %rdx start address */ - /* map the control page at its virtual address */ - - movq $0x0000ff8000000000, %r10 /* mask */ - mov $(39 - 3), %cl /* bits to shift */ - movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PGD)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PUD_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PUD_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PMD_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PMD_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PTE_0)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PTE_0)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - /* identity map the control page at its physical address */ - - movq $0x0000ff8000000000, %r10 /* mask */ - mov $(39 - 3), %cl /* bits to shift */ - movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PGD)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PUD_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PUD_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PMD_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PMD_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_PTE_1)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - - shrq $9, %r10 - sub $9, %cl - - movq %r11, %r9 - andq %r10, %r9 - shrq %cl, %r9 - - movq PTR(VA_PTE_1)(%rsi), %r8 - addq %r8, %r9 - movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 - orq $PAGE_ATTR, %r8 - movq %r8, (%r9) - -relocate_new_kernel: - /* %rdi indirection_page - * %rsi page_list - * %rdx start address - */ - /* zero out flags, and disable interrupts */ pushq $0 popfq @@ -156,9 +40,8 @@ relocate_new_kernel: /* get physical address of page table now too */ movq PTR(PA_TABLE_PAGE)(%rsi), %rcx - /* switch to new set of page tables */ - movq PTR(PA_PGD)(%rsi), %r9 - movq %r9, %cr3 + /* Switch to the identity mapped page tables */ + movq %rcx, %cr3 /* setup a new stack at the end of the physical control page */ lea PAGE_SIZE(%r8), %rsp @@ -194,9 +77,7 @@ identity_mapped: jmp 1f 1: - /* Switch to the identity mapped page tables, - * and flush the TLB. - */ + /* Flush the TLB (needed?) */ movq %rcx, %cr3 /* Do the copies */ From 0973a06cde8cc1522fbcf2baacb926f1ee3f4c79 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Feb 2009 15:24:09 -0800 Subject: [PATCH 356/682] x86: mm: introduce helper function in fault.c Impact: cleanup Introduce helper function fault_in_kernel_address() to make editors happy. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/mm/fault.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index eb4d7fe0593..8e9b0f1fd87 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -775,6 +775,15 @@ static inline int access_error(unsigned long error_code, int write, return 0; } +static int fault_in_kernel_space(unsigned long address) +{ +#ifdef CONFIG_X86_32 + return address >= TASK_SIZE; +#else /* !CONFIG_X86_32 */ + return address >= TASK_SIZE64; +#endif /* CONFIG_X86_32 */ +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -817,11 +826,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * (error_code & 4) == 0, and that the fault was not a * protection error (error_code & 9) == 0. */ -#ifdef CONFIG_X86_32 - if (unlikely(address >= TASK_SIZE)) { -#else - if (unlikely(address >= TASK_SIZE64)) { -#endif + if (unlikely(fault_in_kernel_space(address))) { if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) return; From 1f4f931501e9270c156d05ee76b7b872de486304 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:58:06 -0800 Subject: [PATCH 357/682] xen: fix 32-bit build resulting from mmu move Moving the mmu code from enlighten.c to mmu.c inadvertently broke the 32-bit build. Fix it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/mmu.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e41f7fc6cf..d2e8ed1aff3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1396,6 +1396,43 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) #endif } +#ifdef CONFIG_HIGHPTE +static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) +{ + pgprot_t prot = PAGE_KERNEL; + + if (PagePinned(page)) + prot = PAGE_KERNEL_RO; + + if (0 && PageHighMem(page)) + printk("mapping highpte %lx type %d prot %s\n", + page_to_pfn(page), type, + (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); + + return kmap_atomic_prot(page, type, prot); +} +#endif + +#ifdef CONFIG_X86_32 +static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +{ + /* If there's an existing pte, then don't allow _PAGE_RW to be set */ + if (pte_val_ma(*ptep) & _PAGE_PRESENT) + pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & + pte_val_ma(pte)); + + return pte; +} + +/* Init-time set_pte while constructing initial pagetables, which + doesn't allow RO pagetable pages to be remapped RW */ +static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +{ + pte = mask_rw_pte(ptep, pte); + + xen_set_pte(ptep, pte); +} +#endif /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ From 383414322b3b3ced0cbc146801e0cc6c60a6c5f4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:31 -0800 Subject: [PATCH 358/682] xen: setup percpu data pointers We need to access percpu data fairly early, so set up the percpu registers as soon as possible. We only need to load the appropriate segment register. We already have a GDT, but its hard to change it early because we need to manipulate the pagetable to do so, and that hasn't been set up yet. Also, set the kernel stack when bringing up secondary CPUs. If we don't they all end up sharing the same stack... Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 15 ++++++++++++++- arch/x86/xen/smp.c | 6 ++++-- arch/x86/xen/xen-ops.h | 2 ++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index cd022c43dfb..aed7ceeb4b6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -66,6 +66,8 @@ EXPORT_SYMBOL_GPL(xen_start_info); struct shared_info xen_dummy_shared_info; +void *xen_initial_gdt; + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. @@ -917,8 +919,19 @@ asmlinkage void __init xen_start_kernel(void) have_vcpu_info_placement = 0; #endif - /* setup percpu state */ +#ifdef CONFIG_X86_64 + /* + * Setup percpu state. We only need to do this for 64-bit + * because 32-bit already has %fs set properly. + */ load_percpu_segment(0); +#endif + /* + * The only reliable way to retain the initial address of the + * percpu gdt_page is to remember it here, so we can go and + * mark it RW later, when the initial percpu area is freed. + */ + xen_initial_gdt = &per_cpu(gdt_page, 0); xen_smp_init(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 88d5d5ec6be..035582ae815 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -170,8 +170,7 @@ static void __init xen_smp_prepare_boot_cpu(void) /* We've switched to the "real" per-cpu gdt, so make sure the old memory can be recycled */ - make_lowmem_page_readwrite(__per_cpu_load + - (unsigned long)&per_cpu_var(gdt_page)); + make_lowmem_page_readwrite(xen_initial_gdt); xen_setup_vcpu_info_placement(); } @@ -287,6 +286,9 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) irq_ctx_init(cpu); #else clear_tsk_thread_flag(idle, TIF_FORK); + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(idle) - + KERNEL_STACK_OFFSET + THREAD_SIZE; #endif xen_setup_timer(cpu); xen_init_lock_cpu(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 11913fc94c1..2f5ef2632ea 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,6 +10,8 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +extern void *xen_initial_gdt; + struct trap_info; void xen_copy_trap_info(struct trap_info *traps); From 5393744b71ce797f1b1546fafaed127fc50c2b61 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:42 -0800 Subject: [PATCH 359/682] xen: make direct versions of irq_enable/disable/save/restore to common code Now that x86-64 has directly accessible percpu variables, it can also implement the direct versions of these operations, which operate on a vcpu_info structure directly embedded in the percpu area. In fact, the 64-bit versions are more or less identical, and so can be shared. The only two differences are: 1. xen_restore_fl_direct takes its argument in eax on 32-bit, and rdi on 64-bit. Unfortunately it isn't possible to directly refer to the 2nd lsb of rdi directly (as you can with %ah), so the code isn't quite as dense. 2. check_events needs to variants to save different registers. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/Makefile | 3 +- arch/x86/xen/xen-asm.S | 140 ++++++++++++++++++++++++++++++++++++++ arch/x86/xen/xen-asm.h | 12 ++++ arch/x86/xen/xen-asm_32.S | 111 ++++-------------------------- arch/x86/xen/xen-asm_64.S | 134 +----------------------------------- 5 files changed, 169 insertions(+), 231 deletions(-) create mode 100644 arch/x86/xen/xen-asm.S create mode 100644 arch/x86/xen/xen-asm.h diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 6dcefba7836..3b767d03fd6 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -6,7 +6,8 @@ CFLAGS_REMOVE_irq.o = -pg endif obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ - time.o xen-asm_$(BITS).o grant-table.o suspend.o + time.o xen-asm.o xen-asm_$(BITS).o \ + grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o spinlock.o obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o \ No newline at end of file diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S new file mode 100644 index 00000000000..4c6f9679913 --- /dev/null +++ b/arch/x86/xen/xen-asm.S @@ -0,0 +1,140 @@ +/* + Asm versions of Xen pv-ops, suitable for either direct use or inlining. + The inline versions are the same as the direct-use versions, with the + pre- and post-amble chopped off. + + This code is encoded for size rather than absolute efficiency, + with a view to being able to inline as much as possible. + + We only bother with direct forms (ie, vcpu in percpu data) of + the operations here; the indirect forms are better handled in + C, since they're generally too large to inline anyway. + */ + +#include +#include +#include + +#include "xen-asm.h" + +/* + Enable events. This clears the event mask and tests the pending + event status with one and operation. If there are pending + events, then enter the hypervisor to get them handled. + */ +ENTRY(xen_irq_enable_direct) + /* Unmask events */ + movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* Test for pending */ + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f + +2: call check_events +1: +ENDPATCH(xen_irq_enable_direct) + ret + ENDPROC(xen_irq_enable_direct) + RELOC(xen_irq_enable_direct, 2b+1) + + +/* + Disabling events is simply a matter of making the event mask + non-zero. + */ +ENTRY(xen_irq_disable_direct) + movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask +ENDPATCH(xen_irq_disable_direct) + ret + ENDPROC(xen_irq_disable_direct) + RELOC(xen_irq_disable_direct, 0) + +/* + (xen_)save_fl is used to get the current interrupt enable status. + Callers expect the status to be in X86_EFLAGS_IF, and other bits + may be set in the return value. We take advantage of this by + making sure that X86_EFLAGS_IF has the right value (and other bits + in that byte are 0), but other bits in the return value are + undefined. We need to toggle the state of the bit, because + Xen and x86 use opposite senses (mask vs enable). + */ +ENTRY(xen_save_fl_direct) + testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + setz %ah + addb %ah,%ah +ENDPATCH(xen_save_fl_direct) + ret + ENDPROC(xen_save_fl_direct) + RELOC(xen_save_fl_direct, 0) + + +/* + In principle the caller should be passing us a value return + from xen_save_fl_direct, but for robustness sake we test only + the X86_EFLAGS_IF flag rather than the whole byte. After + setting the interrupt mask state, it checks for unmasked + pending events and enters the hypervisor to get them delivered + if so. + */ +ENTRY(xen_restore_fl_direct) +#ifdef CONFIG_X86_64 + testw $X86_EFLAGS_IF, %di +#else + testb $X86_EFLAGS_IF>>8, %ah +#endif + setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask + /* Preempt here doesn't matter because that will deal with + any pending interrupts. The pending check may end up being + run on the wrong CPU, but that doesn't hurt. */ + + /* check for unmasked and pending */ + cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending + jz 1f +2: call check_events +1: +ENDPATCH(xen_restore_fl_direct) + ret + ENDPROC(xen_restore_fl_direct) + RELOC(xen_restore_fl_direct, 2b+1) + + +/* + Force an event check by making a hypercall, + but preserve regs before making the call. + */ +check_events: +#ifdef CONFIG_X86_32 + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax +#else + push %rax + push %rcx + push %rdx + push %rsi + push %rdi + push %r8 + push %r9 + push %r10 + push %r11 + call xen_force_evtchn_callback + pop %r11 + pop %r10 + pop %r9 + pop %r8 + pop %rdi + pop %rsi + pop %rdx + pop %rcx + pop %rax +#endif + ret + diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h new file mode 100644 index 00000000000..465276467a4 --- /dev/null +++ b/arch/x86/xen/xen-asm.h @@ -0,0 +1,12 @@ +#ifndef _XEN_XEN_ASM_H +#define _XEN_XEN_ASM_H + +#include + +#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +#define ENDPATCH(x) .globl x##_end; x##_end=. + +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 + +#endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 42786f59d9c..082d173caaf 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -11,101 +11,28 @@ generally too large to inline anyway. */ -#include - -#include +//#include #include -#include #include #include #include -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 +#include "xen-asm.h" /* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. + Force an event check by making a hypercall, + but preserve regs before making the call. */ -ENTRY(xen_irq_enable_direct) - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) +check_events: + push %eax + push %ecx + push %edx + call xen_force_evtchn_callback + pop %edx + pop %ecx + pop %eax ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) /* We can't use sysexit directly, because we're not running in ring0. @@ -289,17 +216,3 @@ ENTRY(xen_iret_crit_fixup) lea 4(%edi),%esp /* point esp to new frame */ 2: jmp xen_do_upcall - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d6fc51f4ce8..d205a283efe 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -11,142 +11,14 @@ generally too large to inline anyway. */ -#include - -#include -#include #include -#include #include +#include +#include #include -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -#if 1 -/* - FIXME: x86_64 now can support direct access to percpu variables - via a segment override. Update xen accordingly. - */ -#define BUG ud2a -#endif - -/* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. - */ -ENTRY(xen_irq_enable_direct) - BUG - - /* Unmask events */ - movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* Test for pending */ - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f - -2: call check_events -1: -ENDPATCH(xen_irq_enable_direct) - ret - ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) - -/* - Disabling events is simply a matter of making the event mask - non-zero. - */ -ENTRY(xen_irq_disable_direct) - BUG - - movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) - ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) - -/* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). - */ -ENTRY(xen_save_fl_direct) - BUG - - testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - setz %ah - addb %ah,%ah -ENDPATCH(xen_save_fl_direct) - ret - ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) - -/* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. - */ -ENTRY(xen_restore_fl_direct) - BUG - - testb $X86_EFLAGS_IF>>8, %ah - setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ - - /* check for unmasked and pending */ - cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f -2: call check_events -1: -ENDPATCH(xen_restore_fl_direct) - ret - ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) - - -/* - Force an event check by making a hypercall, - but preserve regs before making the call. - */ -check_events: - push %rax - push %rcx - push %rdx - push %rsi - push %rdi - push %r8 - push %r9 - push %r10 - push %r11 - call xen_force_evtchn_callback - pop %r11 - pop %r10 - pop %r9 - pop %r8 - pop %rdi - pop %rsi - pop %rdx - pop %rcx - pop %rax - ret +#include "xen-asm.h" ENTRY(xen_adjust_exception_frame) mov 8+0(%rsp),%rcx From e4d0407185cdbdcfd99fc23bde2e5454bbc46329 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Feb 2009 13:55:54 -0800 Subject: [PATCH 360/682] xen: use direct ops on 64-bit Enable the use of the direct vcpu-access operations on 64-bit. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/x86/xen/enlighten.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index aed7ceeb4b6..37230342c2c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -87,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; * * 0: not available, 1: available */ -static int have_vcpu_info_placement = -#ifdef CONFIG_X86_32 - 1 -#else - 0 -#endif - ; - +static int have_vcpu_info_placement = 1; static void xen_vcpu_setup(int cpu) { @@ -914,11 +907,6 @@ asmlinkage void __init xen_start_kernel(void) machine_ops = xen_machine_ops; -#ifdef CONFIG_X86_64 - /* Disable until direct per-cpu data access. */ - have_vcpu_info_placement = 0; -#endif - #ifdef CONFIG_X86_64 /* * Setup percpu state. We only need to do this for 64-bit From 18114f61359ac05e3aa797d53d63f40db41f798d Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 30 Jan 2009 18:16:46 -0800 Subject: [PATCH 361/682] x86: uaccess: use errret as error value in __put_user_size() Impact: cleanup In __put_user_size() macro errret is used for error value. But if size is 8, errret isn't passed to__put_user_asm_u64(). This behavior is inconsistent. Signed-off-by: Hiroshi Shimamoto Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b9a24155f7a..b685ece89d5 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -186,7 +186,7 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 -#define __put_user_asm_u64(x, addr, err) \ +#define __put_user_asm_u64(x, addr, err, errret) \ asm volatile("1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ "3:\n" \ @@ -197,7 +197,7 @@ extern int __get_user_bad(void); _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=r" (err) \ - : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) + : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ asm volatile("1: movl %%eax,0(%1)\n" \ @@ -211,8 +211,8 @@ extern int __get_user_bad(void); asm volatile("call __put_user_8" : "=a" (__ret_pu) \ : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") #else -#define __put_user_asm_u64(x, ptr, retval) \ - __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) +#define __put_user_asm_u64(x, ptr, retval, errret) \ + __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) #define __put_user_asm_ex_u64(x, addr) \ __put_user_asm_ex(x, addr, "q", "", "Zr") #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) @@ -289,7 +289,8 @@ do { \ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ break; \ case 8: \ - __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ + __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ + errret); \ break; \ default: \ __put_user_bad(); \ From 130ace11a9dc682541336d1fe5cb3bc7771a149e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Feb 2009 00:57:48 +0900 Subject: [PATCH 362/682] x86: style cleanups for xen assemblies Make the following style cleanups: * drop unnecessary //#include from xen-asm_32.S * compulsive adding of space after comma * reformat multiline comments Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/xen/xen-asm.S | 78 +++++++------ arch/x86/xen/xen-asm_32.S | 238 ++++++++++++++++++++------------------ arch/x86/xen/xen-asm_64.S | 107 ++++++++--------- 3 files changed, 219 insertions(+), 204 deletions(-) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 4c6f9679913..79d7362ad6d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -1,14 +1,14 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in percpu data) of - the operations here; the indirect forms are better handled in - C, since they're generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in percpu data) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ #include @@ -18,17 +18,19 @@ #include "xen-asm.h" /* - Enable events. This clears the event mask and tests the pending - event status with one and operation. If there are pending - events, then enter the hypervisor to get them handled. + * Enable events. This clears the event mask and tests the pending + * event status with one and operation. If there are pending events, + * then enter the hypervisor to get them handled. */ ENTRY(xen_irq_enable_direct) /* Unmask events */ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ /* Test for pending */ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending @@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct) /* - Disabling events is simply a matter of making the event mask - non-zero. + * Disabling events is simply a matter of making the event mask + * non-zero. */ ENTRY(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask @@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct) RELOC(xen_irq_disable_direct, 0) /* - (xen_)save_fl is used to get the current interrupt enable status. - Callers expect the status to be in X86_EFLAGS_IF, and other bits - may be set in the return value. We take advantage of this by - making sure that X86_EFLAGS_IF has the right value (and other bits - in that byte are 0), but other bits in the return value are - undefined. We need to toggle the state of the bit, because - Xen and x86 use opposite senses (mask vs enable). + * (xen_)save_fl is used to get the current interrupt enable status. + * Callers expect the status to be in X86_EFLAGS_IF, and other bits + * may be set in the return value. We take advantage of this by + * making sure that X86_EFLAGS_IF has the right value (and other bits + * in that byte are 0), but other bits in the return value are + * undefined. We need to toggle the state of the bit, because Xen and + * x86 use opposite senses (mask vs enable). */ ENTRY(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah - addb %ah,%ah + addb %ah, %ah ENDPATCH(xen_save_fl_direct) ret ENDPROC(xen_save_fl_direct) @@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct) /* - In principle the caller should be passing us a value return - from xen_save_fl_direct, but for robustness sake we test only - the X86_EFLAGS_IF flag rather than the whole byte. After - setting the interrupt mask state, it checks for unmasked - pending events and enters the hypervisor to get them delivered - if so. + * In principle the caller should be passing us a value return from + * xen_save_fl_direct, but for robustness sake we test only the + * X86_EFLAGS_IF flag rather than the whole byte. After setting the + * interrupt mask state, it checks for unmasked pending events and + * enters the hypervisor to get them delivered if so. */ ENTRY(xen_restore_fl_direct) #ifdef CONFIG_X86_64 @@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct) testb $X86_EFLAGS_IF>>8, %ah #endif setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask - /* Preempt here doesn't matter because that will deal with - any pending interrupts. The pending check may end up being - run on the wrong CPU, but that doesn't hurt. */ + /* + * Preempt here doesn't matter because that will deal with any + * pending interrupts. The pending check may end up being run + * on the wrong CPU, but that doesn't hurt. + */ /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending @@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct) /* - Force an event check by making a hypercall, - but preserve regs before making the call. + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ check_events: #ifdef CONFIG_X86_32 @@ -137,4 +140,3 @@ check_events: pop %rax #endif ret - diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 082d173caaf..88e15deb8b8 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -1,17 +1,16 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ -//#include #include #include #include @@ -21,8 +20,8 @@ #include "xen-asm.h" /* - Force an event check by making a hypercall, - but preserve regs before making the call. + * Force an event check by making a hypercall, but preserve regs + * before making the call. */ check_events: push %eax @@ -35,10 +34,10 @@ check_events: ret /* - We can't use sysexit directly, because we're not running in ring0. - But we can easily fake it up using iret. Assuming xen_sysexit - is jumped to with a standard stack frame, we can just strip it - back to a standard iret frame and use iret. + * We can't use sysexit directly, because we're not running in ring0. + * But we can easily fake it up using iret. Assuming xen_sysexit is + * jumped to with a standard stack frame, we can just strip it back to + * a standard iret frame and use iret. */ ENTRY(xen_sysexit) movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ @@ -49,33 +48,31 @@ ENTRY(xen_sysexit) ENDPROC(xen_sysexit) /* - This is run where a normal iret would be run, with the same stack setup: - 8: eflags - 4: cs - esp-> 0: eip - - This attempts to make sure that any pending events are dealt - with on return to usermode, but there is a small window in - which an event can happen just before entering usermode. If - the nested interrupt ends up setting one of the TIF_WORK_MASK - pending work flags, they will not be tested again before - returning to usermode. This means that a process can end up - with pending work, which will be unprocessed until the process - enters and leaves the kernel again, which could be an - unbounded amount of time. This means that a pending signal or - reschedule event could be indefinitely delayed. - - The fix is to notice a nested interrupt in the critical - window, and if one occurs, then fold the nested interrupt into - the current interrupt stack frame, and re-process it - iteratively rather than recursively. This means that it will - exit via the normal path, and all pending work will be dealt - with appropriately. - - Because the nested interrupt handler needs to deal with the - current stack state in whatever form its in, we keep things - simple by only using a single register which is pushed/popped - on the stack. + * This is run where a normal iret would be run, with the same stack setup: + * 8: eflags + * 4: cs + * esp-> 0: eip + * + * This attempts to make sure that any pending events are dealt with + * on return to usermode, but there is a small window in which an + * event can happen just before entering usermode. If the nested + * interrupt ends up setting one of the TIF_WORK_MASK pending work + * flags, they will not be tested again before returning to + * usermode. This means that a process can end up with pending work, + * which will be unprocessed until the process enters and leaves the + * kernel again, which could be an unbounded amount of time. This + * means that a pending signal or reschedule event could be + * indefinitely delayed. + * + * The fix is to notice a nested interrupt in the critical window, and + * if one occurs, then fold the nested interrupt into the current + * interrupt stack frame, and re-process it iteratively rather than + * recursively. This means that it will exit via the normal path, and + * all pending work will be dealt with appropriately. + * + * Because the nested interrupt handler needs to deal with the current + * stack state in whatever form its in, we keep things simple by only + * using a single register which is pushed/popped on the stack. */ ENTRY(xen_iret) /* test eflags for special cases */ @@ -85,13 +82,15 @@ ENTRY(xen_iret) push %eax ESP_OFFSET=4 # bytes pushed onto stack - /* Store vcpu_info pointer for easy access. Do it this - way to avoid having to reload %fs */ + /* + * Store vcpu_info pointer for easy access. Do it this way to + * avoid having to reload %fs + */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax),%eax - movl __per_cpu_offset(,%eax,4),%eax - mov per_cpu__xen_vcpu(%eax),%eax + movl TI_cpu(%eax), %eax + movl __per_cpu_offset(,%eax,4), %eax + mov per_cpu__xen_vcpu(%eax), %eax #else movl per_cpu__xen_vcpu, %eax #endif @@ -99,37 +98,46 @@ ENTRY(xen_iret) /* check IF state we're restoring */ testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) - /* Maybe enable events. Once this happens we could get a - recursive event, so the critical region starts immediately - afterwards. However, if that happens we don't end up - resuming the code, so we don't have to be worried about - being preempted to another CPU. */ + /* + * Maybe enable events. Once this happens we could get a + * recursive event, so the critical region starts immediately + * afterwards. However, if that happens we don't end up + * resuming the code, so we don't have to be worried about + * being preempted to another CPU. + */ setz XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ cmpw $0x0001, XEN_vcpu_info_pending(%eax) - /* If there's something pending, mask events again so we - can jump back into xen_hypervisor_callback */ + /* + * If there's something pending, mask events again so we can + * jump back into xen_hypervisor_callback + */ sete XEN_vcpu_info_mask(%eax) popl %eax - /* From this point on the registers are restored and the stack - updated, so we don't need to worry about it if we're preempted */ + /* + * From this point on the registers are restored and the stack + * updated, so we don't need to worry about it if we're + * preempted + */ iret_restore_end: - /* Jump to hypervisor_callback after fixing up the stack. - Events are masked, so jumping out of the critical - region is OK. */ + /* + * Jump to hypervisor_callback after fixing up the stack. + * Events are masked, so jumping out of the critical region is + * OK. + */ je xen_hypervisor_callback 1: iret xen_iret_end_crit: -.section __ex_table,"a" +.section __ex_table, "a" .align 4 - .long 1b,iret_exc + .long 1b, iret_exc .previous hyper_iret: @@ -139,55 +147,55 @@ hyper_iret: .globl xen_iret_start_crit, xen_iret_end_crit /* - This is called by xen_hypervisor_callback in entry.S when it sees - that the EIP at the time of interrupt was between xen_iret_start_crit - and xen_iret_end_crit. We're passed the EIP in %eax so we can do - a more refined determination of what to do. - - The stack format at this point is: - ---------------- - ss : (ss/esp may be present if we came from usermode) - esp : - eflags } outer exception info - cs } - eip } - ---------------- <- edi (copy dest) - eax : outer eax if it hasn't been restored - ---------------- - eflags } nested exception info - cs } (no ss/esp because we're nested - eip } from the same ring) - orig_eax }<- esi (copy src) - - - - - - - - - - fs } - es } - ds } SAVE_ALL state - eax } - : : - ebx }<- esp - ---------------- - - In order to deliver the nested exception properly, we need to shift - everything from the return addr up to the error code so it - sits just under the outer exception info. This means that when we - handle the exception, we do it in the context of the outer exception - rather than starting a new one. - - The only caveat is that if the outer eax hasn't been - restored yet (ie, it's still on stack), we need to insert - its value into the SAVE_ALL state before going on, since - it's usermode state which we eventually need to restore. + * This is called by xen_hypervisor_callback in entry.S when it sees + * that the EIP at the time of interrupt was between + * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in + * %eax so we can do a more refined determination of what to do. + * + * The stack format at this point is: + * ---------------- + * ss : (ss/esp may be present if we came from usermode) + * esp : + * eflags } outer exception info + * cs } + * eip } + * ---------------- <- edi (copy dest) + * eax : outer eax if it hasn't been restored + * ---------------- + * eflags } nested exception info + * cs } (no ss/esp because we're nested + * eip } from the same ring) + * orig_eax }<- esi (copy src) + * - - - - - - - - + * fs } + * es } + * ds } SAVE_ALL state + * eax } + * : : + * ebx }<- esp + * ---------------- + * + * In order to deliver the nested exception properly, we need to shift + * everything from the return addr up to the error code so it sits + * just under the outer exception info. This means that when we + * handle the exception, we do it in the context of the outer + * exception rather than starting a new one. + * + * The only caveat is that if the outer eax hasn't been restored yet + * (ie, it's still on stack), we need to insert its value into the + * SAVE_ALL state before going on, since it's usermode state which we + * eventually need to restore. */ ENTRY(xen_iret_crit_fixup) /* - Paranoia: Make sure we're really coming from kernel space. - One could imagine a case where userspace jumps into the - critical range address, but just before the CPU delivers a GP, - it decides to deliver an interrupt instead. Unlikely? - Definitely. Easy to avoid? Yes. The Intel documents - explicitly say that the reported EIP for a bad jump is the - jump instruction itself, not the destination, but some virtual - environments get this wrong. + * Paranoia: Make sure we're really coming from kernel space. + * One could imagine a case where userspace jumps into the + * critical range address, but just before the CPU delivers a + * GP, it decides to deliver an interrupt instead. Unlikely? + * Definitely. Easy to avoid? Yes. The Intel documents + * explicitly say that the reported EIP for a bad jump is the + * jump instruction itself, not the destination, but some + * virtual environments get this wrong. */ movl PT_CS(%esp), %ecx andl $SEGMENT_RPL_MASK, %ecx @@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) lea PT_ORIG_EAX(%esp), %esi lea PT_EFLAGS(%esp), %edi - /* If eip is before iret_restore_end then stack - hasn't been restored yet. */ + /* + * If eip is before iret_restore_end then stack + * hasn't been restored yet. + */ cmp $iret_restore_end, %eax jae 1f - movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ + movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ movl %eax, PT_EAX(%esp) - lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ + lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ /* set up the copy */ 1: std @@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup) rep movsl cld - lea 4(%edi),%esp /* point esp to new frame */ + lea 4(%edi), %esp /* point esp to new frame */ 2: jmp xen_do_upcall diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d205a283efe..02f496a8dba 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -1,14 +1,14 @@ /* - Asm versions of Xen pv-ops, suitable for either direct use or inlining. - The inline versions are the same as the direct-use versions, with the - pre- and post-amble chopped off. - - This code is encoded for size rather than absolute efficiency, - with a view to being able to inline as much as possible. - - We only bother with direct forms (ie, vcpu in pda) of the operations - here; the indirect forms are better handled in C, since they're - generally too large to inline anyway. + * Asm versions of Xen pv-ops, suitable for either direct use or + * inlining. The inline versions are the same as the direct-use + * versions, with the pre- and post-amble chopped off. + * + * This code is encoded for size rather than absolute efficiency, with + * a view to being able to inline as much as possible. + * + * We only bother with direct forms (ie, vcpu in pda) of the + * operations here; the indirect forms are better handled in C, since + * they're generally too large to inline anyway. */ #include @@ -21,25 +21,25 @@ #include "xen-asm.h" ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp),%rcx - mov 8+8(%rsp),%r11 + mov 8+0(%rsp), %rcx + mov 8+8(%rsp), %r11 ret $16 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* - Xen64 iret frame: - - ss - rsp - rflags - cs - rip <-- standard iret frame - - flags - - rcx } - r11 }<-- pushed by hypercall page -rsp -> rax } + * Xen64 iret frame: + * + * ss + * rsp + * rflags + * cs + * rip <-- standard iret frame + * + * flags + * + * rcx } + * r11 }<-- pushed by hypercall page + * rsp->rax } */ ENTRY(xen_iret) pushq $0 @@ -48,8 +48,8 @@ ENDPATCH(xen_iret) RELOC(xen_iret, 1b+1) /* - sysexit is not used for 64-bit processes, so it's - only ever used to return to 32-bit compat userspace. + * sysexit is not used for 64-bit processes, so it's only ever used to + * return to 32-bit compat userspace. */ ENTRY(xen_sysexit) pushq $__USER32_DS @@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit) RELOC(xen_sysexit, 1b+1) ENTRY(xen_sysret64) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ movq %rsp, PER_CPU_VAR(old_rsp) - movq PER_CPU_VAR(kernel_stack),%rsp + movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER_DS pushq PER_CPU_VAR(old_rsp) @@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64) RELOC(xen_sysret64, 1b+1) ENTRY(xen_sysret32) - /* We're already on the usermode stack at this point, but still - with the kernel gs, so we can easily switch back */ + /* + * We're already on the usermode stack at this point, but + * still with the kernel gs, so we can easily switch back + */ movq %rsp, PER_CPU_VAR(old_rsp) movq PER_CPU_VAR(kernel_stack), %rsp @@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) /* - Xen handles syscall callbacks much like ordinary exceptions, - which means we have: - - kernel gs - - kernel rsp - - an iret-like stack frame on the stack (including rcx and r11): - ss - rsp - rflags - cs - rip - r11 - rsp-> rcx - - In all the entrypoints, we undo all that to make it look - like a CPU-generated syscall/sysenter and jump to the normal - entrypoint. + * Xen handles syscall callbacks much like ordinary exceptions, which + * means we have: + * - kernel gs + * - kernel rsp + * - an iret-like stack frame on the stack (including rcx and r11): + * ss + * rsp + * rflags + * cs + * rip + * r11 + * rsp->rcx + * + * In all the entrypoints, we undo all that to make it look like a + * CPU-generated syscall/sysenter and jump to the normal entrypoint. */ .macro undo_xen_syscall - mov 0*8(%rsp),%rcx - mov 1*8(%rsp),%r11 - mov 5*8(%rsp),%rsp + mov 0*8(%rsp), %rcx + mov 1*8(%rsp), %r11 + mov 5*8(%rsp), %rsp .endm /* Normal 64-bit system call target */ @@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target) ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) - lea 16(%rsp), %rsp /* strip %rcx,%r11 */ + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $VGCF_in_syscall jmp hypercall_iret From 56fc82c5360cdf0b250b5eb74f38657b0402faa5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Feb 2009 00:48:02 +0900 Subject: [PATCH 363/682] modpost: NOBITS sections may point beyond the end of the file Impact: fix link failure on certain toolchains with specific configs Recent percpu change made x86_64 split .data.init section into three separate segments - data.init, percpu and data.init2. data.init2 gets .data.nosave and .bss.* and is followed by .notes segment. Depending on configuration both segments might contain no data, in which case the tool chain makes the section header to contain offset beyond the end of the file. modpost isn't too happy about it and fails build - as reported by Pawel Dziekonski: Building modules, stage 2. MODPOST 416 modules FATAL: vmlinux is truncated. sechdrs[i].sh_offset=10354688 > sizeof(*hrd)=64 make[1]: *** [__modpost] Error 1 Teach modpost that NOBITS section may point beyond the end of the file and that .modinfo can't be NOBITS. Reported-by: Pawel Dziekonski Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- scripts/mod/modpost.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 88921611b22..7e62303133d 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *info, const char *filename) const char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; const char *secname; + int nobits = sechdrs[i].sh_type == SHT_NOBITS; - if (sechdrs[i].sh_offset > info->size) { + if (!nobits && sechdrs[i].sh_offset > info->size) { fatal("%s is truncated. sechdrs[i].sh_offset=%lu > " "sizeof(*hrd)=%zu\n", filename, (unsigned long)sechdrs[i].sh_offset, @@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *info, const char *filename) } secname = secstrings + sechdrs[i].sh_name; if (strcmp(secname, ".modinfo") == 0) { + if (nobits) + fatal("%s has NOBITS .modinfo\n", filename); info->modinfo = (void *)hdr + sechdrs[i].sh_offset; info->modinfo_len = sechdrs[i].sh_size; } else if (strcmp(secname, "__ksymtab") == 0) From 65a4e574d2382d83f71b30ea92f86d2e40a6ef8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:36:17 +0100 Subject: [PATCH 364/682] smp, generic: introduce arch_disable_smp_support() instead of disable_ioapic_setup() Impact: cleanup disable_ioapic_setup() in init/main.c is ugly as the function is x86-specific. The #ifdef inline prototype there is ugly too. Replace it with a generic arch_disable_smp_support() function - which has a weak alias for non-x86 architectures and for non-ioapic x86 builds. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 9 --------- arch/x86/kernel/apic.c | 4 +--- arch/x86/kernel/io_apic.c | 11 ++++++++++- arch/x86/kernel/smpboot.c | 2 +- include/linux/smp.h | 6 ++++++ init/main.c | 12 ++++++------ 6 files changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 08ec793aa04..309d0e23193 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -143,15 +143,6 @@ extern int noioapicreroute; /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; -static inline void disable_ioapic_setup(void) -{ -#ifdef CONFIG_PCI - noioapicquirk = 1; - noioapicreroute = -1; -#endif - skip_ioapic_setup = 1; -} - /* * If we use the IO-APIC for IRQ routing, disable automatic * assignment of PCI IRQ's. diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 85d8b50d1af..a04a73a51d2 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1138,9 +1138,7 @@ void __cpuinit setup_local_APIC(void) int i, j; if (disable_apic) { -#ifdef CONFIG_X86_IO_APIC - disable_ioapic_setup(); -#endif + arch_disable_smp_support(); return; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 57d60c741e3..84bccac4619 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -98,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; +void arch_disable_smp_support(void) +{ +#ifdef CONFIG_PCI + noioapicquirk = 1; + noioapicreroute = -1; +#endif + skip_ioapic_setup = 1; +} + static int __init parse_noapic(char *str) { /* disable IO-APIC */ - disable_ioapic_setup(); + arch_disable_smp_support(); return 0; } early_param("noapic", parse_noapic); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f40f86fec2f..96f7d304f5c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1071,7 +1071,7 @@ static int __init smp_sanity_check(unsigned max_cpus) printk(KERN_ERR "... forcing use of dummy APIC emulation." "(tell your hw vendor)\n"); smpboot_clear_io_apic(); - disable_ioapic_setup(); + arch_disable_smp_support(); return -1; } diff --git a/include/linux/smp.h b/include/linux/smp.h index 715196b09d6..d41a3a865fe 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,6 +66,12 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + /* * Call a function on all other processors */ diff --git a/init/main.c b/init/main.c index bfe4fb0c984..6441083f827 100644 --- a/init/main.c +++ b/init/main.c @@ -136,14 +136,14 @@ unsigned int __initdata setup_max_cpus = NR_CPUS; * greater than 0, limits the maximum number of CPUs activated in * SMP mode to . */ -#ifndef CONFIG_X86_IO_APIC -static inline void disable_ioapic_setup(void) {}; -#endif + +void __weak arch_disable_smp_support(void) { } static int __init nosmp(char *str) { setup_max_cpus = 0; - disable_ioapic_setup(); + arch_disable_smp_support(); + return 0; } @@ -153,14 +153,14 @@ static int __init maxcpus(char *str) { get_option(&str, &setup_max_cpus); if (setup_max_cpus == 0) - disable_ioapic_setup(); + arch_disable_smp_support(); return 0; } early_param("maxcpus", maxcpus); #else -#define setup_max_cpus NR_CPUS +const unsigned int setup_max_cpus = NR_CPUS; #endif /* From fdbecd9fd14198853bec4cbae8bc7af93f2e3de3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:57:12 +0100 Subject: [PATCH 365/682] x86, apic: explain the purpose of max_physical_apicid Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index a04a73a51d2..5475e1c3180 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -50,13 +50,26 @@ #include unsigned int num_processors; + unsigned disabled_cpus __cpuinitdata; + /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; -EXPORT_SYMBOL(boot_cpu_physical_apicid); + +/* + * The highest APIC ID seen during enumeration. + * + * This determines the messaging protocol we can use: if all APIC IDs + * are in the 0 ... 7 range, then we can use logical addressing which + * has some performance advantages (better broadcasting). + * + * If there's an APIC ID above 8, we use physical addressing. + */ unsigned int max_physical_apicid; -/* Bitmask of physically existing CPUs */ +/* + * Bitmask of physically existing CPUs: + */ physid_mask_t phys_cpu_present_map; /* From c5e954820335ef5aed1662b70aaf5deb9de16735 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 30 Jan 2009 17:29:27 -0800 Subject: [PATCH 366/682] x86: move default_ipi_xx back to ipi.c Impact: cleanup only leave _default_ipi_xx etc in .h Beyond the cleanup factor, this saves a bit of code size as well: text data bss dec hex filename 7281931 1630144 1463304 10375379 9e50d3 vmlinux.before 7281753 1630144 1463304 10375201 9e5021 vmlinux.after Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 6 -- arch/x86/include/asm/ipi.h | 127 ++++------------------------------ arch/x86/kernel/ipi.c | 108 +++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 120 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 3ef2bded97a..1a20e3d1200 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -71,12 +71,6 @@ extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); #endif -/* IPI functions */ -#ifdef CONFIG_X86_32 -extern void default_send_IPI_self(int vector); -#endif -extern void default_send_IPI(int dest, int vector); - /* Statistics */ extern atomic_t irq_err_count; extern atomic_t irq_mis_count; diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index aa79945445b..5f2efc5d992 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -119,112 +119,22 @@ static inline void native_apic_mem_write(APIC_ICR, cfg); } -static inline void -default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) -{ - unsigned long query_cpu; - unsigned long flags; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicast to each CPU instead. - * - mbligh - */ - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -static inline void -default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector) -{ - unsigned int this_cpu = smp_processor_id(); - unsigned int query_cpu; - unsigned long flags; - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, - query_cpu), vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - +extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector); #include -static inline void -default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - - /* - * Hack. The clustered APIC addressing mode doesn't allow us to send - * to an arbitrary mask, so I do a unicasts to each CPU instead. This - * should be modified to do 1 message per cluster ID - mbligh - */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector, - apic->dest_logical); - local_irq_restore(flags); -} - -static inline void -default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector) -{ - unsigned long flags; - unsigned int query_cpu; - unsigned int this_cpu = smp_processor_id(); - - /* See Hack comment above */ - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector, - apic->dest_logical); - } - local_irq_restore(flags); -} +extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector); /* Avoid include hell */ #define NMI_VECTOR 0x02 extern int no_broadcast; -#ifndef CONFIG_X86_64 -/* - * This is only used on smaller machines. - */ -static inline void default_send_IPI_mask_bitmask_logical(const struct cpumask *cpumask, int vector) -{ - unsigned long mask = cpumask_bits(cpumask)[0]; - unsigned long flags; - - local_irq_save(flags); - WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); - __default_send_IPI_dest_field(mask, vector, apic->dest_logical); - local_irq_restore(flags); -} - -static inline void default_send_IPI_mask_logical(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_bitmask_logical(mask, vector); -} -#endif - static inline void __default_local_send_IPI_allbutself(int vector) { if (no_broadcast || vector == NMI_VECTOR) @@ -242,22 +152,11 @@ static inline void __default_local_send_IPI_all(int vector) } #ifdef CONFIG_X86_32 -static inline void default_send_IPI_allbutself(int vector) -{ - /* - * if there are no other CPUs in the system then we get an APIC send - * error if we try to broadcast, thus avoid sending IPIs in this case. - */ - if (!(num_online_cpus() > 1)) - return; - - __default_local_send_IPI_allbutself(vector); -} - -static inline void default_send_IPI_all(int vector) -{ - __default_local_send_IPI_all(vector); -} +extern void default_send_IPI_mask_logical(const struct cpumask *mask, + int vector); +extern void default_send_IPI_allbutself(int vector); +extern void default_send_IPI_all(int vector); +extern void default_send_IPI_self(int vector); #endif #endif diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 339f4f3feee..dbf5445727a 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -19,8 +19,116 @@ #include #include +void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) +{ + unsigned long query_cpu; + unsigned long flags; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicast to each CPU instead. + * - mbligh + */ + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, + int vector) +{ + unsigned int this_cpu = smp_processor_id(); + unsigned int query_cpu; + unsigned long flags; + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, + query_cpu), vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} + +void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + + /* + * Hack. The clustered APIC addressing mode doesn't allow us to send + * to an arbitrary mask, so I do a unicasts to each CPU instead. This + * should be modified to do 1 message per cluster ID - mbligh + */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + local_irq_restore(flags); +} + +void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu == this_cpu) + continue; + __default_send_IPI_dest_field( + apic->cpu_to_logical_apicid(query_cpu), vector, + apic->dest_logical); + } + local_irq_restore(flags); +} + #ifdef CONFIG_X86_32 +/* + * This is only used on smaller machines. + */ +void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + unsigned long flags; + + local_irq_save(flags); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); + __default_send_IPI_dest_field(mask, vector, apic->dest_logical); + local_irq_restore(flags); +} + +void default_send_IPI_allbutself(int vector) +{ + /* + * if there are no other CPUs in the system then we get an APIC send + * error if we try to broadcast, thus avoid sending IPIs in this case. + */ + if (!(num_online_cpus() > 1)) + return; + + __default_local_send_IPI_allbutself(vector); +} + +void default_send_IPI_all(int vector) +{ + __default_local_send_IPI_all(vector); +} + void default_send_IPI_self(int vector) { __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); From a146649bc19d5eba4f5bfac6720c5f252d517a71 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 14:09:06 +0100 Subject: [PATCH 367/682] smp, generic: introduce arch_disable_smp_support(), build fix This function should be provided on UP too. Signed-off-by: Ingo Molnar --- include/linux/smp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index d41a3a865fe..bbacb7baa44 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,12 +66,6 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); -/* - * Callback to arch code if there's nosmp or maxcpus=0 on the - * boot command line: - */ -extern void arch_disable_smp_support(void); - /* * Call a function on all other processors */ @@ -182,6 +176,12 @@ static inline void init_call_single_data(void) #define put_cpu() preempt_enable() #define put_cpu_no_resched() preempt_enable_no_resched() +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + void smp_setup_processor_id(void); #endif /* __LINUX_SMP_H */ From 4f179d121885142fb907b64956228b369d495958 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 1 Feb 2009 11:25:57 +0100 Subject: [PATCH 368/682] x86, numaq: cleanups Also move xquad_portio over to where it's allocated. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numaq.h | 2 ++ arch/x86/kernel/numaq_32.c | 33 ++++++++++++++++++--------------- arch/x86/pci/numaq_32.c | 4 ---- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h index 1e8bd30b4c1..9f0a5f5d29e 100644 --- a/arch/x86/include/asm/numaq.h +++ b/arch/x86/include/asm/numaq.h @@ -31,6 +31,8 @@ extern int found_numaq; extern int get_memcfg_numaq(void); +extern void *xquad_portio; + /* * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 947748e1721..0cc41a1d255 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -3,7 +3,7 @@ * * Copyright (C) 2002, IBM Corp. * - * All rights reserved. + * All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,17 +23,18 @@ * Send feedback to */ -#include +#include #include #include #include -#include -#include -#include +#include + #include +#include #include -#include +#include #include +#include #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) @@ -91,19 +92,20 @@ static int __init numaq_pre_time_init(void) } int found_numaq; + /* * Have to match translation table entries to main table entries by counter * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; }; /* x86_quirks member */ @@ -444,7 +446,8 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) return physid_mask_of_physid(cpu + 4*node); } -extern void *xquad_portio; +/* Where the IO area was mapped on multiquad, always 0 otherwise */ +void *xquad_portio; static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) { @@ -502,7 +505,7 @@ static void numaq_setup_portio_remap(void) int num_quads = num_online_nodes(); if (num_quads <= 1) - return; + return; printk("Remapping cross-quad port I/O for %d quads\n", num_quads); xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1b2d773612e..5601e829c38 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -18,10 +18,6 @@ #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; -EXPORT_SYMBOL(xquad_portio); - #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ From 8f9ca475c994e4d32f405183d07e8c7eedbdbdb4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Feb 2009 16:21:53 +0100 Subject: [PATCH 369/682] x86: clean up arch/x86/Kconfig* - Consistent alignment of help text - Use the ---help--- keyword everywhere consistently as a visual separator - fix whitespace mismatches Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 398 ++++++++++++++++++++--------------------- arch/x86/Kconfig.cpu | 70 ++++---- arch/x86/Kconfig.debug | 47 +++-- 3 files changed, 257 insertions(+), 258 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 80291f749b6..270ecf90bdb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -5,7 +5,7 @@ mainmenu "Linux Kernel Configuration for x86" config 64BIT bool "64-bit kernel" if ARCH = "x86" default ARCH = "x86_64" - help + ---help--- Say yes to build a 64-bit kernel - formerly known as x86_64 Say no to build a 32-bit kernel - formerly known as i386 @@ -235,7 +235,7 @@ config SMP config SPARSE_IRQ bool "Support sparse irq numbering" depends on PCI_MSI || HT_IRQ - help + ---help--- This enables support for sparse irqs. This is useful for distro kernels that want to define a high CONFIG_NR_CPUS value but still want to have low kernel memory footprint on smaller machines. @@ -249,7 +249,7 @@ config NUMA_MIGRATE_IRQ_DESC bool "Move irq desc when changing irq smp_affinity" depends on SPARSE_IRQ && NUMA default n - help + ---help--- This enables moving irq_desc to cpu/node that irq will use handled. If you don't know what to do here, say N. @@ -258,19 +258,19 @@ config X86_MPPARSE bool "Enable MPS table" if ACPI default y depends on X86_LOCAL_APIC - help + ---help--- For old smp systems that do not have proper acpi support. Newer systems (esp with 64bit cpus) with acpi support, MADT and DSDT will override it config X86_BIGSMP bool "Support for big SMP systems with more than 8 CPUs" depends on X86_32 && SMP - help + ---help--- This option is needed for the systems that have more than 8 CPUs config X86_NON_STANDARD bool "Support for non-standard x86 platforms" - help + ---help--- If you disable this option then the kernel will only support standard PC platforms. (which covers the vast majority of systems out there.) @@ -285,7 +285,7 @@ config X86_VISWS bool "SGI 320/540 (Visual Workstation)" depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT depends on X86_NON_STANDARD - help + ---help--- The SGI Visual Workstation series is an IA32-based workstation based on SGI systems chips with some legacy PC hardware attached. @@ -300,7 +300,7 @@ config X86_RDC321X depends on X86_NON_STANDARD select M486 select X86_REBOOTFIXUPS - help + ---help--- This option is needed for RDC R-321x system-on-chip, also known as R-8610-(G). If you don't have one of these chips, you should say N here. @@ -309,7 +309,7 @@ config X86_UV bool "SGI Ultraviolet" depends on X86_64 depends on X86_NON_STANDARD - help + ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. @@ -318,7 +318,7 @@ config X86_VSMP select PARAVIRT depends on X86_64 && PCI depends on X86_NON_STANDARD - help + ---help--- Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option if you have one of these machines. @@ -327,7 +327,7 @@ config X86_ELAN bool "AMD Elan" depends on X86_32 depends on X86_NON_STANDARD - help + ---help--- Select this for an AMD Elan processor. Do not use this option for K6/Athlon/Opteron processors! @@ -338,8 +338,8 @@ config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP depends on X86_NON_STANDARD - help - This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + ---help--- + This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. if you select them all, kernel will probe it one by one. and will fallback to default. @@ -349,7 +349,7 @@ config X86_NUMAQ depends on X86_32_NON_STANDARD select NUMA select X86_MPPARSE - help + ---help--- This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) NUMA multiquad box. This changes the way that processors are bootstrapped, and uses Clustered Logical APIC addressing mode instead @@ -359,14 +359,14 @@ config X86_NUMAQ config X86_SUMMIT bool "Summit/EXA (IBM x440)" depends on X86_32_NON_STANDARD - help + ---help--- This option is needed for IBM systems that use the Summit/EXA chipset. In particular, it is needed for the x440. config X86_ES7000 bool "Support for Unisys ES7000 IA32 series" depends on X86_32_NON_STANDARD && X86_BIGSMP - help + ---help--- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. @@ -374,7 +374,7 @@ config X86_VOYAGER bool "Voyager (NCR)" depends on SMP && !PCI && BROKEN depends on X86_32_NON_STANDARD - help + ---help--- Voyager is an MCA-based 32-way capable SMP architecture proprietary to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. @@ -387,7 +387,7 @@ config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" depends on X86 - help + ---help--- Calculate simpler /proc//wchan values. If this option is disabled then wchan values will recurse back to the caller function. This provides more accurate wchan values, @@ -397,7 +397,7 @@ config SCHED_OMIT_FRAME_POINTER menuconfig PARAVIRT_GUEST bool "Paravirtualized guest support" - help + ---help--- Say Y here to get to see options related to running Linux under various hypervisors. This option alone does not add any kernel code. @@ -411,7 +411,7 @@ config VMI bool "VMI Guest support" select PARAVIRT depends on X86_32 - help + ---help--- VMI provides a paravirtualized interface to the VMware ESX server (it could be used by other hypervisors in theory too, but is not at the moment), by linking the kernel to a GPL-ed ROM module @@ -421,7 +421,7 @@ config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT select PARAVIRT_CLOCK - help + ---help--- Turning on this option will allow you to run a paravirtualized clock when running over the KVM hypervisor. Instead of relying on a PIT (or probably other) emulation by the underlying device model, the host @@ -431,15 +431,15 @@ config KVM_CLOCK config KVM_GUEST bool "KVM Guest support" select PARAVIRT - help - This option enables various optimizations for running under the KVM - hypervisor. + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. source "arch/x86/lguest/Kconfig" config PARAVIRT bool "Enable paravirtualization code" - help + ---help--- This changes the kernel so it can modify itself when it is run under a hypervisor, potentially improving performance significantly over full virtualization. However, when run without a hypervisor @@ -452,21 +452,21 @@ config PARAVIRT_CLOCK endif config PARAVIRT_DEBUG - bool "paravirt-ops debugging" - depends on PARAVIRT && DEBUG_KERNEL - help - Enable to debug paravirt_ops internals. Specifically, BUG if - a paravirt_op is missing when it is called. + bool "paravirt-ops debugging" + depends on PARAVIRT && DEBUG_KERNEL + ---help--- + Enable to debug paravirt_ops internals. Specifically, BUG if + a paravirt_op is missing when it is called. config MEMTEST bool "Memtest" - help + ---help--- This option adds a kernel parameter 'memtest', which allows memtest to be set. - memtest=0, mean disabled; -- default - memtest=1, mean do 1 test pattern; - ... - memtest=4, mean do 4 test patterns. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=4, mean do 4 test patterns. If you are unsure how to answer this question, answer N. config X86_SUMMIT_NUMA @@ -482,21 +482,21 @@ source "arch/x86/Kconfig.cpu" config HPET_TIMER def_bool X86_64 prompt "HPET Timer Support" if X86_32 - help - Use the IA-PC HPET (High Precision Event Timer) to manage - time in preference to the PIT and RTC, if a HPET is - present. - HPET is the next generation timer replacing legacy 8254s. - The HPET provides a stable time base on SMP - systems, unlike the TSC, but it is more expensive to access, - as it is off-chip. You can find the HPET spec at - . + ---help--- + Use the IA-PC HPET (High Precision Event Timer) to manage + time in preference to the PIT and RTC, if a HPET is + present. + HPET is the next generation timer replacing legacy 8254s. + The HPET provides a stable time base on SMP + systems, unlike the TSC, but it is more expensive to access, + as it is off-chip. You can find the HPET spec at + . - You can safely choose Y here. However, HPET will only be - activated if the platform and the BIOS support this feature. - Otherwise the 8254 will be used for timing services. + You can safely choose Y here. However, HPET will only be + activated if the platform and the BIOS support this feature. + Otherwise the 8254 will be used for timing services. - Choose N to continue using the legacy 8254 timer. + Choose N to continue using the legacy 8254 timer. config HPET_EMULATE_RTC def_bool y @@ -507,7 +507,7 @@ config HPET_EMULATE_RTC config DMI default y bool "Enable DMI scanning" if EMBEDDED - help + ---help--- Enabled scanning of DMI to identify machine quirks. Say Y here unless you have verified that your setup is not affected by entries in the DMI blacklist. Required by PNP @@ -519,7 +519,7 @@ config GART_IOMMU select SWIOTLB select AGP depends on X86_64 && PCI - help + ---help--- Support for full DMA access of devices with 32bit memory access only on systems with more than 3GB. This is usually needed for USB, sound, many IDE/SATA chipsets and some other devices. @@ -534,7 +534,7 @@ config CALGARY_IOMMU bool "IBM Calgary IOMMU support" select SWIOTLB depends on X86_64 && PCI && EXPERIMENTAL - help + ---help--- Support for hardware IOMMUs in IBM's xSeries x366 and x460 systems. Needed to run systems with more than 3GB of memory properly with 32-bit PCI devices that do not support DAC @@ -552,7 +552,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT def_bool y prompt "Should Calgary be enabled by default?" depends on CALGARY_IOMMU - help + ---help--- Should Calgary be enabled by default? if you choose 'y', Calgary will be used (if it exists). If you choose 'n', Calgary will not be used even if it exists. If you choose 'n' and would like to use @@ -564,7 +564,7 @@ config AMD_IOMMU select SWIOTLB select PCI_MSI depends on X86_64 && PCI && ACPI - help + ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides remapping of DMA memory accesses from devices. With an AMD IOMMU you @@ -579,7 +579,7 @@ config AMD_IOMMU_STATS bool "Export AMD IOMMU statistics to debugfs" depends on AMD_IOMMU select DEBUG_FS - help + ---help--- This option enables code in the AMD IOMMU driver to collect various statistics about whats happening in the driver and exports that information to userspace via debugfs. @@ -588,7 +588,7 @@ config AMD_IOMMU_STATS # need this always selected by IOMMU for the VIA workaround config SWIOTLB def_bool y if X86_64 - help + ---help--- Support for software bounce buffers used on x86-64 systems which don't have a hardware IOMMU (e.g. the current generation of Intel's x86-64 CPUs). Using this PCI devices which can only @@ -606,7 +606,7 @@ config MAXSMP depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL select CPUMASK_OFFSTACK default n - help + ---help--- Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. @@ -617,7 +617,7 @@ config NR_CPUS default "4096" if MAXSMP default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) default "8" if SMP - help + ---help--- This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the minimum value which makes sense is 2. @@ -628,7 +628,7 @@ config NR_CPUS config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on X86_HT - help + ---help--- SMT scheduler support improves the CPU scheduler's decision making when dealing with Intel Pentium 4 chips with HyperThreading at a cost of slightly increased overhead in some places. If unsure say @@ -638,7 +638,7 @@ config SCHED_MC def_bool y prompt "Multi-core scheduler support" depends on X86_HT - help + ---help--- Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. @@ -648,7 +648,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on X86_32 && !SMP && !X86_32_NON_STANDARD - help + ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU system which has a processor with a local APIC, you can say Y here to @@ -661,7 +661,7 @@ config X86_UP_APIC config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC - help + ---help--- An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an SMP-capable replacement for PC-style interrupt controllers. Most SMP systems and many recent uniprocessor systems have one. @@ -686,7 +686,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS bool "Reroute for broken boot IRQs" default n depends on X86_IO_APIC - help + ---help--- This option enables a workaround that fixes a source of spurious interrupts. This is recommended when threaded interrupt handling is used on systems where the generation of @@ -726,7 +726,7 @@ config X86_MCE_INTEL def_bool y prompt "Intel MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - help + ---help--- Additional support for intel specific MCE features such as the thermal monitor. @@ -734,14 +734,14 @@ config X86_MCE_AMD def_bool y prompt "AMD MCE features" depends on X86_64 && X86_MCE && X86_LOCAL_APIC - help + ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. config X86_MCE_NONFATAL tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" depends on X86_32 && X86_MCE - help + ---help--- Enabling this feature starts a timer that triggers every 5 seconds which will look at the machine check registers to see if anything happened. Non-fatal problems automatically get corrected (but still logged). @@ -754,7 +754,7 @@ config X86_MCE_NONFATAL config X86_MCE_P4THERMAL bool "check for P4 thermal throttling interrupt." depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) - help + ---help--- Enabling this feature will cause a message to be printed when the P4 enters thermal throttling. @@ -762,11 +762,11 @@ config VM86 bool "Enable VM86 support" if EMBEDDED default y depends on X86_32 - help - This option is required by programs like DOSEMU to run 16-bit legacy + ---help--- + This option is required by programs like DOSEMU to run 16-bit legacy code on X86 processors. It also may be needed by software like - XFree86 to initialize some video cards via BIOS. Disabling this - option saves about 6k. + XFree86 to initialize some video cards via BIOS. Disabling this + option saves about 6k. config TOSHIBA tristate "Toshiba Laptop support" @@ -840,33 +840,33 @@ config MICROCODE module will be called microcode. config MICROCODE_INTEL - bool "Intel microcode patch loading support" - depends on MICROCODE - default MICROCODE - select FW_LOADER - --help--- - This options enables microcode patch loading support for Intel - processors. + bool "Intel microcode patch loading support" + depends on MICROCODE + default MICROCODE + select FW_LOADER + ---help--- + This options enables microcode patch loading support for Intel + processors. - For latest news and information on obtaining all the required - Intel ingredients for this driver, check: - . + For latest news and information on obtaining all the required + Intel ingredients for this driver, check: + . config MICROCODE_AMD - bool "AMD microcode patch loading support" - depends on MICROCODE - select FW_LOADER - --help--- - If you select this option, microcode patch loading support for AMD - processors will be enabled. + bool "AMD microcode patch loading support" + depends on MICROCODE + select FW_LOADER + ---help--- + If you select this option, microcode patch loading support for AMD + processors will be enabled. - config MICROCODE_OLD_INTERFACE +config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" - help + ---help--- This device gives privileged processes access to the x86 Model-Specific Registers (MSRs). It is a character device with major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. @@ -875,7 +875,7 @@ config X86_MSR config X86_CPUID tristate "/dev/cpu/*/cpuid - CPU information support" - help + ---help--- This device gives processes access to the x86 CPUID instruction to be executed on a specific processor. It is a character device with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to @@ -927,7 +927,7 @@ config NOHIGHMEM config HIGHMEM4G bool "4GB" depends on !X86_NUMAQ - help + ---help--- Select this if you have a 32-bit processor and between 1 and 4 gigabytes of physical RAM. @@ -935,7 +935,7 @@ config HIGHMEM64G bool "64GB" depends on !M386 && !M486 select X86_PAE - help + ---help--- Select this if you have a 32-bit processor and more than 4 gigabytes of physical RAM. @@ -946,7 +946,7 @@ choice prompt "Memory split" if EMBEDDED default VMSPLIT_3G depends on X86_32 - help + ---help--- Select the desired split between kernel and user memory. If the address range available to the kernel is less than the @@ -992,20 +992,20 @@ config HIGHMEM config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G - help + ---help--- PAE is required for NX support, and furthermore enables larger swapspace support for non-overcommit purposes. It has the cost of more pagetable lookup overhead, and also consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool X86_64 || X86_PAE config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EMBEDDED default y depends on X86_64 - help + ---help--- Allow the kernel linear mapping to use 1GB pages on CPUs that support it. This can improve the kernel's performance a tiny bit by reducing TLB pressure. If in doubt, say "Y". @@ -1016,7 +1016,7 @@ config NUMA depends on SMP depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) - help + ---help--- Enable NUMA (Non Uniform Memory Access) support. The kernel will try to allocate memory used by a CPU on the @@ -1039,19 +1039,19 @@ config K8_NUMA def_bool y prompt "Old style AMD Opteron NUMA detection" depends on X86_64 && NUMA && PCI - help - Enable K8 NUMA node topology detection. You should say Y here if - you have a multi processor AMD K8 system. This uses an old - method to read the NUMA configuration directly from the builtin - Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA - instead, which also takes priority if both are compiled in. + ---help--- + Enable K8 NUMA node topology detection. You should say Y here if + you have a multi processor AMD K8 system. This uses an old + method to read the NUMA configuration directly from the builtin + Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA + instead, which also takes priority if both are compiled in. config X86_64_ACPI_NUMA def_bool y prompt "ACPI NUMA detection" depends on X86_64 && NUMA && ACPI && PCI select ACPI_NUMA - help + ---help--- Enable ACPI SRAT based node topology detection. # Some NUMA nodes have memory ranges that span @@ -1066,7 +1066,7 @@ config NODES_SPAN_OTHER_NODES config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA - help + ---help--- Enable NUMA emulation. A flat machine will be split into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. @@ -1079,7 +1079,7 @@ config NODES_SHIFT default "4" if X86_NUMAQ default "3" depends on NEED_MULTIPLE_NODES - help + ---help--- Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accomodate various tables. @@ -1134,61 +1134,61 @@ source "mm/Kconfig" config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) - help + ---help--- The VM uses one page table entry for each page of physical memory. For systems with a lot of RAM, this can be wasteful of precious low memory. Setting this option will put user-space page table entries in high memory. config X86_CHECK_BIOS_CORRUPTION - bool "Check for low memory corruption" - help - Periodically check for memory corruption in low memory, which - is suspected to be caused by BIOS. Even when enabled in the - configuration, it is disabled at runtime. Enable it by - setting "memory_corruption_check=1" on the kernel command - line. By default it scans the low 64k of memory every 60 - seconds; see the memory_corruption_check_size and - memory_corruption_check_period parameters in - Documentation/kernel-parameters.txt to adjust this. + bool "Check for low memory corruption" + ---help--- + Periodically check for memory corruption in low memory, which + is suspected to be caused by BIOS. Even when enabled in the + configuration, it is disabled at runtime. Enable it by + setting "memory_corruption_check=1" on the kernel command + line. By default it scans the low 64k of memory every 60 + seconds; see the memory_corruption_check_size and + memory_corruption_check_period parameters in + Documentation/kernel-parameters.txt to adjust this. - When enabled with the default parameters, this option has - almost no overhead, as it reserves a relatively small amount - of memory and scans it infrequently. It both detects corruption - and prevents it from affecting the running system. + When enabled with the default parameters, this option has + almost no overhead, as it reserves a relatively small amount + of memory and scans it infrequently. It both detects corruption + and prevents it from affecting the running system. - It is, however, intended as a diagnostic tool; if repeatable - BIOS-originated corruption always affects the same memory, - you can use memmap= to prevent the kernel from using that - memory. + It is, however, intended as a diagnostic tool; if repeatable + BIOS-originated corruption always affects the same memory, + you can use memmap= to prevent the kernel from using that + memory. config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK - bool "Set the default setting of memory_corruption_check" + bool "Set the default setting of memory_corruption_check" depends on X86_CHECK_BIOS_CORRUPTION default y - help - Set whether the default state of memory_corruption_check is - on or off. + ---help--- + Set whether the default state of memory_corruption_check is + on or off. config X86_RESERVE_LOW_64K - bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" + bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" default y - help - Reserve the first 64K of physical RAM on BIOSes that are known - to potentially corrupt that memory range. A numbers of BIOSes are - known to utilize this area during suspend/resume, so it must not - be used by the kernel. + ---help--- + Reserve the first 64K of physical RAM on BIOSes that are known + to potentially corrupt that memory range. A numbers of BIOSes are + known to utilize this area during suspend/resume, so it must not + be used by the kernel. - Set this to N if you are absolutely sure that you trust the BIOS - to get all its memory reservations and usages right. + Set this to N if you are absolutely sure that you trust the BIOS + to get all its memory reservations and usages right. - If you have doubts about the BIOS (e.g. suspend/resume does not - work or there's kernel crashes after certain hardware hotplug - events) and it's not AMI or Phoenix, then you might want to enable - X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical - corruption patterns. + If you have doubts about the BIOS (e.g. suspend/resume does not + work or there's kernel crashes after certain hardware hotplug + events) and it's not AMI or Phoenix, then you might want to enable + X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical + corruption patterns. - Say Y if unsure. + Say Y if unsure. config MATH_EMULATION bool @@ -1254,7 +1254,7 @@ config MTRR_SANITIZER def_bool y prompt "MTRR cleanup support" depends on MTRR - help + ---help--- Convert MTRR layout from continuous to discrete, so X drivers can add writeback entries. @@ -1269,7 +1269,7 @@ config MTRR_SANITIZER_ENABLE_DEFAULT range 0 1 default "0" depends on MTRR_SANITIZER - help + ---help--- Enable mtrr cleanup default value config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT @@ -1277,7 +1277,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT range 0 7 default "1" depends on MTRR_SANITIZER - help + ---help--- mtrr cleanup spare entries default, it can be changed via mtrr_spare_reg_nr=N on the kernel command line. @@ -1285,7 +1285,7 @@ config X86_PAT bool prompt "x86 PAT support" depends on MTRR - help + ---help--- Use PAT attributes to setup page level cache control. PATs are the modern equivalents of MTRRs and are much more @@ -1300,20 +1300,20 @@ config EFI bool "EFI runtime service support" depends on ACPI ---help--- - This enables the kernel to use EFI runtime services that are - available (such as the EFI variable services). + This enables the kernel to use EFI runtime services that are + available (such as the EFI variable services). - This option is only useful on systems that have EFI firmware. - In addition, you should use the latest ELILO loader available - at in order to take advantage - of EFI runtime services. However, even with this option, the - resultant kernel should continue to boot on existing non-EFI - platforms. + This option is only useful on systems that have EFI firmware. + In addition, you should use the latest ELILO loader available + at in order to take advantage + of EFI runtime services. However, even with this option, the + resultant kernel should continue to boot on existing non-EFI + platforms. config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" - help + ---help--- This kernel feature is useful for number crunching applications that may need to compute untrusted bytecode during their execution. By using pipes or other transports made available to @@ -1333,8 +1333,8 @@ config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" depends on X86_64 select CC_STACKPROTECTOR_ALL - help - This option turns on the -fstack-protector GCC feature. This + ---help--- + This option turns on the -fstack-protector GCC feature. This feature puts, at the beginning of functions, a canary value on the stack just before the return address, and validates the value just before actually returning. Stack based buffer @@ -1351,7 +1351,7 @@ source kernel/Kconfig.hz config KEXEC bool "kexec system call" - help + ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot but it is independent of the system firmware. And like a reboot @@ -1368,7 +1368,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) - help + ---help--- Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels which are loaded in the main kernel with kexec-tools into @@ -1383,7 +1383,7 @@ config KEXEC_JUMP bool "kexec jump (EXPERIMENTAL)" depends on EXPERIMENTAL depends on KEXEC && HIBERNATION && X86_32 - help + ---help--- Jump between original kernel and kexeced kernel and invoke code in physical address mode via KEXEC @@ -1392,7 +1392,7 @@ config PHYSICAL_START default "0x1000000" if X86_NUMAQ default "0x200000" if X86_64 default "0x100000" - help + ---help--- This gives the physical address where the kernel is loaded. If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then @@ -1433,7 +1433,7 @@ config PHYSICAL_START config RELOCATABLE bool "Build a relocatable kernel (EXPERIMENTAL)" depends on EXPERIMENTAL - help + ---help--- This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. The relocations tend to make the kernel binary about 10% larger, @@ -1453,7 +1453,7 @@ config PHYSICAL_ALIGN default "0x100000" if X86_32 default "0x200000" if X86_64 range 0x2000 0x400000 - help + ---help--- This value puts the alignment restrictions on physical address where kernel is loaded and run from. Kernel is compiled for an address which meets above alignment restriction. @@ -1486,7 +1486,7 @@ config COMPAT_VDSO def_bool y prompt "Compat VDSO support" depends on X86_32 || IA32_EMULATION - help + ---help--- Map the 32-bit VDSO to the predictable old-style address too. ---help--- Say N here if you are running a sufficiently recent glibc @@ -1498,7 +1498,7 @@ config COMPAT_VDSO config CMDLINE_BOOL bool "Built-in kernel command line" default n - help + ---help--- Allow for specifying boot arguments to the kernel at build time. On some systems (e.g. embedded ones), it is necessary or convenient to provide some or all of the @@ -1516,7 +1516,7 @@ config CMDLINE string "Built-in kernel command string" depends on CMDLINE_BOOL default "" - help + ---help--- Enter arguments here that should be compiled into the kernel image and used at boot time. If the boot loader provides a command line at boot time, it is appended to this string to @@ -1533,7 +1533,7 @@ config CMDLINE_OVERRIDE bool "Built-in command line overrides boot loader arguments" default n depends on CMDLINE_BOOL - help + ---help--- Set this option to 'Y' to have the kernel ignore the boot loader command line, and use ONLY the built-in command line. @@ -1632,7 +1632,7 @@ if APM config APM_IGNORE_USER_SUSPEND bool "Ignore USER SUSPEND" - help + ---help--- This option will ignore USER SUSPEND requests. On machines with a compliant APM BIOS, you want to say N. However, on the NEC Versa M series notebooks, it is necessary to say Y because of a BIOS bug. @@ -1656,7 +1656,7 @@ config APM_DO_ENABLE config APM_CPU_IDLE bool "Make CPU Idle calls when idle" - help + ---help--- Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. On some machines, this can activate improved power savings, such as a slowed CPU clock rate, when the machine is idle. These idle calls @@ -1667,7 +1667,7 @@ config APM_CPU_IDLE config APM_DISPLAY_BLANK bool "Enable console blanking using APM" - help + ---help--- Enable console blanking using the APM. Some laptops can use this to turn off the LCD backlight when the screen blanker of the Linux virtual console blanks the screen. Note that this is only used by @@ -1680,7 +1680,7 @@ config APM_DISPLAY_BLANK config APM_ALLOW_INTS bool "Allow interrupts during APM BIOS calls" - help + ---help--- Normally we disable external interrupts while we are making calls to the APM BIOS as a measure to lessen the effects of a badly behaving BIOS implementation. The BIOS should reenable interrupts if it @@ -1705,7 +1705,7 @@ config PCI bool "PCI support" default y select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) - help + ---help--- Find out whether you have a PCI motherboard. PCI is the name of a bus system, i.e. the way the CPU talks to the other stuff inside your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or @@ -1776,7 +1776,7 @@ config PCI_MMCONFIG config DMAR bool "Support for DMA Remapping Devices (EXPERIMENTAL)" depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL - help + ---help--- DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. These DMA remapping devices are reported via ACPI tables @@ -1798,29 +1798,29 @@ config DMAR_GFX_WA def_bool y prompt "Support for Graphics workaround" depends on DMAR - help - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA. + ---help--- + Current Graphics drivers tend to use physical address + for DMA and avoid using DMA APIs. Setting this config + option permits the IOMMU driver to set a unity map for + all the OS-visible memory. Hence the driver can continue + to use physical addresses for DMA. config DMAR_FLOPPY_WA def_bool y depends on DMAR - help - Floppy disk drivers are know to bypass DMA API calls - thereby failing to work when IOMMU is enabled. This - workaround will setup a 1:1 mapping for the first - 16M to make floppy (an ISA device) work. + ---help--- + Floppy disk drivers are know to bypass DMA API calls + thereby failing to work when IOMMU is enabled. This + workaround will setup a 1:1 mapping for the first + 16M to make floppy (an ISA device) work. config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL - help - Supports Interrupt remapping for IO-APIC and MSI devices. - To use x2apic mode in the CPU's which support x2APIC enhancements or - to support platforms with CPU's having > 8 bit APIC ID, say Y. + ---help--- + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. source "drivers/pci/pcie/Kconfig" @@ -1834,7 +1834,7 @@ if X86_32 config ISA bool "ISA support" - help + ---help--- Find out whether you have ISA slots on your motherboard. ISA is the name of a bus system, i.e. the way the CPU talks to the other stuff inside your box. Other bus systems are PCI, EISA, MicroChannel @@ -1861,7 +1861,7 @@ source "drivers/eisa/Kconfig" config MCA bool "MCA support" - help + ---help--- MicroChannel Architecture is found in some IBM PS/2 machines and laptops. It is a bus system similar to PCI or ISA. See (and especially the web page given @@ -1871,7 +1871,7 @@ source "drivers/mca/Kconfig" config SCx200 tristate "NatSemi SCx200 support" - help + ---help--- This provides basic support for National Semiconductor's (now AMD's) Geode processors. The driver probes for the PCI-IDs of several on-chip devices, so its a good dependency @@ -1883,7 +1883,7 @@ config SCx200HR_TIMER tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" depends on SCx200 && GENERIC_TIME default y - help + ---help--- This driver provides a clocksource built upon the on-chip 27MHz high-resolution timer. Its also a workaround for NSC Geode SC-1100's buggy TSC, which loses time when the @@ -1894,7 +1894,7 @@ config GEODE_MFGPT_TIMER def_bool y prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS - help + ---help--- This driver provides a clock event source based on the MFGPT timer(s) in the CS5535 and CS5536 companion chip for the geode. MFGPTs have a better resolution and max interval than the @@ -1903,7 +1903,7 @@ config GEODE_MFGPT_TIMER config OLPC bool "One Laptop Per Child support" default n - help + ---help--- Add support for detecting the unique features of the OLPC XO hardware. @@ -1928,16 +1928,16 @@ config IA32_EMULATION bool "IA32 Emulation" depends on X86_64 select COMPAT_BINFMT_ELF - help + ---help--- Include code to run 32-bit programs under a 64-bit kernel. You should likely turn this on, unless you're 100% sure that you don't have any 32-bit programs left. config IA32_AOUT - tristate "IA32 a.out support" - depends on IA32_EMULATION - help - Support old a.out binaries in the 32bit emulation. + tristate "IA32 a.out support" + depends on IA32_EMULATION + ---help--- + Support old a.out binaries in the 32bit emulation. config COMPAT def_bool y diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 085fef4d866..a95eaf0e582 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -50,7 +50,7 @@ config M386 config M486 bool "486" depends on X86_32 - help + ---help--- Select this for a 486 series processor, either Intel or one of the compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or @@ -59,7 +59,7 @@ config M486 config M586 bool "586/K5/5x86/6x86/6x86MX" depends on X86_32 - help + ---help--- Select this for an 586 or 686 series processor such as the AMD K5, the Cyrix 5x86, 6x86 and 6x86MX. This choice does not assume the RDTSC (Read Time Stamp Counter) instruction. @@ -67,21 +67,21 @@ config M586 config M586TSC bool "Pentium-Classic" depends on X86_32 - help + ---help--- Select this for a Pentium Classic processor with the RDTSC (Read Time Stamp Counter) instruction for benchmarking. config M586MMX bool "Pentium-MMX" depends on X86_32 - help + ---help--- Select this for a Pentium with the MMX graphics/multimedia extended instructions. config M686 bool "Pentium-Pro" depends on X86_32 - help + ---help--- Select this for Intel Pentium Pro chips. This enables the use of Pentium Pro extended instructions, and disables the init-time guard against the f00f bug found in earlier Pentiums. @@ -89,7 +89,7 @@ config M686 config MPENTIUMII bool "Pentium-II/Celeron(pre-Coppermine)" depends on X86_32 - help + ---help--- Select this for Intel chips based on the Pentium-II and pre-Coppermine Celeron core. This option enables an unaligned copy optimization, compiles the kernel with optimization flags @@ -99,7 +99,7 @@ config MPENTIUMII config MPENTIUMIII bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" depends on X86_32 - help + ---help--- Select this for Intel chips based on the Pentium-III and Celeron-Coppermine core. This option enables use of some extended prefetch instructions in addition to the Pentium II @@ -108,14 +108,14 @@ config MPENTIUMIII config MPENTIUMM bool "Pentium M" depends on X86_32 - help + ---help--- Select this for Intel Pentium M (not Pentium-4 M) notebook chips. config MPENTIUM4 bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" depends on X86_32 - help + ---help--- Select this for Intel Pentium 4 chips. This includes the Pentium 4, Pentium D, P4-based Celeron and Xeon, and Pentium-4 M (not Pentium M) chips. This option enables compile @@ -151,7 +151,7 @@ config MPENTIUM4 config MK6 bool "K6/K6-II/K6-III" depends on X86_32 - help + ---help--- Select this for an AMD K6-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. @@ -159,14 +159,14 @@ config MK6 config MK7 bool "Athlon/Duron/K7" depends on X86_32 - help + ---help--- Select this for an AMD Athlon K7-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. config MK8 bool "Opteron/Athlon64/Hammer/K8" - help + ---help--- Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. @@ -174,7 +174,7 @@ config MK8 config MCRUSOE bool "Crusoe" depends on X86_32 - help + ---help--- Select this for a Transmeta Crusoe processor. Treats the processor like a 586 with TSC, and sets some GCC optimization flags (like a Pentium Pro with no alignment requirements). @@ -182,13 +182,13 @@ config MCRUSOE config MEFFICEON bool "Efficeon" depends on X86_32 - help + ---help--- Select this for a Transmeta Efficeon processor. config MWINCHIPC6 bool "Winchip-C6" depends on X86_32 - help + ---help--- Select this for an IDT Winchip C6 chip. Linux and GCC treat this chip as a 586TSC with some extended instructions and alignment requirements. @@ -196,7 +196,7 @@ config MWINCHIPC6 config MWINCHIP3D bool "Winchip-2/Winchip-2A/Winchip-3" depends on X86_32 - help + ---help--- Select this for an IDT Winchip-2, 2A or 3. Linux and GCC treat this chip as a 586TSC with some extended instructions and alignment requirements. Also enable out of order memory @@ -206,19 +206,19 @@ config MWINCHIP3D config MGEODEGX1 bool "GeodeGX1" depends on X86_32 - help + ---help--- Select this for a Geode GX1 (Cyrix MediaGX) chip. config MGEODE_LX bool "Geode GX/LX" depends on X86_32 - help + ---help--- Select this for AMD Geode GX and LX processors. config MCYRIXIII bool "CyrixIII/VIA-C3" depends on X86_32 - help + ---help--- Select this for a Cyrix III or C3 chip. Presently Linux and GCC treat this chip as a generic 586. Whilst the CPU is 686 class, it lacks the cmov extension which gcc assumes is present when @@ -230,7 +230,7 @@ config MCYRIXIII config MVIAC3_2 bool "VIA C3-2 (Nehemiah)" depends on X86_32 - help + ---help--- Select this for a VIA C3 "Nehemiah". Selecting this enables usage of SSE and tells gcc to treat the CPU as a 686. Note, this kernel will not boot on older (pre model 9) C3s. @@ -238,14 +238,14 @@ config MVIAC3_2 config MVIAC7 bool "VIA C7" depends on X86_32 - help + ---help--- Select this for a VIA C7. Selecting this uses the correct cache shift and tells gcc to treat the CPU as a 686. config MPSC bool "Intel P4 / older Netburst based Xeon" depends on X86_64 - help + ---help--- Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey Xeon CPUs with Intel 64bit which is compatible with x86-64. Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the @@ -255,7 +255,7 @@ config MPSC config MCORE2 bool "Core 2/newer Xeon" - help + ---help--- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) CPUs. You can distinguish newer from older Xeons by the CPU @@ -265,7 +265,7 @@ config MCORE2 config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 - help + ---help--- Generic x86-64 CPU. Run equally well on all x86-64 CPUs. @@ -274,7 +274,7 @@ endchoice config X86_GENERIC bool "Generic x86 support" depends on X86_32 - help + ---help--- Instead of just including optimizations for the selected x86 variant (e.g. PII, Crusoe or Athlon), include some more generic optimizations as well. This will make the kernel @@ -319,7 +319,7 @@ config X86_XADD config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 - help + ---help--- Old PentiumPro multiprocessor systems had errata that could cause memory operations to violate the x86 ordering standard in rare cases. Enabling this option will attempt to work around some (but not all) @@ -412,14 +412,14 @@ config X86_DEBUGCTLMSR menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EMBEDDED - help + ---help--- This lets you choose what x86 vendor support code your kernel will include. config CPU_SUP_INTEL default y bool "Support Intel processors" if PROCESSOR_SELECT - help + ---help--- This enables detection, tunings and quirks for Intel processors You need this enabled if you want your kernel to run on an @@ -433,7 +433,7 @@ config CPU_SUP_CYRIX_32 default y bool "Support Cyrix processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for Cyrix processors You need this enabled if you want your kernel to run on a @@ -446,7 +446,7 @@ config CPU_SUP_CYRIX_32 config CPU_SUP_AMD default y bool "Support AMD processors" if PROCESSOR_SELECT - help + ---help--- This enables detection, tunings and quirks for AMD processors You need this enabled if you want your kernel to run on an @@ -460,7 +460,7 @@ config CPU_SUP_CENTAUR_32 default y bool "Support Centaur processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for Centaur processors You need this enabled if you want your kernel to run on a @@ -474,7 +474,7 @@ config CPU_SUP_CENTAUR_64 default y bool "Support Centaur processors" if PROCESSOR_SELECT depends on 64BIT - help + ---help--- This enables detection, tunings and quirks for Centaur processors You need this enabled if you want your kernel to run on a @@ -488,7 +488,7 @@ config CPU_SUP_TRANSMETA_32 default y bool "Support Transmeta processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for Transmeta processors You need this enabled if you want your kernel to run on a @@ -502,7 +502,7 @@ config CPU_SUP_UMC_32 default y bool "Support UMC processors" if PROCESSOR_SELECT depends on !64BIT - help + ---help--- This enables detection, tunings and quirks for UMC processors You need this enabled if you want your kernel to run on a @@ -521,7 +521,7 @@ config X86_PTRACE_BTS bool "Branch Trace Store" default y depends on X86_DEBUGCTLMSR - help + ---help--- This adds a ptrace interface to the hardware's branch trace store. Debuggers may use it to collect an execution trace of the debugged diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index a38dd6064f1..ba4781b9389 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -7,7 +7,7 @@ source "lib/Kconfig.debug" config STRICT_DEVMEM bool "Filter access to /dev/mem" - help + ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental access to this is obviously disastrous, but specific access can @@ -25,7 +25,7 @@ config STRICT_DEVMEM config X86_VERBOSE_BOOTUP bool "Enable verbose x86 bootup info messages" default y - help + ---help--- Enables the informational output from the decompression stage (e.g. bzImage) of the boot. If you disable this you will still see errors. Disable this if you want silent bootup. @@ -33,7 +33,7 @@ config X86_VERBOSE_BOOTUP config EARLY_PRINTK bool "Early printk" if EMBEDDED default y - help + ---help--- Write kernel log output directly into the VGA buffer or to a serial port. @@ -47,7 +47,7 @@ config EARLY_PRINTK_DBGP bool "Early printk via EHCI debug port" default n depends on EARLY_PRINTK && PCI - help + ---help--- Write kernel log output directly into the EHCI debug port. This is useful for kernel debugging when your machine crashes very @@ -59,14 +59,14 @@ config EARLY_PRINTK_DBGP config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL - help + ---help--- This option will cause messages to be printed if free stack space drops below a certain limit. config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL - help + ---help--- Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. @@ -75,7 +75,7 @@ config DEBUG_STACK_USAGE config DEBUG_PAGEALLOC bool "Debug page memory allocations" depends on DEBUG_KERNEL - help + ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types of memory corruptions. @@ -85,7 +85,7 @@ config DEBUG_PER_CPU_MAPS depends on DEBUG_KERNEL depends on SMP default n - help + ---help--- Say Y to verify that the per_cpu map being accessed has been setup. Adds a fair amount of code to kernel memory and decreases performance. @@ -96,7 +96,7 @@ config X86_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL select DEBUG_FS - help + ---help--- Say Y here if you want to show the kernel pagetable layout in a debugfs file. This information is only useful for kernel developers who are working in architecture specific areas of the kernel. @@ -108,7 +108,7 @@ config DEBUG_RODATA bool "Write protect kernel read-only data structures" default y depends on DEBUG_KERNEL - help + ---help--- Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const data. This is recommended so that we can catch kernel bugs sooner. @@ -118,7 +118,7 @@ config DEBUG_RODATA_TEST bool "Testcase for the DEBUG_RODATA feature" depends on DEBUG_RODATA default y - help + ---help--- This option enables a testcase for the DEBUG_RODATA feature as well as for the change_page_attr() infrastructure. If in doubt, say "N" @@ -126,7 +126,7 @@ config DEBUG_RODATA_TEST config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m - help + ---help--- This option enables a testcase for the CPU NX capability and the software setup of this feature. If in doubt, say "N" @@ -134,7 +134,7 @@ config DEBUG_NX_TEST config 4KSTACKS bool "Use 4Kb for kernel stacks instead of 8Kb" depends on X86_32 - help + ---help--- If you say Y here the kernel will use a 4Kb stacksize for the kernel stack attached to each process/thread. This facilitates running more threads on a system and also reduces the pressure @@ -145,7 +145,7 @@ config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED depends on X86_32 - help + ---help--- This option allows trapping of rare doublefault exceptions that would otherwise cause a system to silently reboot. Disabling this option saves about 4k and might cause you much additional grey @@ -155,7 +155,7 @@ config IOMMU_DEBUG bool "Enable IOMMU debugging" depends on GART_IOMMU && DEBUG_KERNEL depends on X86_64 - help + ---help--- Force the IOMMU to on even when you have less than 4GB of memory and add debugging code. On overflow always panic. And allow to enable IOMMU leak tracing. Can be disabled at boot @@ -171,7 +171,7 @@ config IOMMU_LEAK bool "IOMMU leak tracing" depends on DEBUG_KERNEL depends on IOMMU_DEBUG - help + ---help--- Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. @@ -224,25 +224,25 @@ choice config IO_DELAY_0X80 bool "port 0x80 based port-IO delay [recommended]" - help + ---help--- This is the traditional Linux IO delay used for in/out_p. It is the most tested hence safest selection here. config IO_DELAY_0XED bool "port 0xed based port-IO delay" - help + ---help--- Use port 0xed as the IO delay. This frees up port 0x80 which is often used as a hardware-debug port. config IO_DELAY_UDELAY bool "udelay based port-IO delay" - help + ---help--- Use udelay(2) as the IO delay method. This provides the delay while not having any side-effect on the IO port space. config IO_DELAY_NONE bool "no port-IO delay" - help + ---help--- No port-IO delay. Will break on old boxes that require port-IO delay for certain operations. Should work on most new machines. @@ -276,18 +276,18 @@ config DEBUG_BOOT_PARAMS bool "Debug boot parameters" depends on DEBUG_KERNEL depends on DEBUG_FS - help + ---help--- This option will cause struct boot_params to be exported via debugfs. config CPA_DEBUG bool "CPA self-test code" depends on DEBUG_KERNEL - help + ---help--- Do change_page_attr() self-tests every 30 seconds. config OPTIMIZE_INLINING bool "Allow gcc to uninline functions marked 'inline'" - help + ---help--- This option determines if the kernel forces gcc to inline the functions developers have marked 'inline'. Doing so takes away freedom from gcc to do what it thinks is best, which is desirable for the gcc 3.x series of @@ -300,4 +300,3 @@ config OPTIMIZE_INLINING If unsure, say N. endmenu - From a034a010f48bf49efe25098c16c16b9708ccbba5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:43 -0800 Subject: [PATCH 370/682] x86: unify pte_none Impact: cleanup Unify and demacro pte_none. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-2level.h | 2 -- arch/x86/include/asm/pgtable-3level.h | 5 ----- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 1 - 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index e0d199fe1d8..c1774ac9da7 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -53,8 +53,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#define pte_none(x) (!(x).pte_low) - /* * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, * split up the 29 bits of offset into this range: diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 447da43cddb..07e0734f620 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -151,11 +151,6 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte_low == b.pte_low && a.pte_high == b.pte_high; } -static inline int pte_none(pte_t pte) -{ - return !pte.pte_low && !pte.pte_high; -} - /* * Bits 0, 6 and 7 are taken in the low part of the pte, * put the 32 bits of offset into the high part. diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 06bbcbd66e9..841e573b27f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -429,6 +429,11 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) } #endif /* CONFIG_PARAVIRT */ +static inline int pte_none(pte_t pte) +{ + return !pte.pte; +} + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ba09289acca..5906a41e849 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -171,7 +171,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } -#define pte_none(x) (!pte_val((x))) #define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ From 8de01da35e9dbbb4a9d1e9d5a37df98395dfa558 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:44 -0800 Subject: [PATCH 371/682] x86: unify pte_same Impact: cleanup Unify and demacro pte_same. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 6 ------ arch/x86/include/asm/pgtable.h | 6 ++++++ arch/x86/include/asm/pgtable_64.h | 2 -- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 07e0734f620..51832fa0474 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -145,12 +145,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t a, pte_t b) -{ - return a.pte_low == b.pte_low && a.pte_high == b.pte_high; -} - /* * Bits 0, 6 and 7 are taken in the low part of the pte, * put the 32 bits of offset into the high part. diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 841e573b27f..e929d43753c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -434,6 +434,12 @@ static inline int pte_none(pte_t pte) return !pte.pte; } +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t a, pte_t b) +{ + return a.pte == b.pte; +} + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 5906a41e849..ff2571865bf 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -134,8 +134,6 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -#define pte_same(a, b) ((a).pte == (b).pte) - #endif /* !__ASSEMBLY__ */ #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) From 7c683851d96c8313586c0695b25ca41bde9f0f73 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:45 -0800 Subject: [PATCH 372/682] x86: unify pte_present Impact: cleanup Unify and demacro pte_present. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 2 -- arch/x86/include/asm/pgtable_64.h | 2 -- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e929d43753c..17fcc17d6b4 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -440,6 +440,11 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } +static inline int pte_present(pte_t a) +{ + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); +} + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 72b020deb46..188073713fe 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -85,8 +85,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* The boot page tables (all created as a single array) */ extern unsigned long pg0[]; -#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) - /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val((x))) #define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index ff2571865bf..35b8dbc068e 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -169,8 +169,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } -#define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) - #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ /* From 5ba7c91341be61e0942f792c237ac067d9f32f51 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:48 -0800 Subject: [PATCH 373/682] x86: unify pud_present Impact: cleanup Unify and demacro pud_present. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 5 ----- arch/x86/include/asm/pgtable.h | 7 +++++++ arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 51832fa0474..524bd91b995 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -28,11 +28,6 @@ static inline int pud_bad(pud_t pud) return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; } -static inline int pud_present(pud_t pud) -{ - return pud_val(pud) & _PAGE_PRESENT; -} - /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 17fcc17d6b4..c117b28df15 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -445,6 +445,13 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } +#if PAGETABLE_LEVELS > 2 +static inline int pud_present(pud_t pud) +{ + return pud_val(pud) & _PAGE_PRESENT; +} +#endif /* PAGETABLE_LEVELS > 2 */ + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 35b8dbc068e..acdc27b202c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -194,7 +194,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_offset(pgd, address) \ ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) -#define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT) static inline int pud_large(pud_t pte) { From 9f38d7e85e914f10a875f65d283432d55a12fc27 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:49 -0800 Subject: [PATCH 374/682] x86: unify pgd_present Impact: cleanup Unify and demacro pgd_present. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 7 +++++++ arch/x86/include/asm/pgtable_64.h | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c117b28df15..339e49a9bb6 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -452,6 +452,13 @@ static inline int pud_present(pud_t pud) } #endif /* PAGETABLE_LEVELS > 2 */ +#if PAGETABLE_LEVELS > 3 +static inline int pgd_present(pgd_t pgd) +{ + return pgd_val(pgd) & _PAGE_PRESENT; +} +#endif /* PAGETABLE_LEVELS > 3 */ + #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index acdc27b202c..447634698f5 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -182,7 +182,6 @@ static inline int pmd_bad(pmd_t pmd) #define pgd_page_vaddr(pgd) \ ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) -#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) From 649e8ef60fac0a2f6960cdb090d73e78717ac065 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:50 -0800 Subject: [PATCH 375/682] x86: unify pmd_present Impact: cleanup Unify and demacro pmd_present. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 1 - arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 339e49a9bb6..147d3f097ab 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -445,6 +445,11 @@ static inline int pte_present(pte_t a) return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); } +static inline int pmd_present(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_PRESENT; +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 188073713fe..f35160730b6 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -87,7 +87,6 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val((x))) -#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) #define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 447634698f5..471b3058f3d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -208,7 +208,6 @@ static inline int pud_large(pud_t pte) #define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ pmd_index(address)) #define pmd_none(x) (!pmd_val((x))) -#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) #define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) #define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) From 4fea801ac95d6534a93aa01d3ac62be163d845af Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:51 -0800 Subject: [PATCH 376/682] x86: unify pmd_none Impact: cleanup Unify and demacro pmd_none. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 7 +++++++ arch/x86/include/asm/pgtable_32.h | 2 -- arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 147d3f097ab..2f38bbee77e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -450,6 +450,13 @@ static inline int pmd_present(pmd_t pmd) return pmd_val(pmd) & _PAGE_PRESENT; } +static inline int pmd_none(pmd_t pmd) +{ + /* Only check low word on 32-bit platforms, since it might be + out of sync with upper half. */ + return !(unsigned long)native_pmd_val(pmd); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index f35160730b6..26e73569223 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -85,8 +85,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* The boot page tables (all created as a single array) */ extern unsigned long pg0[]; -/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ -#define pmd_none(x) (!(unsigned long)pmd_val((x))) #define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 471b3058f3d..f3ad8943324 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -207,7 +207,6 @@ static inline int pud_large(pud_t pte) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) #define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ pmd_index(address)) -#define pmd_none(x) (!pmd_val((x))) #define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) #define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) From c5f040b12b2381591932a007432e7ed86b3f2796 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:52 -0800 Subject: [PATCH 377/682] x86: unify pgd_page_vaddr Impact: cleanup Unify and demacro pgd_page_vaddr. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2f38bbee77e..cca4321e076 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -469,6 +469,11 @@ static inline int pgd_present(pgd_t pgd) { return pgd_val(pgd) & _PAGE_PRESENT; } + +static inline unsigned long pgd_page_vaddr(pgd_t pgd) +{ + return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); +} #endif /* PAGETABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f3ad8943324..4f8dbb99b69 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -179,8 +179,6 @@ static inline int pmd_bad(pmd_t pmd) /* * Level 4 access. */ -#define pgd_page_vaddr(pgd) \ - ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) From 6fff47e3ac5e17f7e164ac4ff9ea29aba3c54d73 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:53 -0800 Subject: [PATCH 378/682] x86: unify pud_page_vaddr Impact: cleanup Unify and demacro pud_page_vaddr. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 2 -- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 524bd91b995..542616ac192 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -117,9 +117,6 @@ static inline void pud_clear(pud_t *pudp) #define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) - - /* Find an entry in the second-level page table.. */ #define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ pmd_index(address)) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index cca4321e076..4638b4af675 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -462,6 +462,11 @@ static inline int pud_present(pud_t pud) { return pud_val(pud) & _PAGE_PRESENT; } + +static inline unsigned long pud_page_vaddr(pud_t pud) +{ + return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 4f8dbb99b69..9875e40c058 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -185,8 +185,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* PUD - Level3 access */ /* to find an entry in a page-table-directory. */ -#define pud_page_vaddr(pud) \ - ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK)) #define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_offset(pgd, address) \ From aca159dbb13a5221819d5b3849b8c013f4829e9e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:54 -0800 Subject: [PATCH 379/682] x86: include pgtable_SIZE.h earlier We'll need the definitions sooner. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4638b4af675..bd38feb3492 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -429,6 +429,16 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) } #endif /* CONFIG_PARAVIRT */ +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_32 +# include "pgtable_32.h" +#else +# include "pgtable_64.h" +#endif + +#ifndef __ASSEMBLY__ + static inline int pte_none(pte_t pte) { return !pte.pte; @@ -483,12 +493,6 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_X86_32 -# include "pgtable_32.h" -#else -# include "pgtable_64.h" -#endif - /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] * From f476961cb16312fe4cb80b2b457ef9acf220a7fc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:55 -0800 Subject: [PATCH 380/682] x86: unify pud_page Impact: cleanup Unify and demacro pud_page. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 2 -- arch/x86/include/asm/pgtable.h | 6 ++++++ arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 542616ac192..28ba09ac230 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -115,8 +115,6 @@ static inline void pud_clear(pud_t *pudp) write_cr3(pgd); } -#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) - /* Find an entry in the second-level page table.. */ #define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ pmd_index(address)) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index bd38feb3492..a871ae55a5c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -438,6 +438,7 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) #endif #ifndef __ASSEMBLY__ +#include static inline int pte_none(pte_t pte) { @@ -477,6 +478,11 @@ static inline unsigned long pud_page_vaddr(pud_t pud) { return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); } + +static inline struct page *pud_page(pud_t pud) +{ + return pfn_to_page(pud_val(pud) >> PAGE_SHIFT); +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 9875e40c058..7edacc7ec89 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -185,7 +185,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* PUD - Level3 access */ /* to find an entry in a page-table-directory. */ -#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_offset(pgd, address) \ ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) From 777cba16aac5a1096db0b936912eb7fd06fb0cc5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:56 -0800 Subject: [PATCH 381/682] x86: unify pgd_page Impact: cleanup Unify and demacro pgd_page. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a871ae55a5c..c1a36dd1e59 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -495,6 +495,11 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) { return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); } + +static inline struct page *pgd_page(pgd_t pgd) +{ + return pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT); +} #endif /* PAGETABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7edacc7ec89..02477ad40fc 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -179,7 +179,6 @@ static inline int pmd_bad(pmd_t pmd) /* * Level 4 access. */ -#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) From 7cfb81024bc1dbe8ad2bf5affd58a6a7ad4172ba Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:57 -0800 Subject: [PATCH 382/682] x86: unify pud_index Impact: cleanup Unify and demacro pud_index. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 6 ++++++ arch/x86/include/asm/pgtable_64.h | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c1a36dd1e59..a51a97ade63 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -500,6 +500,12 @@ static inline struct page *pgd_page(pgd_t pgd) { return pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT); } + +/* to find an entry in a page-table-directory. */ +static inline unsigned pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} #endif /* PAGETABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 02477ad40fc..c17c30f5751 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -183,8 +183,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) /* PUD - Level3 access */ -/* to find an entry in a page-table-directory. */ -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_offset(pgd, address) \ ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) From 3d081b1812bd4de2bbef58c6d598ddf45493010e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:58 -0800 Subject: [PATCH 383/682] x86: unify pud_offset Impact: cleanup Unify and demacro pud_offset. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a51a97ade63..decccb0a641 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -506,6 +506,11 @@ static inline unsigned pud_index(unsigned long address) { return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); } + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) +{ + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); +} #endif /* PAGETABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index c17c30f5751..958dc1e7335 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -183,8 +183,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) /* PUD - Level3 access */ -#define pud_offset(pgd, address) \ - ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) static inline int pud_large(pud_t pte) { From 3ffb3564cd3cd59de8a0d74430ffe2d43ae11f19 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:30:59 -0800 Subject: [PATCH 384/682] x86: unify pmd_page_vaddr Impact: cleanup Unify and demacro pmd_page_vaddr. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 3 --- arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index decccb0a641..3789c05bf30 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -468,6 +468,11 @@ static inline int pmd_none(pmd_t pmd) return !(unsigned long)native_pmd_val(pmd); } +static inline unsigned long pmd_page_vaddr(pmd_t pmd) +{ + return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 26e73569223..f7f7e297a0a 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -126,9 +126,6 @@ static inline int pud_large(pud_t pud) { return 0; } #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) -#define pmd_page_vaddr(pmd) \ - ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 958dc1e7335..7a510e8a878 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -191,7 +191,6 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) From 20063ca4eb26d4b10f01d59925deea4aeee415e8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:00 -0800 Subject: [PATCH 385/682] x86: unify pmd_page Impact: cleanup Unify and demacro pmd_page. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 2 -- arch/x86/include/asm/pgtable_64.h | 2 -- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3789c05bf30..38330d6288f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -473,6 +473,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); } +static inline struct page *pmd_page(pmd_t pmd) +{ + return pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index f7f7e297a0a..8714110b4a7 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -124,8 +124,6 @@ static inline int pud_large(pud_t pud) { return 0; } #define pte_offset_kernel(dir, address) \ ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) -#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 7a510e8a878..97f24d2d60d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -191,8 +191,6 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) - #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) #define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ pmd_index(address)) From e24d7eee0beda24504bf6a4aa03be68328557475 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:01 -0800 Subject: [PATCH 386/682] x86: unify pmd_index Impact: cleanup Unify and demacro pmd_index. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 11 +++++++++++ arch/x86/include/asm/pgtable_32.h | 9 --------- arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 38330d6288f..4ec24b6d099 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -478,6 +478,17 @@ static inline struct page *pmd_page(pmd_t pmd) return pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT); } +/* + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] + * + * this macro returns the index of the entry in the pmd page which would + * control the given virtual address + */ +static inline unsigned pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 8714110b4a7..40b066215bb 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -104,15 +104,6 @@ extern unsigned long pg0[]; static inline int pud_large(pud_t pud) { return 0; } -/* - * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] - * - * this macro returns the index of the entry in the pmd page which would - * control the given virtual address - */ -#define pmd_index(address) \ - (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) - /* * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] * diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 97f24d2d60d..15f42d6ee2f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -191,7 +191,6 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) #define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ pmd_index(address)) #define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) From 01ade20d5a22e6ef002cbb751dddc3a01a78f998 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:02 -0800 Subject: [PATCH 387/682] x86: unify pmd_offset Impact: cleanup Unify and demacro pmd_offset. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 4 ---- arch/x86/include/asm/pgtable.h | 6 ++++++ arch/x86/include/asm/pgtable_64.h | 2 -- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 28ba09ac230..7ad9d05710b 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -115,10 +115,6 @@ static inline void pud_clear(pud_t *pudp) write_cr3(pgd); } -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ - pmd_index(address)) - #ifdef CONFIG_SMP static inline pte_t native_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 4ec24b6d099..a7dbb05075d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -504,6 +504,12 @@ static inline struct page *pud_page(pud_t pud) { return pfn_to_page(pud_val(pud) >> PAGE_SHIFT); } + +/* Find an entry in the second-level page table.. */ +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 15f42d6ee2f..78269656cf0 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -191,8 +191,6 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ - pmd_index(address)) #define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) #define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) From bd44d64db1db6acf2dea9ba130b8cb0a54e1dabd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:03 -0800 Subject: [PATCH 388/682] x86: remove redundant pfn_pmd definition Impact: cleanup It's already defined in pgtable.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable_64.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 78269656cf0..6cc4133705b 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -191,7 +191,6 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) #define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) From 3180fba0eec0d14e4ac8183a90d643d0d3383c75 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:04 -0800 Subject: [PATCH 389/682] x86: unify pmd_pfn Impact: cleanup Unify and demacro pmd_pfn. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a7dbb05075d..532144c2f1c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -510,6 +510,11 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6cc4133705b..279ddc6a4eb 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -191,8 +191,6 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) - #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ _PAGE_FILE }) From 97e2817d3423d753fd2f80ea936a370032846382 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:05 -0800 Subject: [PATCH 390/682] x86: unify pmd_pfn Impact: cleanup Unify pmd_pfn. Unfortunately it can't be demacroed because it has a cyclic dependency on linux/mm.h:page_to_nid(). Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 9 +++++++++ arch/x86/include/asm/pgtable_32.h | 7 ------- arch/x86/include/asm/pgtable_64.h | 3 --- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 532144c2f1c..49b5cff78c2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -489,6 +489,15 @@ static inline unsigned pmd_index(unsigned long address) return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + * + * (Currently stuck as a macro because of indirect forward reference + * to linux/mm.h:page_to_nid()) + */ +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 40b066215bb..6335be843c1 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -95,13 +95,6 @@ extern unsigned long pg0[]; # include #endif -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) - - static inline int pud_large(pud_t pud) { return 0; } /* diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 279ddc6a4eb..adce05628a0 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -198,9 +198,6 @@ static inline int pud_large(pud_t pte) /* PTE - Level 1 access. */ -/* page, protection -> pte */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot)) - #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ pte_index((address))) From 346309cff6127a38731cf102de3413a562700b84 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:06 -0800 Subject: [PATCH 391/682] x86: unify pte_index Impact: cleanup Unify and demacro pte_index. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 11 +++++++++++ arch/x86/include/asm/pgtable_32.h | 8 -------- arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 49b5cff78c2..10a8c2e5178 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -498,6 +498,17 @@ static inline unsigned pmd_index(unsigned long address) */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +/* + * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] + * + * this function returns the index of the entry in the pte page which would + * control the given virtual address + */ +static inline unsigned pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 6335be843c1..00d298e14a8 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -97,14 +97,6 @@ extern unsigned long pg0[]; static inline int pud_large(pud_t pud) { return 0; } -/* - * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] - * - * this macro returns the index of the entry in the pte page which would - * control the given virtual address - */ -#define pte_index(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index adce05628a0..e7321bc8aa8 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -198,7 +198,6 @@ static inline int pud_large(pud_t pte) /* PTE - Level 1 access. */ -#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ pte_index((address))) From 3fbc2444f465710cdf0c832461a6a14338437453 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:07 -0800 Subject: [PATCH 392/682] x86: unify pte_offset_kernel Impact: cleanup Unify and demacro pte_offset_kernel. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 3 --- arch/x86/include/asm/pgtable_64.h | 3 --- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 10a8c2e5178..c61b37af1f2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -509,6 +509,11 @@ static inline unsigned pte_index(unsigned long address) return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) +{ + return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 00d298e14a8..133fc4e4529 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -97,9 +97,6 @@ extern unsigned long pg0[]; static inline int pud_large(pud_t pud) { return 0; } -#define pte_offset_kernel(dir, address) \ - ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index e7321bc8aa8..a8bfb75c76a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -198,9 +198,6 @@ static inline int pud_large(pud_t pte) /* PTE - Level 1 access. */ -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ - pte_index((address))) - /* x86-64 always has all page tables mapped. */ #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) From 3f6cbef1d7f474d16f3a824c6d2910d930778fbd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:08 -0800 Subject: [PATCH 393/682] x86: unify pud_large Impact: cleanup Unify and demacro pud_large. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 6 ++++++ arch/x86/include/asm/pgtable_32.h | 2 -- arch/x86/include/asm/pgtable_64.h | 6 ------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c61b37af1f2..0c734e2a90c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -540,6 +540,12 @@ static inline unsigned long pmd_pfn(pmd_t pmd) { return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; } + +static inline int pud_large(pud_t pud) +{ + return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 133fc4e4529..ad7830bdc9a 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -95,8 +95,6 @@ extern unsigned long pg0[]; # include #endif -static inline int pud_large(pud_t pud) { return 0; } - #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a8bfb75c76a..a85ac14df35 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -184,12 +184,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* PUD - Level3 access */ -static inline int pud_large(pud_t pte) -{ - return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); -} - /* PMD - Level 2 access */ #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ From 30f103167fcf2b08de64f5f37ece6bfff7392290 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:09 -0800 Subject: [PATCH 394/682] x86: unify pgd_bad Impact: cleanup Unify and demacro pgd_bad. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0c734e2a90c..ebcb60e6a96 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -574,6 +574,11 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); } + +static inline int pgd_bad(pgd_t pgd) +{ + return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +} #endif /* PAGETABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a85ac14df35..1dfc44932f6 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -154,11 +154,6 @@ static inline void native_pgd_clear(pgd_t *pgd) #ifndef __ASSEMBLY__ -static inline int pgd_bad(pgd_t pgd) -{ - return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; -} - static inline int pud_bad(pud_t pud) { return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; From a61bb29af47b0e4052566d25f3391894306a23fd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:10 -0800 Subject: [PATCH 395/682] x86: unify pgd_bad Impact: cleanup Unify and demacro pgd_bad. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 5 ----- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 5 ----- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 7ad9d05710b..b92524eec20 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -23,11 +23,6 @@ static inline int pud_none(pud_t pud) return pud_val(pud) == 0; } -static inline int pud_bad(pud_t pud) -{ - return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; -} - /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index ebcb60e6a96..38882f6cc82 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -546,6 +546,11 @@ static inline int pud_large(pud_t pud) return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } + +static inline int pud_bad(pud_t pud) +{ + return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1dfc44932f6..fe8be33df3d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -154,11 +154,6 @@ static inline void native_pgd_clear(pgd_t *pgd) #ifndef __ASSEMBLY__ -static inline int pud_bad(pud_t pud) -{ - return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; -} - static inline int pmd_bad(pmd_t pmd) { return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; From 99510238bb428091e7caba020bc5e18b5f30b619 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:11 -0800 Subject: [PATCH 396/682] x86: unify pmd_bad Impact: cleanup Unify and demacro pmd_bad. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 2 -- arch/x86/include/asm/pgtable_64.h | 5 ----- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 38882f6cc82..72bf53ef60b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -514,6 +514,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); } +static inline int pmd_bad(pmd_t pmd) +{ + return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index ad7830bdc9a..5362632df75 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -85,8 +85,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* The boot page tables (all created as a single array) */ extern unsigned long pg0[]; -#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) - #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) #ifdef CONFIG_X86_PAE diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index fe8be33df3d..d230e28a5f3 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -154,11 +154,6 @@ static inline void native_pgd_clear(pgd_t *pgd) #ifndef __ASSEMBLY__ -static inline int pmd_bad(pmd_t pmd) -{ - return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; -} - #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ /* From cc290ca38cc4c78b0d6175633232f05b8d8732ab Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:12 -0800 Subject: [PATCH 397/682] x86: unify pages_to_mb Impact: cleanup Unify and demacro pages_to_mb. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_32.h | 2 -- arch/x86/include/asm/pgtable_64.h | 2 -- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 72bf53ef60b..d4cbc8188c8 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -519,6 +519,11 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } +static inline unsigned long pages_to_mb(unsigned long npg) +{ + return npg >> (20 - PAGE_SHIFT); +} + #if PAGETABLE_LEVELS > 2 static inline int pud_present(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 5362632df75..10c71abae07 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -85,8 +85,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* The boot page tables (all created as a single array) */ extern unsigned long pg0[]; -#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - #ifdef CONFIG_X86_PAE # include #else diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index d230e28a5f3..6c6c3c34bc5 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -154,8 +154,6 @@ static inline void native_pgd_clear(pgd_t *pgd) #ifndef __ASSEMBLY__ -#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. From deb79cfb365c96ff960570d1bcf2c205424b6195 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:13 -0800 Subject: [PATCH 398/682] x86: unify pud_none Impact: cleanup Unify and demacro pud_none. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable-3level.h | 5 ----- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index b92524eec20..3f13cdf6115 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -18,11 +18,6 @@ printk("%s:%d: bad pgd %p(%016Lx).\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -static inline int pud_none(pud_t pud) -{ - return pud_val(pud) == 0; -} - /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d4cbc8188c8..0ef49f3ebc8 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -525,6 +525,11 @@ static inline unsigned long pages_to_mb(unsigned long npg) } #if PAGETABLE_LEVELS > 2 +static inline int pud_none(pud_t pud) +{ + return pud_val(pud) == 0; +} + static inline int pud_present(pud_t pud) { return pud_val(pud) & _PAGE_PRESENT; diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6c6c3c34bc5..d58c2ee15c3 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -68,7 +68,6 @@ extern void paging_init(void); __FILE__, __LINE__, &(e), pgd_val(e)) #define pgd_none(x) (!pgd_val(x)) -#define pud_none(x) (!pud_val(x)) struct mm_struct; From 7325cc2e333cdaaabd2103552458876ea85adb33 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:14 -0800 Subject: [PATCH 399/682] x86: unify pgd_none Impact: cleanup Unify and demacro pgd_none. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/include/asm/pgtable_64.h | 2 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0ef49f3ebc8..18afcd31e76 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -599,6 +599,11 @@ static inline int pgd_bad(pgd_t pgd) { return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } + +static inline int pgd_none(pgd_t pgd) +{ + return !pgd_val(pgd); +} #endif /* PAGETABLE_LEVELS > 3 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index d58c2ee15c3..3b92a4ca403 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -67,8 +67,6 @@ extern void paging_init(void); printk("%s:%d: bad pgd %p(%016lx).\n", \ __FILE__, __LINE__, &(e), pgd_val(e)) -#define pgd_none(x) (!pgd_val(x)) - struct mm_struct; void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); From 6cf7150084500962b8e225e2409ec01ed06a2c71 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:15 -0800 Subject: [PATCH 400/682] x86: unify io_remap_pfn_range Impact: cleanup Unify io_remap_pfn_range. Don't demacro yet. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 3 +++ arch/x86/include/asm/pgtable_32.h | 3 --- arch/x86/include/asm/pgtable_64.h | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 18afcd31e76..9754d06ffe6 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -524,6 +524,9 @@ static inline unsigned long pages_to_mb(unsigned long npg) return npg >> (20 - PAGE_SHIFT); } +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 10c71abae07..1952bb762aa 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -133,7 +133,4 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 3b92a4ca403..100ac483a0b 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -205,9 +205,6 @@ extern int direct_gbpages; extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN From 18a7a199f97a7509fb987722e543f1aac3d7ada5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:16 -0800 Subject: [PATCH 401/682] x86: add and use pgd/pud/pmd_flags Add pgd/pud/pmd_flags which are analogous to pte_flags, and use them where-ever we only care about testing the flags portions of the respective entries. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page.h | 15 +++++++++++++++ arch/x86/include/asm/pgtable.h | 16 ++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index e9873a2e869..0b16b64a8fe 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -95,6 +95,11 @@ static inline pgdval_t native_pgd_val(pgd_t pgd) return pgd.pgd; } +static inline pgdval_t pgd_flags(pgd_t pgd) +{ + return native_pgd_val(pgd) & PTE_FLAGS_MASK; +} + #if PAGETABLE_LEVELS >= 3 #if PAGETABLE_LEVELS == 4 typedef struct { pudval_t pud; } pud_t; @@ -117,6 +122,11 @@ static inline pudval_t native_pud_val(pud_t pud) } #endif /* PAGETABLE_LEVELS == 4 */ +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & PTE_FLAGS_MASK; +} + typedef struct { pmdval_t pmd; } pmd_t; static inline pmd_t native_make_pmd(pmdval_t val) @@ -128,6 +138,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) { return pmd.pmd; } + +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & PTE_FLAGS_MASK; +} #else /* PAGETABLE_LEVELS == 2 */ #include diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9754d06ffe6..c811d76d6fd 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -236,7 +236,7 @@ static inline unsigned long pte_pfn(pte_t pte) static inline int pmd_large(pmd_t pte) { - return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } @@ -458,7 +458,7 @@ static inline int pte_present(pte_t a) static inline int pmd_present(pmd_t pmd) { - return pmd_val(pmd) & _PAGE_PRESENT; + return pmd_flags(pmd) & _PAGE_PRESENT; } static inline int pmd_none(pmd_t pmd) @@ -516,7 +516,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) static inline int pmd_bad(pmd_t pmd) { - return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; } static inline unsigned long pages_to_mb(unsigned long npg) @@ -535,7 +535,7 @@ static inline int pud_none(pud_t pud) static inline int pud_present(pud_t pud) { - return pud_val(pud) & _PAGE_PRESENT; + return pud_flags(pud) & _PAGE_PRESENT; } static inline unsigned long pud_page_vaddr(pud_t pud) @@ -561,20 +561,20 @@ static inline unsigned long pmd_pfn(pmd_t pmd) static inline int pud_large(pud_t pud) { - return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == + return (pud_flags(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } static inline int pud_bad(pud_t pud) { - return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; + return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 static inline int pgd_present(pgd_t pgd) { - return pgd_val(pgd) & _PAGE_PRESENT; + return pgd_flags(pgd) & _PAGE_PRESENT; } static inline unsigned long pgd_page_vaddr(pgd_t pgd) @@ -600,7 +600,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) static inline int pgd_bad(pgd_t pgd) { - return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; + return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; } static inline int pgd_none(pgd_t pgd) From 26c8e3179933c5c9071b16db76ab6de58a787d06 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 5 Feb 2009 11:31:17 -0800 Subject: [PATCH 402/682] x86: make pgd/pud/pmd/pte_none consistent The _none test is done differently for every level of the pagetable. Standardize them by: 1: Use the native_X_val to extract the raw entry, with no need to go via paravirt_ops, diff -r 1d0646d0d319 arch/x86/include/asm/pgtable.h, and 2: Compare with 0 rather than using a boolean !, since they are actually values and not booleans. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c811d76d6fd..a80a956ae65 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -465,7 +465,7 @@ static inline int pmd_none(pmd_t pmd) { /* Only check low word on 32-bit platforms, since it might be out of sync with upper half. */ - return !(unsigned long)native_pmd_val(pmd); + return (unsigned long)native_pmd_val(pmd) == 0; } static inline unsigned long pmd_page_vaddr(pmd_t pmd) @@ -530,7 +530,7 @@ static inline unsigned long pages_to_mb(unsigned long npg) #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { - return pud_val(pud) == 0; + return native_pud_val(pud) == 0; } static inline int pud_present(pud_t pud) @@ -605,7 +605,7 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { - return !pgd_val(pgd); + return !native_pgd_val(pgd); } #endif /* PAGETABLE_LEVELS > 3 */ From 976e8f677e42757e5586ea04a9ac8bb8ddaa037e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:29:44 -0800 Subject: [PATCH 403/682] x86: asm/io.h: unify virt_to_phys/phys_to_virt Impact: unify identical code asm/io_32.h and _64.h has functionally identical definitions for virt_to_phys, phys_to_virt, page_to_phys, and the isa_* variants, so just unify them. The only slightly functional change is using phys_addr_t for the physical address argument and return val. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/io.h | 59 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/io_32.h | 57 ---------------------------------- arch/x86/include/asm/io_64.h | 37 ---------------------- 3 files changed, 59 insertions(+), 94 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 1dbbdf4be9b..919e3b19f3c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -5,6 +5,7 @@ #include #include +#include #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ @@ -80,6 +81,64 @@ static inline void writeq(__u64 val, volatile void __iomem *addr) #define readq readq #define writeq writeq +/** + * virt_to_phys - map virtual addresses to physical + * @address: address to remap + * + * The returned physical address is the physical (CPU) mapping for + * the memory address given. It is only valid to use this function on + * addresses directly mapped or allocated via kmalloc. + * + * This function does not give bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline phys_addr_t virt_to_phys(volatile void *address) +{ + return __pa(address); +} + +/** + * phys_to_virt - map physical address to virtual + * @address: address to remap + * + * The returned virtual address is a current CPU mapping for + * the memory address given. It is only valid to use this function on + * addresses that have a kernel mapping + * + * This function does not handle bus mappings for DMA transfers. In + * almost all conceivable cases a device driver should not be using + * this function + */ + +static inline void *phys_to_virt(phys_addr_t address) +{ + return __va(address); +} + +/* + * Change "struct page" to physical address. + */ +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) + +/* + * ISA I/O bus memory addresses are 1:1 with the physical address. + */ +#define isa_virt_to_bus virt_to_phys +#define isa_page_to_bus page_to_phys +#define isa_bus_to_virt phys_to_virt + +/* + * However PCI ones are not necessarily 1:1 and therefore these interfaces + * are forbidden in portable PCI drivers. + * + * Allow them on x86 for legacy drivers, though. + */ +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + + #ifdef CONFIG_X86_32 # include "io_32.h" #else diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h index d8e242e1b39..2b687cb8609 100644 --- a/arch/x86/include/asm/io_32.h +++ b/arch/x86/include/asm/io_32.h @@ -53,47 +53,6 @@ */ #define xlate_dev_kmem_ptr(p) p -/** - * virt_to_phys - map virtual addresses to physical - * @address: address to remap - * - * The returned physical address is the physical (CPU) mapping for - * the memory address given. It is only valid to use this function on - * addresses directly mapped or allocated via kmalloc. - * - * This function does not give bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline unsigned long virt_to_phys(volatile void *address) -{ - return __pa(address); -} - -/** - * phys_to_virt - map physical address to virtual - * @address: address to remap - * - * The returned virtual address is a current CPU mapping for - * the memory address given. It is only valid to use this function on - * addresses that have a kernel mapping - * - * This function does not handle bus mappings for DMA transfers. In - * almost all conceivable cases a device driver should not be using - * this function - */ - -static inline void *phys_to_virt(unsigned long address) -{ - return __va(address); -} - -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) - /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -123,22 +82,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) extern void iounmap(volatile void __iomem *addr); -/* - * ISA I/O bus memory addresses are 1:1 with the physical address. - */ -#define isa_virt_to_bus virt_to_phys -#define isa_page_to_bus page_to_phys -#define isa_bus_to_virt phys_to_virt - -/* - * However PCI ones are not necessarily 1:1 and therefore these interfaces - * are forbidden in portable PCI drivers. - * - * Allow them on x86 for legacy drivers, though. - */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index 563c16270ba..e71b5550877 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h @@ -142,27 +142,6 @@ __OUTS(l) #include -#ifndef __i386__ -/* - * Change virtual addresses to physical addresses and vv. - * These are pretty trivial - */ -static inline unsigned long virt_to_phys(volatile void *address) -{ - return __pa(address); -} - -static inline void *phys_to_virt(unsigned long address) -{ - return __va(address); -} -#endif - -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) - #include /* @@ -187,22 +166,6 @@ extern void iounmap(volatile void __iomem *addr); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); -/* - * ISA I/O bus memory addresses are 1:1 with the physical address. - */ -#define isa_virt_to_bus virt_to_phys -#define isa_page_to_bus page_to_phys -#define isa_bus_to_virt phys_to_virt - -/* - * However PCI ones are not necessarily 1:1 and therefore these interfaces - * are forbidden in portable PCI drivers. - * - * Allow them on x86 for legacy drivers, though. - */ -#define virt_to_bus virt_to_phys -#define bus_to_virt phys_to_virt - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); From 133822c5c038b265ddb6545cda3a4c88815c7d3d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:29:52 -0800 Subject: [PATCH 404/682] x86: asm/io.h: unify ioremap prototypes Impact: unify identical code asm/io_32.h and _64.h have identical prototypes for the ioremap family of functions. The 32-bit header had a more descriptive comment. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/io.h | 31 +++++++++++++++++++++++++++++++ arch/x86/include/asm/io_32.h | 29 ----------------------------- arch/x86/include/asm/io_64.h | 22 ---------------------- 3 files changed, 31 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 919e3b19f3c..f150b1ecf92 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -138,6 +138,37 @@ static inline void *phys_to_virt(phys_addr_t address) #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt +/** + * ioremap - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * If the area you are trying to map is a PCI BAR you should have a + * look at pci_iomap(). + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); + +/* + * The default ioremap() behavior is non-cached: + */ +static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap_nocache(offset, size); +} + +extern void iounmap(volatile void __iomem *addr); + +extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + #ifdef CONFIG_X86_32 # include "io_32.h" diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h index 2b687cb8609..2fbe7dd26bb 100644 --- a/arch/x86/include/asm/io_32.h +++ b/arch/x86/include/asm/io_32.h @@ -53,35 +53,6 @@ */ #define xlate_dev_kmem_ptr(p) p -/** - * ioremap - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * If the area you are trying to map is a PCI BAR you should have a - * look at pci_iomap(). - */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} - -extern void iounmap(volatile void __iomem *addr); - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index e71b5550877..0424c07246f 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h @@ -144,28 +144,6 @@ __OUTS(l) #include -/* - * This one maps high address device memory and turns off caching for that area. - * it's useful if some control registers are in such an area and write combining - * or read caching is not desirable: - */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ -static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) -{ - return ioremap_nocache(offset, size); -} - -extern void iounmap(volatile void __iomem *addr); - -extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); - void __memcpy_fromio(void *, unsigned long, unsigned); void __memcpy_toio(unsigned long, const void *, unsigned); From fb08b20fe7c8491a35a4369cce60fcb886d7609d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:05:28 -0800 Subject: [PATCH 405/682] x86: Fix compile error in arch/x86/kernel/early_printk.c Fix compile problem: CC arch/x86/kernel/early_printk.o In file included from /home/jeremy/hg/xen/paravirt/linux/arch/x86/kernel/early_printk.c:17: /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h: In function 'pmd_page': /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: error: implicit declaration of function '__pfn_to_section' /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: warning: initialization makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: error: implicit declaration of function '__section_mem_map_addr' /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:516: warning: return makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h: In function 'pud_page': /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:586: warning: initialization makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:586: warning: return makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h: In function 'pgd_page': /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:625: warning: initialization makes pointer from integer without a cast /home/jeremy/hg/xen/paravirt/linux/arch/x86/include/asm/pgtable.h:625: warning: return makes pointer from integer without a cast This is a cycling dependency between asm/pgtable.h and linux/mmzone.h when using CONFIG_SPARSEMEM. Rather than hacking up the headers some more, remove asm/pgtable.h, since early_printk.c doesn't actually need it. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/early_printk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 504ad198e4a..6a36dd228b6 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include From 0fb807c3e573ff9de2965ca38c907605d4735d16 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 8 Feb 2009 11:00:25 +0530 Subject: [PATCH 406/682] unconditionally include asm/types.h from linux/types.h Reported-by: Sam Ravnborg Signed-off-by: Jaswinder Singh Rajput --- include/linux/types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/types.h b/include/linux/types.h index c30973ace89..fca82ed55f4 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H +#include + #ifndef __ASSEMBLY__ #ifdef __KERNEL__ @@ -10,7 +12,6 @@ #endif #include -#include #ifndef __KERNEL_STRICT_NAMES From f1ee5548a6507d2b4293635c61ddbf12e2c00e94 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 407/682] x86/irq: optimize nr_irqs Impact: make nr_irqs depend more on cards used in a system depend on nr_irq_gsi more, and have a ratio for MSI. v2: make nr_irqs less than NR_VECTORS * nr_cpu_ids aka if only one cpu, we only can support nr_irqs = NR_VECTORS Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9578d33f20a..43f95d7b13a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3479,9 +3479,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want); - irq_want++; if (irq == 0) return -1; + irq_want = irq + 1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; @@ -3825,11 +3825,17 @@ int __init arch_probe_nr_irqs(void) { int nr; - nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? - (NR_VECTORS + (8 * nr_cpu_ids)) : - (NR_VECTORS + (32 * nr_ioapics))); + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; - if (nr < nr_irqs && nr > nr_irqs_gsi) + nr = nr_irqs_gsi + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + nr += nr_irqs_gsi * 16; +#endif + if (nr < nr_irqs) nr_irqs = nr; return 0; From abcaa2b8319a7673e76c2391cb5de3045ab1b401 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 408/682] x86: use NR_IRQS_LEGACY to replace 16 Impact: cleanup also could kill platform_legacy_irq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hw_irq.h | 4 +--- arch/x86/kernel/io_apic.c | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1a20e3d1200..1b82781b898 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -25,8 +25,6 @@ #include #include -#define platform_legacy_irq(irq) ((irq) < 16) - /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void error_interrupt(void); @@ -58,7 +56,7 @@ extern void make_8259A_irq(unsigned int irq); extern void init_8259A(int aeoi); /* IOAPIC */ -#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) +#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) extern unsigned long io_apic_irqs; extern void init_VISWS_APIC_irqs(void); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 43f95d7b13a..e5be9f35ea5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3165,6 +3165,7 @@ static int __init ioapic_init_sysfs(void) device_initcall(ioapic_init_sysfs); +static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ @@ -3179,11 +3180,11 @@ unsigned int create_irq_nr(unsigned int irq_want) struct irq_desc *desc_new = NULL; irq = 0; + if (irq_want < nr_irqs_gsi) + irq_want = nr_irqs_gsi; + spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - if (platform_legacy_irq(new)) - continue; - desc_new = irq_to_desc_alloc_cpu(new, cpu); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); @@ -3208,7 +3209,6 @@ unsigned int create_irq_nr(unsigned int irq_want) return irq; } -static int nr_irqs_gsi = NR_IRQS_LEGACY; int create_irq(void) { unsigned int irq_want; From f72dccace737df74d04a96461785a3ad61724b9f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 409/682] x86: check_timer cleanup Impact: make check-timer more robust potentially solve boot fragility For edge trigger io-apic routing, we already unmasked the pin via setup_IO_APIC_irq(), so don't unmask it again. Also call local_irq_disable() between timer_irq_works(), because it calls local_irq_enable() inside. Also remove not needed apic version reading for 64-bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e5be9f35ea5..855209a1b17 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1657,7 +1657,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * to the first CPU. */ entry.dest_mode = apic->irq_dest_mode; - entry.mask = 1; /* mask IRQ now */ + entry.mask = 0; /* don't mask IRQ for edge */ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; @@ -2863,14 +2863,10 @@ static inline void __init check_timer(void) int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; - unsigned int ver; int no_pin1 = 0; local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - /* * get/set the timer IRQ vector: */ @@ -2889,7 +2885,13 @@ static inline void __init check_timer(void) apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); #ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + { + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + } #endif pin1 = find_isa_irq_pin(0, mp_INT); @@ -2928,8 +2930,17 @@ static inline void __init check_timer(void) if (no_pin1) { add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } else { + /* for edge trigger, setup_IO_APIC_irq already + * leave it unmasked. + * so only need to unmask if it is level-trigger + * do we really have level trigger timer? + */ + int idx; + idx = find_irq_entry(apic1, pin1, mp_INT); + if (idx != -1 && irq_trigger(idx)) + unmask_IO_APIC_irq_desc(desc); } - unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2943,6 +2954,7 @@ static inline void __init check_timer(void) if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); #endif + local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2957,7 +2969,6 @@ static inline void __init check_timer(void) */ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2972,6 +2983,7 @@ static inline void __init check_timer(void) /* * Cleanup, just in case ... */ + local_irq_disable(); disable_8259A_irq(0); clear_IO_APIC_pin(apic2, pin2); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); @@ -2997,6 +3009,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } + local_irq_disable(); disable_8259A_irq(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); @@ -3014,6 +3027,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } + local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option.\n"); From cc6c50066ec1ac98bef97117e2f078bb89bbccc7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: [PATCH 410/682] x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 6 ++++++ arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d22f732eab8..8c5620147c4 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -73,6 +73,7 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); @@ -84,6 +85,11 @@ mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, return 0; } #endif +#else /* !CONFIG_ACPI: */ +static inline int acpi_probe_gsi(void) +{ + return 0; +} #endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3efa996b036..c334fe75dcd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -961,6 +961,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mpc_intsrc *m, struct mpc_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 855209a1b17..56e51eb551a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3824,14 +3824,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } #ifdef CONFIG_SPARSE_IRQ From 2c344e9d6e1938fdf15e93c56d6fe42f8410e9d3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 7 Feb 2009 12:23:37 -0800 Subject: [PATCH 411/682] x86: don't pretend that non-framepointer stack traces are reliable Without frame pointers enabled, the x86 stack traces should not pretend to be reliable; instead they should just be what they are: unreliable. The effect of this is that they have a '?' printed in the stacktrace, to warn the reader that these entries are guesses rather than known based on more reliable information. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6b1f6f6f866..87d103ded1c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo, frame = frame->next_frame; bp = (unsigned long) frame; } else { - ops->address(data, addr, bp == 0); + ops->address(data, addr, 0); } print_ftrace_graph_addr(addr, data, ops, tinfo, graph); } From d3770449d3cb058b94ca1d050d5ced4a66c75ce4 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:38 -0500 Subject: [PATCH 412/682] percpu: make PER_CPU_BASE_SECTION overridable by arches Impact: bug fix IA-64 needs to put percpu data in the seperate section even on UP. Fixes regression caused by "percpu: refactor percpu.h" Signed-off-by: Brian Gerst Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/percpu.h | 4 ++-- include/linux/percpu.h | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h index 77f30b664b4..30cf46534dd 100644 --- a/arch/ia64/include/asm/percpu.h +++ b/arch/ia64/include/asm/percpu.h @@ -27,12 +27,12 @@ extern void *per_cpu_init(void); #else /* ! SMP */ -#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu"))) - #define per_cpu_init() (__phys_per_cpu_start) #endif /* SMP */ +#define PER_CPU_BASE_SECTION ".data.percpu" + /* * Be extremely careful when taking the address of this variable! Due to virtual * remapping, it is different from the canonical address returned by __get_cpu_var(var)! diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0e24202b5a4..3577ffd90d4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,8 +8,15 @@ #include +#ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP #define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP #ifdef MODULE #define PER_CPU_SHARED_ALIGNED_SECTION "" @@ -20,7 +27,6 @@ #else -#define PER_CPU_BASE_SECTION ".data" #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_FIRST_SECTION "" From 2add8e235cbe0dcd672c33fc322754e15500238c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:39 -0500 Subject: [PATCH 413/682] x86: use linker to offset symbols by __per_cpu_load Impact: cleanup and bug fix Use the linker to create symbols for certain per-cpu variables that are offset by __per_cpu_load. This allows the removal of the runtime fixup of the GDT pointer, which fixes a bug with resume reported by Jiri Slaby. Reported-by: Jiri Slaby Signed-off-by: Brian Gerst Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/include/asm/percpu.h | 22 ++++++++++++++++++++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/kernel/cpu/common.c | 6 +----- arch/x86/kernel/head_64.S | 21 ++------------------- arch/x86/kernel/vmlinux_64.lds.S | 8 ++++++++ 5 files changed, 35 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0b64af4f13a..aee103b26d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -34,6 +34,12 @@ #define PER_CPU_VAR(var) per_cpu__##var #endif /* SMP */ +#ifdef CONFIG_X86_64_SMP +#define INIT_PER_CPU_VAR(var) init_per_cpu__##var +#else +#define INIT_PER_CPU_VAR(var) per_cpu__##var +#endif + #else /* ...!ASSEMBLY */ #include @@ -45,6 +51,22 @@ #define __percpu_arg(x) "%" #x #endif +/* + * Initialized pointers to per-cpu variables needed for the boot + * processor need to use these macros to get the proper address + * offset from __per_cpu_load on SMP. + * + * There also must be an entry in vmlinux_64.lds.S + */ +#define DECLARE_INIT_PER_CPU(var) \ + extern typeof(per_cpu_var(var)) init_per_cpu_var(var) + +#ifdef CONFIG_X86_64_SMP +#define init_per_cpu_var(var) init_per_cpu__##var +#else +#define init_per_cpu_var(var) per_cpu_var(var) +#endif + /* For arch-specific code, we can use direct single-insn ops (they * don't give an lvalue though). */ extern void __bad_percpu_size(void); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509..373d3f628d2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -393,6 +393,8 @@ union irq_stack_union { }; DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_INIT_PER_CPU(irq_stack_union); + DECLARE_PER_CPU(char *, irq_stack_ptr); #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0f73ea42308..41b0de6df87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -902,12 +902,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE); -#ifdef CONFIG_SMP -DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ -#else DEFINE_PER_CPU(char *, irq_stack_ptr) = - per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; -#endif + init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; DEFINE_PER_CPU(unsigned long, kernel_stack) = (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a0a2b5ca9b7..2e648e3a5ea 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -205,19 +205,6 @@ ENTRY(secondary_startup_64) pushq $0 popfq -#ifdef CONFIG_SMP - /* - * Fix up static pointers that need __per_cpu_load added. The assembler - * is unable to do this directly. This is only needed for the boot cpu. - * These values are set up with the correct base addresses by C code for - * secondary cpus. - */ - movq initial_gs(%rip), %rax - cmpl $0, per_cpu__cpu_number(%rax) - jne 1f - addq %rax, early_gdt_descr_base(%rip) -1: -#endif /* * We must switch to a new descriptor in kernel space for the GDT * because soon the kernel won't have access anymore to the userspace @@ -275,11 +262,7 @@ ENTRY(secondary_startup_64) ENTRY(initial_code) .quad x86_64_start_kernel ENTRY(initial_gs) -#ifdef CONFIG_SMP - .quad __per_cpu_load -#else - .quad PER_CPU_VAR(irq_stack_union) -#endif + .quad INIT_PER_CPU_VAR(irq_stack_union) __FINITDATA ENTRY(stack_start) @@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt) early_gdt_descr: .word GDT_ENTRIES*8-1 early_gdt_descr_base: - .quad per_cpu__gdt_page + .quad INIT_PER_CPU_VAR(gdt_page) ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 07f62d287ff..087a7f2c639 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -257,6 +257,14 @@ SECTIONS DWARF_DEBUG } + /* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + /* * Build-time check on the image size: */ From 44581a28e805a31661469c4b466b9cd14b36e7b6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:40 -0500 Subject: [PATCH 414/682] x86: fix abuse of per_cpu_offset Impact: bug fix Don't use per_cpu_offset() to determine if it valid to access a per-cpu variable for a given cpu number. It is not a valid assumption on x86-64 anymore. Use cpu_possible() instead. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 08d140fbc31..deb1c1ab786 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node) } #ifdef CONFIG_DEBUG_PER_CPU_MAPS - if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); dump_stack(); return; @@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu) if (early_per_cpu_ptr(x86_cpu_to_node_map)) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; - if (!per_cpu_offset(cpu)) { + if (!cpu_possible(cpu)) { printk(KERN_WARNING "early_cpu_to_node(%d): no per_cpu area!\n", cpu); dump_stack(); From 726c0d95b6bd06cb83efd36a76ccf03fa9a770f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Feb 2009 11:32:17 +0100 Subject: [PATCH 415/682] x86: early_printk.c - fix pgtable.h unification fallout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/early_printk.c: In function ‘early_dbgp_init’: arch/x86/kernel/early_printk.c:827: error: ‘PAGE_KERNEL_NOCACHE’ undeclared (first use in this function) arch/x86/kernel/early_printk.c:827: error: (Each undeclared identifier is reported only once arch/x86/kernel/early_printk.c:827: error: for each function it appears in.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 6a36dd228b6..639ad98238a 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -14,6 +14,7 @@ #include #include #include +#include #include /* Simple VGA output */ From e5f7f202f31fd05e9de7e1ba5a7b30de7855f5aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Feb 2009 11:42:57 +0100 Subject: [PATCH 416/682] x86, pgtable.h: macro-ify *_page() methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The p?d_page() methods still rely on highlevel types and methods: In file included from arch/x86/kernel/early_printk.c:18: /home/mingo/tip/arch/x86/include/asm/pgtable.h: In function ‘pmd_page’: /home/mingo/tip/arch/x86/include/asm/pgtable.h:516: error: implicit declaration of function ‘__pfn_to_section’ /home/mingo/tip/arch/x86/include/asm/pgtable.h:516: error: initialization makes pointer from integer without a cast /home/mingo/tip/arch/x86/include/asm/pgtable.h:516: error: implicit declaration of function ‘__section_mem_map_addr’ /home/mingo/tip/arch/x86/include/asm/pgtable.h:516: error: return makes pointer from integer without a cast So convert them to macros and document the type dependency. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a80a956ae65..76696e98f5b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -473,10 +473,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); } -static inline struct page *pmd_page(pmd_t pmd) -{ - return pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT); -} +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) /* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -543,10 +544,11 @@ static inline unsigned long pud_page_vaddr(pud_t pud) return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); } -static inline struct page *pud_page(pud_t pud) -{ - return pfn_to_page(pud_val(pud) >> PAGE_SHIFT); -} +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) /* Find an entry in the second-level page table.. */ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) @@ -582,10 +584,11 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); } -static inline struct page *pgd_page(pgd_t pgd) -{ - return pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT); -} +/* + * Currently stuck as a macro due to indirect forward reference to + * linux/mmzone.h's __section_mem_map_addr() definition: + */ +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) /* to find an entry in a page-table-directory. */ static inline unsigned pud_index(unsigned long address) From c47c1b1f3a9d6973108020df1dcab7604f7774dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Feb 2009 11:57:45 +0100 Subject: [PATCH 417/682] x86, pgtable.h: fix 2-level 32-bit build - pmd_flags() needs to be available on 2-levels too - provide pud_large() wrapper as well - include page.h - it provides basic types relied on by pgtable.h Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 9 +++++---- arch/x86/include/asm/pgtable.h | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 0b16b64a8fe..823cc931363 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -139,10 +139,6 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } -static inline pmdval_t pmd_flags(pmd_t pmd) -{ - return native_pmd_val(pmd) & PTE_FLAGS_MASK; -} #else /* PAGETABLE_LEVELS == 2 */ #include @@ -152,6 +148,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) } #endif /* PAGETABLE_LEVELS >= 3 */ +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & PTE_FLAGS_MASK; +} + static inline pte_t native_make_pte(pteval_t val) { return (pte_t) { .pte = val }; diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 76696e98f5b..178205305ac 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PGTABLE_H #define _ASM_X86_PGTABLE_H +#include + #define FIRST_USER_ADDRESS 0 #define _PAGE_BIT_PRESENT 0 /* is present */ @@ -528,6 +530,13 @@ static inline unsigned long pages_to_mb(unsigned long npg) #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) +#if PAGETABLE_LEVELS == 2 +static inline int pud_large(pud_t pud) +{ + return 0; +} +#endif + #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { From 9b2b76a3344146c4d8d300874e73af8161204f87 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:40 -0800 Subject: [PATCH 418/682] x86: add handle_irq() to allow interrupt injection Xen uses a different interrupt path, so introduce handle_irq() to allow interrupts to be inserted into the normal interrupt path. This is handled slightly differently on 32 and 64-bit. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 1 + arch/x86/kernel/irq_32.c | 34 +++++++++++++++++++++------------- arch/x86/kernel/irq_64.c | 23 ++++++++++++++++------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 592688ed04d..d0f6f7d1771 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -39,6 +39,7 @@ extern void fixup_irqs(void); extern unsigned int do_IRQ(struct pt_regs *regs); extern void init_IRQ(void); extern void native_init_IRQ(void); +extern bool handle_irq(unsigned irq, struct pt_regs *regs); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d802c844991..61f09fb969e 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -191,6 +191,26 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } #endif +bool handle_irq(unsigned irq, struct pt_regs *regs) +{ + struct irq_desc *desc; + int overflow; + + overflow = check_stack_overflow(); + + desc = irq_to_desc(irq); + if (unlikely(!desc)) + return false; + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) + print_stack_overflow(); + desc->handle_irq(irq, desc); + } + + return true; +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -200,31 +220,19 @@ unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int overflow; unsigned vector = ~regs->orig_ax; - struct irq_desc *desc; unsigned irq; - old_regs = set_irq_regs(regs); irq_enter(); irq = __get_cpu_var(vector_irq)[vector]; - overflow = check_stack_overflow(); - - desc = irq_to_desc(irq); - if (unlikely(!desc)) { + if (!handle_irq(irq, regs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", __func__, irq, vector, smp_processor_id()); BUG(); } - if (!execute_on_irq_stack(overflow, desc, irq)) { - if (unlikely(overflow)) - print_stack_overflow(); - desc->handle_irq(irq, desc); - } - irq_exit(); set_irq_regs(old_regs); return 1; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 018963aa6ee..a93f3b0dc7f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -48,6 +48,20 @@ static inline void stack_overflow_check(struct pt_regs *regs) #endif } +bool handle_irq(unsigned irq, struct pt_regs *regs) +{ + struct irq_desc *desc; + + stack_overflow_check(regs); + + desc = irq_to_desc(irq); + if (unlikely(!desc)) + return false; + + generic_handle_irq_desc(irq, desc); + return true; +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -56,7 +70,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct irq_desc *desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; @@ -64,14 +77,10 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) exit_idle(); irq_enter(); + irq = __get_cpu_var(vector_irq)[vector]; - stack_overflow_check(regs); - - desc = irq_to_desc(irq); - if (likely(desc)) - generic_handle_irq_desc(irq, desc); - else { + if (!handle_irq(irq, regs)) { if (!disable_apic) ack_APIC_irq(); From 7c1d7cdcef1b54f4a78892b6b99d19f12c4f398e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:41 -0800 Subject: [PATCH 419/682] x86: unify do_IRQ() With the differences in interrupt handling hoisted into handle_irq(), do_IRQ is more or less identical between 32 and 64 bit, so unify it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq.h | 3 ++- arch/x86/kernel/irq.c | 38 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 27 --------------------------- arch/x86/kernel/irq_64.c | 33 --------------------------------- 4 files changed, 40 insertions(+), 61 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index d0f6f7d1771..107eb219669 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -36,11 +36,12 @@ static inline int irq_canonicalize(int irq) extern void fixup_irqs(void); #endif -extern unsigned int do_IRQ(struct pt_regs *regs); extern void init_IRQ(void); extern void native_init_IRQ(void); extern bool handle_irq(unsigned irq, struct pt_regs *regs); +extern unsigned int do_IRQ(struct pt_regs *regs); + /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 8b30d0c2512..f13ca1650aa 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -6,10 +6,12 @@ #include #include #include +#include #include #include #include +#include atomic_t irq_err_count; @@ -188,4 +190,40 @@ u64 arch_irq_stat(void) return sum; } + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +unsigned int __irq_entry do_IRQ(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + unsigned irq; + + exit_idle(); + irq_enter(); + + irq = __get_cpu_var(vector_irq)[vector]; + + if (!handle_irq(irq, regs)) { +#ifdef CONFIG_X86_64 + if (!disable_apic) + ack_APIC_irq(); +#endif + + if (printk_ratelimit()) + printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", + __func__, smp_processor_id(), vector, irq); + } + + irq_exit(); + + set_irq_regs(old_regs); + return 1; +} + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 61f09fb969e..4beb9a13873 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,33 +211,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -unsigned int do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs; - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - old_regs = set_irq_regs(regs); - irq_enter(); - irq = __get_cpu_var(vector_irq)[vector]; - - if (!handle_irq(irq, regs)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", - __func__, irq, vector, smp_processor_id()); - BUG(); - } - - irq_exit(); - set_irq_regs(old_regs); - return 1; -} - #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a93f3b0dc7f..977d8b43a0d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,39 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* high bit used in ret_from_ code */ - unsigned vector = ~regs->orig_ax; - unsigned irq; - - exit_idle(); - irq_enter(); - - irq = __get_cpu_var(vector_irq)[vector]; - - if (!handle_irq(irq, regs)) { - if (!disable_apic) - ack_APIC_irq(); - - if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", - __func__, smp_processor_id(), vector); - } - - irq_exit(); - - set_irq_regs(old_regs); - return 1; -} - #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) From 54a353a0f845c1dad5fc8183872e750d667838ac Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:42 -0800 Subject: [PATCH 420/682] xen: set irq_chip disable By default, the irq_chip.disable operation is a no-op. Explicitly set it to disable the Xen event channel. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 3141e149d59..7c3705479ea 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -820,8 +820,11 @@ void xen_irq_resume(void) static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", + + .disable = disable_dynirq, .mask = disable_dynirq, .unmask = enable_dynirq, + .ack = ack_dynirq, .set_affinity = set_affinity_irq, .retrigger = retrigger_dynirq, From 792dc4f6cdacf50d3f2b93756d282fc04ee34bd5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:43 -0800 Subject: [PATCH 421/682] xen: use our own eventchannel->irq path Rather than overloading vectors for event channels, take full responsibility for mapping an event channel to irq directly. With this patch Xen has its own irq allocator. When the kernel gets an event channel upcall, it maps the event channel number to an irq and injects it into the normal interrupt path. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/events.h | 6 ------ arch/x86/xen/irq.c | 17 +---------------- drivers/xen/events.c | 22 ++++++++++++++++++++-- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 19144184983..1df35417c41 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -15,10 +15,4 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->flags); } -static inline void xen_do_IRQ(int irq, struct pt_regs *regs) -{ - regs->orig_ax = ~irq; - do_IRQ(regs); -} - #endif /* _ASM_X86_XEN_EVENTS_H */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 5a070900ad3..cfd17799bd6 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -19,21 +19,6 @@ void xen_force_evtchn_callback(void) (void)HYPERVISOR_xen_version(0, NULL); } -static void __init __xen_init_IRQ(void) -{ - int i; - - /* Create identity vector->irq map */ - for(i = 0; i < NR_VECTORS; i++) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[i] = i; - } - - xen_init_IRQ(); -} - static unsigned long xen_save_fl(void) { struct vcpu_info *vcpu; @@ -127,7 +112,7 @@ static void xen_halt(void) } static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = __xen_init_IRQ, + .init_IRQ = xen_init_IRQ, .save_fl = PV_CALLEE_SAVE(xen_save_fl), .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7c3705479ea..2c8d710713f 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -517,6 +518,24 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) } +static void xen_do_irq(unsigned irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + if (WARN_ON(irq == -1)) + return; + + exit_idle(); + irq_enter(); + + //printk("cpu %d handling irq %d\n", smp_processor_id(), info->irq); + handle_irq(irq, regs); + + irq_exit(); + + set_irq_regs(old_regs); +} + /* * Search the CPUs pending events bitmasks. For each one found, map * the event number to an irq, and feed it into do_IRQ() for @@ -557,8 +576,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; - if (irq != -1) - xen_do_IRQ(irq, regs); + xen_do_irq(irq, regs); } } From ced40d0f3e8833bb8d7d8e2cbfac7da0bf7008c4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:44 -0800 Subject: [PATCH 422/682] xen: pack all irq-related info together Put all irq info into one struct. Also, use a union to keep event channel type-specific information, rather than overloading the index field. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 184 +++++++++++++++++++++++++++++++------------ 1 file changed, 135 insertions(+), 49 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 2c8d710713f..0541e07d4f6 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -52,18 +52,8 @@ static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping */ static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; -/* Packed IRQ information: binding type, sub-type index, and event channel. */ -struct packed_irq -{ - unsigned short evtchn; - unsigned char index; - unsigned char type; -}; - -static struct packed_irq irq_info[NR_IRQS]; - -/* Binding types. */ -enum { +/* Interrupt types. */ +enum xen_irq_type { IRQT_UNBOUND, IRQT_PIRQ, IRQT_VIRQ, @@ -71,8 +61,34 @@ enum { IRQT_EVTCHN }; -/* Convenient shorthand for packed representation of an unbound IRQ. */ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) +/* + * Packed IRQ information: + * type - enum xen_irq_type + * event channel - irq->event channel mapping + * cpu - cpu this event channel is bound to + * index - type-specific information: + * PIRQ - vector, with MSB being "needs EIO" + * VIRQ - virq number + * IPI - IPI vector + * EVTCHN - + */ +struct irq_info +{ + enum xen_irq_type type; /* type */ + unsigned short evtchn; /* event channel */ + unsigned short cpu; /* cpu bound */ + + union { + unsigned short virq; + enum ipi_vector ipi; + struct { + unsigned short gsi; + unsigned short vector; + } pirq; + } u; +}; + +static struct irq_info irq_info[NR_IRQS]; static int evtchn_to_irq[NR_EVENT_CHANNELS] = { [0 ... NR_EVENT_CHANNELS-1] = -1 @@ -85,7 +101,6 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) { return cpu_evtchn_mask_p[cpu].bits; } -static u8 cpu_evtchn[NR_EVENT_CHANNELS]; /* Reference counts for bindings to IRQs. */ static int irq_bindcount[NR_IRQS]; @@ -96,27 +111,107 @@ static int irq_bindcount[NR_IRQS]; static struct irq_chip xen_dynamic_chip; /* Constructor for packed IRQ information. */ -static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) +static struct irq_info mk_unbound_info(void) { - return (struct packed_irq) { evtchn, index, type }; + return (struct irq_info) { .type = IRQT_UNBOUND }; +} + +static struct irq_info mk_evtchn_info(unsigned short evtchn) +{ + return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn }; +} + +static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) +{ + return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, + .u.ipi = ipi }; +} + +static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) +{ + return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, + .u.virq = virq }; +} + +static struct irq_info mk_pirq_info(unsigned short evtchn, + unsigned short gsi, unsigned short vector) +{ + return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, + .u.pirq = { .gsi = gsi, .vector = vector } }; } /* * Accessors for packed IRQ information. */ -static inline unsigned int evtchn_from_irq(int irq) +static struct irq_info *info_for_irq(unsigned irq) { - return irq_info[irq].evtchn; + return &irq_info[irq]; } -static inline unsigned int index_from_irq(int irq) +static unsigned int evtchn_from_irq(unsigned irq) { - return irq_info[irq].index; + return info_for_irq(irq)->evtchn; } -static inline unsigned int type_from_irq(int irq) +static enum ipi_vector ipi_from_irq(unsigned irq) { - return irq_info[irq].type; + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_IPI); + + return info->u.ipi; +} + +static unsigned virq_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_VIRQ); + + return info->u.virq; +} + +static unsigned gsi_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.gsi; +} + +static unsigned vector_from_irq(unsigned irq) +{ + struct irq_info *info = info_for_irq(irq); + + BUG_ON(info == NULL); + BUG_ON(info->type != IRQT_PIRQ); + + return info->u.pirq.vector; +} + +static enum xen_irq_type type_from_irq(unsigned irq) +{ + return info_for_irq(irq)->type; +} + +static unsigned cpu_from_irq(unsigned irq) +{ + return info_for_irq(irq)->cpu; +} + +static unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + unsigned ret = 0; + + if (irq != -1) + ret = cpu_from_irq(irq); + + return ret; } static inline unsigned long active_evtchns(unsigned int cpu, @@ -137,10 +232,10 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); #endif - __clear_bit(chn, cpu_evtchn_mask(cpu_evtchn[chn])); + __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); __set_bit(chn, cpu_evtchn_mask(cpu)); - cpu_evtchn[chn] = cpu; + irq_info[irq].cpu = cpu; } static void init_evtchn_cpu_bindings(void) @@ -155,15 +250,9 @@ static void init_evtchn_cpu_bindings(void) } #endif - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); } -static inline unsigned int cpu_from_evtchn(unsigned int evtchn) -{ - return cpu_evtchn[evtchn]; -} - static inline void clear_evtchn(int port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -253,6 +342,8 @@ static int find_unbound_irq(void) if (WARN_ON(desc == NULL)) return -1; + dynamic_irq_init(irq); + return irq; } @@ -267,12 +358,11 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq == -1) { irq = find_unbound_irq(); - dynamic_irq_init(irq); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_level_irq, "event"); evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + irq_info[irq] = mk_evtchn_info(evtchn); } irq_bindcount[irq]++; @@ -296,7 +386,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) if (irq < 0) goto out; - dynamic_irq_init(irq); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_level_irq, "ipi"); @@ -307,7 +396,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) evtchn = bind_ipi.port; evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + irq_info[irq] = mk_ipi_info(evtchn, ipi); per_cpu(ipi_to_irq, cpu)[ipi] = irq; @@ -341,12 +430,11 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) irq = find_unbound_irq(); - dynamic_irq_init(irq); set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, handle_level_irq, "virq"); evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + irq_info[irq] = mk_virq_info(evtchn, virq); per_cpu(virq_to_irq, cpu)[virq] = irq; @@ -375,11 +463,11 @@ static void unbind_from_irq(unsigned int irq) switch (type_from_irq(irq)) { case IRQT_VIRQ: per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; + [virq_from_irq(irq)] = -1; break; case IRQT_IPI: per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; + [ipi_from_irq(irq)] = -1; break; default: break; @@ -389,7 +477,7 @@ static void unbind_from_irq(unsigned int irq) bind_evtchn_to_cpu(evtchn, 0); evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; + irq_info[irq] = mk_unbound_info(); dynamic_irq_cleanup(irq); } @@ -507,8 +595,8 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) for(i = 0; i < NR_EVENT_CHANNELS; i++) { if (sync_test_bit(i, sh->evtchn_pending)) { printk(" %d: event %d -> irq %d\n", - cpu_evtchn[i], i, - evtchn_to_irq[i]); + cpu_from_evtchn(i), i, + evtchn_to_irq[i]); } } @@ -606,7 +694,7 @@ void rebind_evtchn_irq(int evtchn, int irq) BUG_ON(irq_bindcount[irq] == 0); evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + irq_info[irq] = mk_evtchn_info(evtchn); spin_unlock(&irq_mapping_update_lock); @@ -716,8 +804,7 @@ static void restore_cpu_virqs(unsigned int cpu) if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) continue; - BUG_ON(irq_info[irq].type != IRQT_VIRQ); - BUG_ON(irq_info[irq].index != virq); + BUG_ON(virq_from_irq(irq) != virq); /* Get a new binding from Xen. */ bind_virq.virq = virq; @@ -729,7 +816,7 @@ static void restore_cpu_virqs(unsigned int cpu) /* Record the new mapping. */ evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + irq_info[irq] = mk_virq_info(evtchn, virq); bind_evtchn_to_cpu(evtchn, cpu); /* Ready for use. */ @@ -746,8 +833,7 @@ static void restore_cpu_ipis(unsigned int cpu) if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) continue; - BUG_ON(irq_info[irq].type != IRQT_IPI); - BUG_ON(irq_info[irq].index != ipi); + BUG_ON(ipi_from_irq(irq) != ipi); /* Get a new binding from Xen. */ bind_ipi.vcpu = cpu; @@ -758,7 +844,7 @@ static void restore_cpu_ipis(unsigned int cpu) /* Record the new mapping. */ evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + irq_info[irq] = mk_ipi_info(evtchn, ipi); bind_evtchn_to_cpu(evtchn, cpu); /* Ready for use. */ From d77bbd4db475e2edc78edb7f94a258159c140b54 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:45 -0800 Subject: [PATCH 423/682] xen: remove irq bindcount There should be no need for us to maintain our own bind count for irqs, since the surrounding irq system should keep track of shared irqs for us. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0541e07d4f6..459121c5325 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -54,7 +54,7 @@ static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = - /* Interrupt types. */ enum xen_irq_type { - IRQT_UNBOUND, + IRQT_UNBOUND = 0, IRQT_PIRQ, IRQT_VIRQ, IRQT_IPI, @@ -102,9 +102,6 @@ static inline unsigned long *cpu_evtchn_mask(int cpu) return cpu_evtchn_mask_p[cpu].bits; } -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) @@ -330,9 +327,8 @@ static int find_unbound_irq(void) int irq; struct irq_desc *desc; - /* Only allocate from dynirq range */ for (irq = 0; irq < nr_irqs; irq++) - if (irq_bindcount[irq] == 0) + if (irq_info[irq].type == IRQT_UNBOUND) break; if (irq == nr_irqs) @@ -365,8 +361,6 @@ int bind_evtchn_to_irq(unsigned int evtchn) irq_info[irq] = mk_evtchn_info(evtchn); } - irq_bindcount[irq]++; - spin_unlock(&irq_mapping_update_lock); return irq; @@ -403,8 +397,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) bind_evtchn_to_cpu(evtchn, cpu); } - irq_bindcount[irq]++; - out: spin_unlock(&irq_mapping_update_lock); return irq; @@ -441,8 +433,6 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) bind_evtchn_to_cpu(evtchn, cpu); } - irq_bindcount[irq]++; - spin_unlock(&irq_mapping_update_lock); return irq; @@ -455,7 +445,7 @@ static void unbind_from_irq(unsigned int irq) spin_lock(&irq_mapping_update_lock); - if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { + if (VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) BUG(); @@ -681,6 +671,8 @@ out: /* Rebind a new event channel to an existing irq. */ void rebind_evtchn_irq(int evtchn, int irq) { + struct irq_info *info = info_for_irq(irq); + /* Make sure the irq is masked, since the new event channel will also be masked. */ disable_irq(irq); @@ -690,8 +682,8 @@ void rebind_evtchn_irq(int evtchn, int irq) /* After resume the irq<->evtchn mappings are all cleared out */ BUG_ON(evtchn_to_irq[evtchn] != -1); /* Expect irq to have been bound before, - so the bindcount should be non-0 */ - BUG_ON(irq_bindcount[irq] == 0); + so there should be a proper type */ + BUG_ON(info->type == IRQT_UNBOUND); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_evtchn_info(evtchn); @@ -948,9 +940,5 @@ void __init xen_init_IRQ(void) for (i = 0; i < NR_EVENT_CHANNELS; i++) mask_evtchn(i); - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for (i = 0; i < nr_irqs; i++) - irq_bindcount[i] = 0; - irq_ctx_init(smp_processor_id()); } From 3445a8fd7c6868bd9db0d1bea7d6e89004552122 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 14:09:46 -0800 Subject: [PATCH 424/682] xen: make sure that softirqs get handled at the end of event processing Make sure that irq_enter()/irq_exit() wrap the entire event processing loop, rather than each individual event invokation. This makes sure that softirq processing is deferred until the end of event processing, rather than in the middle with interrupts disabled. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 459121c5325..e53fd60fe4b 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -595,25 +595,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } - -static void xen_do_irq(unsigned irq, struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - if (WARN_ON(irq == -1)) - return; - - exit_idle(); - irq_enter(); - - //printk("cpu %d handling irq %d\n", smp_processor_id(), info->irq); - handle_irq(irq, regs); - - irq_exit(); - - set_irq_regs(old_regs); -} - /* * Search the CPUs pending events bitmasks. For each one found, map * the event number to an irq, and feed it into do_IRQ() for @@ -626,11 +607,15 @@ static void xen_do_irq(unsigned irq, struct pt_regs *regs) void xen_evtchn_do_upcall(struct pt_regs *regs) { int cpu = get_cpu(); + struct pt_regs *old_regs = set_irq_regs(regs); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); static DEFINE_PER_CPU(unsigned, nesting_count); unsigned count; + exit_idle(); + irq_enter(); + do { unsigned long pending_words; @@ -654,7 +639,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; - xen_do_irq(irq, regs); + if (irq != -1) + handle_irq(irq, regs); } } @@ -665,6 +651,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) } while(count != 1); out: + irq_exit(); + set_irq_regs(old_regs); + put_cpu(); } From 90af9514ac99f51e81682c7bec8f9fb88a17a95c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 16:55:58 -0800 Subject: [PATCH 425/682] xen: explicitly initialise the cpu field of irq_info I was seeing a very odd crash on 64 bit in bind_evtchn_to_cpu because cpu_from_irq(irq) was coming out as -1. I found this was coming direct from the mk_ipi_info call. It's not clear to me that this isn't a compiler bug (implicit initialisation to zero of unsigned shorts in a struct not handled correctly?). On the other hand is it true that all event channels start of bound to CPU 0? If not then -1 might be correct and the various other functions should cope with this. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- drivers/xen/events.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index e53fd60fe4b..30963af5dba 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -115,26 +115,27 @@ static struct irq_info mk_unbound_info(void) static struct irq_info mk_evtchn_info(unsigned short evtchn) { - return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn }; + return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn, + .cpu = 0 }; } static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) { return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, - .u.ipi = ipi }; + .cpu = 0, .u.ipi = ipi }; } static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) { return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, - .u.virq = virq }; + .cpu = 0, .u.virq = virq }; } static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short gsi, unsigned short vector) { return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, - .u.pirq = { .gsi = gsi, .vector = vector } }; + .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; } /* @@ -375,6 +376,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) spin_lock(&irq_mapping_update_lock); irq = per_cpu(ipi_to_irq, cpu)[ipi]; + if (irq == -1) { irq = find_unbound_irq(); if (irq < 0) @@ -391,7 +393,6 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_ipi_info(evtchn, ipi); - per_cpu(ipi_to_irq, cpu)[ipi] = irq; bind_evtchn_to_cpu(evtchn, cpu); From 1c14fa4937eb73509e07ac12bf8db1fdf4c42a59 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Feb 2009 15:39:38 -0800 Subject: [PATCH 426/682] x86: use early_ioremap in __acpi_map_table __acpi_map_table() effectively reimplements early_ioremap(). Rather than have that duplication, just implement it in terms of early_ioremap(). However, unlike early_ioremap(), __acpi_map_table() just maintains a single mapping which gets replaced each call, and has no corresponding unmap function. Implement this by just removing the previous mapping each time its called. Unfortunately, this will leave a stray mapping at the end. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 3 --- arch/x86/include/asm/fixmap_32.h | 4 ---- arch/x86/include/asm/fixmap_64.h | 4 ---- arch/x86/kernel/acpi/boot.c | 27 +++++++-------------------- 4 files changed, 7 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 9830681446a..4518dc50090 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -102,9 +102,6 @@ static inline void disable_acpi(void) acpi_noirq = 1; } -/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ -#define FIX_ACPI_PAGES 4 - extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); static inline void acpi_noirq_set(void) { acpi_noirq = 1; } diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index c7115c1d721..047d9bab2b3 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -95,10 +95,6 @@ enum fixed_addresses { (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, FIX_WP_TEST, -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 00a30ab9b1a..298d9ba3fae 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -50,10 +50,6 @@ enum fixed_addresses { FIX_PARAVIRT_BOOTMAP, #endif __end_of_permanent_fixed_addresses, -#ifdef CONFIG_ACPI - FIX_ACPI_BEGIN, - FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, -#endif #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT FIX_OHCI1394_BASE, #endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c2f43..c518599e426 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -121,8 +121,8 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; */ char *__init __acpi_map_table(unsigned long phys, unsigned long size) { - unsigned long base, offset, mapped_size; - int idx; + static char *prev_map; + static unsigned long prev_size; if (!phys || !size) return NULL; @@ -130,26 +130,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) return __va(phys); - offset = phys & (PAGE_SIZE - 1); - mapped_size = PAGE_SIZE - offset; - clear_fixmap(FIX_ACPI_END); - set_fixmap(FIX_ACPI_END, phys); - base = fix_to_virt(FIX_ACPI_END); + if (prev_map) + early_iounmap(prev_map, prev_size); - /* - * Most cases can be covered by the below. - */ - idx = FIX_ACPI_END; - while (mapped_size < size) { - if (--idx < FIX_ACPI_BEGIN) - return NULL; /* cannot handle this */ - phys += PAGE_SIZE; - clear_fixmap(idx); - set_fixmap(idx, phys); - mapped_size += PAGE_SIZE; - } + prev_size = size; + prev_map = early_ioremap(phys, size); - return ((unsigned char *)base + offset); + return prev_map; } #ifdef CONFIG_PCI_MMCONFIG From eecb9a697f0b790e5840dae8a8b866bea49a86ee Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Feb 2009 15:39:38 -0800 Subject: [PATCH 427/682] x86: always explicitly map acpi memory Always map acpi tables, rather than assuming we can use the normal linear mapping to access the acpi tables. This is necessary in a virtual environment where the linear mappings are to pseudo-physical memory, but the acpi tables exist at a real physical address. It doesn't hurt to map in the normal non-virtual case, so just do it unconditionally. Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c518599e426..5424a18f2e4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -127,9 +127,6 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) if (!phys || !size) return NULL; - if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) - return __va(phys); - if (prev_map) early_iounmap(prev_map, prev_size); From 05876f88ed9a66b26af613e222795ae790616252 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Feb 2009 15:39:40 -0800 Subject: [PATCH 428/682] acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap On x86, __acpi_map_table uses early_ioremap() to create the mapping, replacing the previous mapping with a new one. Once enough of the kernel is up an running it switches to using normal ioremap(). At that point, we need to clean up the final mapping to avoid a warning from the early_ioremap subsystem. This can be removed after all the instances in the ACPI code are fixed that rely on early-ioremap's implicit overmapping of previously mapped tables. Signed-off-by: Jeremy Fitzhardinge Acked-by: Len Brown Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 8 +++++--- drivers/acpi/bus.c | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5424a18f2e4..7217834f6b1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -124,12 +124,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) static char *prev_map; static unsigned long prev_size; + if (prev_map) { + early_iounmap(prev_map, prev_size); + prev_map = NULL; + } + if (!phys || !size) return NULL; - if (prev_map) - early_iounmap(prev_map, prev_size); - prev_size = size; prev_map = early_ioremap(phys, size); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 765fd1c56cd..fb1be7b5dbc 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -694,6 +694,12 @@ void __init acpi_early_init(void) if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE; + /* + * Doing a zero-sized mapping will clear out the previous + * __acpi_map_table() mapping, if any. + */ + __acpi_map_table(0, 0); + acpi_gbl_permanent_mmap = 1; status = acpi_reallocate_root_table(); From 7d97277b754d3ee098a5ec69b6aaafb00c94e2f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Feb 2009 15:39:41 -0800 Subject: [PATCH 429/682] acpi/x86: introduce __apci_map_table, v4 to prevent wrongly overwriting fixmap that still want to use. ACPI used to rely on low mappings being all linearly mapped and grew a habit: it never really unmapped certain kinds of tables after use. This can cause problems - for example the hypothetical case when some spurious access still references it. v2: remove prev_map and prev_size in __apci_map_table v3: let acpi_os_unmap_memory() call early_iounmap too, so remove extral calling to early_acpi_os_unmap_memory v4: fix typo in one acpi_get_table_with_size calling Signed-off-by: Yinghai Lu Acked-by: Len Brown Signed-off-by: Ingo Molnar --- arch/ia64/kernel/acpi.c | 4 ++++ arch/x86/kernel/acpi/boot.c | 17 +++++++---------- drivers/acpi/acpica/tbxface.c | 17 ++++++++++++++--- drivers/acpi/bus.c | 6 ------ drivers/acpi/osl.c | 11 +++++++++-- drivers/acpi/tables.c | 20 ++++++++++++++------ include/acpi/acpiosxf.h | 1 + include/acpi/acpixf.h | 4 ++++ include/linux/acpi.h | 1 + 9 files changed, 54 insertions(+), 27 deletions(-) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index d541671caf4..2363ed17319 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -199,6 +199,10 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) return __va(phys_addr); } +char *__init __acpi_unmap_table(unsigned long virt_addr, unsigned long size) +{ +} + /* -------------------------------------------------------------------------- Boot-time Table Parsing -------------------------------------------------------------------------- */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7217834f6b1..4c2aaea4293 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -121,21 +121,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; */ char *__init __acpi_map_table(unsigned long phys, unsigned long size) { - static char *prev_map; - static unsigned long prev_size; - - if (prev_map) { - early_iounmap(prev_map, prev_size); - prev_map = NULL; - } if (!phys || !size) return NULL; - prev_size = size; - prev_map = early_ioremap(phys, size); + return early_ioremap(phys, size); +} +void __init __acpi_unmap_table(char *map, unsigned long size) +{ + if (!map || !size) + return; - return prev_map; + early_iounmap(map, size); } #ifdef CONFIG_PCI_MMCONFIG diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c index c3e841f3cde..ab0aff3c7d6 100644 --- a/drivers/acpi/acpica/tbxface.c +++ b/drivers/acpi/acpica/tbxface.c @@ -365,7 +365,7 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) /******************************************************************************* * - * FUNCTION: acpi_get_table + * FUNCTION: acpi_get_table_with_size * * PARAMETERS: Signature - ACPI signature of needed table * Instance - Which instance (for SSDTs) @@ -377,8 +377,9 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) * *****************************************************************************/ acpi_status -acpi_get_table(char *signature, - u32 instance, struct acpi_table_header **out_table) +acpi_get_table_with_size(char *signature, + u32 instance, struct acpi_table_header **out_table, + acpi_size *tbl_size) { u32 i; u32 j; @@ -408,6 +409,7 @@ acpi_get_table(char *signature, acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]); if (ACPI_SUCCESS(status)) { *out_table = acpi_gbl_root_table_list.tables[i].pointer; + *tbl_size = acpi_gbl_root_table_list.tables[i].length; } if (!acpi_gbl_permanent_mmap) { @@ -420,6 +422,15 @@ acpi_get_table(char *signature, return (AE_NOT_FOUND); } +acpi_status +acpi_get_table(char *signature, + u32 instance, struct acpi_table_header **out_table) +{ + acpi_size tbl_size; + + return acpi_get_table_with_size(signature, + instance, out_table, &tbl_size); +} ACPI_EXPORT_SYMBOL(acpi_get_table) /******************************************************************************* diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index fb1be7b5dbc..765fd1c56cd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -694,12 +694,6 @@ void __init acpi_early_init(void) if (!acpi_strict) acpi_gbl_enable_interpreter_slack = TRUE; - /* - * Doing a zero-sized mapping will clear out the previous - * __acpi_map_table() mapping, if any. - */ - __acpi_map_table(0, 0); - acpi_gbl_permanent_mmap = 1; status = acpi_reallocate_root_table(); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index b3193ec0a2e..d1dd5160daa 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -274,12 +274,19 @@ EXPORT_SYMBOL_GPL(acpi_os_map_memory); void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) { - if (acpi_gbl_permanent_mmap) { + if (acpi_gbl_permanent_mmap) iounmap(virt); - } + else + __acpi_unmap_table(virt, size); } EXPORT_SYMBOL_GPL(acpi_os_unmap_memory); +void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size) +{ + if (!acpi_gbl_permanent_mmap) + __acpi_unmap_table(virt, size); +} + #ifdef ACPI_FUTURE_USAGE acpi_status acpi_os_get_physical_address(void *virt, acpi_physical_address * phys) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index a8852952fac..fec1ae36d43 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -181,14 +181,15 @@ acpi_table_parse_entries(char *id, struct acpi_subtable_header *entry; unsigned int count = 0; unsigned long table_end; + acpi_size tbl_size; if (!handler) return -EINVAL; if (strncmp(id, ACPI_SIG_MADT, 4) == 0) - acpi_get_table(id, acpi_apic_instance, &table_header); + acpi_get_table_with_size(id, acpi_apic_instance, &table_header, &tbl_size); else - acpi_get_table(id, 0, &table_header); + acpi_get_table_with_size(id, 0, &table_header, &tbl_size); if (!table_header) { printk(KERN_WARNING PREFIX "%4.4s not present\n", id); @@ -206,8 +207,10 @@ acpi_table_parse_entries(char *id, table_end) { if (entry->type == entry_id && (!max_entries || count++ < max_entries)) - if (handler(entry, table_end)) + if (handler(entry, table_end)) { + early_acpi_os_unmap_memory((char *)table_header, tbl_size); return -EINVAL; + } entry = (struct acpi_subtable_header *) ((unsigned long)entry + entry->length); @@ -217,6 +220,7 @@ acpi_table_parse_entries(char *id, "%i found\n", id, entry_id, count - max_entries, count); } + early_acpi_os_unmap_memory((char *)table_header, tbl_size); return count; } @@ -241,17 +245,19 @@ acpi_table_parse_madt(enum acpi_madt_type id, int __init acpi_table_parse(char *id, acpi_table_handler handler) { struct acpi_table_header *table = NULL; + acpi_size tbl_size; if (!handler) return -EINVAL; if (strncmp(id, ACPI_SIG_MADT, 4) == 0) - acpi_get_table(id, acpi_apic_instance, &table); + acpi_get_table_with_size(id, acpi_apic_instance, &table, &tbl_size); else - acpi_get_table(id, 0, &table); + acpi_get_table_with_size(id, 0, &table, &tbl_size); if (table) { handler(table); + early_acpi_os_unmap_memory(table, tbl_size); return 0; } else return 1; @@ -265,8 +271,9 @@ int __init acpi_table_parse(char *id, acpi_table_handler handler) static void __init check_multiple_madt(void) { struct acpi_table_header *table = NULL; + acpi_size tbl_size; - acpi_get_table(ACPI_SIG_MADT, 2, &table); + acpi_get_table_with_size(ACPI_SIG_MADT, 2, &table, &tbl_size); if (table) { printk(KERN_WARNING PREFIX "BIOS bug: multiple APIC/MADT found," @@ -275,6 +282,7 @@ static void __init check_multiple_madt(void) "If \"acpi_apic_instance=%d\" works better, " "notify linux-acpi@vger.kernel.org\n", acpi_apic_instance ? 0 : 2); + early_acpi_os_unmap_memory(table, tbl_size); } else acpi_apic_instance = 0; diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a62720a7edc..ab0b85cf21f 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -144,6 +144,7 @@ void __iomem *acpi_os_map_memory(acpi_physical_address where, acpi_size length); void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size); +void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size); #ifdef ACPI_FUTURE_USAGE acpi_status diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c8e8cf45830..cc40102fe2f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -130,6 +130,10 @@ acpi_get_table_header(acpi_string signature, struct acpi_table_header *out_table_header); acpi_status +acpi_get_table_with_size(acpi_string signature, + u32 instance, struct acpi_table_header **out_table, + acpi_size *tbl_size); +acpi_status acpi_get_table(acpi_string signature, u32 instance, struct acpi_table_header **out_table); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6fce2fc2d12..d59f0fa4d77 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +void __init __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); From b825e6cc7b1401862531df497a4a4daff8102ed5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Feb 2009 15:39:41 -0800 Subject: [PATCH 430/682] x86, es7000: fix ACPI table mappings Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 53699c931ad..71d7be624d4 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -292,24 +292,31 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; + acpi_size tbl_size; - while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { + while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { struct oem_table *t = (struct oem_table *)header; oem_addrX = t->OEMTableAddr; oem_size = t->OEMTableSize; + early_acpi_os_unmap_memory(header, tbl_size); *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); return 0; } + early_acpi_os_unmap_memory(header, tbl_size); } return -1; } void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { + if (!oem_addr) + return; + + __acpi_unmap_table((char *)oem_addr, oem_size); } #endif From 3c552ac8a747d6c26d13302c54d71dae9f56f4ac Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 431/682] x86: make apic_* operations inline functions Mainly to get proper type-checking and consistency. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/apic.h | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b03711d7990..f4835a1be36 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -139,12 +139,35 @@ struct apic_ops { extern struct apic_ops *apic_ops; -#define apic_read (apic_ops->read) -#define apic_write (apic_ops->write) -#define apic_icr_read (apic_ops->icr_read) -#define apic_icr_write (apic_ops->icr_write) -#define apic_wait_icr_idle (apic_ops->wait_icr_idle) -#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) +static inline u32 apic_read(u32 reg) +{ + return apic_ops->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic_ops->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic_ops->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic_ops->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic_ops->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic_ops->safe_wait_icr_idle(); +} extern int get_physical_broadcast(void); From 4924e228ae039029a9503ad571d91086e4042c90 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 432/682] x86: unstatic mp_find_ioapic so it can be used elsewhere Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/mpspec.h | 1 + arch/x86/kernel/acpi/boot.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 8c5620147c4..b59371a312f 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -77,6 +77,7 @@ extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); +extern int mp_find_ioapic(int gsi); #else static inline int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c334fe75dcd..068b900f4b0 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -872,7 +872,7 @@ static struct { DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } mp_ioapic_routing[MAX_IO_APICS]; -static int mp_find_ioapic(int gsi) +int mp_find_ioapic(int gsi) { int i = 0; From c3e137d1e882c4fab9adcce7ae2be9bf3eb64c4c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 433/682] x86: add mp_find_ioapic_pin Add mp_find_ioapic_pin() to find an IO APIC's specific pin from a GSI, and use this function within acpi/boot. Make it non-static so other code can use it too. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/mpspec.h | 1 + arch/x86/kernel/acpi/boot.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index b59371a312f..5916c8df09d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -78,6 +78,7 @@ extern int acpi_probe_gsi(void); extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity); extern int mp_find_ioapic(int gsi); +extern int mp_find_ioapic_pin(int ioapic, int gsi); #else static inline int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 068b900f4b0..bba162c81d5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -887,6 +887,16 @@ int mp_find_ioapic(int gsi) return -1; } +int mp_find_ioapic_pin(int ioapic, int gsi) +{ + if (WARN_ON(ioapic == -1)) + return -1; + if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) + return -1; + + return gsi - mp_ioapic_routing[ioapic].gsi_base; +} + static u8 __init uniq_ioapic_id(u8 id) { #ifdef CONFIG_X86_32 @@ -1022,7 +1032,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) ioapic = mp_find_ioapic(gsi); if (ioapic < 0) return; - pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + pin = mp_find_ioapic_pin(ioapic, gsi); /* * TBD: This check is for faulty timer entries, where the override @@ -1142,7 +1152,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) return gsi; } - ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; + ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); #ifdef CONFIG_X86_32 if (ioapic_renumber_irq) @@ -1231,7 +1241,7 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ioapic = mp_find_ioapic(gsi); mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; - mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; + mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); save_mp_irq(&mp_irq); #endif From ca97ab90164c7b978abf9d82dc82d6dc2cbac4a0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:47 -0800 Subject: [PATCH 434/682] x86: unstatic ioapic entry funcs Unstatic ioapic_write_entry and setup_ioapic_entry functions so that the Xen code can do its own ioapic routing setup. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/io_apic.h | 6 ++++++ arch/x86/kernel/io_apic.c | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 309d0e23193..59cb4a1317b 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -169,6 +169,12 @@ extern void reinit_intr_remapped_IO_APIC(int); extern void probe_nr_irqs_gsi(void); +extern int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector); +extern void ioapic_write_entry(int apic, int pin, + struct IO_APIC_route_entry e); #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 static const int timer_through_8259 = 0; diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 56e51eb551a..7248ca11bdc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -486,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) io_apic_write(apic, 0x10 + 2*pin, eu.w1); } -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); @@ -1478,10 +1478,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t handle_edge_irq, "edge"); } -static int setup_ioapic_entry(int apic_id, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) +int setup_ioapic_entry(int apic_id, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) { /* * add it to the IO-APIC irq-routing table: From bf56957d176c279175464f385f3eb03d65819328 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:48 -0800 Subject: [PATCH 435/682] xen: expose enable_IO_APIC for 32-bit enable_IO_APIC() is defined for both 32- and 64-bit x86, so it should be declared for both. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/hw_irq.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1b82781b898..370e1c83bb4 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -65,9 +65,7 @@ extern void disable_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); extern void setup_ioapic_dest(void); -#ifdef CONFIG_X86_64 extern void enable_IO_APIC(void); -#endif /* Statistics */ extern atomic_t irq_err_count; From 6cd61c0baa8bce32271226198b46c67a7a05d108 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 436/682] elf: add ELF_CORE_COPY_KERNEL_REGS() ELF core dump is used for both user land core dump and kernel crash dump. Depending on architecture, register might need to be accessed differently for userland and kernel. Allow architectures to define ELF_CORE_COPY_KERNEL_REGS() and use different operation for kernel register dump. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/elfcore.h | 9 +++++++++ kernel/kexec.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 5ca54d77079..7605c5e9589 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re #endif } +static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +{ +#ifdef ELF_CORE_COPY_KERNEL_REGS + ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); +#else + elf_core_copy_regs(elfregs, regs); +#endif +} + static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { #ifdef ELF_CORE_COPY_TASK_REGS diff --git a/kernel/kexec.c b/kernel/kexec.c index 8a6d7b08864..795e7b67a22 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu) return; memset(&prstatus, 0, sizeof(prstatus)); prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); + elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, &prstatus, sizeof(prstatus)); final_note(buf); From 76397f72fb9f4c9a96dfe05462887811c81b0e17 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 437/682] x86: stackprotector.h misc update Impact: misc udpate * wrap content with CONFIG_CC_STACK_PROTECTOR so that other arch files can include it directly * add missing includes This will help future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stackprotector.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 36a700acaf2..ee275e9f48a 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -1,8 +1,12 @@ #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 +#ifdef CONFIG_CC_STACKPROTECTOR + #include #include +#include +#include /* * Initialize the stackprotector canary value. @@ -35,4 +39,5 @@ static __always_inline void boot_init_stack_canary(void) percpu_write(irq_stack_union.stack_canary, canary); } -#endif +#endif /* CC_STACKPROTECTOR */ +#endif /* _ASM_STACKPROTECTOR_H */ From 5d707e9c8ef2a3596ed5c975c6ff05cec890c2b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: [PATCH 438/682] stackprotector: update make rules Impact: no default -fno-stack-protector if stackp is enabled, cleanup Stackprotector make rules had the following problems. * cc support test and warning are scattered across makefile and kernel/panic.c. * -fno-stack-protector was always added regardless of configuration. Update such that cc support test and warning are contained in makefile and -fno-stack-protector is added iff stackp is turned off. While at it, prepare for 32bit support. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- Makefile | 3 ++- arch/x86/Makefile | 17 ++++++++++------- kernel/panic.c | 4 ---- scripts/gcc-x86_64-has-stack-protector.sh | 4 +++- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 681c1d23b4d..77a006dae2d 100644 --- a/Makefile +++ b/Makefile @@ -532,8 +532,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) endif # Force gcc to behave correct even for buggy distributions -# Arch Makefiles may override this setting +ifndef CONFIG_CC_STACKPROTECTOR KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) +endif ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls diff --git a/arch/x86/Makefile b/arch/x86/Makefile index cacee981d16..ab48ab497e5 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -70,14 +70,17 @@ else # this works around some issues with generating unwind tables in older gccs # newer gccs do it by default KBUILD_CFLAGS += -maccumulate-outgoing-args +endif - stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh - stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ - "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) - stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ - "$(CC)" -fstack-protector-all ) - - KBUILD_CFLAGS += $(stackp-y) +ifdef CONFIG_CC_STACKPROTECTOR + cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh + ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) + stackp-y := -fstack-protector + stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all + KBUILD_CFLAGS += $(stackp-y) + else + $(warning stack protector enabled but no compiler support) + endif endif # Stackpointer is addressed different for 32 bit and 64 bit x86 diff --git a/kernel/panic.c b/kernel/panic.c index 33cab3de176..32fe4eff1b8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -359,10 +359,6 @@ EXPORT_SYMBOL(warn_slowpath); #ifdef CONFIG_CC_STACKPROTECTOR -#ifndef GCC_HAS_SP -#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this. -#endif - /* * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 325c0a1b03b..2d69fcdc560 100644 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -2,5 +2,7 @@ echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then - echo $2 + echo y +else + echo n fi From d627ded5ab2f7818154d57369e3a8cdb40db2569 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 439/682] x86: no stack protector for vdso Impact: avoid crash on vsyscall Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 4d6ef0a336d..16a9020c8f1 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ - $(filter -g%,$(KBUILD_CFLAGS)) + $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) $(vobjs): KBUILD_CFLAGS += $(CFL) From f0d96110f9fd98a1a22e03b8adba69508843d910 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 440/682] x86: use asm .macro instead of cpp #define in entry_32.S Impact: cleanup Use .macro instead of cpp #define where approriate. This cleans up code and will ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 293 +++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 142 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index a0b91aac72a..c461925d3b6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -101,121 +101,127 @@ #define resume_userspace_sig resume_userspace #endif -#define SAVE_ALL \ - cld; \ - pushl %fs; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET fs, 0;*/\ - pushl %es; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET es, 0;*/\ - pushl %ds; \ - CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET ds, 0;*/\ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET eax, 0;\ - pushl %ebp; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebp, 0;\ - pushl %edi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edi, 0;\ - pushl %esi; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET esi, 0;\ - pushl %edx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET edx, 0;\ - pushl %ecx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ecx, 0;\ - pushl %ebx; \ - CFI_ADJUST_CFA_OFFSET 4;\ - CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ - movl %edx, %ds; \ - movl %edx, %es; \ - movl $(__KERNEL_PERCPU), %edx; \ +.macro SAVE_ALL + cld + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0;*/ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0;*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0;*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + movl $(__USER_DS), %edx + movl %edx, %ds + movl %edx, %es + movl $(__KERNEL_PERCPU), %edx movl %edx, %fs +.endm -#define RESTORE_INT_REGS \ - popl %ebx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebx;\ - popl %ecx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ecx;\ - popl %edx; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edx;\ - popl %esi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE esi;\ - popl %edi; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE edi;\ - popl %ebp; \ - CFI_ADJUST_CFA_OFFSET -4;\ - CFI_RESTORE ebp;\ - popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4;\ +.macro RESTORE_INT_REGS + popl %ebx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebx + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ecx + popl %edx + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edx + popl %esi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE esi + popl %edi + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE edi + popl %ebp + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE ebp + popl %eax + CFI_ADJUST_CFA_OFFSET -4 CFI_RESTORE eax +.endm -#define RESTORE_REGS \ - RESTORE_INT_REGS; \ -1: popl %ds; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE ds;*/\ -2: popl %es; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE es;*/\ -3: popl %fs; \ - CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE fs;*/\ -.pushsection .fixup,"ax"; \ -4: movl $0,(%esp); \ - jmp 1b; \ -5: movl $0,(%esp); \ - jmp 2b; \ -6: movl $0,(%esp); \ - jmp 3b; \ -.section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ - .long 3b,6b; \ +.macro RESTORE_REGS + RESTORE_INT_REGS +1: popl %ds + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE ds;*/ +2: popl %es + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE es;*/ +3: popl %fs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE fs;*/ +.pushsection .fixup, "ax" +4: movl $0, (%esp) + jmp 1b +5: movl $0, (%esp) + jmp 2b +6: movl $0, (%esp) + jmp 3b +.section __ex_table, "a" + .align 4 + .long 1b, 4b + .long 2b, 5b + .long 3b, 6b .popsection +.endm -#define RING0_INT_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 3*4;\ - /*CFI_OFFSET cs, -2*4;*/\ +.macro RING0_INT_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 3*4 + /*CFI_OFFSET cs, -2*4;*/ CFI_OFFSET eip, -3*4 +.endm -#define RING0_EC_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, 4*4;\ - /*CFI_OFFSET cs, -2*4;*/\ +.macro RING0_EC_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 4*4 + /*CFI_OFFSET cs, -2*4;*/ CFI_OFFSET eip, -3*4 +.endm -#define RING0_PTREGS_FRAME \ - CFI_STARTPROC simple;\ - CFI_SIGNAL_FRAME;\ - CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ - /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ - CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ - /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ - /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ - CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ - CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ - CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ - CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ - CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ - CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ +.macro RING0_PTREGS_FRAME + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, PT_OLDESP-PT_EBX + /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ + CFI_OFFSET eip, PT_EIP-PT_OLDESP + /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ + /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ + CFI_OFFSET eax, PT_EAX-PT_OLDESP + CFI_OFFSET ebp, PT_EBP-PT_OLDESP + CFI_OFFSET edi, PT_EDI-PT_OLDESP + CFI_OFFSET esi, PT_ESI-PT_OLDESP + CFI_OFFSET edx, PT_EDX-PT_OLDESP + CFI_OFFSET ecx, PT_ECX-PT_OLDESP CFI_OFFSET ebx, PT_EBX-PT_OLDESP +.endm ENTRY(ret_from_fork) CFI_STARTPROC @@ -595,28 +601,30 @@ syscall_badsys: END(syscall_badsys) CFI_ENDPROC -#define FIXUP_ESPFIX_STACK \ - /* since we are on a wrong stack, we cant make it a C code :( */ \ - PER_CPU(gdt_page, %ebx); \ - GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ - addl %esp, %eax; \ - pushl $__KERNEL_DS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl %eax; \ - CFI_ADJUST_CFA_OFFSET 4; \ - lss (%esp), %esp; \ - CFI_ADJUST_CFA_OFFSET -8; -#define UNWIND_ESPFIX_STACK \ - movl %ss, %eax; \ - /* see if on espfix stack */ \ - cmpw $__ESPFIX_SS, %ax; \ - jne 27f; \ - movl $__KERNEL_DS, %eax; \ - movl %eax, %ds; \ - movl %eax, %es; \ - /* switch to normal stack */ \ - FIXUP_ESPFIX_STACK; \ -27:; +.macro FIXUP_ESPFIX_STACK + /* since we are on a wrong stack, we cant make it a C code :( */ + PER_CPU(gdt_page, %ebx) + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + addl %esp, %eax + pushl $__KERNEL_DS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 +.endm +.macro UNWIND_ESPFIX_STACK + movl %ss, %eax + /* see if on espfix stack */ + cmpw $__ESPFIX_SS, %ax + jne 27f + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + /* switch to normal stack */ + FIXUP_ESPFIX_STACK +27: +.endm /* * Build the entry stubs and pointer table with some assembler magic. @@ -1136,26 +1144,27 @@ END(page_fault) * by hand onto the new stack - while updating the return eip past * the instruction that would have done it for sysenter. */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_sp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ +.macro FIX_STACK offset ok label + cmpw $__KERNEL_CS, 4(%esp) + jne \ok +\label: + movl TSS_sysenter_sp0 + \offset(%esp), %esp + CFI_DEF_CFA esp, 0 + CFI_UNDEFINED eip + pushfl + CFI_ADJUST_CFA_OFFSET 4 + pushl $__KERNEL_CS + CFI_ADJUST_CFA_OFFSET 4 + pushl $sysenter_past_esp + CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eip, 0 +.endm ENTRY(debug) RING0_INT_FRAME cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) + FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn debug_stack_correct: pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 @@ -1213,7 +1222,7 @@ nmi_stack_correct: nmi_stack_fixup: RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) + FIX_STACK 12, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_debug_stack_check: @@ -1224,7 +1233,7 @@ nmi_debug_stack_check: jb nmi_stack_correct cmpl $debug_esp_fix_insn,(%esp) ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) + FIX_STACK 24, nmi_stack_correct, 1 jmp nmi_stack_correct nmi_espfix_stack: From d9a89a26e02ef9ed03f74a755a8b4d8f3a066622 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 441/682] x86: add %gs accessors for x86_32 Impact: cleanup On x86_32, %gs is handled lazily. It's not saved and restored on kernel entry/exit but only when necessary which usually is during task switch but there are few other places. Currently, it's done by calling savesegment() and loadsegment() explicitly. Define get_user_gs(), set_user_gs() and task_user_gs() and use them instead. While at it, clean up register access macros in signal.c. This cleans up code a bit and will help future changes. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/a.out-core.h | 2 +- arch/x86/include/asm/elf.h | 2 +- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/system.h | 9 +++++++ arch/x86/kernel/process_32.c | 6 ++--- arch/x86/kernel/ptrace.c | 14 +++++----- arch/x86/kernel/signal.c | 41 ++++++++++++------------------ arch/x86/kernel/vm86_32.c | 4 +-- arch/x86/math-emu/get_address.c | 6 ++--- 9 files changed, 41 insertions(+), 45 deletions(-) diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 3c601f8224b..bb70e397aa8 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) dump->regs.ds = (u16)regs->ds; dump->regs.es = (u16)regs->es; dump->regs.fs = (u16)regs->fs; - savesegment(gs, dump->regs.gs); + dump->regs.gs = get_user_gs(regs); dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = (u16)regs->cs; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f51a3ddde01..39b0aac1675 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -124,7 +124,7 @@ do { \ pr_reg[7] = regs->ds & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \ - savesegment(gs, pr_reg[10]); \ + pr_reg[10] = get_user_gs(regs); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs & 0xffff; \ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 52948df9cd1..4955165682c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -79,7 +79,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - loadsegment(gs, 0); \ + set_user_gs(task_pt_regs(tsk), 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2fcc70bc85f..70c74b8db87 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -182,6 +182,15 @@ extern void native_load_gs_index(unsigned); #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") +/* + * x86_32 user gs accessors. + */ +#ifdef CONFIG_X86_32 +#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) +#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) +#define task_user_gs(tsk) ((tsk)->thread.gs) +#endif + static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 1a1ae8edc40..d58a340e1be 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -131,7 +131,7 @@ void __show_regs(struct pt_regs *regs, int all) if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; - savesegment(gs, gs); + gs = get_user_gs(regs); } else { sp = (unsigned long) (®s->sp); savesegment(ss, ss); @@ -304,7 +304,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, p->thread.ip = (unsigned long) ret_from_fork; - savesegment(gs, p->thread.gs); + task_user_gs(p) = get_user_gs(regs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -342,7 +342,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) { - __asm__("movl %0, %%gs" : : "r"(0)); + set_user_gs(regs, 0); regs->fs = 0; set_fs(USER_DS); regs->ds = __USER_DS; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb..508b6b57d0c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -90,9 +90,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) if (offset != offsetof(struct user_regs_struct, gs)) retval = *pt_regs_access(task_pt_regs(task), offset); else { - retval = task->thread.gs; if (task == current) - savesegment(gs, retval); + retval = get_user_gs(task_pt_regs(task)); + else + retval = task_user_gs(task); } return retval; } @@ -126,13 +127,10 @@ static int set_segment_reg(struct task_struct *task, break; case offsetof(struct user_regs_struct, gs): - task->thread.gs = value; if (task == current) - /* - * The user-mode %gs is not affected by - * kernel entry, so we must update the CPU. - */ - loadsegment(gs, value); + set_user_gs(task_pt_regs(task), value); + else + task_user_gs(task) = value; } return 0; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7fc78b01981..8562387c75a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -50,27 +50,23 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -#define COPY(x) { \ - get_user_ex(regs->x, &sc->x); \ -} +#define COPY(x) do { \ + get_user_ex(regs->x, &sc->x); \ +} while (0) -#define COPY_SEG(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp; \ -} +#define GET_SEG(seg) ({ \ + unsigned short tmp; \ + get_user_ex(tmp, &sc->seg); \ + tmp; \ +}) -#define COPY_SEG_CPL3(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp | 3; \ -} +#define COPY_SEG(seg) do { \ + regs->seg = GET_SEG(seg); \ +} while (0) -#define GET_SEG(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - loadsegment(seg, tmp); \ -} +#define COPY_SEG_CPL3(seg) do { \ + regs->seg = GET_SEG(seg) | 3; \ +} while (0) static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, @@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, get_user_try { #ifdef CONFIG_X86_32 - GET_SEG(gs); + set_user_gs(regs, GET_SEG(gs)); COPY_SEG(fs); COPY_SEG(es); COPY_SEG(ds); @@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_try { #ifdef CONFIG_X86_32 - { - unsigned int tmp; - - savesegment(gs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->gs); - } + put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); put_user_ex(regs->es, (unsigned int __user *)&sc->es); put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 4eeb5cf9720..55ea30d2a3d 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) ret = KVM86->regs32; ret->fs = current->thread.saved_fs; - loadsegment(gs, current->thread.saved_gs); + set_user_gs(ret, current->thread.saved_gs); return ret; } @@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs32->ax = 0; tsk->thread.saved_sp0 = tsk->thread.sp0; tsk->thread.saved_fs = info->regs32->fs; - savesegment(gs, tsk->thread.saved_gs); + tsk->thread.saved_gs = get_user_gs(info->regs32); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 420b3b6e391..6ef5e99380f 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment, #endif /* PARANOID */ switch (segment) { - /* gs isn't used by the kernel, so it still has its - user-space value. */ case PREFIX_GS_ - 1: - /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ - savesegment(gs, addr->selector); + /* user gs handling can be lazy, use special accessors */ + addr->selector = get_user_gs(FPU_info->regs); break; default: addr->selector = PM_REG_(segment); From ccbeed3a05908d201b47b6c3dd1a373138bba566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 442/682] x86: make lazy %gs optional on x86_32 Impact: pt_regs changed, lazy gs handling made optional, add slight overhead to SAVE_ALL, simplifies error_code path a bit On x86_32, %gs hasn't been used by kernel and handled lazily. pt_regs doesn't have place for it and gs is saved/loaded only when necessary. In preparation for stack protector support, this patch makes lazy %gs handling optional by doing the followings. * Add CONFIG_X86_32_LAZY_GS and place for gs in pt_regs. * Save and restore %gs along with other registers in entry_32.S unless LAZY_GS. Note that this unfortunately adds "pushl $0" on SAVE_ALL even when LAZY_GS. However, it adds no overhead to common exit path and simplifies entry path with error code. * Define different user_gs accessors depending on LAZY_GS and add lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS. The lazy_*_gs() ops are used to save, load and clear %gs lazily. * Define ELF_CORE_COPY_KERNEL_REGS() which always read %gs directly. xen and lguest changes need to be verified. Signed-off-by: Tejun Heo Cc: Jeremy Fitzhardinge Cc: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 + arch/x86/include/asm/elf.h | 15 +++- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/ptrace.h | 4 +- arch/x86/include/asm/system.h | 12 ++- arch/x86/kernel/asm-offsets_32.c | 1 + arch/x86/kernel/entry_32.S | 132 ++++++++++++++++++++++++----- arch/x86/kernel/process_32.c | 4 +- arch/x86/kernel/ptrace.c | 5 +- arch/x86/lguest/boot.c | 2 +- arch/x86/xen/enlighten.c | 17 ++-- 11 files changed, 158 insertions(+), 40 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5c8e353c112..5bcdede71ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,10 @@ config X86_TRAMPOLINE depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP) default y +config X86_32_LAZY_GS + def_bool y + depends on X86_32 + config KTIME_SCALAR def_bool X86_32 source "init/Kconfig" diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 39b0aac1675..83c1bc8d2e8 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; * now struct_user_regs, they are different) */ -#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ do { \ pr_reg[0] = regs->bx; \ pr_reg[1] = regs->cx; \ @@ -124,7 +124,6 @@ do { \ pr_reg[7] = regs->ds & 0xffff; \ pr_reg[8] = regs->es & 0xffff; \ pr_reg[9] = regs->fs & 0xffff; \ - pr_reg[10] = get_user_gs(regs); \ pr_reg[11] = regs->orig_ax; \ pr_reg[12] = regs->ip; \ pr_reg[13] = regs->cs & 0xffff; \ @@ -133,6 +132,18 @@ do { \ pr_reg[16] = regs->ss & 0xffff; \ } while (0); +#define ELF_CORE_COPY_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + pr_reg[10] = get_user_gs(regs); \ +} while (0); + +#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ +do { \ + ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ + savesegment(gs, pr_reg[10]); \ +} while (0); + #define ELF_PLATFORM (utsname()->machine) #define set_personality_64bit() do { } while (0) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 4955165682c..f923203dc39 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -79,7 +79,7 @@ do { \ #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ - set_user_gs(task_pt_regs(tsk), 0); \ + lazy_load_gs(0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6d34d954c22..e304b66abee 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -28,7 +28,7 @@ struct pt_regs { int xds; int xes; int xfs; - /* int gs; */ + int xgs; long orig_eax; long eip; int xcs; @@ -50,7 +50,7 @@ struct pt_regs { unsigned long ds; unsigned long es; unsigned long fs; - /* int gs; */ + unsigned long gs; unsigned long orig_ax; unsigned long ip; unsigned long cs; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 70c74b8db87..79b98e5b96f 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -186,10 +186,20 @@ extern void native_load_gs_index(unsigned); * x86_32 user gs accessors. */ #ifdef CONFIG_X86_32 +#ifdef CONFIG_X86_32_LAZY_GS #define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) #define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) #define task_user_gs(tsk) ((tsk)->thread.gs) -#endif +#define lazy_save_gs(v) savesegment(gs, (v)) +#define lazy_load_gs(v) loadsegment(gs, (v)) +#else /* X86_32_LAZY_GS */ +#define get_user_gs(regs) (u16)((regs)->gs) +#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +#define lazy_save_gs(v) do { } while (0) +#define lazy_load_gs(v) do { } while (0) +#endif /* X86_32_LAZY_GS */ +#endif /* X86_32 */ static inline unsigned long get_limit(unsigned long segment) { diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee..fbf2f33e308 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -75,6 +75,7 @@ void foo(void) OFFSET(PT_DS, pt_regs, ds); OFFSET(PT_ES, pt_regs, es); OFFSET(PT_FS, pt_regs, fs); + OFFSET(PT_GS, pt_regs, gs); OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); OFFSET(PT_EIP, pt_regs, ip); OFFSET(PT_CS, pt_regs, cs); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c461925d3b6..82e6868bee4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -30,12 +30,13 @@ * 1C(%esp) - %ds * 20(%esp) - %es * 24(%esp) - %fs - * 28(%esp) - orig_eax - * 2C(%esp) - %eip - * 30(%esp) - %cs - * 34(%esp) - %eflags - * 38(%esp) - %oldesp - * 3C(%esp) - %oldss + * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS + * 2C(%esp) - orig_eax + * 30(%esp) - %eip + * 34(%esp) - %cs + * 38(%esp) - %eflags + * 3C(%esp) - %oldesp + * 40(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -101,8 +102,99 @@ #define resume_userspace_sig resume_userspace #endif +/* + * User gs save/restore + * + * %gs is used for userland TLS and kernel only uses it for stack + * canary which is required to be at %gs:20 by gcc. Read the comment + * at the top of stackprotector.h for more info. + * + * Local labels 98 and 99 are used. + */ +#ifdef CONFIG_X86_32_LAZY_GS + + /* unfortunately push/pop can't be no-op */ +.macro PUSH_GS + pushl $0 + CFI_ADJUST_CFA_OFFSET 4 +.endm +.macro POP_GS pop=0 + addl $(4 + \pop), %esp + CFI_ADJUST_CFA_OFFSET -(4 + \pop) +.endm +.macro POP_GS_EX +.endm + + /* all the rest are no-op */ +.macro PTGS_TO_GS +.endm +.macro PTGS_TO_GS_EX +.endm +.macro GS_TO_REG reg +.endm +.macro REG_TO_PTGS reg +.endm +.macro SET_KERNEL_GS reg +.endm + +#else /* CONFIG_X86_32_LAZY_GS */ + +.macro PUSH_GS + pushl %gs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET gs, 0*/ +.endm + +.macro POP_GS pop=0 +98: popl %gs + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_RESTORE gs*/ + .if \pop <> 0 + add $\pop, %esp + CFI_ADJUST_CFA_OFFSET -\pop + .endif +.endm +.macro POP_GS_EX +.pushsection .fixup, "ax" +99: movl $0, (%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro PTGS_TO_GS +98: mov PT_GS(%esp), %gs +.endm +.macro PTGS_TO_GS_EX +.pushsection .fixup, "ax" +99: movl $0, PT_GS(%esp) + jmp 98b +.section __ex_table, "a" + .align 4 + .long 98b, 99b +.popsection +.endm + +.macro GS_TO_REG reg + movl %gs, \reg + /*CFI_REGISTER gs, \reg*/ +.endm +.macro REG_TO_PTGS reg + movl \reg, PT_GS(%esp) + /*CFI_REL_OFFSET gs, PT_GS*/ +.endm +.macro SET_KERNEL_GS reg + xorl \reg, \reg + movl \reg, %gs +.endm + +#endif /* CONFIG_X86_32_LAZY_GS */ + .macro SAVE_ALL cld + PUSH_GS pushl %fs CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET fs, 0;*/ @@ -138,6 +230,7 @@ movl %edx, %es movl $(__KERNEL_PERCPU), %edx movl %edx, %fs + SET_KERNEL_GS %edx .endm .macro RESTORE_INT_REGS @@ -164,7 +257,7 @@ CFI_RESTORE eax .endm -.macro RESTORE_REGS +.macro RESTORE_REGS pop=0 RESTORE_INT_REGS 1: popl %ds CFI_ADJUST_CFA_OFFSET -4 @@ -175,6 +268,7 @@ 3: popl %fs CFI_ADJUST_CFA_OFFSET -4 /*CFI_RESTORE fs;*/ + POP_GS \pop .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -188,6 +282,7 @@ .long 2b, 5b .long 3b, 6b .popsection + POP_GS_EX .endm .macro RING0_INT_FRAME @@ -368,6 +463,7 @@ sysenter_exit: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs + PTGS_TO_GS ENABLE_INTERRUPTS_SYSEXIT #ifdef CONFIG_AUDITSYSCALL @@ -416,6 +512,7 @@ sysexit_audit: .align 4 .long 1b,2b .popsection + PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) # system call handler stub @@ -458,8 +555,7 @@ restore_all: restore_nocheck: TRACE_IRQS_IRET restore_nocheck_notrace: - RESTORE_REGS - addl $4, %esp # skip orig_eax/error_code + RESTORE_REGS 4 # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 irq_return: INTERRUPT_RETURN @@ -1078,7 +1174,10 @@ ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: - /* the function address is in %fs's slot on the stack */ + /* the function address is in %gs's slot on the stack */ + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ pushl %es CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET es, 0*/ @@ -1107,20 +1206,15 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address + GS_TO_REG %ecx + movl PT_GS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ + REG_TO_PTGS %ecx + SET_KERNEL_GS %ecx movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d58a340e1be..86122fa2a1b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - savesegment(gs, prev->gs); + lazy_save_gs(prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Restore %gs if needed (which is common) */ if (prev->gs | next->gs) - loadsegment(gs, next->gs); + lazy_load_gs(next->gs); percpu_write(current_task, next_p); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 508b6b57d0c..7ec39ab37a2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); - regno >>= 2; - if (regno > FS) - --regno; - return ®s->bx + regno; + return ®s->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 19e33b6cd59..da2e314f61b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) /* There's one problem which normal hardware doesn't have: the Host * can't handle us removing entries we're currently using. So we clear * the GS register here: if it's needed it'll be reloaded anyway. */ - loadsegment(gs, 0); + lazy_load_gs(0); lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 37230342c2c..95ff6a0e942 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t, static void xen_load_tls(struct thread_struct *t, unsigned int cpu) { /* - * XXX sleazy hack: If we're being called in a lazy-cpu zone, - * it means we're in a context switch, and %gs has just been - * saved. This means we can zero it out to prevent faults on - * exit from the hypervisor if the next process has no %gs. - * Either way, it has been saved, and the new value will get - * loaded properly. This will go away as soon as Xen has been - * modified to not save/restore %gs for normal hypercalls. + * XXX sleazy hack: If we're being called in a lazy-cpu zone + * and lazy gs handling is enabled, it means we're in a + * context switch, and %gs has just been saved. This means we + * can zero it out to prevent faults on exit from the + * hypervisor if the next process has no %gs. Either way, it + * has been saved, and the new value will get loaded properly. + * This will go away as soon as Xen has been modified to not + * save/restore %gs for normal hypercalls. * * On x86_64, this hack is not used for %gs, because gs points * to KERNEL_GS_BASE (and uses it for PDA references), so we @@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) */ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { #ifdef CONFIG_X86_32 - loadsegment(gs, 0); + lazy_load_gs(0); #else loadsegment(fs, 0); #endif From 60a5317ff0f42dd313094b88f809f63041568b08 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: [PATCH 443/682] x86: implement x86_32 stack protector Impact: stack protector for x86_32 Implement stack protector for x86_32. GDT entry 28 is used for it. It's set to point to stack_canary-20 and have the length of 24 bytes. CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs to the stack canary segment on entry. As %gs is otherwise unused by the kernel, the canary can be anywhere. It's defined as a percpu variable. x86_32 exception handlers take register frame on stack directly as struct pt_regs. With -fstack-protector turned on, gcc copies the whole structure after the stack canary and (of course) doesn't copy back on return thus losing all changed. For now, -fno-stack-protector is added to all files which contain those functions. We definitely need something better. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +- arch/x86/include/asm/processor.h | 4 + arch/x86/include/asm/segment.h | 9 ++- arch/x86/include/asm/stackprotector.h | 91 +++++++++++++++++++++-- arch/x86/include/asm/system.h | 21 ++++++ arch/x86/kernel/Makefile | 18 +++++ arch/x86/kernel/cpu/common.c | 17 +++-- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/head_32.S | 20 ++++- arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/setup_percpu.c | 2 + scripts/gcc-x86_32-has-stack-protector.sh | 8 ++ 12 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 scripts/gcc-x86_32-has-stack-protector.sh diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bcdede71ba..f760a22f95d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -209,7 +209,7 @@ config X86_TRAMPOLINE config X86_32_LAZY_GS def_bool y - depends on X86_32 + depends on X86_32 && !CC_STACKPROTECTOR config KTIME_SCALAR def_bool X86_32 @@ -1356,7 +1356,6 @@ config CC_STACKPROTECTOR_ALL config CC_STACKPROTECTOR bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" - depends on X86_64 select CC_STACKPROTECTOR_ALL help This option turns on the -fstack-protector GCC feature. This diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9763eb70013..5a947210425 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); +#else /* X86_64 */ +#ifdef CONFIG_CC_STACKPROTECTOR +DECLARE_PER_CPU(unsigned long, stack_canary); #endif +#endif /* X86_64 */ extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 1dc1b51ac62..14e0ed86a6f 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -61,7 +61,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused + * 28 - stack_canary-20 [ for stack protector ] * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -95,6 +95,13 @@ #define __KERNEL_PERCPU 0 #endif +#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16) +#ifdef CONFIG_CC_STACKPROTECTOR +#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8) +#else +#define __KERNEL_STACK_CANARY 0 +#endif + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index ee275e9f48a..fa7e5bd6fbe 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -1,3 +1,35 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and unfortunately gcc requires it to be at a fixed offset from %gs. + * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 + * and x86_32 use segment registers differently and thus handles this + * requirement differently. + * + * On x86_64, %gs is shared by percpu area and stack canary. All + * percpu symbols are zero based and %gs points to the base of percpu + * area. The first occupant of the percpu area is always + * irq_stack_union which contains stack_canary at offset 40. Userland + * %gs is always saved and restored on kernel entry and exit using + * swapgs, so stack protector doesn't add any complexity there. + * + * On x86_32, it's slightly more complicated. As in x86_64, %gs is + * used for userland TLS. Unfortunately, some processors are much + * slower at loading segment registers with different value when + * entering and leaving the kernel, so the kernel uses %fs for percpu + * area and manages %gs lazily so that %gs is switched only when + * necessary, usually during task switch. + * + * As gcc requires the stack canary at %gs:20, %gs can't be managed + * lazily if stack protector is enabled, so the kernel saves and + * restores userland %gs on kernel entry and exit. This behavior is + * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in + * system.h to hide the details. + */ + #ifndef _ASM_STACKPROTECTOR_H #define _ASM_STACKPROTECTOR_H 1 @@ -6,8 +38,18 @@ #include #include #include +#include +#include #include +/* + * 24 byte read-only segment initializer for stack canary. Linker + * can't handle the address bit shifting. Address will be set in + * head_32 for boot CPU and setup_per_cpu_areas() for others. + */ +#define GDT_STACK_CANARY_INIT \ + [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, + /* * Initialize the stackprotector canary value. * @@ -19,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void) u64 canary; u64 tsc; - /* - * Build time only check to make sure the stack_canary is at - * offset 40 in the pda; this is a gcc ABI requirement - */ +#ifdef CONFIG_X86_64 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); - +#endif /* * We both use the random pool and the current TSC as a source * of randomness. The TSC only matters for very early init, @@ -36,7 +75,49 @@ static __always_inline void boot_init_stack_canary(void) canary += tsc + (tsc << 32UL); current->stack_canary = canary; +#ifdef CONFIG_X86_64 percpu_write(irq_stack_union.stack_canary, canary); +#else + percpu_write(stack_canary, canary); +#endif +} + +static inline void setup_stack_canary_segment(int cpu) +{ +#ifdef CONFIG_X86_32 + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); + struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); + struct desc_struct desc; + + desc = gdt_table[GDT_ENTRY_STACK_CANARY]; + desc.base0 = canary & 0xffff; + desc.base1 = (canary >> 16) & 0xff; + desc.base2 = (canary >> 24) & 0xff; + write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); +#endif +} + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); +#endif +} + +#else /* CC_STACKPROTECTOR */ + +#define GDT_STACK_CANARY_INIT + +/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ + +static inline void setup_stack_canary_segment(int cpu) +{ } + +static inline void load_stack_canary_segment(void) +{ +#ifdef CONFIG_X86_32 + asm volatile ("mov %0, %%gs" : : "r" (0)); +#endif } #endif /* CC_STACKPROTECTOR */ diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 79b98e5b96f..2692ee8ef03 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -23,6 +23,22 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_X86_32 +#ifdef CONFIG_CC_STACKPROTECTOR +#define __switch_canary \ + "movl "__percpu_arg([current_task])",%%ebx\n\t" \ + "movl %P[task_canary](%%ebx),%%ebx\n\t" \ + "movl %%ebx,"__percpu_arg([stack_canary])"\n\t" +#define __switch_canary_oparam \ + , [stack_canary] "=m" (per_cpu_var(stack_canary)) +#define __switch_canary_iparam \ + , [current_task] "m" (per_cpu_var(current_task)) \ + , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) +#else /* CC_STACKPROTECTOR */ +#define __switch_canary +#define __switch_canary_oparam +#define __switch_canary_iparam +#endif /* CC_STACKPROTECTOR */ + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -46,6 +62,7 @@ do { \ "pushl %[next_ip]\n\t" /* restore EIP */ \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ + __switch_canary \ "popl %%ebp\n\t" /* restore EBP */ \ "popfl\n" /* restore flags */ \ \ @@ -58,6 +75,8 @@ do { \ "=b" (ebx), "=c" (ecx), "=d" (edx), \ "=S" (esi), "=D" (edi) \ \ + __switch_canary_oparam \ + \ /* input parameters: */ \ : [next_sp] "m" (next->thread.sp), \ [next_ip] "m" (next->thread.ip), \ @@ -66,6 +85,8 @@ do { \ [prev] "a" (prev), \ [next] "d" (next) \ \ + __switch_canary_iparam \ + \ : /* reloaded segment registers */ \ "memory"); \ } while (0) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 37fa30bada1..b1f8be33300 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,6 +24,24 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) +# +# On x86_32, register frame is passed verbatim on stack as struct +# pt_regs. gcc considers the parameter to belong to the callee and +# with -fstack-protector it copies pt_regs to the callee's stack frame +# to put the structure after the stack canary causing changes made by +# the exception handlers to be lost. Turn off stack protector for all +# files containing functions which take struct pt_regs from register +# frame. +# +# The proper way to fix this is to teach gcc that the argument belongs +# to the caller for these functions, oh well... +# +ifdef CONFIG_X86_32 +CFLAGS_process_32.o := $(nostackp) +CFLAGS_vm86_32.o := $(nostackp) +CFLAGS_signal.o := $(nostackp) +CFLAGS_traps.o := $(nostackp) +endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 41b0de6df87..260fe4cb2c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "cpu.h" @@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, + GDT_STACK_CANARY_INIT #endif } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); @@ -261,6 +263,7 @@ void load_percpu_segment(int cpu) loadsegment(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif + load_stack_canary_segment(); } /* Current gdt points %fs at the "master" per-cpu area: after this, @@ -946,16 +949,21 @@ unsigned long kernel_eflags; */ DEFINE_PER_CPU(struct orig_ist, orig_ist); -#else +#else /* x86_64 */ -/* Make sure %fs is initialized properly in idle threads */ +#ifdef CONFIG_CC_STACKPROTECTOR +DEFINE_PER_CPU(unsigned long, stack_canary); +#endif + +/* Make sure %fs and %gs are initialized properly in idle threads */ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); regs->fs = __KERNEL_PERCPU; + regs->gs = __KERNEL_STACK_CANARY; return regs; } -#endif +#endif /* x86_64 */ /* * cpu_init() initializes state that is per-CPU. Some data is already @@ -1120,9 +1128,6 @@ void __cpuinit cpu_init(void) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %gs. */ - asm volatile ("mov %0, %%gs" : : "r" (0)); - /* Clear all 6 debug registers: */ set_debugreg(0, 0); set_debugreg(0, 1); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 82e6868bee4..5f5bd22adcd 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -186,7 +186,7 @@ /*CFI_REL_OFFSET gs, PT_GS*/ .endm .macro SET_KERNEL_GS reg - xorl \reg, \reg + movl $(__KERNEL_STACK_CANARY), \reg movl \reg, %gs .endm diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 24c0e5cd71e..924e31615fb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -19,6 +19,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -437,8 +438,25 @@ is386: movl $2,%ecx # set MP movl $(__KERNEL_PERCPU), %eax movl %eax,%fs # set this cpu's percpu - xorl %eax,%eax # Clear GS and LDT +#ifdef CONFIG_CC_STACKPROTECTOR + /* + * The linker can't handle this by relocation. Manually set + * base address in stack canary segment descriptor. + */ + cmpb $0,ready + jne 1f + movl $per_cpu__gdt_page,%eax + movl $per_cpu__stack_canary,%ecx + movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) + shrl $16, %ecx + movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) + movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) +1: +#endif + movl $(__KERNEL_STACK_CANARY),%eax movl %eax,%gs + + xorl %eax,%eax # Clear LDT lldt %ax cld # gcc2 wants the direction flag cleared at all times diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 86122fa2a1b..9a62383e7c3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -212,6 +212,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) regs.ds = __USER_DS; regs.es = __USER_DS; regs.fs = __KERNEL_PERCPU; + regs.gs = __KERNEL_STACK_CANARY; regs.orig_ax = -1; regs.ip = (unsigned long) kernel_thread_helper; regs.cs = __KERNEL_CS | get_kernel_rpl(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef91747bbed..d992e6cff73 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_PER_CPU_MAPS # define DBG(x...) printk(KERN_DEBUG x) @@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void) per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); + setup_stack_canary_segment(cpu); /* * Copy data used in early init routines from the * initial arrays to the per cpu data areas. These diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh new file mode 100644 index 00000000000..4fdf6ce1b06 --- /dev/null +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +if [ "$?" -eq "0" ] ; then + echo y +else + echo n +fi From 0e81cb59c7120191c0243c686b591797bc79e5c6 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 10 Feb 2009 16:45:37 -0800 Subject: [PATCH 444/682] x86, apic: fix initialization of wakeup_cpu With refactoring of wake_cpu macros the 32bit code in tip doesn't execute generic_apic_probe if CONFIG_X86_32_NON_STANDARD is not set. Even on a x86 STANDARD cpu we need to execute the generic_apic_probe function, as we rely on this function to execute the update_genapic quirk which initilizes apic->wakeup_cpu. Failing to do so results in we making a call to a null function in do_boot_cpu. The stack trace without the patch goes like this. Booting processor 1 APIC 0x1 ip 0x6000 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<(null)>] (null) *pdpt = 0000000000839001 *pde = 0000000000c97067 *pte = 0000000000000163 Oops: 0000 [#1] SMP last sysfs file: Modules linked in: Pid: 1, comm: swapper Not tainted (2.6.29-rc4-tip #18) VMware Virtual Platform EIP: 0062:[<00000000>] EFLAGS: 00010293 CPU: 0 EIP is at 0x0 EAX: 00000001 EBX: 00006000 ECX: c077ed00 EDX: 00006000 ESI: 00000001 EDI: 00000001 EBP: ef04cf40 ESP: ef04cf1c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 006a Process swapper (pid: 1, ti=ef04c000 task=ef050000 task.ti=ef04c000) Stack: c0644e52 00000000 ef04cf24 ef04cf24 c064468d c0886dc0 00000000 c0702aea ef055480 00000001 00000101 dead4ead ffffffff ffffffff c08af530 00000000 c0709715 ef04cf60 ef04cf60 00000001 00000000 00000000 dead4ead ffffffff Call Trace: [] ? native_cpu_up+0x2de/0x45b [] ? do_fork_idle+0x0/0x19 [] ? _cpu_up+0x88/0xe8 [] ? cpu_up+0x42/0x4e [] ? kernel_init+0x99/0x14b [] ? kernel_init+0x0/0x14b [] ? kernel_thread_helper+0x7/0x10 Code: Bad EIP value. EIP: [<00000000>] 0x0 SS:ESP 006a:ef04cf1c I think we should call generic_apic_probe unconditionally for 32 bit now. Signed-off-by: Alok N Kataria Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8d8fa992c9a..150e6d0a3b4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,7 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#if defined(CONFIG_X86_32_NON_STANDARD) || defined(CONFIG_X86_BIGSMP) +#ifdef CONFIG_X86_32 generic_apic_probe(); #endif From 160d8dac12932ad6eb4a359b66521e2e3282ea7d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 11:27:39 +0100 Subject: [PATCH 445/682] x86, apic: make generic_apic_probe() generally available Impact: build fix Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 6 ++++++ arch/x86/kernel/setup.c | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f4835a1be36..fba49f66228 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -33,7 +33,13 @@ } while (0) +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) extern void generic_apic_probe(void); +#else +static inline void generic_apic_probe(void) +{ +} +#endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 150e6d0a3b4..8fce6c71451 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -936,9 +936,7 @@ void __init setup_arch(char **cmdline_p) map_vsyscall(); #endif -#ifdef CONFIG_X86_32 generic_apic_probe(); -#endif early_quirks(); From 5c79d2a517a9905599d192db8ce77ab5f1a2faca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Feb 2009 16:31:00 +0900 Subject: [PATCH 446/682] x86: fix x86_32 stack protector bugs Impact: fix x86_32 stack protector Brian Gerst found out that %gs was being initialized to stack_canary instead of stack_canary - 20, which basically gave the same canary value for all threads. Fixing this also exposed the following bugs. * cpu_idle() didn't call boot_init_stack_canary() * stack canary switching in switch_to() was being done too late making the initial run of a new thread use the old stack canary value. Fix all of them and while at it update comment in cpu_idle() about calling boot_init_stack_canary(). Reported-by: Brian Gerst Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/include/asm/system.h | 8 +++----- arch/x86/kernel/head_32.S | 1 + arch/x86/kernel/process_32.c | 10 ++++++++++ arch/x86/kernel/process_64.c | 11 +++++------ 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index fa7e5bd6fbe..c2d742c6e15 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -85,7 +85,7 @@ static __always_inline void boot_init_stack_canary(void) static inline void setup_stack_canary_segment(int cpu) { #ifdef CONFIG_X86_32 - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); struct desc_struct desc; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2692ee8ef03..7a80f72bec4 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -25,13 +25,11 @@ struct task_struct *__switch_to(struct task_struct *prev, #ifdef CONFIG_CC_STACKPROTECTOR #define __switch_canary \ - "movl "__percpu_arg([current_task])",%%ebx\n\t" \ - "movl %P[task_canary](%%ebx),%%ebx\n\t" \ - "movl %%ebx,"__percpu_arg([stack_canary])"\n\t" + "movl %P[task_canary](%[next]), %%ebx\n\t" \ + "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" #define __switch_canary_oparam \ , [stack_canary] "=m" (per_cpu_var(stack_canary)) #define __switch_canary_iparam \ - , [current_task] "m" (per_cpu_var(current_task)) \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ #define __switch_canary @@ -60,9 +58,9 @@ do { \ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \ + __switch_canary \ "jmp __switch_to\n" /* regparm call */ \ "1:\t" \ - __switch_canary \ "popl %%ebp\n\t" /* restore EBP */ \ "popfl\n" /* restore flags */ \ \ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 924e31615fb..cf21fd0cf6a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -447,6 +447,7 @@ is386: movl $2,%ecx # set MP jne 1f movl $per_cpu__gdt_page,%eax movl $per_cpu__stack_canary,%ecx + subl $20, %ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9a62383e7c3..b50604bb1e4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -11,6 +11,7 @@ #include +#include #include #include #include @@ -91,6 +92,15 @@ void cpu_idle(void) { int cpu = smp_processor_id(); + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). + */ + boot_init_stack_canary(); + current_thread_info()->status |= TS_POLLING; /* endless idle loop with no priority at all */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8eb169e4558..836ef6575f0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -120,12 +120,11 @@ void cpu_idle(void) current_thread_info()->status |= TS_POLLING; /* - * If we're the non-boot CPU, nothing set the PDA stack - * canary up for us - and if we are the boot CPU we have - * a 0 stack canary. This is a good place for updating - * it, as we wont ever return from this function (so the - * invalid canaries already on the stack wont ever - * trigger): + * If we're the non-boot CPU, nothing set the stack canary up + * for us. CPU0 already has it initialized but no harm in + * doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). */ boot_init_stack_canary(); From ebd9026d9f8499abc60d82d949bd37f88fe34a41 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 12:17:29 +0100 Subject: [PATCH 447/682] stackprotector: fix multi-word cross-builds Stackprotector builds were failing if CROSS_COMPILER was more than a single world (such as when distcc was used) - because the check scripts used $1 instead of $*. Signed-off-by: Ingo Molnar --- scripts/gcc-x86_32-has-stack-protector.sh | 2 +- scripts/gcc-x86_64-has-stack-protector.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh index 4fdf6ce1b06..29493dc4528 100644 --- a/scripts/gcc-x86_32-has-stack-protector.sh +++ b/scripts/gcc-x86_32-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 2d69fcdc560..afaec618b39 100644 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else From aa78bcfa01dec3cdbde3cda098ce32abbd9c3bf6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:45 -0500 Subject: [PATCH 448/682] x86: use pt_regs pointer in do_device_not_available() The generic exception handler (error_code) passes in the pt_regs pointer and the error code (unused in this case). The commit "x86: fix math_emu register frame access" changed this to pass by value, which doesn't work correctly with stack protector enabled. Change it back to use the pt_regs pointer. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/traps.h | 2 +- arch/x86/kernel/traps.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index cf3bb053da0..0d5342515b8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); dotraplinkage void do_overflow(struct pt_regs *, long); dotraplinkage void do_bounds(struct pt_regs *, long); dotraplinkage void do_invalid_op(struct pt_regs *, long); -dotraplinkage void do_device_not_available(struct pt_regs); +dotraplinkage void do_device_not_available(struct pt_regs *, long); dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); dotraplinkage void do_invalid_TSS(struct pt_regs *, long); dotraplinkage void do_segment_not_present(struct pt_regs *, long); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3b7b2e19020..71a8f871331 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) +dotraplinkage void __kprobes +do_device_not_available(struct pt_regs *regs, long error_code) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; - conditional_sti(®s); + conditional_sti(regs); - info.regs = ®s; + info.regs = regs; math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(®s); + conditional_sti(regs); } #else math_state_restore(); From 253f29a4ae9cc6cdc7b94f96517f27a93885a6ce Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:46 -0500 Subject: [PATCH 449/682] x86: pass in pt_regs pointer for syscalls that need it Some syscalls need to access the pt_regs structure, either to copy user register state or to modifiy it. This patch adds stubs to load the address of the pt_regs struct into the %eax register, and changes the syscalls to regparm(1) to receive the pt_regs pointer as the first argument. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 7 ++++++ arch/x86/include/asm/syscalls.h | 25 ++++++++++++--------- arch/x86/kernel/entry_32.S | 20 +++++++++++++++++ arch/x86/kernel/ioport.c | 4 +--- arch/x86/kernel/process_32.c | 35 ++++++++++++------------------ arch/x86/kernel/signal.c | 35 ++++++------------------------ arch/x86/kernel/syscall_table_32.S | 20 ++++++++--------- arch/x86/kernel/vm86_32.c | 15 ++++++------- 8 files changed, 81 insertions(+), 80 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5d98d0b68ff..2fd5926fb97 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -17,6 +17,13 @@ */ #define asmregparm __attribute__((regparm(3))) +/* + * For syscalls that need a pointer to the pt_regs struct (ie. fork). + * The regs pointer is passed in %eax as the first argument. The + * remaining function arguments remain on the stack. + */ +#define ptregscall __attribute__((regparm(1))) + /* * Make sure the compiler doesn't do anything stupid with the * arguments on the stack - they are owned by the *caller*, not diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index c0b0bda754e..617295255a1 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -29,21 +29,26 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 /* kernel/process_32.c */ -asmlinkage int sys_fork(struct pt_regs); -asmlinkage int sys_clone(struct pt_regs); -asmlinkage int sys_vfork(struct pt_regs); -asmlinkage int sys_execve(struct pt_regs); +ptregscall int sys_fork(struct pt_regs *); +ptregscall int sys_clone(struct pt_regs *, unsigned long, + unsigned long, int __user *, + unsigned long, int __user *); +ptregscall int sys_vfork(struct pt_regs *); +ptregscall int sys_execve(struct pt_regs *, char __user *, + char __user * __user *, + char __user * __user *); /* kernel/signal_32.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -asmlinkage int sys_sigaltstack(unsigned long); -asmlinkage unsigned long sys_sigreturn(unsigned long); -asmlinkage int sys_rt_sigreturn(unsigned long); +ptregscall int sys_sigaltstack(struct pt_regs *, const stack_t __user *, + stack_t __user *); +ptregscall unsigned long sys_sigreturn(struct pt_regs *); +ptregscall int sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned long); +ptregscall long sys_iopl(struct pt_regs *, unsigned int); /* kernel/sys_i386_32.c */ asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, @@ -59,8 +64,8 @@ struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ -asmlinkage int sys_vm86old(struct pt_regs); -asmlinkage int sys_vm86(struct pt_regs); +ptregscall int sys_vm86old(struct pt_regs *, struct vm86_struct __user *); +ptregscall int sys_vm86(struct pt_regs *, unsigned long, unsigned long); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5f5bd22adcd..3de7b5710dc 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -697,6 +697,26 @@ syscall_badsys: END(syscall_badsys) CFI_ENDPROC +/* + * System calls that need a pt_regs pointer. + */ +#define PTREGSCALL(name) \ + ALIGN; \ +ptregs_##name: \ + leal 4(%esp),%eax; \ + jmp sys_##name; + +PTREGSCALL(iopl) +PTREGSCALL(fork) +PTREGSCALL(clone) +PTREGSCALL(vfork) +PTREGSCALL(execve) +PTREGSCALL(sigaltstack) +PTREGSCALL(sigreturn) +PTREGSCALL(rt_sigreturn) +PTREGSCALL(vm86) +PTREGSCALL(vm86old) + .macro FIXUP_ESPFIX_STACK /* since we are on a wrong stack, we cant make it a C code :( */ PER_CPU(gdt_page, %ebx) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b12208f4dfe..7ec14864631 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -131,10 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) } #ifdef CONFIG_X86_32 -asmlinkage long sys_iopl(unsigned long regsp) +ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level) { - struct pt_regs *regs = (struct pt_regs *)®sp; - unsigned int level = regs->bx; struct thread_struct *t = ¤t->thread; int rc; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b50604bb1e4..5a9dcfb01f7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -603,24 +603,18 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -asmlinkage int sys_fork(struct pt_regs regs) +ptregscall int sys_fork(struct pt_regs *regs) { - return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } -asmlinkage int sys_clone(struct pt_regs regs) +ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, + unsigned long newsp, int __user *parent_tidptr, + unsigned long unused, int __user *child_tidptr) { - unsigned long clone_flags; - unsigned long newsp; - int __user *parent_tidptr, *child_tidptr; - - clone_flags = regs.bx; - newsp = regs.cx; - parent_tidptr = (int __user *)regs.dx; - child_tidptr = (int __user *)regs.di; if (!newsp) - newsp = regs.sp; - return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); + newsp = regs->sp; + return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); } /* @@ -633,27 +627,26 @@ asmlinkage int sys_clone(struct pt_regs regs) * do not have enough call-clobbered registers to hold all * the information you need. */ -asmlinkage int sys_vfork(struct pt_regs regs) +ptregscall int sys_vfork(struct pt_regs *regs) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(struct pt_regs regs) +ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename, + char __user * __user *argv, + char __user * __user *envp) { int error; char *filename; - filename = getname((char __user *) regs.bx); + filename = getname(u_filename); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, - (char __user * __user *) regs.cx, - (char __user * __user *) regs.dx, - ®s); + error = do_execve(filename, argv, envp, regs); if (error == 0) { /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8562387c75a..d7a158367e3 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -549,39 +549,28 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, #endif /* CONFIG_X86_32 */ #ifdef CONFIG_X86_32 -asmlinkage int sys_sigaltstack(unsigned long bx) -{ - /* - * This is needed to make gcc realize it doesn't own the - * "struct pt_regs" - */ - struct pt_regs *regs = (struct pt_regs *)&bx; - const stack_t __user *uss = (const stack_t __user *)bx; - stack_t __user *uoss = (stack_t __user *)regs->cx; - - return do_sigaltstack(uss, uoss, regs->sp); -} +ptregscall int +sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss, + stack_t __user *uoss) #else /* !CONFIG_X86_32 */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) +#endif /* CONFIG_X86_32 */ { return do_sigaltstack(uss, uoss, regs->sp); } -#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -asmlinkage unsigned long sys_sigreturn(unsigned long __unused) +ptregscall unsigned long sys_sigreturn(struct pt_regs *regs) { struct sigframe __user *frame; - struct pt_regs *regs; unsigned long ax; sigset_t set; - regs = (struct pt_regs *) &__unused; frame = (struct sigframe __user *)(regs->sp - 8); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -640,23 +629,13 @@ badframe: } #ifdef CONFIG_X86_32 -/* - * Note: do not pass in pt_regs directly as with tail-call optimization - * GCC will incorrectly stomp on the caller's frame and corrupt user-space - * register state: - */ -asmlinkage int sys_rt_sigreturn(unsigned long __unused) -{ - struct pt_regs *regs = (struct pt_regs *)&__unused; - - return do_rt_sigreturn(regs); -} +ptregscall int sys_rt_sigreturn(struct pt_regs *regs) #else /* !CONFIG_X86_32 */ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +#endif /* CONFIG_X86_32 */ { return do_rt_sigreturn(regs); } -#endif /* CONFIG_X86_32 */ /* * OK, we're invoking a handler: diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index e2e86a08f31..3bdb64829b8 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -1,7 +1,7 @@ ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit - .long sys_fork + .long ptregs_fork .long sys_read .long sys_write .long sys_open /* 5 */ @@ -10,7 +10,7 @@ ENTRY(sys_call_table) .long sys_creat .long sys_link .long sys_unlink /* 10 */ - .long sys_execve + .long ptregs_execve .long sys_chdir .long sys_time .long sys_mknod @@ -109,17 +109,17 @@ ENTRY(sys_call_table) .long sys_newlstat .long sys_newfstat .long sys_uname - .long sys_iopl /* 110 */ + .long ptregs_iopl /* 110 */ .long sys_vhangup .long sys_ni_syscall /* old "idle" system call */ - .long sys_vm86old + .long ptregs_vm86old .long sys_wait4 .long sys_swapoff /* 115 */ .long sys_sysinfo .long sys_ipc .long sys_fsync - .long sys_sigreturn - .long sys_clone /* 120 */ + .long ptregs_sigreturn + .long ptregs_clone /* 120 */ .long sys_setdomainname .long sys_newuname .long sys_modify_ldt @@ -165,14 +165,14 @@ ENTRY(sys_call_table) .long sys_mremap .long sys_setresuid16 .long sys_getresuid16 /* 165 */ - .long sys_vm86 + .long ptregs_vm86 .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll .long sys_nfsservctl .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl - .long sys_rt_sigreturn + .long ptregs_rt_sigreturn .long sys_rt_sigaction .long sys_rt_sigprocmask /* 175 */ .long sys_rt_sigpending @@ -185,11 +185,11 @@ ENTRY(sys_call_table) .long sys_getcwd .long sys_capget .long sys_capset /* 185 */ - .long sys_sigaltstack + .long ptregs_sigaltstack .long sys_sendfile .long sys_ni_syscall /* reserved for streams1 */ .long sys_ni_syscall /* reserved for streams2 */ - .long sys_vfork /* 190 */ + .long ptregs_vfork /* 190 */ .long sys_getrlimit .long sys_mmap2 .long sys_truncate64 diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 55ea30d2a3d..8fa6ba7c923 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,9 +197,8 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -asmlinkage int sys_vm86old(struct pt_regs regs) +ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86) { - struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -218,7 +217,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) if (tmp) goto out; memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = ®s; + info.regs32 = regs; tsk->thread.vm86_info = v86; do_sys_vm86(&info, tsk); ret = 0; /* we never return here */ @@ -227,7 +226,7 @@ out: } -asmlinkage int sys_vm86(struct pt_regs regs) +ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -239,12 +238,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) struct vm86plus_struct __user *v86; tsk = current; - switch (regs.bx) { + switch (cmd) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); + ret = do_vm86_irq_handling(cmd, (int)arg); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -261,14 +260,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)regs.cx; + v86 = (struct vm86plus_struct __user *)arg; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; - info.regs32 = ®s; + info.regs32 = regs; info.vm86plus.is_vm86pus = 1; tsk->thread.vm86_info = (struct vm86_struct __user *)v86; do_sys_vm86(&info, tsk); From 9c8bb6b534d1c89a20bf9bb45e1471cf8f4747c0 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 10 Feb 2009 09:51:47 -0500 Subject: [PATCH 450/682] x86: drop -fno-stack-protector annotations after pt_regs fixes Now that no functions rely on struct pt_regs being passed by value, various "no stack protector" annotations can be dropped. Signed-off-by: Brian Gerst Acked-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1f8be33300..37fa30bada1 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,24 +24,6 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) -# -# On x86_32, register frame is passed verbatim on stack as struct -# pt_regs. gcc considers the parameter to belong to the callee and -# with -fstack-protector it copies pt_regs to the callee's stack frame -# to put the structure after the stack canary causing changes made by -# the exception handlers to be lost. Turn off stack protector for all -# files containing functions which take struct pt_regs from register -# frame. -# -# The proper way to fix this is to teach gcc that the argument belongs -# to the caller for these functions, oh well... -# -ifdef CONFIG_X86_32 -CFLAGS_process_32.o := $(nostackp) -CFLAGS_vm86_32.o := $(nostackp) -CFLAGS_signal.o := $(nostackp) -CFLAGS_traps.o := $(nostackp) -endif obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o From 1c0040047d5499599cc231ca3f105be3ceff8562 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Mon, 9 Feb 2009 10:25:20 -0600 Subject: [PATCH 451/682] SGI IA64 UV: fix ia64 build error in the linux-next tree Fix the ia64 build error that occurs in the linux-next tree by introducing an ia64 version of uv.h. Additionally, clean up the usage of is_uv_system(). Signed-off-by: Dean Nelson Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/uv/uv.h | 13 +++++++++++++ drivers/misc/sgi-gru/gru.h | 2 -- drivers/misc/sgi-gru/grufile.c | 18 +++--------------- drivers/misc/sgi-xp/xp.h | 22 ++++++++-------------- 4 files changed, 24 insertions(+), 31 deletions(-) create mode 100644 arch/ia64/include/asm/uv/uv.h diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h new file mode 100644 index 00000000000..61b5bdfd980 --- /dev/null +++ b/arch/ia64/include/asm/uv/uv.h @@ -0,0 +1,13 @@ +#ifndef _ASM_IA64_UV_UV_H +#define _ASM_IA64_UV_UV_H + +#include +#include + +static inline int is_uv_system(void) +{ + /* temporary support for running on hardware simulator */ + return IS_MEDUSA() || ia64_platform_is("uv"); +} + +#endif /* _ASM_IA64_UV_UV_H */ diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index 1b5f579df15..f93f03a9e6e 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -19,8 +19,6 @@ #ifndef __GRU_H__ #define __GRU_H__ -#include - /* * GRU architectural definitions */ diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index 65098380639..c67e4e8bd62 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -36,23 +36,11 @@ #include #include #include +#include #include "gru.h" #include "grulib.h" #include "grutables.h" -#if defined CONFIG_X86_64 -#include -#include -#define IS_UV() is_uv_system() -#elif defined CONFIG_IA64 -#include -#include -/* temp support for running on hardware simulator */ -#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv") -#else -#define IS_UV() 0 -#endif - #include #include @@ -381,7 +369,7 @@ static int __init gru_init(void) char id[10]; void *gru_start_vaddr; - if (!IS_UV()) + if (!is_uv_system()) return 0; #if defined CONFIG_IA64 @@ -451,7 +439,7 @@ static void __exit gru_exit(void) int order = get_order(sizeof(struct gru_state) * GRU_CHIPLETS_PER_BLADE); - if (!IS_UV()) + if (!is_uv_system()) return; for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index 069ad3a1c2a..2275126cb33 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -15,21 +15,19 @@ #include +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV #include +#define is_uv() is_uv_system() +#endif -#ifdef CONFIG_IA64 +#ifndef is_uv +#define is_uv() 0 +#endif + +#if defined CONFIG_IA64 #include #include /* defines is_shub1() and is_shub2() */ #define is_shub() ia64_platform_is("sn2") -#ifdef CONFIG_IA64_SGI_UV -#define is_uv() ia64_platform_is("uv") -#else -#define is_uv() 0 -#endif -#endif -#ifdef CONFIG_X86_64 -#include -#define is_uv() is_uv_system() #endif #ifndef is_shub1 @@ -44,10 +42,6 @@ #define is_shub() 0 #endif -#ifndef is_uv -#define is_uv() 0 -#endif - #ifdef USE_DBUG_ON #define DBUG_ON(condition) BUG_ON(condition) #else From c5c606d9dce6e0852a401c57a8b0542acdbb6796 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Mon, 9 Feb 2009 18:18:14 -0800 Subject: [PATCH 452/682] x86: cleanup, rename CONFIG_X86_NON_STANDARD to CONFIG_X86_EXTENDED_PLATFORM Patch to rename the CONFIG_X86_NON_STANDARD to CONFIG_X86_EXTENDED_PLATFORM. The new name represents the subarches better. Also, default this to 'y' so that many of the sub architectures that were not easily visible now become visible. Also re-organize the extended architecture platform and non standard platform list alphabetically as suggested by Ingo. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 87 ++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 270ecf90bdb..4a27aa41f6d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -268,65 +268,48 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs -config X86_NON_STANDARD - bool "Support for non-standard x86 platforms" +config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y ---help--- If you disable this option then the kernel will only support standard PC platforms. (which covers the vast majority of systems out there.) If you enable this option then you'll be able to select a number - of less common non-PC x86 platforms: VisWS, RDC321, SGI/UV. + of non-PC x86 platforms. If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. -config X86_VISWS - bool "SGI 320/540 (Visual Workstation)" - depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT - depends on X86_NON_STANDARD - ---help--- - The SGI Visual Workstation series is an IA32-based workstation - based on SGI systems chips with some legacy PC hardware attached. - - Say Y here to create a kernel to run on the SGI 320 or 540. - - A kernel compiled for the Visual Workstation will run on general - PCs as well. See for details. - -config X86_RDC321X - bool "RDC R-321x SoC" - depends on X86_32 - depends on X86_NON_STANDARD - select M486 - select X86_REBOOTFIXUPS - ---help--- - This option is needed for RDC R-321x system-on-chip, also known - as R-8610-(G). - If you don't have one of these chips, you should say N here. - -config X86_UV - bool "SGI Ultraviolet" - depends on X86_64 - depends on X86_NON_STANDARD - ---help--- - This option is needed in order to support SGI Ultraviolet systems. - If you don't have one of these, you should say N here. +# This is an alphabetically sorted list of 64 bit extended platforms +# Please maintain the alphabetic order if and when there are additions config X86_VSMP - bool "Support for ScaleMP vSMP" + bool "ScaleMP vSMP" select PARAVIRT depends on X86_64 && PCI - depends on X86_NON_STANDARD + depends on X86_EXTENDED_PLATFORM ---help--- Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option if you have one of these machines. +config X86_UV + bool "SGI Ultraviolet" + depends on X86_64 + depends on X86_EXTENDED_PLATFORM + ---help--- + This option is needed in order to support SGI Ultraviolet systems. + If you don't have one of these, you should say N here. + +# Following is an alphabetically sorted list of 32 bit extended platforms +# Please maintain the alphabetic order if and when there are additions + config X86_ELAN bool "AMD Elan" depends on X86_32 - depends on X86_NON_STANDARD + depends on X86_EXTENDED_PLATFORM ---help--- Select this for an AMD Elan processor. @@ -334,16 +317,29 @@ config X86_ELAN If unsure, choose "PC-compatible" instead. +config X86_RDC321X + bool "RDC R-321x SoC" + depends on X86_32 + depends on X86_EXTENDED_PLATFORM + select M486 + select X86_REBOOTFIXUPS + ---help--- + This option is needed for RDC R-321x system-on-chip, also known + as R-8610-(G). + If you don't have one of these chips, you should say N here. + config X86_32_NON_STANDARD bool "Support non-standard 32-bit SMP architectures" depends on X86_32 && SMP - depends on X86_NON_STANDARD + depends on X86_EXTENDED_PLATFORM ---help--- This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default subarchitectures. It is intended for a generic binary kernel. if you select them all, kernel will probe it one by one. and will fallback to default. +# Alphabetically sorted list of Non standard 32 bit platforms + config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" depends on X86_32_NON_STANDARD @@ -356,6 +352,19 @@ config X86_NUMAQ of Flat Logical. You will need a new lynxer.elf file to flash your firmware with - send email to . +config X86_VISWS + bool "SGI 320/540 (Visual Workstation)" + depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT + depends on X86_32_NON_STANDARD + ---help--- + The SGI Visual Workstation series is an IA32-based workstation + based on SGI systems chips with some legacy PC hardware attached. + + Say Y here to create a kernel to run on the SGI 320 or 540. + + A kernel compiled for the Visual Workstation will run on general + PCs as well. See for details. + config X86_SUMMIT bool "Summit/EXA (IBM x440)" depends on X86_32_NON_STANDARD @@ -364,7 +373,7 @@ config X86_SUMMIT In particular, it is needed for the x440. config X86_ES7000 - bool "Support for Unisys ES7000 IA32 series" + bool "Unisys ES7000 IA32 series" depends on X86_32_NON_STANDARD && X86_BIGSMP ---help--- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is From 8e1568f3500287d0b36c9776132cb53a42d5651d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Feb 2009 01:06:59 -0800 Subject: [PATCH 453/682] pci, x86, acpi: fix early_ioremap() leak Pawel reported: ------------[ cut here ]------------ WARNING: at arch/x86/mm/ioremap.c:616 check_early_ioremap_leak+0x52/0x67() Hardware name: Debug warning: early ioremap leak of 1 areas detected. Modules linked in: Pid: 1, comm: swapper Not tainted 2.6.29-rc4-tip #2 ... Reported-by: Pawel Dziekonski Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- drivers/pci/dmar.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f5a662a50ac..519f5f91e76 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -42,6 +42,7 @@ LIST_HEAD(dmar_drhd_units); static struct acpi_table_header * __initdata dmar_tbl; +static acpi_size dmar_tbl_size; static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { @@ -288,8 +289,9 @@ static int __init dmar_table_detect(void) acpi_status status = AE_OK; /* if we could find DMAR table, then there are DMAR devices */ - status = acpi_get_table(ACPI_SIG_DMAR, 0, - (struct acpi_table_header **)&dmar_tbl); + status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0, + (struct acpi_table_header **)&dmar_tbl, + &dmar_tbl_size); if (ACPI_SUCCESS(status) && !dmar_tbl) { printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); @@ -481,6 +483,7 @@ void __init detect_intel_iommu(void) iommu_detected = 1; #endif } + early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; } From 17993b49b1f540aace8e9b4242530d0b3376eb2a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 11 Feb 2009 17:20:51 +0100 Subject: [PATCH 454/682] x86: make hibernation always-possible This commit: aced3ce: x86/Voyager: remove HIBERNATION Kconfig quirk Made hibernation only available on UP - instead of making it available on all of x86. Fix it. Reported-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4a27aa41f6d..148c112c9ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -140,7 +140,6 @@ config HAVE_CPUMASK_OF_CPU_MAP config ARCH_HIBERNATION_POSSIBLE def_bool y - depends on !SMP config ARCH_SUSPEND_POSSIBLE def_bool y From 7651194fb715b2d57658c05a710408f6b8448951 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 22:26:52 +0530 Subject: [PATCH 455/682] x86: mm/init_32.c fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/mm/init_32.c: In function ‘find_low_pfn_range’: arch/x86/mm/init_32.c:696: warning: format ‘%u’ expects type ‘unsigned int’, but Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2cef0507441..d48f2560364 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -692,7 +692,7 @@ void __init find_low_pfn_range(void) max_pfn = MAXMEM_PFN + highmem_pages; if (highmem_pages + MAXMEM_PFN > max_pfn) { printk(KERN_WARNING "only %luMB highmem pages " - "available, ignoring highmem size of %uMB.\n", + "available, ignoring highmem size of %luMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages)); highmem_pages = 0; From ba1511bf7fbda452138e4096bf10d5a382710f4f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 23:38:25 +0530 Subject: [PATCH 456/682] x86: kernel/mpparse.c fix compilation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/x86/kernel/mpparse.c: In function ‘smp_scan_config’: arch/x86/kernel/mpparse.c:696: warning: format ‘%08lx’ expects type ‘long unsigned int’, but argument 3 has type ‘phys_addr_t’ arch/x86/kernel/mpparse.c: In function ‘update_mp_table’: arch/x86/kernel/mpparse.c:1014: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 2 has type ‘phys_addr_t’ Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 66ebb823f39..20076445319 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -692,8 +692,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, #endif mpf_found = mpf; - printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", - mpf, virt_to_phys(mpf)); + printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", + mpf, (u64)virt_to_phys(mpf)); if (!reserve) return 1; @@ -1011,7 +1011,7 @@ static int __init update_mp_table(void) if (!smp_check_mpc(mpc, oem, str)) return 0; - printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); + printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); printk(KERN_INFO "physptr: %x\n", mpf->physptr); if (mpc_new_phys && mpc->length > mpc_new_length) { From 804852e4ffc983f9ee7600f78218698546fdc58d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 23:42:59 +0530 Subject: [PATCH 457/682] drm_proc.c fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/drm_proc.c: In function ‘drm__vma_info’: drivers/gpu/drm/drm_proc.c:681: warning: format ‘%08lx’ expects type ‘long unsigned int’, but argument 5 has type ‘phys_addr_t’ Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- drivers/gpu/drm/drm_proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c index 8df849f6683..b756f043a5f 100644 --- a/drivers/gpu/drm/drm_proc.c +++ b/drivers/gpu/drm/drm_proc.c @@ -678,9 +678,9 @@ static int drm__vma_info(char *buf, char **start, off_t offset, int request, *start = &buf[offset]; *eof = 0; - DRM_PROC_PRINT("vma use count: %d, high_memory = %p, 0x%08lx\n", + DRM_PROC_PRINT("vma use count: %d, high_memory = %p, 0x%llx\n", atomic_read(&dev->vma_count), - high_memory, virt_to_phys(high_memory)); + high_memory, (u64)virt_to_phys(high_memory)); list_for_each_entry(pt, &dev->vmalist, head) { if (!(vma = pt->vma)) continue; From 9c8976a1a51ce4a3e458cdf1c10cf651e7e97113 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 23:49:52 +0530 Subject: [PATCH 458/682] falcon fix compilation warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/sfc/falcon.c: In function ‘falcon_alloc_special_buffer’: drivers/net/sfc/falcon.c:340: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 10 has type ‘phys_addr_t’ drivers/net/sfc/falcon.c: In function ‘falcon_free_special_buffer’: drivers/net/sfc/falcon.c:355: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 10 has type ‘phys_addr_t’ drivers/net/sfc/falcon.c: In function ‘falcon_probe_port’: drivers/net/sfc/falcon.c:2346: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 7 has type ‘phys_addr_t’ drivers/net/sfc/falcon.c: In function ‘falcon_probe_nic’: drivers/net/sfc/falcon.c:2924: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 7 has type ‘phys_addr_t’ Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- drivers/net/sfc/falcon.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c index d5378e60fcd..064307c2277 100644 --- a/drivers/net/sfc/falcon.c +++ b/drivers/net/sfc/falcon.c @@ -338,10 +338,10 @@ static int falcon_alloc_special_buffer(struct efx_nic *efx, nic_data->next_buffer_table += buffer->entries; EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x " - "(virt %p phys %lx)\n", buffer->index, + "(virt %p phys %llx)\n", buffer->index, buffer->index + buffer->entries - 1, - (unsigned long long)buffer->dma_addr, len, - buffer->addr, virt_to_phys(buffer->addr)); + (u64)buffer->dma_addr, len, + buffer->addr, (u64)virt_to_phys(buffer->addr)); return 0; } @@ -353,10 +353,10 @@ static void falcon_free_special_buffer(struct efx_nic *efx, return; EFX_LOG(efx, "deallocating special buffers %d-%d at %llx+%x " - "(virt %p phys %lx)\n", buffer->index, + "(virt %p phys %llx)\n", buffer->index, buffer->index + buffer->entries - 1, - (unsigned long long)buffer->dma_addr, buffer->len, - buffer->addr, virt_to_phys(buffer->addr)); + (u64)buffer->dma_addr, buffer->len, + buffer->addr, (u64)virt_to_phys(buffer->addr)); pci_free_consistent(efx->pci_dev, buffer->len, buffer->addr, buffer->dma_addr); @@ -2343,10 +2343,10 @@ int falcon_probe_port(struct efx_nic *efx) FALCON_MAC_STATS_SIZE); if (rc) return rc; - EFX_LOG(efx, "stats buffer at %llx (virt %p phys %lx)\n", - (unsigned long long)efx->stats_buffer.dma_addr, + EFX_LOG(efx, "stats buffer at %llx (virt %p phys %llx)\n", + (u64)efx->stats_buffer.dma_addr, efx->stats_buffer.addr, - virt_to_phys(efx->stats_buffer.addr)); + (u64)virt_to_phys(efx->stats_buffer.addr)); return 0; } @@ -2921,9 +2921,9 @@ int falcon_probe_nic(struct efx_nic *efx) goto fail4; BUG_ON(efx->irq_status.dma_addr & 0x0f); - EFX_LOG(efx, "INT_KER at %llx (virt %p phys %lx)\n", - (unsigned long long)efx->irq_status.dma_addr, - efx->irq_status.addr, virt_to_phys(efx->irq_status.addr)); + EFX_LOG(efx, "INT_KER at %llx (virt %p phys %llx)\n", + (u64)efx->irq_status.dma_addr, + efx->irq_status.addr, (u64)virt_to_phys(efx->irq_status.addr)); falcon_probe_spi_devices(efx); From f537a53da9389f56df277057684df23997410e9c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 11 Feb 2009 23:51:34 +0530 Subject: [PATCH 459/682] iscsi_ibft.c fix compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/firmware/iscsi_ibft.c: In function ‘ibft_init’: drivers/firmware/iscsi_ibft.c:942: warning: format ‘%lx’ expects type ‘long unsigned int’, but argument 2 has type ‘phys_addr_t’ Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- drivers/firmware/iscsi_ibft.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 3ab3e4a41d6..7b7ddc2d51c 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -938,8 +938,8 @@ static int __init ibft_init(void) return -ENOMEM; if (ibft_addr) { - printk(KERN_INFO "iBFT detected at 0x%lx.\n", - virt_to_phys((void *)ibft_addr)); + printk(KERN_INFO "iBFT detected at 0x%llx.\n", + (u64)virt_to_phys((void *)ibft_addr)); rc = ibft_check_device(); if (rc) From 76a856397a5e08c43268b76afb1ef607f68e0fe2 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 12 Feb 2009 00:28:09 +0530 Subject: [PATCH 460/682] ne3210.c fix compilation warning for phys_addr_t Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- drivers/net/ne3210.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ne3210.c b/drivers/net/ne3210.c index fac43fd6fc8..6a843f7350a 100644 --- a/drivers/net/ne3210.c +++ b/drivers/net/ne3210.c @@ -150,7 +150,8 @@ static int __init ne3210_eisa_probe (struct device *device) if (phys_mem < virt_to_phys(high_memory)) { printk(KERN_CRIT "ne3210.c: Card RAM overlaps with normal memory!!!\n"); printk(KERN_CRIT "ne3210.c: Use EISA SCU to set card memory below 1MB,\n"); - printk(KERN_CRIT "ne3210.c: or to an address above 0x%lx.\n", virt_to_phys(high_memory)); + printk(KERN_CRIT "ne3210.c: or to an address above 0x%llx.\n", + (u64)virt_to_phys(high_memory)); printk(KERN_CRIT "ne3210.c: Driver NOT installed.\n"); retval = -EINVAL; goto out3; From 89e4b5930d8a59395219e7e3e4702e4653022b93 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 12 Feb 2009 00:36:39 +0530 Subject: [PATCH 461/682] arlan-main.c fix compilation warnings for phys_addr_t Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- drivers/net/wireless/arlan-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/arlan-main.c b/drivers/net/wireless/arlan-main.c index bfca15da6f0..14c11656e82 100644 --- a/drivers/net/wireless/arlan-main.c +++ b/drivers/net/wireless/arlan-main.c @@ -1082,8 +1082,8 @@ static int __init arlan_probe_here(struct net_device *dev, if (arlan_check_fingerprint(memaddr)) return -ENODEV; - printk(KERN_NOTICE "%s: Arlan found at %x, \n ", dev->name, - (int) virt_to_phys((void*)memaddr)); + printk(KERN_NOTICE "%s: Arlan found at %llx, \n ", dev->name, + (u64) virt_to_phys((void*)memaddr)); ap->card = (void *) memaddr; dev->mem_start = memaddr; From b12bdaf11f935d7be030207e3c77faeaeab8ded3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 11 Feb 2009 16:43:58 -0500 Subject: [PATCH 462/682] x86: use regparm(3) for passed-in pt_regs pointer Some syscalls need to access the pt_regs structure, either to copy user register state or to modifiy it. This patch adds stubs to load the address of the pt_regs struct into the %eax register, and changes the syscalls to take the pointer as an argument instead of relying on the assumption that the pt_regs structure overlaps the function arguments. Drop the use of regparm(1) due to concern about gcc bugs, and to move in the direction of the eventual removal of regparm(0) for asmlinkage. Signed-off-by: Brian Gerst Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/linkage.h | 7 ------- arch/x86/include/asm/syscalls.h | 25 ++++++++++--------------- arch/x86/kernel/ioport.c | 3 ++- arch/x86/kernel/process_32.c | 27 +++++++++++++++++---------- arch/x86/kernel/signal.c | 25 ++++++++++++++++--------- arch/x86/kernel/vm86_32.c | 11 ++++++----- 6 files changed, 51 insertions(+), 47 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 2fd5926fb97..5d98d0b68ff 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -17,13 +17,6 @@ */ #define asmregparm __attribute__((regparm(3))) -/* - * For syscalls that need a pointer to the pt_regs struct (ie. fork). - * The regs pointer is passed in %eax as the first argument. The - * remaining function arguments remain on the stack. - */ -#define ptregscall __attribute__((regparm(1))) - /* * Make sure the compiler doesn't do anything stupid with the * arguments on the stack - they are owned by the *caller*, not diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 617295255a1..77bb31a88ba 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -29,26 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 /* kernel/process_32.c */ -ptregscall int sys_fork(struct pt_regs *); -ptregscall int sys_clone(struct pt_regs *, unsigned long, - unsigned long, int __user *, - unsigned long, int __user *); -ptregscall int sys_vfork(struct pt_regs *); -ptregscall int sys_execve(struct pt_regs *, char __user *, - char __user * __user *, - char __user * __user *); +int sys_fork(struct pt_regs *); +int sys_clone(struct pt_regs *); +int sys_vfork(struct pt_regs *); +int sys_execve(struct pt_regs *); /* kernel/signal_32.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -ptregscall int sys_sigaltstack(struct pt_regs *, const stack_t __user *, - stack_t __user *); -ptregscall unsigned long sys_sigreturn(struct pt_regs *); -ptregscall int sys_rt_sigreturn(struct pt_regs *); +int sys_sigaltstack(struct pt_regs *); +unsigned long sys_sigreturn(struct pt_regs *); +int sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ -ptregscall long sys_iopl(struct pt_regs *, unsigned int); +long sys_iopl(struct pt_regs *); /* kernel/sys_i386_32.c */ asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, @@ -64,8 +59,8 @@ struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ -ptregscall int sys_vm86old(struct pt_regs *, struct vm86_struct __user *); -ptregscall int sys_vm86(struct pt_regs *, unsigned long, unsigned long); +int sys_vm86old(struct pt_regs *); +int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 7ec14864631..e41980a373a 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -131,8 +131,9 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) } #ifdef CONFIG_X86_32 -ptregscall long sys_iopl(struct pt_regs *regs, unsigned int level) +long sys_iopl(struct pt_regs *regs) { + unsigned int level = regs->bx; struct thread_struct *t = ¤t->thread; int rc; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5a9dcfb01f7..fec79ad85dc 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -603,15 +603,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -ptregscall int sys_fork(struct pt_regs *regs) +int sys_fork(struct pt_regs *regs) { return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); } -ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, - unsigned long newsp, int __user *parent_tidptr, - unsigned long unused, int __user *child_tidptr) +int sys_clone(struct pt_regs *regs) { + unsigned long clone_flags; + unsigned long newsp; + int __user *parent_tidptr, *child_tidptr; + + clone_flags = regs->bx; + newsp = regs->cx; + parent_tidptr = (int __user *)regs->dx; + child_tidptr = (int __user *)regs->di; if (!newsp) newsp = regs->sp; return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); @@ -627,7 +633,7 @@ ptregscall int sys_clone(struct pt_regs *regs, unsigned long clone_flags, * do not have enough call-clobbered registers to hold all * the information you need. */ -ptregscall int sys_vfork(struct pt_regs *regs) +int sys_vfork(struct pt_regs *regs) { return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); } @@ -635,18 +641,19 @@ ptregscall int sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. */ -ptregscall int sys_execve(struct pt_regs *regs, char __user *u_filename, - char __user * __user *argv, - char __user * __user *envp) +int sys_execve(struct pt_regs *regs) { int error; char *filename; - filename = getname(u_filename); + filename = getname((char __user *) regs->bx); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, argv, envp, regs); + error = do_execve(filename, + (char __user * __user *) regs->cx, + (char __user * __user *) regs->dx, + regs); if (error == 0) { /* Make sure we don't return using sysenter.. */ set_thread_flag(TIF_IRET); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d7a158367e3..ccfb27412f0 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -549,23 +549,27 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, #endif /* CONFIG_X86_32 */ #ifdef CONFIG_X86_32 -ptregscall int -sys_sigaltstack(struct pt_regs *regs, const stack_t __user *uss, - stack_t __user *uoss) +int sys_sigaltstack(struct pt_regs *regs) +{ + const stack_t __user *uss = (const stack_t __user *)regs->bx; + stack_t __user *uoss = (stack_t __user *)regs->cx; + + return do_sigaltstack(uss, uoss, regs->sp); +} #else /* !CONFIG_X86_32 */ asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) -#endif /* CONFIG_X86_32 */ { return do_sigaltstack(uss, uoss, regs->sp); } +#endif /* CONFIG_X86_32 */ /* * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -ptregscall unsigned long sys_sigreturn(struct pt_regs *regs) +unsigned long sys_sigreturn(struct pt_regs *regs) { struct sigframe __user *frame; unsigned long ax; @@ -629,13 +633,16 @@ badframe: } #ifdef CONFIG_X86_32 -ptregscall int sys_rt_sigreturn(struct pt_regs *regs) -#else /* !CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -#endif /* CONFIG_X86_32 */ +int sys_rt_sigreturn(struct pt_regs *regs) { return do_rt_sigreturn(regs); } +#else /* !CONFIG_X86_32 */ +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ + return do_rt_sigreturn(regs); +} +#endif /* CONFIG_X86_32 */ /* * OK, we're invoking a handler: diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 8fa6ba7c923..d7ac84e7fc1 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -197,8 +197,9 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -ptregscall int sys_vm86old(struct pt_regs *regs, struct vm86_struct __user *v86) +int sys_vm86old(struct pt_regs *regs) { + struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. * This remains on the stack until we @@ -226,7 +227,7 @@ out: } -ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long arg) +int sys_vm86(struct pt_regs *regs) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -238,12 +239,12 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a struct vm86plus_struct __user *v86; tsk = current; - switch (cmd) { + switch (regs->bx) { case VM86_REQUEST_IRQ: case VM86_FREE_IRQ: case VM86_GET_IRQ_BITS: case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(cmd, (int)arg); + ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); goto out; case VM86_PLUS_INSTALL_CHECK: /* @@ -260,7 +261,7 @@ ptregscall int sys_vm86(struct pt_regs *regs, unsigned long cmd, unsigned long a ret = -EPERM; if (tsk->thread.saved_sp0) goto out; - v86 = (struct vm86plus_struct __user *)arg; + v86 = (struct vm86plus_struct __user *)regs->cx; tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, offsetof(struct kernel_vm86_struct, regs32) - sizeof(info.regs)); From b924a28138572f03bc8647c2be8f876d27e2666a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 12:08:01 -0800 Subject: [PATCH 463/682] x86: rename *-defs.h to *-_types.h for consistency The kernel tends to call definition-only headers *_types.h, so rename the x86 page/pgtable headers accordingly. Signed-off-by: Jeremy Fitzhardinge --- .../asm/{pgtable-2level-defs.h => pgtable-2level_types.h} | 0 .../asm/{pgtable-3level-defs.h => pgtable-3level_types.h} | 0 arch/x86/include/asm/pgtable_32.h | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename arch/x86/include/asm/{pgtable-2level-defs.h => pgtable-2level_types.h} (100%) rename arch/x86/include/asm/{pgtable-3level-defs.h => pgtable-3level_types.h} (100%) diff --git a/arch/x86/include/asm/pgtable-2level-defs.h b/arch/x86/include/asm/pgtable-2level_types.h similarity index 100% rename from arch/x86/include/asm/pgtable-2level-defs.h rename to arch/x86/include/asm/pgtable-2level_types.h diff --git a/arch/x86/include/asm/pgtable-3level-defs.h b/arch/x86/include/asm/pgtable-3level_types.h similarity index 100% rename from arch/x86/include/asm/pgtable-3level-defs.h rename to arch/x86/include/asm/pgtable-3level_types.h diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 1952bb762aa..72d20e2a40f 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -39,11 +39,11 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); * newer 3-level PAE-mode page tables. */ #ifdef CONFIG_X86_PAE -# include +# include # define PMD_SIZE (1UL << PMD_SHIFT) # define PMD_MASK (~(PMD_SIZE - 1)) #else -# include +# include #endif #define PGDIR_SIZE (1UL << PGDIR_SHIFT) From 8d19c99faf6165ef095138dd595d46b9bbb34055 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 18:46:18 -0800 Subject: [PATCH 464/682] Split pgtable.h into pgtable_types.h and pgtable.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable.h | 212 +------------------------- arch/x86/include/asm/pgtable_types.h | 220 +++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 211 deletions(-) create mode 100644 arch/x86/include/asm/pgtable_types.h diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 860f1b635c4..10404e7bf32 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -3,164 +3,7 @@ #include -#define FIRST_USER_ADDRESS 0 - -#define _PAGE_BIT_PRESENT 0 /* is present */ -#define _PAGE_BIT_RW 1 /* writeable */ -#define _PAGE_BIT_USER 2 /* userspace addressable */ -#define _PAGE_BIT_PWT 3 /* page write through */ -#define _PAGE_BIT_PCD 4 /* page cache disabled */ -#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ -#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ -#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ -#define _PAGE_BIT_PAT 7 /* on 4KB pages */ -#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ -#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ -#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ -#define _PAGE_BIT_UNUSED3 11 -#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ -#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 -#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 -#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ - -/* If _PAGE_BIT_PRESENT is clear, we use these: */ -/* - if the user mapped it with PROT_NONE; pte_present gives true */ -#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL -/* - set: nonlinear file mapping, saved PTE; unset:swap */ -#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY - -#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) -#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) -#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) -#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) -#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) -#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) -#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) -#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) -#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) -#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) -#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) -#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) -#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) -#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) -#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) -#define __HAVE_ARCH_PTE_SPECIAL - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) -#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) -#else -#define _PAGE_NX (_AT(pteval_t, 0)) -#endif - -#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) -#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) - -#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_DIRTY) -#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ - _PAGE_DIRTY) - -/* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) - -#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) -#define _PAGE_CACHE_WB (0) -#define _PAGE_CACHE_WC (_PAGE_PWT) -#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) -#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) - -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) - -#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ - _PAGE_USER | _PAGE_ACCESSED) -#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) -#define PAGE_COPY PAGE_COPY_NOEXEC -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED | _PAGE_NX) -#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ - _PAGE_ACCESSED) - -#define __PAGE_KERNEL_EXEC \ - (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) -#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) - -#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) -#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) -#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) -#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) -#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) -#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) -#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) -#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) -#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) - -#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) -#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) - -#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) -#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) -#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) -#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) -#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) -#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) -#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) -#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) -#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) -#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) -#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) -#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) -#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) - -#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) -#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) -#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) -#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) - -/* xwr */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC - -/* - * early identity mapping pte attrib macros. - */ -#ifdef CONFIG_X86_64 -#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC -#else -/* - * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection - * bits are combined, this will alow user to access the high address mapped - * VDSO in the presence of CONFIG_COMPAT_VDSO - */ -#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ -#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ -#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ -#endif +#include /* * Macro to mark a page protection value as UC- @@ -172,9 +15,6 @@ #ifndef __ASSEMBLY__ -#define pgprot_writecombine pgprot_writecombine -extern pgprot_t pgprot_writecombine(pgprot_t prot); - /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. @@ -316,8 +156,6 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte_set_flags(pte, _PAGE_SPECIAL); } -extern pteval_t __supported_pte_mask; - static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) | @@ -376,32 +214,6 @@ static inline int is_new_memtype_allowed(unsigned long flags, return 1; } -#ifndef __ASSEMBLY__ -/* Indicate that x86 has its own track and untrack pfn vma functions */ -#define __HAVE_PFNMAP_TRACKING - -#define __HAVE_PHYS_MEM_ACCESS_PROT -struct file; -pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t vma_prot); -int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, - unsigned long size, pgprot_t *vma_prot); -#endif - -/* Install a pte for a particular vaddr in kernel space. */ -void set_pte_vaddr(unsigned long vaddr, pte_t pte); - -#ifdef CONFIG_X86_32 -extern void native_pagetable_setup_start(pgd_t *base); -extern void native_pagetable_setup_done(pgd_t *base); -#else -static inline void native_pagetable_setup_start(pgd_t *base) {} -static inline void native_pagetable_setup_done(pgd_t *base) {} -#endif - -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ @@ -662,28 +474,6 @@ static inline int pgd_none(pgd_t pgd) #ifndef __ASSEMBLY__ -enum { - PG_LEVEL_NONE, - PG_LEVEL_4K, - PG_LEVEL_2M, - PG_LEVEL_1G, - PG_LEVEL_NUM -}; - -#ifdef CONFIG_PROC_FS -extern void update_page_count(int level, unsigned long pages); -#else -static inline void update_page_count(int level, unsigned long pages) { } -#endif - -/* - * Helper function that returns the kernel pagetable entry controlling - * the virtual address 'address'. NULL means no pagetable entry present. - * NOTE: the return type is pte_t but if the pmd is PSE then we return it - * as a pte too. - */ -extern pte_t *lookup_address(unsigned long address, unsigned int *level); - /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h new file mode 100644 index 00000000000..f80f5a66bb8 --- /dev/null +++ b/arch/x86/include/asm/pgtable_types.h @@ -0,0 +1,220 @@ +#ifndef _ASM_X86_PGTABLE_DEFS_H +#define _ASM_X86_PGTABLE_DEFS_H + +#include + +#define FIRST_USER_ADDRESS 0 + +#define _PAGE_BIT_PRESENT 0 /* is present */ +#define _PAGE_BIT_RW 1 /* writeable */ +#define _PAGE_BIT_USER 2 /* userspace addressable */ +#define _PAGE_BIT_PWT 3 /* page write through */ +#define _PAGE_BIT_PCD 4 /* page cache disabled */ +#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ +#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ +#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ +#define _PAGE_BIT_PAT 7 /* on 4KB pages */ +#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ +#define _PAGE_BIT_UNUSED3 11 +#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ + +/* If _PAGE_BIT_PRESENT is clear, we use these: */ +/* - if the user mapped it with PROT_NONE; pte_present gives true */ +#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL +/* - set: nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY + +#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) +#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) +#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define __HAVE_ARCH_PTE_SPECIAL + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +#else +#define _PAGE_NX (_AT(pteval_t, 0)) +#endif + +#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_DIRTY) +#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ + _PAGE_DIRTY) + +/* Set of bits not changed in pte_modify */ +#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) + +#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) +#define _PAGE_CACHE_WB (0) +#define _PAGE_CACHE_WC (_PAGE_PWT) +#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) +#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) + +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) + +#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ + _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) +#define PAGE_COPY PAGE_COPY_NOEXEC +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED | _PAGE_NX) +#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ + _PAGE_ACCESSED) + +#define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) +#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) + +#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) +#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) +#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) +#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) +#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) + +#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) +#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) + +#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) +#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) +#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) +#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) +#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) +#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) +#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) +#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) +#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) +#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) + +#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) +#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) +#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) +#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) + +/* xwr */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_COPY +#define __P011 PAGE_COPY +#define __P100 PAGE_READONLY_EXEC +#define __P101 PAGE_READONLY_EXEC +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED +#define __S011 PAGE_SHARED +#define __S100 PAGE_READONLY_EXEC +#define __S101 PAGE_READONLY_EXEC +#define __S110 PAGE_SHARED_EXEC +#define __S111 PAGE_SHARED_EXEC + +/* + * early identity mapping pte attrib macros. + */ +#ifdef CONFIG_X86_64 +#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC +#else +/* + * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection + * bits are combined, this will alow user to access the high address mapped + * VDSO in the presence of CONFIG_COMPAT_VDSO + */ +#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ +#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ +#endif + +#ifndef __ASSEMBLY__ + +extern pteval_t __supported_pte_mask; + +#define pgprot_writecombine pgprot_writecombine +extern pgprot_t pgprot_writecombine(pgprot_t prot); + +/* Indicate that x86 has its own track and untrack pfn vma functions */ +#define __HAVE_PFNMAP_TRACKING + +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot); + +/* Install a pte for a particular vaddr in kernel space. */ +void set_pte_vaddr(unsigned long vaddr, pte_t pte); + +#ifdef CONFIG_X86_32 +extern void native_pagetable_setup_start(pgd_t *base); +extern void native_pagetable_setup_done(pgd_t *base); +#else +static inline void native_pagetable_setup_start(pgd_t *base) {} +static inline void native_pagetable_setup_done(pgd_t *base) {} +#endif + +struct seq_file; +extern void arch_report_meminfo(struct seq_file *m); + +enum { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, + PG_LEVEL_NUM +}; + +#ifdef CONFIG_PROC_FS +extern void update_page_count(int level, unsigned long pages); +#else +static inline void update_page_count(int level, unsigned long pages) { } +#endif + +/* + * Helper function that returns the kernel pagetable entry controlling + * the virtual address 'address'. NULL means no pagetable entry present. + * NOTE: the return type is pte_t but if the pmd is PSE then we return it + * as a pte too. + */ +extern pte_t *lookup_address(unsigned long address, unsigned int *level); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PGTABLE_DEFS_H */ From f402a65f93c7127b2bd93a4b2fe182cd859fb4c1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 18:49:05 -0800 Subject: [PATCH 465/682] x86: Split pgtable_32.h into pgtable_32.h and pgtable_32_types.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable_32.h | 42 +--------------------- arch/x86/include/asm/pgtable_32_types.h | 46 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 41 deletions(-) create mode 100644 arch/x86/include/asm/pgtable_32_types.h diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 72d20e2a40f..97612fc7632 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_PGTABLE_32_H #define _ASM_X86_PGTABLE_32_H +#include /* * The Linux memory management assumes a three-level page table setup. On @@ -33,47 +34,6 @@ void paging_init(void); extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); -/* - * The Linux x86 paging architecture is 'compile-time dual-mode', it - * implements both the traditional 2-level x86 page tables and the - * newer 3-level PAE-mode page tables. - */ -#ifdef CONFIG_X86_PAE -# include -# define PMD_SIZE (1UL << PMD_SHIFT) -# define PMD_MASK (~(PMD_SIZE - 1)) -#else -# include -#endif - -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE - 1)) - -/* Just any arbitrary offset to the start of the vmalloc VM area: the - * current 8MB value just means that there will be a 8MB "hole" after the - * physical memory until the kernel virtual memory starts. That means that - * any out-of-bounds memory accesses will hopefully be caught. - * The vmalloc() routines leaves a hole of 4kB between each vmalloced - * area for the same reason. ;) - */ -#define VMALLOC_OFFSET (8 * 1024 * 1024) -#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif - -#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ - & PMD_MASK) - -#ifdef CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) -#else -# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) -#endif - -#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) /* * Define this if things work differently on an i386 and an i486: diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h new file mode 100644 index 00000000000..bd8df3b2fe0 --- /dev/null +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -0,0 +1,46 @@ +#ifndef _ASM_X86_PGTABLE_32_DEFS_H +#define _ASM_X86_PGTABLE_32_DEFS_H + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifdef CONFIG_X86_PAE +# include +# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_MASK (~(PMD_SIZE - 1)) +#else +# include +#endif + +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + +/* Just any arbitrary offset to the start of the vmalloc VM area: the + * current 8MB value just means that there will be a 8MB "hole" after the + * physical memory until the kernel virtual memory starts. That means that + * any out-of-bounds memory accesses will hopefully be caught. + * The vmalloc() routines leaves a hole of 4kB between each vmalloced + * area for the same reason. ;) + */ +#define VMALLOC_OFFSET (8 * 1024 * 1024) +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +#ifdef CONFIG_X86_PAE +#define LAST_PKMAP 512 +#else +#define LAST_PKMAP 1024 +#endif + +#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ + & PMD_MASK) + +#ifdef CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +#endif + +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + +#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ From fb3551491b20442c0de0d4debd54f40c654bbe98 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 18:50:52 -0800 Subject: [PATCH 466/682] x86: Split pgtable_64.h into pgtable_64_types.h and pgtable_64.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable_64.h | 48 ++----------------------- arch/x86/include/asm/pgtable_64_types.h | 46 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 46 deletions(-) create mode 100644 arch/x86/include/asm/pgtable_64_types.h diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 1c4e247c51f..6b87bc6d501 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -2,6 +2,8 @@ #define _ASM_X86_PGTABLE_64_H #include +#include + #ifndef __ASSEMBLY__ /* @@ -25,32 +27,6 @@ extern void paging_init(void); #endif /* !__ASSEMBLY__ */ -#define SHARED_KERNEL_PMD 0 - -/* - * PGDIR_SHIFT determines what a top-level page table entry can map - */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 - -/* - * 3rd level page - */ -#define PUD_SHIFT 30 -#define PTRS_PER_PUD 512 - -/* - * PMD_SHIFT determines the size of the area a middle-level - * page table can map - */ -#define PMD_SHIFT 21 -#define PTRS_PER_PMD 512 - -/* - * entries per page directory level - */ -#define PTRS_PER_PTE 512 - #ifndef __ASSEMBLY__ #define pte_ERROR(e) \ @@ -130,26 +106,6 @@ static inline void native_pgd_clear(pgd_t *pgd) native_set_pgd(pgd, native_make_pgd(0)); } -#endif /* !__ASSEMBLY__ */ - -#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE - 1)) -#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE - 1)) -#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE - 1)) - - -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) -#define VMALLOC_START _AC(0xffffc20000000000, UL) -#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) -#define VMEMMAP_START _AC(0xffffe20000000000, UL) -#define MODULES_VADDR _AC(0xffffffffa0000000, UL) -#define MODULES_END _AC(0xffffffffff000000, UL) -#define MODULES_LEN (MODULES_END - MODULES_VADDR) - -#ifndef __ASSEMBLY__ - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h new file mode 100644 index 00000000000..ffaf1934068 --- /dev/null +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -0,0 +1,46 @@ +#ifndef _ASM_X86_PGTABLE_64_DEFS_H +#define _ASM_X86_PGTABLE_64_DEFS_H + +#define SHARED_KERNEL_PMD 0 + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 + +/* + * 3rd level page + */ +#define PUD_SHIFT 30 +#define PTRS_PER_PUD 512 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE - 1)) + + +#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define VMALLOC_START _AC(0xffffc20000000000, UL) +#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) +#define VMEMMAP_START _AC(0xffffe20000000000, UL) +#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +#define MODULES_END _AC(0xffffffffff000000, UL) +#define MODULES_LEN (MODULES_END - MODULES_VADDR) + +#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ From 1484096ceb4d5f2b27c0fe53f125ee0903eac9af Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 19:02:47 -0800 Subject: [PATCH 467/682] x86: Include pgtable_32|64_types.h in pgtable_types.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/pgtable_types.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f80f5a66bb8..e5d5a8d3577 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -217,4 +217,10 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level); #endif /* !__ASSEMBLY__ */ +#ifdef CONFIG_X86_32 +# include "pgtable_32_types.h" +#else +# include "pgtable_64_types.h" +#endif + #endif /* _ASM_X86_PGTABLE_DEFS_H */ From 51c78eb3f0eb033f9fb4f2316851df1d9b07b953 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 22:52:14 -0800 Subject: [PATCH 468/682] x86: create _types.h counterparts for page*.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page.h | 64 +-------------- arch/x86/include/asm/page_32.h | 89 +-------------------- arch/x86/include/asm/page_32_types.h | 90 +++++++++++++++++++++ arch/x86/include/asm/page_64.h | 101 +----------------------- arch/x86/include/asm/page_64.h.rej | 114 +++++++++++++++++++++++++++ arch/x86/include/asm/page_64_types.h | 102 ++++++++++++++++++++++++ arch/x86/include/asm/page_types.h | 77 ++++++++++++++++++ 7 files changed, 389 insertions(+), 248 deletions(-) create mode 100644 arch/x86/include/asm/page_32_types.h create mode 100644 arch/x86/include/asm/page_64.h.rej create mode 100644 arch/x86/include/asm/page_64_types.h create mode 100644 arch/x86/include/asm/page_types.h diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 40226999cbf..3b2d2af951c 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -1,42 +1,9 @@ #ifndef _ASM_X86_PAGE_H #define _ASM_X86_PAGE_H -#include - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - #ifdef __KERNEL__ -#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) -#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) - -/* Cast PAGE_MASK to a signed type so that it is sign-extended if - virtual addresses are 32-bits but physical addresses are larger - (ie, 32-bit PAE). */ -#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) - -/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ -#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) - -/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ -#define PTE_FLAGS_MASK (~PTE_PFN_MASK) - -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define HPAGE_SHIFT PMD_SHIFT -#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) - -#define HUGE_MAX_HSTATE 2 - -#ifndef __ASSEMBLY__ -#include -#endif +#include #ifdef CONFIG_X86_64 #include @@ -44,39 +11,18 @@ #include #endif /* CONFIG_X86_64 */ -#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) - -#define VM_DATA_DEFAULT_FLAGS \ - (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ - VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) - - #ifndef __ASSEMBLY__ -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pgprotval_t pgprot; } pgprot_t; - -extern int page_is_ram(unsigned long pagenr); -extern int pagerange_is_ram(unsigned long start, unsigned long end); -extern int devmem_is_allowed(unsigned long pagenr); -extern void map_devmem(unsigned long pfn, unsigned long size, - pgprot_t vma_prot); -extern void unmap_devmem(unsigned long pfn, unsigned long size, - pgprot_t vma_prot); - -extern unsigned long max_low_pfn_mapped; -extern unsigned long max_pfn_mapped; - struct page; static inline void clear_user_page(void *page, unsigned long vaddr, - struct page *pg) + struct page *pg) { clear_page(page); } static inline void copy_user_page(void *to, void *from, unsigned long vaddr, - struct page *topage) + struct page *topage) { copy_page(to, from); } @@ -102,8 +48,6 @@ static inline pgdval_t pgd_flags(pgd_t pgd) #if PAGETABLE_LEVELS >= 3 #if PAGETABLE_LEVELS == 4 -typedef struct { pudval_t pud; } pud_t; - static inline pud_t native_make_pud(pmdval_t val) { return (pud_t) { val }; @@ -127,8 +71,6 @@ static inline pudval_t pud_flags(pud_t pud) return native_pud_val(pud) & PTE_FLAGS_MASK; } -typedef struct { pmdval_t pmd; } pmd_t; - static inline pmd_t native_make_pmd(pmdval_t val) { return (pmd_t) { val }; diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index bcde0d7b432..b3f0bf79e84 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -1,82 +1,12 @@ #ifndef _ASM_X86_PAGE_32_H #define _ASM_X86_PAGE_32_H -/* - * This handles the memory map. - * - * A __PAGE_OFFSET of 0xC0000000 means that the kernel has - * a virtual address space of one gigabyte, which limits the - * amount of physical memory you can use to about 950MB. - * - * If you want more physical memory than this then see the CONFIG_HIGHMEM4G - * and CONFIG_HIGHMEM64G options in the kernel configuration. - */ -#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) - -#ifdef CONFIG_4KSTACKS -#define THREAD_ORDER 0 -#else -#define THREAD_ORDER 1 -#endif -#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) - -#define STACKFAULT_STACK 0 -#define DOUBLEFAULT_STACK 1 -#define NMI_STACK 0 -#define DEBUG_STACK 0 -#define MCE_STACK 0 -#define N_EXCEPTION_STACKS 1 - -#ifdef CONFIG_X86_PAE -/* 44=32+12, the limit we can fit into an unsigned long pfn */ -#define __PHYSICAL_MASK_SHIFT 44 -#define __VIRTUAL_MASK_SHIFT 32 -#define PAGETABLE_LEVELS 3 - -#ifndef __ASSEMBLY__ -typedef u64 pteval_t; -typedef u64 pmdval_t; -typedef u64 pudval_t; -typedef u64 pgdval_t; -typedef u64 pgprotval_t; - -typedef union { - struct { - unsigned long pte_low, pte_high; - }; - pteval_t pte; -} pte_t; -#endif /* __ASSEMBLY__ - */ -#else /* !CONFIG_X86_PAE */ -#define __PHYSICAL_MASK_SHIFT 32 -#define __VIRTUAL_MASK_SHIFT 32 -#define PAGETABLE_LEVELS 2 - -#ifndef __ASSEMBLY__ -typedef unsigned long pteval_t; -typedef unsigned long pmdval_t; -typedef unsigned long pudval_t; -typedef unsigned long pgdval_t; -typedef unsigned long pgprotval_t; - -typedef union { - pteval_t pte; - pteval_t pte_low; -} pte_t; - -#endif /* __ASSEMBLY__ */ -#endif /* CONFIG_X86_PAE */ - -#ifndef __ASSEMBLY__ -typedef struct page *pgtable_t; -#endif +#include #ifdef CONFIG_HUGETLB_PAGE #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif -#ifndef __ASSEMBLY__ #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); @@ -89,22 +19,7 @@ extern unsigned long __phys_addr(unsigned long); #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* CONFIG_FLATMEM */ -extern int nx_enabled; - -/* - * This much address space is reserved for vmalloc() and iomap() - * as well as fixmap mappings. - */ -extern unsigned int __VMALLOC_RESERVE; -extern int sysctl_legacy_va_layout; - -extern void find_low_pfn_range(void); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); -extern void initmem_init(unsigned long, unsigned long); -extern void free_initmem(void); -extern void setup_bootmem_allocator(void); - +#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_USE_3DNOW #include diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h new file mode 100644 index 00000000000..b7b5402d7ab --- /dev/null +++ b/arch/x86/include/asm/page_32_types.h @@ -0,0 +1,90 @@ +#ifndef _ASM_X86_PAGE_32_DEFS_H +#define _ASM_X86_PAGE_32_DEFS_H + +#include + +/* + * This handles the memory map. + * + * A __PAGE_OFFSET of 0xC0000000 means that the kernel has + * a virtual address space of one gigabyte, which limits the + * amount of physical memory you can use to about 950MB. + * + * If you want more physical memory than this then see the CONFIG_HIGHMEM4G + * and CONFIG_HIGHMEM64G options in the kernel configuration. + */ +#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) + +#ifdef CONFIG_4KSTACKS +#define THREAD_ORDER 0 +#else +#define THREAD_ORDER 1 +#endif +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) + +#define STACKFAULT_STACK 0 +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 0 +#define DEBUG_STACK 0 +#define MCE_STACK 0 +#define N_EXCEPTION_STACKS 1 + +#ifdef CONFIG_X86_PAE +/* 44=32+12, the limit we can fit into an unsigned long pfn */ +#define __PHYSICAL_MASK_SHIFT 44 +#define __VIRTUAL_MASK_SHIFT 32 +#define PAGETABLE_LEVELS 3 + +#else /* !CONFIG_X86_PAE */ +#define __PHYSICAL_MASK_SHIFT 32 +#define __VIRTUAL_MASK_SHIFT 32 +#define PAGETABLE_LEVELS 2 +#endif /* CONFIG_X86_PAE */ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_PAE +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 pgdval_t; +typedef u64 pgprotval_t; + +typedef union { + struct { + unsigned long pte_low, pte_high; + }; + pteval_t pte; +} pte_t; +#else /* !CONFIG_X86_PAE */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef union { + pteval_t pte; + pteval_t pte_low; +} pte_t; +#endif /* CONFIG_X86_PAE */ + +extern int nx_enabled; + +/* + * This much address space is reserved for vmalloc() and iomap() + * as well as fixmap mappings. + */ +extern unsigned int __VMALLOC_RESERVE; +extern int sysctl_legacy_va_layout; + +extern void find_low_pfn_range(void); +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); +extern void initmem_init(unsigned long, unsigned long); +extern void free_initmem(void); +extern void setup_bootmem_allocator(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PAGE_32_DEFS_H */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index e27fdbe5f9e..072694ed81a 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -1,105 +1,6 @@ #ifndef _ASM_X86_PAGE_64_H #define _ASM_X86_PAGE_64_H -#define PAGETABLE_LEVELS 4 - -#define THREAD_ORDER 1 -#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) -#define CURRENT_MASK (~(THREAD_SIZE - 1)) - -#define EXCEPTION_STACK_ORDER 0 -#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) - -#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) -#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) - -#define IRQ_STACK_ORDER 2 -#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) - -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) - -/* - * Set __PAGE_OFFSET to the most negative possible address + - * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a - * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's - * what Xen requires. - */ -#define __PAGE_OFFSET _AC(0xffff880000000000, UL) - -#define __PHYSICAL_START CONFIG_PHYSICAL_START -#define __KERNEL_ALIGN 0x200000 - -/* - * Make sure kernel is aligned to 2MB address. Catching it at compile - * time is better. Change your config file and compile the kernel - * for a 2MB aligned address (CONFIG_PHYSICAL_START) - */ -#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 -#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" -#endif - -#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) -#define __START_KERNEL_map _AC(0xffffffff80000000, UL) - -/* See Documentation/x86_64/mm.txt for a description of the memory map. */ -#define __PHYSICAL_MASK_SHIFT 46 -#define __VIRTUAL_MASK_SHIFT 48 - -/* - * Kernel image size is limited to 512 MB (see level2_kernel_pgt in - * arch/x86/kernel/head_64.S), and it is mapped here: - */ -#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) - -#ifndef __ASSEMBLY__ -void clear_page(void *page); -void copy_page(void *to, void *from); - -/* duplicated to the one in bootmem.h */ -extern unsigned long max_pfn; -extern unsigned long phys_base; - -extern unsigned long __phys_addr(unsigned long); -#define __phys_reloc_hide(x) (x) - -/* - * These are used to make use of C type-checking.. - */ -typedef unsigned long pteval_t; -typedef unsigned long pmdval_t; -typedef unsigned long pudval_t; -typedef unsigned long pgdval_t; -typedef unsigned long pgprotval_t; - -typedef struct page *pgtable_t; - -typedef struct { pteval_t pte; } pte_t; - -#define vmemmap ((struct page *)VMEMMAP_START) - -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - -extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); -extern void free_initmem(void); - -extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); -extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); - -#endif /* !__ASSEMBLY__ */ - -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_pfn) -#endif - +#include #endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/include/asm/page_64.h.rej b/arch/x86/include/asm/page_64.h.rej new file mode 100644 index 00000000000..9b1807f1859 --- /dev/null +++ b/arch/x86/include/asm/page_64.h.rej @@ -0,0 +1,114 @@ +*************** +*** 1,105 **** + #ifndef _ASM_X86_PAGE_64_H + #define _ASM_X86_PAGE_64_H + +- #define PAGETABLE_LEVELS 4 +- +- #define THREAD_ORDER 1 +- #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +- #define CURRENT_MASK (~(THREAD_SIZE - 1)) +- +- #define EXCEPTION_STACK_ORDER 0 +- #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) +- +- #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) +- #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) +- +- #define IRQSTACK_ORDER 2 +- #define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +- +- #define STACKFAULT_STACK 1 +- #define DOUBLEFAULT_STACK 2 +- #define NMI_STACK 3 +- #define DEBUG_STACK 4 +- #define MCE_STACK 5 +- #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +- +- #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +- #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) +- +- /* +- * Set __PAGE_OFFSET to the most negative possible address + +- * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a +- * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's +- * what Xen requires. +- */ +- #define __PAGE_OFFSET _AC(0xffff880000000000, UL) +- +- #define __PHYSICAL_START CONFIG_PHYSICAL_START +- #define __KERNEL_ALIGN 0x200000 +- +- /* +- * Make sure kernel is aligned to 2MB address. Catching it at compile +- * time is better. Change your config file and compile the kernel +- * for a 2MB aligned address (CONFIG_PHYSICAL_START) +- */ +- #if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 +- #error "CONFIG_PHYSICAL_START must be a multiple of 2MB" +- #endif +- +- #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +- #define __START_KERNEL_map _AC(0xffffffff80000000, UL) +- +- /* See Documentation/x86_64/mm.txt for a description of the memory map. */ +- #define __PHYSICAL_MASK_SHIFT 46 +- #define __VIRTUAL_MASK_SHIFT 48 +- +- /* +- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in +- * arch/x86/kernel/head_64.S), and it is mapped here: +- */ +- #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +- #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) +- +- #ifndef __ASSEMBLY__ +- void clear_page(void *page); +- void copy_page(void *to, void *from); +- +- /* duplicated to the one in bootmem.h */ +- extern unsigned long max_pfn; +- extern unsigned long phys_base; +- +- extern unsigned long __phys_addr(unsigned long); +- #define __phys_reloc_hide(x) (x) +- +- /* +- * These are used to make use of C type-checking.. +- */ +- typedef unsigned long pteval_t; +- typedef unsigned long pmdval_t; +- typedef unsigned long pudval_t; +- typedef unsigned long pgdval_t; +- typedef unsigned long pgprotval_t; +- +- typedef struct page *pgtable_t; +- +- typedef struct { pteval_t pte; } pte_t; +- +- #define vmemmap ((struct page *)VMEMMAP_START) +- +- extern unsigned long init_memory_mapping(unsigned long start, +- unsigned long end); +- +- extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +- extern void free_initmem(void); +- +- extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +- extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); +- +- #endif /* !__ASSEMBLY__ */ +- +- #ifdef CONFIG_FLATMEM +- #define pfn_valid(pfn) ((pfn) < max_pfn) +- #endif +- + + #endif /* _ASM_X86_PAGE_64_H */ +--- 1,6 ---- + #ifndef _ASM_X86_PAGE_64_H + #define _ASM_X86_PAGE_64_H + ++ #include + + #endif /* _ASM_X86_PAGE_64_H */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h new file mode 100644 index 00000000000..d00cd388082 --- /dev/null +++ b/arch/x86/include/asm/page_64_types.h @@ -0,0 +1,102 @@ +#ifndef _ASM_X86_PAGE_64_DEFS_H +#define _ASM_X86_PAGE_64_DEFS_H + +#define PAGETABLE_LEVELS 4 + +#define THREAD_ORDER 1 +#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +#define CURRENT_MASK (~(THREAD_SIZE - 1)) + +#define EXCEPTION_STACK_ORDER 0 +#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) + +#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) +#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) + +#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) + +#define STACKFAULT_STACK 1 +#define DOUBLEFAULT_STACK 2 +#define NMI_STACK 3 +#define DEBUG_STACK 4 +#define MCE_STACK 5 +#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ + +#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) + +/* + * Set __PAGE_OFFSET to the most negative possible address + + * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a + * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's + * what Xen requires. + */ +#define __PAGE_OFFSET _AC(0xffff880000000000, UL) + +#define __PHYSICAL_START CONFIG_PHYSICAL_START +#define __KERNEL_ALIGN 0x200000 + +/* + * Make sure kernel is aligned to 2MB address. Catching it at compile + * time is better. Change your config file and compile the kernel + * for a 2MB aligned address (CONFIG_PHYSICAL_START) + */ +#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 +#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" +#endif + +#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +#define __START_KERNEL_map _AC(0xffffffff80000000, UL) + +/* See Documentation/x86_64/mm.txt for a description of the memory map. */ +#define __PHYSICAL_MASK_SHIFT 46 +#define __VIRTUAL_MASK_SHIFT 48 + +/* + * Kernel image size is limited to 512 MB (see level2_kernel_pgt in + * arch/x86/kernel/head_64.S), and it is mapped here: + */ +#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) + +#ifndef __ASSEMBLY__ +void clear_page(void *page); +void copy_page(void *to, void *from); + +/* duplicated to the one in bootmem.h */ +extern unsigned long max_pfn; +extern unsigned long phys_base; + +extern unsigned long __phys_addr(unsigned long); +#define __phys_reloc_hide(x) (x) + +/* + * These are used to make use of C type-checking.. + */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef struct { pteval_t pte; } pte_t; + +#define vmemmap ((struct page *)VMEMMAP_START) + +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); + +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); + +extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); + +#endif /* !__ASSEMBLY__ */ + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_pfn) +#endif + +#endif /* _ASM_X86_PAGE_64_DEFS_H */ diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h new file mode 100644 index 00000000000..65787ad4c59 --- /dev/null +++ b/arch/x86/include/asm/page_types.h @@ -0,0 +1,77 @@ +#ifndef _ASM_X86_PAGE_DEFS_H +#define _ASM_X86_PAGE_DEFS_H + +#include + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) + +/* Cast PAGE_MASK to a signed type so that it is sign-extended if + virtual addresses are 32-bits but physical addresses are larger + (ie, 32-bit PAE). */ +#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) + +/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) + +/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) + +#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) + +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#define HUGE_MAX_HSTATE 2 + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + +#define VM_DATA_DEFAULT_FLAGS \ + (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ + VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifdef CONFIG_X86_64 +#include +#else +#include +#endif /* CONFIG_X86_64 */ + +#ifndef __ASSEMBLY__ + +#include + +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pgprotval_t pgprot; } pgprot_t; + +#if PAGETABLE_LEVELS > 3 +typedef struct { pudval_t pud; } pud_t; +#endif + +#if PAGETABLE_LEVELS > 2 +typedef struct { pmdval_t pmd; } pmd_t; +#endif + +typedef struct page *pgtable_t; + +extern int page_is_ram(unsigned long pagenr); +extern int pagerange_is_ram(unsigned long start, unsigned long end); +extern int devmem_is_allowed(unsigned long pagenr); +extern void map_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); +extern void unmap_devmem(unsigned long pfn, unsigned long size, + pgprot_t vma_prot); + +extern unsigned long max_low_pfn_mapped; +extern unsigned long max_pfn_mapped; + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_X86_PAGE_DEFS_H */ From 1dfc07aad5479f1fb832ff6f61a5a9ce822d9e1f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 23:24:26 -0800 Subject: [PATCH 469/682] x86: move 2 and 3 level asm-generic defs into page-defs Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page.h | 4 ---- arch/x86/include/asm/page_types.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 3b2d2af951c..da54f6c48a7 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -58,8 +58,6 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else /* PAGETABLE_LEVELS == 3 */ -#include - static inline pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.pgd); @@ -82,8 +80,6 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) } #else /* PAGETABLE_LEVELS == 2 */ -#include - static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.pgd); diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 65787ad4c59..92dfd251a65 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -53,10 +53,14 @@ typedef struct { pgprotval_t pgprot; } pgprot_t; #if PAGETABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; +#else +#include #endif #if PAGETABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; +#else +#include #endif typedef struct page *pgtable_t; From e42778de31d78ae262a3b901264eabefb9c3b51b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Feb 2009 23:42:01 -0800 Subject: [PATCH 470/682] x86: move defs around to allow paravirt.h to just include page_types.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page.h | 78 ---------------------------- arch/x86/include/asm/page_32_types.h | 2 + arch/x86/include/asm/page_types.h | 74 ++++++++++++++++++++++++++ arch/x86/include/asm/paravirt.h | 2 +- 4 files changed, 77 insertions(+), 79 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index da54f6c48a7..28423609b00 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -31,84 +31,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -static inline pgd_t native_make_pgd(pgdval_t val) -{ - return (pgd_t) { val }; -} - -static inline pgdval_t native_pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} - -static inline pgdval_t pgd_flags(pgd_t pgd) -{ - return native_pgd_val(pgd) & PTE_FLAGS_MASK; -} - -#if PAGETABLE_LEVELS >= 3 -#if PAGETABLE_LEVELS == 4 -static inline pud_t native_make_pud(pmdval_t val) -{ - return (pud_t) { val }; -} - -static inline pudval_t native_pud_val(pud_t pud) -{ - return pud.pud; -} -#else /* PAGETABLE_LEVELS == 3 */ -static inline pudval_t native_pud_val(pud_t pud) -{ - return native_pgd_val(pud.pgd); -} -#endif /* PAGETABLE_LEVELS == 4 */ - -static inline pudval_t pud_flags(pud_t pud) -{ - return native_pud_val(pud) & PTE_FLAGS_MASK; -} - -static inline pmd_t native_make_pmd(pmdval_t val) -{ - return (pmd_t) { val }; -} - -static inline pmdval_t native_pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} - -#else /* PAGETABLE_LEVELS == 2 */ -static inline pmdval_t native_pmd_val(pmd_t pmd) -{ - return native_pgd_val(pmd.pud.pgd); -} -#endif /* PAGETABLE_LEVELS >= 3 */ - -static inline pmdval_t pmd_flags(pmd_t pmd) -{ - return native_pmd_val(pmd) & PTE_FLAGS_MASK; -} - -static inline pte_t native_make_pte(pteval_t val) -{ - return (pte_t) { .pte = val }; -} - -static inline pteval_t native_pte_val(pte_t pte) -{ - return pte.pte; -} - -static inline pteval_t pte_flags(pte_t pte) -{ - return native_pte_val(pte) & PTE_FLAGS_MASK; -} - -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index b7b5402d7ab..83f820a2b10 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -43,6 +43,8 @@ #ifndef __ASSEMBLY__ +#include + #ifdef CONFIG_X86_PAE typedef u64 pteval_t; typedef u64 pmdval_t; diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 92dfd251a65..c41e3e8f227 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -51,18 +51,92 @@ typedef struct { pgdval_t pgd; } pgd_t; typedef struct { pgprotval_t pgprot; } pgprot_t; +static inline pgd_t native_make_pgd(pgdval_t val) +{ + return (pgd_t) { val }; +} + +static inline pgdval_t native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline pgdval_t pgd_flags(pgd_t pgd) +{ + return native_pgd_val(pgd) & PTE_FLAGS_MASK; +} + #if PAGETABLE_LEVELS > 3 typedef struct { pudval_t pud; } pud_t; + +static inline pud_t native_make_pud(pmdval_t val) +{ + return (pud_t) { val }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return pud.pud; +} #else #include + +static inline pudval_t native_pud_val(pud_t pud) +{ + return native_pgd_val(pud.pgd); +} #endif #if PAGETABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; + +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & PTE_FLAGS_MASK; +} + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { val }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} #else #include + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return native_pgd_val(pmd.pud.pgd); +} #endif +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & PTE_FLAGS_MASK; +} + +static inline pte_t native_make_pte(pteval_t val) +{ + return (pte_t) { .pte = val }; +} + +static inline pteval_t native_pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline pteval_t pte_flags(pte_t pte) +{ + return native_pte_val(pte) & PTE_FLAGS_MASK; +} + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + + typedef struct page *pgtable_t; extern int page_is_ram(unsigned long pagenr); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index c85e7475e17..db570f23b22 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,7 +4,7 @@ * para-virtualization: those hooks are defined here. */ #ifdef CONFIG_PARAVIRT -#include +#include #include /* Bitmask of what can be clobbered: usually at least eax. */ From e2f5bda94152fa567f6b48126741014123f982b8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 00:09:52 -0800 Subject: [PATCH 471/682] x86: define pud_flags and pud_large properly to allow non-PAE builds --- arch/x86/include/asm/page_types.h | 10 +++++----- arch/x86/include/asm/pgtable.h | 7 ++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c41e3e8f227..0733853e7c8 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -90,11 +90,6 @@ static inline pudval_t native_pud_val(pud_t pud) #if PAGETABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; -static inline pudval_t pud_flags(pud_t pud) -{ - return native_pud_val(pud) & PTE_FLAGS_MASK; -} - static inline pmd_t native_make_pmd(pmdval_t val) { return (pmd_t) { val }; @@ -113,6 +108,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) } #endif +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & PTE_FLAGS_MASK; +} + static inline pmdval_t pmd_flags(pmd_t pmd) { return native_pmd_val(pmd) & PTE_FLAGS_MASK; diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 10404e7bf32..9f508509797 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -398,7 +398,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) static inline int pud_large(pud_t pud) { - return (pud_flags(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == + return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == (_PAGE_PSE | _PAGE_PRESENT); } @@ -406,6 +406,11 @@ static inline int pud_bad(pud_t pud) { return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } +#else +static inline int pud_large(pud_t pud) +{ + return 0; +} #endif /* PAGETABLE_LEVELS > 2 */ #if PAGETABLE_LEVELS > 3 From 54321d947ae9d6a051b81e3eccaf2d8658aeecc6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 10:20:05 -0800 Subject: [PATCH 472/682] x86: move pte types into pgtable*.h pgtable*.h is intended for definitions relating to actual pagetables and their entries, so move all the definitions for (pte|pmd|pud|pgd)(val)?_t to the appropriate pgtable*.h headers. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page.h | 22 ---- arch/x86/include/asm/page_32_types.h | 30 ------ arch/x86/include/asm/page_64_types.h | 11 -- arch/x86/include/asm/page_types.h | 97 +----------------- arch/x86/include/asm/paravirt.h | 2 +- arch/x86/include/asm/pgtable-2level_types.h | 15 +++ arch/x86/include/asm/pgtable-3level_types.h | 18 ++++ arch/x86/include/asm/pgtable.h | 103 +++++++++++-------- arch/x86/include/asm/pgtable_64_types.h | 16 +++ arch/x86/include/asm/pgtable_types.h | 107 ++++++++++++++++++-- arch/x86/include/asm/processor.h | 1 + 11 files changed, 215 insertions(+), 207 deletions(-) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 28423609b00..467ce69306b 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -31,28 +31,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE -#ifdef CONFIG_PARAVIRT -#include -#else /* !CONFIG_PARAVIRT */ - -#define pgd_val(x) native_pgd_val(x) -#define __pgd(x) native_make_pgd(x) - -#ifndef __PAGETABLE_PUD_FOLDED -#define pud_val(x) native_pud_val(x) -#define __pud(x) native_make_pud(x) -#endif - -#ifndef __PAGETABLE_PMD_FOLDED -#define pmd_val(x) native_pmd_val(x) -#define __pmd(x) native_make_pmd(x) -#endif - -#define pte_val(x) native_pte_val(x) -#define __pte(x) native_make_pte(x) - -#endif /* CONFIG_PARAVIRT */ - #define __pa(x) __phys_addr((unsigned long)(x)) #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) /* __pa_symbol should be used for C visible symbols. diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 83f820a2b10..b5486aaf36e 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -43,36 +43,6 @@ #ifndef __ASSEMBLY__ -#include - -#ifdef CONFIG_X86_PAE -typedef u64 pteval_t; -typedef u64 pmdval_t; -typedef u64 pudval_t; -typedef u64 pgdval_t; -typedef u64 pgprotval_t; - -typedef union { - struct { - unsigned long pte_low, pte_high; - }; - pteval_t pte; -} pte_t; -#else /* !CONFIG_X86_PAE */ -typedef unsigned long pteval_t; -typedef unsigned long pmdval_t; -typedef unsigned long pudval_t; -typedef unsigned long pgdval_t; -typedef unsigned long pgprotval_t; - -typedef union { - pteval_t pte; - pteval_t pte_low; -} pte_t; -#endif /* CONFIG_X86_PAE */ - -extern int nx_enabled; - /* * This much address space is reserved for vmalloc() and iomap() * as well as fixmap mappings. diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d00cd388082..bc73af3eda9 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -71,17 +71,6 @@ extern unsigned long phys_base; extern unsigned long __phys_addr(unsigned long); #define __phys_reloc_hide(x) (x) -/* - * These are used to make use of C type-checking.. - */ -typedef unsigned long pteval_t; -typedef unsigned long pmdval_t; -typedef unsigned long pudval_t; -typedef unsigned long pgdval_t; -typedef unsigned long pgprotval_t; - -typedef struct { pteval_t pte; } pte_t; - #define vmemmap ((struct page *)VMEMMAP_START) extern unsigned long init_memory_mapping(unsigned long start, diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 0733853e7c8..9f0c9596335 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -46,106 +46,15 @@ #ifndef __ASSEMBLY__ -#include - -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pgprotval_t pgprot; } pgprot_t; - -static inline pgd_t native_make_pgd(pgdval_t val) -{ - return (pgd_t) { val }; -} - -static inline pgdval_t native_pgd_val(pgd_t pgd) -{ - return pgd.pgd; -} - -static inline pgdval_t pgd_flags(pgd_t pgd) -{ - return native_pgd_val(pgd) & PTE_FLAGS_MASK; -} - -#if PAGETABLE_LEVELS > 3 -typedef struct { pudval_t pud; } pud_t; - -static inline pud_t native_make_pud(pmdval_t val) -{ - return (pud_t) { val }; -} - -static inline pudval_t native_pud_val(pud_t pud) -{ - return pud.pud; -} -#else -#include - -static inline pudval_t native_pud_val(pud_t pud) -{ - return native_pgd_val(pud.pgd); -} -#endif - -#if PAGETABLE_LEVELS > 2 -typedef struct { pmdval_t pmd; } pmd_t; - -static inline pmd_t native_make_pmd(pmdval_t val) -{ - return (pmd_t) { val }; -} - -static inline pmdval_t native_pmd_val(pmd_t pmd) -{ - return pmd.pmd; -} -#else -#include - -static inline pmdval_t native_pmd_val(pmd_t pmd) -{ - return native_pgd_val(pmd.pud.pgd); -} -#endif - -static inline pudval_t pud_flags(pud_t pud) -{ - return native_pud_val(pud) & PTE_FLAGS_MASK; -} - -static inline pmdval_t pmd_flags(pmd_t pmd) -{ - return native_pmd_val(pmd) & PTE_FLAGS_MASK; -} - -static inline pte_t native_make_pte(pteval_t val) -{ - return (pte_t) { .pte = val }; -} - -static inline pteval_t native_pte_val(pte_t pte) -{ - return pte.pte; -} - -static inline pteval_t pte_flags(pte_t pte) -{ - return native_pte_val(pte) & PTE_FLAGS_MASK; -} - -#define pgprot_val(x) ((x).pgprot) -#define __pgprot(x) ((pgprot_t) { (x) } ) - - -typedef struct page *pgtable_t; +struct pgprot; extern int page_is_ram(unsigned long pagenr); extern int pagerange_is_ram(unsigned long start, unsigned long end); extern int devmem_is_allowed(unsigned long pagenr); extern void map_devmem(unsigned long pfn, unsigned long size, - pgprot_t vma_prot); + struct pgprot vma_prot); extern void unmap_devmem(unsigned long pfn, unsigned long size, - pgprot_t vma_prot); + struct pgprot vma_prot); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index db570f23b22..7ed73ccd74e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,7 +4,7 @@ * para-virtualization: those hooks are defined here. */ #ifdef CONFIG_PARAVIRT -#include +#include #include /* Bitmask of what can be clobbered: usually at least eax. */ diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index d77db8990ea..09ae67efceb 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -1,6 +1,21 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H #define _ASM_X86_PGTABLE_2LEVEL_DEFS_H +#ifndef __ASSEMBLY__ +#include + +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef union { + pteval_t pte; + pteval_t pte_low; +} pte_t; +#endif /* !__ASSEMBLY__ */ + #define SHARED_KERNEL_PMD 0 /* diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 62561367653..bcc89625ebe 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -1,6 +1,23 @@ #ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H #define _ASM_X86_PGTABLE_3LEVEL_DEFS_H +#ifndef __ASSEMBLY__ +#include + +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 pgdval_t; +typedef u64 pgprotval_t; + +typedef union { + struct { + unsigned long pte_low, pte_high; + }; + pteval_t pte; +} pte_t; +#endif /* !__ASSEMBLY__ */ + #ifdef CONFIG_PARAVIRT #define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) #else @@ -25,4 +42,5 @@ */ #define PTRS_PER_PTE 512 + #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 9f508509797..b0d1066ab6a 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -25,6 +25,66 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; extern spinlock_t pgd_lock; extern struct list_head pgd_list; +#ifdef CONFIG_PARAVIRT +#include +#else /* !CONFIG_PARAVIRT */ +#define set_pte(ptep, pte) native_set_pte(ptep, pte) +#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) + +#define set_pte_present(mm, addr, ptep, pte) \ + native_set_pte_present(mm, addr, ptep, pte) +#define set_pte_atomic(ptep, pte) \ + native_set_pte_atomic(ptep, pte) + +#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) + +#ifndef __PAGETABLE_PUD_FOLDED +#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +#define pgd_clear(pgd) native_pgd_clear(pgd) +#endif + +#ifndef set_pud +# define set_pud(pudp, pud) native_set_pud(pudp, pud) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pud_clear(pud) native_pud_clear(pud) +#endif + +#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +#define pmd_clear(pmd) native_pmd_clear(pmd) + +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) + +static inline void __init paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void __init paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} + +#define pgd_val(x) native_pgd_val(x) +#define __pgd(x) native_make_pgd(x) + +#ifndef __PAGETABLE_PUD_FOLDED +#define pud_val(x) native_pud_val(x) +#define __pud(x) native_make_pud(x) +#endif + +#ifndef __PAGETABLE_PMD_FOLDED +#define pmd_val(x) native_pmd_val(x) +#define __pmd(x) native_make_pmd(x) +#endif + +#define pte_val(x) native_pte_val(x) +#define __pte(x) native_make_pte(x) + +#endif /* CONFIG_PARAVIRT */ + /* * The following only work if pte_present() is true. * Undefined behaviour if not.. @@ -214,49 +274,6 @@ static inline int is_new_memtype_allowed(unsigned long flags, return 1; } -#ifdef CONFIG_PARAVIRT -#include -#else /* !CONFIG_PARAVIRT */ -#define set_pte(ptep, pte) native_set_pte(ptep, pte) -#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) - -#define set_pte_present(mm, addr, ptep, pte) \ - native_set_pte_present(mm, addr, ptep, pte) -#define set_pte_atomic(ptep, pte) \ - native_set_pte_atomic(ptep, pte) - -#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) - -#ifndef __PAGETABLE_PUD_FOLDED -#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) native_pgd_clear(pgd) -#endif - -#ifndef set_pud -# define set_pud(pudp, pud) native_set_pud(pudp, pud) -#endif - -#ifndef __PAGETABLE_PMD_FOLDED -#define pud_clear(pud) native_pud_clear(pud) -#endif - -#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) -#define pmd_clear(pmd) native_pmd_clear(pmd) - -#define pte_update(mm, addr, ptep) do { } while (0) -#define pte_update_defer(mm, addr, ptep) do { } while (0) - -static inline void __init paravirt_pagetable_setup_start(pgd_t *base) -{ - native_pagetable_setup_start(base); -} - -static inline void __init paravirt_pagetable_setup_done(pgd_t *base) -{ - native_pagetable_setup_done(base); -} -#endif /* CONFIG_PARAVIRT */ - #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index ffaf1934068..2f59135c6f2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -1,6 +1,22 @@ #ifndef _ASM_X86_PGTABLE_64_DEFS_H #define _ASM_X86_PGTABLE_64_DEFS_H +#ifndef __ASSEMBLY__ +#include + +/* + * These are used to make use of C type-checking.. + */ +typedef unsigned long pteval_t; +typedef unsigned long pmdval_t; +typedef unsigned long pudval_t; +typedef unsigned long pgdval_t; +typedef unsigned long pgprotval_t; + +typedef struct { pteval_t pte; } pte_t; + +#endif /* !__ASSEMBLY__ */ + #define SHARED_KERNEL_PMD 0 /* diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e5d5a8d3577..a7452f10930 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -162,9 +162,110 @@ #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ #endif +#ifdef CONFIG_X86_32 +# include "pgtable_32_types.h" +#else +# include "pgtable_64_types.h" +#endif + #ifndef __ASSEMBLY__ +#include + +typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; + +typedef struct { pgdval_t pgd; } pgd_t; + +static inline pgd_t native_make_pgd(pgdval_t val) +{ + return (pgd_t) { val }; +} + +static inline pgdval_t native_pgd_val(pgd_t pgd) +{ + return pgd.pgd; +} + +static inline pgdval_t pgd_flags(pgd_t pgd) +{ + return native_pgd_val(pgd) & PTE_FLAGS_MASK; +} + +#if PAGETABLE_LEVELS > 3 +typedef struct { pudval_t pud; } pud_t; + +static inline pud_t native_make_pud(pmdval_t val) +{ + return (pud_t) { val }; +} + +static inline pudval_t native_pud_val(pud_t pud) +{ + return pud.pud; +} +#else +#include + +static inline pudval_t native_pud_val(pud_t pud) +{ + return native_pgd_val(pud.pgd); +} +#endif + +#if PAGETABLE_LEVELS > 2 +typedef struct { pmdval_t pmd; } pmd_t; + +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { val }; +} + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return pmd.pmd; +} +#else +#include + +static inline pmdval_t native_pmd_val(pmd_t pmd) +{ + return native_pgd_val(pmd.pud.pgd); +} +#endif + +static inline pudval_t pud_flags(pud_t pud) +{ + return native_pud_val(pud) & PTE_FLAGS_MASK; +} + +static inline pmdval_t pmd_flags(pmd_t pmd) +{ + return native_pmd_val(pmd) & PTE_FLAGS_MASK; +} + +static inline pte_t native_make_pte(pteval_t val) +{ + return (pte_t) { .pte = val }; +} + +static inline pteval_t native_pte_val(pte_t pte) +{ + return pte.pte; +} + +static inline pteval_t pte_flags(pte_t pte) +{ + return native_pte_val(pte) & PTE_FLAGS_MASK; +} + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + + +typedef struct page *pgtable_t; + extern pteval_t __supported_pte_mask; +extern int nx_enabled; #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); @@ -217,10 +318,4 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level); #endif /* !__ASSEMBLY__ */ -#ifdef CONFIG_X86_32 -# include "pgtable_32_types.h" -#else -# include "pgtable_64_types.h" -#endif - #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509..96302880774 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -16,6 +16,7 @@ struct mm_struct; #include #include #include +#include #include #include #include From 744525092727827a9cf0044074db3e22dcf354fd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 11 Feb 2009 16:31:40 -0800 Subject: [PATCH 473/682] x86: merge sys_rt_sigreturn between 32 and 64 bits Impact: cleanup With the recent changes in the 32-bit code to make system calls which use struct pt_regs take a pointer, sys_rt_sigreturn() have become identical between 32 and 64 bits, and both are empty wrappers around do_rt_sigreturn(). Remove both wrappers and rename both to sys_rt_sigreturn(). Cc: Brian Gerst Cc: Tejun Heo Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/syscalls.h | 2 +- arch/x86/kernel/signal.c | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 77bb31a88ba..68b1be10cfa 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); -int sys_rt_sigreturn(struct pt_regs *); +long sys_rt_sigreturn(struct pt_regs *); /* kernel/ioport.c */ long sys_iopl(struct pt_regs *); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ccfb27412f0..7cdcd16885e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -601,7 +601,7 @@ badframe: } #endif /* CONFIG_X86_32 */ -static long do_rt_sigreturn(struct pt_regs *regs) +long sys_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe __user *frame; unsigned long ax; @@ -632,18 +632,6 @@ badframe: return 0; } -#ifdef CONFIG_X86_32 -int sys_rt_sigreturn(struct pt_regs *regs) -{ - return do_rt_sigreturn(regs); -} -#else /* !CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) -{ - return do_rt_sigreturn(regs); -} -#endif /* CONFIG_X86_32 */ - /* * OK, we're invoking a handler: */ From 58105ef1857112a186696c9b8957020090226a28 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 31 Jan 2009 12:32:26 -0800 Subject: [PATCH 474/682] x86: UV: fix header struct usage Impact: Fixes warning Fix uv.h struct usage: arch/x86/include/asm/uv/uv.h:16: warning: 'struct mm_struct' declared inside parameter list arch/x86/include/asm/uv/uv.h:16: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Randy Dunlap Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 8ac1d7e312f..8242bf96581 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -3,6 +3,9 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; +struct cpumask; +struct mm_struct; + #ifdef CONFIG_X86_UV extern enum uv_system_type get_uv_system_type(void); From bc8bd002b8371ece81c9adbdce49d7e68f0a0cbc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:43:52 +0100 Subject: [PATCH 475/682] x86, defconfig: update the 32-bit defconfig Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 406 +++++++++++++++++++++++++------- 1 file changed, 324 insertions(+), 82 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index edba00d98ac..c3186ccc936 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc5 -# Wed Sep 3 17:23:09 2008 +# Linux kernel version: 2.6.29-rc4 +# Thu Feb 12 12:42:50 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y # CONFIG_X86_64 is not set CONFIG_X86=y CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" -# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y @@ -24,16 +23,14 @@ CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_HWEIGHT=y -# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y # CONFIG_RWSEM_GENERIC_SPINLOCK is not set CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y CONFIG_GENERIC_CALIBRATE_DELAY=y # CONFIG_GENERIC_TIME_VSYSCALL is not set CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_DEFAULT_IDLE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set @@ -42,12 +39,12 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_ZONE_DMA32 is not set CONFIG_ARCH_POPULATES_NODE_MAP=y # CONFIG_AUDIT_ARCH is not set -CONFIG_ARCH_SUPPORTS_AOUT=y CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_X86_SMP=y +CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_X86_32_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y @@ -76,30 +73,44 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_AUDIT_TREE=y + +# +# RCU Subsystem +# +# CONFIG_CLASSIC_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=18 -CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_CGROUP_NS=y -# CONFIG_CGROUP_DEVICE is not set -CONFIG_CPUSETS=y CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set # CONFIG_USER_SCHED is not set CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_MEM_RES_CTLR is not set # CONFIG_SYSFS_DEPRECATED_V2 is not set -CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y +CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -124,12 +135,15 @@ CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y CONFIG_MARKERS=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y @@ -139,15 +153,10 @@ CONFIG_KRETPROBES=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y -# CONFIG_HAVE_ARCH_TRACEHOOK is not set -# CONFIG_HAVE_DMA_ATTRS is not set -CONFIG_USE_GENERIC_SMP_HELPERS=y -# CONFIG_HAVE_CLK is not set -CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -155,12 +164,10 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_LBD is not set CONFIG_BLK_DEV_IO_TRACE=y -# CONFIG_LSF is not set CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set @@ -176,7 +183,7 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_CLASSIC_RCU=y +CONFIG_FREEZER=y # # Processor type and features @@ -186,6 +193,7 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y +CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_X86_PC=y @@ -194,7 +202,7 @@ CONFIG_X86_PC=y # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set # CONFIG_X86_RDC321X is not set -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set # CONFIG_MEMTEST is not set # CONFIG_M386 is not set @@ -238,10 +246,19 @@ CONFIG_X86_TSC=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=4 CONFIG_X86_DEBUGCTLMSR=y +CONFIG_CPU_SUP_INTEL=y +CONFIG_CPU_SUP_CYRIX_32=y +CONFIG_CPU_SUP_AMD=y +CONFIG_CPU_SUP_CENTAUR_32=y +CONFIG_CPU_SUP_TRANSMETA_32=y +CONFIG_CPU_SUP_UMC_32=y +CONFIG_X86_DS=y +CONFIG_X86_PTRACE_BTS=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_DMI=y # CONFIG_IOMMU_HELPER is not set +# CONFIG_IOMMU_API is not set CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y @@ -250,12 +267,15 @@ CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set CONFIG_X86_REBOOTFIXUPS=y CONFIG_MICROCODE=y +CONFIG_MICROCODE_INTEL=y +CONFIG_MICROCODE_AMD=y CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y @@ -264,6 +284,7 @@ CONFIG_HIGHMEM4G=y # CONFIG_HIGHMEM64G is not set CONFIG_PAGE_OFFSET=0xC0000000 CONFIG_HIGHMEM=y +# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y @@ -274,14 +295,17 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_SPARSEMEM_STATIC=y -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_RESOURCES_64BIT=y +# CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y CONFIG_HIGHPTE=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y +CONFIG_X86_RESERVE_LOW_64K=y # CONFIG_MATH_EMULATION is not set CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set @@ -302,10 +326,11 @@ CONFIG_PHYSICAL_START=0x1000000 CONFIG_PHYSICAL_ALIGN=0x200000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set +# CONFIG_CMDLINE_BOOL is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # -# Power management options +# Power management and ACPI options # CONFIG_PM=y CONFIG_PM_DEBUG=y @@ -331,19 +356,13 @@ CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y CONFIG_ACPI_DOCK=y -# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y -# CONFIG_ACPI_WMI is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set # CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_EC=y # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y @@ -388,7 +407,6 @@ CONFIG_X86_ACPI_CPUFREQ=y # # shared options # -# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y @@ -415,6 +433,7 @@ CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set @@ -452,13 +471,17 @@ CONFIG_HOTPLUG_PCI=y # Executable file formats / Emulations # CONFIG_BINFMT_ELF=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +CONFIG_HAVE_AOUT=y # CONFIG_BINFMT_AOUT is not set CONFIG_BINFMT_MISC=y +CONFIG_HAVE_ATOMIC_IOMAP=y CONFIG_NET=y # # Networking options # +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -519,7 +542,6 @@ CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -557,19 +579,21 @@ CONFIG_NF_CONNTRACK_IRC=y CONFIG_NF_CONNTRACK_SIP=y CONFIG_NF_CT_NETLINK=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y CONFIG_NETFILTER_XT_MATCH_MARK=y CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_STATE=y +# CONFIG_IP_VS is not set # # IP: Netfilter Configuration # +CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_CONNTRACK_PROC_COMPAT=y CONFIG_IP_NF_IPTABLES=y @@ -595,8 +619,8 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set @@ -604,6 +628,7 @@ CONFIG_IP6_NF_MANGLE=y # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set CONFIG_LLC=y @@ -623,6 +648,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set @@ -630,6 +656,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_INGRESS is not set # @@ -644,6 +671,7 @@ CONFIG_NET_CLS=y # CONFIG_NET_CLS_RSVP is not set # CONFIG_NET_CLS_RSVP6 is not set # CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set @@ -659,7 +687,9 @@ CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set # # Network testing @@ -676,29 +706,33 @@ CONFIG_HAMRADIO=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set CONFIG_FIB_RULES=y - -# -# Wireless -# +CONFIG_WIRELESS=y CONFIG_CFG80211=y +# CONFIG_CFG80211_REG_DEBUG is not set CONFIG_NL80211=y +CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set CONFIG_MAC80211=y # # Rate control algorithm selection # -CONFIG_MAC80211_RC_PID=y -CONFIG_MAC80211_RC_DEFAULT_PID=y -CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set # CONFIG_MAC80211_DEBUG_MENU is not set -# CONFIG_IEEE80211 is not set -# CONFIG_RFKILL is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +# CONFIG_RFKILL_INPUT is not set +CONFIG_RFKILL_LEDS=y # CONFIG_NET_9P is not set # @@ -722,7 +756,7 @@ CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set +CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols @@ -750,20 +784,19 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_MISC_DEVICES=y # CONFIG_IBM_ASM is not set # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set -# CONFIG_ACER_WMI is not set -# CONFIG_ASUS_LAPTOP is not set -# CONFIG_FUJITSU_LAPTOP is not set -# CONFIG_TC1100_WMI is not set -# CONFIG_MSI_LAPTOP is not set -# CONFIG_COMPAL_LAPTOP is not set -# CONFIG_SONY_LAPTOP is not set -# CONFIG_THINKPAD_ACPI is not set -# CONFIG_INTEL_MENLOW is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -875,6 +908,7 @@ CONFIG_PATA_OLDPIIX=y CONFIG_PATA_SCH=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_AUTODETECT=y # CONFIG_MD_LINEAR is not set # CONFIG_MD_RAID0 is not set # CONFIG_MD_RAID1 is not set @@ -930,6 +964,9 @@ CONFIG_PHYLIB=y # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set # CONFIG_FIXED_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y @@ -953,6 +990,9 @@ CONFIG_NET_TULIP=y # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set @@ -960,7 +1000,6 @@ CONFIG_NET_PCI=y # CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -974,15 +1013,16 @@ CONFIG_8139TOO=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set +# CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set CONFIG_E1000E=y # CONFIG_IP1000 is not set # CONFIG_IGB is not set @@ -1000,18 +1040,23 @@ CONFIG_BNX2=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set +# CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set # CONFIG_MLX4_CORE is not set # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set +# CONFIG_QLGE is not set # CONFIG_SFC is not set CONFIG_TR=y # CONFIG_IBMOL is not set @@ -1025,9 +1070,8 @@ CONFIG_TR=y # CONFIG_WLAN_PRE80211 is not set CONFIG_WLAN_80211=y # CONFIG_PCMCIA_RAYCS is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set # CONFIG_LIBERTAS is not set +# CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set # CONFIG_HERMES is not set # CONFIG_ATMEL is not set @@ -1044,6 +1088,8 @@ CONFIG_WLAN_80211=y CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set # CONFIG_ATH9K is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set # CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set # CONFIG_IWLAGN is not set @@ -1054,6 +1100,10 @@ CONFIG_ATH5K=y # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + # # USB Network Adapters # @@ -1062,6 +1112,7 @@ CONFIG_ATH5K=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_HSO is not set CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set # CONFIG_PCMCIA_3C574 is not set @@ -1123,6 +1174,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y CONFIG_MOUSE_PS2_SYNAPTICS=y CONFIG_MOUSE_PS2_LIFEBOOK=y CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set @@ -1160,15 +1212,16 @@ CONFIG_INPUT_TOUCHSCREEN=y # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set # CONFIG_TOUCHSCREEN_MTOUCH is not set # CONFIG_TOUCHSCREEN_INEXIO is not set # CONFIG_TOUCHSCREEN_MK712 is not set # CONFIG_TOUCHSCREEN_PENMOUNT is not set # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set # CONFIG_TOUCHSCREEN_TOUCHWIN is not set -# CONFIG_TOUCHSCREEN_UCB1400 is not set # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_APANEL is not set @@ -1179,6 +1232,7 @@ CONFIG_INPUT_MISC=y # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # @@ -1245,6 +1299,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y @@ -1279,6 +1334,7 @@ CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_ALGOBIT=y # # I2C Hardware Bus support @@ -1331,8 +1387,6 @@ CONFIG_I2C_I801=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -1351,8 +1405,78 @@ CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_BATTERY_DS2760 is not set -# CONFIG_HWMON is not set +# CONFIG_BATTERY_BQ27x00 is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_LIS3LV02D is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set CONFIG_THERMAL=y +# CONFIG_THERMAL_HWMON is not set CONFIG_WATCHDOG=y # CONFIG_WATCHDOG_NOWAYOUT is not set @@ -1372,6 +1496,7 @@ CONFIG_WATCHDOG=y # CONFIG_I6300ESB_WDT is not set # CONFIG_ITCO_WDT is not set # CONFIG_IT8712F_WDT is not set +# CONFIG_IT87_WDT is not set # CONFIG_HP_WATCHDOG is not set # CONFIG_SC1200_WDT is not set # CONFIG_PC87413_WDT is not set @@ -1379,9 +1504,11 @@ CONFIG_WATCHDOG=y # CONFIG_SBC8360_WDT is not set # CONFIG_SBC7240_WDT is not set # CONFIG_CPU5_WDT is not set +# CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83697HF_WDT is not set +# CONFIG_W83697UG_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set @@ -1397,11 +1524,11 @@ CONFIG_WATCHDOG=y # USB-based Watchdog Cards # # CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # @@ -1410,7 +1537,13 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set # CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_REGULATOR is not set # # Multimedia devices @@ -1450,6 +1583,7 @@ CONFIG_DRM=y # CONFIG_DRM_I810 is not set # CONFIG_DRM_I830 is not set CONFIG_DRM_I915=y +# CONFIG_DRM_I915_KMS is not set # CONFIG_DRM_MGA is not set # CONFIG_DRM_SIS is not set # CONFIG_DRM_VIA is not set @@ -1459,6 +1593,7 @@ CONFIG_DRM_I915=y CONFIG_FB=y # CONFIG_FIRMWARE_EDID is not set # CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y @@ -1487,7 +1622,6 @@ CONFIG_FB_TILEBLITTING=y # CONFIG_FB_UVESA is not set # CONFIG_FB_VESA is not set CONFIG_FB_EFI=y -# CONFIG_FB_IMAC is not set # CONFIG_FB_N411 is not set # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set @@ -1503,6 +1637,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_S3 is not set # CONFIG_FB_SAVAGE is not set # CONFIG_FB_SIS is not set +# CONFIG_FB_VIA is not set # CONFIG_FB_NEOMAGIC is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set @@ -1515,12 +1650,15 @@ CONFIG_FB_EFI=y # CONFIG_FB_CARMINE is not set # CONFIG_FB_GEODE is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_CORGI is not set +CONFIG_BACKLIGHT_GENERIC=y # CONFIG_BACKLIGHT_PROGEAR is not set # CONFIG_BACKLIGHT_MBP_NVIDIA is not set +# CONFIG_BACKLIGHT_SAHARA is not set # # Display device support @@ -1540,10 +1678,12 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y CONFIG_SOUND=y +CONFIG_SOUND_OSS_CORE=y CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y +CONFIG_SND_JACK=y CONFIG_SND_SEQUENCER=y CONFIG_SND_SEQ_DUMMY=y CONFIG_SND_OSSEMUL=y @@ -1551,6 +1691,8 @@ CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_SEQ_HRTIMER_DEFAULT=y CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y @@ -1605,11 +1747,16 @@ CONFIG_SND_PCI=y # CONFIG_SND_FM801 is not set CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y +# CONFIG_SND_HDA_RECONFIG is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set CONFIG_SND_HDA_CODEC_REALTEK=y CONFIG_SND_HDA_CODEC_ANALOG=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_VIA=y CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_NVHDMI=y +CONFIG_SND_HDA_CODEC_INTELHDMI=y +CONFIG_SND_HDA_ELD=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_HDA_CODEC_CMEDIA=y CONFIG_SND_HDA_CODEC_SI3054=y @@ -1643,6 +1790,7 @@ CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_USX2Y is not set # CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_US122L is not set CONFIG_SND_PCMCIA=y # CONFIG_SND_VXPOCKET is not set # CONFIG_SND_PDAUDIOCF is not set @@ -1657,15 +1805,37 @@ CONFIG_HIDRAW=y # USB Input Devices # CONFIG_USB_HID=y -CONFIG_USB_HIDINPUT_POWERBOOK=y -CONFIG_HID_FF=y CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y + +# +# Special HID drivers +# +CONFIG_HID_COMPAT=y +CONFIG_HID_A4TECH=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GYRATION=y +CONFIG_HID_LOGITECH=y CONFIG_LOGITECH_FF=y # CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y CONFIG_THRUSTMASTER_FF=y CONFIG_ZEROPLUS_FF=y -CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1683,6 +1853,8 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set CONFIG_USB_MON=y +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set # # USB Host Controller Drivers @@ -1691,6 +1863,7 @@ CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_OXU210HP_HCD is not set # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y @@ -1700,6 +1873,8 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set # # USB Device Class drivers @@ -1707,20 +1882,20 @@ CONFIG_USB_UHCI_HCD=y # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y # CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set @@ -1728,7 +1903,6 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_SIERRA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set CONFIG_USB_LIBUSUAL=y @@ -1749,6 +1923,7 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1766,7 +1941,13 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set # CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y @@ -1775,6 +1956,7 @@ CONFIG_LEDS_CLASS=y # # LED drivers # +# CONFIG_LEDS_ALIX2 is not set # CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_CLEVO_MAIL is not set # CONFIG_LEDS_PCA955X is not set @@ -1785,6 +1967,7 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_TIMER is not set # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set @@ -1824,6 +2007,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set # CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set # # SPI RTC drivers @@ -1833,12 +2017,15 @@ CONFIG_RTC_INTF_DEV=y # Platform RTC drivers # CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1286 is not set # CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_V3020 is not set # @@ -1851,6 +2038,22 @@ CONFIG_DMADEVICES=y # # CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set +# CONFIG_STAGING is not set +CONFIG_X86_PLATFORM_DEVICES=y +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_TC1100_WMI is not set +# CONFIG_MSI_LAPTOP is not set +# CONFIG_PANASONIC_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +# CONFIG_INTEL_MENLOW is not set +CONFIG_EEEPC_LAPTOP=y +# CONFIG_ACPI_WMI is not set +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set # # Firmware Drivers @@ -1872,21 +2075,24 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +# CONFIG_EXT4_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QUOTA_TREE=y # CONFIG_QFMT_V1 is not set CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y @@ -1920,16 +2126,14 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y # CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set @@ -1939,6 +2143,7 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -1960,6 +2165,7 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y +# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -2066,33 +2272,54 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_VIRTUAL is not set # CONFIG_DEBUG_WRITECOUNT is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_KPROBES_SANITY_TEST is not set # CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_HAVE_FTRACE=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y -# CONFIG_FTRACE is not set +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_HW_BRANCH_TRACER=y + +# +# Tracers +# +# CONFIG_FUNCTION_TRACER is not set # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SYSPROF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_POWER_TRACER is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_HW_BRANCH_TRACER is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set # CONFIG_STRICT_DEVMEM is not set CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y +CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set @@ -2123,8 +2350,10 @@ CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set CONFIG_SECURITY_NETWORK=y # CONFIG_SECURITY_NETWORK_XFRM is not set +# CONFIG_SECURITY_PATH is not set CONFIG_SECURITY_FILE_CAPABILITIES=y # CONFIG_SECURITY_ROOTPLUG is not set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 @@ -2135,7 +2364,6 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set # CONFIG_SECURITY_SMACK is not set CONFIG_CRYPTO=y @@ -2143,11 +2371,18 @@ CONFIG_CRYPTO=y # # Crypto core or helper # +# CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_CRYPTD is not set @@ -2182,6 +2417,7 @@ CONFIG_CRYPTO_HMAC=y # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set @@ -2222,6 +2458,11 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_DEFLATE is not set # CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_PADLOCK is not set # CONFIG_CRYPTO_DEV_GEODE is not set @@ -2239,6 +2480,7 @@ CONFIG_VIRTUALIZATION=y CONFIG_BITREVERSE=y CONFIG_GENERIC_FIND_FIRST_BIT=y CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_FIND_LAST_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC_T10DIF=y From dd5fc55449c5e7e4f008d7d26cda9e6e2bc94980 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:48:48 +0100 Subject: [PATCH 476/682] x86, defconfig: update the 64-bit defconfig Signed-off-by: Ingo Molnar --- arch/x86/configs/x86_64_defconfig | 412 ++++++++++++++++++++++++------ 1 file changed, 332 insertions(+), 80 deletions(-) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 322dd2748fc..4c85bfa58e1 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,14 +1,13 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc5 -# Wed Sep 3 17:13:39 2008 +# Linux kernel version: 2.6.29-rc4 +# Thu Feb 12 12:48:13 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set CONFIG_X86_64=y CONFIG_X86=y CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" -# CONFIG_GENERIC_LOCKBREAK is not set CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_CLOCKSOURCE_WATCHDOG=y @@ -23,17 +22,16 @@ CONFIG_ZONE_DMA=y CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_IOMAP=y CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_GENERIC_HWEIGHT=y -# CONFIG_GENERIC_GPIO is not set CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_ARCH_HAS_CPU_RELAX=y +CONFIG_ARCH_HAS_DEFAULT_IDLE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y @@ -42,12 +40,12 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ZONE_DMA32=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_AUDIT_ARCH=y -CONFIG_ARCH_SUPPORTS_AOUT=y CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_X86_SMP=y +CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_X86_64_SMP=y CONFIG_X86_HT=y CONFIG_X86_BIOS_REBOOT=y @@ -76,30 +74,44 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y CONFIG_AUDITSYSCALL=y CONFIG_AUDIT_TREE=y + +# +# RCU Subsystem +# +# CONFIG_CLASSIC_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=64 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=18 -CONFIG_CGROUPS=y -# CONFIG_CGROUP_DEBUG is not set -CONFIG_CGROUP_NS=y -# CONFIG_CGROUP_DEVICE is not set -CONFIG_CPUSETS=y CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set # CONFIG_USER_SCHED is not set CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +CONFIG_CGROUP_FREEZER=y +# CONFIG_CGROUP_DEVICE is not set +CONFIG_CPUSETS=y +CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y # CONFIG_CGROUP_MEM_RES_CTLR is not set # CONFIG_SYSFS_DEPRECATED_V2 is not set -CONFIG_PROC_PID_CPUSET=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y +CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -124,12 +136,15 @@ CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y CONFIG_MARKERS=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y @@ -139,15 +154,10 @@ CONFIG_KRETPROBES=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y -# CONFIG_HAVE_ARCH_TRACEHOOK is not set -# CONFIG_HAVE_DMA_ATTRS is not set -CONFIG_USE_GENERIC_SMP_HELPERS=y -# CONFIG_HAVE_CLK is not set -CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_ARCH_TRACEHOOK=y # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -155,7 +165,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y CONFIG_BLK_DEV_IO_TRACE=y @@ -175,7 +184,7 @@ CONFIG_IOSCHED_CFQ=y CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="cfq" -CONFIG_CLASSIC_RCU=y +CONFIG_FREEZER=y # # Processor type and features @@ -185,6 +194,8 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y +CONFIG_SPARSE_IRQ=y +# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y CONFIG_X86_PC=y @@ -192,6 +203,7 @@ CONFIG_X86_PC=y # CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set +CONFIG_SCHED_OMIT_FRAME_POINTER=y # CONFIG_PARAVIRT_GUEST is not set # CONFIG_MEMTEST is not set # CONFIG_M386 is not set @@ -230,6 +242,11 @@ CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y +CONFIG_CPU_SUP_INTEL=y +CONFIG_CPU_SUP_AMD=y +CONFIG_CPU_SUP_CENTAUR_64=y +CONFIG_X86_DS=y +CONFIG_X86_PTRACE_BTS=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_DMI=y @@ -237,8 +254,11 @@ CONFIG_GART_IOMMU=y CONFIG_CALGARY_IOMMU=y CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y CONFIG_AMD_IOMMU=y +CONFIG_AMD_IOMMU_STATS=y CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y +CONFIG_IOMMU_API=y +# CONFIG_MAXSMP is not set CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y @@ -247,12 +267,17 @@ CONFIG_PREEMPT_VOLUNTARY=y # CONFIG_PREEMPT is not set CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y +CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set # CONFIG_I8K is not set CONFIG_MICROCODE=y +CONFIG_MICROCODE_INTEL=y +CONFIG_MICROCODE_AMD=y CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y +CONFIG_ARCH_PHYS_ADDR_T_64BIT=y +CONFIG_DIRECT_GBPAGES=y CONFIG_NUMA=y CONFIG_K8_NUMA=y CONFIG_X86_64_ACPI_NUMA=y @@ -269,7 +294,6 @@ CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y CONFIG_HAVE_MEMORY_PRESENT=y -# CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y @@ -280,10 +304,14 @@ CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y -CONFIG_RESOURCES_64BIT=y +CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_X86_CHECK_BIOS_CORRUPTION=y +CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y +CONFIG_X86_RESERVE_LOW_64K=y CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set CONFIG_X86_PAT=y @@ -302,11 +330,12 @@ CONFIG_PHYSICAL_START=0x1000000 CONFIG_PHYSICAL_ALIGN=0x200000 CONFIG_HOTPLUG_CPU=y # CONFIG_COMPAT_VDSO is not set +# CONFIG_CMDLINE_BOOL is not set CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y # -# Power management options +# Power management and ACPI options # CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_PM=y @@ -333,20 +362,14 @@ CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_FAN=y CONFIG_ACPI_DOCK=y -# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y -# CONFIG_ACPI_WMI is not set -# CONFIG_ACPI_ASUS is not set -# CONFIG_ACPI_TOSHIBA is not set # CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_EC=y # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y @@ -381,12 +404,16 @@ CONFIG_X86_ACPI_CPUFREQ=y # # shared options # -# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set # CONFIG_X86_SPEEDSTEP_LIB is not set CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPU_IDLE_GOV_MENU=y +# +# Memory power savings +# +# CONFIG_I7300_IDLE is not set + # # Bus options (PCI etc.) # @@ -395,8 +422,10 @@ CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_DOMAINS=y CONFIG_DMAR=y +# CONFIG_DMAR_DEFAULT_ON is not set CONFIG_DMAR_GFX_WA=y CONFIG_DMAR_FLOPPY_WA=y +# CONFIG_INTR_REMAP is not set CONFIG_PCIEPORTBUS=y # CONFIG_HOTPLUG_PCI_PCIE is not set CONFIG_PCIEAER=y @@ -405,6 +434,7 @@ CONFIG_ARCH_SUPPORTS_MSI=y CONFIG_PCI_MSI=y # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set CONFIG_HT_IRQ=y CONFIG_ISA_DMA_API=y CONFIG_K8_NB=y @@ -438,6 +468,8 @@ CONFIG_HOTPLUG_PCI=y # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y +CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y +# CONFIG_HAVE_AOUT is not set CONFIG_BINFMT_MISC=y CONFIG_IA32_EMULATION=y # CONFIG_IA32_AOUT is not set @@ -449,6 +481,7 @@ CONFIG_NET=y # # Networking options # +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -509,7 +542,6 @@ CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -547,19 +579,21 @@ CONFIG_NF_CONNTRACK_IRC=y CONFIG_NF_CONNTRACK_SIP=y CONFIG_NF_CT_NETLINK=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NETFILTER_XT_TARGET_NFLOG=y CONFIG_NETFILTER_XT_TARGET_SECMARK=y -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y CONFIG_NETFILTER_XT_TARGET_TCPMSS=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y CONFIG_NETFILTER_XT_MATCH_MARK=y CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_STATE=y +# CONFIG_IP_VS is not set # # IP: Netfilter Configuration # +CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_CONNTRACK_PROC_COMPAT=y CONFIG_IP_NF_IPTABLES=y @@ -585,8 +619,8 @@ CONFIG_IP_NF_MANGLE=y CONFIG_NF_CONNTRACK_IPV6=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_MATCH_IPV6HEADER=y -CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_LOG=y +CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP_DCCP is not set @@ -594,6 +628,7 @@ CONFIG_IP6_NF_MANGLE=y # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set CONFIG_LLC=y @@ -613,6 +648,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set @@ -620,6 +656,7 @@ CONFIG_NET_SCHED=y # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_INGRESS is not set # @@ -634,6 +671,7 @@ CONFIG_NET_CLS=y # CONFIG_NET_CLS_RSVP is not set # CONFIG_NET_CLS_RSVP6 is not set # CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_CLS_CGROUP is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set @@ -649,7 +687,9 @@ CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set +# CONFIG_NET_ACT_SKBEDIT is not set CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set # # Network testing @@ -666,29 +706,33 @@ CONFIG_HAMRADIO=y # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set CONFIG_FIB_RULES=y - -# -# Wireless -# +CONFIG_WIRELESS=y CONFIG_CFG80211=y +# CONFIG_CFG80211_REG_DEBUG is not set CONFIG_NL80211=y +CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set CONFIG_MAC80211=y # # Rate control algorithm selection # -CONFIG_MAC80211_RC_PID=y -CONFIG_MAC80211_RC_DEFAULT_PID=y -CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set # CONFIG_MAC80211_DEBUG_MENU is not set -# CONFIG_IEEE80211 is not set -# CONFIG_RFKILL is not set +# CONFIG_WIMAX is not set +CONFIG_RFKILL=y +# CONFIG_RFKILL_INPUT is not set +CONFIG_RFKILL_LEDS=y # CONFIG_NET_9P is not set # @@ -712,7 +756,7 @@ CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set +CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols @@ -740,21 +784,21 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_MISC_DEVICES=y # CONFIG_IBM_ASM is not set # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set -# CONFIG_ACER_WMI is not set -# CONFIG_ASUS_LAPTOP is not set -# CONFIG_FUJITSU_LAPTOP is not set -# CONFIG_MSI_LAPTOP is not set -# CONFIG_COMPAL_LAPTOP is not set -# CONFIG_SONY_LAPTOP is not set -# CONFIG_THINKPAD_ACPI is not set -# CONFIG_INTEL_MENLOW is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SGI_XP is not set # CONFIG_HP_ILO is not set # CONFIG_SGI_GRU is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y # CONFIG_IDE is not set @@ -864,6 +908,7 @@ CONFIG_PATA_OLDPIIX=y CONFIG_PATA_SCH=y CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_AUTODETECT=y # CONFIG_MD_LINEAR is not set # CONFIG_MD_RAID0 is not set # CONFIG_MD_RAID1 is not set @@ -919,6 +964,9 @@ CONFIG_PHYLIB=y # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set # CONFIG_FIXED_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y @@ -942,6 +990,9 @@ CONFIG_NET_TULIP=y # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set @@ -949,7 +1000,6 @@ CONFIG_NET_PCI=y # CONFIG_B44 is not set CONFIG_FORCEDETH=y # CONFIG_FORCEDETH_NAPI is not set -# CONFIG_EEPRO100 is not set CONFIG_E100=y # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -963,15 +1013,16 @@ CONFIG_8139TOO_PIO=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set +# CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set @@ -989,18 +1040,23 @@ CONFIG_TIGON3=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set +# CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set # CONFIG_MLX4_CORE is not set # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set +# CONFIG_QLGE is not set # CONFIG_SFC is not set CONFIG_TR=y # CONFIG_IBMOL is not set @@ -1013,9 +1069,8 @@ CONFIG_TR=y # CONFIG_WLAN_PRE80211 is not set CONFIG_WLAN_80211=y # CONFIG_PCMCIA_RAYCS is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set # CONFIG_LIBERTAS is not set +# CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set # CONFIG_HERMES is not set # CONFIG_ATMEL is not set @@ -1032,6 +1087,8 @@ CONFIG_WLAN_80211=y CONFIG_ATH5K=y # CONFIG_ATH5K_DEBUG is not set # CONFIG_ATH9K is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set # CONFIG_IWLCORE is not set # CONFIG_IWLWIFI_LEDS is not set # CONFIG_IWLAGN is not set @@ -1042,6 +1099,10 @@ CONFIG_ATH5K=y # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# + # # USB Network Adapters # @@ -1050,6 +1111,7 @@ CONFIG_ATH5K=y # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET is not set +# CONFIG_USB_HSO is not set CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set # CONFIG_PCMCIA_3C574 is not set @@ -1059,6 +1121,7 @@ CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_SMC91C92 is not set # CONFIG_PCMCIA_XIRC2PS is not set # CONFIG_PCMCIA_AXNET is not set +# CONFIG_PCMCIA_IBMTR is not set # CONFIG_WAN is not set CONFIG_FDDI=y # CONFIG_DEFXX is not set @@ -1110,6 +1173,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y CONFIG_MOUSE_PS2_SYNAPTICS=y CONFIG_MOUSE_PS2_LIFEBOOK=y CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set @@ -1147,15 +1211,16 @@ CONFIG_INPUT_TOUCHSCREEN=y # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set # CONFIG_TOUCHSCREEN_MTOUCH is not set # CONFIG_TOUCHSCREEN_INEXIO is not set # CONFIG_TOUCHSCREEN_MK712 is not set # CONFIG_TOUCHSCREEN_PENMOUNT is not set # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set # CONFIG_TOUCHSCREEN_TOUCHWIN is not set -# CONFIG_TOUCHSCREEN_UCB1400 is not set # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +# CONFIG_TOUCHSCREEN_TSC2007 is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_APANEL is not set @@ -1165,6 +1230,7 @@ CONFIG_INPUT_MISC=y # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set +# CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # @@ -1231,6 +1297,7 @@ CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y @@ -1260,6 +1327,7 @@ CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y # CONFIG_I2C_CHARDEV is not set CONFIG_I2C_HELPER_AUTO=y +CONFIG_I2C_ALGOBIT=y # # I2C Hardware Bus support @@ -1311,8 +1379,6 @@ CONFIG_I2C_I801=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -1331,8 +1397,78 @@ CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_BATTERY_DS2760 is not set -# CONFIG_HWMON is not set +# CONFIG_BATTERY_BQ27x00 is not set +CONFIG_HWMON=y +# CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_ABITUGURU is not set +# CONFIG_SENSORS_ABITUGURU3 is not set +# CONFIG_SENSORS_AD7414 is not set +# CONFIG_SENSORS_AD7418 is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1029 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ADM9240 is not set +# CONFIG_SENSORS_ADT7462 is not set +# CONFIG_SENSORS_ADT7470 is not set +# CONFIG_SENSORS_ADT7473 is not set +# CONFIG_SENSORS_ADT7475 is not set +# CONFIG_SENSORS_K8TEMP is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_ATXP1 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_I5K_AMB is not set +# CONFIG_SENSORS_F71805F is not set +# CONFIG_SENSORS_F71882FG is not set +# CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_FSCHMD is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_CORETEMP is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_LM93 is not set +# CONFIG_SENSORS_LTC4245 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_MAX6650 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_DME1737 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47M192 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_ADS7828 is not set +# CONFIG_SENSORS_THMC50 is not set +# CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_VT8231 is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83791D is not set +# CONFIG_SENSORS_W83792D is not set +# CONFIG_SENSORS_W83793 is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83L786NG is not set +# CONFIG_SENSORS_W83627HF is not set +# CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_HDAPS is not set +# CONFIG_SENSORS_LIS3LV02D is not set +# CONFIG_SENSORS_APPLESMC is not set +# CONFIG_HWMON_DEBUG_CHIP is not set CONFIG_THERMAL=y +# CONFIG_THERMAL_HWMON is not set CONFIG_WATCHDOG=y # CONFIG_WATCHDOG_NOWAYOUT is not set @@ -1352,15 +1488,18 @@ CONFIG_WATCHDOG=y # CONFIG_I6300ESB_WDT is not set # CONFIG_ITCO_WDT is not set # CONFIG_IT8712F_WDT is not set +# CONFIG_IT87_WDT is not set # CONFIG_HP_WATCHDOG is not set # CONFIG_SC1200_WDT is not set # CONFIG_PC87413_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_SBC8360_WDT is not set # CONFIG_CPU5_WDT is not set +# CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83697HF_WDT is not set +# CONFIG_W83697UG_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set @@ -1376,11 +1515,11 @@ CONFIG_WATCHDOG=y # USB-based Watchdog Cards # # CONFIG_USBPCWATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # @@ -1389,7 +1528,13 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set # CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_REGULATOR is not set # # Multimedia devices @@ -1423,6 +1568,7 @@ CONFIG_DRM=y # CONFIG_DRM_I810 is not set # CONFIG_DRM_I830 is not set CONFIG_DRM_I915=y +CONFIG_DRM_I915_KMS=y # CONFIG_DRM_MGA is not set # CONFIG_DRM_SIS is not set # CONFIG_DRM_VIA is not set @@ -1432,6 +1578,7 @@ CONFIG_DRM_I915=y CONFIG_FB=y # CONFIG_FIRMWARE_EDID is not set # CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y @@ -1460,7 +1607,6 @@ CONFIG_FB_TILEBLITTING=y # CONFIG_FB_UVESA is not set # CONFIG_FB_VESA is not set CONFIG_FB_EFI=y -# CONFIG_FB_IMAC is not set # CONFIG_FB_N411 is not set # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set @@ -1475,6 +1621,7 @@ CONFIG_FB_EFI=y # CONFIG_FB_S3 is not set # CONFIG_FB_SAVAGE is not set # CONFIG_FB_SIS is not set +# CONFIG_FB_VIA is not set # CONFIG_FB_NEOMAGIC is not set # CONFIG_FB_KYRO is not set # CONFIG_FB_3DFX is not set @@ -1486,12 +1633,15 @@ CONFIG_FB_EFI=y # CONFIG_FB_CARMINE is not set # CONFIG_FB_GEODE is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_CORGI is not set +CONFIG_BACKLIGHT_GENERIC=y # CONFIG_BACKLIGHT_PROGEAR is not set # CONFIG_BACKLIGHT_MBP_NVIDIA is not set +# CONFIG_BACKLIGHT_SAHARA is not set # # Display device support @@ -1511,10 +1661,12 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_LOGO_LINUX_CLUT224=y CONFIG_SOUND=y +CONFIG_SOUND_OSS_CORE=y CONFIG_SND=y CONFIG_SND_TIMER=y CONFIG_SND_PCM=y CONFIG_SND_HWDEP=y +CONFIG_SND_JACK=y CONFIG_SND_SEQUENCER=y CONFIG_SND_SEQ_DUMMY=y CONFIG_SND_OSSEMUL=y @@ -1522,6 +1674,8 @@ CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_HRTIMER=y +CONFIG_SND_SEQ_HRTIMER_DEFAULT=y CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y @@ -1575,11 +1729,16 @@ CONFIG_SND_PCI=y # CONFIG_SND_FM801 is not set CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y +# CONFIG_SND_HDA_RECONFIG is not set +# CONFIG_SND_HDA_INPUT_BEEP is not set CONFIG_SND_HDA_CODEC_REALTEK=y CONFIG_SND_HDA_CODEC_ANALOG=y CONFIG_SND_HDA_CODEC_SIGMATEL=y CONFIG_SND_HDA_CODEC_VIA=y CONFIG_SND_HDA_CODEC_ATIHDMI=y +CONFIG_SND_HDA_CODEC_NVHDMI=y +CONFIG_SND_HDA_CODEC_INTELHDMI=y +CONFIG_SND_HDA_ELD=y CONFIG_SND_HDA_CODEC_CONEXANT=y CONFIG_SND_HDA_CODEC_CMEDIA=y CONFIG_SND_HDA_CODEC_SI3054=y @@ -1612,6 +1771,7 @@ CONFIG_SND_USB=y # CONFIG_SND_USB_AUDIO is not set # CONFIG_SND_USB_USX2Y is not set # CONFIG_SND_USB_CAIAQ is not set +# CONFIG_SND_USB_US122L is not set CONFIG_SND_PCMCIA=y # CONFIG_SND_VXPOCKET is not set # CONFIG_SND_PDAUDIOCF is not set @@ -1626,15 +1786,37 @@ CONFIG_HIDRAW=y # USB Input Devices # CONFIG_USB_HID=y -CONFIG_USB_HIDINPUT_POWERBOOK=y -CONFIG_HID_FF=y CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y + +# +# Special HID drivers +# +CONFIG_HID_COMPAT=y +CONFIG_HID_A4TECH=y +CONFIG_HID_APPLE=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CHICONY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GYRATION=y +CONFIG_HID_LOGITECH=y CONFIG_LOGITECH_FF=y # CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y +CONFIG_HID_PANTHERLORD=y CONFIG_PANTHERLORD_FF=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SAMSUNG=y +CONFIG_HID_SONY=y +CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y CONFIG_THRUSTMASTER_FF=y CONFIG_ZEROPLUS_FF=y -CONFIG_USB_HIDDEV=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1652,6 +1834,8 @@ CONFIG_USB_DEVICEFS=y CONFIG_USB_SUSPEND=y # CONFIG_USB_OTG is not set CONFIG_USB_MON=y +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set # # USB Host Controller Drivers @@ -1660,6 +1844,7 @@ CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_ROOT_HUB_TT is not set # CONFIG_USB_EHCI_TT_NEWSCHED is not set +# CONFIG_USB_OXU210HP_HCD is not set # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y @@ -1669,6 +1854,8 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=y # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set # # USB Device Class drivers @@ -1676,20 +1863,20 @@ CONFIG_USB_UHCI_HCD=y # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y # CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set @@ -1697,7 +1884,6 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_STORAGE_ALAUDA is not set # CONFIG_USB_STORAGE_ONETOUCH is not set # CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_SIERRA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set CONFIG_USB_LIBUSUAL=y @@ -1718,6 +1904,7 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set +# CONFIG_USB_SEVSEG is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1735,7 +1922,13 @@ CONFIG_USB_LIBUSUAL=y # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set # CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set CONFIG_NEW_LEDS=y @@ -1744,6 +1937,7 @@ CONFIG_LEDS_CLASS=y # # LED drivers # +# CONFIG_LEDS_ALIX2 is not set # CONFIG_LEDS_PCA9532 is not set # CONFIG_LEDS_CLEVO_MAIL is not set # CONFIG_LEDS_PCA955X is not set @@ -1754,6 +1948,7 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y # CONFIG_LEDS_TRIGGER_TIMER is not set # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set +# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set @@ -1793,6 +1988,7 @@ CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_DRV_M41T80 is not set # CONFIG_RTC_DRV_S35390A is not set # CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set # # SPI RTC drivers @@ -1802,12 +1998,15 @@ CONFIG_RTC_INTF_DEV=y # Platform RTC drivers # CONFIG_RTC_DRV_CMOS=y +# CONFIG_RTC_DRV_DS1286 is not set # CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_V3020 is not set # @@ -1820,6 +2019,21 @@ CONFIG_DMADEVICES=y # # CONFIG_INTEL_IOATDMA is not set # CONFIG_UIO is not set +# CONFIG_STAGING is not set +CONFIG_X86_PLATFORM_DEVICES=y +# CONFIG_ACER_WMI is not set +# CONFIG_ASUS_LAPTOP is not set +# CONFIG_FUJITSU_LAPTOP is not set +# CONFIG_MSI_LAPTOP is not set +# CONFIG_PANASONIC_LAPTOP is not set +# CONFIG_COMPAL_LAPTOP is not set +# CONFIG_SONY_LAPTOP is not set +# CONFIG_THINKPAD_ACPI is not set +# CONFIG_INTEL_MENLOW is not set +CONFIG_EEEPC_LAPTOP=y +# CONFIG_ACPI_WMI is not set +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_TOSHIBA is not set # # Firmware Drivers @@ -1841,22 +2055,25 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +# CONFIG_EXT4_FS is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QUOTA_TREE=y # CONFIG_QFMT_V1 is not set CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y @@ -1890,16 +2107,14 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y # CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set @@ -1909,6 +2124,7 @@ CONFIG_HUGETLB_PAGE=y # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -1930,6 +2146,7 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y +# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -2035,40 +2252,60 @@ CONFIG_TIMER_STATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_VIRTUAL is not set # CONFIG_DEBUG_WRITECOUNT is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_KPROBES_SANITY_TEST is not set # CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_HAVE_FTRACE=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y -# CONFIG_FTRACE is not set +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_HAVE_HW_BRANCH_TRACER=y + +# +# Tracers +# +# CONFIG_FUNCTION_TRACER is not set # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SYSPROF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_POWER_TRACER is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_HW_BRANCH_TRACER is not set CONFIG_PROVIDE_OHCI1394_DMA_INIT=y +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set # CONFIG_STRICT_DEVMEM is not set CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y +CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_DEBUG_PER_CPU_MAPS is not set # CONFIG_X86_PTDUMP is not set CONFIG_DEBUG_RODATA=y -# CONFIG_DIRECT_GBPAGES is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_DEBUG_NX_TEST=m # CONFIG_IOMMU_DEBUG is not set @@ -2092,8 +2329,10 @@ CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set CONFIG_SECURITY_NETWORK=y # CONFIG_SECURITY_NETWORK_XFRM is not set +# CONFIG_SECURITY_PATH is not set CONFIG_SECURITY_FILE_CAPABILITIES=y # CONFIG_SECURITY_ROOTPLUG is not set CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 @@ -2104,7 +2343,6 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set # CONFIG_SECURITY_SMACK is not set CONFIG_CRYPTO=y @@ -2112,11 +2350,18 @@ CONFIG_CRYPTO=y # # Crypto core or helper # +# CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set # CONFIG_CRYPTO_CRYPTD is not set @@ -2151,6 +2396,7 @@ CONFIG_CRYPTO_HMAC=y # Digest # # CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set @@ -2191,6 +2437,11 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_DEFLATE is not set # CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set CONFIG_HAVE_KVM=y @@ -2205,6 +2456,7 @@ CONFIG_VIRTUALIZATION=y CONFIG_BITREVERSE=y CONFIG_GENERIC_FIND_FIRST_BIT=y CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_FIND_LAST_BIT=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set CONFIG_CRC_T10DIF=y From 556831063bcf8fd818e92227c40e3eaabe759bab Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:51:29 +0100 Subject: [PATCH 477/682] x86, defconfig: turn off CONFIG_ENABLE_WARN_DEPRECATED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit deprecation warnings have become rather noisy lately: drivers/i2c/i2c-core.c: In function ‘i2c_new_device’: drivers/i2c/i2c-core.c:283: warning: ‘i2c_attach_client’ is deprecated (declared at include/linux/i2c.h:434) drivers/i2c/i2c-core.c: In function ‘i2c_del_adapter’: drivers/i2c/i2c-core.c:646: warning: ‘detach_client’ is deprecated (declared at include/linux/i2c.h:154) drivers/i2c/i2c-core.c: In function ‘i2c_register_driver’: drivers/i2c/i2c-core.c:713: warning: ‘detach_client’ is deprecated (declared at include/linux/i2c.h:154) drivers/i2c/i2c-core.c: In function ‘__detach_adapter’: drivers/i2c/i2c-core.c:780: warning: ‘detach_client’ is deprecated (declared at include/linux/i2c.h:154) drivers/i2c/i2c-core.c: At top level: drivers/i2c/i2c-core.c:876: warning: ‘i2c_attach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:827) drivers/i2c/i2c-core.c:876: warning: ‘i2c_attach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:827) drivers/i2c/i2c-core.c:904: warning: ‘i2c_detach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:879) drivers/i2c/i2c-core.c:904: warning: ‘i2c_detach_client’ is deprecated (declared at drivers/i2c/i2c-core.c:879) So turn it off for now - these reminders can obscure critical warnings. Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 3 +-- arch/x86/configs/x86_64_defconfig | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index c3186ccc936..0562a36f74d 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -2242,8 +2242,7 @@ CONFIG_NLS_UTF8=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y -CONFIG_ENABLE_WARN_DEPRECATED=y -CONFIG_ENABLE_MUST_CHECK=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 4c85bfa58e1..06633483b29 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -2223,8 +2223,7 @@ CONFIG_NLS_UTF8=y # CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y -CONFIG_ENABLE_WARN_DEPRECATED=y -CONFIG_ENABLE_MUST_CHECK=y +# CONFIG_ENABLE_WARN_DEPRECATED is not set CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set From bd282422fe9566a89bc34af325efb6d2701903be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 12:59:05 +0100 Subject: [PATCH 478/682] x86, defconfig: turn off CONFIG_SCSI_ISCSI_ATTRS=y It was enabled by mistake - iscsi is not included in a typical default PC, and no other architecture has it built-in (=y) either. Turn it off. Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 8 ++++---- arch/x86/configs/x86_64_defconfig | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 0562a36f74d..8f40a80ccb5 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:42:50 2009 +# Thu Feb 12 12:57:57 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -835,7 +835,7 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set @@ -2064,8 +2064,7 @@ CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems @@ -2243,6 +2242,7 @@ CONFIG_NLS_UTF8=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 06633483b29..10728fd91c6 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:48:13 2009 +# Thu Feb 12 12:57:29 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -837,7 +837,7 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set -CONFIG_SCSI_ISCSI_ATTRS=y +# CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set @@ -2044,8 +2044,7 @@ CONFIG_EFI_VARS=y # CONFIG_DELL_RBU is not set # CONFIG_DCDBAS is not set CONFIG_DMIID=y -CONFIG_ISCSI_IBFT_FIND=y -CONFIG_ISCSI_IBFT=y +# CONFIG_ISCSI_IBFT_FIND is not set # # File systems @@ -2224,6 +2223,7 @@ CONFIG_NLS_UTF8=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_WARN_DEPRECATED is not set +CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set From 3023533de43c5c01c660e1b48d3700b028eb4615 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 13:28:43 +0100 Subject: [PATCH 479/682] x86: fix warning in find_low_pfn_range() Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index d48f2560364..e77459dd38a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -722,7 +722,7 @@ void __init find_low_pfn_range(void) highmem_pages = 0; #ifdef CONFIG_HIGHMEM if (highmem_pages >= max_pfn) { - printk(KERN_ERR "highmem size specified (%uMB) is " + printk(KERN_ERR "highmem size specified (%luMB) is " "bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); @@ -731,7 +731,7 @@ void __init find_low_pfn_range(void) if (highmem_pages) { if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE){ - printk(KERN_ERR "highmem size %uMB results in " + printk(KERN_ERR "highmem size %luMB results in " "smaller than 64MB lowmem, ignoring it.\n" , pages_to_mb(highmem_pages)); highmem_pages = 0; From 4769843bc265a9c24584b98709cf39e1df5c1404 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 13:31:41 +0100 Subject: [PATCH 480/682] x86, 32-bit: clean up find_low_pfn_range() Impact: cleanup Split find_low_pfn_range() into two functions: - lowmem_pfn_init() - highmem_pfn_init() The former gets called if all of RAM fits into lowmem, otherwise we call highmem_pfn_init(). Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 141 ++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index e77459dd38a..9d36eb9ebd5 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -675,6 +675,82 @@ static int __init parse_highmem(char *arg) } early_param("highmem", parse_highmem); +#define MSG_HIGHMEM_TOO_BIG \ + "highmem size (%luMB) is bigger than pages available (%luMB)!\n" + +#define MSG_LOWMEM_TOO_SMALL \ + "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n" +/* + * All of RAM fits into lowmem - but if user wants highmem + * artificially via the highmem=x boot parameter then create + * it: + */ +void __init lowmem_pfn_init(void) +{ + if (highmem_pages == -1) + highmem_pages = 0; +#ifdef CONFIG_HIGHMEM + if (highmem_pages >= max_pfn) { + printk(KERN_ERR MSG_HIGHMEM_TOO_BIG, + pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); + highmem_pages = 0; + } + if (highmem_pages) { + if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { + printk(KERN_ERR MSG_LOWMEM_TOO_SMALL, + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn -= highmem_pages; + } +#else + if (highmem_pages) + printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); +#endif +} + +#define MSG_HIGHMEM_TOO_SMALL \ + "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" + +#define MSG_HIGHMEM_TRIMMED \ + "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n" +/* + * We have more RAM than fits into lowmem - we try to put it into + * highmem, also taking the highmem=x boot parameter into account: + */ +void __init highmem_pfn_init(void) +{ + if (highmem_pages == -1) + highmem_pages = max_pfn - MAXMEM_PFN; + + if (highmem_pages + MAXMEM_PFN < max_pfn) + max_pfn = MAXMEM_PFN + highmem_pages; + + if (highmem_pages + MAXMEM_PFN > max_pfn) { + printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL, + pages_to_mb(max_pfn - MAXMEM_PFN), + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } + max_low_pfn = MAXMEM_PFN; +#ifndef CONFIG_HIGHMEM + /* Maximum memory usable is what is directly addressable */ + printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); + if (max_pfn > MAX_NONPAE_PFN) + printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); + else + printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); + max_pfn = MAXMEM_PFN; +#else /* !CONFIG_HIGHMEM */ +#ifndef CONFIG_HIGHMEM64G + if (max_pfn > MAX_NONPAE_PFN) { + max_pfn = MAX_NONPAE_PFN; + printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); + } +#endif /* !CONFIG_HIGHMEM64G */ +#endif /* !CONFIG_HIGHMEM */ +} + /* * Determine low and high memory ranges: */ @@ -683,67 +759,12 @@ void __init find_low_pfn_range(void) /* it could update max_pfn */ /* max_low_pfn is 0, we already have early_res support */ - max_low_pfn = max_pfn; - if (max_low_pfn > MAXMEM_PFN) { - if (highmem_pages == -1) - highmem_pages = max_pfn - MAXMEM_PFN; - if (highmem_pages + MAXMEM_PFN < max_pfn) - max_pfn = MAXMEM_PFN + highmem_pages; - if (highmem_pages + MAXMEM_PFN > max_pfn) { - printk(KERN_WARNING "only %luMB highmem pages " - "available, ignoring highmem size of %luMB.\n", - pages_to_mb(max_pfn - MAXMEM_PFN), - pages_to_mb(highmem_pages)); - highmem_pages = 0; - } - max_low_pfn = MAXMEM_PFN; -#ifndef CONFIG_HIGHMEM - /* Maximum memory usable is what is directly addressable */ - printk(KERN_WARNING "Warning only %ldMB will be used.\n", - MAXMEM>>20); - if (max_pfn > MAX_NONPAE_PFN) - printk(KERN_WARNING - "Use a HIGHMEM64G enabled kernel.\n"); - else - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); - max_pfn = MAXMEM_PFN; -#else /* !CONFIG_HIGHMEM */ -#ifndef CONFIG_HIGHMEM64G - if (max_pfn > MAX_NONPAE_PFN) { - max_pfn = MAX_NONPAE_PFN; - printk(KERN_WARNING "Warning only 4GB will be used." - "Use a HIGHMEM64G enabled kernel.\n"); - } -#endif /* !CONFIG_HIGHMEM64G */ -#endif /* !CONFIG_HIGHMEM */ - } else { - if (highmem_pages == -1) - highmem_pages = 0; -#ifdef CONFIG_HIGHMEM - if (highmem_pages >= max_pfn) { - printk(KERN_ERR "highmem size specified (%luMB) is " - "bigger than pages available (%luMB)!.\n", - pages_to_mb(highmem_pages), - pages_to_mb(max_pfn)); - highmem_pages = 0; - } - if (highmem_pages) { - if (max_low_pfn - highmem_pages < - 64*1024*1024/PAGE_SIZE){ - printk(KERN_ERR "highmem size %luMB results in " - "smaller than 64MB lowmem, ignoring it.\n" - , pages_to_mb(highmem_pages)); - highmem_pages = 0; - } - max_low_pfn -= highmem_pages; - } -#else - if (highmem_pages) - printk(KERN_ERR "ignoring highmem size on non-highmem" - " kernel!\n"); -#endif - } + + if (max_low_pfn > MAXMEM_PFN) + highmem_pfn_init(); + else + lowmem_pfn_init(); } #ifndef CONFIG_NEED_MULTIPLE_NODES From d88316c243e5458a1888edbe0353c4dec6e61c73 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 Feb 2009 15:16:03 +0100 Subject: [PATCH 481/682] x86, 32-bit: refactor find_low_pfn_range() Impact: cleanup Make the max_low_pfn logic a bit more standard between lowmem_pfn_init() and highmem_pfn_init(). Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9d36eb9ebd5..1a9612499a3 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -687,6 +687,9 @@ early_param("highmem", parse_highmem); */ void __init lowmem_pfn_init(void) { + /* max_low_pfn is 0, we already have early_res support */ + max_low_pfn = max_pfn; + if (highmem_pages == -1) highmem_pages = 0; #ifdef CONFIG_HIGHMEM @@ -720,6 +723,8 @@ void __init lowmem_pfn_init(void) */ void __init highmem_pfn_init(void) { + max_low_pfn = MAXMEM_PFN; + if (highmem_pages == -1) highmem_pages = max_pfn - MAXMEM_PFN; @@ -732,7 +737,6 @@ void __init highmem_pfn_init(void) pages_to_mb(highmem_pages)); highmem_pages = 0; } - max_low_pfn = MAXMEM_PFN; #ifndef CONFIG_HIGHMEM /* Maximum memory usable is what is directly addressable */ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); @@ -758,13 +762,10 @@ void __init find_low_pfn_range(void) { /* it could update max_pfn */ - /* max_low_pfn is 0, we already have early_res support */ - max_low_pfn = max_pfn; - - if (max_low_pfn > MAXMEM_PFN) - highmem_pfn_init(); - else + if (max_pfn <= MAXMEM_PFN) lowmem_pfn_init(); + else + highmem_pfn_init(); } #ifndef CONFIG_NEED_MULTIPLE_NODES From 999c7880cc8eeb0cbe6610b8c6d0ab0ec51cd848 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Feb 2009 13:15:55 +0100 Subject: [PATCH 482/682] x86 headers: remove duplicate pud_large() definition Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 62024ff897d..1c097a3a666 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -387,13 +387,6 @@ static inline unsigned long pages_to_mb(unsigned long npg) #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) -#if PAGETABLE_LEVELS == 2 -static inline int pud_large(pud_t pud) -{ - return 0; -} -#endif - #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { From 56cefcea7c8769ffc04a5609e6ac849e36685468 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Feb 2009 13:23:02 +0100 Subject: [PATCH 483/682] x86 headers: include linux/types.h To properly pick up types relied on by prototypes like 'bool'. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 467ce69306b..89ed9d70b0a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PAGE_H #define _ASM_X86_PAGE_H +#include + #ifdef __KERNEL__ #include From e43623b4ed1d0a1a86f0e05a2df8b9637b90ddd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Feb 2009 13:24:19 +0100 Subject: [PATCH 484/682] x86 headers: include page_types.h in pgtable_types.h To properly pick up details like PTE_FLAGS_MASK. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index a7452f10930..9dafe87be2d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -2,6 +2,7 @@ #define _ASM_X86_PGTABLE_DEFS_H #include +#include #define FIRST_USER_ADDRESS 0 From beb6943d8df7ce9278282101af4e0f6f7b648451 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Feb 2009 13:36:47 +0100 Subject: [PATCH 485/682] x86 headers: protect page_32.h via __ASSEMBLY__ Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_32.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index b3f0bf79e84..da4e762406f 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -3,6 +3,8 @@ #include +#ifndef __ASSEMBLY__ + #ifdef CONFIG_HUGETLB_PAGE #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #endif @@ -19,8 +21,6 @@ extern unsigned long __phys_addr(unsigned long); #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* CONFIG_FLATMEM */ -#ifndef __ASSEMBLY__ - #ifdef CONFIG_X86_USE_3DNOW #include From 694aa960608d2976666d850bd4ef78053bbd0c84 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 12 Feb 2009 16:25:42 -0800 Subject: [PATCH 486/682] xen: fix xen_flush_tlb_others The commit commit 4595f9620cda8a1e973588e743cf5f8436dd20c6 Author: Rusty Russell Date: Sat Jan 10 21:58:09 2009 -0800 x86: change flush_tlb_others to take a const struct cpumask causes xen_flush_tlb_others to allocate a multicall and then issue it without initializing it in the case where the cpumask is empty, leading to: [ 8.354898] 1 multicall(s) failed: cpu 1 [ 8.354921] Pid: 2213, comm: bootclean Not tainted 2.6.29-rc3-x86_32p-xenU-tip #135 [ 8.354937] Call Trace: [ 8.354955] [] xen_mc_flush+0x133/0x1b0 [ 8.354971] [] ? xen_force_evtchn_callback+0x1a/0x30 [ 8.354988] [] xen_flush_tlb_others+0xb0/0xd0 [ 8.355003] [] flush_tlb_page+0x53/0xa0 [ 8.355018] [] do_wp_page+0x2a0/0x7c0 [ 8.355034] [] ? notify_remote_via_irq+0x3a/0x70 [ 8.355049] [] handle_mm_fault+0x7b0/0xa50 [ 8.355065] [] ? wake_up_new_task+0x8e/0xb0 [ 8.355079] [] ? do_fork+0xe5/0x320 [ 8.355095] [] do_page_fault+0xe9/0x240 [ 8.355109] [] ? do_page_fault+0x0/0x240 [ 8.355125] [] error_code+0x72/0x78 [ 8.355139] call 1/1: op=2863311530 arg=[aaaaaaaa] result=-38 xen_flush_tlb_others+0x41/0xd0 Since empty cpumasks are rare and undoing an xen_mc_entry() is tricky just issue such requests normally. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d2e8ed1aff3..319bd40a57c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1273,8 +1273,6 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, /* Remove us, and any offline CPUS. */ cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); - if (unlikely(cpumask_empty(to_cpumask(args->mask)))) - goto issue; if (va == TLB_FLUSH_ALL) { args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; @@ -1285,7 +1283,6 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); -issue: xen_mc_issue(PARAVIRT_LAZY_MMU); } From c466ed2e4337c5eb03b283da507eb5328ab06c73 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Fri, 13 Feb 2009 08:40:55 -0600 Subject: [PATCH 487/682] x86, UV: set full apicid in uv_hub_send_ipi The uv_hub_send_ipi() function needs to set the full apicid in the UVH_IPI_INT mmr. Signed-off-by: Dimitri Sivanich Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 6adb5e6f4d9..89b84e004f0 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -114,16 +114,15 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid, lapicid; + unsigned long val, apicid; int pnode; apicid = per_cpu(x86_cpu_to_apicid, cpu); - lapicid = apicid & 0x3f; /* ZZZ macro needed */ pnode = uv_apicid_to_pnode(apicid); - val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) | - ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) | - ( vector << UVH_IPI_INT_VECTOR_SHFT ); + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } From 0341c14da49e7b93d2998926f6ac89a3129e3fa1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 13 Feb 2009 11:14:01 -0800 Subject: [PATCH 488/682] x86: use _types.h headers in asm where available In general, the only definitions that assembly files can use are in _types.S headers (where available), so convert them. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/boot/compressed/head_32.S | 2 +- arch/x86/boot/compressed/head_64.S | 4 ++-- arch/x86/boot/header.S | 2 +- arch/x86/kernel/acpi/realmode/wakeup.S | 4 ++-- arch/x86/kernel/acpi/wakeup_32.S | 2 +- arch/x86/kernel/acpi/wakeup_64.S | 4 ++-- arch/x86/kernel/efi_stub_32.S | 2 +- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/head_32.S | 4 ++-- arch/x86/kernel/relocate_kernel_32.S | 2 +- arch/x86/kernel/relocate_kernel_64.S | 4 ++-- arch/x86/kernel/trampoline_32.S | 2 +- arch/x86/kernel/trampoline_64.S | 4 ++-- arch/x86/kernel/vmlinux_32.lds.S | 2 +- arch/x86/kernel/vmlinux_64.lds.S | 2 +- arch/x86/lib/getuser.S | 2 +- arch/x86/power/hibernate_asm_32.S | 2 +- arch/x86/power/hibernate_asm_64.S | 2 +- arch/x86/xen/xen-head.S | 2 +- 20 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 29c5fbf0839..112f81abfa6 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1d5dff4123e..36743525e37 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -26,8 +26,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b993062e9a5..7ccff4884a2 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include "boot.h" #include "offsets.h" diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 3355973b12a..580b4e29601 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -3,8 +3,8 @@ */ #include #include -#include -#include +#include +#include #include .code16 diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index a12e6a9fb65..8ded418b059 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -1,7 +1,7 @@ .section .text.page_aligned #include #include -#include +#include # Copyright 2003, 2008 Pavel Machek , distribute under GPLv2 diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index bcc293423a7..82add8b804b 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -1,8 +1,8 @@ .text #include #include -#include -#include +#include +#include #include #include diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S index ef00bb77d7e..8b9171b9cbd 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/kernel/efi_stub_32.S @@ -6,7 +6,7 @@ */ #include -#include +#include /* * efi_call_phys(void *, ...) is a function with variable parameters. diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e9920683145..999e827ef9c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fbcf96b295f..dcf31283f94 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 2a0aad7718d..c32ca19d591 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -11,8 +11,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index a160f311972..2064d0aa8d2 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index b0bbdd4829c..d32cfb27a47 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -7,10 +7,10 @@ */ #include -#include +#include #include #include -#include +#include /* * Must be relocatable PIC code callable as a C function diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index d8ccc3c6552..66d874e5404 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -29,7 +29,7 @@ #include #include -#include +#include /* We can free up trampoline after bootup if cpu hotplug is not supported. */ #ifndef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 95a012a4664..cddfb8d386b 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -25,8 +25,8 @@ */ #include -#include -#include +#include +#include #include #include #include diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 3eba7f7bac0..0d860963f26 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 087a7f2c639..fbfced6f680 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -6,7 +6,7 @@ #include #include -#include +#include #undef i386 /* in case the preprocessor is a 32bit one */ diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index ad374003742..51f1504cddd 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index d1e9b53f9d3..b641388d828 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S index 000415947d9..9356547d8c0 100644 --- a/arch/x86/power/hibernate_asm_64.S +++ b/arch/x86/power/hibernate_asm_64.S @@ -18,7 +18,7 @@ .text #include #include -#include +#include #include #include diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 63d49a523ed..1a5ff24e29c 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include From 9b3651cbc26cfcea8276ecaff66718ea087f2e91 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 13 Feb 2009 11:01:54 -0800 Subject: [PATCH 489/682] x86: move more pagetable-related definitions into pgtable*.h PAGETABLE_LEVELS and the PTE masks should be in pgtable*.h Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/page_32_types.h | 2 -- arch/x86/include/asm/page_64_types.h | 2 -- arch/x86/include/asm/page_types.h | 6 ------ arch/x86/include/asm/pgtable-2level_types.h | 2 ++ arch/x86/include/asm/pgtable-3level_types.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 1 + arch/x86/include/asm/pgtable_types.h | 6 ++++++ 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index b5486aaf36e..f1e4a79a6e4 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -33,12 +33,10 @@ /* 44=32+12, the limit we can fit into an unsigned long pfn */ #define __PHYSICAL_MASK_SHIFT 44 #define __VIRTUAL_MASK_SHIFT 32 -#define PAGETABLE_LEVELS 3 #else /* !CONFIG_X86_PAE */ #define __PHYSICAL_MASK_SHIFT 32 #define __VIRTUAL_MASK_SHIFT 32 -#define PAGETABLE_LEVELS 2 #endif /* CONFIG_X86_PAE */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index bc73af3eda9..d38c91b7024 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,8 +1,6 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define PAGETABLE_LEVELS 4 - #define THREAD_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 2c52ff76758..2d625da6603 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -16,12 +16,6 @@ (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ -#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) - -/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ -#define PTE_FLAGS_MASK (~PTE_PFN_MASK) - #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 09ae67efceb..daacc23e3fb 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -17,6 +17,7 @@ typedef union { #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 +#define PAGETABLE_LEVELS 2 /* * traditional i386 two-level paging structure: @@ -25,6 +26,7 @@ typedef union { #define PGDIR_SHIFT 22 #define PTRS_PER_PGD 1024 + /* * the i386 is two-level, so we don't really have any * PMD directory physically. diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index bcc89625ebe..1bd5876c864 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -24,6 +24,8 @@ typedef union { #define SHARED_KERNEL_PMD 1 #endif +#define PAGETABLE_LEVELS 3 + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2f59135c6f2..fbf42b8e038 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -18,6 +18,7 @@ typedef struct { pteval_t pte; } pte_t; #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 +#define PAGETABLE_LEVELS 4 /* * PGDIR_SHIFT determines what a top-level page table entry can map diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 9dafe87be2d..4d258ad76a0 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -173,6 +173,12 @@ #include +/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) + +/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) + typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; From bf33a70a73876b163d62612e9567cbac6604ba7e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 13 Feb 2009 12:52:44 -0600 Subject: [PATCH 490/682] x86: fix "__udivdi3" [drivers/scsi/aha1542.ko] undefined Commit 976e8f677e42757e5586ea04a9ac8bb8ddaa037e ("x86: asm/io.h: unify virt_to_phys/phys_to_virt") changed the return of virt_to_phys from long to phys_addr_t which is unsigned long long on a PAE platform. So, I could suggest a fix below since isa addresses may never be above 32 bits. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index e5a2ab44cd5..4f8e820cf38 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -125,7 +125,7 @@ static inline void *phys_to_virt(phys_addr_t address) /* * ISA I/O bus memory addresses are 1:1 with the physical address. */ -#define isa_virt_to_bus virt_to_phys +#define isa_virt_to_bus (unsigned long)virt_to_phys #define isa_page_to_bus page_to_phys #define isa_bus_to_virt phys_to_virt From f6db44df5bd39ed33883786d35342759af796e4a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Feb 2009 23:59:18 -0800 Subject: [PATCH 491/682] x86: fix typo in filter_cpuid_features() Impact: fix wrong disabling of cpu features an amd system got this strange output: CPU: CPU feature monitor disabled due to lack of CPUID level 0x5 but in /proc/cpuinfo I have: cpuid level : 5 on intel system: CPU: CPU feature monitor disabled due to lack of CPUID level 0x5 CPU: CPU feature dca disabled due to lack of CPUID level 0x9 but in /proc/cpuinfo i have: cpuid level : 11 Tt turns out there is a typo, and we should use level member in df. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 21f086b4c1a..32093d08d87 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -241,9 +241,9 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) * signs here... */ if (cpu_has(c, df->feature) && - ((s32)df->feature < 0 ? - (u32)df->feature > (u32)c->extended_cpuid_level : - (s32)df->feature > (s32)c->cpuid_level)) { + ((s32)df->level < 0 ? + (u32)df->level > (u32)c->extended_cpuid_level : + (s32)df->level > (s32)c->cpuid_level)) { clear_cpu_cap(c, df->feature); if (warn) printk(KERN_WARNING @@ -253,7 +253,7 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) df->level); } } -} +} /* * Naming convention should be: [()] From 88d0f550d71493cd975a11a03c166211b2f3bd32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Feb 2009 23:57:28 -0800 Subject: [PATCH 492/682] x86: make 32bit to call enable_IO_APIC early like 64bit Impact: cleanup So we remove some #ifdefs. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 8 ++++---- arch/x86/kernel/io_apic.c | 4 ---- arch/x86/kernel/smpboot.c | 3 +-- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 8bd801db24d..b8616aaa168 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1646,19 +1646,19 @@ int __init APIC_init_uniprocessor(void) physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); setup_local_APIC(); -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_IO_APIC /* * Now enable IO-APICs, actually call clear_IO_APIC * We need clear_IO_APIC before enabling vector on BP */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); -#endif -#ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) -#endif localise_nmi_watchdog(); +#else + localise_nmi_watchdog(); +#endif end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7248ca11bdc..0b7cde3da48 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3057,13 +3057,9 @@ out: void __init setup_IO_APIC(void) { -#ifdef CONFIG_X86_32 - enable_IO_APIC(); -#else /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ -#endif io_apic_irqs = ~PIC_IRQS; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index af57f88186e..10834954e30 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1154,13 +1154,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) */ setup_local_APIC(); -#ifdef CONFIG_X86_64 /* * Enable IO APIC before setting up error vector */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); -#endif + end_local_APIC_setup(); map_cpu_to_logical_apicid(); From 970ec1a8213cd1a1ea29972ebbe4575a8b30bca1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Feb 2009 14:06:13 -0800 Subject: [PATCH 493/682] [IA64] fix __apci_unmap_table Impact: fix build error to fix: tip/arch/ia64/kernel/acpi.c:203: error: conflicting types for '__acpi_unmap_table' tip/include/linux/acpi.h:82: error: previous declaration of '__acpi_unmap_table' was here tip/arch/ia64/kernel/acpi.c:203: error: conflicting types for '__acpi_unmap_table' tip/include/linux/acpi.h:82: error: previous declaration of '__acpi_unmap_table' was here Signed-off-by: Yinghai Lu Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/ia64/kernel/acpi.c | 2 +- include/linux/acpi.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 2363ed17319..bdef2ce38c8 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -199,7 +199,7 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) return __va(phys_addr); } -char *__init __acpi_unmap_table(unsigned long virt_addr, unsigned long size) +void __init __acpi_unmap_table(char *map, unsigned long size) { } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d59f0fa4d77..78199151c00 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,7 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); -void __init __acpi_unmap_table(char *map, unsigned long size); +void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); From 9033304a1520df346862c95743a6c2023f21f057 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:35:44 -0800 Subject: [PATCH 494/682] x86, xen: short-circuit tests for dom0 When testing for a dom0/initial/privileged domain, make sure the predicate evaluates to a compile-time 0 if CONFIG_XEN_DOM0 isn't enabled. This will make most of the dom0 code evaporate without much more effort. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/hypervisor.h | 28 +++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 81fbd735aec..d5b7e90c0ed 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -38,22 +38,30 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; enum xen_domain_type { - XEN_NATIVE, - XEN_PV_DOMAIN, - XEN_HVM_DOMAIN, + XEN_NATIVE, /* running on bare hardware */ + XEN_PV_DOMAIN, /* running in a PV domain */ + XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ }; -extern enum xen_domain_type xen_domain_type; - #ifdef CONFIG_XEN -#define xen_domain() (xen_domain_type != XEN_NATIVE) +extern enum xen_domain_type xen_domain_type; #else -#define xen_domain() (0) +#define xen_domain_type XEN_NATIVE #endif -#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN) +#define xen_domain() (xen_domain_type != XEN_NATIVE) +#define xen_pv_domain() (xen_domain() && \ + xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain() && \ + xen_domain_type == XEN_HVM_DOMAIN) -#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) +#ifdef CONFIG_XEN_DOM0 +#include + +#define xen_initial_domain() (xen_pv_domain() && \ + xen_start_info->flags & SIF_INITDOMAIN) +#else /* !CONFIG_XEN_DOM0 */ +#define xen_initial_domain() (0) +#endif /* CONFIG_XEN_DOM0 */ #endif /* _ASM_X86_XEN_HYPERVISOR_H */ From b93d51dc62a41b5c2d6f32a0870709dc0cc55545 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 13:35:57 -0800 Subject: [PATCH 495/682] x86, xen: record and display initiator of each multicall when debugging Store the caller for each multicall so we can report it on failure. Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index c738644b543..82f2513e975 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -39,6 +39,7 @@ struct mc_buffer { struct multicall_entry entries[MC_BATCH]; #if MC_DEBUG struct multicall_entry debug[MC_BATCH]; + void *caller[MC_BATCH]; #endif unsigned char args[MC_ARGS]; struct callback { @@ -154,11 +155,12 @@ void xen_mc_flush(void) ret, smp_processor_id()); dump_stack(); for (i = 0; i < b->mcidx; i++) { - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n", + printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", i+1, b->mcidx, b->debug[i].op, b->debug[i].args[0], - b->entries[i].result); + b->entries[i].result, + b->caller[i]); } } #endif @@ -197,6 +199,9 @@ struct multicall_space __xen_mc_entry(size_t args) } ret.mc = &b->entries[b->mcidx]; +#ifdef MC_DEBUG + b->caller[b->mcidx] = __builtin_return_address(0); +#endif b->mcidx++; ret.args = &b->args[argidx]; b->argidx = argidx + args; From 3d39e9d07b576ee72f2c94cfad9e618fe21763b2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:38:51 -0800 Subject: [PATCH 496/682] x86, xen: degrade BUG to WARN when multicall fails If one of the components of a multicall fails, WARN rather than BUG, to help with debugging. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 82f2513e975..6cffd5532b9 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -179,7 +179,7 @@ void xen_mc_flush(void) } b->cbidx = 0; - BUG_ON(ret); + WARN_ON(ret); } struct multicall_space __xen_mc_entry(size_t args) From c99608637eac8834d830496c462c054137772122 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 13:38:56 -0800 Subject: [PATCH 497/682] x86, xen: do multicall callbacks with interrupts disabled We can't call the callbacks after enabling interrupts, as we may get a nested multicall call, which would cause a great deal of havok. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/multicalls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 6cffd5532b9..8bff7e7c290 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -170,8 +170,6 @@ void xen_mc_flush(void) } else BUG_ON(b->argidx != 0); - local_irq_restore(flags); - for (i = 0; i < b->cbidx; i++) { struct callback *cb = &b->callbacks[i]; @@ -179,6 +177,8 @@ void xen_mc_flush(void) } b->cbidx = 0; + local_irq_restore(flags); + WARN_ON(ret); } From 3bd25d0fa3ed588a6735b815cb0c146c23888ace Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Feb 2009 02:54:03 -0800 Subject: [PATCH 498/682] x86: pre init pirq_entries[] Impact: cleanup set default value early - this allows the removal of a number of dynamic initialization codepaths, and an #ifdef. Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 0b7cde3da48..a89878e08a4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -813,8 +813,9 @@ static void clear_IO_APIC (void) */ #define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; +static int pirq_entries[MAX_PIRQS] = { + [0 ... MAX_PIRQS - 1] = -1 +}; static int __init ioapic_pirq_setup(char *str) { @@ -823,10 +824,6 @@ static int __init ioapic_pirq_setup(char *str) get_options(str, ARRAY_SIZE(ints), ints); - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; apic_printk(APIC_VERBOSE, KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n"); max = MAX_PIRQS; @@ -1976,13 +1973,6 @@ void __init enable_IO_APIC(void) int apic; unsigned long flags; -#ifdef CONFIG_X86_32 - int i; - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; -#endif - /* * The number of IO-APIC IRQ registers (== #pins): */ From 98c061b6cf2e7a1010286a7a4f672c4623e1b3e0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Feb 2009 00:00:50 -0800 Subject: [PATCH 499/682] x86: make APIC_init_uniprocessor() more like smp_prepare_cpus() Impact: cleanup 1. move localise_nmi_watchdog() later 2. change setup_boot_APIC_clock() to setup_boot_clock() for 64-bit Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b8616aaa168..c03f4cc135c 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1649,32 +1649,28 @@ int __init APIC_init_uniprocessor(void) #ifdef CONFIG_X86_IO_APIC /* * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP + * We need clear_IO_APIC before enabling error vector */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); - - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) - localise_nmi_watchdog(); -#else - localise_nmi_watchdog(); #endif + end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config && !skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); -# ifdef CONFIG_X86_64 - else + else { nr_ioapics = 0; -# endif + localise_nmi_watchdog(); + } +#else + localise_nmi_watchdog(); #endif -#ifdef CONFIG_X86_64 - setup_boot_APIC_clock(); - check_nmi_watchdog(); -#else setup_boot_clock(); +#ifdef CONFIG_X86_64 + check_nmi_watchdog(); #endif return 0; From ee8b53c1cfe33aecf0c77c0aa3ce437f0d84d831 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 12:20:13 +0100 Subject: [PATCH 500/682] x86: remove stale arch/x86/include/asm/page_64.h.rej file Introduced by: 51c78eb: x86: create _types.h counterparts for page*.h Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64.h.rej | 114 ----------------------------- 1 file changed, 114 deletions(-) delete mode 100644 arch/x86/include/asm/page_64.h.rej diff --git a/arch/x86/include/asm/page_64.h.rej b/arch/x86/include/asm/page_64.h.rej deleted file mode 100644 index 9b1807f1859..00000000000 --- a/arch/x86/include/asm/page_64.h.rej +++ /dev/null @@ -1,114 +0,0 @@ -*************** -*** 1,105 **** - #ifndef _ASM_X86_PAGE_64_H - #define _ASM_X86_PAGE_64_H - -- #define PAGETABLE_LEVELS 4 -- -- #define THREAD_ORDER 1 -- #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) -- #define CURRENT_MASK (~(THREAD_SIZE - 1)) -- -- #define EXCEPTION_STACK_ORDER 0 -- #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) -- -- #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) -- #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -- -- #define IRQSTACK_ORDER 2 -- #define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) -- -- #define STACKFAULT_STACK 1 -- #define DOUBLEFAULT_STACK 2 -- #define NMI_STACK 3 -- #define DEBUG_STACK 4 -- #define MCE_STACK 5 -- #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ -- -- #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -- #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) -- -- /* -- * Set __PAGE_OFFSET to the most negative possible address + -- * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a -- * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's -- * what Xen requires. -- */ -- #define __PAGE_OFFSET _AC(0xffff880000000000, UL) -- -- #define __PHYSICAL_START CONFIG_PHYSICAL_START -- #define __KERNEL_ALIGN 0x200000 -- -- /* -- * Make sure kernel is aligned to 2MB address. Catching it at compile -- * time is better. Change your config file and compile the kernel -- * for a 2MB aligned address (CONFIG_PHYSICAL_START) -- */ -- #if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 -- #error "CONFIG_PHYSICAL_START must be a multiple of 2MB" -- #endif -- -- #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) -- #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -- -- /* See Documentation/x86_64/mm.txt for a description of the memory map. */ -- #define __PHYSICAL_MASK_SHIFT 46 -- #define __VIRTUAL_MASK_SHIFT 48 -- -- /* -- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in -- * arch/x86/kernel/head_64.S), and it is mapped here: -- */ -- #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -- #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) -- -- #ifndef __ASSEMBLY__ -- void clear_page(void *page); -- void copy_page(void *to, void *from); -- -- /* duplicated to the one in bootmem.h */ -- extern unsigned long max_pfn; -- extern unsigned long phys_base; -- -- extern unsigned long __phys_addr(unsigned long); -- #define __phys_reloc_hide(x) (x) -- -- /* -- * These are used to make use of C type-checking.. -- */ -- typedef unsigned long pteval_t; -- typedef unsigned long pmdval_t; -- typedef unsigned long pudval_t; -- typedef unsigned long pgdval_t; -- typedef unsigned long pgprotval_t; -- -- typedef struct page *pgtable_t; -- -- typedef struct { pteval_t pte; } pte_t; -- -- #define vmemmap ((struct page *)VMEMMAP_START) -- -- extern unsigned long init_memory_mapping(unsigned long start, -- unsigned long end); -- -- extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); -- extern void free_initmem(void); -- -- extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); -- extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); -- -- #endif /* !__ASSEMBLY__ */ -- -- #ifdef CONFIG_FLATMEM -- #define pfn_valid(pfn) ((pfn) < max_pfn) -- #endif -- - - #endif /* _ASM_X86_PAGE_64_H */ ---- 1,6 ---- - #ifndef _ASM_X86_PAGE_64_H - #define _ASM_X86_PAGE_64_H - -+ #include - - #endif /* _ASM_X86_PAGE_64_H */ From 06cd9a7dc8a58186060a91b6ddc031057435fd34 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Feb 2009 17:29:58 -0800 Subject: [PATCH 501/682] x86: add x2apic config Impact: cleanup so could deselect x2apic and INTR_REMAP will select x2apic Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 15 +++++++++++++++ arch/x86/include/asm/apic.h | 18 +++++++++++++++--- arch/x86/kernel/Makefile | 4 ++-- arch/x86/kernel/apic.c | 24 ++++++++++++------------ arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/genapic_64.c | 4 ++++ arch/x86/kernel/setup.c | 3 +-- arch/x86/kernel/smpboot.c | 2 +- 8 files changed, 51 insertions(+), 21 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1042d69b267..bce241fe1d2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -235,6 +235,20 @@ config SMP If you don't know what to do here, say N. +config X86_X2APIC + bool "Support x2apic" + depends on X86_LOCAL_APIC && X86_64 + ---help--- + This enables x2apic support on CPUs that have this feature. + + This allows 32-bit apic IDs (so it can support very large systems), + and accesses the local apic via MSRs not via mmio. + + ( On certain CPU models you may need to enable INTR_REMAP too, + to get functional x2apic mode. ) + + If you don't know what to do here, say N. + config SPARSE_IRQ bool "Support sparse irq numbering" depends on PCI_MSI || HT_IRQ @@ -1828,6 +1842,7 @@ config DMAR_FLOPPY_WA config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + select X86_X2APIC ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index fba49f66228..dc1db99cd40 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -112,7 +112,7 @@ static inline u32 native_apic_msr_read(u32 reg) return low; } -#ifndef CONFIG_X86_32 +#ifdef CONFIG_X86_X2APIC extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); @@ -131,7 +131,19 @@ static inline int x2apic_enabled(void) return 0; } #else -#define x2apic_enabled() 0 +static inline void check_x2apic(void) +{ +} +static inline void enable_x2apic(void) +{ +} +static inline void enable_IR_x2apic(void) +{ +} +static inline int x2apic_enabled(void) +{ + return 0; +} #endif struct apic_ops { @@ -177,7 +189,7 @@ static inline u32 safe_apic_wait_icr_idle(void) extern int get_physical_broadcast(void); -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_X2APIC static inline void ack_x2APIC_irq(void) { /* Docs say use 0 for future compatibility */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 24f357e7557..1cefd218f76 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -117,8 +117,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o - obj-y += genx2apic_cluster.o - obj-y += genx2apic_phys.o + obj-$(CONFIG_X86_X2APIC) += genx2apic_cluster.o + obj-$(CONFIG_X86_X2APIC) += genx2apic_phys.o obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index a894eea9d51..004aa1c31e4 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -112,11 +112,7 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -#ifdef CONFIG_X86_64 -#define HAVE_X2APIC -#endif - -#ifdef HAVE_X2APIC +#ifdef CONFIG_X86_X2APIC int x2apic; /* x2apic enabled before OS handover */ static int x2apic_preenabled; @@ -269,7 +265,7 @@ static struct apic_ops xapic_ops = { struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); -#ifdef HAVE_X2APIC +#ifdef CONFIG_X86_X2APIC static void x2apic_wait_icr_idle(void) { /* no need to wait for icr idle in x2apic */ @@ -1320,11 +1316,14 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } -#ifdef HAVE_X2APIC +#ifdef CONFIG_X86_X2APIC void check_x2apic(void) { int msr, msr2; + if (!cpu_has_x2apic) + return; + rdmsr(MSR_IA32_APICBASE, msr, msr2); if (msr & X2APIC_ENABLE) { @@ -1338,6 +1337,9 @@ void enable_x2apic(void) { int msr, msr2; + if (!x2apic) + return; + rdmsr(MSR_IA32_APICBASE, msr, msr2); if (!(msr & X2APIC_ENABLE)) { pr_info("Enabling x2apic\n"); @@ -1439,7 +1441,7 @@ end: return; } -#endif /* HAVE_X2APIC */ +#endif /* CONFIG_X86_X2APIC */ #ifdef CONFIG_X86_64 /* @@ -1570,7 +1572,7 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { -#ifdef HAVE_X2APIC +#ifdef CONFIG_X86_X2APIC if (x2apic) { boot_cpu_physical_apicid = read_apic_id(); return; @@ -1634,9 +1636,7 @@ int __init APIC_init_uniprocessor(void) } #endif -#ifdef HAVE_X2APIC enable_IR_x2apic(); -#endif #ifdef CONFIG_X86_64 default_setup_apic_routing(); #endif @@ -2021,7 +2021,7 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef HAVE_X2APIC +#ifdef CONFIG_X86_X2APIC if (x2apic) enable_x2apic(); else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4db150ed446..4b5d13e472d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1051,7 +1051,7 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); - if (cpu != 0 && x2apic) + if (cpu != 0) enable_x2apic(); /* diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 820dea5d0eb..cdc4772d9c8 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -35,8 +35,10 @@ static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_UV &apic_x2apic_uv_x, #endif +#ifdef CONFIG_X86_X2APIC &apic_x2apic_phys, &apic_x2apic_cluster, +#endif &apic_physflat, NULL, }; @@ -46,10 +48,12 @@ static struct genapic *apic_probe[] __initdata = { */ void __init default_setup_apic_routing(void) { +#ifdef CONFIG_X86_X2APIC if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { if (!intr_remapping_enabled) apic = &apic_flat; } +#endif if (apic == &apic_flat) { if (max_physical_apicid >= 8) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8fce6c71451..43d964411c0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -836,8 +836,7 @@ void __init setup_arch(char **cmdline_p) #else num_physpages = max_pfn; - if (cpu_has_x2apic) - check_x2apic(); + check_x2apic(); /* How many end-of-memory variables you have, grandma! */ /* need this before calling reserve_initrd */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 10834954e30..b5f2b698973 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1128,8 +1128,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; /* needed? */ set_cpu_sibling_map(0); -#ifdef CONFIG_X86_64 enable_IR_x2apic(); +#ifdef CONFIG_X86_64 default_setup_apic_routing(); #endif From c1eeb2de41d7015678bdd412b48a5f071b84e29a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Feb 2009 23:02:14 -0800 Subject: [PATCH 502/682] x86: fold apic_ops into genapic Impact: cleanup make it simpler, don't need have one extra struct. v2: fix the sgi_uv build Signed-off-by: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 97 ++++++++--------------- arch/x86/include/asm/genapic.h | 61 ++++++++++++++ arch/x86/kernel/apic.c | 63 +-------------- arch/x86/kernel/bigsmp_32.c | 7 ++ arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- arch/x86/kernel/es7000_32.c | 7 ++ arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 16 +++- arch/x86/kernel/genx2apic_cluster.c | 11 ++- arch/x86/kernel/genx2apic_phys.c | 11 ++- arch/x86/kernel/genx2apic_uv_x.c | 9 ++- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/numaq_32.c | 7 ++ arch/x86/kernel/probe_32.c | 7 ++ arch/x86/kernel/summit_32.c | 7 ++ arch/x86/kernel/uv_irq.c | 2 +- arch/x86/kernel/vmi_32.c | 6 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 19 ++--- arch/x86/xen/enlighten.c | 21 ++--- 24 files changed, 205 insertions(+), 162 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index dc1db99cd40..4f56e053d34 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -92,6 +92,12 @@ static inline u32 native_apic_mem_read(u32 reg) return *((volatile u32 *)(APIC_BASE + reg)); } +extern void native_apic_wait_icr_idle(void); +extern u32 native_safe_apic_wait_icr_idle(void); +extern void native_apic_icr_write(u32 low, u32 id); +extern u64 native_apic_icr_read(void); + +#ifdef CONFIG_X86_X2APIC static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || @@ -112,7 +118,31 @@ static inline u32 native_apic_msr_read(u32 reg) return low; } -#ifdef CONFIG_X86_X2APIC +static inline void native_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static inline u32 native_safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +static inline void native_x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +static inline u64 native_x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); @@ -146,47 +176,6 @@ static inline int x2apic_enabled(void) } #endif -struct apic_ops { - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); -}; - -extern struct apic_ops *apic_ops; - -static inline u32 apic_read(u32 reg) -{ - return apic_ops->read(reg); -} - -static inline void apic_write(u32 reg, u32 val) -{ - apic_ops->write(reg, val); -} - -static inline u64 apic_icr_read(void) -{ - return apic_ops->icr_read(); -} - -static inline void apic_icr_write(u32 low, u32 high) -{ - apic_ops->icr_write(low, high); -} - -static inline void apic_wait_icr_idle(void) -{ - apic_ops->wait_icr_idle(); -} - -static inline u32 safe_apic_wait_icr_idle(void) -{ - return apic_ops->safe_wait_icr_idle(); -} - extern int get_physical_broadcast(void); #ifdef CONFIG_X86_X2APIC @@ -197,18 +186,6 @@ static inline void ack_x2APIC_irq(void) } #endif - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction - * ... yummie. - */ - - /* Docs say use 0 for future compatibility */ - apic_write(APIC_EOI, 0); -} - extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void connect_bsp_APIC(void); @@ -256,18 +233,6 @@ static inline void disable_local_APIC(void) { } #define SET_APIC_ID(x) (apic->set_apic_id(x)) #else -#ifdef CONFIG_X86_LOCAL_APIC -static inline unsigned default_get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - - if (APIC_XAPIC(ver)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; -} -#endif - #endif #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 273b99452ae..a6d0b00a544 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -5,6 +5,7 @@ #include #include +#include /* * Copyright 2004 James Cleverdon, IBM. @@ -83,10 +84,70 @@ struct genapic { void (*smp_callin_clear_local_apic)(void); void (*store_NMI_vector)(unsigned short *high, unsigned short *low); void (*inquire_remote_apic)(int apicid); + + /* apic ops */ + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); }; extern struct genapic *apic; +static inline u32 apic_read(u32 reg) +{ + return apic->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle(); +} + + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + /* * Warm reset vector default position: */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 004aa1c31e4..af494bad885 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -210,18 +210,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) +void native_apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_xapic_wait_icr_idle(void) +u32 native_safe_apic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -237,13 +232,13 @@ u32 safe_xapic_wait_icr_idle(void) return send_status; } -void xapic_icr_write(u32 low, u32 id) +void native_apic_icr_write(u32 low, u32 id) { apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); } -static u64 xapic_icr_read(void) +u64 native_apic_icr_read(void) { u32 icr1, icr2; @@ -253,54 +248,6 @@ static u64 xapic_icr_read(void) return icr1 | ((u64)icr2 << 32); } -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -#ifdef CONFIG_X86_X2APIC -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -static u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; -#endif - /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -1329,7 +1276,6 @@ void check_x2apic(void) if (msr & X2APIC_ENABLE) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); x2apic_preenabled = x2apic = 1; - apic_ops = &x2apic_ops; } } @@ -1403,7 +1349,6 @@ void __init enable_IR_x2apic(void) if (!x2apic) { x2apic = 1; - apic_ops = &x2apic_ops; enable_x2apic(); } diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 47a62f46afd..9eeb714c5de 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -263,4 +263,11 @@ struct genapic apic_bigsmp = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 4772e91e824..e22d6ed26e6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 5e8c79e748a..42f090702f0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 9abd48b2267..f6c70a164e3 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include struct nmi_watchdog_ctlblk { diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 55515d73d9c..23f1df4ce18 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -806,4 +806,11 @@ struct genapic apic_es7000 = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index cdc4772d9c8..70b616b4c62 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -19,8 +19,8 @@ #include #include -#include #include +#include #include extern struct genapic apic_flat; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 249d2d3c034..36ee760fd13 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #ifdef CONFIG_ACPI #include @@ -229,6 +229,13 @@ struct genapic apic_flat = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; /* @@ -374,4 +381,11 @@ struct genapic apic_physflat = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index 7c87156b641..dd6e8d68542 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -7,8 +7,8 @@ #include #include -#include #include +#include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); @@ -46,7 +46,7 @@ static void /* * send the IPI. */ - x2apic_icr_write(cfg, apicid); + native_x2apic_icr_write(cfg, apicid); } /* @@ -234,4 +234,11 @@ struct genapic apic_x2apic_cluster = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, + .wait_icr_idle = native_x2apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 5cbae8aa040..eb1486bb002 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -7,8 +7,8 @@ #include #include -#include #include +#include static int x2apic_phys; @@ -50,7 +50,7 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, /* * send the IPI. */ - x2apic_icr_write(cfg, apicid); + native_x2apic_icr_write(cfg, apicid); } static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) @@ -220,4 +220,11 @@ struct genapic apic_x2apic_phys = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, + .wait_icr_idle = native_x2apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 89b84e004f0..9ae4a92fac8 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -22,8 +22,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -292,6 +292,13 @@ struct genapic apic_x2apic_uv_x = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = NULL, + + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = native_x2apic_icr_read, + .icr_write = native_x2apic_icr_write, + .wait_icr_idle = native_x2apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; static __cpuinit void set_x2apic_extra_bits(int pnode) diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index dbf5445727a..1326272cae4 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f13ca1650aa..3957776b193 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bdfad80c3cf..48b9ca5e088 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -11,7 +11,7 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ -#include +#include #include #include diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 0cc41a1d255..f0f0c2f0596 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -569,4 +569,11 @@ struct genapic apic_numaq = { .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .store_NMI_vector = numaq_store_NMI_vector, .inquire_remote_apic = NULL, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 22337b75de6..1f701caa95b 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -127,6 +127,13 @@ struct genapic apic_default = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; extern struct genapic apic_numaq; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 1e733eff9b3..1cf32c325d1 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -599,4 +599,11 @@ struct genapic apic_summit = { .smp_callin_clear_local_apic = NULL, .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e..75eb5ec5dd2 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include static void uv_noop(unsigned int irq) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index f052c84ecbe..a1c7b71dc0d 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -798,8 +798,8 @@ static inline int __init activate_vmi(void) #endif #ifdef CONFIG_X86_LOCAL_APIC - para_fill(apic_ops->read, APICRead); - para_fill(apic_ops->write, APICWrite); + para_fill(apic->read, APICRead); + para_fill(apic->write, APICWrite); #endif /* diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index a4791ef412d..2a5e0e6a7c0 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index da2e314f61b..bc9893f2c38 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include @@ -828,13 +828,14 @@ static u32 lguest_apic_safe_wait_icr_idle(void) return 0; } -static struct apic_ops lguest_basic_apic_ops = { - .read = lguest_apic_read, - .write = lguest_apic_write, - .icr_read = lguest_apic_icr_read, - .icr_write = lguest_apic_icr_write, - .wait_icr_idle = lguest_apic_wait_icr_idle, - .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, +static void set_lguest_basic_apic_ops(void) +{ + apic->read = lguest_apic_read; + apic->write = lguest_apic_write; + apic->icr_read = lguest_apic_icr_read; + apic->icr_write = lguest_apic_icr_write; + apic->wait_icr_idle = lguest_apic_wait_icr_idle; + apic->safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle; }; #endif @@ -1035,7 +1036,7 @@ __init void lguest_init(void) #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ - apic_ops = &lguest_basic_apic_ops; + set_lguest_basic_apic_ops(); #endif /* time operations */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 95ff6a0e942..e3dd3fb6729 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include @@ -554,14 +554,15 @@ static u32 xen_safe_apic_wait_icr_idle(void) return 0; } -static struct apic_ops xen_basic_apic_ops = { - .read = xen_apic_read, - .write = xen_apic_write, - .icr_read = xen_apic_icr_read, - .icr_write = xen_apic_icr_write, - .wait_icr_idle = xen_apic_wait_icr_idle, - .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, -}; +static void set_xen_basic_apic_ops(void) +{ + apic->read = xen_apic_read; + apic->write = xen_apic_write; + apic->icr_read = xen_apic_icr_read; + apic->icr_write = xen_apic_icr_write; + apic->wait_icr_idle = xen_apic_wait_icr_idle; + apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; +} #endif @@ -898,7 +899,7 @@ asmlinkage void __init xen_start_kernel(void) /* * set up the basic apic ops. */ - apic_ops = &xen_basic_apic_ops; + set_xen_basic_apic_ops(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { From 7d01d32d3b9becd6deba318b718db3d9fc181d23 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 12:33:20 +0100 Subject: [PATCH 503/682] x86, apic: fix build fallout of genapic changes - make oprofile build - select X86_X2APIC from X86_UV - it relies on it - export genapic for oprofile modular build Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/kernel/cpu/mcheck/p4.c | 2 +- arch/x86/kernel/genapic_64.c | 1 + arch/x86/kernel/probe_32.c | 2 ++ arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/oprofile/op_model_p4.c | 2 +- arch/x86/oprofile/op_model_ppro.c | 2 +- 7 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bce241fe1d2..8955262caa3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -316,6 +316,7 @@ config X86_UV bool "SGI Ultraviolet" depends on X86_64 depends on X86_EXTENDED_PLATFORM + select X86_X2APIC ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 9b60fce09f7..f9c92b66dfb 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 70b616b4c62..ef788635324 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -30,6 +30,7 @@ extern struct genapic apic_x2apic_phys; extern struct genapic apic_x2apic_cluster; struct genapic __read_mostly *apic = &apic_flat; +EXPORT_SYMBOL_GPL(apic); static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_UV diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 1f701caa95b..6e31b17d546 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -143,6 +144,7 @@ extern struct genapic apic_es7000; extern struct genapic apic_default; struct genapic *apic = &apic_default; +EXPORT_SYMBOL_GPL(apic); static struct genapic *apic_probe[] __initdata = { #ifdef CONFIG_X86_NUMAQ diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a..a32a5c7a8ef 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "op_counter.h" #include "op_x86_model.h" diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 4c4a51c90bc..09a237bc9ef 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index e9f80c744cf..5ebd8f605d7 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include From 28aa29eeb3918f820b914679cfc4404972f2df32 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 14:36:42 +0100 Subject: [PATCH 504/682] remove: genapic prepare Signed-off-by: Ingo Molnar --- arch/x86/include/asm/genapic.h | 13 +------------ arch/x86/kernel/probe_32.c | 9 +++++++++ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index a6d0b00a544..9b874a38683 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -168,8 +168,6 @@ extern void apic_send_IPI_self(int vector); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); -extern void default_setup_apic_routing(void); - extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); #endif @@ -211,10 +209,9 @@ static inline unsigned int read_apic_id(void) return apic->get_apic_id(reg); } -#ifdef CONFIG_X86_64 extern void default_setup_apic_routing(void); -#else +#ifdef CONFIG_X86_32 /* * Set up the logical destination ID. * @@ -251,14 +248,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -static inline void default_setup_apic_routing(void) -{ -#ifdef CONFIG_X86_IO_APIC - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Flat", nr_ioapics); -#endif -} - extern int default_apicid_to_node(int logical_apicid); #endif diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 6e31b17d546..b3d5d74e522 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -53,6 +53,15 @@ int no_broadcast = DEFAULT_SEND_IPI; #ifdef CONFIG_X86_LOCAL_APIC +void default_setup_apic_routing(void) +{ +#ifdef CONFIG_X86_IO_APIC + printk(KERN_INFO + "Enabling APIC mode: Flat. Using %d I/O APICs\n", + nr_ioapics); +#endif +} + static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* From e2780a68f889c9d7ec8e78d58a3a2be8cfebf202 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 13:52:29 +0100 Subject: [PATCH 505/682] x86, apic: merge genapic.h into apic.h Impact: cleanup Reduce the number of include files to worry about. Also, most of the users of APIC facilities had to include genapic.h already, which embedded apic.h, so the distinction was meaningless. [ include apic.h from genapic.h for compatibility. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 316 ++++++++++++++++++++++++++++++++- arch/x86/include/asm/genapic.h | 312 -------------------------------- 2 files changed, 311 insertions(+), 317 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4f56e053d34..c07f5fbf43c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -1,15 +1,18 @@ #ifndef _ASM_X86_APIC_H #define _ASM_X86_APIC_H -#include +#include #include +#include #include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include #include #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -235,4 +238,307 @@ static inline void disable_local_APIC(void) { } #endif +/* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 + * + * Generic APIC sub-arch data struct. + * + * Hacked for x86-64 by James Cleverdon from i386 architecture code by + * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and + * James Cleverdon. + */ +struct genapic { + char *name; + + int (*probe)(void); + int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); + int (*apic_id_registered)(void); + + u32 irq_delivery_mode; + u32 irq_dest_mode; + + const struct cpumask *(*target_cpus)(void); + + int disable_esr; + + int dest_logical; + unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_present)(int apicid); + + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); + void (*init_apic_ldr)(void); + + physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + + void (*setup_apic_routing)(void); + int (*multi_timer_check)(int apic, int irq); + int (*apicid_to_node)(int logical_apicid); + int (*cpu_to_logical_apicid)(int cpu); + int (*cpu_present_to_apicid)(int mps_cpu); + physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*setup_portio_remap)(void); + int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); + void (*enable_apic_mode)(void); + int (*phys_pkg_id)(int cpuid_apic, int index_msb); + + /* + * When one of the next two hooks returns 1 the genapic + * is switched to this. Essentially they are additional + * probe functions: + */ + int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); + + unsigned int (*get_apic_id)(unsigned long x); + unsigned long (*set_apic_id)(unsigned int id); + unsigned long apic_id_mask; + + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + + /* ipi */ + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + + /* wakeup_secondary_cpu */ + int (*wakeup_cpu)(int apicid, unsigned long start_eip); + + int trampoline_phys_low; + int trampoline_phys_high; + + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); + + /* apic ops */ + u32 (*read)(u32 reg); + void (*write)(u32 reg, u32 v); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + void (*wait_icr_idle)(void); + u32 (*safe_wait_icr_idle)(void); +}; + +extern struct genapic *apic; + +static inline u32 apic_read(u32 reg) +{ + return apic->read(reg); +} + +static inline void apic_write(u32 reg, u32 val) +{ + apic->write(reg, val); +} + +static inline u64 apic_icr_read(void) +{ + return apic->icr_read(); +} + +static inline void apic_icr_write(u32 low, u32 high) +{ + apic->icr_write(low, high); +} + +static inline void apic_wait_icr_idle(void) +{ + apic->wait_icr_idle(); +} + +static inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle(); +} + + +static inline void ack_APIC_irq(void) +{ + /* + * ack_APIC_irq() actually gets compiled as a single instruction + * ... yummie. + */ + + /* Docs say use 0 for future compatibility */ + apic_write(APIC_EOI, 0); +} + +static inline unsigned default_get_apic_id(unsigned long x) +{ + unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); + + if (APIC_XAPIC(ver)) + return (x >> 24) & 0xFF; + else + return (x >> 24) & 0x0F; +} + +/* + * Warm reset vector default position: + */ +#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 +#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 + +#ifdef CONFIG_X86_32 +extern void es7000_update_genapic_to_cluster(void); +#else +extern struct genapic apic_flat; +extern struct genapic apic_physflat; +extern struct genapic apic_x2apic_cluster; +extern struct genapic apic_x2apic_phys; +extern int default_acpi_madt_oem_check(char *, char *); + +extern void apic_send_IPI_self(int vector); + +extern struct genapic apic_x2apic_uv_x; +DECLARE_PER_CPU(int, x2apic_extra_bits); + +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +#endif + +static inline void default_wait_for_init_deassert(atomic_t *deassert) +{ + while (!atomic_read(deassert)) + cpu_relax(); + return; +} + +extern void generic_bigsmp_probe(void); + + +#ifdef CONFIG_X86_LOCAL_APIC + +#include + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +static inline const struct cpumask *default_target_cpus(void) +{ +#ifdef CONFIG_SMP + return cpu_online_mask; +#else + return cpumask_of(0); +#endif +} + +DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); + + +static inline unsigned int read_apic_id(void) +{ + unsigned int reg; + + reg = apic_read(APIC_ID); + + return apic->get_apic_id(reg); +} + +extern void default_setup_apic_routing(void); + +#ifdef CONFIG_X86_32 +/* + * Set up the logical destination ID. + * + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ +extern void default_init_apic_ldr(void); + +static inline int default_apic_id_registered(void) +{ + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static inline unsigned int +default_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0]; +} + +static inline unsigned int +default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); +} + +static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return cpuid_apic >> index_msb; +} + +extern int default_apicid_to_node(int logical_apicid); + +#endif + +static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + return physid_isset(apicid, bitmap); +} + +static inline unsigned long default_check_apicid_present(int bit) +{ + return physid_isset(bit, phys_cpu_present_map); +} + +static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +/* Mapping from cpu number to logical apicid */ +static inline int default_cpu_to_logical_apicid(int cpu) +{ + return 1 << cpu; +} + +static inline int __default_cpu_present_to_apicid(int mps_cpu) +{ + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) + return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); + else + return BAD_APICID; +} + +static inline int +__default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +} + +#ifdef CONFIG_X86_32 +static inline int default_cpu_present_to_apicid(int mps_cpu) +{ + return __default_cpu_present_to_apicid(mps_cpu); +} + +static inline int +default_check_phys_apicid_present(int boot_cpu_physical_apicid) +{ + return __default_check_phys_apicid_present(boot_cpu_physical_apicid); +} +#else +extern int default_cpu_present_to_apicid(int mps_cpu); +extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); +#endif + +static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) +{ + return physid_mask_of_physid(phys_apicid); +} + +#endif /* CONFIG_X86_LOCAL_APIC */ + #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/include/asm/genapic.h b/arch/x86/include/asm/genapic.h index 9b874a38683..4b8b98fa7f2 100644 --- a/arch/x86/include/asm/genapic.h +++ b/arch/x86/include/asm/genapic.h @@ -1,313 +1 @@ -#ifndef _ASM_X86_GENAPIC_H -#define _ASM_X86_GENAPIC_H - -#include - -#include -#include #include - -/* - * Copyright 2004 James Cleverdon, IBM. - * Subject to the GNU Public License, v.2 - * - * Generic APIC sub-arch data struct. - * - * Hacked for x86-64 by James Cleverdon from i386 architecture code by - * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and - * James Cleverdon. - */ -struct genapic { - char *name; - - int (*probe)(void); - int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_registered)(void); - - u32 irq_delivery_mode; - u32 irq_dest_mode; - - const struct cpumask *(*target_cpus)(void); - - int disable_esr; - - int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); - unsigned long (*check_apicid_present)(int apicid); - - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); - void (*init_apic_ldr)(void); - - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); - - void (*setup_apic_routing)(void); - int (*multi_timer_check)(int apic, int irq); - int (*apicid_to_node)(int logical_apicid); - int (*cpu_to_logical_apicid)(int cpu); - int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); - void (*setup_portio_remap)(void); - int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); - void (*enable_apic_mode)(void); - int (*phys_pkg_id)(int cpuid_apic, int index_msb); - - /* - * When one of the next two hooks returns 1 the genapic - * is switched to this. Essentially they are additional - * probe functions: - */ - int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); - - unsigned int (*get_apic_id)(unsigned long x); - unsigned long (*set_apic_id)(unsigned int id); - unsigned long apic_id_mask; - - unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); - unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, - const struct cpumask *andmask); - - /* ipi */ - void (*send_IPI_mask)(const struct cpumask *mask, int vector); - void (*send_IPI_mask_allbutself)(const struct cpumask *mask, - int vector); - void (*send_IPI_allbutself)(int vector); - void (*send_IPI_all)(int vector); - void (*send_IPI_self)(int vector); - - /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); - - int trampoline_phys_low; - int trampoline_phys_high; - - void (*wait_for_init_deassert)(atomic_t *deassert); - void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); - void (*inquire_remote_apic)(int apicid); - - /* apic ops */ - u32 (*read)(u32 reg); - void (*write)(u32 reg, u32 v); - u64 (*icr_read)(void); - void (*icr_write)(u32 low, u32 high); - void (*wait_icr_idle)(void); - u32 (*safe_wait_icr_idle)(void); -}; - -extern struct genapic *apic; - -static inline u32 apic_read(u32 reg) -{ - return apic->read(reg); -} - -static inline void apic_write(u32 reg, u32 val) -{ - apic->write(reg, val); -} - -static inline u64 apic_icr_read(void) -{ - return apic->icr_read(); -} - -static inline void apic_icr_write(u32 low, u32 high) -{ - apic->icr_write(low, high); -} - -static inline void apic_wait_icr_idle(void) -{ - apic->wait_icr_idle(); -} - -static inline u32 safe_apic_wait_icr_idle(void) -{ - return apic->safe_wait_icr_idle(); -} - - -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction - * ... yummie. - */ - - /* Docs say use 0 for future compatibility */ - apic_write(APIC_EOI, 0); -} - -static inline unsigned default_get_apic_id(unsigned long x) -{ - unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - - if (APIC_XAPIC(ver)) - return (x >> 24) & 0xFF; - else - return (x >> 24) & 0x0F; -} - -/* - * Warm reset vector default position: - */ -#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 -#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 - -#ifdef CONFIG_X86_32 -extern void es7000_update_genapic_to_cluster(void); -#else -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2apic_cluster; -extern struct genapic apic_x2apic_phys; -extern int default_acpi_madt_oem_check(char *, char *); - -extern void apic_send_IPI_self(int vector); - -extern struct genapic apic_x2apic_uv_x; -DECLARE_PER_CPU(int, x2apic_extra_bits); - -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); -#endif - -static inline void default_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); - return; -} - -extern void generic_bigsmp_probe(void); - - -#ifdef CONFIG_X86_LOCAL_APIC - -#include - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -static inline const struct cpumask *default_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - -DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); - - -static inline unsigned int read_apic_id(void) -{ - unsigned int reg; - - reg = apic_read(APIC_ID); - - return apic->get_apic_id(reg); -} - -extern void default_setup_apic_routing(void); - -#ifdef CONFIG_X86_32 -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -extern void default_init_apic_ldr(void); - -static inline int default_apic_id_registered(void) -{ - return physid_isset(read_apic_id(), phys_cpu_present_map); -} - -static inline unsigned int -default_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return cpumask_bits(cpumask)[0]; -} - -static inline unsigned int -default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - unsigned long mask1 = cpumask_bits(cpumask)[0]; - unsigned long mask2 = cpumask_bits(andmask)[0]; - unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; - - return (unsigned int)(mask1 & mask2 & mask3); -} - -static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -extern int default_apicid_to_node(int logical_apicid); - -#endif - -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) -{ - return physid_isset(apicid, bitmap); -} - -static inline unsigned long default_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - -/* Mapping from cpu number to logical apicid */ -static inline int default_cpu_to_logical_apicid(int cpu) -{ - return 1 << cpu; -} - -static inline int __default_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static inline int -__default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); -} - -#ifdef CONFIG_X86_32 -static inline int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -static inline int -default_check_phys_apicid_present(int boot_cpu_physical_apicid) -{ - return __default_check_phys_apicid_present(boot_cpu_physical_apicid); -} -#else -extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); -#endif - -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - -#endif /* CONFIG_X86_LOCAL_APIC */ - -#endif /* _ASM_X86_GENAPIC_64_H */ From 7b6aa335ca1a845c2262ec7a595b4521bca0f79d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 13:58:15 +0100 Subject: [PATCH 506/682] x86, apic: remove genapic.h Impact: cleanup Remove genapic.h and remove all references to it. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ipi.h | 2 +- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/bigsmp_32.c | 2 +- arch/x86/kernel/cpu/addon_cpuid_features.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/cpu/mcheck/p4.c | 2 +- arch/x86/kernel/crash.c | 2 +- arch/x86/kernel/es7000_32.c | 4 ++-- arch/x86/kernel/genapic_64.c | 2 +- arch/x86/kernel/genapic_flat_64.c | 2 +- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/io_apic.c | 2 +- arch/x86/kernel/ipi.c | 2 +- arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/kgdb.c | 2 +- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/numaq_32.c | 4 ++-- arch/x86/kernel/probe_32.c | 8 ++++---- arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/smp.c | 2 +- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/kernel/summit_32.c | 2 +- arch/x86/kernel/tlb_uv.c | 4 ++-- arch/x86/kernel/uv_irq.c | 2 +- arch/x86/kernel/visws_quirks.c | 4 ++-- arch/x86/kernel/vmi_32.c | 2 +- arch/x86/kernel/vmiclock_32.c | 2 +- arch/x86/lguest/boot.c | 2 +- arch/x86/mm/srat_64.c | 2 +- arch/x86/mm/tlb.c | 2 +- arch/x86/oprofile/nmi_int.c | 2 +- arch/x86/oprofile/op_model_p4.c | 2 +- arch/x86/oprofile/op_model_ppro.c | 2 +- arch/x86/pci/numaq_32.c | 2 +- arch/x86/xen/enlighten.c | 2 +- 45 files changed, 54 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 5f2efc5d992..3395c680a97 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -123,7 +123,7 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); -#include +#include extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 956c1dee6fb..42814152c94 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index af494bad885..7db03a9b61d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 9eeb714c5de..72f4e534051 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index e48640cfac0..6882a735d9c 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -7,7 +7,7 @@ #include #include -#include +#include struct cpuid_bit { u16 feature; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ff4d7b9e32e..c94ba9311e6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,7 +12,7 @@ # include #endif -#include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4b5d13e472d..41f3788ec9b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -26,8 +26,8 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include -#include +#include +#include #include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1f137a87d4b..290f92e2b7c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,7 +24,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include +#include #endif static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index e22d6ed26e6..4772e91e824 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 42f090702f0..5e8c79e748a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f9c92b66dfb..9b60fce09f7 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ad7f2a696f4..3340cc0f244 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,7 +28,7 @@ #include #include -#include +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 23f1df4ce18..6cdfb3e4dc3 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include /* @@ -387,7 +387,7 @@ void __init es7000_enable_apic_mode(void) #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index ef788635324..91cae6f6e73 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 36ee760fd13..a7d84763648 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index dd6e8d68542..f5e02cffa26 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index eb1486bb002..11eb4cb7ca3 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include static int x2apic_phys; diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 9ae4a92fac8..c1746a198bd 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index a89878e08a4..00e6071cefc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -62,7 +62,7 @@ #include #include -#include +#include #define __apicdebuginit(type) static type __init diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 1326272cae4..dbf5445727a 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3957776b193..f13ca1650aa 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4beb9a13873..e4ac7c80e2d 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -212,7 +212,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -#include +#include /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 5c4f5548384..eedfaebe106 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,7 +46,7 @@ #include #include -#include +#include /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 20076445319..7f4d2586972 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -29,7 +29,7 @@ #include #include -#include +#include /* * Checksum an MP configuration block. */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 48b9ca5e088..bdfad80c3cf 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -11,7 +11,7 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ -#include +#include #include #include diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index f0f0c2f0596..5a2d75d1fd4 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include @@ -301,7 +301,7 @@ int __init get_memcfg_numaq(void) #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index b3d5d74e522..be0d554984a 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -17,20 +17,20 @@ #include #include #include -#include +#include #include #include #include #include -#include +#include #include #include #include #include #include #include -#include +#include #include #include @@ -41,7 +41,7 @@ #include #include -#include +#include #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 32e8f0af292..7c8cd447d5e 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,7 +24,7 @@ # include #endif -#include +#include /* * Power off function, if any diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 43d964411c0..deaafd2693e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -97,7 +97,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index eaaffae31cc..13f33ea8cca 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include /* * Some notes on x86 processor bugs affecting SMP operation: * diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b5f2b698973..562a9fc3bc3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -60,12 +60,12 @@ #include #include #include -#include +#include #include #include #include -#include +#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 1cf32c325d1..eb31ba276bb 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f396e61bcb3..1a7dfa7cb52 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -15,12 +15,12 @@ #include #include #include -#include +#include #include #include #include -#include +#include static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 75eb5ec5dd2..aeef529917e 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include static void uv_noop(unsigned int irq) diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 4fd646e6dd4..5264fea6c28 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -32,9 +32,9 @@ #include #include -#include +#include -#include +#include #include diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index a1c7b71dc0d..2cc4a90e2cb 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 2a5e0e6a7c0..a4791ef412d 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index bc9893f2c38..f3a5305b8ad 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 15df1baee10..574c8bc95ef 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include int acpi_numa __initdata; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 14c5af4d11e..b641349fe07 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,7 +14,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; -#include +#include /* * Smarter SMP flushing macros. * c/o Linus Torvalds. diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index a32a5c7a8ef..202864ad49a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "op_counter.h" #include "op_x86_model.h" diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 09a237bc9ef..4c4a51c90bc 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 5ebd8f605d7..e9f80c744cf 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 5601e829c38..8eb295e116f 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e3dd3fb6729..86497d5f44c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include From e641f5f525acb163ba71d92de79c9c7366deae03 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 14:02:01 +0100 Subject: [PATCH 507/682] x86, apic: remove duplicate asm/apic.h inclusions Impact: cleanup Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ipi.h | 2 -- arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/apic.c | 1 - arch/x86/kernel/cpu/amd.c | 2 -- arch/x86/kernel/cpu/common.c | 6 ++---- arch/x86/kernel/cpu/intel.c | 1 - arch/x86/kernel/crash.c | 2 -- arch/x86/kernel/es7000_32.c | 1 - arch/x86/kernel/irq_32.c | 1 - arch/x86/kernel/numaq_32.c | 1 - arch/x86/kernel/probe_32.c | 4 ---- arch/x86/kernel/reboot.c | 2 -- arch/x86/kernel/setup.c | 1 - arch/x86/kernel/smpboot.c | 1 - arch/x86/kernel/summit_32.c | 1 - arch/x86/kernel/tlb_uv.c | 2 -- arch/x86/kernel/visws_quirks.c | 7 +------ arch/x86/mm/tlb.c | 1 - 18 files changed, 3 insertions(+), 34 deletions(-) diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 3395c680a97..0b7228268a6 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -123,8 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector); extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int vector); -#include - extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 42814152c94..a18eb7ce223 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7db03a9b61d..c12823eb55b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -36,7 +36,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c94ba9311e6..25423a5b80e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,8 +12,6 @@ # include #endif -#include - #include "cpu.h" #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 41f3788ec9b..826d5c87627 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -23,11 +23,9 @@ #include #include #include +#include + #ifdef CONFIG_X86_LOCAL_APIC -#include -#include -#include -#include #include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 290f92e2b7c..7aeef1d327b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -24,7 +24,6 @@ #ifdef CONFIG_X86_LOCAL_APIC #include #include -#include #endif static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 3340cc0f244..ff958248e61 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -28,8 +28,6 @@ #include #include -#include - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 6cdfb3e4dc3..352d870e5d5 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -387,7 +387,6 @@ void __init es7000_enable_apic_mode(void) #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e4ac7c80e2d..9dc6b2b2427 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -212,7 +212,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -#include /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 5a2d75d1fd4..40400a58845 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -301,7 +301,6 @@ int __init get_memcfg_numaq(void) #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index be0d554984a..fd1352ac909 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -23,14 +23,12 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include #include @@ -41,8 +39,6 @@ #include #include -#include - #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) #else diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 7c8cd447d5e..1cc18d439bb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,8 +24,6 @@ # include #endif -#include - /* * Power off function, if any */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index deaafd2693e..b2da0b1d15e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -97,7 +97,6 @@ #include #include -#include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 562a9fc3bc3..09e73876a44 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -65,7 +65,6 @@ #include #include -#include #include #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index eb31ba276bb..577b0bd8e53 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 1a7dfa7cb52..f04549afcfe 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -20,8 +20,6 @@ #include #include -#include - static struct bau_control **uv_bau_table_bases __read_mostly; static int uv_bau_retry_limit __read_mostly; diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 5264fea6c28..34199d30ff4 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -29,13 +29,10 @@ #include #include #include +#include #include #include -#include - -#include - #include #include @@ -49,8 +46,6 @@ extern int no_broadcast; -#include - char visws_board_type = -1; char visws_board_rev = -1; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index b641349fe07..a654d59e448 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,7 +14,6 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, }; -#include /* * Smarter SMP flushing macros. * c/o Linus Torvalds. From 5c615feb90ce63f2293a7ac7809f320f46c75e0a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 14:04:24 +0100 Subject: [PATCH 508/682] x86, apic: remove stale references to APIC_DEFINITION Impact: cleanup APIC_DEFINITION was a hack from the x86 subarch times, it has no meaning anymore - remove it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 1 - arch/x86/kernel/es7000_32.c | 1 - arch/x86/kernel/numaq_32.c | 1 - arch/x86/kernel/summit_32.c | 1 - 4 files changed, 4 deletions(-) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 72f4e534051..f49af36351f 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -2,7 +2,6 @@ * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. * Drives the local APIC in "clustered mode". */ -#define APIC_DEFINITION 1 #include #include #include diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 352d870e5d5..cf53a98dbf1 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -383,7 +383,6 @@ void __init es7000_enable_apic_mode(void) /* * APIC driver for the Unisys ES7000 chipset. */ -#define APIC_DEFINITION 1 #include #include #include diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 40400a58845..dcf22f50827 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -297,7 +297,6 @@ int __init get_memcfg_numaq(void) /* * APIC driver for the IBM NUMAQ chipset. */ -#define APIC_DEFINITION 1 #include #include #include diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 577b0bd8e53..30597778914 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -34,7 +34,6 @@ /* * APIC driver for the IBM "Summit" chipset. */ -#define APIC_DEFINITION 1 #include #include #include From 77313190d121dd1fffa965aff6e9f0782a307bb8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 14:09:20 +0100 Subject: [PATCH 509/682] x86, apic: clean up arch/x86/kernel/bigsmp_32.c Impact: cleanup - remove unnecessary indirections that were artifacts of the subarch code - clean up include file section - clean up various small details Signed-off-by: Ingo Molnar --- arch/x86/kernel/bigsmp_32.c | 43 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index f49af36351f..41732abd700 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -1,27 +1,26 @@ /* - * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. + * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. + * * Drives the local APIC in "clustered mode". */ #include #include -#include -#include -#include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include static inline unsigned bigsmp_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; } -#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) - static inline int bigsmp_apic_id_registered(void) { return 1; @@ -36,8 +35,6 @@ static inline const cpumask_t *bigsmp_target_cpus(void) #endif } -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - static inline unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) { @@ -52,9 +49,11 @@ static inline unsigned long bigsmp_check_apicid_present(int bit) static inline unsigned long calculate_ldr(int cpu) { unsigned long val, id; + val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = xapic_phys_to_log_apicid(cpu); + id = per_cpu(x86_bios_cpu_apicid, cpu); val |= SET_APIC_LOGICAL_ID(id); + return val; } @@ -70,15 +69,16 @@ static inline void bigsmp_init_apic_ldr(void) unsigned long val; int cpu = smp_processor_id(); - apic_write(APIC_DFR, APIC_DFR_VALUE); + apic_write(APIC_DFR, APIC_DFR_FLAT); val = calculate_ldr(cpu); apic_write(APIC_LDR, val); } static inline void bigsmp_setup_apic_routing(void) { - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "Physflat", nr_ioapics); + printk(KERN_INFO + "Enabling APIC mode: Physflat. Using %d I/O APICs\n", + nr_ioapics); } static inline int bigsmp_apicid_to_node(int logical_apicid) @@ -100,6 +100,7 @@ static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) } extern u8 cpu_2_logical_apicid[]; + /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { @@ -175,21 +176,24 @@ static int hp_ht_bigsmp(const struct dmi_system_id *d) { printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); dmi_bigsmp = 1; + return 0; } static const struct dmi_system_id bigsmp_dmi_table[] = { { hp_ht_bigsmp, "HP ProLiant DL760 G2", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P44-"),} + { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_BIOS_VERSION, "P44-"), + } }, { hp_ht_bigsmp, "HP ProLiant DL740", - { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), - DMI_MATCH(DMI_BIOS_VERSION, "P47-"),} + { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_BIOS_VERSION, "P47-"), + } }, - { } + { } /* NULL entry stops DMI scanning */ }; static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask) @@ -204,6 +208,7 @@ static int probe_bigsmp(void) dmi_bigsmp = 1; else dmi_check_system(bigsmp_dmi_table); + return dmi_bigsmp; } From 2f205bc47f615b7bd0c7aba817d67ce25760eaf1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 14:45:30 +0100 Subject: [PATCH 510/682] x86, apic: clean up the cpu_2_logical_apiciddeclaration extern declarations were scattered in 4 files - consolidate them into apic.h. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 4 ++++ arch/x86/kernel/bigsmp_32.c | 2 -- arch/x86/kernel/es7000_32.c | 3 +-- arch/x86/kernel/numaq_32.c | 5 +---- arch/x86/kernel/summit_32.c | 4 +--- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c07f5fbf43c..2cdd19e4536 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -541,4 +541,8 @@ static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_X86_32 +extern u8 cpu_2_logical_apicid[NR_CPUS]; +#endif + #endif /* _ASM_X86_APIC_H */ diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 41732abd700..0de9eed7c60 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -99,8 +99,6 @@ static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) return physid_mask_of_physid(phys_apicid); } -extern u8 cpu_2_logical_apicid[]; - /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index cf53a98dbf1..3dc48831eb9 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -403,7 +403,6 @@ void __init es7000_enable_apic_mode(void) extern void es7000_enable_apic_mode(void); extern int apic_version [MAX_APICS]; -extern u8 cpu_2_logical_apicid[]; extern unsigned int boot_cpu_physical_apicid; extern int parse_unisys_oem (char *oemptr); @@ -570,7 +569,7 @@ static int es7000_cpu_to_logical_apicid(int cpu) #ifdef CONFIG_SMP if (cpu >= nr_cpu_ids) return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + return cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index dcf22f50827..9abaacde72e 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -408,14 +408,11 @@ static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) return physids_promote(0xFUL); } -/* Mapping from cpu number to logical apicid */ -extern u8 cpu_2_logical_apicid[]; - static inline int numaq_cpu_to_logical_apicid(int cpu) { if (cpu >= nr_cpu_ids) return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + return cpu_2_logical_apicid[cpu]; } /* diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 30597778914..7a1db1f2356 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -209,8 +209,6 @@ static inline unsigned long summit_check_apicid_present(int bit) #define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) -extern u8 cpu_2_logical_apicid[]; - static inline void summit_init_apic_ldr(void) { unsigned long val, id; @@ -264,7 +262,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu) #ifdef CONFIG_SMP if (cpu >= nr_cpu_ids) return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + return cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif From 2c4ce18c95d632c9227ebcc6d45da11a9ef1ec70 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 14:57:16 +0100 Subject: [PATCH 511/682] x86, es7000: clean up No code changed: arch/x86/kernel/es7000_32.o: text data bss dec hex filename 2813 192 44 3049 be9 es7000_32.o.before 2813 192 44 3049 be9 es7000_32.o.after md5: a593d98a882bf534622c70d9568497ac es7000_32.o.before.asm a593d98a882bf534622c70d9568497ac es7000_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 243 +++++++++++++++++------------------- 1 file changed, 114 insertions(+), 129 deletions(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 3dc48831eb9..c7cc9776cca 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -24,90 +24,96 @@ * http://www.unisys.com */ -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include #include -#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include /* * ES7000 chipsets */ -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 +#define MIP_REG 1 +#define MIP_PSAI_REG 4 -#define MIP_REG 1 -#define MIP_PSAI_REG 4 +#define MIP_BUSY 1 +#define MIP_SPIN 0xf0000 +#define MIP_VALID 0x0100000000000000ULL -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) +#define MIP_PORT(val) ((val >> 32) & 0xffff) -#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) +#define MIP_RD_LO(val) (val & 0xffffffff) struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; }; struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; + unsigned char type; + unsigned char length; + unsigned char part_id; + unsigned char apic_mode; + unsigned long snum; + char ptype[16]; + char sname[64]; + char pname[64]; }; struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; }; struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; + unsigned char type; + unsigned char length; + unsigned char resv[6]; + unsigned long long start; + unsigned long long size; }; struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; + unsigned long long hdr; + struct mip_reg_info mip; + struct part_info pif; + struct es7000_mem_info shm; + struct psai psai; }; #ifdef CONFIG_ACPI struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; }; extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); @@ -115,36 +121,50 @@ extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); #endif struct mip_reg { - unsigned long long off_0; - unsigned long long off_8; - unsigned long long off_10; - unsigned long long off_18; - unsigned long long off_20; - unsigned long long off_28; - unsigned long long off_30; - unsigned long long off_38; + unsigned long long off_0x00; + unsigned long long off_0x08; + unsigned long long off_0x10; + unsigned long long off_0x18; + unsigned long long off_0x20; + unsigned long long off_0x28; + unsigned long long off_0x30; + unsigned long long off_0x38; }; -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) +#define MIP_SW_APIC 0x1020b +#define MIP_FUNC(VALUE) (VALUE & 0xff) + +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ + +#define APIC_DFR_VALUE (APIC_DFR_FLAT) + +extern void es7000_enable_apic_mode(void); +extern int parse_unisys_oem (char *oemptr); +extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +extern void setup_unisys(void); + +#define apicid_cluster(apicid) (apicid & 0xF0) /* * ES7000 Globals */ -static volatile unsigned long *psai = NULL; -static struct mip_reg *mip_reg; -static struct mip_reg *host_reg; -static int mip_port; -static unsigned long mip_addr, host_addr; +static volatile unsigned long *psai = NULL; +static struct mip_reg *mip_reg; +static struct mip_reg *host_reg; +static int mip_port; +static unsigned long mip_addr, host_addr; -int es7000_plat; +int es7000_plat; /* * GSI override for ES7000 platforms. */ -static unsigned int base; +static unsigned int base; static int es7000_rename_gsi(int ioapic, int gsi) @@ -160,6 +180,7 @@ es7000_rename_gsi(int ioapic, int gsi) if (!ioapic && (gsi < 16)) gsi += base; + return gsi; } @@ -196,8 +217,7 @@ static int __init es7000_update_genapic(void) return 0; } -void __init -setup_unisys(void) +void __init setup_unisys(void) { /* * Determine the generation of the ES7000 currently running. @@ -219,8 +239,7 @@ setup_unisys(void) * Parse the OEM Table */ -int __init -parse_unisys_oem (char *oemptr) +int __init parse_unisys_oem (char *oemptr) { int i; int success = 0; @@ -277,12 +296,15 @@ parse_unisys_oem (char *oemptr) es7000_plat = NON_UNISYS; } else setup_unisys(); + return es7000_plat; } #ifdef CONFIG_ACPI -static unsigned long oem_addrX; -static unsigned long oem_size; + +static unsigned long oem_addrX; +static unsigned long oem_size; + int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; @@ -315,8 +337,7 @@ void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) } #endif -static void -es7000_spin(int n) +static void es7000_spin(int n) { int i = 0; @@ -327,16 +348,15 @@ es7000_spin(int n) static int __init es7000_mip_write(struct mip_reg *mip_reg) { - int status = 0; - int spin; + int status = 0; + int spin; spin = MIP_SPIN; - while (((unsigned long long)host_reg->off_38 & - (unsigned long long)MIP_VALID) != 0) { - if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); - return -1; - } + while ((host_reg->off_0x38 & MIP_VALID) != 0) { + if (--spin <= 0) { + printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + return -1; + } es7000_spin(MIP_SPIN); } @@ -345,8 +365,7 @@ es7000_mip_write(struct mip_reg *mip_reg) spin = MIP_SPIN; - while (((unsigned long long)mip_reg->off_38 & - (unsigned long long)MIP_VALID) == 0) { + while ((mip_reg->off_0x38 & MIP_VALID) == 0) { if (--spin <= 0) { printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); return -1; @@ -354,10 +373,9 @@ es7000_mip_write(struct mip_reg *mip_reg) es7000_spin(MIP_SPIN); } - status = ((unsigned long long)mip_reg->off_0 & - (unsigned long long)0xffff0000000000ULL) >> 48; - mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & - (unsigned long long)~MIP_VALID); + status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48; + mip_reg->off_0x38 &= ~MIP_VALID; + return status; } @@ -371,8 +389,8 @@ void __init es7000_enable_apic_mode(void) printk("ES7000: Enabling APIC mode.\n"); memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0 = MIP_SW_APIC; - es7000_mip_reg.off_38 = MIP_VALID; + es7000_mip_reg.off_0x00 = MIP_SW_APIC; + es7000_mip_reg.off_0x38 = MIP_VALID; while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { printk("es7000_enable_apic_mode: command failed, status = %x\n", @@ -380,39 +398,6 @@ void __init es7000_enable_apic_mode(void) } } -/* - * APIC driver for the Unisys ES7000 chipset. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ - -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -extern void es7000_enable_apic_mode(void); -extern int apic_version [MAX_APICS]; -extern unsigned int boot_cpu_physical_apicid; - -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - -#define apicid_cluster(apicid) (apicid & 0xF0) -#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) - static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus @@ -495,9 +480,9 @@ static unsigned long es7000_check_apicid_present(int bit) static unsigned long calculate_ldr(int cpu) { - unsigned long id = xapic_phys_to_log_apicid(cpu); + unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); - return (SET_APIC_LOGICAL_ID(id)); + return SET_APIC_LOGICAL_ID(id); } /* @@ -547,7 +532,7 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) if (!mps_cpu) return boot_cpu_physical_apicid; else if (mps_cpu < nr_cpu_ids) - return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); + return per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; } @@ -584,7 +569,7 @@ static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) static int es7000_check_phys_apicid_present(int cpu_physical_apicid) { boot_cpu_physical_apicid = read_apic_id(); - return (1); + return 1; } static unsigned int From b9e0d1aa9767707cad24db32d8ce0409df16d491 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 15:09:08 +0100 Subject: [PATCH 512/682] x86, apic: remove apicid_cluster() There were multiple definitions of apicid_cluster() scattered around in APIC drivers - but the definitions are equivalent to the already existing generic APIC_CLUSTER() method. So remove apicid_cluster() and change all users to APIC_CLUSTER(). No code changed: md5: 1b8244ba8d3d6a454593ce10f09dfa58 summit_32.o.before.asm 1b8244ba8d3d6a454593ce10f09dfa58 summit_32.o.after.asm md5: a593d98a882bf534622c70d9568497ac es7000_32.o.before.asm a593d98a882bf534622c70d9568497ac es7000_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 8 ++------ arch/x86/kernel/numaq_32.c | 2 -- arch/x86/kernel/summit_32.c | 9 +++------ 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index c7cc9776cca..8a20866b2a3 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -146,8 +146,6 @@ extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); extern void setup_unisys(void); -#define apicid_cluster(apicid) (apicid & 0xF0) - /* * ES7000 Globals */ @@ -595,8 +593,7 @@ es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { + if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); return 0xFF; @@ -630,8 +627,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) if (cpu_isset(cpu, *cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { + if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); return es7000_cpu_to_logical_apicid(0); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 9abaacde72e..15328500de6 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -375,8 +375,6 @@ static inline unsigned long numaq_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -#define apicid_cluster(apicid) (apicid & 0xF0) - static inline int numaq_apic_id_registered(void) { return 1; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 7a1db1f2356..c4690349a54 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -207,14 +207,12 @@ static inline unsigned long summit_check_apicid_present(int bit) return 1; } -#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) - static inline void summit_init_apic_ldr(void) { unsigned long val, id; int count = 0; u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = (u8)apicid_cluster(my_id); + u8 my_cluster = APIC_CLUSTER(my_id); #ifdef CONFIG_SMP u8 lid; int i; @@ -222,7 +220,7 @@ static inline void summit_init_apic_ldr(void) /* Create logical APIC IDs by counting CPUs already in cluster. */ for (count = 0, i = nr_cpu_ids; --i >= 0; ) { lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) + if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) ++count; } #endif @@ -319,8 +317,7 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) if (cpu_isset(cpu, *cpumask)) { int new_apicid = summit_cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)) { + if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { printk ("%s: Not a valid mask!\n", __func__); return 0xFF; From d3185b37df05e9ad7ce987a9a0419ffe1af9d23f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 15:13:05 +0100 Subject: [PATCH 513/682] x86, es7000: remove externs Impact: cleanup In the subarch times there were a number of externs between various bits of the ES7000 code. Now that there's a single es7000-platform support file, the externs can be removed and the functions can be changed the statics. Beyond the cleanup factor, this also shrinks the size of the kernel image a bit: arch/x86/kernel/es7000_32.o: text data bss dec hex filename 2813 192 44 3049 be9 es7000_32.o.before 2693 192 44 2929 b71 es7000_32.o.after Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 8a20866b2a3..d5c3894a983 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -116,8 +116,6 @@ struct oem_table { u32 OEMTableSize; }; -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); #endif struct mip_reg { @@ -140,12 +138,6 @@ struct mip_reg { #define APIC_DFR_VALUE (APIC_DFR_FLAT) -extern void es7000_enable_apic_mode(void); -extern int parse_unisys_oem (char *oemptr); -extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); -extern void setup_unisys(void); - /* * ES7000 Globals */ @@ -215,7 +207,7 @@ static int __init es7000_update_genapic(void) return 0; } -void __init setup_unisys(void) +static void __init setup_unisys(void) { /* * Determine the generation of the ES7000 currently running. @@ -234,10 +226,9 @@ void __init setup_unisys(void) } /* - * Parse the OEM Table + * Parse the OEM Table: */ - -int __init parse_unisys_oem (char *oemptr) +static int __init parse_unisys_oem (char *oemptr) { int i; int success = 0; @@ -290,9 +281,9 @@ int __init parse_unisys_oem (char *oemptr) tp += size; } - if (success < 2) { + if (success < 2) es7000_plat = NON_UNISYS; - } else + else setup_unisys(); return es7000_plat; @@ -303,7 +294,7 @@ int __init parse_unisys_oem (char *oemptr) static unsigned long oem_addrX; static unsigned long oem_size; -int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) +static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; @@ -326,7 +317,7 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) return -1; } -void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) +static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { if (!oem_addr) return; @@ -377,7 +368,7 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } -void __init es7000_enable_apic_mode(void) +static void __init es7000_enable_apic_mode(void) { struct mip_reg es7000_mip_reg; int mip_status; @@ -706,9 +697,9 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) check_dsdt = es7000_check_dsdt(); if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (check_dsdt) + if (check_dsdt) { ret = parse_unisys_oem((char *)oem_addr); - else { + } else { setup_unisys(); ret = 1; } From 352887d1c9d99d4c2f0fbac6176ef0cd4fe7a820 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 15:17:55 +0100 Subject: [PATCH 514/682] x86, es7000: remove dead code, clean up Impact: cleanup - a number of structure definitions were stale - remove needless wrappers around apic definitions - fix details noticed by checkpatch No code changed: md5: 029d8fde0aaf6e934ea63bd8b36430fd es7000_32.o.before.asm 029d8fde0aaf6e934ea63bd8b36430fd es7000_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 115 ++++++++++++------------------------ 1 file changed, 39 insertions(+), 76 deletions(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index d5c3894a983..03acbe95d2b 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -23,7 +23,6 @@ * * http://www.unisys.com */ - #include #include #include @@ -63,61 +62,12 @@ #define MIP_BUSY 1 #define MIP_SPIN 0xf0000 #define MIP_VALID 0x0100000000000000ULL +#define MIP_SW_APIC 0x1020b #define MIP_PORT(val) ((val >> 32) & 0xffff) #define MIP_RD_LO(val) (val & 0xffffffff) -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct part_info { - unsigned char type; - unsigned char length; - unsigned char part_id; - unsigned char apic_mode; - unsigned long snum; - char ptype[16]; - char sname[64]; - char pname[64]; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -struct es7000_mem_info { - unsigned char type; - unsigned char length; - unsigned char resv[6]; - unsigned long long start; - unsigned long long size; -}; - -struct es7000_oem_table { - unsigned long long hdr; - struct mip_reg_info mip; - struct part_info pif; - struct es7000_mem_info shm; - struct psai psai; -}; - -#ifdef CONFIG_ACPI - -struct oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -#endif - struct mip_reg { unsigned long long off_0x00; unsigned long long off_0x08; @@ -129,14 +79,26 @@ struct mip_reg { unsigned long long off_0x38; }; -#define MIP_SW_APIC 0x1020b -#define MIP_FUNC(VALUE) (VALUE & 0xff) +struct mip_reg_info { + unsigned long long mip_info; + unsigned long long delivery_info; + unsigned long long host_reg; + unsigned long long mip_reg; +}; -#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) -#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ +struct psai { + unsigned long long entry_type; + unsigned long long addr; + unsigned long long bep_addr; +}; -#define APIC_DFR_VALUE (APIC_DFR_FLAT) +#ifdef CONFIG_ACPI +struct es7000_oem_table { + struct acpi_table_header Header; + u32 OEMTableAddr; + u32 OEMTableSize; +}; +#endif /* * ES7000 Globals @@ -228,14 +190,14 @@ static void __init setup_unisys(void) /* * Parse the OEM Table: */ -static int __init parse_unisys_oem (char *oemptr) +static int __init parse_unisys_oem(char *oemptr) { - int i; + int i; int success = 0; - unsigned char type, size; - unsigned long val; - char *tp = NULL; - struct psai *psaip = NULL; + unsigned char type, size; + unsigned long val; + char *tp = NULL; + struct psai *psaip = NULL; struct mip_reg_info *mi; struct mip_reg *host, *mip; @@ -243,7 +205,7 @@ static int __init parse_unisys_oem (char *oemptr) tp += 8; - for (i=0; i <= 6; i++) { + for (i = 0; i <= 6; i++) { type = *tp++; size = *tp++; tp -= 2; @@ -302,7 +264,7 @@ static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { - struct oem_table *t = (struct oem_table *)header; + struct es7000_oem_table *t = (void *)header; oem_addrX = t->OEMTableAddr; oem_size = t->OEMTableSize; @@ -377,11 +339,11 @@ static void __init es7000_enable_apic_mode(void) return; printk("ES7000: Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0x00 = MIP_SW_APIC; - es7000_mip_reg.off_0x38 = MIP_VALID; + memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); + es7000_mip_reg.off_0x00 = MIP_SW_APIC; + es7000_mip_reg.off_0x38 = MIP_VALID; - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { printk("es7000_enable_apic_mode: command failed, status = %x\n", mip_status); } @@ -444,7 +406,7 @@ static void es7000_send_IPI_all(int vector) static int es7000_apic_id_registered(void) { - return 1; + return 1; } static const cpumask_t *target_cpus_cluster(void) @@ -486,7 +448,7 @@ static void es7000_init_apic_ldr_cluster(void) unsigned long val; int cpu = smp_processor_id(); - apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + apic_write(APIC_DFR, APIC_DFR_CLUSTER); val = calculate_ldr(cpu); apic_write(APIC_LDR, val); } @@ -496,7 +458,7 @@ static void es7000_init_apic_ldr(void) unsigned long val; int cpu = smp_processor_id(); - apic_write(APIC_DFR, APIC_DFR_VALUE); + apic_write(APIC_DFR, APIC_DFR_FLAT); val = calculate_ldr(cpu); apic_write(APIC_LDR, val); } @@ -585,7 +547,7 @@ es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); + printk("%s: Not a valid mask!\n", __func__); return 0xFF; } @@ -619,7 +581,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); + printk("%s: Not a valid mask!\n", __func__); return es7000_cpu_to_logical_apicid(0); } @@ -658,8 +620,9 @@ static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) void __init es7000_update_genapic_to_cluster(void) { apic->target_cpus = target_cpus_cluster; - apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER; - apic->irq_dest_mode = INT_DEST_MODE_CLUSTER; + apic->irq_delivery_mode = dest_LowestPrio; + /* logical delivery broadcast to all procs: */ + apic->irq_dest_mode = 1; apic->init_apic_ldr = es7000_init_apic_ldr_cluster; From 7da18ed924b182f8174de243c55a323c56398675 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 15:29:30 +0100 Subject: [PATCH 515/682] x86, es7000: misc cleanups These are cleanups that change the md5 signature: - asm/ => linux/ include conversion - simplify the code flow of find_unisys_acpi_oem_table() - move ACPI methods into one #ifdef block - remove 0/NULL initialization of statics - simplify/standardize printouts - update copyrights - more cleanups, pointed out by checkpatch arch/x86/kernel/es7000_32.o: text data bss dec hex filename 2693 192 44 2929 b71 es7000_32.o.before 2688 192 44 2924 b6c es7000_32.o.after Signed-off-by: Ingo Molnar --- arch/x86/kernel/es7000_32.c | 170 +++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 80 deletions(-) diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 03acbe95d2b..3519f8cab70 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -1,10 +1,14 @@ /* * Written by: Garry Forsgren, Unisys Corporation * Natalie Protasevich, Unisys Corporation + * * This file contains the code to configure and interface * with Unisys ES7000 series hardware system manager. * - * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. + * Copyright (c) 2003 Unisys Corporation. + * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar + * + * All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -35,7 +39,9 @@ #include #include #include +#include #include +#include #include #include @@ -44,9 +50,6 @@ #include #include #include -#include -#include -#include /* * ES7000 chipsets @@ -93,22 +96,28 @@ struct psai { }; #ifdef CONFIG_ACPI + struct es7000_oem_table { struct acpi_table_header Header; u32 OEMTableAddr; u32 OEMTableSize; }; + +static unsigned long oem_addrX; +static unsigned long oem_size; + #endif /* * ES7000 Globals */ -static volatile unsigned long *psai = NULL; +static volatile unsigned long *psai; static struct mip_reg *mip_reg; static struct mip_reg *host_reg; static int mip_port; -static unsigned long mip_addr, host_addr; +static unsigned long mip_addr; +static unsigned long host_addr; int es7000_plat; @@ -252,31 +261,35 @@ static int __init parse_unisys_oem(char *oemptr) } #ifdef CONFIG_ACPI - -static unsigned long oem_addrX; -static unsigned long oem_size; - static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; - int i = 0; + struct es7000_oem_table *table; acpi_size tbl_size; + acpi_status ret; + int i = 0; - while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { - if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { - struct es7000_oem_table *t = (void *)header; + for (;;) { + ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size); + if (!ACPI_SUCCESS(ret)) + return -1; - oem_addrX = t->OEMTableAddr; - oem_size = t->OEMTableSize; - early_acpi_os_unmap_memory(header, tbl_size); + if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) + break; - *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, - oem_size); - return 0; - } early_acpi_os_unmap_memory(header, tbl_size); } - return -1; + + table = (void *)header; + + oem_addrX = table->OEMTableAddr; + oem_size = table->OEMTableSize; + + early_acpi_os_unmap_memory(header, tbl_size); + + *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); + + return 0; } static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) @@ -286,7 +299,47 @@ static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) __acpi_unmap_table((char *)oem_addr, oem_size); } -#endif + +static int es7000_check_dsdt(void) +{ + struct acpi_table_header header; + + if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && + !strncmp(header.oem_id, "UNISYS", 6)) + return 1; + return 0; +} + +/* Hook from generic ACPI tables.c */ +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + unsigned long oem_addr = 0; + int check_dsdt; + int ret = 0; + + /* check dsdt at first to avoid clear fix_map for oem_addr */ + check_dsdt = es7000_check_dsdt(); + + if (!find_unisys_acpi_oem_table(&oem_addr)) { + if (check_dsdt) { + ret = parse_unisys_oem((char *)oem_addr); + } else { + setup_unisys(); + ret = 1; + } + /* + * we need to unmap it + */ + unmap_unisys_acpi_oem_table(oem_addr); + } + return ret; +} +#else /* !CONFIG_ACPI: */ +static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return 0; +} +#endif /* !CONFIG_ACPI */ static void es7000_spin(int n) { @@ -305,7 +358,7 @@ es7000_mip_write(struct mip_reg *mip_reg) spin = MIP_SPIN; while ((host_reg->off_0x38 & MIP_VALID) != 0) { if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); + WARN(1, "Timeout waiting for Host Valid Flag\n"); return -1; } es7000_spin(MIP_SPIN); @@ -318,7 +371,7 @@ es7000_mip_write(struct mip_reg *mip_reg) while ((mip_reg->off_0x38 & MIP_VALID) == 0) { if (--spin <= 0) { - printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); + WARN(1, "Timeout waiting for MIP Valid Flag\n"); return -1; } es7000_spin(MIP_SPIN); @@ -338,15 +391,13 @@ static void __init es7000_enable_apic_mode(void) if (!es7000_plat) return; - printk("ES7000: Enabling APIC mode.\n"); + printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); es7000_mip_reg.off_0x00 = MIP_SW_APIC; es7000_mip_reg.off_0x38 = MIP_VALID; - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) { - printk("es7000_enable_apic_mode: command failed, status = %x\n", - mip_status); - } + while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) + WARN(1, "Command failed, status = %x\n", mip_status); } static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) @@ -377,18 +428,6 @@ static unsigned int es7000_get_apic_id(unsigned long x) return (x >> 24) & 0xFF; } -#ifdef CONFIG_ACPI -static int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} -#endif - static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) { default_send_IPI_mask_sequence_phys(mask, vector); @@ -466,10 +505,12 @@ static void es7000_init_apic_ldr(void) static void es7000_setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + + printk(KERN_INFO + "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); + nr_ioapics, cpus_addr(*es7000_target_cpus())[0]); } static int es7000_apicid_to_node(int logical_apicid) @@ -488,13 +529,14 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } +static int cpu_id; + static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) { - static int id = 0; physid_mask_t mask; - mask = physid_mask_of_physid(id); - ++id; + mask = physid_mask_of_physid(cpu_id); + ++cpu_id; return mask; } @@ -547,7 +589,7 @@ es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); + WARN(1, "Not a valid mask!"); return 0xFF; } @@ -648,38 +690,6 @@ es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) return 0; } -#ifdef CONFIG_ACPI -/* Hook from generic ACPI tables.c */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr = 0; - int check_dsdt; - int ret = 0; - - /* check dsdt at first to avoid clear fix_map for oem_addr */ - check_dsdt = es7000_check_dsdt(); - - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (check_dsdt) { - ret = parse_unisys_oem((char *)oem_addr); - } else { - setup_unisys(); - ret = 1; - } - /* - * we need to unmap it - */ - unmap_unisys_acpi_oem_table(oem_addr); - } - return ret; -} -#else -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif - struct genapic apic_es7000 = { From 36afc3af04f4d3dfa709f7659fcadb3336d66ed2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 15:50:25 +0100 Subject: [PATCH 516/682] x86, numaq_32: clean up Impact: cleanup - refactor smp_dump_qct() - tidy up include files, remove duplicates - misc other cleanups, pointed out by checkpatch No code changed: md5: 9c0bc01a53558c77df0f2ebcda7e11a9 numaq_32.o.before.asm 9c0bc01a53558c77df0f2ebcda7e11a9 numaq_32.o.after.asm Signed-off-by: Ingo Molnar --- arch/x86/kernel/numaq_32.c | 195 +++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 95 deletions(-) diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 15328500de6..a4ea08a9ac9 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -22,21 +22,54 @@ * * Send feedback to */ - #include #include +#include +#include +#include #include #include +#include +#include +#include +#include #include #include #include -#include +#include +#include #include #include +#include #include +#include +#include -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) +#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) + +static inline void numaq_register_node(int node, struct sys_cfg_data *scd) +{ + struct eachquadmem *eq = scd->eq + node; + + node_set_online(node); + + /* Convert to pages */ + node_start_pfn[node] = + MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); + + node_end_pfn[node] = + MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); + + e820_register_active_regions(node, node_start_pfn[node], + node_end_pfn[node]); + + memory_present(node, node_start_pfn[node], node_end_pfn[node]); + + node_remap_size[node] = node_memmap_size_bytes(node, + node_start_pfn[node], + node_end_pfn[node]); +} /* * Function: smp_dump_qct() @@ -46,34 +79,18 @@ */ static void __init smp_dump_qct(void) { + struct sys_cfg_data *scd; int node; - struct eachquadmem *eq; - struct sys_cfg_data *scd = - (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); + + scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); nodes_clear(node_online_map); for_each_node(node) { - if (scd->quads_present31_0 & (1 << node)) { - node_set_online(node); - eq = &scd->eq[node]; - /* Convert to pages */ - node_start_pfn[node] = MB_TO_PAGES( - eq->hi_shrd_mem_start - eq->priv_mem_size); - node_end_pfn[node] = MB_TO_PAGES( - eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); - - e820_register_active_regions(node, node_start_pfn[node], - node_end_pfn[node]); - memory_present(node, - node_start_pfn[node], node_end_pfn[node]); - node_remap_size[node] = node_memmap_size_bytes(node, - node_start_pfn[node], - node_end_pfn[node]); - } + if (scd->quads_present31_0 & (1 << node)) + numaq_register_node(node, scd); } } - void __cpuinit numaq_tsc_disable(void) { if (!found_numaq) @@ -98,20 +115,20 @@ int found_numaq; * hence the mpc_record variable .... can't see a less disgusting way of * doing this .... */ -struct mpc_config_translation { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; +struct mpc_trans { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; }; /* x86_quirks member */ -static int mpc_record; -static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] - __cpuinitdata; +static int mpc_record; + +static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY]; static inline int generate_logical_apicid(int quad, int phys_apicid) { @@ -124,10 +141,12 @@ static int mpc_apic_id(struct mpc_cpu *m) int quad = translation_table[mpc_record]->trans_quad; int logical_apicid = generate_logical_apicid(quad, m->apicid); - printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, - m->apicver, quad, logical_apicid); + printk(KERN_DEBUG + "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", + m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, + (m->cpufeature & CPU_MODEL_MASK) >> 4, + m->apicver, quad, logical_apicid); + return logical_apicid; } @@ -143,11 +162,11 @@ static void mpc_oem_bus_info(struct mpc_bus *m, char *name) mp_bus_id_to_node[m->busid] = quad; mp_bus_id_to_local[m->busid] = local; - printk(KERN_INFO "Bus #%d is %s (node %d)\n", - m->busid, name, quad); + + printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); } -int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +int quad_local_to_mp_bus_id[NR_CPUS/4][4]; /* x86_quirks member */ static void mpc_oem_pci_bus(struct mpc_bus *m) @@ -158,7 +177,7 @@ static void mpc_oem_pci_bus(struct mpc_bus *m) quad_local_to_mp_bus_id[quad][local] = m->busid; } -static void __init MP_translation_info(struct mpc_config_translation *m) +static void __init MP_translation_info(struct mpc_trans *m) { printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", @@ -168,7 +187,8 @@ static void __init MP_translation_info(struct mpc_config_translation *m) if (mpc_record >= MAX_MPC_ENTRY) printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); else - translation_table[mpc_record] = m; /* stash this for later */ + translation_table[mpc_record] = m; /* stash this for later */ + if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) node_set_online(m->trans_quad); } @@ -186,16 +206,16 @@ static int __init mpf_checksum(unsigned char *mp, int len) /* * Read/parse the MPC oem tables */ - -static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable, - unsigned short oemsize) +static void __init + smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize) { int count = sizeof(*oemtable); /* the header size */ unsigned char *oemptr = ((unsigned char *)oemtable) + count; mpc_record = 0; - printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", - oemtable); + printk(KERN_INFO + "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); + if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", @@ -203,16 +223,18 @@ static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable, oemtable->signature[2], oemtable->signature[3]); return; } + if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); return; } + while (count < oemtable->length) { switch (*oemptr) { case MP_TRANSLATION: { - struct mpc_config_translation *m = - (struct mpc_config_translation *)oemptr; + struct mpc_trans *m = (void *)oemptr; + MP_translation_info(m); oemptr += sizeof(*m); count += sizeof(*m); @@ -220,12 +242,10 @@ static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable, break; } default: - { - printk(KERN_WARNING - "Unrecognised OEM table entry type! - %d\n", - (int)*oemptr); - return; - } + printk(KERN_WARNING + "Unrecognised OEM table entry type! - %d\n", + (int)*oemptr); + return; } } } @@ -244,21 +264,21 @@ static int __init numaq_update_genapic(void) } static struct x86_quirks numaq_x86_quirks __initdata = { - .arch_pre_time_init = numaq_pre_time_init, - .arch_time_init = NULL, - .arch_pre_intr_init = NULL, - .arch_memory_setup = NULL, - .arch_intr_init = NULL, - .arch_trap_init = NULL, - .mach_get_smp_config = NULL, - .mach_find_smp_config = NULL, - .mpc_record = &mpc_record, - .mpc_apic_id = mpc_apic_id, - .mpc_oem_bus_info = mpc_oem_bus_info, - .mpc_oem_pci_bus = mpc_oem_pci_bus, - .smp_read_mpc_oem = smp_read_mpc_oem, - .setup_ioapic_ids = numaq_setup_ioapic_ids, - .update_genapic = numaq_update_genapic, + .arch_pre_time_init = numaq_pre_time_init, + .arch_time_init = NULL, + .arch_pre_intr_init = NULL, + .arch_memory_setup = NULL, + .arch_intr_init = NULL, + .arch_trap_init = NULL, + .mach_get_smp_config = NULL, + .mach_find_smp_config = NULL, + .mpc_record = &mpc_record, + .mpc_apic_id = mpc_apic_id, + .mpc_oem_bus_info = mpc_oem_bus_info, + .mpc_oem_pci_bus = mpc_oem_pci_bus, + .smp_read_mpc_oem = smp_read_mpc_oem, + .setup_ioapic_ids = numaq_setup_ioapic_ids, + .update_genapic = numaq_update_genapic, }; void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) @@ -275,6 +295,7 @@ static __init void early_check_numaq(void) * Find possible boot-time SMP configuration: */ early_find_smp_config(); + /* * get boot-time SMP configuration: */ @@ -291,28 +312,10 @@ int __init get_memcfg_numaq(void) if (!found_numaq) return 0; smp_dump_qct(); + return 1; } -/* - * APIC driver for the IBM NUMAQ chipset. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) static inline unsigned int numaq_get_apic_id(unsigned long x) @@ -337,8 +340,8 @@ static inline void numaq_send_IPI_all(int vector) extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) +#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) +#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) /* * Because we use NMIs rather than the INIT-STARTUP sequence to @@ -426,7 +429,7 @@ static inline int numaq_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline int numaq_apicid_to_node(int logical_apicid) +static inline int numaq_apicid_to_node(int logical_apicid) { return logical_apicid >> 4; } @@ -468,7 +471,9 @@ static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } -static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) + +static int +__numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { numaq_mps_oem_check(mpc, oem, productid); return found_numaq; From cb81eaedf12d3e5e6e3dcf3320089660f7fb7464 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 17:53:54 +0100 Subject: [PATCH 517/682] x86, numaq_32: clean up, misc Impact: cleanup - misc other cleanups that change the md5 signature - consolidate global variables - remove unnecessary __numaq_mps_oem_check() wrapper - make numaq_mps_oem_check static - update copyrights - misc other cleanups pointed out by checkpatch Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 2 - arch/x86/kernel/numaq_32.c | 97 +++++++++++++++++------------------ 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 5916c8df09d..642fc7fc8cd 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -167,6 +167,4 @@ extern int generic_mps_oem_check(struct mpc_table *, char *, char *); extern int default_acpi_madt_oem_check(char *, char *); -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a4ea08a9ac9..62f9274a2ed 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -2,6 +2,7 @@ * Written by: Patricia Gaughen, IBM Corporation * * Copyright (C) 2002, IBM Corp. + * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar * * All rights reserved. * @@ -23,6 +24,7 @@ * Send feedback to */ #include +#include #include #include #include @@ -33,10 +35,10 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -44,10 +46,36 @@ #include #include #include -#include #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) +int found_numaq; + +/* + * Have to match translation table entries to main table entries by counter + * hence the mpc_record variable .... can't see a less disgusting way of + * doing this .... + */ +struct mpc_trans { + unsigned char mpc_type; + unsigned char trans_len; + unsigned char trans_type; + unsigned char trans_quad; + unsigned char trans_global; + unsigned char trans_local; + unsigned short trans_reserved; +}; + +/* x86_quirks member */ +static int mpc_record; + +static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY]; + +int mp_bus_id_to_node[MAX_MP_BUSSES]; +int mp_bus_id_to_local[MAX_MP_BUSSES]; +int quad_local_to_mp_bus_id[NR_CPUS/4][4]; + + static inline void numaq_register_node(int node, struct sys_cfg_data *scd) { struct eachquadmem *eq = scd->eq + node; @@ -108,28 +136,6 @@ static int __init numaq_pre_time_init(void) return 0; } -int found_numaq; - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ -struct mpc_trans { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -/* x86_quirks member */ -static int mpc_record; - -static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY]; - static inline int generate_logical_apicid(int quad, int phys_apicid) { return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); @@ -150,10 +156,6 @@ static int mpc_apic_id(struct mpc_cpu *m) return logical_apicid; } -int mp_bus_id_to_node[MAX_MP_BUSSES]; - -int mp_bus_id_to_local[MAX_MP_BUSSES]; - /* x86_quirks member */ static void mpc_oem_bus_info(struct mpc_bus *m, char *name) { @@ -166,8 +168,6 @@ static void mpc_oem_bus_info(struct mpc_bus *m, char *name) printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); } -int quad_local_to_mp_bus_id[NR_CPUS/4][4]; - /* x86_quirks member */ static void mpc_oem_pci_bus(struct mpc_bus *m) { @@ -180,7 +180,7 @@ static void mpc_oem_pci_bus(struct mpc_bus *m) static void __init MP_translation_info(struct mpc_trans *m) { printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", + "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local); @@ -281,14 +281,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .update_genapic = numaq_update_genapic, }; -void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk("Warning! Not a NUMA-Q system!\n"); - else - found_numaq = 1; -} - static __init void early_check_numaq(void) { /* @@ -338,8 +330,6 @@ static inline void numaq_send_IPI_all(int vector) numaq_send_IPI_mask(cpu_online_mask, vector); } -extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); - #define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) #define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) @@ -355,7 +345,7 @@ static inline void numaq_smp_callin_clear_local_apic(void) static inline void numaq_store_NMI_vector(unsigned short *high, unsigned short *low) { - printk("Storing NMI vector\n"); + printk(KERN_ERR "Storing NMI vector\n"); *high = *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); *low = @@ -390,8 +380,9 @@ static inline void numaq_init_apic_ldr(void) static inline void numaq_setup_apic_routing(void) { - printk("Enabling APIC mode: %s. Using %d I/O APICs\n", - "NUMA-Q", nr_ioapics); + printk(KERN_INFO + "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", + nr_ioapics); } /* @@ -473,9 +464,13 @@ static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) } static int -__numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { - numaq_mps_oem_check(mpc, oem, productid); + if (strncmp(oem, "IBM NUMA", 8)) + printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); + else + found_numaq = 1; + return found_numaq; } @@ -505,9 +500,13 @@ static void numaq_setup_portio_remap(void) if (num_quads <= 1) return; - printk("Remapping cross-quad port I/O for %d quads\n", num_quads); + printk(KERN_INFO + "Remapping cross-quad port I/O for %d quads\n", num_quads); + xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - printk("xquad_portio vaddr 0x%08lx, len %08lx\n", + + printk(KERN_INFO + "xquad_portio vaddr 0x%08lx, len %08lx\n", (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); } @@ -542,7 +541,7 @@ struct genapic apic_numaq = { .check_phys_apicid_present = numaq_check_phys_apicid_present, .enable_apic_mode = NULL, .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = __numaq_mps_oem_check, + .mps_oem_check = numaq_mps_oem_check, .get_apic_id = numaq_get_apic_id, .set_apic_id = NULL, From ab6fb7c0b03e2c3286f316c840347be8b9ee3d9f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 16:22:09 +0100 Subject: [PATCH 518/682] x86, apic: remove ->store_NMI_vector() Impact: cleanup It's not used by anything anymore. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/bigsmp_32.c | 1 - arch/x86/kernel/es7000_32.c | 1 - arch/x86/kernel/genapic_flat_64.c | 2 -- arch/x86/kernel/genx2apic_cluster.c | 1 - arch/x86/kernel/genx2apic_phys.c | 1 - arch/x86/kernel/genx2apic_uv_x.c | 1 - arch/x86/kernel/numaq_32.c | 11 ----------- arch/x86/kernel/probe_32.c | 1 - arch/x86/kernel/smpboot.c | 13 +++++-------- arch/x86/kernel/summit_32.c | 1 - 11 files changed, 5 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2cdd19e4536..122d8eda275 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -313,7 +313,6 @@ struct genapic { void (*wait_for_init_deassert)(atomic_t *deassert); void (*smp_callin_clear_local_apic)(void); - void (*store_NMI_vector)(unsigned short *high, unsigned short *low); void (*inquire_remote_apic)(int apicid); /* apic ops */ diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 0de9eed7c60..17c25bc26a5 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -263,7 +263,6 @@ struct genapic apic_bigsmp = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 3519f8cab70..1d6e99a8edc 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -746,7 +746,6 @@ struct genapic apic_es7000 = { /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index a7d84763648..27b81208009 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -227,7 +227,6 @@ struct genapic apic_flat = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = NULL, .read = native_apic_mem_read, @@ -379,7 +378,6 @@ struct genapic apic_physflat = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = NULL, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f5e02cffa26..b9ef0091c4d 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -232,7 +232,6 @@ struct genapic apic_x2apic_cluster = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index 11eb4cb7ca3..bb752015776 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -218,7 +218,6 @@ struct genapic apic_x2apic_phys = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index c1746a198bd..dcb8c14287d 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -290,7 +290,6 @@ struct genapic apic_x2apic_uv_x = { .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 62f9274a2ed..a709de87819 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -342,16 +342,6 @@ static inline void numaq_smp_callin_clear_local_apic(void) clear_local_APIC(); } -static inline void -numaq_store_NMI_vector(unsigned short *high, unsigned short *low) -{ - printk(KERN_ERR "Storing NMI vector\n"); - *high = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH)); - *low = - *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW)); -} - static inline const cpumask_t *numaq_target_cpus(void) { return &CPU_MASK_ALL; @@ -564,7 +554,6 @@ struct genapic apic_numaq = { .wait_for_init_deassert = NULL, .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .store_NMI_vector = numaq_store_NMI_vector, .inquire_remote_apic = NULL, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index fd1352ac909..5914ffb6e40 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -131,7 +131,6 @@ struct genapic apic_default = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 09e73876a44..9ce666387f3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -745,21 +745,21 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } -static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. */ +static int __cpuinit do_boot_cpu(int apicid, int cpu) { unsigned long boot_error = 0; - int timeout; unsigned long start_ip; - unsigned short nmi_high = 0, nmi_low = 0; + int timeout; struct create_idle c_idle = { - .cpu = cpu, - .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; + INIT_WORK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); @@ -824,9 +824,6 @@ do_rest: pr_debug("Setting warm reset code and vector.\n"); - if (apic->store_NMI_vector) - apic->store_NMI_vector(&nmi_high, &nmi_low); - smpboot_setup_warm_reset_vector(start_ip); /* * Be paranoid about clearing APIC errors. diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index c4690349a54..8f1a11b072a 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -590,7 +590,6 @@ struct genapic apic_summit = { .wait_for_init_deassert = default_wait_for_init_deassert, .smp_callin_clear_local_apic = NULL, - .store_NMI_vector = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, From be163a159b223e94b3180afdd47a8d468eb9a492 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 16:28:46 +0100 Subject: [PATCH 519/682] x86, apic: rename 'genapic' to 'apic' Impact: cleanup Now that all APIC code is consolidated there's nothing 'gen' about apics anymore - so rename 'struct genapic' to 'struct apic'. This shortens the code and is nicer to read as well. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 18 +++++++-------- arch/x86/include/asm/setup.h | 2 +- arch/x86/kernel/bigsmp_32.c | 2 +- arch/x86/kernel/es7000_32.c | 10 ++++---- arch/x86/kernel/genapic_64.c | 18 +++++++-------- arch/x86/kernel/genapic_flat_64.c | 4 ++-- arch/x86/kernel/genx2apic_cluster.c | 2 +- arch/x86/kernel/genx2apic_phys.c | 2 +- arch/x86/kernel/genx2apic_uv_x.c | 2 +- arch/x86/kernel/numaq_32.c | 6 ++--- arch/x86/kernel/probe_32.c | 36 ++++++++++++++--------------- arch/x86/kernel/setup.c | 4 ++-- arch/x86/kernel/summit_32.c | 2 +- 13 files changed, 54 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 122d8eda275..dce1bf696cc 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -248,7 +248,7 @@ static inline void disable_local_APIC(void) { } * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ -struct genapic { +struct apic { char *name; int (*probe)(void); @@ -283,7 +283,7 @@ struct genapic { int (*phys_pkg_id)(int cpuid_apic, int index_msb); /* - * When one of the next two hooks returns 1 the genapic + * When one of the next two hooks returns 1 the apic * is switched to this. Essentially they are additional * probe functions: */ @@ -324,7 +324,7 @@ struct genapic { u32 (*safe_wait_icr_idle)(void); }; -extern struct genapic *apic; +extern struct apic *apic; static inline u32 apic_read(u32 reg) { @@ -385,17 +385,17 @@ static inline unsigned default_get_apic_id(unsigned long x) #define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 #ifdef CONFIG_X86_32 -extern void es7000_update_genapic_to_cluster(void); +extern void es7000_update_apic_to_cluster(void); #else -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2apic_cluster; -extern struct genapic apic_x2apic_phys; +extern struct apic apic_flat; +extern struct apic apic_physflat; +extern struct apic apic_x2apic_cluster; +extern struct apic apic_x2apic_phys; extern int default_acpi_madt_oem_check(char *, char *); extern void apic_send_IPI_self(int vector); -extern struct genapic apic_x2apic_uv_x; +extern struct apic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern int default_cpu_present_to_apicid(int mps_cpu); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index c230189462a..8029369cd6f 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -30,7 +30,7 @@ struct x86_quirks { void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, unsigned short oemsize); int (*setup_ioapic_ids)(void); - int (*update_genapic)(void); + int (*update_apic)(void); }; #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c index 17c25bc26a5..0b1093394fd 100644 --- a/arch/x86/kernel/bigsmp_32.c +++ b/arch/x86/kernel/bigsmp_32.c @@ -210,7 +210,7 @@ static int probe_bigsmp(void) return dmi_bigsmp; } -struct genapic apic_bigsmp = { +struct apic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 1d6e99a8edc..320f2d2e4e5 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -163,14 +163,14 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) return 0; } -static int __init es7000_update_genapic(void) +static int __init es7000_update_apic(void) { apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { - es7000_update_genapic_to_cluster(); + es7000_update_apic_to_cluster(); apic->wait_for_init_deassert = NULL; apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; } @@ -193,7 +193,7 @@ static void __init setup_unisys(void) es7000_plat = ES7000_CLASSIC; ioapic_renumber_irq = es7000_rename_gsi; - x86_quirks->update_genapic = es7000_update_genapic; + x86_quirks->update_apic = es7000_update_apic; } /* @@ -659,7 +659,7 @@ static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -void __init es7000_update_genapic_to_cluster(void) +void __init es7000_update_apic_to_cluster(void) { apic->target_cpus = target_cpus_cluster; apic->irq_delivery_mode = dest_LowestPrio; @@ -691,7 +691,7 @@ es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) } -struct genapic apic_es7000 = { +struct apic apic_es7000 = { .name = "es7000", .probe = probe_es7000, diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 91cae6f6e73..70935dd904d 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -23,16 +23,16 @@ #include #include -extern struct genapic apic_flat; -extern struct genapic apic_physflat; -extern struct genapic apic_x2xpic_uv_x; -extern struct genapic apic_x2apic_phys; -extern struct genapic apic_x2apic_cluster; +extern struct apic apic_flat; +extern struct apic apic_physflat; +extern struct apic apic_x2xpic_uv_x; +extern struct apic apic_x2apic_phys; +extern struct apic apic_x2apic_cluster; -struct genapic __read_mostly *apic = &apic_flat; +struct apic __read_mostly *apic = &apic_flat; EXPORT_SYMBOL_GPL(apic); -static struct genapic *apic_probe[] __initdata = { +static struct apic *apic_probe[] __initdata = { #ifdef CONFIG_X86_UV &apic_x2apic_uv_x, #endif @@ -62,8 +62,8 @@ void __init default_setup_apic_routing(void) printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); + if (x86_quirks->update_apic) + x86_quirks->update_apic(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 27b81208009..3b002995e14 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -178,7 +178,7 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb) return hard_smp_processor_id() >> index_msb; } -struct genapic apic_flat = { +struct apic apic_flat = { .name = "flat", .probe = NULL, .acpi_madt_oem_check = flat_acpi_madt_oem_check, @@ -327,7 +327,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -struct genapic apic_physflat = { +struct apic apic_physflat = { .name = "physical flat", .probe = NULL, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index b9ef0091c4d..4e39d9ad4d5 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -182,7 +182,7 @@ static void init_x2apic_ldr(void) per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); } -struct genapic apic_x2apic_cluster = { +struct apic apic_x2apic_cluster = { .name = "cluster x2apic", .probe = NULL, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index bb752015776..d2d52eb9f7e 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -168,7 +168,7 @@ static void init_x2apic_ldr(void) { } -struct genapic apic_x2apic_phys = { +struct apic apic_x2apic_phys = { .name = "physical x2apic", .probe = NULL, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dcb8c14287d..20b4ad07c3a 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -240,7 +240,7 @@ static void uv_send_IPI_self(int vector) apic_write(APIC_SELF_IPI, vector); } -struct genapic apic_x2apic_uv_x = { +struct apic apic_x2apic_uv_x = { .name = "UV large system", .probe = NULL, diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a709de87819..d9d6d61eed8 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -256,7 +256,7 @@ static int __init numaq_setup_ioapic_ids(void) return 1; } -static int __init numaq_update_genapic(void) +static int __init numaq_update_apic(void) { apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; @@ -278,7 +278,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, .setup_ioapic_ids = numaq_setup_ioapic_ids, - .update_genapic = numaq_update_genapic, + .update_apic = numaq_update_apic, }; static __init void early_check_numaq(void) @@ -500,7 +500,7 @@ static void numaq_setup_portio_remap(void) (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); } -struct genapic apic_numaq = { +struct apic apic_numaq = { .name = "NUMAQ", .probe = probe_numaq, diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c index 5914ffb6e40..5fa48332c5c 100644 --- a/arch/x86/kernel/probe_32.c +++ b/arch/x86/kernel/probe_32.c @@ -78,7 +78,7 @@ static int probe_default(void) return 1; } -struct genapic apic_default = { +struct apic apic_default = { .name = "default", .probe = probe_default, @@ -141,16 +141,16 @@ struct genapic apic_default = { .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; -extern struct genapic apic_numaq; -extern struct genapic apic_summit; -extern struct genapic apic_bigsmp; -extern struct genapic apic_es7000; -extern struct genapic apic_default; +extern struct apic apic_numaq; +extern struct apic apic_summit; +extern struct apic apic_bigsmp; +extern struct apic apic_es7000; +extern struct apic apic_default; -struct genapic *apic = &apic_default; +struct apic *apic = &apic_default; EXPORT_SYMBOL_GPL(apic); -static struct genapic *apic_probe[] __initdata = { +static struct apic *apic_probe[] __initdata = { #ifdef CONFIG_X86_NUMAQ &apic_numaq, #endif @@ -183,8 +183,8 @@ static int __init parse_apic(char *arg) } } - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); + if (x86_quirks->update_apic) + x86_quirks->update_apic(); /* Parsed again by __setup for debug/verbose */ return 0; @@ -204,8 +204,8 @@ void __init generic_bigsmp_probe(void) if (!cmdline_apic && apic == &apic_default) { if (apic_bigsmp.probe()) { apic = &apic_bigsmp; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); + if (x86_quirks->update_apic) + x86_quirks->update_apic(); printk(KERN_INFO "Overriding APIC driver with %s\n", apic->name); } @@ -227,8 +227,8 @@ void __init generic_apic_probe(void) if (!apic_probe[i]) panic("Didn't find an APIC driver"); - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); + if (x86_quirks->update_apic) + x86_quirks->update_apic(); } printk(KERN_INFO "Using APIC driver %s\n", apic->name); } @@ -248,8 +248,8 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) if (!cmdline_apic) { apic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); + if (x86_quirks->update_apic) + x86_quirks->update_apic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", apic->name); } @@ -270,8 +270,8 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (!cmdline_apic) { apic = apic_probe[i]; - if (x86_quirks->update_genapic) - x86_quirks->update_genapic(); + if (x86_quirks->update_apic) + x86_quirks->update_apic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", apic->name); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b2da0b1d15e..6b588d6b388 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -599,7 +599,7 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif -static int __init default_update_genapic(void) +static int __init default_update_apic(void) { #ifdef CONFIG_SMP if (!apic->wakeup_cpu) @@ -610,7 +610,7 @@ static int __init default_update_genapic(void) } static struct x86_quirks default_x86_quirks __initdata = { - .update_genapic = default_update_genapic, + .update_apic = default_update_apic, }; struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c index 8f1a11b072a..cfe7b09015d 100644 --- a/arch/x86/kernel/summit_32.c +++ b/arch/x86/kernel/summit_32.c @@ -537,7 +537,7 @@ void __init setup_summit(void) } #endif -struct genapic apic_summit = { +struct apic apic_summit = { .name = "summit", .probe = probe_summit, From f62bae5009c1ba596cd475cafbc83e0570a36e26 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 18:09:24 +0100 Subject: [PATCH 520/682] x86, apic: move APIC drivers to arch/x86/kernel/apic/* arch/x86/kernel/ is getting a bit crowded, and the APIC drivers are scattered into various different files. Move them to arch/x86/kernel/apic/*, and also remove the 'gen' prefix from those which had it. Also move APIC related functionality: the IO-APIC driver, the NMI and the IPI code. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 23 ++++++++----------- arch/x86/kernel/apic/Makefile | 15 ++++++++++++ arch/x86/kernel/{ => apic}/apic.c | 0 .../kernel/{genapic_64.c => apic/apic_64.c} | 0 .../apic_flat_64.c} | 0 arch/x86/kernel/{ => apic}/io_apic.c | 0 arch/x86/kernel/{ => apic}/ipi.c | 0 arch/x86/kernel/{ => apic}/nmi.c | 0 .../x2apic_cluster.c} | 0 .../{genx2apic_phys.c => apic/x2apic_phys.c} | 0 .../{genx2apic_uv_x.c => apic/x2apic_uv_x.c} | 0 11 files changed, 24 insertions(+), 14 deletions(-) create mode 100644 arch/x86/kernel/apic/Makefile rename arch/x86/kernel/{ => apic}/apic.c (100%) rename arch/x86/kernel/{genapic_64.c => apic/apic_64.c} (100%) rename arch/x86/kernel/{genapic_flat_64.c => apic/apic_flat_64.c} (100%) rename arch/x86/kernel/{ => apic}/io_apic.c (100%) rename arch/x86/kernel/{ => apic}/ipi.c (100%) rename arch/x86/kernel/{ => apic}/nmi.c (100%) rename arch/x86/kernel/{genx2apic_cluster.c => apic/x2apic_cluster.c} (100%) rename arch/x86/kernel/{genx2apic_phys.c => apic/x2apic_phys.c} (100%) rename arch/x86/kernel/{genx2apic_uv_x.c => apic/x2apic_uv_x.c} (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 1cefd218f76..9139ff69471 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -58,13 +58,12 @@ obj-$(CONFIG_PCI) += early-quirks.o apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o +obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o -obj-$(CONFIG_X86_IO_APIC) += io_apic.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic/ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o @@ -116,17 +115,13 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) - obj-y += genapic_64.o genapic_flat_64.o - obj-$(CONFIG_X86_X2APIC) += genx2apic_cluster.o - obj-$(CONFIG_X86_X2APIC) += genx2apic_phys.o - obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o - obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o - obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o - obj-$(CONFIG_AUDIT) += audit_64.o + obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o + obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o + obj-$(CONFIG_AUDIT) += audit_64.o - obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o - obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o - obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o + obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o + obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o + obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o - obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o endif diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile new file mode 100644 index 00000000000..da20b70c400 --- /dev/null +++ b/arch/x86/kernel/apic/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for local APIC drivers and for the IO-APIC code +# + +obj-y := apic.o ipi.o nmi.o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o +obj-$(CONFIG_SMP) += ipi.o + +ifeq ($(CONFIG_X86_64),y) +obj-y += apic_64.o apic_flat_64.o +obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o +obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o +obj-$(CONFIG_X86_UV) += x2apic_uv_x.o +endif + diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic/apic.c similarity index 100% rename from arch/x86/kernel/apic.c rename to arch/x86/kernel/apic/apic.c diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/apic/apic_64.c similarity index 100% rename from arch/x86/kernel/genapic_64.c rename to arch/x86/kernel/apic/apic_64.c diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c similarity index 100% rename from arch/x86/kernel/genapic_flat_64.c rename to arch/x86/kernel/apic/apic_flat_64.c diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/apic/io_apic.c similarity index 100% rename from arch/x86/kernel/io_apic.c rename to arch/x86/kernel/apic/io_apic.c diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/apic/ipi.c similarity index 100% rename from arch/x86/kernel/ipi.c rename to arch/x86/kernel/apic/ipi.c diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/apic/nmi.c similarity index 100% rename from arch/x86/kernel/nmi.c rename to arch/x86/kernel/apic/nmi.c diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c similarity index 100% rename from arch/x86/kernel/genx2apic_cluster.c rename to arch/x86/kernel/apic/x2apic_cluster.c diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c similarity index 100% rename from arch/x86/kernel/genx2apic_phys.c rename to arch/x86/kernel/apic/x2apic_phys.c diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c similarity index 100% rename from arch/x86/kernel/genx2apic_uv_x.c rename to arch/x86/kernel/apic/x2apic_uv_x.c From 2a05180fe2e5b414f0cb2ccfc80e6c90563e3c67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 20:35:16 +0100 Subject: [PATCH 521/682] x86, apic: move remaining APIC drivers to arch/x86/kernel/apic/* Move the 32-bit extended-arch APIC drivers to arch/x86/kernel/apic/ too, and rename apic_64.c to probe_64.c. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 6 +----- arch/x86/kernel/apic/Makefile | 9 +++++++-- arch/x86/kernel/{ => apic}/bigsmp_32.c | 0 arch/x86/kernel/{ => apic}/es7000_32.c | 0 arch/x86/kernel/{ => apic}/numaq_32.c | 0 arch/x86/kernel/{ => apic}/probe_32.c | 0 arch/x86/kernel/apic/{apic_64.c => probe_64.c} | 0 arch/x86/kernel/{ => apic}/summit_32.c | 0 8 files changed, 8 insertions(+), 7 deletions(-) rename arch/x86/kernel/{ => apic}/bigsmp_32.c (100%) rename arch/x86/kernel/{ => apic}/es7000_32.c (100%) rename arch/x86/kernel/{ => apic}/numaq_32.c (100%) rename arch/x86/kernel/{ => apic}/probe_32.c (100%) rename arch/x86/kernel/apic/{apic_64.c => probe_64.c} (100%) rename arch/x86/kernel/{ => apic}/summit_32.c (100%) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9139ff69471..c70537d8c15 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -30,7 +30,7 @@ obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o i8259.o irqinit_$(BITS).o obj-$(CONFIG_X86_VISWS) += visws_quirks.o -obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o +obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o @@ -70,10 +70,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o -obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-$(CONFIG_X86_ES7000) += es7000_32.o -obj-$(CONFIG_X86_SUMMIT) += summit_32.o obj-y += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da20b70c400..97f558db5c3 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,14 +2,19 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-y := apic.o ipi.o nmi.o +obj-y := apic.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o +obj-$ ifeq ($(CONFIG_X86_64),y) -obj-y += apic_64.o apic_flat_64.o +obj-y += apic_flat_64.o obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o obj-$(CONFIG_X86_UV) += x2apic_uv_x.o endif +obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o +obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +obj-$(CONFIG_X86_ES7000) += es7000_32.o +obj-$(CONFIG_X86_SUMMIT) += summit_32.o diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c similarity index 100% rename from arch/x86/kernel/bigsmp_32.c rename to arch/x86/kernel/apic/bigsmp_32.c diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c similarity index 100% rename from arch/x86/kernel/es7000_32.c rename to arch/x86/kernel/apic/es7000_32.c diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c similarity index 100% rename from arch/x86/kernel/numaq_32.c rename to arch/x86/kernel/apic/numaq_32.c diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/apic/probe_32.c similarity index 100% rename from arch/x86/kernel/probe_32.c rename to arch/x86/kernel/apic/probe_32.c diff --git a/arch/x86/kernel/apic/apic_64.c b/arch/x86/kernel/apic/probe_64.c similarity index 100% rename from arch/x86/kernel/apic/apic_64.c rename to arch/x86/kernel/apic/probe_64.c diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/apic/summit_32.c similarity index 100% rename from arch/x86/kernel/summit_32.c rename to arch/x86/kernel/apic/summit_32.c From a7eb518998529c08cc53fef17756d9fe433b0c23 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 17 Feb 2009 13:01:51 -0800 Subject: [PATCH 522/682] x86: truncate ISA addresses to unsigned int Impact: Cleanup; fix inappropriate macro use ISA addresses on x86 are mapped 1:1 with the physical address space. Since the ISA address space is only 24 bits (32 for VLB or LPC) it will always fit in an unsigned int, and at least in the aha1542 driver using a wider type would cause an undesirable promotion. Hence explicitly cast the ISA bus addresses to unsigned int. Signed-off-by: H. Peter Anvin Cc: James Bottomley --- arch/x86/include/asm/io.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 4f8e820cf38..683d0b4c00f 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -124,10 +124,15 @@ static inline void *phys_to_virt(phys_addr_t address) /* * ISA I/O bus memory addresses are 1:1 with the physical address. + * However, we truncate the address to unsigned int to avoid undesirable + * promitions in legacy drivers. */ -#define isa_virt_to_bus (unsigned long)virt_to_phys -#define isa_page_to_bus page_to_phys -#define isa_bus_to_virt phys_to_virt +static inline unsigned int isa_virt_to_bus(volatile void *address) +{ + return (unsigned int)virt_to_phys(address); +} +#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) +#define isa_bus_to_virt phys_to_virt /* * However PCI ones are not necessarily 1:1 and therefore these interfaces From 9be1b56a3e718aa998772019c57c398dbb19e258 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 17 Feb 2009 23:12:48 +0100 Subject: [PATCH 523/682] x86, apic: separate 32-bit setup functionality out of apic_32.c Impact: build fix, cleanup A couple of arch setup callbacks were mistakenly in apic_32.c, breaking the build. Also simplify the code a bit. Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/apic/Makefile | 3 +- arch/x86/kernel/apic/probe_32.c | 160 ++++---------------------------- arch/x86/kernel/setup.c | 124 +++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 147 deletions(-) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c70537d8c15..de5657c039e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -63,7 +63,7 @@ obj-$(CONFIG_SMP) += setup_percpu.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic/ +obj-y += apic/ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 97f558db5c3..da7b7b9f8bd 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,10 +2,9 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-y := apic.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o -obj-$ ifeq ($(CONFIG_X86_64),y) obj-y += apic_flat_64.o diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 5fa48332c5c..c9ec90742e9 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -47,7 +47,22 @@ int no_broadcast = DEFAULT_SEND_IPI; -#ifdef CONFIG_X86_LOCAL_APIC +static __init int no_ipi_broadcast(char *str) +{ + get_option(&str, &no_broadcast); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); + return 1; +} +__setup("no_ipi_broadcast=", no_ipi_broadcast); + +static int __init print_ipi_mode(void) +{ + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); + return 0; +} +late_initcall(print_ipi_mode); void default_setup_apic_routing(void) { @@ -279,146 +294,3 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } return 0; } - -#endif /* CONFIG_X86_LOCAL_APIC */ - -/** - * pre_intr_init_hook - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init pre_intr_init_hook(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - -/** - * intr_init_hook - post gate setup interrupt initialisation - * - * Description: - * Fill in any interrupts that may have been left out by the general - * init_IRQ() routine. interrupts having to do with the machine rather - * than the devices on the I/O bus (like APIC interrupts in intel MP - * systems) are started here. - **/ -void __init intr_init_hook(void) -{ - if (x86_quirks->arch_intr_init) { - if (x86_quirks->arch_intr_init()) - return; - } -} - -/** - * pre_setup_arch_hook - hook called prior to any setup_arch() execution - * - * Description: - * generally used to activate any machine specific identification - * routines that may be needed before setup_arch() runs. On Voyager - * this is used to get the board revision and type. - **/ -void __init pre_setup_arch_hook(void) -{ -} - -/** - * trap_init_hook - initialise system specific traps - * - * Description: - * Called as the final act of trap_init(). Used in VISWS to initialise - * the various board specific APIC traps. - **/ -void __init trap_init_hook(void) -{ - if (x86_quirks->arch_trap_init) { - if (x86_quirks->arch_trap_init()) - return; - } -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -/** - * pre_time_init_hook - do any specific initialisations before. - * - **/ -void __init pre_time_init_hook(void) -{ - if (x86_quirks->arch_pre_time_init) - x86_quirks->arch_pre_time_init(); -} - -/** - * time_init_hook - do any specific initialisations for the system timer. - * - * Description: - * Must plug the system timer interrupt source at HZ into the IRQ listed - * in irq_vectors.h:TIMER_IRQ - **/ -void __init time_init_hook(void) -{ - if (x86_quirks->arch_time_init) { - /* - * A nonzero return code does not mean failure, it means - * that the architecture quirk does not want any - * generic (timer) setup to be performed after this: - */ - if (x86_quirks->arch_time_init()) - return; - } - - irq0.mask = cpumask_of_cpu(0); - setup_irq(0, &irq0); -} - -#ifdef CONFIG_MCA -/** - * mca_nmi_hook - hook into MCA specific NMI chain - * - * Description: - * The MCA (Microchannel Architecture) has an NMI chain for NMI sources - * along the MCA bus. Use this to hook into that chain if you will need - * it. - **/ -void mca_nmi_hook(void) -{ - /* - * If I recall correctly, there's a whole bunch of other things that - * we can do to check for NMI problems, but that's all I know about - * at the moment. - */ - pr_warning("NMI generated from unknown source!\n"); -} -#endif - -static __init int no_ipi_broadcast(char *str) -{ - get_option(&str, &no_broadcast); - pr_info("Using %s mode\n", - no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); - return 1; -} -__setup("no_ipi_broadcast=", no_ipi_broadcast); - -static int __init print_ipi_mode(void) -{ - pr_info("Using IPI %s mode\n", - no_broadcast ? "No-Shortcut" : "Shortcut"); - return 0; -} - -late_initcall(print_ipi_mode); - diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 6b588d6b388..ebef8005579 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -985,4 +985,128 @@ void __init setup_arch(char **cmdline_p) #endif } +#ifdef CONFIG_X86_32 +/** + * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +void __init pre_intr_init_hook(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} + +/** + * intr_init_hook - post gate setup interrupt initialisation + * + * Description: + * Fill in any interrupts that may have been left out by the general + * init_IRQ() routine. interrupts having to do with the machine rather + * than the devices on the I/O bus (like APIC interrupts in intel MP + * systems) are started here. + **/ +void __init intr_init_hook(void) +{ + if (x86_quirks->arch_intr_init) { + if (x86_quirks->arch_intr_init()) + return; + } +} + +/** + * pre_setup_arch_hook - hook called prior to any setup_arch() execution + * + * Description: + * generally used to activate any machine specific identification + * routines that may be needed before setup_arch() runs. On Voyager + * this is used to get the board revision and type. + **/ +void __init pre_setup_arch_hook(void) +{ +} + +/** + * trap_init_hook - initialise system specific traps + * + * Description: + * Called as the final act of trap_init(). Used in VISWS to initialise + * the various board specific APIC traps. + **/ +void __init trap_init_hook(void) +{ + if (x86_quirks->arch_trap_init) { + if (x86_quirks->arch_trap_init()) + return; + } +} + +static struct irqaction irq0 = { + .handler = timer_interrupt, + .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL, + .mask = CPU_MASK_NONE, + .name = "timer" +}; + +/** + * pre_time_init_hook - do any specific initialisations before. + * + **/ +void __init pre_time_init_hook(void) +{ + if (x86_quirks->arch_pre_time_init) + x86_quirks->arch_pre_time_init(); +} + +/** + * time_init_hook - do any specific initialisations for the system timer. + * + * Description: + * Must plug the system timer interrupt source at HZ into the IRQ listed + * in irq_vectors.h:TIMER_IRQ + **/ +void __init time_init_hook(void) +{ + if (x86_quirks->arch_time_init) { + /* + * A nonzero return code does not mean failure, it means + * that the architecture quirk does not want any + * generic (timer) setup to be performed after this: + */ + if (x86_quirks->arch_time_init()) + return; + } + + irq0.mask = cpumask_of_cpu(0); + setup_irq(0, &irq0); +} + +#ifdef CONFIG_MCA +/** + * mca_nmi_hook - hook into MCA specific NMI chain + * + * Description: + * The MCA (Microchannel Architecture) has an NMI chain for NMI sources + * along the MCA bus. Use this to hook into that chain if you will need + * it. + **/ +void mca_nmi_hook(void) +{ + /* + * If I recall correctly, there's a whole bunch of other things that + * we can do to check for NMI problems, but that's all I know about + * at the moment. + */ + pr_warning("NMI generated from unknown source!\n"); +} +#endif /* CONFIG_MCA */ + +#endif /* CONFIG_X86_32 */ From de5483029b8f18e23395d8fd4f4ef6ae15beb809 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 19 Feb 2009 12:20:17 +0530 Subject: [PATCH 524/682] x86: include/asm/processor.h remove double declaration of print_cpu_info Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index dabab1a19dd..72914d0315e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -403,7 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); #endif #endif /* X86_64 */ -extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; From 71d8f9784a99991a7571dd20226f5f450dda7f34 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 18 Feb 2009 11:07:52 -0800 Subject: [PATCH 525/682] x86: syscalls.h: remove asmlinkage from declaration of sys_rt_sigreturn() Impact: cleanup asmlinkage for sys_rt_sigreturn() no longer exists in arch/x86/kernel/signal.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 258ef730aaa..7043408f690 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -82,7 +82,7 @@ asmlinkage long sys_iopl(unsigned int, struct pt_regs *); /* kernel/signal_64.c */ asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); -asmlinkage long sys_rt_sigreturn(struct pt_regs *); +long sys_rt_sigreturn(struct pt_regs *); /* kernel/sys_x86_64.c */ asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, From 95695547a7db44b88a7ee36cf5df188de267e99e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 14 Feb 2009 00:50:18 +0300 Subject: [PATCH 526/682] x86: asm linkage - introduce GLOBAL macro If the code is time critical and this entry is called from other places we use ENTRY to have it globally defined and especially aligned. Contrary we have some snippets which are size critical. So we use plane ".globl name; name:" directive. Introduce GLOBAL macro for this. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 5d98d0b68ff..2ecf0f6fc9e 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -52,6 +52,10 @@ #endif +#define GLOBAL(name) \ + .globl name; \ + name: + #ifdef CONFIG_X86_ALIGNMENT_16 #define __ALIGN .align 16,0x90 #define __ALIGN_STR ".align 16,0x90" From 1b25f3b4e18d1acffeb41258a18f13db71da9a7a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 14 Feb 2009 00:50:19 +0300 Subject: [PATCH 527/682] x86: linkage - get rid of _X86 macros Impact: cleanup There was an attempt to bring build-time checking for missed ENTRY_X86/END_X86 and KPROBE... pairs. Using them will add messy in code. Get just rid of them. This commit could be easily restored if the need appear in future. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 60 ---------------------------------- 1 file changed, 60 deletions(-) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 2ecf0f6fc9e..9320e2a8a26 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -61,65 +61,5 @@ #define __ALIGN_STR ".align 16,0x90" #endif -/* - * to check ENTRY_X86/END_X86 and - * KPROBE_ENTRY_X86/KPROBE_END_X86 - * unbalanced-missed-mixed appearance - */ -#define __set_entry_x86 .set ENTRY_X86_IN, 0 -#define __unset_entry_x86 .set ENTRY_X86_IN, 1 -#define __set_kprobe_x86 .set KPROBE_X86_IN, 0 -#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 - -#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" - -#define __check_entry_x86 \ - .ifdef ENTRY_X86_IN; \ - .ifeq ENTRY_X86_IN; \ - __macro_err_x86; \ - .abort; \ - .endif; \ - .endif - -#define __check_kprobe_x86 \ - .ifdef KPROBE_X86_IN; \ - .ifeq KPROBE_X86_IN; \ - __macro_err_x86; \ - .abort; \ - .endif; \ - .endif - -#define __check_entry_kprobe_x86 \ - __check_entry_x86; \ - __check_kprobe_x86 - -#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 - -#define ENTRY_X86(name) \ - __check_entry_kprobe_x86; \ - __set_entry_x86; \ - .globl name; \ - __ALIGN; \ - name: - -#define END_X86(name) \ - __unset_entry_x86; \ - __check_entry_kprobe_x86; \ - .size name, .-name - -#define KPROBE_ENTRY_X86(name) \ - __check_entry_kprobe_x86; \ - __set_kprobe_x86; \ - .pushsection .kprobes.text, "ax"; \ - .globl name; \ - __ALIGN; \ - name: - -#define KPROBE_END_X86(name) \ - __unset_kprobe_x86; \ - __check_entry_kprobe_x86; \ - .size name, .-name; \ - .popsection - #endif /* _ASM_X86_LINKAGE_H */ From 2f7955509710fd378a1ac96e19d29d5a0e3301fd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 14 Feb 2009 00:50:20 +0300 Subject: [PATCH 528/682] x86: copy.S - use GLOBAL,ENDPROC macros Impact: cleanup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/boot/copy.S | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index ef50c84e8b4..11f272c6f5e 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -8,6 +8,8 @@ * * ----------------------------------------------------------------------- */ +#include + /* * Memory copy routines */ @@ -15,9 +17,7 @@ .code16gcc .text - .globl memcpy - .type memcpy, @function -memcpy: +GLOBAL(memcpy) pushw %si pushw %di movw %ax, %di @@ -31,11 +31,9 @@ memcpy: popw %di popw %si ret - .size memcpy, .-memcpy +ENDPROC(memcpy) - .globl memset - .type memset, @function -memset: +GLOBAL(memset) pushw %di movw %ax, %di movzbl %dl, %eax @@ -48,52 +46,42 @@ memset: rep; stosb popw %di ret - .size memset, .-memset +ENDPROC(memset) - .globl copy_from_fs - .type copy_from_fs, @function -copy_from_fs: +GLOBAL(copy_from_fs) pushw %ds pushw %fs popw %ds call memcpy popw %ds ret - .size copy_from_fs, .-copy_from_fs +ENDPROC(copy_from_fs) - .globl copy_to_fs - .type copy_to_fs, @function -copy_to_fs: +GLOBAL(copy_to_fs) pushw %es pushw %fs popw %es call memcpy popw %es ret - .size copy_to_fs, .-copy_to_fs +ENDPROC(copy_to_fs) #if 0 /* Not currently used, but can be enabled as needed */ - - .globl copy_from_gs - .type copy_from_gs, @function -copy_from_gs: +GLOBAL(copy_from_gs) pushw %ds pushw %gs popw %ds call memcpy popw %ds ret - .size copy_from_gs, .-copy_from_gs - .globl copy_to_gs +ENDPROC(copy_from_gs) - .type copy_to_gs, @function -copy_to_gs: +GLOBAL(copy_to_gs) pushw %es pushw %gs popw %es call memcpy popw %es ret - .size copy_to_gs, .-copy_to_gs - +ENDPROC(copy_to_gs) #endif From 324bda9e47f53aebec1376ee89bba8128c8455e2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 14 Feb 2009 00:50:21 +0300 Subject: [PATCH 529/682] x86: pmjump - use GLOBAL,ENDPROC macros Impact: cleanup We are in setup stage so we use GLOBAL instead of ENTRY and do not increase code size. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/boot/pmjump.S | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index 141b6e20ed3..019c17a7585 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -15,18 +15,15 @@ #include #include #include +#include .text - - .globl protected_mode_jump - .type protected_mode_jump, @function - .code16 /* * void protected_mode_jump(u32 entrypoint, u32 bootparams); */ -protected_mode_jump: +GLOBAL(protected_mode_jump) movl %edx, %esi # Pointer to boot_params table xorl %ebx, %ebx @@ -47,12 +44,10 @@ protected_mode_jump: .byte 0x66, 0xea # ljmpl opcode 2: .long in_pm32 # offset .word __BOOT_CS # segment - - .size protected_mode_jump, .-protected_mode_jump +ENDPROC(protected_mode_jump) .code32 - .type in_pm32, @function -in_pm32: +GLOBAL(in_pm32) # Set up data segments for flat 32-bit mode movl %ecx, %ds movl %ecx, %es @@ -78,5 +73,4 @@ in_pm32: lldt %cx jmpl *%eax # Jump to the 32-bit entrypoint - - .size in_pm32, .-in_pm32 +ENDPROC(in_pm32) From 2d4eeecb98ade6a736940d43311275b7d32dab21 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 14 Feb 2009 00:50:22 +0300 Subject: [PATCH 530/682] x86: compressed head_64 - use ENTRY,ENDPROC macros Impact: clenaup Linker script will put startup_32 at predefined address so using ENTRY will not bloat the code size. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_64.S | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1d5dff4123e..ba9229ba7f0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -35,9 +35,7 @@ .section ".text.head" .code32 - .globl startup_32 - -startup_32: +ENTRY(startup_32) cld /* test KEEP_SEGMENTS flag to see if the bootloader is asking * us to not reload segments */ @@ -176,6 +174,7 @@ startup_32: /* Jump from 32bit compatibility mode into 64bit mode. */ lret +ENDPROC(startup_32) no_longmode: /* This isn't an x86-64 CPU so hang */ @@ -295,7 +294,6 @@ relocated: call decompress_kernel popq %rsi - /* * Jump to the decompressed kernel. */ From cb425afd2183e90a481bb211ff49361a117a3ecc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 14 Feb 2009 00:50:23 +0300 Subject: [PATCH 531/682] x86: compressed head_32 - use ENTRY,ENDPROC macros Impact: clenaup Linker script will put startup_32 at predefined address so using startup_32 will not bloat the code size. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_32.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 29c5fbf0839..9f20d379406 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -30,9 +30,7 @@ #include .section ".text.head","ax",@progbits - .globl startup_32 - -startup_32: +ENTRY(startup_32) cld /* test KEEP_SEGMENTS flag to see if the bootloader is asking * us to not reload segments */ @@ -113,6 +111,8 @@ startup_32: */ leal relocated(%ebx), %eax jmp *%eax +ENDPROC(startup_32) + .section ".text" relocated: From e4aa7ca5a2e6d44f07ceb87d9448113f5b48a334 Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Thu, 19 Feb 2009 13:36:55 -0800 Subject: [PATCH 532/682] bzip2/lzma: don't stop search at first unconfigured compression Impact: Bugfix, avoids kernels which build but panic on boot Fix a bug in decompress.c : only scanned until the first non-configured compressor (with disastrous result especially if that was gzip.) Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- lib/decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress.c b/lib/decompress.c index 961f367320f..d2842f57167 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -43,7 +43,7 @@ decompress_fn decompress_method(const unsigned char *inbuf, int len, if (len < 2) return NULL; /* Need at least this much... */ - for (cf = compressed_formats; cf->decompressor; cf++) { + for (cf = compressed_formats; cf->name; cf++) { if (!memcmp(inbuf, cf->magic, 2)) break; From ab59d3b70f7a47987f80e153d828363faec643fe Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Thu, 19 Feb 2009 13:39:21 -0800 Subject: [PATCH 533/682] bzip2/lzma: don't leave empty files around on failure Impact: Bugfix, silent build failures Fix a bug in gen_initramfs_list.sh: in case of failure, it left an empty output file behind, messing up the next make. Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- scripts/gen_initramfs_list.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index 41041e4923f..3eea8f15131 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -292,7 +292,8 @@ if [ ! -z ${output_file} ]; then if [ "${is_cpio_compressed}" = "compressed" ]; then cat ${cpio_tfile} > ${output_file} else - cat ${cpio_tfile} | ${compr} - > ${output_file} + (cat ${cpio_tfile} | ${compr} - > ${output_file}) \ + || (rm -f ${output_file} ; false) fi [ -z ${cpio_file} ] && rm ${cpio_tfile} fi From ab76f3d771590d5c89faa3219559c5d3fc0ce0c2 Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Thu, 19 Feb 2009 13:43:51 -0800 Subject: [PATCH 534/682] bzip2/lzma: make internal initramfs compression configurable Impact: Avoids silent environment dependency Make builtin initramfs compression an explicit configurable. The previous version would pick a compression based on the binaries which were installed on the system, which could lead to unexpected results. It is now explicitly configured, and not having the appropriate binaries installed on the build host is simply an error. Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- usr/Kconfig | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ usr/Makefile | 18 +++++---------- 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/usr/Kconfig b/usr/Kconfig index a691a8f5898..43a3a0fe8f2 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -71,3 +71,65 @@ config RD_LZMA help Support loading of a lzma encoded initial ramdisk or cpio buffer If unsure, say N. + +choice + prompt "Built-in initramfs compression mode" + help + This setting is only meaningful if the INITRAMFS_SOURCE is + set. It decides by which algorithm the INITRAMFS_SOURCE will + be compressed. + Several compression algorithms are available, which differ + in efficiency, compression and decompression speed. + Compression speed is only relevant when building a kernel. + Decompression speed is relevant at each boot. + + If you have any problems with bzip2 or lzma compressed + initramfs, mail me (Alain Knaff) . + + High compression options are mostly useful for users who + are low on disk space (embedded systems), but for whom ram + size matters less. + + If in doubt, select 'gzip' + +config INITRAMFS_COMPRESSION_NONE + bool "None" + help + Do not compress the built-in initramfs at all. This may + sound wasteful in space, but, you should be aware that the + built-in initramfs will be compressed at a later stage + anyways along with the rest of the kernel, on those + architectures that support this. + However, not compressing the initramfs may lead to slightly + higher memory consumption during a short time at boot, while + both the cpio image and the unpacked filesystem image will + be present in memory simultaneously + +config INITRAMFS_COMPRESSION_GZIP + bool "Gzip" + depends on RD_GZIP + help + The old and tried gzip compression. Its compression ratio is + the poorest among the 3 choices; however its speed (both + compression and decompression) is the fastest. + +config INITRAMFS_COMPRESSION_BZIP2 + bool "Bzip2" + depends on RD_BZIP2 + help + Its compression ratio and speed is intermediate. + Decompression speed is slowest among the three. The initramfs + size is about 10% smaller with bzip2, in comparison to gzip. + Bzip2 uses a large amount of memory. For modern kernels you + will need at least 8MB RAM or more for booting. + +config INITRAMFS_COMPRESSION_LZMA + bool "LZMA" + depends on RD_LZMA + help + The most recent compression algorithm. + Its ratio is best, decompression speed is between the other + two. Compression is slowest. The initramfs size is about 33% + smaller with LZMA in comparison to gzip. + +endchoice diff --git a/usr/Makefile b/usr/Makefile index 451cdff7dff..b84894b3929 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -5,24 +5,18 @@ klibcdirs:; PHONY += klibcdirs -# Find out "preferred" ramdisk compressor. Order of preference is -# 1. bzip2 efficient, and likely to be present -# 2. gzip former default -# 3. lzma -# 4. none -# None of the above -suffix_y = - -# Lzma, but no gzip nor bzip2 -suffix_$(CONFIG_RD_LZMA) = .lzma +# No compression +suffix_$(CONFIG_INITRAMFS_COMPRESSION_NONE) = # Gzip, but no bzip2 -suffix_$(CONFIG_RD_GZIP) = .gz +suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP) = .gz # Bzip2 -suffix_$(CONFIG_RD_BZIP2) = .bz2 +suffix_$(CONFIG_INITRAMFS_COMPRESSION_BZIP2) = .bz2 +# Lzma +suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZMA) = .lzma # Generate builtin.o based on initramfs_data.o obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data$(suffix_y).o From 42f8faecf7a88371de0f30aebb052d1ae51762c0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 17 Feb 2009 11:46:42 +0800 Subject: [PATCH 535/682] x86: use percpu data for 4k hardirq and softirq stacks Impact: economize memory for large NR_CPUS percpu data is setup earlier than irq, we can use percpu data to economize memory. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- arch/x86/kernel/irq_32.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index e0f29be8ab0..90cd94aba69 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -55,13 +56,13 @@ static inline void print_stack_overflow(void) { } union irq_ctx { struct thread_info tinfo; u32 stack[THREAD_SIZE/sizeof(u32)]; -}; +} __attribute__((aligned(PAGE_SIZE))); -static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; -static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; +static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); +static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); -static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; -static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; +static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); +static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); static void call_on_stack(void *func, void *stack) { @@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) u32 *isp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); - irqctx = hardirq_ctx[smp_processor_id()]; + irqctx = __get_cpu_var(hardirq_ctx); /* * this is where we switch to the IRQ stack. However, if we are @@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu) { union irq_ctx *irqctx; - if (hardirq_ctx[cpu]) + if (per_cpu(hardirq_ctx, cpu)) return; - irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; + irqctx = &per_cpu(hardirq_stack, cpu); irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - hardirq_ctx[cpu] = irqctx; + per_cpu(hardirq_ctx, cpu) = irqctx; - irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; + irqctx = &per_cpu(softirq_stack, cpu); irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; irqctx->tinfo.preempt_count = 0; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - softirq_ctx[cpu] = irqctx; + per_cpu(softirq_ctx, cpu) = irqctx; printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); + cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); } void irq_ctx_exit(int cpu) { - hardirq_ctx[cpu] = NULL; + per_cpu(hardirq_ctx, cpu) = NULL; } asmlinkage void do_softirq(void) @@ -169,7 +170,7 @@ asmlinkage void do_softirq(void) if (local_softirq_pending()) { curctx = current_thread_info(); - irqctx = softirq_ctx[smp_processor_id()]; + irqctx = __get_cpu_var(softirq_ctx); irqctx->tinfo.task = curctx->task; irqctx->tinfo.previous_esp = current_stack_pointer; From 734269521e320ad14ed39ae9b64d482b9028dcd2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:07 +0900 Subject: [PATCH 536/682] vmalloc: call flush_cache_vunmap() from unmap_kernel_range() Impact: proper vcache flush on unmap_kernel_range() flush_cache_vunmap() should be called before pages are unmapped. Add a call to it in unmap_kernel_range(). Signed-off-by: Tejun Heo --- mm/vmalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 75f49d312e8..c37924a2ee3 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1012,6 +1012,8 @@ void __init vmalloc_init(void) void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; + + flush_cache_vunmap(addr, end); vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } From 6b588c18f8dacfa6d7957c33c5ff832096e752d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:07 +0900 Subject: [PATCH 537/682] module: reorder module pcpu related functions Impact: cleanup Move percpu_modinit() upwards. This is to ease further changes. Signed-off-by: Tejun Heo --- kernel/module.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index ba22484a987..52b3497b874 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -480,21 +480,6 @@ static void percpu_modfree(void *freeme) } } -static unsigned int find_pcpusec(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - const char *secstrings) -{ - return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); -} - -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) -{ - int cpu; - - for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); -} - static int percpu_modinit(void) { pcpu_num_used = 2; @@ -513,7 +498,24 @@ static int percpu_modinit(void) return 0; } __initcall(percpu_modinit); + +static unsigned int find_pcpusec(Elf_Ehdr *hdr, + Elf_Shdr *sechdrs, + const char *secstrings) +{ + return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); +} + +static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +{ + int cpu; + + for_each_possible_cpu(cpu) + memcpy(pcpudest + per_cpu_offset(cpu), from, size); +} + #else /* ... !CONFIG_SMP */ + static inline void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) { @@ -535,6 +537,7 @@ static inline void percpu_modcopy(void *pcpudst, const void *src, /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } + #endif /* CONFIG_SMP */ #define MODINFO_ATTR(field) \ From b36128c830a8f5bd7d4981f5b0b69950f5928ee6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: [PATCH 538/682] alloc_percpu: change percpu_ptr to per_cpu_ptr Impact: cleanup There are two allocated per-cpu accessor macros with almost identical spelling. The original and far more popular is per_cpu_ptr (44 files), so change over the other 4 files. tj: kill percpu_ptr() and update UP too Signed-off-by: Rusty Russell Cc: mingo@redhat.com Cc: lenb@kernel.org Cc: cpufreq@vger.kernel.org Signed-off-by: Tejun Heo --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 2 +- drivers/acpi/processor_perflib.c | 4 ++-- include/linux/percpu.h | 23 +++++++++++----------- kernel/sched.c | 6 +++--- kernel/stop_machine.c | 2 +- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 4b1c319d30c..22590cf688a 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) if (!data) return -ENOMEM; - data->acpi_data = percpu_ptr(acpi_perf_data, cpu); + data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); per_cpu(drv_data, cpu) = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 9cc769b587f..68fd3d29279 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -516,12 +516,12 @@ int acpi_processor_preregister_performance( continue; } - if (!performance || !percpu_ptr(performance, i)) { + if (!performance || !per_cpu_ptr(performance, i)) { retval = -EINVAL; continue; } - pr->performance = percpu_ptr(performance, i); + pr->performance = per_cpu_ptr(performance, i); cpumask_set_cpu(i, pr->performance->shared_cpu_map); if (acpi_processor_get_psd(pr)) { retval = -EINVAL; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3577ffd90d4..c80cfe1260e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -81,23 +81,13 @@ struct percpu_data { }; #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). - */ -#define percpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { @@ -122,6 +112,15 @@ static inline void percpu_free(void *__pdata) cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) #define free_percpu(ptr) percpu_free((ptr)) -#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) +/* + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should + * probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) #endif /* __LINUX_PERCPU_H */ diff --git a/kernel/sched.c b/kernel/sched.c index fc17fd91ab5..9d30ac95632 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9472,7 +9472,7 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); + u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); u64 data; #ifndef CONFIG_64BIT @@ -9491,7 +9491,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); + u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); #ifndef CONFIG_64BIT /* @@ -9587,7 +9587,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) ca = task_ca(tsk); for (; ca; ca = ca->parent) { - u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); + u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); *cpuusage += cputime; } } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 0cd415ee62a..74541ca4953 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -170,7 +170,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) * doesn't hit this CPU until we're ready. */ get_cpu(); for_each_online_cpu(i) { - sm_work = percpu_ptr(stop_machine_work, i); + sm_work = per_cpu_ptr(stop_machine_work, i); INIT_WORK(sm_work, stop_cpu); queue_work_on(i, stop_machine_wq, sm_work); } From 313e458f81ec3852106c5a83830fe0d4f405a71a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: [PATCH 539/682] alloc_percpu: add align argument to __alloc_percpu. This prepares for a real __alloc_percpu, by adding an alignment argument. Only one place uses __alloc_percpu directly, and that's for a string. tj: af_inet also uses __alloc_percpu(), update it. Signed-off-by: Rusty Russell Cc: Christoph Lameter Cc: Jens Axboe --- block/blktrace.c | 2 +- include/linux/percpu.h | 5 +++-- net/ipv4/af_inet.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/block/blktrace.c b/block/blktrace.c index 39cc3bfe56e..487766237d2 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -363,7 +363,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->sequence) goto err; - bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG); + bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); if (!bt->msg_data) goto err; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index c80cfe1260e..1fdaee93c04 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -108,9 +108,10 @@ static inline void percpu_free(void *__pdata) /* (legacy) interface for use without CPU hotplug handling */ -#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ +#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ cpu_possible_map) -#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #define free_percpu(ptr) percpu_free((ptr)) /* * Use this to get to a cpu's version of the per-cpu object dynamically diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 743f5542d65..3a3dad80135 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1375,10 +1375,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field); int snmp_mib_init(void *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize); + ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize); + ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); if (!ptr[1]) goto err1; return 0; From f2a8205c4ef1af917d175c36a4097ae5587791c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: [PATCH 540/682] percpu: kill percpu_alloc() and friends Impact: kill unused functions percpu_alloc() and its friends never saw much action. It was supposed to replace the cpu-mask unaware __alloc_percpu() but it never happened and in fact __percpu_alloc_mask() itself never really grew proper up/down handling interface either (no exported interface for populate/depopulate). percpu allocation is about to go through major reimplementation and there's no reason to carry this unused interface around. Replace it with __alloc_percpu() and free_percpu(). Signed-off-by: Tejun Heo --- include/linux/percpu.h | 65 ++++++++++++++++++++---------------------- mm/allocpercpu.c | 32 ++++++++++++--------- 2 files changed, 50 insertions(+), 47 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1fdaee93c04..d99e24ae181 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,41 +82,10 @@ struct percpu_data { #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); -extern void percpu_free(void *__pdata); - -#else /* CONFIG_SMP */ - -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) - -static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) -{ - return kzalloc(size, gfp); -} - -static inline void percpu_free(void *__pdata) -{ - kfree(__pdata); -} - -#endif /* CONFIG_SMP */ - -#define percpu_alloc_mask(size, gfp, mask) \ - __percpu_alloc_mask((size), (gfp), &(mask)) - -#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) - -/* (legacy) interface for use without CPU hotplug handling */ - -#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ - cpu_possible_map) -#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ - __alignof__(type)) -#define free_percpu(ptr) percpu_free((ptr)) /* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). + * Use this to get to a cpu's version of the per-cpu object + * dynamically allocated. Non-atomic access to the current CPU's + * version should probably be combined with get_cpu()/put_cpu(). */ #define per_cpu_ptr(ptr, cpu) \ ({ \ @@ -124,4 +93,32 @@ static inline void percpu_free(void *__pdata) (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) +extern void *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void *__pdata); + +#else /* CONFIG_SMP */ + +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) + +static inline void *__alloc_percpu(size_t size, size_t align) +{ + /* + * Can't easily make larger alignment work with kmalloc. WARN + * on it. Larger alignment should only be used for module + * percpu sections on SMP for which this path isn't used. + */ + WARN_ON_ONCE(align > __alignof__(unsigned long long)); + return kzalloc(size, gfp); +} + +static inline void free_percpu(void *p) +{ + kfree(p); +} + +#endif /* CONFIG_SMP */ + +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) + #endif /* __LINUX_PERCPU_H */ diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 4297bc41bfd..3653c570232 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -99,45 +99,51 @@ static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) /** - * percpu_alloc_mask - initial setup of per-cpu data + * alloc_percpu - initial setup of per-cpu data * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @mask: populate per-data for cpu's selected through mask bits + * @align: alignment * - * Populating per-cpu data for all online cpu's would be a typical use case, - * which is simplified by the percpu_alloc() wrapper. - * Per-cpu objects are populated with zeroed buffers. + * Allocate dynamic percpu area. Percpu objects are populated with + * zeroed buffers. */ -void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +void *__alloc_percpu(size_t size, size_t align) { /* * We allocate whole cache lines to avoid false sharing */ size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); - void *pdata = kzalloc(sz, gfp); + void *pdata = kzalloc(sz, GFP_KERNEL); void *__pdata = __percpu_disguise(pdata); + /* + * Can't easily make larger alignment work with kmalloc. WARN + * on it. Larger alignment should only be used for module + * percpu sections on SMP for which this path isn't used. + */ + WARN_ON_ONCE(align > __alignof__(unsigned long long)); + if (unlikely(!pdata)) return NULL; - if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) + if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL, + &cpu_possible_map))) return __pdata; kfree(pdata); return NULL; } -EXPORT_SYMBOL_GPL(__percpu_alloc_mask); +EXPORT_SYMBOL_GPL(__alloc_percpu); /** - * percpu_free - final cleanup of per-cpu data + * free_percpu - final cleanup of per-cpu data * @__pdata: object to clean up * * We simply clean up any per-cpu object left. No need for the client to * track and specify through a bis mask which per-cpu objects are to free. */ -void percpu_free(void *__pdata) +void free_percpu(void *__pdata) { if (unlikely(!__pdata)) return; __percpu_depopulate_mask(__pdata, &cpu_possible_map); kfree(__percpu_disguise(__pdata)); } -EXPORT_SYMBOL_GPL(percpu_free); +EXPORT_SYMBOL_GPL(free_percpu); From f0aa6617903648077dffe5cfcf7c4458f4610fa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: [PATCH 541/682] vmalloc: implement vm_area_register_early() Impact: allow multiple early vm areas There are places where kernel VM area needs to be allocated before vmalloc is initialized. This is done by allocating static vm_struct, initializing several fields and linking it to vmlist and later vmalloc initialization picking up these from vmlist. This is currently done manually and if there's more than one such areas, there's no defined way to arbitrate who gets which address. This patch implements vm_area_register_early(), which takes vm_area struct with flags and size initialized, assigns address to it and puts it on the vmlist. This way, multiple early vm areas can determine which addresses they should use. The only current user - alpha mm init - is converted to use it. Signed-off-by: Tejun Heo --- arch/alpha/mm/init.c | 20 +++++++++++++------- include/linux/vmalloc.h | 1 + mm/vmalloc.c | 24 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 5d7a16eab31..df6df025ded 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -189,9 +189,21 @@ callback_init(void * kernel_end) if (alpha_using_srm) { static struct vm_struct console_remap_vm; - unsigned long vaddr = VMALLOC_START; + unsigned long nr_pages = 0; + unsigned long vaddr; unsigned long i, j; + /* calculate needed size */ + for (i = 0; i < crb->map_entries; ++i) + nr_pages += crb->map[i].count; + + /* register the vm area */ + console_remap_vm.flags = VM_ALLOC; + console_remap_vm.size = nr_pages << PAGE_SHIFT; + vm_area_register_early(&console_remap_vm); + + vaddr = (unsigned long)consle_remap_vm.addr; + /* Set up the third level PTEs and update the virtual addresses of the CRB entries. */ for (i = 0; i < crb->map_entries; ++i) { @@ -213,12 +225,6 @@ callback_init(void * kernel_end) vaddr += PAGE_SIZE; } } - - /* Let vmalloc know that we've allocated some space. */ - console_remap_vm.flags = VM_ALLOC; - console_remap_vm.addr = (void *) VMALLOC_START; - console_remap_vm.size = vaddr - VMALLOC_START; - vmlist = &console_remap_vm; } callback_init_done = 1; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a98..bbc05139229 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,5 +106,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern __init void vm_area_register_early(struct vm_struct *vm); #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c37924a2ee3..d206261ad9e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -982,6 +983,29 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro } EXPORT_SYMBOL(vm_map_ram); +/** + * vm_area_register_early - register vmap area early during boot + * @vm: vm_struct to register + * @size: size of area to register + * + * This function is used to register kernel vm area before + * vmalloc_init() is called. @vm->size and @vm->flags should contain + * proper values on entry and other fields should be zero. On return, + * vm->addr contains the allocated address. + * + * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. + */ +void __init vm_area_register_early(struct vm_struct *vm) +{ + static size_t vm_init_off __initdata; + + vm->addr = (void *)VMALLOC_START + vm_init_off; + vm_init_off = PFN_ALIGN(vm_init_off + vm->size); + + vm->next = vmlist; + vmlist = vm; +} + void __init vmalloc_init(void) { struct vmap_area *va; From 8fc48985006da4ceba24508db64ec77fc0dfe3bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: [PATCH 542/682] vmalloc: add un/map_kernel_range_noflush() Impact: two more public map/unmap functions Implement map_kernel_range_noflush() and unmap_kernel_range_noflush(). These functions respectively map and unmap address range in kernel VM area but doesn't do any vcache or tlb flushing. These will be used by new percpu allocator. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 3 ++ mm/vmalloc.c | 67 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bbc05139229..599ba798431 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -91,6 +91,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); +extern int map_kernel_range_noflush(unsigned long start, unsigned long size, + pgprot_t prot, struct page **pages); +extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* Allocate/destroy a 'vmalloc' VM area. */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d206261ad9e..224eca9650a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -153,8 +153,8 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr, * * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] */ -static int vmap_page_range(unsigned long start, unsigned long end, - pgprot_t prot, struct page **pages) +static int vmap_page_range_noflush(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) { pgd_t *pgd; unsigned long next; @@ -170,13 +170,22 @@ static int vmap_page_range(unsigned long start, unsigned long end, if (err) break; } while (pgd++, addr = next, addr != end); - flush_cache_vmap(start, end); if (unlikely(err)) return err; return nr; } +static int vmap_page_range(unsigned long start, unsigned long end, + pgprot_t prot, struct page **pages) +{ + int ret; + + ret = vmap_page_range_noflush(start, end, prot, pages); + flush_cache_vmap(start, end); + return ret; +} + static inline int is_vmalloc_or_module_addr(const void *x) { /* @@ -1033,6 +1042,58 @@ void __init vmalloc_init(void) vmap_initialized = true; } +/** + * map_kernel_range_noflush - map kernel VM area with the specified pages + * @addr: start of the VM area to map + * @size: size of the VM area to map + * @prot: page protection flags to use + * @pages: pages to map + * + * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size + * specify should have been allocated using get_vm_area() and its + * friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is + * responsible for calling flush_cache_vmap() on to-be-mapped areas + * before calling this function. + * + * RETURNS: + * The number of pages mapped on success, -errno on failure. + */ +int map_kernel_range_noflush(unsigned long addr, unsigned long size, + pgprot_t prot, struct page **pages) +{ + return vmap_page_range_noflush(addr, addr + size, prot, pages); +} + +/** + * unmap_kernel_range_noflush - unmap kernel VM area + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size + * specify should have been allocated using get_vm_area() and its + * friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is + * responsible for calling flush_cache_vunmap() on to-be-mapped areas + * before calling this function and flush_tlb_kernel_range() after. + */ +void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) +{ + vunmap_page_range(addr, addr + size); +} + +/** + * unmap_kernel_range - unmap kernel VM area and flush cache and TLB + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Similar to unmap_kernel_range_noflush() but flushes vcache before + * the unmapping and tlb after. + */ void unmap_kernel_range(unsigned long addr, unsigned long size) { unsigned long end = addr + size; From fbf59bc9d74d1fb30b8e0630743aff2806eafcea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: [PATCH 543/682] percpu: implement new dynamic percpu allocator Impact: new scalable dynamic percpu allocator which allows dynamic percpu areas to be accessed the same way as static ones Implement scalable dynamic percpu allocator which can be used for both static and dynamic percpu areas. This will allow static and dynamic areas to share faster direct access methods. This feature is optional and enabled only when CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is defined by arch. Please read comment on top of mm/percpu.c for details. Signed-off-by: Tejun Heo Cc: Andrew Morton --- include/linux/percpu.h | 24 +- kernel/module.c | 31 ++ mm/Makefile | 4 + mm/percpu.c | 890 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 944 insertions(+), 5 deletions(-) create mode 100644 mm/percpu.c diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d99e24ae181..18080995ff3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -76,23 +76,37 @@ #ifdef CONFIG_SMP +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + +extern void *pcpu_base_addr; + +typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); + +extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size); +/* + * Use this to get to a cpu's version of the per-cpu object + * dynamically allocated. Non-atomic access to the current CPU's + * version should probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) + +#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + struct percpu_data { void *ptrs[1]; }; #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -/* - * Use this to get to a cpu's version of the per-cpu object - * dynamically allocated. Non-atomic access to the current CPU's - * version should probably be combined with get_cpu()/put_cpu(). - */ #define per_cpu_ptr(ptr, cpu) \ ({ \ struct percpu_data *__p = __percpu_disguise(ptr); \ (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); diff --git a/kernel/module.c b/kernel/module.c index 52b3497b874..1f0657ae555 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -51,6 +51,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -366,6 +367,34 @@ static struct module *find_module(const char *name) } #ifdef CONFIG_SMP + +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + +static void *percpu_modalloc(unsigned long size, unsigned long align, + const char *name) +{ + void *ptr; + + if (align > PAGE_SIZE) { + printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", + name, align, PAGE_SIZE); + align = PAGE_SIZE; + } + + ptr = __alloc_percpu(size, align); + if (!ptr) + printk(KERN_WARNING + "Could not allocate %lu bytes percpu data\n", size); + return ptr; +} + +static void percpu_modfree(void *freeme) +{ + free_percpu(freeme); +} + +#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; /* Size of each block. -ve means used. */ @@ -499,6 +528,8 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const char *secstrings) diff --git a/mm/Makefile b/mm/Makefile index 72255be57f8..818569b68f4 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -30,6 +30,10 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o +ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +obj-$(CONFIG_SMP) += percpu.o +else obj-$(CONFIG_SMP) += allocpercpu.o +endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o diff --git a/mm/percpu.c b/mm/percpu.c new file mode 100644 index 00000000000..4617d97e877 --- /dev/null +++ b/mm/percpu.c @@ -0,0 +1,890 @@ +/* + * linux/mm/percpu.c - percpu memory allocator + * + * Copyright (C) 2009 SUSE Linux Products GmbH + * Copyright (C) 2009 Tejun Heo + * + * This file is released under the GPLv2. + * + * This is percpu allocator which can handle both static and dynamic + * areas. Percpu areas are allocated in chunks in vmalloc area. Each + * chunk is consisted of num_possible_cpus() units and the first chunk + * is used for static percpu variables in the kernel image (special + * boot time alloc/init handling necessary as these areas need to be + * brought up before allocation services are running). Unit grows as + * necessary and all units grow or shrink in unison. When a chunk is + * filled up, another chunk is allocated. ie. in vmalloc area + * + * c0 c1 c2 + * ------------------- ------------------- ------------ + * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u + * ------------------- ...... ------------------- .... ------------ + * + * Allocation is done in offset-size areas of single unit space. Ie, + * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, + * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring + * percpu base registers UNIT_SIZE apart. + * + * There are usually many small percpu allocations many of them as + * small as 4 bytes. The allocator organizes chunks into lists + * according to free size and tries to allocate from the fullest one. + * Each chunk keeps the maximum contiguous area size hint which is + * guaranteed to be eqaul to or larger than the maximum contiguous + * area in the chunk. This helps the allocator not to iterate the + * chunk maps unnecessarily. + * + * Allocation state in each chunk is kept using an array of integers + * on chunk->map. A positive value in the map represents a free + * region and negative allocated. Allocation inside a chunk is done + * by scanning this map sequentially and serving the first matching + * entry. This is mostly copied from the percpu_modalloc() allocator. + * Chunks are also linked into a rb tree to ease address to chunk + * mapping during free. + * + * To use this allocator, arch code should do the followings. + * + * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + * + * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate + * regular address to percpu pointer and back + * + * - use pcpu_setup_static() during percpu area initialization to + * setup kernel static percpu area + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PCPU_MIN_UNIT_PAGES_SHIFT 4 /* also max alloc size */ +#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ +#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ + +struct pcpu_chunk { + struct list_head list; /* linked to pcpu_slot lists */ + struct rb_node rb_node; /* key is chunk->vm->addr */ + int free_size; /* free bytes in the chunk */ + int contig_hint; /* max contiguous size hint */ + struct vm_struct *vm; /* mapped vmalloc region */ + int map_used; /* # of map entries used */ + int map_alloc; /* # of map entries allocated */ + int *map; /* allocation map */ + struct page *page[]; /* #cpus * UNIT_PAGES */ +}; + +static int pcpu_unit_pages_shift; +static int pcpu_unit_pages; +static int pcpu_unit_shift; +static int pcpu_unit_size; +static int pcpu_chunk_size; +static int pcpu_nr_slots; +static size_t pcpu_chunk_struct_size; + +/* the address of the first chunk which starts with the kernel static area */ +void *pcpu_base_addr; +EXPORT_SYMBOL_GPL(pcpu_base_addr); + +/* the size of kernel static area */ +static int pcpu_static_size; + +/* + * One mutex to rule them all. + * + * The following mutex is grabbed in the outermost public alloc/free + * interface functions and released only when the operation is + * complete. As such, every function in this file other than the + * outermost functions are called under pcpu_mutex. + * + * It can easily be switched to use spinlock such that only the area + * allocation and page population commit are protected with it doing + * actual [de]allocation without holding any lock. However, given + * what this allocator does, I think it's better to let them run + * sequentially. + */ +static DEFINE_MUTEX(pcpu_mutex); + +static struct list_head *pcpu_slot; /* chunk list slots */ +static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ + +static int pcpu_size_to_slot(int size) +{ + int highbit = fls(size); + return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); +} + +static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) +{ + if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) + return 0; + + return pcpu_size_to_slot(chunk->free_size); +} + +static int pcpu_page_idx(unsigned int cpu, int page_idx) +{ + return (cpu << pcpu_unit_pages_shift) + page_idx; +} + +static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) +{ + return &chunk->page[pcpu_page_idx(cpu, page_idx)]; +} + +static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) +{ + return (unsigned long)chunk->vm->addr + + (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); +} + +static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, + int page_idx) +{ + return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; +} + +/** + * pcpu_realloc - versatile realloc + * @p: the current pointer (can be NULL for new allocations) + * @size: the current size (can be 0 for new allocations) + * @new_size: the wanted new size (can be 0 for free) + * + * More robust realloc which can be used to allocate, resize or free a + * memory area of arbitrary size. If the needed size goes over + * PAGE_SIZE, kernel VM is used. + * + * RETURNS: + * The new pointer on success, NULL on failure. + */ +static void *pcpu_realloc(void *p, size_t size, size_t new_size) +{ + void *new; + + if (new_size <= PAGE_SIZE) + new = kmalloc(new_size, GFP_KERNEL); + else + new = vmalloc(new_size); + if (new_size && !new) + return NULL; + + memcpy(new, p, min(size, new_size)); + if (new_size > size) + memset(new + size, 0, new_size - size); + + if (size <= PAGE_SIZE) + kfree(p); + else + vfree(p); + + return new; +} + +/** + * pcpu_chunk_relocate - put chunk in the appropriate chunk slot + * @chunk: chunk of interest + * @oslot: the previous slot it was on + * + * This function is called after an allocation or free changed @chunk. + * New slot according to the changed state is determined and @chunk is + * moved to the slot. + */ +static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) +{ + int nslot = pcpu_chunk_slot(chunk); + + if (oslot != nslot) { + if (oslot < nslot) + list_move(&chunk->list, &pcpu_slot[nslot]); + else + list_move_tail(&chunk->list, &pcpu_slot[nslot]); + } +} + +static struct rb_node **pcpu_chunk_rb_search(void *addr, + struct rb_node **parentp) +{ + struct rb_node **p = &pcpu_addr_root.rb_node; + struct rb_node *parent = NULL; + struct pcpu_chunk *chunk; + + while (*p) { + parent = *p; + chunk = rb_entry(parent, struct pcpu_chunk, rb_node); + + if (addr < chunk->vm->addr) + p = &(*p)->rb_left; + else if (addr > chunk->vm->addr) + p = &(*p)->rb_right; + else + break; + } + + if (parentp) + *parentp = parent; + return p; +} + +/** + * pcpu_chunk_addr_search - search for chunk containing specified address + * @addr: address to search for + * + * Look for chunk which might contain @addr. More specifically, it + * searchs for the chunk with the highest start address which isn't + * beyond @addr. + * + * RETURNS: + * The address of the found chunk. + */ +static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) +{ + struct rb_node *n, *parent; + struct pcpu_chunk *chunk; + + n = *pcpu_chunk_rb_search(addr, &parent); + if (!n) { + /* no exactly matching chunk, the parent is the closest */ + n = parent; + BUG_ON(!n); + } + chunk = rb_entry(n, struct pcpu_chunk, rb_node); + + if (addr < chunk->vm->addr) { + /* the parent was the next one, look for the previous one */ + n = rb_prev(n); + BUG_ON(!n); + chunk = rb_entry(n, struct pcpu_chunk, rb_node); + } + + return chunk; +} + +/** + * pcpu_chunk_addr_insert - insert chunk into address rb tree + * @new: chunk to insert + * + * Insert @new into address rb tree. + */ +static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) +{ + struct rb_node **p, *parent; + + p = pcpu_chunk_rb_search(new->vm->addr, &parent); + BUG_ON(*p); + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &pcpu_addr_root); +} + +/** + * pcpu_split_block - split a map block + * @chunk: chunk of interest + * @i: index of map block to split + * @head: head size (can be 0) + * @tail: tail size (can be 0) + * + * Split the @i'th map block into two or three blocks. If @head is + * non-zero, @head bytes block is inserted before block @i moving it + * to @i+1 and reducing its size by @head bytes. + * + * If @tail is non-zero, the target block, which can be @i or @i+1 + * depending on @head, is reduced by @tail bytes and @tail byte block + * is inserted after the target block. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) +{ + int nr_extra = !!head + !!tail; + int target = chunk->map_used + nr_extra; + + /* reallocation required? */ + if (chunk->map_alloc < target) { + int new_alloc = chunk->map_alloc; + int *new; + + while (new_alloc < target) + new_alloc *= 2; + + new = pcpu_realloc(chunk->map, + chunk->map_alloc * sizeof(new[0]), + new_alloc * sizeof(new[0])); + if (!new) + return -ENOMEM; + + chunk->map_alloc = new_alloc; + chunk->map = new; + } + + /* insert a new subblock */ + memmove(&chunk->map[i + nr_extra], &chunk->map[i], + sizeof(chunk->map[0]) * (chunk->map_used - i)); + chunk->map_used += nr_extra; + + if (head) { + chunk->map[i + 1] = chunk->map[i] - head; + chunk->map[i++] = head; + } + if (tail) { + chunk->map[i++] -= tail; + chunk->map[i] = tail; + } + return 0; +} + +/** + * pcpu_alloc_area - allocate area from a pcpu_chunk + * @chunk: chunk of interest + * @size: wanted size + * @align: wanted align + * + * Try to allocate @size bytes area aligned at @align from @chunk. + * Note that this function only allocates the offset. It doesn't + * populate or map the area. + * + * RETURNS: + * Allocated offset in @chunk on success, -errno on failure. + */ +static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) +{ + int oslot = pcpu_chunk_slot(chunk); + int max_contig = 0; + int i, off; + + /* + * The static chunk initially doesn't have map attached + * because kmalloc wasn't available during init. Give it one. + */ + if (unlikely(!chunk->map)) { + chunk->map = pcpu_realloc(NULL, 0, + PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + if (!chunk->map) + return -ENOMEM; + + chunk->map_alloc = PCPU_DFL_MAP_ALLOC; + chunk->map[chunk->map_used++] = -pcpu_static_size; + if (chunk->free_size) + chunk->map[chunk->map_used++] = chunk->free_size; + } + + for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { + bool is_last = i + 1 == chunk->map_used; + int head, tail; + + /* extra for alignment requirement */ + head = ALIGN(off, align) - off; + BUG_ON(i == 0 && head != 0); + + if (chunk->map[i] < 0) + continue; + if (chunk->map[i] < head + size) { + max_contig = max(chunk->map[i], max_contig); + continue; + } + + /* + * If head is small or the previous block is free, + * merge'em. Note that 'small' is defined as smaller + * than sizeof(int), which is very small but isn't too + * uncommon for percpu allocations. + */ + if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { + if (chunk->map[i - 1] > 0) + chunk->map[i - 1] += head; + else { + chunk->map[i - 1] -= head; + chunk->free_size -= head; + } + chunk->map[i] -= head; + off += head; + head = 0; + } + + /* if tail is small, just keep it around */ + tail = chunk->map[i] - head - size; + if (tail < sizeof(int)) + tail = 0; + + /* split if warranted */ + if (head || tail) { + if (pcpu_split_block(chunk, i, head, tail)) + return -ENOMEM; + if (head) { + i++; + off += head; + max_contig = max(chunk->map[i - 1], max_contig); + } + if (tail) + max_contig = max(chunk->map[i + 1], max_contig); + } + + /* update hint and mark allocated */ + if (is_last) + chunk->contig_hint = max_contig; /* fully scanned */ + else + chunk->contig_hint = max(chunk->contig_hint, + max_contig); + + chunk->free_size -= chunk->map[i]; + chunk->map[i] = -chunk->map[i]; + + pcpu_chunk_relocate(chunk, oslot); + return off; + } + + chunk->contig_hint = max_contig; /* fully scanned */ + pcpu_chunk_relocate(chunk, oslot); + + /* + * Tell the upper layer that this chunk has no area left. + * Note that this is not an error condition but a notification + * to upper layer that it needs to look at other chunks. + * -ENOSPC is chosen as it isn't used in memory subsystem and + * matches the meaning in a way. + */ + return -ENOSPC; +} + +/** + * pcpu_free_area - free area to a pcpu_chunk + * @chunk: chunk of interest + * @freeme: offset of area to free + * + * Free area starting from @freeme to @chunk. Note that this function + * only modifies the allocation map. It doesn't depopulate or unmap + * the area. + */ +static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) +{ + int oslot = pcpu_chunk_slot(chunk); + int i, off; + + for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) + if (off == freeme) + break; + BUG_ON(off != freeme); + BUG_ON(chunk->map[i] > 0); + + chunk->map[i] = -chunk->map[i]; + chunk->free_size += chunk->map[i]; + + /* merge with previous? */ + if (i > 0 && chunk->map[i - 1] >= 0) { + chunk->map[i - 1] += chunk->map[i]; + chunk->map_used--; + memmove(&chunk->map[i], &chunk->map[i + 1], + (chunk->map_used - i) * sizeof(chunk->map[0])); + i--; + } + /* merge with next? */ + if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { + chunk->map[i] += chunk->map[i + 1]; + chunk->map_used--; + memmove(&chunk->map[i + 1], &chunk->map[i + 2], + (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); + } + + chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); + pcpu_chunk_relocate(chunk, oslot); +} + +/** + * pcpu_unmap - unmap pages out of a pcpu_chunk + * @chunk: chunk of interest + * @page_start: page index of the first page to unmap + * @page_end: page index of the last page to unmap + 1 + * @flush: whether to flush cache and tlb or not + * + * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. + * If @flush is true, vcache is flushed before unmapping and tlb + * after. + */ +static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, + bool flush) +{ + unsigned int last = num_possible_cpus() - 1; + unsigned int cpu; + + /* + * Each flushing trial can be very expensive, issue flush on + * the whole region at once rather than doing it for each cpu. + * This could be an overkill but is more scalable. + */ + if (flush) + flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); + + for_each_possible_cpu(cpu) + unmap_kernel_range_noflush( + pcpu_chunk_addr(chunk, cpu, page_start), + (page_end - page_start) << PAGE_SHIFT); + + /* ditto as flush_cache_vunmap() */ + if (flush) + flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); +} + +/** + * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk + * @chunk: chunk to depopulate + * @off: offset to the area to depopulate + * @size: size of the area to depopulate + * @flush: whether to flush cache and tlb or not + * + * For each cpu, depopulate and unmap pages [@page_start,@page_end) + * from @chunk. If @flush is true, vcache is flushed before unmapping + * and tlb after. + */ +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, size_t off, + size_t size, bool flush) +{ + int page_start = PFN_DOWN(off); + int page_end = PFN_UP(off + size); + int unmap_start = -1; + int uninitialized_var(unmap_end); + unsigned int cpu; + int i; + + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) { + struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + + if (!*pagep) + continue; + + __free_page(*pagep); + + /* + * If it's partial depopulation, it might get + * populated or depopulated again. Mark the + * page gone. + */ + *pagep = NULL; + + unmap_start = unmap_start < 0 ? i : unmap_start; + unmap_end = i + 1; + } + } + + if (unmap_start >= 0) + pcpu_unmap(chunk, unmap_start, unmap_end, flush); +} + +/** + * pcpu_map - map pages into a pcpu_chunk + * @chunk: chunk of interest + * @page_start: page index of the first page to map + * @page_end: page index of the last page to map + 1 + * + * For each cpu, map pages [@page_start,@page_end) into @chunk. + * vcache is flushed afterwards. + */ +static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + unsigned int cpu; + int err; + + for_each_possible_cpu(cpu) { + err = map_kernel_range_noflush( + pcpu_chunk_addr(chunk, cpu, page_start), + (page_end - page_start) << PAGE_SHIFT, + PAGE_KERNEL, + pcpu_chunk_pagep(chunk, cpu, page_start)); + if (err < 0) + return err; + } + + /* flush at once, please read comments in pcpu_unmap() */ + flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); + return 0; +} + +/** + * pcpu_populate_chunk - populate and map an area of a pcpu_chunk + * @chunk: chunk of interest + * @off: offset to the area to populate + * @size: size of the area to populate + * + * For each cpu, populate and map pages [@page_start,@page_end) into + * @chunk. The area is cleared on return. + */ +static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) +{ + const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + int page_start = PFN_DOWN(off); + int page_end = PFN_UP(off + size); + int map_start = -1; + int map_end; + unsigned int cpu; + int i; + + for (i = page_start; i < page_end; i++) { + if (pcpu_chunk_page_occupied(chunk, i)) { + if (map_start >= 0) { + if (pcpu_map(chunk, map_start, map_end)) + goto err; + map_start = -1; + } + continue; + } + + map_start = map_start < 0 ? i : map_start; + map_end = i + 1; + + for_each_possible_cpu(cpu) { + struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + + *pagep = alloc_pages_node(cpu_to_node(cpu), + alloc_mask, 0); + if (!*pagep) + goto err; + } + } + + if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) + goto err; + + for_each_possible_cpu(cpu) + memset(chunk->vm->addr + (cpu << pcpu_unit_shift) + off, 0, + size); + + return 0; +err: + /* likely under heavy memory pressure, give memory back */ + pcpu_depopulate_chunk(chunk, off, size, true); + return -ENOMEM; +} + +static void free_pcpu_chunk(struct pcpu_chunk *chunk) +{ + if (!chunk) + return; + if (chunk->vm) + free_vm_area(chunk->vm); + pcpu_realloc(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]), 0); + kfree(chunk); +} + +static struct pcpu_chunk *alloc_pcpu_chunk(void) +{ + struct pcpu_chunk *chunk; + + chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL); + if (!chunk) + return NULL; + + chunk->map = pcpu_realloc(NULL, 0, + PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + chunk->map_alloc = PCPU_DFL_MAP_ALLOC; + chunk->map[chunk->map_used++] = pcpu_unit_size; + + chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); + if (!chunk->vm) { + free_pcpu_chunk(chunk); + return NULL; + } + + INIT_LIST_HEAD(&chunk->list); + chunk->free_size = pcpu_unit_size; + chunk->contig_hint = pcpu_unit_size; + + return chunk; +} + +/** + * __alloc_percpu - allocate percpu area + * @size: size of area to allocate + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align. Might + * sleep. Might trigger writeouts. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_percpu(size_t size, size_t align) +{ + void *ptr = NULL; + struct pcpu_chunk *chunk; + int slot, off; + + if (unlikely(!size || size > PAGE_SIZE << PCPU_MIN_UNIT_PAGES_SHIFT || + align > PAGE_SIZE)) { + WARN(true, "illegal size (%zu) or align (%zu) for " + "percpu allocation\n", size, align); + return NULL; + } + + mutex_lock(&pcpu_mutex); + + /* allocate area */ + for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + if (size > chunk->contig_hint) + continue; + off = pcpu_alloc_area(chunk, size, align); + if (off >= 0) + goto area_found; + if (off != -ENOSPC) + goto out_unlock; + } + } + + /* hmmm... no space left, create a new chunk */ + chunk = alloc_pcpu_chunk(); + if (!chunk) + goto out_unlock; + pcpu_chunk_relocate(chunk, -1); + pcpu_chunk_addr_insert(chunk); + + off = pcpu_alloc_area(chunk, size, align); + if (off < 0) + goto out_unlock; + +area_found: + /* populate, map and clear the area */ + if (pcpu_populate_chunk(chunk, off, size)) { + pcpu_free_area(chunk, off); + goto out_unlock; + } + + ptr = __addr_to_pcpu_ptr(chunk->vm->addr + off); +out_unlock: + mutex_unlock(&pcpu_mutex); + return ptr; +} +EXPORT_SYMBOL_GPL(__alloc_percpu); + +static void pcpu_kill_chunk(struct pcpu_chunk *chunk) +{ + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + list_del(&chunk->list); + rb_erase(&chunk->rb_node, &pcpu_addr_root); + free_pcpu_chunk(chunk); +} + +/** + * free_percpu - free percpu area + * @ptr: pointer to area to free + * + * Free percpu area @ptr. Might sleep. + */ +void free_percpu(void *ptr) +{ + void *addr = __pcpu_ptr_to_addr(ptr); + struct pcpu_chunk *chunk; + int off; + + if (!ptr) + return; + + mutex_lock(&pcpu_mutex); + + chunk = pcpu_chunk_addr_search(addr); + off = addr - chunk->vm->addr; + + pcpu_free_area(chunk, off); + + /* the chunk became fully free, kill one if there are other free ones */ + if (chunk->free_size == pcpu_unit_size) { + struct pcpu_chunk *pos; + + list_for_each_entry(pos, + &pcpu_slot[pcpu_chunk_slot(chunk)], list) + if (pos != chunk) { + pcpu_kill_chunk(pos); + break; + } + } + + mutex_unlock(&pcpu_mutex); +} +EXPORT_SYMBOL_GPL(free_percpu); + +/** + * pcpu_setup_static - initialize kernel static percpu area + * @populate_pte_fn: callback to allocate pagetable + * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages + * + * Initialize kernel static percpu area. The caller should allocate + * all the necessary pages and pass them in @pages. + * @populate_pte_fn() is called on each page to be used for percpu + * mapping and is responsible for making sure all the necessary page + * tables for the page is allocated. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access. + */ +size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size) +{ + static struct vm_struct static_vm; + struct pcpu_chunk *static_chunk; + int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE); + unsigned int cpu; + int err, i; + + pcpu_unit_pages_shift = max_t(int, PCPU_MIN_UNIT_PAGES_SHIFT, + order_base_2(cpu_size) - PAGE_SHIFT); + + pcpu_static_size = cpu_size; + pcpu_unit_pages = 1 << pcpu_unit_pages_shift; + pcpu_unit_shift = PAGE_SHIFT + pcpu_unit_pages_shift; + pcpu_unit_size = 1 << pcpu_unit_shift; + pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_nr_slots = pcpu_size_to_slot(pcpu_unit_size) + 1; + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + + (1 << pcpu_unit_pages_shift) * sizeof(struct page *); + + /* allocate chunk slots */ + pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); + for (i = 0; i < pcpu_nr_slots; i++) + INIT_LIST_HEAD(&pcpu_slot[i]); + + /* init and register vm area */ + static_vm.flags = VM_ALLOC; + static_vm.size = pcpu_chunk_size; + vm_area_register_early(&static_vm); + + /* init static_chunk */ + static_chunk = alloc_bootmem(pcpu_chunk_struct_size); + INIT_LIST_HEAD(&static_chunk->list); + static_chunk->vm = &static_vm; + static_chunk->free_size = pcpu_unit_size - pcpu_static_size; + static_chunk->contig_hint = static_chunk->free_size; + + /* assign pages and map them */ + for_each_possible_cpu(cpu) { + for (i = 0; i < nr_cpu_pages; i++) { + *pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++; + populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i)); + } + } + + err = pcpu_map(static_chunk, 0, nr_cpu_pages); + if (err) + panic("failed to setup static percpu area, err=%d\n", err); + + /* link static_chunk in */ + pcpu_chunk_relocate(static_chunk, -1); + pcpu_chunk_addr_insert(static_chunk); + + /* we're done */ + pcpu_base_addr = (void *)pcpu_chunk_addr(static_chunk, 0, 0); + return pcpu_unit_size; +} From 11124411aa95827404d6bfdfc14c908e1b54513c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:09 +0900 Subject: [PATCH 544/682] x86: convert to the new dynamic percpu allocator Impact: use new dynamic allocator, unified access to static/dynamic percpu memory Convert to the new dynamic percpu allocator. * implement populate_extra_pte() for both 32 and 64 * update setup_per_cpu_areas() to use pcpu_setup_static() * define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() * define config HAVE_DYNAMIC_PER_CPU_AREA Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 3 ++ arch/x86/include/asm/percpu.h | 8 ++++ arch/x86/include/asm/pgtable.h | 1 + arch/x86/kernel/setup_percpu.c | 68 +++++++++++++++++++++------------- arch/x86/mm/init_32.c | 10 +++++ arch/x86/mm/init_64.c | 19 ++++++++++ 6 files changed, 84 insertions(+), 25 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f760a22f95d..d3f6eadfd4b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -135,6 +135,9 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y +config HAVE_DYNAMIC_PER_CPU_AREA + def_bool y + config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index aee103b26d0..8f1d2fbec1d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -43,6 +43,14 @@ #else /* ...!ASSEMBLY */ #include +#include + +#define __addr_to_pcpu_ptr(addr) \ + (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ + + (unsigned long)__per_cpu_start) +#define __pcpu_ptr_to_addr(ptr) \ + (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ + - (unsigned long)__per_cpu_start) #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6f7c102018b..dd91c2515c6 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -402,6 +402,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); +void populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_X86_32 extern void native_pagetable_setup_start(pgd_t *base); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d992e6cff73..2dce4355821 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -61,38 +61,56 @@ static inline void setup_percpu_segment(int cpu) */ void __init setup_per_cpu_areas(void) { - ssize_t size; - char *ptr; - int cpu; - - /* Copy section for each CPU (we discard the original) */ - size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ssize_t size = __per_cpu_end - __per_cpu_start; + unsigned int nr_cpu_pages = DIV_ROUND_UP(size, PAGE_SIZE); + static struct page **pages; + size_t pages_size; + unsigned int cpu, i, j; + unsigned long delta; + size_t pcpu_unit_size; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); + pr_info("PERCPU: Allocating %zd bytes for static per cpu data\n", size); - pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); + pages_size = nr_cpu_pages * num_possible_cpus() * sizeof(pages[0]); + pages = alloc_bootmem(pages_size); + j = 0; for_each_possible_cpu(cpu) { -#ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(size); -#else - int node = early_cpu_to_node(cpu); - if (!node_online(node) || !NODE_DATA(node)) { - ptr = alloc_bootmem_pages(size); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d at %016lx\n", - cpu, __pa(ptr)); - } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); - pr_debug("per cpu data for cpu%d on node%d at %016lx\n", - cpu, node, __pa(ptr)); - } -#endif + void *ptr; - memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start); - per_cpu_offset(cpu) = ptr - __per_cpu_start; + for (i = 0; i < nr_cpu_pages; i++) { +#ifndef CONFIG_NEED_MULTIPLE_NODES + ptr = alloc_bootmem_pages(PAGE_SIZE); +#else + int node = early_cpu_to_node(cpu); + + if (!node_online(node) || !NODE_DATA(node)) { + ptr = alloc_bootmem_pages(PAGE_SIZE); + pr_info("cpu %d has no node %d or node-local " + "memory\n", cpu, node); + pr_debug("per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); + } else { + ptr = alloc_bootmem_pages_node(NODE_DATA(node), + PAGE_SIZE); + pr_debug("per cpu data for cpu%d on node%d " + "at %016lx\n", cpu, node, __pa(ptr)); + } +#endif + memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); + pages[j++] = virt_to_page(ptr); + } + } + + pcpu_unit_size = pcpu_setup_static(populate_extra_pte, pages, size); + + free_bootmem(__pa(pages), pages_size); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) { + per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 00263bf07a8..8b1a0ef7f87 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,16 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } +void __init populate_extra_pte(unsigned long vaddr) +{ + int pgd_idx = pgd_index(vaddr); + int pmd_idx = pmd_index(vaddr); + pmd_t *pmd; + + pmd = one_md_table_init(swapper_pg_dir + pgd_idx); + one_page_table_init(pmd + pmd_idx); +} + static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, unsigned long vaddr, pte_t *lastpte) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e6d36b49025..7f91e2cdc4c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -223,6 +223,25 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval) set_pte_vaddr_pud(pud_page, vaddr, pteval); } +void __init populate_extra_pte(unsigned long vaddr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = pgd_offset_k(vaddr); + if (pgd_none(*pgd)) { + pud = (pud_t *)spp_getpage(); + pgd_populate(&init_mm, pgd, pud); + if (pud != pud_offset(pgd, 0)) { + printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", + pud, pud_offset(pgd, 0)); + return; + } + } + + set_pte_vaddr_pud((pud_t *)pgd_page_vaddr(*pgd), vaddr, __pte(0)); +} + /* * Create large page table mappings for a range of physical addresses. */ From ecab22aa6dc9d42ca52de2cad0854b4c6bd85ac9 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 20 Feb 2009 11:56:38 +0100 Subject: [PATCH 545/682] x86: use symbolic constants for MSR_IA32_MISC_ENABLE bits Impact: Cleanup. No functional changes. Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/e_powersaver.c | 6 +++--- arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | 6 +++--- arch/x86/kernel/cpu/intel.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 6 +++--- arch/x86/kernel/cpu/mcheck/p4.c | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c index c2f930d8664..41ab3f064cb 100644 --- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c @@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy) } /* Enable Enhanced PowerSaver */ rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { - val |= 1 << 16; + if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { + val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; wrmsrl(MSR_IA32_MISC_ENABLE, val); /* Can be locked at 0 */ rdmsrl(MSR_IA32_MISC_ENABLE, val); - if (!(val & 1 << 16)) { + if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); return -ENODEV; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index f08998278a3..c9f1fdc0283 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) enable it if not. */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & (1<<16))) { - l |= (1<<16); + if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { + l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); wrmsr(MSR_IA32_MISC_ENABLE, l, h); /* check to see if it stuck */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - if (!(l & (1<<16))) { + if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); return -ENODEV; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1f137a87d4b..c8ff69a4668 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -147,10 +147,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & (1<<9)) == 0) { + if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= (1<<9); /* Disable hw prefetching */ + lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 5e8c79e748a..ae00938ea50 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -49,13 +49,13 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); - if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } - if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) + if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) tm2 = 1; if (h & APIC_VECTOR_MASK) { @@ -73,7 +73,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 9b60fce09f7..f53bdcbaf38 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c @@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); h = apic_read(APIC_LVTTHMR); - if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); return; /* -EBUSY */ @@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) vendor_thermal_interrupt = intel_thermal_interrupt; rdmsr(MSR_IA32_MISC_ENABLE, l, h); - wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); + wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); From 3c3e5694add02e665bbbd0fecfbbdcc0b903097a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Feb 2009 11:46:36 -0500 Subject: [PATCH 546/682] x86: check PMD in spurious_fault handler Impact: fix to prevent hard lockup on bad PMD permissions If the PMD does not have the correct permissions for a page access, but the PTE does, the spurious fault handler will mistake the fault as a lazy TLB transaction. This will result in an infinite loop of: fault -> spurious_fault check (pass) -> return to code -> fault This patch adds a check and a warn on if the PTE passes the permissions but the PMD does not. [ Updated: Ingo Molnar suggested using WARN_ONCE with some text ] Signed-off-by: Steven Rostedt --- arch/x86/mm/fault.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c76ef1d701c..278d645d108 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -455,6 +455,7 @@ static int spurious_fault(unsigned long address, pud_t *pud; pmd_t *pmd; pte_t *pte; + int ret; /* Reserved-bit violation or user access to kernel space? */ if (error_code & (PF_USER | PF_RSVD)) @@ -482,7 +483,17 @@ static int spurious_fault(unsigned long address, if (!pte_present(*pte)) return 0; - return spurious_fault_check(error_code, pte); + ret = spurious_fault_check(error_code, pte); + if (!ret) + return 0; + + /* + * Make sure we have permissions in PMD + * If not, then there's a bug in the page tables. + */ + ret = spurious_fault_check(error_code, (pte_t *) pmd); + WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); + return ret; } /* From 7a5714e0186030676d79a7b4b9830c8e45c3b0a1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 17:44:21 +0100 Subject: [PATCH 547/682] x86, pat: add large-PAT check to split_large_page() Impact: future-proof the split_large_page() function Linus noticed that split_large_page() is not safe wrt. the PAT bit: it is bit 12 on the 1GB and 2MB page table level (_PAGE_BIT_PAT_LARGE), and it is bit 7 on the 4K page table level (_PAGE_BIT_PAT). Currently it is not a problem because we never set _PAGE_BIT_PAT_LARGE on any of the large-page mappings - but should this happen in the future the split_large_page() would silently lift bit 12 into the lowlevel 4K pte and would start corrupting the physical page frame offset. Not fun. So add a debug warning, to make sure if something ever sets the PAT bit then this function gets updated too. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7be47d1a97e..8253bc97587 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -482,6 +482,13 @@ static int split_large_page(pte_t *kpte, unsigned long address) pbase = (pte_t *)page_address(base); paravirt_alloc_pte(&init_mm, page_to_pfn(base)); ref_prot = pte_pgprot(pte_clrhuge(*kpte)); + /* + * If we ever want to utilize the PAT bit, we need to + * update this function to make sure it's converted from + * bit 12 to bit 7 when we cross from the 2MB level to + * the 4K level: + */ + WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE); #ifdef CONFIG_X86_64 if (level == PG_LEVEL_1G) { From fdb17aeb28aaad3eae5ef57e70cb287d34d1049d Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 20 Feb 2009 10:23:18 -0800 Subject: [PATCH 548/682] x86, vmi: TSC going backwards check in vmi clocksource, cleanup clean up vmi_read_cycles to use max() Reported-b: Andrew Morton Signed-off-by: Alok N Kataria Cc: Zach Amsden Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmiclock_32.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 9cd28c04952..b77ad5789af 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -288,8 +288,7 @@ static struct clocksource clocksource_vmi; static cycle_t read_real_cycles(void) { cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); - return ret >= clocksource_vmi.cycle_last ? - ret : clocksource_vmi.cycle_last; + return max(ret, clocksource_vmi.cycle_last); } static struct clocksource clocksource_vmi = { From 2d4a71676f4d89418a0d53e60b89e8b804b390b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 19:56:40 +0100 Subject: [PATCH 549/682] x86, mm: fault.c cleanup Impact: cleanup, no code changed Clean up various small details, which can be correctness checked automatically: - tidy up the include file section - eliminate unnecessary includes - introduce show_signal_msg() to clean up code flow - standardize the code flow - standardize comments and other style details - more cleanups, pointed out by checkpatch No code changed on either 32-bit nor 64-bit: arch/x86/mm/fault.o: text data bss dec hex filename 4632 32 24 4688 1250 fault.o.before 4632 32 24 4688 1250 fault.o.after the md5 changed due to a change in a single instruction: 2e8a8241e7f0d69706776a5a26c90bc0 fault.o.before.asm c5c3d36e725586eb74f0e10692f0193e fault.o.after.asm Because a __LINE__ reference in a WARN_ONCE() has changed. On 32-bit a few stack offsets changed - no code size difference nor any functionality difference. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 550 +++++++++++++++++++++++++++----------------- 1 file changed, 333 insertions(+), 217 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e4b9fc5001c..351d679bf97 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1,56 +1,59 @@ /* * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. + * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include /* For unblank_screen() */ +#include +#include #include #include -#include /* for max_low_pfn */ -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include #include +#include #include +#include +#include +#include +#include +#include +#include +#include -#include -#include -#include -#include -#include -#include -#include #include + +#include +#include +#include +#include +#include #include +#include /* - * Page fault error code bits - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - * bit 3 == 1 means use of reserved bit detected - * bit 4 == 1 means fault was an instruction fetch + * Page fault error code bits: + * + * bit 0 == 0: no page found 1: protection fault + * bit 1 == 0: read access 1: write access + * bit 2 == 0: kernel-mode access 1: user-mode access + * bit 3 == 1: use of reserved bit detected + * bit 4 == 1: fault was an instruction fetch */ -#define PF_PROT (1<<0) -#define PF_WRITE (1<<1) -#define PF_USER (1<<2) -#define PF_RSVD (1<<3) -#define PF_INSTR (1<<4) +enum x86_pf_error_code { + + PF_PROT = 1 << 0, + PF_WRITE = 1 << 1, + PF_USER = 1 << 2, + PF_RSVD = 1 << 3, + PF_INSTR = 1 << 4, +}; static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { @@ -82,23 +85,27 @@ static inline int notify_page_fault(struct pt_regs *regs) } /* - * X86_32 - * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. - * Check that here and ignore it. + * Prefetch quirks: * - * X86_64 - * Sometimes the CPU reports invalid exceptions on prefetch. - * Check that here and ignore it. + * 32-bit mode: * - * Opcode checker based on code by Richard Brunner + * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. + * Check that here and ignore it. + * + * 64-bit mode: + * + * Sometimes the CPU reports invalid exceptions on prefetch. + * Check that here and ignore it. + * + * Opcode checker based on code by Richard Brunner. */ -static int is_prefetch(struct pt_regs *regs, unsigned long error_code, - unsigned long addr) +static int +is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) { + unsigned char *max_instr; unsigned char *instr; int scan_more = 1; int prefetch = 0; - unsigned char *max_instr; /* * If it was a exec (instruction fetch) fault on NX page, then @@ -114,9 +121,9 @@ static int is_prefetch(struct pt_regs *regs, unsigned long error_code, return 0; while (scan_more && instr < max_instr) { - unsigned char opcode; unsigned char instr_hi; unsigned char instr_lo; + unsigned char opcode; if (probe_kernel_address(instr, opcode)) break; @@ -173,15 +180,17 @@ static int is_prefetch(struct pt_regs *regs, unsigned long error_code, return prefetch; } -static void force_sig_info_fault(int si_signo, int si_code, - unsigned long address, struct task_struct *tsk) +static void +force_sig_info_fault(int si_signo, int si_code, unsigned long address, + struct task_struct *tsk) { siginfo_t info; - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + force_sig_info(si_signo, &info, tsk); } @@ -189,6 +198,7 @@ static void force_sig_info_fault(int si_signo, int si_code, static int bad_address(void *p) { unsigned long dummy; + return probe_kernel_address((unsigned long *)p, dummy); } #endif @@ -200,13 +210,14 @@ static void dump_pagetable(unsigned long address) page = read_cr3(); page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; + #ifdef CONFIG_X86_PAE printk("*pdpt = %016Lx ", page); if ((page >> PAGE_SHIFT) < max_low_pfn && page & _PAGE_PRESENT) { page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) - & (PTRS_PER_PMD - 1)]; + & (PTRS_PER_PMD - 1)]; printk(KERN_CONT "*pde = %016Lx ", page); page &= ~_PAGE_NX; } @@ -218,14 +229,15 @@ static void dump_pagetable(unsigned long address) * We must not directly access the pte in the highpte * case if the page table is located in highmem. * And let's rather not kmap-atomic the pte, just in case - * it's allocated already. + * it's allocated already: */ if ((page >> PAGE_SHIFT) < max_low_pfn && (page & _PAGE_PRESENT) && !(page & _PAGE_PSE)) { + page &= PAGE_MASK; page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) - & (PTRS_PER_PTE - 1)]; + & (PTRS_PER_PTE - 1)]; printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); } @@ -239,26 +251,38 @@ static void dump_pagetable(unsigned long address) pgd = (pgd_t *)read_cr3(); pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); + pgd += pgd_index(address); - if (bad_address(pgd)) goto bad; + if (bad_address(pgd)) + goto bad; + printk("PGD %lx ", pgd_val(*pgd)); - if (!pgd_present(*pgd)) goto ret; + + if (!pgd_present(*pgd)) + goto out; pud = pud_offset(pgd, address); - if (bad_address(pud)) goto bad; + if (bad_address(pud)) + goto bad; + printk("PUD %lx ", pud_val(*pud)); if (!pud_present(*pud) || pud_large(*pud)) - goto ret; + goto out; pmd = pmd_offset(pud, address); - if (bad_address(pmd)) goto bad; + if (bad_address(pmd)) + goto bad; + printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) + goto out; pte = pte_offset_kernel(pmd, address); - if (bad_address(pte)) goto bad; + if (bad_address(pte)) + goto bad; + printk("PTE %lx", pte_val(*pte)); -ret: +out: printk("\n"); return; bad: @@ -285,7 +309,6 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) * and redundant with the set_pmd() on non-PAE. As would * set_pud. */ - pud = pud_offset(pgd, address); pud_k = pud_offset(pgd_k, address); if (!pud_present(*pud_k)) @@ -295,11 +318,14 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) return NULL; + if (!pmd_present(*pmd)) { set_pmd(pmd, *pmd_k); arch_flush_lazy_mmu_mode(); - } else + } else { BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + } + return pmd_k; } #endif @@ -312,29 +338,37 @@ KERN_ERR "******* Please consider a BIOS update.\n" KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; #endif -/* Workaround for K8 erratum #93 & buggy BIOS. - BIOS SMM functions are required to use a specific workaround - to avoid corruption of the 64bit RIP register on C stepping K8. - A lot of BIOS that didn't get tested properly miss this. - The OS sees this as a page fault with the upper 32bits of RIP cleared. - Try to work around it here. - Note we only handle faults in kernel here. - Does nothing for X86_32 +/* + * Workaround for K8 erratum #93 & buggy BIOS. + * + * BIOS SMM functions are required to use a specific workaround + * to avoid corruption of the 64bit RIP register on C stepping K8. + * + * A lot of BIOS that didn't get tested properly miss this. + * + * The OS sees this as a page fault with the upper 32bits of RIP cleared. + * Try to work around it here. + * + * Note we only handle faults in kernel here. + * Does nothing on 32-bit. */ static int is_errata93(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - static int warned; + static int once; + if (address != regs->ip) return 0; + if ((address >> 32) != 0) return 0; + address |= 0xffffffffUL << 32; if ((address >= (u64)_stext && address <= (u64)_etext) || (address >= MODULES_VADDR && address <= MODULES_END)) { - if (!warned) { + if (!once) { printk(errata93_warning); - warned = 1; + once = 1; } regs->ip = address; return 1; @@ -344,16 +378,17 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) } /* - * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal - * addresses >4GB. We catch this in the page fault handler because these - * addresses are not reachable. Just detect this case and return. Any code + * Work around K8 erratum #100 K8 in compat mode occasionally jumps + * to illegal addresses >4GB. + * + * We catch this in the page fault handler because these addresses + * are not reachable. Just detect this case and return. Any code * segment in LDT is compatibility mode. */ static int is_errata100(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_64 - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && - (address >> 32)) + if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) return 1; #endif return 0; @@ -363,8 +398,9 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG unsigned long nr; + /* - * Pentium F0 0F C7 C8 bug workaround. + * Pentium F0 0F C7 C8 bug workaround: */ if (boot_cpu_data.f00f_bug) { nr = (address - idt_descr.address) >> 3; @@ -378,8 +414,9 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) return 0; } -static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static void +show_fault_oops(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { #ifdef CONFIG_X86_32 if (!oops_may_print()) @@ -389,12 +426,14 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, #ifdef CONFIG_X86_PAE if (error_code & PF_INSTR) { unsigned int level; + pte_t *pte = lookup_address(address, &level); - if (pte && pte_present(*pte) && !pte_exec(*pte)) + if (pte && pte_present(*pte) && !pte_exec(*pte)) { printk(KERN_CRIT "kernel tried to execute " "NX-protected page - exploit attempt? " "(uid: %d)\n", current_uid()); + } } #endif @@ -403,34 +442,45 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "NULL pointer dereference"); else printk(KERN_CONT "paging request"); + printk(KERN_CONT " at %p\n", (void *) address); printk(KERN_ALERT "IP:"); printk_address(regs->ip, 1); + dump_pagetable(address); } #ifdef CONFIG_X86_64 -static noinline void pgtable_bad(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +pgtable_bad(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { - unsigned long flags = oops_begin(); - int sig = SIGKILL; - struct task_struct *tsk = current; + struct task_struct *tsk; + unsigned long flags; + int sig; + + flags = oops_begin(); + tsk = current; + sig = SIGKILL; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", tsk->comm, address); dump_pagetable(address); - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + if (__die("Bad pagetable", regs, error_code)) sig = 0; + oops_end(flags, regs, sig); } #endif -static noinline void no_context(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +no_context(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { struct task_struct *tsk = current; unsigned long *stackend; @@ -440,18 +490,20 @@ static noinline void no_context(struct pt_regs *regs, int sig; #endif - /* Are we prepared to handle this kernel fault? */ + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; /* - * X86_32 - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. + * 32-bit: * - * X86_64 - * Hall of shame of CPU/BIOS bugs. + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * 64-bit: + * + * Hall of shame of CPU/BIOS bugs. */ if (is_prefetch(regs, error_code, address)) return; @@ -461,7 +513,7 @@ static noinline void no_context(struct pt_regs *regs, /* * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. + * terminate things with extreme prejudice: */ #ifdef CONFIG_X86_32 bust_spinlocks(1); @@ -471,7 +523,7 @@ static noinline void no_context(struct pt_regs *regs, show_fault_oops(regs, error_code, address); - stackend = end_of_stack(tsk); + stackend = end_of_stack(tsk); if (*stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); @@ -487,28 +539,54 @@ static noinline void no_context(struct pt_regs *regs, sig = SIGKILL; if (__die("Oops", regs, error_code)) sig = 0; + /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, sig); #endif } -static void __bad_area_nosemaphore(struct pt_regs *regs, - unsigned long error_code, unsigned long address, - int si_code) +/* + * Print out info about fatal segfaults, if the show_unhandled_signals + * sysctl is set: + */ +static inline void +show_signal_msg(struct pt_regs *regs, unsigned long error_code, + unsigned long address, struct task_struct *tsk) +{ + if (!unhandled_signal(tsk, SIGSEGV)) + return; + + if (!printk_ratelimit()) + return; + + printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *)regs->ip, (void *)regs->sp, error_code); + + print_vma_addr(KERN_CONT " in ", regs->ip); + + printk(KERN_CONT "\n"); +} + +static void +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) { struct task_struct *tsk = current; /* User mode accesses just cause a SIGSEGV */ if (error_code & PF_USER) { /* - * It's possible to have interrupts off here. + * It's possible to have interrupts off here: */ local_irq_enable(); /* * Valid to do another page fault here because this one came - * from user space. + * from user space: */ if (is_prefetch(regs, error_code, address)) return; @@ -516,22 +594,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs, if (is_errata100(regs, address)) return; - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk( - "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *) regs->ip, (void *) regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } + if (unlikely(show_unhandled_signals)) + show_signal_msg(regs, error_code, address, tsk); + + /* Kernel addresses are always protection faults: */ + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; } @@ -541,15 +613,16 @@ static void __bad_area_nosemaphore(struct pt_regs *regs, no_context(regs, error_code, address); } -static noinline void bad_area_nosemaphore(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); } -static void __bad_area(struct pt_regs *regs, - unsigned long error_code, unsigned long address, - int si_code) +static void +__bad_area(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) { struct mm_struct *mm = current->mm; @@ -562,67 +635,77 @@ static void __bad_area(struct pt_regs *regs, __bad_area_nosemaphore(regs, error_code, address, si_code); } -static noinline void bad_area(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) { __bad_area(regs, error_code, address, SEGV_MAPERR); } -static noinline void bad_area_access_error(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static noinline void +bad_area_access_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { __bad_area(regs, error_code, address, SEGV_ACCERR); } /* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ -static void out_of_memory(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static void +out_of_memory(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { /* * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). + * (which will retry the fault, or kill us if we got oom-killed): */ up_read(¤t->mm->mmap_sem); + pagefault_out_of_memory(); } -static void do_sigbus(struct pt_regs *regs, - unsigned long error_code, unsigned long address) +static void +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; up_read(&mm->mmap_sem); - /* Kernel mode? Handle exceptions or die */ + /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) no_context(regs, error_code, address); + #ifdef CONFIG_X86_32 - /* User space => ok to do another page fault */ + /* User space => ok to do another page fault: */ if (is_prefetch(regs, error_code, address)) return; #endif - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; + + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } -static noinline void mm_fault_error(struct pt_regs *regs, - unsigned long error_code, unsigned long address, unsigned int fault) +static noinline void +mm_fault_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address, unsigned int fault) { - if (fault & VM_FAULT_OOM) + if (fault & VM_FAULT_OOM) { out_of_memory(regs, error_code, address); - else if (fault & VM_FAULT_SIGBUS) - do_sigbus(regs, error_code, address); - else - BUG(); + } else { + if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); + } } static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) return 0; @@ -630,16 +713,19 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) } /* - * Handle a spurious fault caused by a stale TLB entry. This allows - * us to lazily refresh the TLB when increasing the permissions of a - * kernel page (RO -> RW or NX -> X). Doing it eagerly is very - * expensive since that implies doing a full cross-processor TLB - * flush, even if no stale TLB entries exist on other processors. + * Handle a spurious fault caused by a stale TLB entry. + * + * This allows us to lazily refresh the TLB when increasing the + * permissions of a kernel page (RO -> RW or NX -> X). Doing it + * eagerly is very expensive since that implies doing a full + * cross-processor TLB flush, even if no stale TLB entries exist + * on other processors. + * * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int spurious_fault(unsigned long error_code, - unsigned long address) +static noinline int +spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; pud_t *pud; @@ -678,20 +764,23 @@ static noinline int spurious_fault(unsigned long error_code, return 0; /* - * Make sure we have permissions in PMD - * If not, then there's a bug in the page tables. + * Make sure we have permissions in PMD. + * If not, then there's a bug in the page tables: */ ret = spurious_fault_check(error_code, (pte_t *) pmd); WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); + return ret; } /* - * X86_32 - * Handle a fault on the vmalloc or module mapping area + * 32-bit: * - * X86_64 - * Handle a fault on the vmalloc area + * Handle a fault on the vmalloc or module mapping area + * + * 64-bit: + * + * Handle a fault on the vmalloc area * * This assumes no large pages in there. */ @@ -702,7 +791,7 @@ static noinline int vmalloc_fault(unsigned long address) pmd_t *pmd_k; pte_t *pte_k; - /* Make sure we are in vmalloc area */ + /* Make sure we are in vmalloc area: */ if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; @@ -717,9 +806,11 @@ static noinline int vmalloc_fault(unsigned long address) pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); if (!pmd_k) return -1; + pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; + return 0; #else pgd_t *pgd, *pgd_ref; @@ -727,69 +818,84 @@ static noinline int vmalloc_fault(unsigned long address) pmd_t *pmd, *pmd_ref; pte_t *pte, *pte_ref; - /* Make sure we are in vmalloc area */ + /* Make sure we are in vmalloc area: */ if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; - /* Copy kernel mappings over when needed. This can also - happen within a race in page table update. In the later - case just flush. */ - + /* + * Copy kernel mappings over when needed. This can also + * happen within a race in page table update. In the later + * case just flush: + */ pgd = pgd_offset(current->active_mm, address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; + if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); else BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - /* Below here mismatches are bugs because these lower tables - are shared */ + /* + * Below here mismatches are bugs because these lower tables + * are shared: + */ pud = pud_offset(pgd, address); pud_ref = pud_offset(pgd_ref, address); if (pud_none(*pud_ref)) return -1; + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) BUG(); + pmd = pmd_offset(pud, address); pmd_ref = pmd_offset(pud_ref, address); if (pmd_none(*pmd_ref)) return -1; + if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) BUG(); + pte_ref = pte_offset_kernel(pmd_ref, address); if (!pte_present(*pte_ref)) return -1; + pte = pte_offset_kernel(pmd, address); - /* Don't use pte_page here, because the mappings can point - outside mem_map, and the NUMA hash lookup cannot handle - that. */ + + /* + * Don't use pte_page here, because the mappings can point + * outside mem_map, and the NUMA hash lookup cannot handle + * that: + */ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) BUG(); + return 0; #endif } int show_unhandled_signals = 1; -static inline int access_error(unsigned long error_code, int write, - struct vm_area_struct *vma) +static inline int +access_error(unsigned long error_code, int write, struct vm_area_struct *vma) { if (write) { - /* write, present and write, not present */ + /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) return 1; - } else if (unlikely(error_code & PF_PROT)) { - /* read, present */ - return 1; - } else { - /* read, not present */ - if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) - return 1; + return 0; } + /* read, present: */ + if (unlikely(error_code & PF_PROT)) + return 1; + + /* read, not present: */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + return 0; } @@ -797,9 +903,9 @@ static int fault_in_kernel_space(unsigned long address) { #ifdef CONFIG_X86_32 return address >= TASK_SIZE; -#else /* !CONFIG_X86_32 */ +#else return address >= TASK_SIZE64; -#endif /* CONFIG_X86_32 */ +#endif } /* @@ -812,18 +918,19 @@ asmlinkage #endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { - unsigned long address; - struct task_struct *tsk; - struct mm_struct *mm; struct vm_area_struct *vma; + struct task_struct *tsk; + unsigned long address; + struct mm_struct *mm; int write; int fault; tsk = current; mm = tsk->mm; + prefetchw(&mm->mmap_sem); - /* get the address */ + /* Get the faulting address: */ address = read_cr2(); if (unlikely(kmmio_fault(regs, address))) @@ -847,22 +954,23 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) vmalloc_fault(address) >= 0) return; - /* Can handle a stale RO->RW TLB */ + /* Can handle a stale RO->RW TLB: */ if (spurious_fault(error_code, address)) return; - /* kprobes don't want to hook the spurious faults. */ + /* kprobes don't want to hook the spurious faults: */ if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. + * fault we could otherwise deadlock: */ bad_area_nosemaphore(regs, error_code, address); + return; } - /* kprobes don't want to hook the spurious faults. */ + /* kprobes don't want to hook the spurious faults: */ if (unlikely(notify_page_fault(regs))) return; /* @@ -870,13 +978,15 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * vmalloc fault has been handled. * * User-mode registers count as a user access even for any - * potential system fault or CPU buglet. + * potential system fault or CPU buglet: */ if (user_mode_vm(regs)) { local_irq_enable(); error_code |= PF_USER; - } else if (regs->flags & X86_EFLAGS_IF) - local_irq_enable(); + } else { + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); + } #ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) @@ -884,8 +994,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) #endif /* - * If we're in an interrupt, have no user context or are running in an - * atomic region then we must not take the fault. + * If we're in an interrupt, have no user context or are running + * in an atomic region then we must not take the fault: */ if (unlikely(in_atomic() || !mm)) { bad_area_nosemaphore(regs, error_code, address); @@ -894,19 +1004,19 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* * When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunately, in the case of an - * erroneous fault occurring in a code path which already holds mmap_sem - * we will deadlock attempting to validate the fault against the - * address space. Luckily the kernel only validly references user - * space from well defined areas of code, which are listed in the - * exceptions table. + * addresses in user space. All other faults represent errors in + * the kernel and should generate an OOPS. Unfortunately, in the + * case of an erroneous fault occurring in a code path which already + * holds mmap_sem we will deadlock attempting to validate the fault + * against the address space. Luckily the kernel only validly + * references user space from well defined areas of code, which are + * listed in the exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibility of a deadlock. - * Attempt to lock the address space, if we cannot we then validate the - * source. If this is invalid we can skip the address space check, - * thus avoiding the deadlock. + * the source reference check when there is a possibility of a + * deadlock. Attempt to lock the address space, if we cannot we then + * validate the source. If this is invalid we can skip the address + * space check, thus avoiding the deadlock: */ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && @@ -917,8 +1027,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) down_read(&mm->mmap_sem); } else { /* - * The above down_read_trylock() might have succeeded in which - * case we'll have missed the might_sleep() from down_read(). + * The above down_read_trylock() might have succeeded in + * which case we'll have missed the might_sleep() from + * down_read(): */ might_sleep(); } @@ -938,7 +1049,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* * Accessing the stack below %sp is always a bug. * The large cushion allows instructions like enter - * and pusha to work. ("enter $65535,$31" pushes + * and pusha to work. ("enter $65535, $31" pushes * 32 pointers and then decrements %sp by 65535.) */ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { @@ -957,6 +1068,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) */ good_area: write = error_code & PF_WRITE; + if (unlikely(access_error(error_code, write, vma))) { bad_area_access_error(regs, error_code, address); return; @@ -965,13 +1077,15 @@ good_area: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo - * the fault. + * the fault: */ fault = handle_mm_fault(mm, vma, address, write); + if (unlikely(fault & VM_FAULT_ERROR)) { mm_fault_error(regs, error_code, address, fault); return; } + if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; else @@ -1004,13 +1118,13 @@ void vmalloc_sync_all(void) for (address = VMALLOC_START & PMD_MASK; address >= TASK_SIZE && address < FIXADDR_TOP; address += PMD_SIZE) { + unsigned long flags; struct page *page; spin_lock_irqsave(&pgd_lock, flags); list_for_each_entry(page, &pgd_list, lru) { - if (!vmalloc_sync_one(page_address(page), - address)) + if (!vmalloc_sync_one(page_address(page), address)) break; } spin_unlock_irqrestore(&pgd_lock, flags); @@ -1018,12 +1132,14 @@ void vmalloc_sync_all(void) #else /* CONFIG_X86_64 */ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; address += PGDIR_SIZE) { + const pgd_t *pgd_ref = pgd_offset_k(address); unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; + spin_lock_irqsave(&pgd_lock, flags); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; From 107a03678cac0dd6cf7095f81442a4fa477e4700 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 20:37:05 +0100 Subject: [PATCH 550/682] x86, mm: fault.c, refactor/simplify the is_prefetch() code Impact: no functionality changed Factor out the opcode checker into a helper inline. The code got a tiny bit smaller: text data bss dec hex filename 4632 32 24 4688 1250 fault.o.before 4618 32 24 4674 1242 fault.o.after And it got cleaner / easier to review as well. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 100 ++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 351d679bf97..72973c7682b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -99,12 +99,58 @@ static inline int notify_page_fault(struct pt_regs *regs) * * Opcode checker based on code by Richard Brunner. */ +static inline int +check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, + unsigned char opcode, int *prefetch) +{ + unsigned char instr_hi = opcode & 0xf0; + unsigned char instr_lo = opcode & 0x0f; + + switch (instr_hi) { + case 0x20: + case 0x30: + /* + * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. + * In X86_64 long mode, the CPU will signal invalid + * opcode if some of these prefixes are present so + * X86_64 will never get here anyway + */ + return ((instr_lo & 7) == 0x6); +#ifdef CONFIG_X86_64 + case 0x40: + /* + * In AMD64 long mode 0x40..0x4F are valid REX prefixes + * Need to figure out under what instruction mode the + * instruction was issued. Could check the LDT for lm, + * but for now it's good enough to assume that long + * mode only uses well known segments or kernel. + */ + return (!user_mode(regs)) || (regs->cs == __USER_CS); +#endif + case 0x60: + /* 0x64 thru 0x67 are valid prefixes in all modes. */ + return (instr_lo & 0xC) == 0x4; + case 0xF0: + /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ + return !instr_lo || (instr_lo>>1) == 1; + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + if (probe_kernel_address(instr, opcode)) + return 0; + + *prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); + return 0; + default: + return 0; + } +} + static int is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) { unsigned char *max_instr; unsigned char *instr; - int scan_more = 1; int prefetch = 0; /* @@ -114,68 +160,22 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) if (error_code & PF_INSTR) return 0; - instr = (unsigned char *)convert_ip_to_linear(current, regs); + instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; - while (scan_more && instr < max_instr) { - unsigned char instr_hi; - unsigned char instr_lo; + while (instr < max_instr) { unsigned char opcode; if (probe_kernel_address(instr, opcode)) break; - instr_hi = opcode & 0xf0; - instr_lo = opcode & 0x0f; instr++; - switch (instr_hi) { - case 0x20: - case 0x30: - /* - * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. - * In X86_64 long mode, the CPU will signal invalid - * opcode if some of these prefixes are present so - * X86_64 will never get here anyway - */ - scan_more = ((instr_lo & 7) == 0x6); + if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) break; -#ifdef CONFIG_X86_64 - case 0x40: - /* - * In AMD64 long mode 0x40..0x4F are valid REX prefixes - * Need to figure out under what instruction mode the - * instruction was issued. Could check the LDT for lm, - * but for now it's good enough to assume that long - * mode only uses well known segments or kernel. - */ - scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); - break; -#endif - case 0x60: - /* 0x64 thru 0x67 are valid prefixes in all modes. */ - scan_more = (instr_lo & 0xC) == 0x4; - break; - case 0xF0: - /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ - scan_more = !instr_lo || (instr_lo>>1) == 1; - break; - case 0x00: - /* Prefetch instruction is 0x0F0D or 0x0F18 */ - scan_more = 0; - - if (probe_kernel_address(instr, opcode)) - break; - prefetch = (instr_lo == 0xF) && - (opcode == 0x0D || opcode == 0x18); - break; - default: - scan_more = 0; - break; - } } return prefetch; } From 8c938f9fae887f6e180bf802aa1c33cf74712aff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:12:18 +0100 Subject: [PATCH 551/682] x86, mm: fault.c, factor out the vm86 fault check Impact: cleanup Instead of an ugly, open-coded, #ifdef-ed vm86 related legacy check in do_page_fault(), put it into the check_v8086_mode() helper function and merge it with an existing #ifdef. Also, simplify the code flow a tiny bit in the helper. No code changed: arch/x86/mm/fault.o: text data bss dec hex filename 2711 12 12 2735 aaf fault.o.before 2711 12 12 2735 aaf fault.o.after Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 72973c7682b..7dc0615c3cf 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -328,14 +328,41 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -#endif -#ifdef CONFIG_X86_64 +/* + * Did it hit the DOS screen memory VA from vm86 mode? + */ +static inline void +check_v8086_mode(struct pt_regs *regs, unsigned long address, + struct task_struct *tsk) +{ + unsigned long bit; + + if (!v8086_mode(regs)) + return; + + bit = (address - 0xA0000) >> PAGE_SHIFT; + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; +} + +#else /* CONFIG_X86_64: */ + static const char errata93_warning[] = KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" KERN_ERR "******* Please consider a BIOS update.\n" KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; + +/* + * No vm86 mode in 64-bit mode: + */ +static inline void +check_v8086_mode(struct pt_regs *regs, unsigned long address, + struct task_struct *tsk) +{ +} + #endif /* @@ -1091,16 +1118,8 @@ good_area: else tsk->min_flt++; -#ifdef CONFIG_X86_32 - /* - * Did it hit the DOS screen memory VA from vm86 mode? - */ - if (v8086_mode(regs)) { - unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; - if (bit < 32) - tsk->thread.screen_bitmap |= 1 << bit; - } -#endif + check_v8086_mode(regs, address, tsk); + up_read(&mm->mmap_sem); } From 121d5d0a7e5808fbcfda484efd7ba840ac93450f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:18:08 +0100 Subject: [PATCH 552/682] x86, mm: fault.c, enable PF_RSVD checks on 32-bit too Impact: improve page fault handling robustness The 'PF_RSVD' flag (bit 3) of the page-fault error_code is a relatively recent addition to x86 CPUs, so the 32-bit do_fault() implementation never had it. This flag gets set when the CPU detects nonzero values in any reserved bits of the page directory entries. Extend the existing 64-bit check for PF_RSVD in do_page_fault() to 32-bit too. If we detect such a fault then we print a more informative oops and the pagetables. This unifies the code some more, removes an ugly #ifdef and improves the 32-bit page fault code robustness a bit. It slightly increases the 32-bit kernel text size. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7dc0615c3cf..3e366146273 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -477,7 +477,6 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, dump_pagetable(address); } -#ifdef CONFIG_X86_64 static noinline void pgtable_bad(struct pt_regs *regs, unsigned long error_code, unsigned long address) @@ -503,7 +502,6 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, oops_end(flags, regs, sig); } -#endif static noinline void no_context(struct pt_regs *regs, unsigned long error_code, @@ -1015,10 +1013,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) local_irq_enable(); } -#ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); -#endif /* * If we're in an interrupt, have no user context or are running From b814d41f0987c7648d7ed07471258101c95c026b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:32:10 +0100 Subject: [PATCH 553/682] x86, mm: fault.c, simplify kmmio_fault() Impact: cleanup Remove an #ifdef from kmmio_fault() - we can do this by providing default implementations for is_kmmio_active() and kmmio_handler(). The compiler optimizes it all away in the !CONFIG_MMIOTRACE case. Also, while at it, clean up mmiotrace.h a bit: - standard header guards - standard vertical spaces for structure definitions No code changed (both with mmiotrace on and off in the config): text data bss dec hex filename 2947 12 12 2971 b9b fault.o.before 2947 12 12 2971 b9b fault.o.after Cc: Pekka Paalanen Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 5 ++- include/linux/mmiotrace.h | 82 +++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3e366146273..fe99af4b86d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -55,13 +55,14 @@ enum x86_pf_error_code { PF_INSTR = 1 << 4, }; +/* + * (returns 0 if mmiotrace is disabled) + */ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { -#ifdef CONFIG_MMIOTRACE if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; -#endif return 0; } diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 139d7c88d9c..3d1b7bde128 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,5 +1,5 @@ -#ifndef MMIOTRACE_H -#define MMIOTRACE_H +#ifndef _LINUX_MMIOTRACE_H +#define _LINUX_MMIOTRACE_H #include #include @@ -13,28 +13,34 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; /* kmmio internal list */ - unsigned long addr; /* start location of the probe point */ - unsigned long len; /* length of the probe region */ - kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ - kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *private; + /* kmmio internal list: */ + struct list_head list; + /* start location of the probe point: */ + unsigned long addr; + /* length of the probe region: */ + unsigned long len; + /* Called before addr is executed: */ + kmmio_pre_handler_t pre_handler; + /* Called after addr is executed: */ + kmmio_post_handler_t post_handler; + void *private; }; -/* kmmio is active by some kmmio_probes? */ -static inline int is_kmmio_active(void) -{ - extern unsigned int kmmio_count; - return kmmio_count; -} +extern unsigned int kmmio_count; extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); +#ifdef CONFIG_MMIOTRACE +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + return kmmio_count; +} + /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); -#ifdef CONFIG_MMIOTRACE /* Called from ioremap.c */ extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr); @@ -43,7 +49,17 @@ extern void mmiotrace_iounmap(volatile void __iomem *addr); /* For anyone to insert markers. Remember trailing newline. */ extern int mmiotrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); -#else +#else /* !CONFIG_MMIOTRACE: */ +static inline int is_kmmio_active(void) +{ + return 0; +} + +static inline int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + return 0; +} + static inline void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr) { @@ -63,28 +79,28 @@ static inline int mmiotrace_printk(const char *fmt, ...) #endif /* CONFIG_MMIOTRACE */ enum mm_io_opcode { - MMIO_READ = 0x1, /* struct mmiotrace_rw */ - MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ - MMIO_PROBE = 0x3, /* struct mmiotrace_map */ - MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ - MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ + MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ }; struct mmiotrace_rw { - resource_size_t phys; /* PCI address of register */ - unsigned long value; - unsigned long pc; /* optional program counter */ - int map_id; - unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ - unsigned char width; /* size of register access in bytes */ + resource_size_t phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; struct mmiotrace_map { - resource_size_t phys; /* base address in PCI space */ - unsigned long virt; /* base virtual address */ - unsigned long len; /* mapping size */ - int map_id; - unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ + resource_size_t phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; /* in kernel/trace/trace_mmiotrace.c */ @@ -94,4 +110,4 @@ extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); extern int mmio_trace_printk(const char *fmt, va_list args); -#endif /* MMIOTRACE_H */ +#endif /* _LINUX_MMIOTRACE_H */ From b18018126f422f5b706fd750373425e10e84b486 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:42:57 +0100 Subject: [PATCH 554/682] x86, mm, kprobes: fault.c, simplify notify_page_fault() Impact: cleanup Remove an #ifdef from notify_page_fault(). The function still compiles to nothing in the !CONFIG_KPROBES case. Introduce kprobes_built_in() and kprobe_fault_handler() helpers to allow this - they returns 0 if !CONFIG_KPROBES. No code changed: text data bss dec hex filename 4618 32 24 4674 1242 fault.o.before 4618 32 24 4674 1242 fault.o.after Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 6 +----- include/linux/kprobes.h | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fe99af4b86d..379beaec6ca 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -68,11 +68,10 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) static inline int notify_page_fault(struct pt_regs *regs) { -#ifdef CONFIG_KPROBES int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (!user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode_vm(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -80,9 +79,6 @@ static inline int notify_page_fault(struct pt_regs *regs) } return ret; -#else - return 0; -#endif } /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 32851eef48f..2ec6cc14a11 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -182,6 +182,14 @@ struct kprobe_blackpoint { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +/* + * For #ifdef avoidance: + */ +static inline int kprobes_built_in(void) +{ + return 1; +} + #ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); @@ -271,8 +279,16 @@ void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); -#else /* CONFIG_KPROBES */ +#else /* !CONFIG_KPROBES: */ +static inline int kprobes_built_in(void) +{ + return 0; +} +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + return 0; +} static inline struct kprobe *get_kprobe(void *addr) { return NULL; @@ -329,5 +345,5 @@ static inline void unregister_kretprobes(struct kretprobe **rps, int num) static inline void kprobe_flush_task(struct task_struct *tk) { } -#endif /* CONFIG_KPROBES */ -#endif /* _LINUX_KPROBES_H */ +#endif /* CONFIG_KPROBES */ +#endif /* _LINUX_KPROBES_H */ From f2f13a8535174dbb813a0607a9d4737cfba98f6c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:50:24 +0100 Subject: [PATCH 555/682] x86, mm: fault.c, reorder functions Impact: cleanup Avoid a couple more #ifdefs by moving fundamentally non-unifiable functions into a single #ifdef 32-bit / #else / #endif block in fault.c: vmalloc*(), dump_pagetable(), check_vm8086_mode(). No code changed: text data bss dec hex filename 4618 32 24 4674 1242 fault.o.before 4618 32 24 4674 1242 fault.o.after Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 476 ++++++++++++++++++++++---------------------- 1 file changed, 240 insertions(+), 236 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 379beaec6ca..4ce62fb80da 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -191,18 +191,124 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, force_sig_info(si_signo, &info, tsk); } -#ifdef CONFIG_X86_64 -static int bad_address(void *p) -{ - unsigned long dummy; +DEFINE_SPINLOCK(pgd_lock); +LIST_HEAD(pgd_list); - return probe_kernel_address((unsigned long *)p, dummy); +#ifdef CONFIG_X86_32 +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + /* + * set_pgd(pgd, *pgd_k); here would be useless on PAE + * and redundant with the set_pmd() on non-PAE. As would + * set_pud. + */ + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + + if (!pmd_present(*pmd)) { + set_pmd(pmd, *pmd_k); + arch_flush_lazy_mmu_mode(); + } else { + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + } + + return pmd_k; +} + +void vmalloc_sync_all(void) +{ + unsigned long address; + + if (SHARED_KERNEL_PMD) + return; + + for (address = VMALLOC_START & PMD_MASK; + address >= TASK_SIZE && address < FIXADDR_TOP; + address += PMD_SIZE) { + + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + if (!vmalloc_sync_one(page_address(page), address)) + break; + } + spin_unlock_irqrestore(&pgd_lock, flags); + } +} + +/* + * 32-bit: + * + * Handle a fault on the vmalloc or module mapping area + */ +static noinline int vmalloc_fault(unsigned long address) +{ + unsigned long pgd_paddr; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + pgd_paddr = read_cr3(); + pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); + if (!pmd_k) + return -1; + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; + + return 0; +} + +/* + * Did it hit the DOS screen memory VA from vm86 mode? + */ +static inline void +check_v8086_mode(struct pt_regs *regs, unsigned long address, + struct task_struct *tsk) +{ + unsigned long bit; + + if (!v8086_mode(regs)) + return; + + bit = (address - 0xA0000) >> PAGE_SHIFT; + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; } -#endif static void dump_pagetable(unsigned long address) { -#ifdef CONFIG_X86_32 __typeof__(pte_val(__pte(0))) page; page = read_cr3(); @@ -239,7 +345,132 @@ static void dump_pagetable(unsigned long address) } printk("\n"); -#else /* CONFIG_X86_64 */ +} + +#else /* CONFIG_X86_64: */ + +void vmalloc_sync_all(void) +{ + unsigned long address; + + for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; + address += PGDIR_SIZE) { + + const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; + struct page *page; + + if (pgd_none(*pgd_ref)) + continue; + + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } + spin_unlock_irqrestore(&pgd_lock, flags); + } +} + +/* + * 64-bit: + * + * Handle a fault on the vmalloc area + * + * This assumes no large pages in there. + */ +static noinline int vmalloc_fault(unsigned long address) +{ + pgd_t *pgd, *pgd_ref; + pud_t *pud, *pud_ref; + pmd_t *pmd, *pmd_ref; + pte_t *pte, *pte_ref; + + /* Make sure we are in vmalloc area: */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + + /* + * Copy kernel mappings over when needed. This can also + * happen within a race in page table update. In the later + * case just flush: + */ + pgd = pgd_offset(current->active_mm, address); + pgd_ref = pgd_offset_k(address); + if (pgd_none(*pgd_ref)) + return -1; + + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + + /* + * Below here mismatches are bugs because these lower tables + * are shared: + */ + + pud = pud_offset(pgd, address); + pud_ref = pud_offset(pgd_ref, address); + if (pud_none(*pud_ref)) + return -1; + + if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) + BUG(); + + pmd = pmd_offset(pud, address); + pmd_ref = pmd_offset(pud_ref, address); + if (pmd_none(*pmd_ref)) + return -1; + + if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) + BUG(); + + pte_ref = pte_offset_kernel(pmd_ref, address); + if (!pte_present(*pte_ref)) + return -1; + + pte = pte_offset_kernel(pmd, address); + + /* + * Don't use pte_page here, because the mappings can point + * outside mem_map, and the NUMA hash lookup cannot handle + * that: + */ + if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) + BUG(); + + return 0; +} + +static const char errata93_warning[] = +KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" +KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" +KERN_ERR "******* Please consider a BIOS update.\n" +KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; + +/* + * No vm86 mode in 64-bit mode: + */ +static inline void +check_v8086_mode(struct pt_regs *regs, unsigned long address, + struct task_struct *tsk) +{ +} + +static int bad_address(void *p) +{ + unsigned long dummy; + + return probe_kernel_address((unsigned long *)p, dummy); +} + +static void dump_pagetable(unsigned long address) +{ pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -284,83 +515,9 @@ out: return; bad: printk("BAD\n"); -#endif } -#ifdef CONFIG_X86_32 -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -{ - unsigned index = pgd_index(address); - pgd_t *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd += index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - return NULL; - - /* - * set_pgd(pgd, *pgd_k); here would be useless on PAE - * and redundant with the set_pmd() on non-PAE. As would - * set_pud. - */ - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - return NULL; - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; - - if (!pmd_present(*pmd)) { - set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else { - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - } - - return pmd_k; -} - -/* - * Did it hit the DOS screen memory VA from vm86 mode? - */ -static inline void -check_v8086_mode(struct pt_regs *regs, unsigned long address, - struct task_struct *tsk) -{ - unsigned long bit; - - if (!v8086_mode(regs)) - return; - - bit = (address - 0xA0000) >> PAGE_SHIFT; - if (bit < 32) - tsk->thread.screen_bitmap |= 1 << bit; -} - -#else /* CONFIG_X86_64: */ - -static const char errata93_warning[] = -KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" -KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" -KERN_ERR "******* Please consider a BIOS update.\n" -KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; - -/* - * No vm86 mode in 64-bit mode: - */ -static inline void -check_v8086_mode(struct pt_regs *regs, unsigned long address, - struct task_struct *tsk) -{ -} - -#endif +#endif /* CONFIG_X86_64 */ /* * Workaround for K8 erratum #93 & buggy BIOS. @@ -795,109 +952,6 @@ spurious_fault(unsigned long error_code, unsigned long address) return ret; } -/* - * 32-bit: - * - * Handle a fault on the vmalloc or module mapping area - * - * 64-bit: - * - * Handle a fault on the vmalloc area - * - * This assumes no large pages in there. - */ -static noinline int vmalloc_fault(unsigned long address) -{ -#ifdef CONFIG_X86_32 - unsigned long pgd_paddr; - pmd_t *pmd_k; - pte_t *pte_k; - - /* Make sure we are in vmalloc area: */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "current" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - pgd_paddr = read_cr3(); - pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); - if (!pmd_k) - return -1; - - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - - return 0; -#else - pgd_t *pgd, *pgd_ref; - pud_t *pud, *pud_ref; - pmd_t *pmd, *pmd_ref; - pte_t *pte, *pte_ref; - - /* Make sure we are in vmalloc area: */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - /* - * Copy kernel mappings over when needed. This can also - * happen within a race in page table update. In the later - * case just flush: - */ - pgd = pgd_offset(current->active_mm, address); - pgd_ref = pgd_offset_k(address); - if (pgd_none(*pgd_ref)) - return -1; - - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - - /* - * Below here mismatches are bugs because these lower tables - * are shared: - */ - - pud = pud_offset(pgd, address); - pud_ref = pud_offset(pgd_ref, address); - if (pud_none(*pud_ref)) - return -1; - - if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) - BUG(); - - pmd = pmd_offset(pud, address); - pmd_ref = pmd_offset(pud_ref, address); - if (pmd_none(*pmd_ref)) - return -1; - - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) - BUG(); - - pte_ref = pte_offset_kernel(pmd_ref, address); - if (!pte_present(*pte_ref)) - return -1; - - pte = pte_offset_kernel(pmd, address); - - /* - * Don't use pte_page here, because the mappings can point - * outside mem_map, and the NUMA hash lookup cannot handle - * that: - */ - if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) - BUG(); - - return 0; -#endif -} - int show_unhandled_signals = 1; static inline int @@ -1115,53 +1169,3 @@ good_area: up_read(&mm->mmap_sem); } - -DEFINE_SPINLOCK(pgd_lock); -LIST_HEAD(pgd_list); - -void vmalloc_sync_all(void) -{ - unsigned long address; - -#ifdef CONFIG_X86_32 - if (SHARED_KERNEL_PMD) - return; - - for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE && address < FIXADDR_TOP; - address += PMD_SIZE) { - - unsigned long flags; - struct page *page; - - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - if (!vmalloc_sync_one(page_address(page), address)) - break; - } - spin_unlock_irqrestore(&pgd_lock, flags); - } -#else /* CONFIG_X86_64 */ - for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; - address += PGDIR_SIZE) { - - const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; - struct page *page; - - if (pgd_none(*pgd_ref)) - continue; - - spin_lock_irqsave(&pgd_lock, flags); - list_for_each_entry(page, &pgd_list, lru) { - pgd_t *pgd; - pgd = (pgd_t *)page_address(page) + pgd_index(address); - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - else - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); - } - spin_unlock_irqrestore(&pgd_lock, flags); - } -#endif -} From 8f7661496cece8320137d5e26808825498fd2b26 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:00:29 +0100 Subject: [PATCH 556/682] x86, mm: fault.c, unify oops printing Impact: refine/extend page fault related oops printing on 64-bit - honor the pause_on_oops logic on 64-bit too - print out NX fault warnings on 64-bit as well - factor out the NX fault message to make it git-greppable and readable Note that this means that we do the PF_INSTR check on 32-bit non-PAE as well where it should not occur ... normally. Cannot hurt. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4ce62fb80da..ebfaca3bbb1 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -595,28 +595,24 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) return 0; } +static const char nx_warning[] = KERN_CRIT +"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; + static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) { -#ifdef CONFIG_X86_32 if (!oops_may_print()) return; -#endif -#ifdef CONFIG_X86_PAE if (error_code & PF_INSTR) { unsigned int level; pte_t *pte = lookup_address(address, &level); - if (pte && pte_present(*pte) && !pte_exec(*pte)) { - printk(KERN_CRIT "kernel tried to execute " - "NX-protected page - exploit attempt? " - "(uid: %d)\n", current_uid()); - } + if (pte && pte_present(*pte) && !pte_exec(*pte)) + printk(nx_warning, current_uid()); } -#endif printk(KERN_ALERT "BUG: unable to handle kernel "); if (address < PAGE_SIZE) From 1cc99544dde9e48602979f16b9309fade6e93051 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:07:48 +0100 Subject: [PATCH 557/682] x86, mm: fault.c, unify oops handling Impact: add oops-recursion check to 32-bit Unify the oops state-machine, to the 64-bit version. It is slightly more careful in that it does a recursion check in oops_begin(), and is thus more likely to show the relevant oops. It also means that 32-bit will print one more line at the end of pagefault triggered oopses: printk(KERN_EMERG "CR2: %016lx\n", address); Which is generally good information to be seen in partial-dump digital-camera jpegs ;-) The downside is the somewhat more complex critical path. Both variants have been tested well meanwhile by kernel developers crashing their boxes so i dont think this is a practical worry. This removes 3 ugly #ifdefs from no_context() and makes the function a lot nicer read. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ebfaca3bbb1..8fe2dd254df 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -659,11 +659,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, { struct task_struct *tsk = current; unsigned long *stackend; - -#ifdef CONFIG_X86_64 unsigned long flags; int sig; -#endif /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) @@ -690,11 +687,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice: */ -#ifdef CONFIG_X86_32 - bust_spinlocks(1); -#else flags = oops_begin(); -#endif show_fault_oops(regs, error_code, address); @@ -702,15 +695,10 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (*stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; -#ifdef CONFIG_X86_32 - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); -#else sig = SIGKILL; if (__die("Oops", regs, error_code)) sig = 0; @@ -719,7 +707,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, printk(KERN_EMERG "CR2: %016lx\n", address); oops_end(flags, regs, sig); -#endif } /* From c3731c68668325abddee8665018c74c7156a57be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:22:34 +0100 Subject: [PATCH 558/682] x86, mm: fault.c, remove #ifdef from do_page_fault() Impact: cleanup do_page_fault() has this ugly #ifdef in its prototype: #ifdef CONFIG_X86_64 asmlinkage #endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) Replace it with 'dotraplinkage' which maps to exactly the above construct: nothing on 32-bit and asmlinkage on 64-bit. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8fe2dd254df..9c2dc5d7953 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -972,10 +972,8 @@ static int fault_in_kernel_space(unsigned long address) * and the problem, and then passes it off to one of the appropriate * routines. */ -#ifdef CONFIG_X86_64 -asmlinkage -#endif -void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; From d951734654f76a370a89b4e531af9b765bd13541 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:32:28 +0100 Subject: [PATCH 559/682] x86, mm: rename TASK_SIZE64 => TASK_SIZE_MAX Impact: cleanup Rename TASK_SIZE64 to TASK_SIZE_MAX, and provide the define on 32-bit too. (mapped to TASK_SIZE) This allows 32-bit code to make use of the (former-) TASK_SIZE64 symbol as well, in a clean way. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 9 +++++---- arch/x86/kernel/ptrace.c | 2 +- arch/x86/mm/fault.c | 2 +- arch/x86/vdso/vma.c | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 72914d0315e..c7a98f73821 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -861,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x) * User space process size: 3GB (default). */ #define TASK_SIZE PAGE_OFFSET +#define TASK_SIZE_MAX TASK_SIZE #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP @@ -920,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * User space process size. 47bits minus one guard page. */ -#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. @@ -929,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); 0xc0000000 : 0xFFFFe000) #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX TASK_SIZE64 +#define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d2f7cd5b2c8..fb2159a5c81 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -268,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task) if (test_tsk_thread_flag(task, TIF_IA32)) return IA32_PAGE_OFFSET - 3; #endif - return TASK_SIZE64 - 7; + return TASK_SIZE_MAX - 7; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9c2dc5d7953..6fa9f175cba 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -963,7 +963,7 @@ static int fault_in_kernel_space(unsigned long address) #ifdef CONFIG_X86_32 return address >= TASK_SIZE; #else - return address >= TASK_SIZE64; + return address >= TASK_SIZE_MAX; #endif } diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 9c98cc6ba97..7133cdf9098 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) unsigned long addr, end; unsigned offset; end = (start + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE64) - end = TASK_SIZE64; + if (end >= TASK_SIZE_MAX) + end = TASK_SIZE_MAX; end -= len; /* This loses some more bits than a modulo, but is cheaper */ offset = get_random_int() & (PTRS_PER_PTE - 1); From 7c178a26d3e753d2a4346d3e4b8aa549d387f698 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:27:29 +0100 Subject: [PATCH 560/682] x86, mm: fault.c, remove #ifdef from fault_in_kernel_space() Impact: cleanup Removal of an #ifdef in fault_in_kernel_space(), by making use of the new TASK_SIZE_MAX symbol which is now available on 32-bit too. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6fa9f175cba..f195691ec26 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -960,11 +960,7 @@ access_error(unsigned long error_code, int write, struct vm_area_struct *vma) static int fault_in_kernel_space(unsigned long address) { -#ifdef CONFIG_X86_32 - return address >= TASK_SIZE; -#else return address >= TASK_SIZE_MAX; -#endif } /* From cd1b68f08f6328737928e5b8ba8ef80394680ff0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:39:02 +0100 Subject: [PATCH 561/682] x86, mm: fault.c, give another attempt at prefetch handing before SIGBUS Impact: extend prefetch handling on 64-bit Currently there's an extra is_prefetch() check done in do_sigbus(), which we only do on 32 bits. This is a last-ditch check before we terminate a task, so it's worth giving prefetch instructions another chance - should none of our existing quirks have caught a prefetch instruction related spurious fault. The only risk is if a prefetch causes a real sigbus, in that case we'll not OOM but try another fault. But this code has been on 32-bit for a long time, so it should be fine in practice. So do this on 64-bit too - and thus remove one more #ifdef. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f195691ec26..413e835e4a8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -836,11 +836,9 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) if (!(error_code & PF_USER)) no_context(regs, error_code, address); -#ifdef CONFIG_X86_32 - /* User space => ok to do another page fault: */ + /* User-space => ok to do another page fault: */ if (is_prefetch(regs, error_code, address)) return; -#endif tsk->thread.cr2 = address; tsk->thread.error_code = error_code; From f8eeb2e6be367d79be3617f0a12646bced8b2fe6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:13:36 +0100 Subject: [PATCH 562/682] x86, mm: fault.c, update copyrights Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 413e835e4a8..9bb07d331c3 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1,6 +1,7 @@ /* * Copyright (C) 1995 Linus Torvalds * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. + * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ #include #include From cae3aeb83fef5a7c9c8ac40e653e59dd9a35469c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 21 Feb 2009 16:56:23 +0900 Subject: [PATCH 563/682] percpu: clean up size usage Andrew was concerned about the unit of variables named or have suffix size. Every usage in percpu allocator is in bytes but make it super clear by adding comments. While at it, make pcpu_depopulate_chunk() take int @off and @size like everyone else. Signed-off-by: Tejun Heo Cc: Andrew Morton --- mm/percpu.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 4617d97e877..997724c2ea2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -119,7 +119,7 @@ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ static int pcpu_size_to_slot(int size) { - int highbit = fls(size); + int highbit = fls(size); /* size is in bytes */ return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); } @@ -158,8 +158,8 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, /** * pcpu_realloc - versatile realloc * @p: the current pointer (can be NULL for new allocations) - * @size: the current size (can be 0 for new allocations) - * @new_size: the wanted new size (can be 0 for free) + * @size: the current size in bytes (can be 0 for new allocations) + * @new_size: the wanted new size in bytes (can be 0 for free) * * More robust realloc which can be used to allocate, resize or free a * memory area of arbitrary size. If the needed size goes over @@ -290,8 +290,8 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) * pcpu_split_block - split a map block * @chunk: chunk of interest * @i: index of map block to split - * @head: head size (can be 0) - * @tail: tail size (can be 0) + * @head: head size in bytes (can be 0) + * @tail: tail size in bytes (can be 0) * * Split the @i'th map block into two or three blocks. If @head is * non-zero, @head bytes block is inserted before block @i moving it @@ -346,7 +346,7 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) /** * pcpu_alloc_area - allocate area from a pcpu_chunk * @chunk: chunk of interest - * @size: wanted size + * @size: wanted size in bytes * @align: wanted align * * Try to allocate @size bytes area aligned at @align from @chunk. @@ -540,15 +540,15 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk * @chunk: chunk to depopulate * @off: offset to the area to depopulate - * @size: size of the area to depopulate + * @size: size of the area to depopulate in bytes * @flush: whether to flush cache and tlb or not * * For each cpu, depopulate and unmap pages [@page_start,@page_end) * from @chunk. If @flush is true, vcache is flushed before unmapping * and tlb after. */ -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, size_t off, - size_t size, bool flush) +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, + bool flush) { int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); @@ -617,7 +617,7 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * pcpu_populate_chunk - populate and map an area of a pcpu_chunk * @chunk: chunk of interest * @off: offset to the area to populate - * @size: size of the area to populate + * @size: size of the area to populate in bytes * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. The area is cleared on return. @@ -707,7 +707,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) /** * __alloc_percpu - allocate percpu area - * @size: size of area to allocate + * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * * Allocate percpu area of @size bytes aligned at @align. Might @@ -819,6 +819,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_static - initialize kernel static percpu area * @populate_pte_fn: callback to allocate pagetable * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages + * @cpu_size: the size of static percpu area in bytes * * Initialize kernel static percpu area. The caller should allocate * all the necessary pages and pass them in @pages. From fc6fcdfbb8f1b2553255170f221cd57a4108591f Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sun, 22 Feb 2009 01:00:57 +0100 Subject: [PATCH 564/682] x86: kexec/i386: fix sparse warnings: Using plain integer as NULL pointer Fix these sparse warnings: arch/x86/kernel/machine_kexec_32.c:124:22: warning: Using plain integer as NULL pointer arch/x86/kernel/traps.c:950:24: warning: Using plain integer as NULL pointer Signed-off-by: Hannes Eder Cc: trivial@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/machine_kexec_32.c | 2 +- arch/x86/kernel/traps.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 37f420018a4..f5fc8c781a6 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one( static void machine_kexec_prepare_page_tables(struct kimage *image) { void *control_page; - pmd_t *pmd = 0; + pmd_t *pmd = NULL; control_page = page_address(image->control_code_page); #ifdef CONFIG_X86_PAE diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index acb8c0585ab..2df927b5003 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -942,7 +942,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_BADSTK; - info.si_addr = 0; + info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, 32, SIGILL) == NOTIFY_STOP) return; From 2366c298b5afe52e635afd5604b69ce9fd4471fc Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sun, 22 Feb 2009 01:01:13 +0100 Subject: [PATCH 565/682] x86: numa_32.c: fix sparse warning: Using plain integer as NULL pointer Fix this sparse warning: arch/x86/mm/numa_32.c:197:24: warning: Using plain integer as NULL pointer Signed-off-by: Hannes Eder Cc: trivial@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/numa_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d1f7439d173..3957cd6d645 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -194,7 +194,7 @@ void *alloc_remap(int nid, unsigned long size) size = ALIGN(size, L1_CACHE_BYTES); if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) - return 0; + return NULL; node_remap_alloc_vaddr[nid] += size; memset(allocation, 0, size); From b319eed0aa0a6d710887350a3cb734c572aa64c4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 10:24:18 +0100 Subject: [PATCH 566/682] x86, mm: fault.c, simplify kmmio_fault(), cleanup Clarify the kmmio_fault() comment. Acked-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9bb07d331c3..a03b7279efa 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -57,7 +57,8 @@ enum x86_pf_error_code { }; /* - * (returns 0 if mmiotrace is disabled) + * Returns 0 if mmiotrace is disabled, or if the fault is not + * handled by mmiotrace: */ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { From a47e3ec197f515e25c77805f02d26f9e86456f65 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 20 Feb 2009 18:57:58 -0800 Subject: [PATCH 567/682] x86: ia32_signal: remove unused debug code Impact: cleanup DEBUG_SIG will not be used. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index dd77ac0cac4..adc63f81cb8 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -33,8 +33,6 @@ #include #include -#define DEBUG_SIG 0 - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ @@ -220,12 +218,6 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#if DEBUG_SIG - printk(KERN_DEBUG "SIG restore_sigcontext: " - "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", - sc, sc->err, sc->ip, sc->cs, sc->flags); -#endif - get_user_try { /* * Reload fs and gs if they have changed in the signal @@ -488,11 +480,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, regs->cs = __USER32_CS; regs->ss = __USER32_DS; -#if DEBUG_SIG - printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif - return 0; } @@ -574,10 +561,5 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER32_CS; regs->ss = __USER32_DS; -#if DEBUG_SIG - printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", - current->comm, current->pid, frame, regs->ip, frame->pretcode); -#endif - return 0; } From 8801ead40caa8ba9c7d47a06ff1247c166cbfd5a Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 20 Feb 2009 19:00:13 -0800 Subject: [PATCH 568/682] x86: ia32_signal: introduce GET_SEG() macro Impact: cleanup introduce GET_SEG() macro like arch/x86/kernel/signal.c. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index adc63f81cb8..8dd0903da08 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -192,11 +192,15 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, get_user_ex(regs->x, &sc->x); \ } -#define COPY_SEG_CPL3(seg) { \ - unsigned short tmp; \ - get_user_ex(tmp, &sc->seg); \ - regs->seg = tmp | 3; \ -} +#define GET_SEG(seg) ({ \ + unsigned short tmp; \ + get_user_ex(tmp, &sc->seg); \ + tmp; \ +}) + +#define COPY_SEG_CPL3(seg) do { \ + regs->seg = GET_SEG(seg) | 3; \ +} while (0) #define RELOAD_SEG(seg) { \ unsigned int cur, pre; \ From a967bb3fbe640056bacb0357722f51676bb06e3c Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 20 Feb 2009 19:00:54 -0800 Subject: [PATCH 569/682] x86: ia32_signal: introduce {get|set}_user_seg() Impact: cleanup Introduce {get|set}_user_seg() and loadsegment_xx() macros to make code clean. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 38 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 8dd0903da08..588a7aa937e 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -188,6 +188,14 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, /* * Do a signal return; undo the signal stack. */ +#define loadsegment_gs(v) load_gs_index(v) +#define loadsegment_fs(v) loadsegment(fs, v) +#define loadsegment_ds(v) loadsegment(ds, v) +#define loadsegment_es(v) loadsegment(es, v) + +#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; }) +#define set_user_seg(seg, v) loadsegment_##seg(v) + #define COPY(x) { \ get_user_ex(regs->x, &sc->x); \ } @@ -203,19 +211,18 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } while (0) #define RELOAD_SEG(seg) { \ - unsigned int cur, pre; \ - get_user_ex(pre, &sc->seg); \ - savesegment(seg, cur); \ + unsigned int pre = GET_SEG(seg); \ + unsigned int cur = get_user_seg(seg); \ pre |= 3; \ if (pre != cur) \ - loadsegment(seg, pre); \ + set_user_seg(seg, pre); \ } static int ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 __user *sc, unsigned int *pax) { - unsigned int tmpflags, gs, oldgs, err = 0; + unsigned int tmpflags, err = 0; void __user *buf; u32 tmp; @@ -229,12 +236,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, * the handler, but does not clobber them at least in the * normal case. */ - get_user_ex(gs, &sc->gs); - gs |= 3; - savesegment(gs, oldgs); - if (gs != oldgs) - load_gs_index(gs); - + RELOAD_SEG(gs); RELOAD_SEG(fs); RELOAD_SEG(ds); RELOAD_SEG(es); @@ -333,17 +335,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned int mask) { - int tmp, err = 0; + int err = 0; put_user_try { - savesegment(gs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->gs); - savesegment(fs, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->fs); - savesegment(ds, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->ds); - savesegment(es, tmp); - put_user_ex(tmp, (unsigned int __user *)&sc->es); + put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs); + put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs); + put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds); + put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es); put_user_ex(regs->di, &sc->di); put_user_ex(regs->si, &sc->si); From ef1f87aa7ba6224bef1b750b3272ba281d8f43ed Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 21 Feb 2009 14:23:21 -0800 Subject: [PATCH 570/682] x86: select x2apic ops in early apic probe only if x2apic mode is enabled If BIOS hands over the control to OS in legacy xapic mode, select legacy xapic related ops in the early apic probe and shift to x2apic ops later in the boot sequence, only after enabling x2apic mode. If BIOS hands over the control in x2apic mode, select x2apic related ops in the early apic probe. This fixes the early boot panic, where we were selecting x2apic ops, while the cpu is still in legacy xapic mode. Signed-off-by: Suresh Siddha Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/kernel/apic/apic.c | 9 +-------- arch/x86/kernel/apic/probe_64.c | 13 ++++++++++--- arch/x86/kernel/apic/x2apic_cluster.c | 5 +---- arch/x86/kernel/apic/x2apic_phys.c | 10 +++++----- 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index dce1bf696cc..a6208dc7463 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -146,7 +146,7 @@ static inline u64 native_x2apic_icr_read(void) return val; } -extern int x2apic; +extern int x2apic, x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c12823eb55b..47c2d12e5cf 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1265,14 +1265,7 @@ void __cpuinit end_local_APIC_setup(void) #ifdef CONFIG_X86_X2APIC void check_x2apic(void) { - int msr, msr2; - - if (!cpu_has_x2apic) - return; - - rdmsr(MSR_IA32_APICBASE, msr, msr2); - - if (msr & X2APIC_ENABLE) { + if (x2apic_enabled()) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); x2apic_preenabled = x2apic = 1; } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 70935dd904d..e7c163661c7 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,9 +50,16 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) { - if (!intr_remapping_enabled) - apic = &apic_flat; + if (x2apic && (apic != &apic_x2apic_phys && +#ifdef CONFIG_X86_UV + apic != &apic_x2apic_uv_x && +#endif + apic != &apic_x2apic_cluster)) { + if (x2apic_phys) + apic = &apic_x2apic_phys; + else + apic = &apic_x2apic_cluster; + printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } #endif diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4e39d9ad4d5..354b9c45601 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -14,10 +14,7 @@ DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic) - return 1; - - return 0; + return x2apic_enabled(); } /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index d2d52eb9f7e..5bcb174409b 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -10,7 +10,7 @@ #include #include -static int x2apic_phys; +int x2apic_phys; static int set_x2apic_phys_mode(char *arg) { @@ -21,10 +21,10 @@ early_param("x2apic_phys", set_x2apic_phys_mode); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - if (cpu_has_x2apic && x2apic_phys) - return 1; - - return 0; + if (x2apic_phys) + return x2apic_enabled(); + else + return 0; } /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ From 8425091ff8af2addae118fc510a523b84ce51115 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Fri, 20 Feb 2009 16:59:11 -0800 Subject: [PATCH 571/682] x86: improve the help text of X86_EXTENDED_PLATFORM Change the CONFIG_X86_EXTENDED_PLATFORM help text to display the 32bit/64bit extended platform list. This is as suggested by Ingo. Signed-off-by: Ravikiran Thirumalai Cc: shai@scalex86.org Cc: "Benzi Galili (Benzi@ScaleMP.com)" Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8955262caa3..35efba546e0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -285,6 +285,7 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs +if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" default y @@ -293,12 +294,37 @@ config X86_EXTENDED_PLATFORM standard PC platforms. (which covers the vast majority of systems out there.) - If you enable this option then you'll be able to select a number - of non-PC x86 platforms. + If you enable this option then you'll be able to select support + for the following (non-PC) 32 bit x86 platforms: + AMD Elan + NUMAQ (IBM/Sequent) + RDC R-321x SoC + SGI 320/540 (Visual Workstation) + Summit/EXA (IBM x440) + Unisys ES7000 IA32 series + Voyager (NCR) If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. +endif +if X86_64 +config X86_EXTENDED_PLATFORM + bool "Support for extended (non-PC) x86 platforms" + default y + ---help--- + If you disable this option then the kernel will only support + standard PC platforms. (which covers the vast majority of + systems out there.) + + If you enable this option then you'll be able to select support + for the following (non-PC) 64 bit x86 platforms: + ScaleMP vSMP + SGI Ultraviolet + + If you have one of these systems, or if you want to build a + generic distribution kernel, say Y here - otherwise say N. +endif # This is an alphabetically sorted list of 64 bit extended platforms # Please maintain the alphabetic order if and when there are additions From 965c7ecaf2e2b083d711a01ab33735a4bdeee1a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Feb 2009 23:19:12 +0100 Subject: [PATCH 572/682] x86: remove the Voyager 32-bit subarch Impact: remove unused/broken code The Voyager subarch last built successfully on the v2.6.26 kernel and has been stale since then and does not build on the v2.6.27, v2.6.28 and v2.6.29-rc5 kernels. No actual users beyond the maintainer reported this breakage. Patches were sent and most of the fixes were accepted but the discussion around how to do a few remaining issues cleanly fizzled out with no resolution and the code remained broken. In the v2.6.30 x86 tree development cycle 32-bit subarch support has been reworked and removed - and the Voyager code, beyond the build problems already known, needs serious and significant changes and probably a rewrite to support it. CONFIG_X86_VOYAGER has been marked BROKEN then. The maintainer has been notified but no patches have been sent so far to fix it. While all other subarchs have been converted to the new scheme, voyager is still broken. We'd prefer to receive patches which clean up the current situation in a constructive way, but even in case of removal there is no obstacle to add that support back after the issues have been sorted out in a mutually acceptable fashion. So remove this inactive code for now. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 14 - arch/x86/boot/Makefile | 1 - arch/x86/boot/a20.c | 6 - arch/x86/boot/boot.h | 3 - arch/x86/boot/main.c | 5 - arch/x86/boot/voyager.c | 40 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - arch/x86/include/asm/mach-voyager/do_timer.h | 17 - .../x86/include/asm/mach-voyager/entry_arch.h | 26 - .../x86/include/asm/mach-voyager/setup_arch.h | 12 - arch/x86/include/asm/vic.h | 61 - arch/x86/include/asm/voyager.h | 571 ------ arch/x86/lguest/Kconfig | 1 - arch/x86/mach-voyager/Makefile | 8 - arch/x86/mach-voyager/setup.c | 119 -- arch/x86/mach-voyager/voyager_basic.c | 317 --- arch/x86/mach-voyager/voyager_cat.c | 1197 ----------- arch/x86/mach-voyager/voyager_smp.c | 1805 ----------------- arch/x86/mach-voyager/voyager_thread.c | 128 -- arch/x86/xen/Kconfig | 2 +- drivers/lguest/Kconfig | 2 +- 22 files changed, 2 insertions(+), 4335 deletions(-) delete mode 100644 arch/x86/boot/voyager.c delete mode 100644 arch/x86/include/asm/mach-voyager/do_timer.h delete mode 100644 arch/x86/include/asm/mach-voyager/entry_arch.h delete mode 100644 arch/x86/include/asm/mach-voyager/setup_arch.h delete mode 100644 arch/x86/include/asm/vic.h delete mode 100644 arch/x86/include/asm/voyager.h delete mode 100644 arch/x86/mach-voyager/Makefile delete mode 100644 arch/x86/mach-voyager/setup.c delete mode 100644 arch/x86/mach-voyager/voyager_basic.c delete mode 100644 arch/x86/mach-voyager/voyager_cat.c delete mode 100644 arch/x86/mach-voyager/voyager_smp.c delete mode 100644 arch/x86/mach-voyager/voyager_thread.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 35efba546e0..5e2919c0ff9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -302,7 +302,6 @@ config X86_EXTENDED_PLATFORM SGI 320/540 (Visual Workstation) Summit/EXA (IBM x440) Unisys ES7000 IA32 series - Voyager (NCR) If you have one of these systems, or if you want to build a generic distribution kernel, say Y here - otherwise say N. @@ -423,19 +422,6 @@ config X86_ES7000 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is supposed to run on an IA32-based Unisys ES7000 system. -config X86_VOYAGER - bool "Voyager (NCR)" - depends on SMP && !PCI && BROKEN - depends on X86_32_NON_STANDARD - ---help--- - Voyager is an MCA-based 32-way capable SMP architecture proprietary - to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. - - *** WARNING *** - - If you do not specifically know you have a Voyager based machine, - say N here, otherwise the kernel you build will not be bootable. - config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index cd48c721001..c70eff69a1f 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -32,7 +32,6 @@ setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o setup-y += printf.o string.o tty.o video.o video-mode.o version.o setup-$(CONFIG_X86_APM_BOOT) += apm.o -setup-$(CONFIG_X86_VOYAGER) += voyager.o # The link order of the video-*.o modules can matter. In particular, # video-vga.o *must* be listed first, followed by video-vesa.o. diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index fba8e9c6a50..7c19ce8c244 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -126,11 +126,6 @@ static void enable_a20_fast(void) int enable_a20(void) { -#ifdef CONFIG_X86_VOYAGER - /* On Voyager, a20_test() is unsafe? */ - enable_a20_kbc(); - return 0; -#else int loops = A20_ENABLE_LOOPS; int kbc_err; @@ -164,5 +159,4 @@ int enable_a20(void) } return -1; -#endif } diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index cc0ef13fba7..7b2692e897e 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -302,9 +302,6 @@ void probe_cards(int unsafe); /* video-vesa.c */ void vesa_store_edid(void); -/* voyager.c */ -int query_voyager(void); - #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 197421db1af..58f0415d3ae 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -149,11 +149,6 @@ void main(void) /* Query MCA information */ query_mca(); - /* Voyager */ -#ifdef CONFIG_X86_VOYAGER - query_voyager(); -#endif - /* Query Intel SpeedStep (IST) information */ query_ist(); diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c deleted file mode 100644 index 433909d61e5..00000000000 --- a/arch/x86/boot/voyager.c +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2. - * - * ----------------------------------------------------------------------- */ - -/* - * Get the Voyager config information - */ - -#include "boot.h" - -int query_voyager(void) -{ - u8 err; - u16 es, di; - /* Abuse the apm_bios_info area for this */ - u8 *data_ptr = (u8 *)&boot_params.apm_bios_info; - - data_ptr[0] = 0xff; /* Flag on config not found(?) */ - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=q" (err), "=r" (es), "=D" (di) - : "a" (0xffc0)); - - if (err) - return -1; /* Not Voyager */ - - set_fs(es); - copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */ - return 0; -} diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 096dd5359cd..5c023f6f652 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -197,7 +197,6 @@ CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set # CONFIG_X86_RDC321X is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2efb5d5063f..4157cc4a2bd 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -199,7 +199,6 @@ CONFIG_SPARSE_IRQ=y CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set -# CONFIG_X86_VOYAGER is not set # CONFIG_X86_GENERICARCH is not set # CONFIG_X86_VSMP is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y diff --git a/arch/x86/include/asm/mach-voyager/do_timer.h b/arch/x86/include/asm/mach-voyager/do_timer.h deleted file mode 100644 index 9e5a459fd15..00000000000 --- a/arch/x86/include/asm/mach-voyager/do_timer.h +++ /dev/null @@ -1,17 +0,0 @@ -/* defines for inline arch setup functions */ -#include - -#include -#include - -/** - * do_timer_interrupt_hook - hook into timer tick - * - * Call the pit clock event handler. see asm/i8253.h - **/ -static inline void do_timer_interrupt_hook(void) -{ - global_clock_event->event_handler(global_clock_event); - voyager_timer_interrupt(); -} - diff --git a/arch/x86/include/asm/mach-voyager/entry_arch.h b/arch/x86/include/asm/mach-voyager/entry_arch.h deleted file mode 100644 index ae52624b593..00000000000 --- a/arch/x86/include/asm/mach-voyager/entry_arch.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2002 - * - * Author: James.Bottomley@HansenPartnership.com - * - * linux/arch/i386/voyager/entry_arch.h - * - * This file builds the VIC and QIC CPI gates - */ - -/* initialise the voyager interrupt gates - * - * This uses the macros in irq.h to set up assembly jump gates. The - * calls are then redirected to the same routine with smp_ prefixed */ -BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT) -BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT) -BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0); - -/* do all the QIC interrupts */ -BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI); -BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI); -BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI); -BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI); -BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI); -BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI); diff --git a/arch/x86/include/asm/mach-voyager/setup_arch.h b/arch/x86/include/asm/mach-voyager/setup_arch.h deleted file mode 100644 index 71729ca05cd..00000000000 --- a/arch/x86/include/asm/mach-voyager/setup_arch.h +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include -#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \ - (&boot_params.apm_bios_info)) - -/* Hook to call BIOS initialisation function */ - -/* for voyager, pass the voyager BIOS/SUS info area to the detection - * routines */ - -#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO); - diff --git a/arch/x86/include/asm/vic.h b/arch/x86/include/asm/vic.h deleted file mode 100644 index 53100f35361..00000000000 --- a/arch/x86/include/asm/vic.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * Standard include definitions for the NCR Voyager Interrupt Controller */ - -/* The eight CPI vectors. To activate a CPI, you write a bit mask - * corresponding to the processor set to be interrupted into the - * relevant register. That set of CPUs will then be interrupted with - * the CPI */ -static const int VIC_CPI_Registers[] = - {0xFC00, 0xFC01, 0xFC08, 0xFC09, - 0xFC10, 0xFC11, 0xFC18, 0xFC19 }; - -#define VIC_PROC_WHO_AM_I 0xfc29 -# define QUAD_IDENTIFIER 0xC0 -# define EIGHT_SLOT_IDENTIFIER 0xE0 -#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72 -#define VIC_CPI_BASE_REGISTER 0xFC41 -#define VIC_PROCESSOR_ID 0xFC21 -# define VIC_CPU_MASQUERADE_ENABLE 0x8 - -#define VIC_CLAIM_REGISTER_0 0xFC38 -#define VIC_CLAIM_REGISTER_1 0xFC39 -#define VIC_REDIRECT_REGISTER_0 0xFC60 -#define VIC_REDIRECT_REGISTER_1 0xFC61 -#define VIC_PRIORITY_REGISTER 0xFC20 - -#define VIC_PRIMARY_MC_BASE 0xFC48 -#define VIC_SECONDARY_MC_BASE 0xFC49 - -#define QIC_PROCESSOR_ID 0xFC71 -# define QIC_CPUID_ENABLE 0x08 - -#define QIC_VIC_CPI_BASE_REGISTER 0xFC79 -#define QIC_CPI_BASE_REGISTER 0xFC7A - -#define QIC_MASK_REGISTER0 0xFC80 -/* NOTE: these are masked high, enabled low */ -# define QIC_PERF_TIMER 0x01 -# define QIC_LPE 0x02 -# define QIC_SYS_INT 0x04 -# define QIC_CMN_INT 0x08 -/* at the moment, just enable CMN_INT, disable SYS_INT */ -# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */)) -#define QIC_MASK_REGISTER1 0xFC81 -# define QIC_BOOT_CPI_MASK 0xFE -/* Enable CPI's 1-6 inclusive */ -# define QIC_CPI_ENABLE 0x81 - -#define QIC_INTERRUPT_CLEAR0 0xFC8A -#define QIC_INTERRUPT_CLEAR1 0xFC8B - -/* this is where we place the CPI vectors */ -#define VIC_DEFAULT_CPI_BASE 0xC0 -/* this is where we place the QIC CPI vectors */ -#define QIC_DEFAULT_CPI_BASE 0xD0 - -#define VIC_BOOT_INTERRUPT_MASK 0xfe - -extern void smp_vic_timer_interrupt(void); diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h deleted file mode 100644 index c1635d43616..00000000000 --- a/arch/x86/include/asm/voyager.h +++ /dev/null @@ -1,571 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * Standard include definitions for the NCR Voyager system */ - -#undef VOYAGER_DEBUG -#undef VOYAGER_CAT_DEBUG - -#ifdef VOYAGER_DEBUG -#define VDEBUG(x) printk x -#else -#define VDEBUG(x) -#endif - -/* There are three levels of voyager machine: 3,4 and 5. The rule is - * if it's less than 3435 it's a Level 3 except for a 3360 which is - * a level 4. A 3435 or above is a Level 5 */ -#define VOYAGER_LEVEL5_AND_ABOVE 0x3435 -#define VOYAGER_LEVEL4 0x3360 - -/* The L4 DINO ASIC */ -#define VOYAGER_DINO 0x43 - -/* voyager ports in standard I/O space */ -#define VOYAGER_MC_SETUP 0x96 - - -#define VOYAGER_CAT_CONFIG_PORT 0x97 -# define VOYAGER_CAT_DESELECT 0xff -#define VOYAGER_SSPB_RELOCATION_PORT 0x98 - -/* Valid CAT controller commands */ -/* start instruction register cycle */ -#define VOYAGER_CAT_IRCYC 0x01 -/* start data register cycle */ -#define VOYAGER_CAT_DRCYC 0x02 -/* move to execute state */ -#define VOYAGER_CAT_RUN 0x0F -/* end operation */ -#define VOYAGER_CAT_END 0x80 -/* hold in idle state */ -#define VOYAGER_CAT_HOLD 0x90 -/* single step an "intest" vector */ -#define VOYAGER_CAT_STEP 0xE0 -/* return cat controller to CLEMSON mode */ -#define VOYAGER_CAT_CLEMSON 0xFF - -/* the default cat command header */ -#define VOYAGER_CAT_HEADER 0x7F - -/* the range of possible CAT module ids in the system */ -#define VOYAGER_MIN_MODULE 0x10 -#define VOYAGER_MAX_MODULE 0x1f - -/* The voyager registers per asic */ -#define VOYAGER_ASIC_ID_REG 0x00 -#define VOYAGER_ASIC_TYPE_REG 0x01 -/* the sub address registers can be made auto incrementing on reads */ -#define VOYAGER_AUTO_INC_REG 0x02 -# define VOYAGER_AUTO_INC 0x04 -# define VOYAGER_NO_AUTO_INC 0xfb -#define VOYAGER_SUBADDRDATA 0x03 -#define VOYAGER_SCANPATH 0x05 -# define VOYAGER_CONNECT_ASIC 0x01 -# define VOYAGER_DISCONNECT_ASIC 0xfe -#define VOYAGER_SUBADDRLO 0x06 -#define VOYAGER_SUBADDRHI 0x07 -#define VOYAGER_SUBMODSELECT 0x08 -#define VOYAGER_SUBMODPRESENT 0x09 - -#define VOYAGER_SUBADDR_LO 0xff -#define VOYAGER_SUBADDR_HI 0xffff - -/* the maximum size of a scan path -- used to form instructions */ -#define VOYAGER_MAX_SCAN_PATH 0x100 -/* the biggest possible register size (in bytes) */ -#define VOYAGER_MAX_REG_SIZE 4 - -/* Total number of possible modules (including submodules) */ -#define VOYAGER_MAX_MODULES 16 -/* Largest number of asics per module */ -#define VOYAGER_MAX_ASICS_PER_MODULE 7 - -/* the CAT asic of each module is always the first one */ -#define VOYAGER_CAT_ID 0 -#define VOYAGER_PSI 0x1a - -/* voyager instruction operations and registers */ -#define VOYAGER_READ_CONFIG 0x1 -#define VOYAGER_WRITE_CONFIG 0x2 -#define VOYAGER_BYPASS 0xff - -typedef struct voyager_asic { - __u8 asic_addr; /* ASIC address; Level 4 */ - __u8 asic_type; /* ASIC type */ - __u8 asic_id; /* ASIC id */ - __u8 jtag_id[4]; /* JTAG id */ - __u8 asic_location; /* Location within scan path; start w/ 0 */ - __u8 bit_location; /* Location within bit stream; start w/ 0 */ - __u8 ireg_length; /* Instruction register length */ - __u16 subaddr; /* Amount of sub address space */ - struct voyager_asic *next; /* Next asic in linked list */ -} voyager_asic_t; - -typedef struct voyager_module { - __u8 module_addr; /* Module address */ - __u8 scan_path_connected; /* Scan path connected */ - __u16 ee_size; /* Size of the EEPROM */ - __u16 num_asics; /* Number of Asics */ - __u16 inst_bits; /* Instruction bits in the scan path */ - __u16 largest_reg; /* Largest register in the scan path */ - __u16 smallest_reg; /* Smallest register in the scan path */ - voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */ - struct voyager_module *submodule; /* Submodule pointer */ - struct voyager_module *next; /* Next module in linked list */ -} voyager_module_t; - -typedef struct voyager_eeprom_hdr { - __u8 module_id[4]; - __u8 version_id; - __u8 config_id; - __u16 boundry_id; /* boundary scan id */ - __u16 ee_size; /* size of EEPROM */ - __u8 assembly[11]; /* assembly # */ - __u8 assembly_rev; /* assembly rev */ - __u8 tracer[4]; /* tracer number */ - __u16 assembly_cksum; /* asm checksum */ - __u16 power_consump; /* pwr requirements */ - __u16 num_asics; /* number of asics */ - __u16 bist_time; /* min. bist time */ - __u16 err_log_offset; /* error log offset */ - __u16 scan_path_offset;/* scan path offset */ - __u16 cct_offset; - __u16 log_length; /* length of err log */ - __u16 xsum_end; /* offset to end of - checksum */ - __u8 reserved[4]; - __u8 sflag; /* starting sentinal */ - __u8 part_number[13]; /* prom part number */ - __u8 version[10]; /* version number */ - __u8 signature[8]; - __u16 eeprom_chksum; - __u32 data_stamp_offset; - __u8 eflag ; /* ending sentinal */ -} __attribute__((packed)) voyager_eprom_hdr_t; - - - -#define VOYAGER_EPROM_SIZE_OFFSET \ - ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size))) -#define VOYAGER_XSUM_END_OFFSET 0x2a - -/* the following three definitions are for internal table layouts - * in the module EPROMs. We really only care about the IDs and - * offsets */ -typedef struct voyager_sp_table { - __u8 asic_id; - __u8 bypass_flag; - __u16 asic_data_offset; - __u16 config_data_offset; -} __attribute__((packed)) voyager_sp_table_t; - -typedef struct voyager_jtag_table { - __u8 icode[4]; - __u8 runbist[4]; - __u8 intest[4]; - __u8 samp_preld[4]; - __u8 ireg_len; -} __attribute__((packed)) voyager_jtt_t; - -typedef struct voyager_asic_data_table { - __u8 jtag_id[4]; - __u16 length_bsr; - __u16 length_bist_reg; - __u32 bist_clk; - __u16 subaddr_bits; - __u16 seed_bits; - __u16 sig_bits; - __u16 jtag_offset; -} __attribute__((packed)) voyager_at_t; - -/* Voyager Interrupt Controller (VIC) registers */ - -/* Base to add to Cross Processor Interrupts (CPIs) when triggering - * the CPU IRQ line */ -/* register defines for the WCBICs (one per processor) */ -#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */ -#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */ -#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */ -#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */ -#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */ -#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */ -#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */ -#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */ - - -/* top of memory registers */ -#define VOYAGER_WCBIC_TOM_L 0x4 -#define VOYAGER_WCBIC_TOM_H 0x5 - -/* register defines for Voyager Memory Contol (VMC) - * these are present on L4 machines only */ -#define VOYAGER_VMC1 0x81 -#define VOYAGER_VMC2 0x91 -#define VOYAGER_VMC3 0xa1 -#define VOYAGER_VMC4 0xb1 - -/* VMC Ports */ -#define VOYAGER_VMC_MEMORY_SETUP 0x9 -# define VMC_Interleaving 0x01 -# define VMC_4Way 0x02 -# define VMC_EvenCacheLines 0x04 -# define VMC_HighLine 0x08 -# define VMC_Start0_Enable 0x20 -# define VMC_Start1_Enable 0x40 -# define VMC_Vremap 0x80 -#define VOYAGER_VMC_BANK_DENSITY 0xa -# define VMC_BANK_EMPTY 0 -# define VMC_BANK_4MB 1 -# define VMC_BANK_16MB 2 -# define VMC_BANK_64MB 3 -# define VMC_BANK0_MASK 0x03 -# define VMC_BANK1_MASK 0x0C -# define VMC_BANK2_MASK 0x30 -# define VMC_BANK3_MASK 0xC0 - -/* Magellan Memory Controller (MMC) defines - present on L5 */ -#define VOYAGER_MMC_ASIC_ID 1 -/* the two memory modules corresponding to memory cards in the system */ -#define VOYAGER_MMC_MEMORY0_MODULE 0x14 -#define VOYAGER_MMC_MEMORY1_MODULE 0x15 -/* the Magellan Memory Address (MMA) defines */ -#define VOYAGER_MMA_ASIC_ID 2 - -/* Submodule number for the Quad Baseboard */ -#define VOYAGER_QUAD_BASEBOARD 1 - -/* ASIC defines for the Quad Baseboard */ -#define VOYAGER_QUAD_QDATA0 1 -#define VOYAGER_QUAD_QDATA1 2 -#define VOYAGER_QUAD_QABC 3 - -/* Useful areas in extended CMOS */ -#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a -#define VOYAGER_MEMORY_CLICKMAP 0xa23 -#define VOYAGER_DUMP_LOCATION 0xb1a - -/* SUS In Control bit - used to tell SUS that we don't need to be - * babysat anymore */ -#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff -# define VOYAGER_IN_CONTROL_FLAG 0x80 - -/* Voyager PSI defines */ -#define VOYAGER_PSI_STATUS_REG 0x08 -# define PSI_DC_FAIL 0x01 -# define PSI_MON 0x02 -# define PSI_FAULT 0x04 -# define PSI_ALARM 0x08 -# define PSI_CURRENT 0x10 -# define PSI_DVM 0x20 -# define PSI_PSCFAULT 0x40 -# define PSI_STAT_CHG 0x80 - -#define VOYAGER_PSI_SUPPLY_REG 0x8000 - /* read */ -# define PSI_FAIL_DC 0x01 -# define PSI_FAIL_AC 0x02 -# define PSI_MON_INT 0x04 -# define PSI_SWITCH_OFF 0x08 -# define PSI_HX_OFF 0x10 -# define PSI_SECURITY 0x20 -# define PSI_CMOS_BATT_LOW 0x40 -# define PSI_CMOS_BATT_FAIL 0x80 - /* write */ -# define PSI_CLR_SWITCH_OFF 0x13 -# define PSI_CLR_HX_OFF 0x14 -# define PSI_CLR_CMOS_BATT_FAIL 0x17 - -#define VOYAGER_PSI_MASK 0x8001 -# define PSI_MASK_MASK 0x10 - -#define VOYAGER_PSI_AC_FAIL_REG 0x8004 -#define AC_FAIL_STAT_CHANGE 0x80 - -#define VOYAGER_PSI_GENERAL_REG 0x8007 - /* read */ -# define PSI_SWITCH_ON 0x01 -# define PSI_SWITCH_ENABLED 0x02 -# define PSI_ALARM_ENABLED 0x08 -# define PSI_SECURE_ENABLED 0x10 -# define PSI_COLD_RESET 0x20 -# define PSI_COLD_START 0x80 - /* write */ -# define PSI_POWER_DOWN 0x10 -# define PSI_SWITCH_DISABLE 0x01 -# define PSI_SWITCH_ENABLE 0x11 -# define PSI_CLEAR 0x12 -# define PSI_ALARM_DISABLE 0x03 -# define PSI_ALARM_ENABLE 0x13 -# define PSI_CLEAR_COLD_RESET 0x05 -# define PSI_SET_COLD_RESET 0x15 -# define PSI_CLEAR_COLD_START 0x07 -# define PSI_SET_COLD_START 0x17 - - - -struct voyager_bios_info { - __u8 len; - __u8 major; - __u8 minor; - __u8 debug; - __u8 num_classes; - __u8 class_1; - __u8 class_2; -}; - -/* The following structures and definitions are for the Kernel/SUS - * interface these are needed to find out how SUS initialised any Quad - * boards in the system */ - -#define NUMBER_OF_MC_BUSSES 2 -#define SLOTS_PER_MC_BUS 8 -#define MAX_CPUS 16 /* 16 way CPU system */ -#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */ -#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */ -#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */ -#define NUMBER_OF_POS_REGS 8 - -typedef struct { - __u8 MC_Slot; - __u8 POS_Values[NUMBER_OF_POS_REGS]; -} __attribute__((packed)) MC_SlotInformation_t; - -struct QuadDescription { - __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields - * will be zero except for slot */ - __u8 StructureVersion; - __u32 CPI_BaseAddress; - __u32 LARC_BankSize; - __u32 LocalMemoryStateBits; - __u8 Slot; /* Processor slots 1 - 4 */ -} __attribute__((packed)); - -struct ProcBoardInfo { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOfBoards; - struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; -} __attribute__((packed)); - -struct CacheDescription { - __u8 Level; - __u32 TotalSize; - __u16 LineSize; - __u8 Associativity; - __u8 CacheType; - __u8 WriteType; - __u8 Number_CPUs_SharedBy; - __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; - -} __attribute__((packed)); - -struct CPU_Description { - __u8 CPU_HardwareId; - char *FRU_String; - __u8 NumberOfCacheLevels; - struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; -} __attribute__((packed)); - -struct CPU_Info { - __u8 Type; - __u8 StructureVersion; - __u8 NumberOf_CPUs; - struct CPU_Description CPU_Data[MAX_CPUS]; -} __attribute__((packed)); - - -/* - * This structure will be used by SUS and the OS. - * The assumption about this structure is that no blank space is - * packed in it by our friend the compiler. - */ -typedef struct { - __u8 Mailbox_SUS; /* Written to by SUS to give - commands/response to the OS */ - __u8 Mailbox_OS; /* Written to by the OS to give - commands/response to SUS */ - __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the - interface SUS supports */ - __u8 OS_MailboxVersion; /* Tells SUS which iteration of the - interface the OS supports */ - __u32 OS_Flags; /* Flags set by the OS as info for - SUS */ - __u32 SUS_Flags; /* Flags set by SUS as info - for the OS */ - __u32 WatchDogPeriod; /* Watchdog period (in seconds) which - the DP uses to see if the OS - is dead */ - __u32 WatchDogCount; /* Updated by the OS on every tic. */ - __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS - where to stuff the SUS error log - on a dump */ - MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS]; - /* Storage for MCA POS data */ - /* All new SECOND_PASS_INTERFACE fields added from this point */ - struct ProcBoardInfo *BoardData; - struct CPU_Info *CPU_Data; - /* All new fields must be added from this point */ -} Voyager_KernelSUS_Mbox_t; - -/* structure for finding the right memory address to send a QIC CPI to */ -struct voyager_qic_cpi { - /* Each cache line (32 bytes) can trigger a cpi. The cpi - * read/write may occur anywhere in the cache line---pick the - * middle to be safe */ - struct { - __u32 pad1[3]; - __u32 cpi; - __u32 pad2[4]; - } qic_cpi[8]; -}; - -struct voyager_status { - __u32 power_fail:1; - __u32 switch_off:1; - __u32 request_from_kernel:1; -}; - -struct voyager_psi_regs { - __u8 cat_id; - __u8 cat_dev; - __u8 cat_control; - __u8 subaddr; - __u8 dummy4; - __u8 checkbit; - __u8 subaddr_low; - __u8 subaddr_high; - __u8 intstatus; - __u8 stat1; - __u8 stat3; - __u8 fault; - __u8 tms; - __u8 gen; - __u8 sysconf; - __u8 dummy15; -}; - -struct voyager_psi_subregs { - __u8 supply; - __u8 mask; - __u8 present; - __u8 DCfail; - __u8 ACfail; - __u8 fail; - __u8 UPSfail; - __u8 genstatus; -}; - -struct voyager_psi { - struct voyager_psi_regs regs; - struct voyager_psi_subregs subregs; -}; - -struct voyager_SUS { -#define VOYAGER_DUMP_BUTTON_NMI 0x1 -#define VOYAGER_SUS_VALID 0x2 -#define VOYAGER_SYSINT_COMPLETE 0x3 - __u8 SUS_mbox; -#define VOYAGER_NO_COMMAND 0x0 -#define VOYAGER_IGNORE_DUMP 0x1 -#define VOYAGER_DO_DUMP 0x2 -#define VOYAGER_SYSINT_HANDSHAKE 0x3 -#define VOYAGER_DO_MEM_DUMP 0x4 -#define VOYAGER_SYSINT_WAS_RECOVERED 0x5 - __u8 kernel_mbox; -#define VOYAGER_MAILBOX_VERSION 0x10 - __u8 SUS_version; - __u8 kernel_version; -#define VOYAGER_OS_HAS_SYSINT 0x1 -#define VOYAGER_OS_IN_PROGRESS 0x2 -#define VOYAGER_UPDATING_WDPERIOD 0x4 - __u32 kernel_flags; -#define VOYAGER_SUS_BOOTING 0x1 -#define VOYAGER_SUS_IN_PROGRESS 0x2 - __u32 SUS_flags; - __u32 watchdog_period; - __u32 watchdog_count; - __u32 SUS_errorlog; - /* lots of system configuration stuff under here */ -}; - -/* Variables exported by voyager_smp */ -extern __u32 voyager_extended_vic_processors; -extern __u32 voyager_allowed_boot_processors; -extern __u32 voyager_quad_processors; -extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS]; -extern struct voyager_SUS *voyager_SUS; - -/* variables exported always */ -extern struct task_struct *voyager_thread; -extern int voyager_level; -extern struct voyager_status voyager_status; - -/* functions exported by the voyager and voyager_smp modules */ -extern int voyager_cat_readb(__u8 module, __u8 asic, int reg); -extern void voyager_cat_init(void); -extern void voyager_detect(struct voyager_bios_info *); -extern void voyager_trap_init(void); -extern void voyager_setup_irqs(void); -extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length); -extern void voyager_smp_intr_init(void); -extern __u8 voyager_extended_cmos_read(__u16 cmos_address); -extern void voyager_smp_dump(void); -extern void voyager_timer_interrupt(void); -extern void smp_local_timer_interrupt(void); -extern void voyager_power_off(void); -extern void smp_voyager_power_off(void *dummy); -extern void voyager_restart(void); -extern void voyager_cat_power_off(void); -extern void voyager_cat_do_common_interrupt(void); -extern void voyager_handle_nmi(void); -extern void voyager_smp_intr_init(void); -/* Commands for the following are */ -#define VOYAGER_PSI_READ 0 -#define VOYAGER_PSI_WRITE 1 -#define VOYAGER_PSI_SUBREAD 2 -#define VOYAGER_PSI_SUBWRITE 3 -extern void voyager_cat_psi(__u8, __u16, __u8 *); - -/* These define the CPIs we use in linux */ -#define VIC_CPI_LEVEL0 0 -#define VIC_CPI_LEVEL1 1 -/* now the fake CPIs */ -#define VIC_TIMER_CPI 2 -#define VIC_INVALIDATE_CPI 3 -#define VIC_RESCHEDULE_CPI 4 -#define VIC_ENABLE_IRQ_CPI 5 -#define VIC_CALL_FUNCTION_CPI 6 -#define VIC_CALL_FUNCTION_SINGLE_CPI 7 - -/* Now the QIC CPIs: Since we don't need the two initial levels, - * these are 2 less than the VIC CPIs */ -#define QIC_CPI_OFFSET 1 -#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) -#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) -#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) -#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) -#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) - -#define VIC_START_FAKE_CPI VIC_TIMER_CPI -#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI - -/* this is the SYS_INT CPI. */ -#define VIC_SYS_INT 8 -#define VIC_CMN_INT 15 - -/* This is the boot CPI for alternate processors. It gets overwritten - * by the above once the system has activated all available processors */ -#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 -#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) - -extern asmlinkage void vic_cpi_interrupt(void); -extern asmlinkage void vic_sys_interrupt(void); -extern asmlinkage void vic_cmn_interrupt(void); -extern asmlinkage void qic_timer_interrupt(void); -extern asmlinkage void qic_invalidate_interrupt(void); -extern asmlinkage void qic_reschedule_interrupt(void); -extern asmlinkage void qic_enable_irq_interrupt(void); -extern asmlinkage void qic_call_function_interrupt(void); diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index c70e12b1a63..8dab8f7844d 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -3,7 +3,6 @@ config LGUEST_GUEST select PARAVIRT depends on X86_32 depends on !X86_PAE - depends on !X86_VOYAGER select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/arch/x86/mach-voyager/Makefile b/arch/x86/mach-voyager/Makefile deleted file mode 100644 index 15c250b371d..00000000000 --- a/arch/x86/mach-voyager/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the linux kernel. -# - -EXTRA_CFLAGS := -Iarch/x86/kernel -obj-y := setup.o voyager_basic.o voyager_thread.o - -obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c deleted file mode 100644 index 88c3c555634..00000000000 --- a/arch/x86/mach-voyager/setup.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Machine specific setup for generic - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -void __init pre_intr_init_hook(void) -{ - init_ISA_irqs(); -} - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - -void __init intr_init_hook(void) -{ -#ifdef CONFIG_SMP - voyager_smp_intr_init(); -#endif - - setup_irq(2, &irq2); -} - -static void voyager_disable_tsc(void) -{ - /* Voyagers run their CPUs from independent clocks, so disable - * the TSC code because we can't sync them */ - setup_clear_cpu_cap(X86_FEATURE_TSC); -} - -void __init pre_setup_arch_hook(void) -{ - voyager_disable_tsc(); -} - -void __init pre_time_init_hook(void) -{ - voyager_disable_tsc(); -} - -void __init trap_init_hook(void) -{ -} - -static struct irqaction irq0 = { - .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, - .mask = CPU_MASK_NONE, - .name = "timer" -}; - -void __init time_init_hook(void) -{ - irq0.mask = cpumask_of_cpu(safe_smp_processor_id()); - setup_irq(0, &irq0); -} - -/* Hook for machine specific memory setup. */ - -char *__init machine_specific_memory_setup(void) -{ - char *who; - int new_nr; - - who = "NOT VOYAGER"; - - if (voyager_level == 5) { - __u32 addr, length; - int i; - - who = "Voyager-SUS"; - - e820.nr_map = 0; - for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { - e820_add_region(addr, length, E820_RAM); - } - return who; - } else if (voyager_level == 4) { - __u32 tom; - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; - /* select the DINO config space */ - outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT); - /* Read DINO top of memory register */ - tom = ((inb(catbase + 0x4) & 0xf0) << 16) - + ((inb(catbase + 0x5) & 0x7f) << 24); - - if (inb(catbase) != VOYAGER_DINO) { - printk(KERN_ERR - "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); - tom = (boot_params.screen_info.ext_mem_k) << 10; - } - who = "Voyager-TOM"; - e820_add_region(0, 0x9f000, E820_RAM); - /* map from 1M to top of memory */ - e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, - E820_RAM); - /* FIXME: Should check the ASICs to see if I need to - * take out the 8M window. Just do it at the moment - * */ - e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, - E820_RESERVED); - return who; - } - - return default_machine_specific_memory_setup(); -} diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c deleted file mode 100644 index 46d6f806769..00000000000 --- a/arch/x86/mach-voyager/voyager_basic.c +++ /dev/null @@ -1,317 +0,0 @@ -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file contains all the voyager specific routines for getting - * initialisation of the architecture to function. For additional - * features see: - * - * voyager_cat.c - Voyager CAT bus interface - * voyager_smp.c - Voyager SMP hal (emulates linux smp.c) - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Power off function, if any - */ -void (*pm_power_off) (void); -EXPORT_SYMBOL(pm_power_off); - -int voyager_level = 0; - -struct voyager_SUS *voyager_SUS = NULL; - -#ifdef CONFIG_SMP -static void voyager_dump(int dummy1, struct tty_struct *dummy3) -{ - /* get here via a sysrq */ - voyager_smp_dump(); -} - -static struct sysrq_key_op sysrq_voyager_dump_op = { - .handler = voyager_dump, - .help_msg = "Voyager", - .action_msg = "Dump Voyager Status", -}; -#endif - -void voyager_detect(struct voyager_bios_info *bios) -{ - if (bios->len != 0xff) { - int class = (bios->class_1 << 8) - | (bios->class_2 & 0xff); - - printk("Voyager System detected.\n" - " Class %x, Revision %d.%d\n", - class, bios->major, bios->minor); - if (class == VOYAGER_LEVEL4) - voyager_level = 4; - else if (class < VOYAGER_LEVEL5_AND_ABOVE) - voyager_level = 3; - else - voyager_level = 5; - printk(" Architecture Level %d\n", voyager_level); - if (voyager_level < 4) - printk - ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n"); - /* install the power off handler */ - pm_power_off = voyager_power_off; -#ifdef CONFIG_SMP - register_sysrq_key('v', &sysrq_voyager_dump_op); -#endif - } else { - printk("\n\n**WARNING**: No Voyager Subsystem Found\n"); - } -} - -void voyager_system_interrupt(int cpl, void *dev_id) -{ - printk("Voyager: detected system interrupt\n"); -} - -/* Routine to read information from the extended CMOS area */ -__u8 voyager_extended_cmos_read(__u16 addr) -{ - outb(addr & 0xff, 0x74); - outb((addr >> 8) & 0xff, 0x75); - return inb(0x76); -} - -/* internal definitions for the SUS Click Map of memory */ - -#define CLICK_ENTRIES 16 -#define CLICK_SIZE 4096 /* click to byte conversion for Length */ - -typedef struct ClickMap { - struct Entry { - __u32 Address; - __u32 Length; - } Entry[CLICK_ENTRIES]; -} ClickMap_t; - -/* This routine is pretty much an awful hack to read the bios clickmap by - * mapping it into page 0. There are usually three regions in the map: - * Base Memory - * Extended Memory - * zero length marker for end of map - * - * Returns are 0 for failure and 1 for success on extracting region. - */ -int __init voyager_memory_detect(int region, __u32 * start, __u32 * length) -{ - int i; - int retval = 0; - __u8 cmos[4]; - ClickMap_t *map; - unsigned long map_addr; - unsigned long old; - - if (region >= CLICK_ENTRIES) { - printk("Voyager: Illegal ClickMap region %d\n", region); - return 0; - } - - for (i = 0; i < sizeof(cmos); i++) - cmos[i] = - voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i); - - map_addr = *(unsigned long *)cmos; - - /* steal page 0 for this */ - old = pg0[0]; - pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); - local_flush_tlb(); - /* now clear everything out but page 0 */ - map = (ClickMap_t *) (map_addr & (~PAGE_MASK)); - - /* zero length is the end of the clickmap */ - if (map->Entry[region].Length != 0) { - *length = map->Entry[region].Length * CLICK_SIZE; - *start = map->Entry[region].Address; - retval = 1; - } - - /* replace the mapping */ - pg0[0] = old; - local_flush_tlb(); - return retval; -} - -/* voyager specific handling code for timer interrupts. Used to hand - * off the timer tick to the SMP code, since the VIC doesn't have an - * internal timer (The QIC does, but that's another story). */ -void voyager_timer_interrupt(void) -{ - if ((jiffies & 0x3ff) == 0) { - - /* There seems to be something flaky in either - * hardware or software that is resetting the timer 0 - * count to something much higher than it should be - * This seems to occur in the boot sequence, just - * before root is mounted. Therefore, every 10 - * seconds or so, we sanity check the timer zero count - * and kick it back to where it should be. - * - * FIXME: This is the most awful hack yet seen. I - * should work out exactly what is interfering with - * the timer count settings early in the boot sequence - * and swiftly introduce it to something sharp and - * pointy. */ - __u16 val; - - spin_lock(&i8253_lock); - - outb_p(0x00, 0x43); - val = inb_p(0x40); - val |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - - if (val > LATCH) { - printk - ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", - val); - spin_lock(&i8253_lock); - outb(0x34, 0x43); - outb_p(LATCH & 0xff, 0x40); /* LSB */ - outb(LATCH >> 8, 0x40); /* MSB */ - spin_unlock(&i8253_lock); - } - } -#ifdef CONFIG_SMP - smp_vic_timer_interrupt(); -#endif -} - -void voyager_power_off(void) -{ - printk("VOYAGER Power Off\n"); - - if (voyager_level == 5) { - voyager_cat_power_off(); - } else if (voyager_level == 4) { - /* This doesn't apparently work on most L4 machines, - * but the specs say to do this to get automatic power - * off. Unfortunately, if it doesn't power off the - * machine, it ends up doing a cold restart, which - * isn't really intended, so comment out the code */ -#if 0 - int port; - - /* enable the voyager Configuration Space */ - outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP); - /* the port for the power off flag is an offset from the - floating base */ - port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21; - /* set the power off flag */ - outb(inb(port) | 0x1, port); -#endif - } - /* and wait for it to happen */ - local_irq_disable(); - for (;;) - halt(); -} - -/* copied from process.c */ -static inline void kb_wait(void) -{ - int i; - - for (i = 0; i < 0x10000; i++) - if ((inb_p(0x64) & 0x02) == 0) - break; -} - -void machine_shutdown(void) -{ - /* Architecture specific shutdown needed before a kexec */ -} - -void machine_restart(char *cmd) -{ - printk("Voyager Warm Restart\n"); - kb_wait(); - - if (voyager_level == 5) { - /* write magic values to the RTC to inform system that - * shutdown is beginning */ - outb(0x8f, 0x70); - outb(0x5, 0x71); - - udelay(50); - outb(0xfe, 0x64); /* pull reset low */ - } else if (voyager_level == 4) { - __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; - __u8 basebd = inb(VOYAGER_MC_SETUP); - - outb(basebd | 0x08, VOYAGER_MC_SETUP); - outb(0x02, catbase + 0x21); - } - local_irq_disable(); - for (;;) - halt(); -} - -void machine_emergency_restart(void) -{ - /*for now, just hook this to a warm restart */ - machine_restart(NULL); -} - -void mca_nmi_hook(void) -{ - __u8 dumpval __maybe_unused = inb(0xf823); - __u8 swnmi __maybe_unused = inb(0xf813); - - /* FIXME: assume dump switch pressed */ - /* check to see if the dump switch was pressed */ - VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi)); - /* clear swnmi */ - outb(0xff, 0xf813); - /* tell SUS to ignore dump */ - if (voyager_level == 5 && voyager_SUS != NULL) { - if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) { - voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND; - voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS; - udelay(1000); - voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP; - voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS; - } - } - printk(KERN_ERR - "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", - smp_processor_id()); - show_stack(NULL, NULL); - show_state(); -} - -void machine_halt(void) -{ - /* treat a halt like a power off */ - machine_power_off(); -} - -void machine_power_off(void) -{ - if (pm_power_off) - pm_power_off(); -} diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c deleted file mode 100644 index 2ad598c104a..00000000000 --- a/arch/x86/mach-voyager/voyager_cat.c +++ /dev/null @@ -1,1197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file contains all the logic for manipulating the CAT bus - * in a level 5 machine. - * - * The CAT bus is a serial configuration and test bus. Its primary - * uses are to probe the initial configuration of the system and to - * diagnose error conditions when a system interrupt occurs. The low - * level interface is fairly primitive, so most of this file consists - * of bit shift manipulations to send and receive packets on the - * serial bus */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef VOYAGER_CAT_DEBUG -#define CDEBUG(x) printk x -#else -#define CDEBUG(x) -#endif - -/* the CAT command port */ -#define CAT_CMD (sspb + 0xe) -/* the CAT data port */ -#define CAT_DATA (sspb + 0xd) - -/* the internal cat functions */ -static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits); -static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data, - __u16 num_bits); -static void cat_build_header(__u8 * header, const __u16 len, - const __u16 smallest_reg_bits, - const __u16 longest_reg_bits); -static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 op); -static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 * value); -static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, - __u8 pad_bits); -static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 value); -static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value); -static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, - __u16 offset, __u16 len, void *buf); -static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 value); -static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp); -static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp); - -static inline const char *cat_module_name(int module_id) -{ - switch (module_id) { - case 0x10: - return "Processor Slot 0"; - case 0x11: - return "Processor Slot 1"; - case 0x12: - return "Processor Slot 2"; - case 0x13: - return "Processor Slot 4"; - case 0x14: - return "Memory Slot 0"; - case 0x15: - return "Memory Slot 1"; - case 0x18: - return "Primary Microchannel"; - case 0x19: - return "Secondary Microchannel"; - case 0x1a: - return "Power Supply Interface"; - case 0x1c: - return "Processor Slot 5"; - case 0x1d: - return "Processor Slot 6"; - case 0x1e: - return "Processor Slot 7"; - case 0x1f: - return "Processor Slot 8"; - default: - return "Unknown Module"; - } -} - -static int sspb = 0; /* stores the super port location */ -int voyager_8slot = 0; /* set to true if a 51xx monster */ - -voyager_module_t *voyager_cat_list; - -/* the I/O port assignments for the VIC and QIC */ -static struct resource vic_res = { - .name = "Voyager Interrupt Controller", - .start = 0xFC00, - .end = 0xFC6F -}; -static struct resource qic_res = { - .name = "Quad Interrupt Controller", - .start = 0xFC70, - .end = 0xFCFF -}; - -/* This function is used to pack a data bit stream inside a message. - * It writes num_bits of the data buffer in msg starting at start_bit. - * Note: This function assumes that any unused bit in the data stream - * is set to zero so that the ors will work correctly */ -static void -cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - __u16 residue = (num_bits % BITS_PER_BYTE) + offset; - int i; - - /* adjust if we have more than a byte of residue */ - if (residue >= BITS_PER_BYTE) { - residue -= BITS_PER_BYTE; - len++; - } - - /* clear out the bits. We assume here that if len==0 then - * residue >= offset. This is always true for the catbus - * operations */ - msg[byte] &= 0xff << (BITS_PER_BYTE - offset); - msg[byte++] |= data[0] >> offset; - if (len == 0) - return; - for (i = 1; i < len; i++) - msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset)) - | (data[i] >> offset); - if (residue != 0) { - __u8 mask = 0xff >> residue; - __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset) - | (data[i] >> offset); - - last_byte &= ~mask; - msg[byte] &= mask; - msg[byte] |= last_byte; - } - return; -} - -/* unpack the data again (same arguments as cat_pack()). data buffer - * must be zero populated. - * - * Function: given a message string move to start_bit and copy num_bits into - * data (starting at bit 0 in data). - */ -static void -cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) -{ - /* compute initial shift needed */ - const __u16 offset = start_bit % BITS_PER_BYTE; - __u16 len = num_bits / BITS_PER_BYTE; - const __u8 last_bits = num_bits % BITS_PER_BYTE; - __u16 byte = start_bit / BITS_PER_BYTE; - int i; - - if (last_bits != 0) - len++; - - /* special case: want < 8 bits from msg and we can get it from - * a single byte of the msg */ - if (len == 0 && BITS_PER_BYTE - offset >= num_bits) { - data[0] = msg[byte] << offset; - data[0] &= 0xff >> (BITS_PER_BYTE - num_bits); - return; - } - for (i = 0; i < len; i++) { - /* this annoying if has to be done just in case a read of - * msg one beyond the array causes a panic */ - if (offset != 0) { - data[i] = msg[byte++] << offset; - data[i] |= msg[byte] >> (BITS_PER_BYTE - offset); - } else { - data[i] = msg[byte++]; - } - } - /* do we need to truncate the final byte */ - if (last_bits != 0) { - data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits); - } - return; -} - -static void -cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits, - const __u16 longest_reg_bits) -{ - int i; - __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE; - __u8 *last_byte = &header[len - 1]; - - if (start_bit == 0) - start_bit = 1; /* must have at least one bit in the hdr */ - - for (i = 0; i < len; i++) - header[i] = 0; - - for (i = start_bit; i > 0; i--) - *last_byte = ((*last_byte) << 1) + 1; - -} - -static int -cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op) -{ - __u8 parity, inst, inst_buf[4] = { 0 }; - __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE]; - __u16 ibytes, hbytes, padbits; - int i; - - /* - * Parity is the parity of the register number + 1 (READ_REGISTER - * and WRITE_REGISTER always add '1' to the number of bits == 1) - */ - parity = (__u8) (1 + (reg & 0x01) + - ((__u8) (reg & 0x02) >> 1) + - ((__u8) (reg & 0x04) >> 2) + - ((__u8) (reg & 0x08) >> 3)) % 2; - - inst = ((parity << 7) | (reg << 2) | op); - - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - printk - ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n"); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(inst, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n")); - return 1; - } - return 0; - } - ibytes = modp->inst_bits / BITS_PER_BYTE; - if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - ibytes++; - } - hbytes = modp->largest_reg / BITS_PER_BYTE; - if (modp->largest_reg % BITS_PER_BYTE) - hbytes++; - CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes)); - /* initialise the instruction sequence to 0xff */ - for (i = 0; i < ibytes + hbytes; i++) - iseq[i] = 0xff; - cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg); - cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE); - inst_buf[0] = inst; - inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE); - cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("ins = 0x%x, iseq: ", inst); - for (i = 0; i < ibytes + hbytes; i++) - printk("0x%x ", iseq[i]); - printk("\n"); -#endif - if (cat_shiftout(iseq, ibytes, hbytes, padbits)) { - CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n")); - return 1; - } - CDEBUG(("CAT SHIFTOUT DONE\n")); - return 0; -} - -static int -cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value) -{ - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n")); - return 1; - } - if (reg > VOYAGER_SUBADDRHI) - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - *value = inb(CAT_DATA); - outb(0xAA, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n")); - return 1; - } - return 0; - } else { - __u16 sbits = modp->num_asics - 1 + asicp->ireg_length; - __u16 sbytes = sbits / BITS_PER_BYTE; - __u16 tbytes; - __u8 string[VOYAGER_MAX_SCAN_PATH], - trailer[VOYAGER_MAX_REG_SIZE]; - __u8 padbits; - int i; - - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - - if ((padbits = sbits % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - sbytes++; - } - tbytes = asicp->ireg_length / BITS_PER_BYTE; - if (asicp->ireg_length % BITS_PER_BYTE) - tbytes++; - CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n", - tbytes, sbytes, padbits)); - cat_build_header(trailer, tbytes, 1, asicp->ireg_length); - - for (i = tbytes - 1; i >= 0; i--) { - outb(trailer[i], CAT_DATA); - string[sbytes + i] = inb(CAT_DATA); - } - - for (i = sbytes - 1; i >= 0; i--) { - outb(0xaa, CAT_DATA); - string[i] = inb(CAT_DATA); - } - *value = 0; - cat_unpack(string, - padbits + (tbytes * BITS_PER_BYTE) + - asicp->asic_location, value, asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("value=0x%x, string: ", *value); - for (i = 0; i < tbytes + sbytes; i++) - printk("0x%x ", string[i]); - printk("\n"); -#endif - - /* sanity check the rest of the return */ - for (i = 0; i < tbytes; i++) { - __u8 input = 0; - - cat_unpack(string, padbits + (i * BITS_PER_BYTE), - &input, BITS_PER_BYTE); - if (trailer[i] != input) { - CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i])); - return 1; - } - } - CDEBUG(("cat_getdata DONE\n")); - return 0; - } -} - -static int -cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits) -{ - int i; - - for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--) - outb(data[i], CAT_DATA); - - for (i = header_bytes - 1; i >= 0; i--) { - __u8 header = 0; - __u8 input; - - outb(data[i], CAT_DATA); - input = inb(CAT_DATA); - CDEBUG(("cat_shiftout: returned 0x%x\n", input)); - cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits, - &header, BITS_PER_BYTE); - if (input != header) { - CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header)); - return 1; - } - } - return 0; -} - -static int -cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, - __u8 reg, __u8 value) -{ - outb(VOYAGER_CAT_DRCYC, CAT_CMD); - if (!modp->scan_path_connected) { - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n")); - return 1; - } - outb(VOYAGER_CAT_HEADER, CAT_DATA); - outb(value, CAT_DATA); - if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { - CDEBUG(("cat_senddata: failed to get correct header response to sent data\n")); - return 1; - } - if (reg > VOYAGER_SUBADDRHI) { - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - } - - return 0; - } else { - __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE; - __u16 dbytes = - (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE; - __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], - hseq[VOYAGER_MAX_REG_SIZE]; - int i; - - if ((padbits = (modp->num_asics - 1 - + asicp->ireg_length) % BITS_PER_BYTE) != 0) { - padbits = BITS_PER_BYTE - padbits; - dbytes++; - } - if (asicp->ireg_length % BITS_PER_BYTE) - hbytes++; - - cat_build_header(hseq, hbytes, 1, asicp->ireg_length); - - for (i = 0; i < dbytes + hbytes; i++) - dseq[i] = 0xff; - CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n", - dbytes, hbytes, padbits)); - cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length, - hseq, hbytes * BITS_PER_BYTE); - cat_pack(dseq, asicp->asic_location, &value, - asicp->ireg_length); -#ifdef VOYAGER_CAT_DEBUG - printk("dseq "); - for (i = 0; i < hbytes + dbytes; i++) { - printk("0x%x ", dseq[i]); - } - printk("\n"); -#endif - return cat_shiftout(dseq, dbytes, hbytes, padbits); - } -} - -static int -cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value) -{ - if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG)) - return 1; - return cat_senddata(modp, asicp, reg, value); -} - -static int -cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, - __u8 * value) -{ - if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG)) - return 1; - return cat_getdata(modp, asicp, reg, value); -} - -static int -cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len) -{ - __u8 val; - - if (len > 1) { - /* set auto increment */ - __u8 newval; - - if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) { - CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", - val)); - newval = val | VOYAGER_AUTO_INC; - if (newval != val) { - if (cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) { - CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n")); - return 1; - } - } - } - if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n")); - return 1; - } - if (asicp->subaddr > VOYAGER_SUBADDR_LO) { - if (cat_write - (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) { - CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n")); - return 1; - } - cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, - val)); - } - cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val); - CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val)); - return 0; -} - -static int -cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - /* FIXME: need special actions for VOYAGER_CAT_ID here */ - if (asicp->asic_id == VOYAGER_CAT_ID) { - CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n")); - /* FIXME -- This is supposed to be handled better - * There is a problem writing to the cat asic in the - * PSI. The 30us delay seems to work, though */ - udelay(30); - } - - if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - printk("cat_subwrite: cat_subaddrsetup FAILED\n"); - return retval; - } - - if (cat_sendinst - (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) { - printk("cat_subwrite: cat_sendinst FAILED\n"); - return 1; - } - for (i = 0; i < len; i++) { - if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) { - printk - ("cat_subwrite: cat_sendata element at %d FAILED\n", - i); - return 1; - } - } - return 0; -} -static int -cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, - __u16 len, void *buf) -{ - int i, retval; - - if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { - CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n")); - return retval; - } - - if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) { - CDEBUG(("cat_subread: cat_sendinst failed\n")); - return 1; - } - for (i = 0; i < len; i++) { - if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) { - CDEBUG(("cat_subread: cat_getdata element %d failed\n", - i)); - return 1; - } - } - return 0; -} - -/* buffer for storing EPROM data read in during initialisation */ -static __initdata __u8 eprom_buf[0xFFFF]; -static voyager_module_t *voyager_initial_module; - -/* Initialise the cat bus components. We assume this is called by the - * boot cpu *after* all memory initialisation has been done (so we can - * use kmalloc) but before smp initialisation, so we can probe the SMP - * configuration and pick up necessary information. */ -void __init voyager_cat_init(void) -{ - voyager_module_t **modpp = &voyager_initial_module; - voyager_asic_t **asicpp; - voyager_asic_t *qabc_asic = NULL; - int i, j; - unsigned long qic_addr = 0; - __u8 qabc_data[0x20]; - __u8 num_submodules, val; - voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0]; - - __u8 cmos[4]; - unsigned long addr; - - /* initiallise the SUS mailbox */ - for (i = 0; i < sizeof(cmos); i++) - cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i); - addr = *(unsigned long *)cmos; - if ((addr & 0xff000000) != 0xff000000) { - printk(KERN_ERR - "Voyager failed to get SUS mailbox (addr = 0x%lx\n", - addr); - } else { - static struct resource res; - - res.name = "voyager SUS"; - res.start = addr; - res.end = addr + 0x3ff; - - request_resource(&iomem_resource, &res); - voyager_SUS = (struct voyager_SUS *) - ioremap(addr, 0x400); - printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n", - voyager_SUS->SUS_version); - voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION; - voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT; - } - - /* clear the processor counts */ - voyager_extended_vic_processors = 0; - voyager_quad_processors = 0; - - printk("VOYAGER: beginning CAT bus probe\n"); - /* set up the SuperSet Port Block which tells us where the - * CAT communication port is */ - sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100; - VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb)); - - /* now find out if were 8 slot or normal */ - if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER) - == EIGHT_SLOT_IDENTIFIER) { - voyager_8slot = 1; - printk(KERN_NOTICE - "Voyager: Eight slot 51xx configuration detected\n"); - } - - for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) { - __u8 input; - int asic; - __u16 eprom_size; - __u16 sp_offset; - - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(i, VOYAGER_CAT_CONFIG_PORT); - - /* check the presence of the module */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - outb(VOYAGER_CAT_IRCYC, CAT_CMD); - outb(VOYAGER_CAT_HEADER, CAT_DATA); - /* stream series of alternating 1's and 0's to stimulate - * response */ - outb(0xAA, CAT_DATA); - input = inb(CAT_DATA); - outb(VOYAGER_CAT_END, CAT_CMD); - if (input != VOYAGER_CAT_HEADER) { - continue; - } - CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i, - cat_module_name(i))); - *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */ - if (*modpp == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset(*modpp, 0, sizeof(voyager_module_t)); - /* need temporary asic for cat_subread. It will be - * filled in correctly later */ - (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */ - if ((*modpp)->asic == NULL) { - printk("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - memset((*modpp)->asic, 0, sizeof(voyager_asic_t)); - (*modpp)->asic->asic_id = VOYAGER_CAT_ID; - (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI; - (*modpp)->module_addr = i; - (*modpp)->scan_path_connected = 0; - if (i == VOYAGER_PSI) { - /* Exception leg for modules with no EEPROM */ - printk("Module \"%s\"\n", cat_module_name(i)); - continue; - } - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if (cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk - ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", - i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if (eprom_size > sizeof(eprom_buf)) { - printk - ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", - i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, - eprom_size)); - if (cat_subread - (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n", - cat_module_name(i), eprom_hdr->version_id, - *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics); - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* All we really care about are the Quad cards. We - * identify them because they are in a processor slot - * and have only four asics */ - if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) { - modpp = &((*modpp)->next); - continue; - } - /* Now we know it's in a processor slot, does it have - * a quad baseboard submodule */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT, - &num_submodules); - /* lowest two bits, active low */ - num_submodules = ~(0xfc | num_submodules); - CDEBUG(("VOYAGER CAT: %d submodules present\n", - num_submodules)); - if (num_submodules == 0) { - /* fill in the dyadic extended processors */ - __u8 cpu = i & 0x07; - - printk("Module \"%s\": Dyadic Processor Card\n", - cat_module_name(i)); - voyager_extended_vic_processors |= (1 << cpu); - cpu += 4; - voyager_extended_vic_processors |= (1 << cpu); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - - /* now we want to read the asics on the first submodule, - * which should be the quad base board */ - - cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val); - CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val)); - val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD; - cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val); - - outb(VOYAGER_CAT_END, CAT_CMD); - - CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(*modpp, (*modpp)->asic); - if (cat_subread(*modpp, (*modpp)->asic, - VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), - &eprom_size)) { - printk - ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", - i); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - if (eprom_size > sizeof(eprom_buf)) { - printk - ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", - i, eprom_size); - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, - eprom_size)); - if (cat_subread - (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { - outb(VOYAGER_CAT_END, CAT_CMD); - continue; - } - outb(VOYAGER_CAT_END, CAT_CMD); - /* Now do everything for the QBB submodule 1 */ - (*modpp)->ee_size = eprom_hdr->ee_size; - (*modpp)->num_asics = eprom_hdr->num_asics; - asicpp = &((*modpp)->asic); - sp_offset = eprom_hdr->scan_path_offset; - /* get rid of the dummy CAT asic and read the real one */ - kfree((*modpp)->asic); - for (asic = 0; asic < (*modpp)->num_asics; asic++) { - int j; - voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */ - voyager_sp_table_t *sp_table; - voyager_at_t *asic_table; - voyager_jtt_t *jtag_table; - - if (asicp == NULL) { - printk - ("**WARNING** kmalloc failure in cat_init\n"); - continue; - } - asicpp = &(asicp->next); - asicp->asic_location = asic; - sp_table = - (voyager_sp_table_t *) (eprom_buf + sp_offset); - asicp->asic_id = sp_table->asic_id; - asic_table = - (voyager_at_t *) (eprom_buf + - sp_table->asic_data_offset); - for (j = 0; j < 4; j++) - asicp->jtag_id[j] = asic_table->jtag_id[j]; - jtag_table = - (voyager_jtt_t *) (eprom_buf + - asic_table->jtag_offset); - asicp->ireg_length = jtag_table->ireg_len; - asicp->bit_location = (*modpp)->inst_bits; - (*modpp)->inst_bits += asicp->ireg_length; - if (asicp->ireg_length > (*modpp)->largest_reg) - (*modpp)->largest_reg = asicp->ireg_length; - if (asicp->ireg_length < (*modpp)->smallest_reg || - (*modpp)->smallest_reg == 0) - (*modpp)->smallest_reg = asicp->ireg_length; - CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n", - asicp->asic_id, asicp->ireg_length, - asicp->bit_location)); - if (asicp->asic_id == VOYAGER_QUAD_QABC) { - CDEBUG(("VOYAGER CAT: QABC ASIC found\n")); - qabc_asic = asicp; - } - sp_offset += sizeof(voyager_sp_table_t); - } - CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg)); - /* OK, now we have the QUAD ASICs set up, use them. - * we need to: - * - * 1. Find the Memory area for the Quad CPIs. - * 2. Find the Extended VIC processor - * 3. Configure a second extended VIC processor (This - * cannot be done for the 51xx. - * */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_connect(*modpp, (*modpp)->asic); - CDEBUG(("CAT CONNECTED!!\n")); - cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data); - qic_addr = qabc_data[5] << 8; - qic_addr = (qic_addr | qabc_data[6]) << 8; - qic_addr = (qic_addr | qabc_data[7]) << 8; - printk - ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n", - cat_module_name(i), qic_addr, qabc_data[8]); -#if 0 /* plumbing fails---FIXME */ - if ((qabc_data[8] & 0xf0) == 0) { - /* FIXME: 32 way 8 CPU slot monster cannot be - * plumbed this way---need to check for it */ - - printk("Plumbing second Extended Quad Processor\n"); - /* second VIC line hardwired to Quad CPU 1 */ - qabc_data[8] |= 0x20; - cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]); -#ifdef VOYAGER_CAT_DEBUG - /* verify plumbing */ - cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]); - if ((qabc_data[8] & 0xf0) == 0) { - CDEBUG(("PLUMBING FAILED: 0x%x\n", - qabc_data[8])); - } -#endif - } -#endif - - { - struct resource *res = - kzalloc(sizeof(struct resource), GFP_KERNEL); - res->name = kmalloc(128, GFP_KERNEL); - sprintf((char *)res->name, "Voyager %s Quad CPI", - cat_module_name(i)); - res->start = qic_addr; - res->end = qic_addr + 0x3ff; - request_resource(&iomem_resource, res); - } - - qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400); - - for (j = 0; j < 4; j++) { - __u8 cpu; - - if (voyager_8slot) { - /* 8 slot has a different mapping, - * each slot has only one vic line, so - * 1 cpu in each slot must be < 8 */ - cpu = (i & 0x07) + j * 8; - } else { - cpu = (i & 0x03) + j * 4; - } - if ((qabc_data[8] & (1 << j))) { - voyager_extended_vic_processors |= (1 << cpu); - } - if (qabc_data[8] & (1 << (j + 4))) { - /* Second SET register plumbed: Quad - * card has two VIC connected CPUs. - * Secondary cannot be booted as a VIC - * CPU */ - voyager_extended_vic_processors |= (1 << cpu); - voyager_allowed_boot_processors &= - (~(1 << cpu)); - } - - voyager_quad_processors |= (1 << cpu); - voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *) - (qic_addr + (j << 8)); - CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu, - (unsigned long)voyager_quad_cpi_addr[cpu])); - } - outb(VOYAGER_CAT_END, CAT_CMD); - - *asicpp = NULL; - modpp = &((*modpp)->next); - } - *modpp = NULL; - printk - ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", - voyager_extended_vic_processors, voyager_quad_processors, - voyager_allowed_boot_processors); - request_resource(&ioport_resource, &vic_res); - if (voyager_quad_processors) - request_resource(&ioport_resource, &qic_res); - /* set up the front power switch */ -} - -int voyager_cat_readb(__u8 module, __u8 asic, int reg) -{ - return 0; -} - -static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp) -{ - __u8 val; - int err = 0; - - if (!modp->scan_path_connected) - return 0; - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_disconnect: ASIC is not CAT\n")); - return 1; - } - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if (err) { - CDEBUG(("cat_disconnect: failed to read SCANPATH\n")); - return err; - } - val &= VOYAGER_DISCONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if (err) { - CDEBUG(("cat_disconnect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 0; - - return 0; -} - -static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp) -{ - __u8 val; - int err = 0; - - if (modp->scan_path_connected) - return 0; - if (asicp->asic_id != VOYAGER_CAT_ID) { - CDEBUG(("cat_connect: ASIC is not CAT\n")); - return 1; - } - - err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); - if (err) { - CDEBUG(("cat_connect: failed to read SCANPATH\n")); - return err; - } - val |= VOYAGER_CONNECT_ASIC; - err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); - if (err) { - CDEBUG(("cat_connect: failed to write SCANPATH\n")); - return err; - } - outb(VOYAGER_CAT_END, CAT_CMD); - outb(VOYAGER_CAT_RUN, CAT_CMD); - modp->scan_path_connected = 1; - - return 0; -} - -void voyager_cat_power_off(void) -{ - /* Power the machine off by writing to the PSI over the CAT - * bus */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - CDEBUG(("PSI STATUS 0x%x\n", data)); - /* These two writes are power off prep and perform */ - data = PSI_CLEAR; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - data = PSI_POWER_DOWN; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); -} - -struct voyager_status voyager_status = { 0 }; - -void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data) -{ - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - switch (cmd) { - case VOYAGER_PSI_READ: - cat_read(&psi, &psi_asic, reg, data); - break; - case VOYAGER_PSI_WRITE: - cat_write(&psi, &psi_asic, reg, *data); - break; - case VOYAGER_PSI_SUBREAD: - cat_subread(&psi, &psi_asic, reg, 1, data); - break; - case VOYAGER_PSI_SUBWRITE: - cat_subwrite(&psi, &psi_asic, reg, 1, data); - break; - default: - printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd); - break; - } - outb(VOYAGER_CAT_END, CAT_CMD); -} - -void voyager_cat_do_common_interrupt(void) -{ - /* This is caused either by a memory parity error or something - * in the PSI */ - __u8 data; - voyager_module_t psi = { 0 }; - voyager_asic_t psi_asic = { 0 }; - struct voyager_psi psi_reg; - int i; - re_read: - psi.asic = &psi_asic; - psi.asic->asic_id = VOYAGER_CAT_ID; - psi.asic->subaddr = VOYAGER_SUBADDR_HI; - psi.module_addr = VOYAGER_PSI; - psi.scan_path_connected = 0; - - outb(VOYAGER_CAT_END, CAT_CMD); - /* Connect the PSI to the CAT Bus */ - outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_disconnect(&psi, &psi_asic); - /* Read the status. NOTE: Need to read *all* the PSI regs here - * otherwise the cmn int will be reasserted */ - for (i = 0; i < sizeof(psi_reg.regs); i++) { - cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); - if ((psi_reg.regs.checkbit & 0x02) == 0) { - psi_reg.regs.checkbit |= 0x02; - cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit); - printk("VOYAGER RE-READ PSI\n"); - goto re_read; - } - outb(VOYAGER_CAT_RUN, CAT_CMD); - for (i = 0; i < sizeof(psi_reg.subregs); i++) { - /* This looks strange, but the PSI doesn't do auto increment - * correctly */ - cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, - 1, &((__u8 *) & psi_reg.subregs)[i]); - } - outb(VOYAGER_CAT_END, CAT_CMD); -#ifdef VOYAGER_CAT_DEBUG - printk("VOYAGER PSI: "); - for (i = 0; i < sizeof(psi_reg.regs); i++) - printk("%02x ", ((__u8 *) & psi_reg.regs)[i]); - printk("\n "); - for (i = 0; i < sizeof(psi_reg.subregs); i++) - printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]); - printk("\n"); -#endif - if (psi_reg.regs.intstatus & PSI_MON) { - /* switch off or power fail */ - - if (psi_reg.subregs.supply & PSI_SWITCH_OFF) { - if (voyager_status.switch_off) { - printk(KERN_ERR - "Voyager front panel switch turned off again---Immediate power off!\n"); - voyager_cat_power_off(); - /* not reached */ - } else { - printk(KERN_ERR - "Voyager front panel switch turned off\n"); - voyager_status.switch_off = 1; - voyager_status.request_from_kernel = 1; - wake_up_process(voyager_thread); - } - /* Tell the hardware we're taking care of the - * shutdown, otherwise it will power the box off - * within 3 seconds of the switch being pressed and, - * which is much more important to us, continue to - * assert the common interrupt */ - data = PSI_CLR_SWITCH_OFF; - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - } else { - - VDEBUG(("Voyager ac fail reg 0x%x\n", - psi_reg.subregs.ACfail)); - if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) { - /* No further update */ - return; - } -#if 0 - /* Don't bother trying to find out who failed. - * FIXME: This probably makes the code incorrect on - * anything other than a 345x */ - for (i = 0; i < 5; i++) { - if (psi_reg.subregs.ACfail & (1 << i)) { - break; - } - } - printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i); -#endif - /* DON'T do this: it shuts down the AC PSI - outb(VOYAGER_CAT_RUN, CAT_CMD); - data = PSI_MASK_MASK | i; - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - */ - printk(KERN_ERR "Voyager AC power failure\n"); - outb(VOYAGER_CAT_RUN, CAT_CMD); - data = PSI_COLD_START; - cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, - 1, &data); - outb(VOYAGER_CAT_END, CAT_CMD); - voyager_status.power_fail = 1; - voyager_status.request_from_kernel = 1; - wake_up_process(voyager_thread); - } - - } else if (psi_reg.regs.intstatus & PSI_FAULT) { - /* Major fault! */ - printk(KERN_ERR - "Voyager PSI Detected major fault, immediate power off!\n"); - voyager_cat_power_off(); - /* not reached */ - } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM - | PSI_CURRENT | PSI_DVM - | PSI_PSCFAULT | PSI_STAT_CHG)) { - /* other psi fault */ - - printk(KERN_WARNING "Voyager PSI status 0x%x\n", data); - /* clear the PSI fault */ - outb(VOYAGER_CAT_RUN, CAT_CMD); - cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0); - outb(VOYAGER_CAT_END, CAT_CMD); - } -} diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c deleted file mode 100644 index 98e3c2bc756..00000000000 --- a/arch/x86/mach-voyager/voyager_smp.c +++ /dev/null @@ -1,1805 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 1999,2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This file provides all the same external entries as smp.c but uses - * the voyager hal to provide the functionality - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* TLB state -- visible externally, indexed physically */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 }; - -/* CPU IRQ affinity -- set to all ones initially */ -static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = - {[0 ... NR_CPUS-1] = ~0UL }; - -/* per CPU data structure (for /proc/cpuinfo et al), visible externally - * indexed physically */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); -EXPORT_PER_CPU_SYMBOL(cpu_info); - -/* physical ID of the CPU used to boot the system */ -unsigned char boot_cpu_id; - -/* The memory line addresses for the Quad CPIs */ -struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned; - -/* The masks for the Extended VIC processors, filled in by cat_init */ -__u32 voyager_extended_vic_processors = 0; - -/* Masks for the extended Quad processors which cannot be VIC booted */ -__u32 voyager_allowed_boot_processors = 0; - -/* The mask for the Quad Processors (both extended and non-extended) */ -__u32 voyager_quad_processors = 0; - -/* Total count of live CPUs, used in process.c to display - * the CPU information and in irq.c for the per CPU irq - * activity count. Finally exported by i386_ksyms.c */ -static int voyager_extended_cpus = 1; - -/* Used for the invalidate map that's also checked in the spinlock */ -static volatile unsigned long smp_invalidate_needed; - -/* Bitmask of CPUs present in the system - exported by i386_syms.c, used - * by scheduler but indexed physically */ -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; - -/* The internal functions */ -static void send_CPI(__u32 cpuset, __u8 cpi); -static void ack_CPI(__u8 cpi); -static int ack_QIC_CPI(__u8 cpi); -static void ack_special_QIC_CPI(__u8 cpi); -static void ack_VIC_CPI(__u8 cpi); -static void send_CPI_allbutself(__u8 cpi); -static void mask_vic_irq(unsigned int irq); -static void unmask_vic_irq(unsigned int irq); -static unsigned int startup_vic_irq(unsigned int irq); -static void enable_local_vic_irq(unsigned int irq); -static void disable_local_vic_irq(unsigned int irq); -static void before_handle_vic_irq(unsigned int irq); -static void after_handle_vic_irq(unsigned int irq); -static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask); -static void ack_vic_irq(unsigned int irq); -static void vic_enable_cpi(void); -static void do_boot_cpu(__u8 cpuid); -static void do_quad_bootstrap(void); -static void initialize_secondary(void); - -int hard_smp_processor_id(void); -int safe_smp_processor_id(void); - -/* Inline functions */ -static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi) -{ - voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi = - (smp_processor_id() << 16) + cpi; -} - -static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpuset & (1 << cpu)) { -#ifdef VOYAGER_DEBUG - if (!cpu_online(cpu)) - VDEBUG(("CPU%d sending cpi %d to CPU%d not in " - "cpu_online_map\n", - hard_smp_processor_id(), cpi, cpu)); -#endif - send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); - } - } -} - -static inline void wrapper_smp_local_timer_interrupt(void) -{ - irq_enter(); - smp_local_timer_interrupt(); - irq_exit(); -} - -static inline void send_one_CPI(__u8 cpu, __u8 cpi) -{ - if (voyager_quad_processors & (1 << cpu)) - send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); - else - send_CPI(1 << cpu, cpi); -} - -static inline void send_CPI_allbutself(__u8 cpi) -{ - __u8 cpu = smp_processor_id(); - __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu); - send_CPI(mask, cpi); -} - -static inline int is_cpu_quad(void) -{ - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER); -} - -static inline int is_cpu_extended(void) -{ - __u8 cpu = hard_smp_processor_id(); - - return (voyager_extended_vic_processors & (1 << cpu)); -} - -static inline int is_cpu_vic_boot(void) -{ - __u8 cpu = hard_smp_processor_id(); - - return (voyager_extended_vic_processors - & voyager_allowed_boot_processors & (1 << cpu)); -} - -static inline void ack_CPI(__u8 cpi) -{ - switch (cpi) { - case VIC_CPU_BOOT_CPI: - if (is_cpu_quad() && !is_cpu_vic_boot()) - ack_QIC_CPI(cpi); - else - ack_VIC_CPI(cpi); - break; - case VIC_SYS_INT: - case VIC_CMN_INT: - /* These are slightly strange. Even on the Quad card, - * They are vectored as VIC CPIs */ - if (is_cpu_quad()) - ack_special_QIC_CPI(cpi); - else - ack_VIC_CPI(cpi); - break; - default: - printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi); - break; - } -} - -/* local variables */ - -/* The VIC IRQ descriptors -- these look almost identical to the - * 8259 IRQs except that masks and things must be kept per processor - */ -static struct irq_chip vic_chip = { - .name = "VIC", - .startup = startup_vic_irq, - .mask = mask_vic_irq, - .unmask = unmask_vic_irq, - .set_affinity = set_vic_irq_affinity, -}; - -/* used to count up as CPUs are brought on line (starts at 0) */ -static int cpucount = 0; - -/* The per cpu profile stuff - used in smp_local_timer_interrupt */ -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - -/* the map used to check if a CPU has booted */ -static __u32 cpu_booted_map; - -/* the synchronize flag used to hold all secondary CPUs spinning in - * a tight loop until the boot sequence is ready for them */ -static cpumask_t smp_commenced_mask = CPU_MASK_NONE; - -/* This is for the new dynamic CPU boot code */ - -/* The per processor IRQ masks (these are usually kept in sync) */ -static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; - -/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */ -static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 }; - -/* Lock for enable/disable of VIC interrupts */ -static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock); - -/* The boot processor is correctly set up in PC mode when it - * comes up, but the secondaries need their master/slave 8259 - * pairs initializing correctly */ - -/* Interrupt counters (per cpu) and total - used to try to - * even up the interrupt handling routines */ -static long vic_intr_total = 0; -static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 }; -static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 }; - -/* Since we can only use CPI0, we fake all the other CPIs */ -static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned; - -/* debugging routine to read the isr of the cpu's pic */ -static inline __u16 vic_read_isr(void) -{ - __u16 isr; - - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - - return isr; -} - -static __init void qic_setup(void) -{ - if (!is_cpu_quad()) { - /* not a quad, no setup */ - return; - } - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - - if (is_cpu_extended()) { - /* the QIC duplicate of the VIC base register */ - outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER); - outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER); - - /* FIXME: should set up the QIC timer and memory parity - * error vectors here */ - } -} - -static __init void vic_setup_pic(void) -{ - outb(1, VIC_REDIRECT_REGISTER_1); - /* clear the claim registers for dynamic routing */ - outb(0, VIC_CLAIM_REGISTER_0); - outb(0, VIC_CLAIM_REGISTER_1); - - outb(0, VIC_PRIORITY_REGISTER); - /* Set the Primary and Secondary Microchannel vector - * bases to be the same as the ordinary interrupts - * - * FIXME: This would be more efficient using separate - * vectors. */ - outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); - outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); - /* Now initiallise the master PIC belonging to this CPU by - * sending the four ICWs */ - - /* ICW1: level triggered, ICW4 needed */ - outb(0x19, 0x20); - - /* ICW2: vector base */ - outb(FIRST_EXTERNAL_VECTOR, 0x21); - - /* ICW3: slave at line 2 */ - outb(0x04, 0x21); - - /* ICW4: 8086 mode */ - outb(0x01, 0x21); - - /* now the same for the slave PIC */ - - /* ICW1: level trigger, ICW4 needed */ - outb(0x19, 0xA0); - - /* ICW2: slave vector base */ - outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1); - - /* ICW3: slave ID */ - outb(0x02, 0xA1); - - /* ICW4: 8086 mode */ - outb(0x01, 0xA1); -} - -static void do_quad_bootstrap(void) -{ - if (is_cpu_quad() && is_cpu_vic_boot()) { - int i; - unsigned long flags; - __u8 cpuid = hard_smp_processor_id(); - - local_irq_save(flags); - - for (i = 0; i < 4; i++) { - /* FIXME: this would be >>3 &0x7 on the 32 way */ - if (((cpuid >> 2) & 0x03) == i) - /* don't lower our own mask! */ - continue; - - /* masquerade as local Quad CPU */ - outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID); - /* enable the startup CPI */ - outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1); - /* restore cpu id */ - outb(0, QIC_PROCESSOR_ID); - } - local_irq_restore(flags); - } -} - -void prefill_possible_map(void) -{ - /* This is empty on voyager because we need a much - * earlier detection which is done in find_smp_config */ -} - -/* Set up all the basic stuff: read the SMP config and make all the - * SMP information reflect only the boot cpu. All others will be - * brought on-line later. */ -void __init find_smp_config(void) -{ - int i; - - boot_cpu_id = hard_smp_processor_id(); - - printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); - - /* initialize the CPU structures (moved from smp_boot_cpus) */ - for (i = 0; i < nr_cpu_ids; i++) - cpu_irq_affinity[i] = ~0; - cpu_online_map = cpumask_of_cpu(boot_cpu_id); - - /* The boot CPU must be extended */ - voyager_extended_vic_processors = 1 << boot_cpu_id; - /* initially, all of the first 8 CPUs can boot */ - voyager_allowed_boot_processors = 0xff; - /* set up everything for just this CPU, we can alter - * this as we start the other CPUs later */ - /* now get the CPU disposition from the extended CMOS */ - cpus_addr(phys_cpu_present_map)[0] = - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK); - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + - 2) << 16; - cpus_addr(phys_cpu_present_map)[0] |= - voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + - 3) << 24; - init_cpu_possible(&phys_cpu_present_map); - printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", - cpus_addr(phys_cpu_present_map)[0]); - /* Here we set up the VIC to enable SMP */ - /* enable the CPIs by writing the base vector to their register */ - outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER); - outb(1, VIC_REDIRECT_REGISTER_1); - /* set the claim registers for static routing --- Boot CPU gets - * all interrupts untill all other CPUs started */ - outb(0xff, VIC_CLAIM_REGISTER_0); - outb(0xff, VIC_CLAIM_REGISTER_1); - /* Set the Primary and Secondary Microchannel vector - * bases to be the same as the ordinary interrupts - * - * FIXME: This would be more efficient using separate - * vectors. */ - outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); - outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); - - /* Finally tell the firmware that we're driving */ - outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG, - VOYAGER_SUS_IN_CONTROL_PORT); - - current_thread_info()->cpu = boot_cpu_id; - percpu_write(cpu_number, boot_cpu_id); -} - -/* - * The bootstrap kernel entry code has set these up. Save them - * for a given CPU, id is physical */ -void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c = &cpu_data(id); - - *c = boot_cpu_data; - c->cpu_index = id; - - identify_secondary_cpu(c); -} - -/* Routine initially called when a non-boot CPU is brought online */ -static void __init start_secondary(void *unused) -{ - __u8 cpuid = hard_smp_processor_id(); - - cpu_init(); - - /* OK, we're in the routine */ - ack_CPI(VIC_CPU_BOOT_CPI); - - /* setup the 8259 master slave pair belonging to this CPU --- - * we won't actually receive any until the boot CPU - * relinquishes it's static routing mask */ - vic_setup_pic(); - - qic_setup(); - - if (is_cpu_quad() && !is_cpu_vic_boot()) { - /* clear the boot CPI */ - __u8 dummy; - - dummy = - voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi; - printk("read dummy %d\n", dummy); - } - - /* lower the mask to receive CPIs */ - vic_enable_cpi(); - - VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); - - notify_cpu_starting(cpuid); - - /* enable interrupts */ - local_irq_enable(); - - /* get our bogomips */ - calibrate_delay(); - - /* save our processor parameters */ - smp_store_cpu_info(cpuid); - - /* if we're a quad, we may need to bootstrap other CPUs */ - do_quad_bootstrap(); - - /* FIXME: this is rather a poor hack to prevent the CPU - * activating softirqs while it's supposed to be waiting for - * permission to proceed. Without this, the new per CPU stuff - * in the softirqs will fail */ - local_irq_disable(); - cpu_set(cpuid, cpu_callin_map); - - /* signal that we're done */ - cpu_booted_map = 1; - - while (!cpu_isset(cpuid, smp_commenced_mask)) - rep_nop(); - local_irq_enable(); - - local_flush_tlb(); - - cpu_set(cpuid, cpu_online_map); - wmb(); - cpu_idle(); -} - -/* Routine to kick start the given CPU and wait for it to report ready - * (or timeout in startup). When this routine returns, the requested - * CPU is either fully running and configured or known to be dead. - * - * We call this routine sequentially 1 CPU at a time, so no need for - * locking */ - -static void __init do_boot_cpu(__u8 cpu) -{ - struct task_struct *idle; - int timeout; - unsigned long flags; - int quad_boot = (1 << cpu) & voyager_quad_processors - & ~(voyager_extended_vic_processors - & voyager_allowed_boot_processors); - - /* This is the format of the CPI IDT gate (in real mode) which - * we're hijacking to boot the CPU */ - union IDTFormat { - struct seg { - __u16 Offset; - __u16 Segment; - } idt; - __u32 val; - } hijack_source; - - __u32 *hijack_vector; - __u32 start_phys_address = setup_trampoline(); - - /* There's a clever trick to this: The linux trampoline is - * compiled to begin at absolute location zero, so make the - * address zero but have the data segment selector compensate - * for the actual address */ - hijack_source.idt.Offset = start_phys_address & 0x000F; - hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF; - - cpucount++; - alternatives_smp_switch(1); - - idle = fork_idle(cpu); - if (IS_ERR(idle)) - panic("failed fork for CPU%d", cpu); - idle->thread.ip = (unsigned long)start_secondary; - /* init_tasks (in sched.c) is indexed logically */ - stack_start.sp = (void *)idle->thread.sp; - - per_cpu(current_task, cpu) = idle; - early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); - irq_ctx_init(cpu); - - /* Note: Don't modify initial ss override */ - VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, - (unsigned long)hijack_source.val, hijack_source.idt.Segment, - hijack_source.idt.Offset, stack_start.sp)); - - /* init lowmem identity mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); - flush_tlb_all(); - - if (quad_boot) { - printk("CPU %d: non extended Quad boot\n", cpu); - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - } else { - printk("CPU%d: extended VIC boot\n", cpu); - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - /* VIC errata, may also receive interrupt at this address */ - hijack_vector = - (__u32 *) - phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + - VIC_DEFAULT_CPI_BASE) * 4); - *hijack_vector = hijack_source.val; - } - /* All non-boot CPUs start with interrupts fully masked. Need - * to lower the mask of the CPI we're about to send. We do - * this in the VIC by masquerading as the processor we're - * about to boot and lowering its interrupt mask */ - local_irq_save(flags); - if (quad_boot) { - send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI); - } else { - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - /* here we're altering registers belonging to `cpu' */ - - outb(VIC_BOOT_INTERRUPT_MASK, 0x21); - /* now go back to our original identity */ - outb(boot_cpu_id, VIC_PROCESSOR_ID); - - /* and boot the CPU */ - - send_CPI((1 << cpu), VIC_CPU_BOOT_CPI); - } - cpu_booted_map = 0; - local_irq_restore(flags); - - /* now wait for it to become ready (or timeout) */ - for (timeout = 0; timeout < 50000; timeout++) { - if (cpu_booted_map) - break; - udelay(100); - } - /* reset the page table */ - zap_low_mappings(); - - if (cpu_booted_map) { - VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n", - cpu, smp_processor_id())); - - printk("CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - wmb(); - cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_present_map); - } else { - printk("CPU%d FAILED TO BOOT: ", cpu); - if (* - ((volatile unsigned char *)phys_to_virt(start_phys_address)) - == 0xA5) - printk("Stuck.\n"); - else - printk("Not responding.\n"); - - cpucount--; - } -} - -void __init smp_boot_cpus(void) -{ - int i; - - /* CAT BUS initialisation must be done after the memory */ - /* FIXME: The L4 has a catbus too, it just needs to be - * accessed in a totally different way */ - if (voyager_level == 5) { - voyager_cat_init(); - - /* now that the cat has probed the Voyager System Bus, sanity - * check the cpu map */ - if (((voyager_quad_processors | voyager_extended_vic_processors) - & cpus_addr(phys_cpu_present_map)[0]) != - cpus_addr(phys_cpu_present_map)[0]) { - /* should panic */ - printk("\n\n***WARNING*** " - "Sanity check of CPU present map FAILED\n"); - } - } else if (voyager_level == 4) - voyager_extended_vic_processors = - cpus_addr(phys_cpu_present_map)[0]; - - /* this sets up the idle task to run on the current cpu */ - voyager_extended_cpus = 1; - /* Remove the global_irq_holder setting, it triggers a BUG() on - * schedule at the moment */ - //global_irq_holder = boot_cpu_id; - - /* FIXME: Need to do something about this but currently only works - * on CPUs with a tsc which none of mine have. - smp_tune_scheduling(); - */ - smp_store_cpu_info(boot_cpu_id); - /* setup the jump vector */ - initial_code = (unsigned long)initialize_secondary; - printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data(boot_cpu_id)); - - if (is_cpu_quad()) { - /* booting on a Quad CPU */ - printk("VOYAGER SMP: Boot CPU is Quad\n"); - qic_setup(); - do_quad_bootstrap(); - } - - /* enable our own CPIs */ - vic_enable_cpi(); - - cpu_set(boot_cpu_id, cpu_online_map); - cpu_set(boot_cpu_id, cpu_callout_map); - - /* loop over all the extended VIC CPUs and boot them. The - * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < nr_cpu_ids; i++) { - if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) - continue; - do_boot_cpu(i); - /* This udelay seems to be needed for the Quad boots - * don't remove unless you know what you're doing */ - udelay(1000); - } - /* we could compute the total bogomips here, but why bother?, - * Code added from smpboot.c */ - { - unsigned long bogosum = 0; - - for_each_online_cpu(i) - bogosum += cpu_data(i).loops_per_jiffy; - printk(KERN_INFO "Total of %d processors activated " - "(%lu.%02lu BogoMIPS).\n", - cpucount + 1, bogosum / (500000 / HZ), - (bogosum / (5000 / HZ)) % 100); - } - voyager_extended_cpus = hweight32(voyager_extended_vic_processors); - printk("VOYAGER: Extended (interrupt handling CPUs): " - "%d, non-extended: %d\n", voyager_extended_cpus, - num_booting_cpus() - voyager_extended_cpus); - /* that's it, switch to symmetric mode */ - outb(0, VIC_PRIORITY_REGISTER); - outb(0, VIC_CLAIM_REGISTER_0); - outb(0, VIC_CLAIM_REGISTER_1); - - VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus())); -} - -/* Reload the secondary CPUs task structure (this function does not - * return ) */ -static void __init initialize_secondary(void) -{ -#if 0 - // AC kernels only - set_current(hard_get_current()); -#endif - - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - - asm volatile ("movl %0,%%esp\n\t" - "jmp *%1"::"r" (current->thread.sp), - "r"(current->thread.ip)); -} - -/* handle a Voyager SYS_INT -- If we don't, the base board will - * panic the system. - * - * System interrupts occur because some problem was detected on the - * various busses. To find out what you have to probe all the - * hardware via the CAT bus. FIXME: At the moment we do nothing. */ -void smp_vic_sys_interrupt(struct pt_regs *regs) -{ - ack_CPI(VIC_SYS_INT); - printk("Voyager SYSTEM INTERRUPT\n"); -} - -/* Handle a voyager CMN_INT; These interrupts occur either because of - * a system status change or because a single bit memory error - * occurred. FIXME: At the moment, ignore all this. */ -void smp_vic_cmn_interrupt(struct pt_regs *regs) -{ - static __u8 in_cmn_int = 0; - static DEFINE_SPINLOCK(cmn_int_lock); - - /* common ints are broadcast, so make sure we only do this once */ - _raw_spin_lock(&cmn_int_lock); - if (in_cmn_int) - goto unlock_end; - - in_cmn_int++; - _raw_spin_unlock(&cmn_int_lock); - - VDEBUG(("Voyager COMMON INTERRUPT\n")); - - if (voyager_level == 5) - voyager_cat_do_common_interrupt(); - - _raw_spin_lock(&cmn_int_lock); - in_cmn_int = 0; - unlock_end: - _raw_spin_unlock(&cmn_int_lock); - ack_CPI(VIC_CMN_INT); -} - -/* - * Reschedule call back. Nothing to do, all the work is done - * automatically when we return from the interrupt. */ -static void smp_reschedule_interrupt(void) -{ - /* do nothing */ -} - -static struct mm_struct *flush_mm; -static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); - -/* - * We cannot call mmdrop() because we are in interrupt context, - * instead update mm->cpu_vm_mask. - * - * We need to reload %cr3 since the page tables may be going - * away from under us.. - */ -static inline void voyager_leave_mm(unsigned long cpu) -{ - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) - BUG(); - cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); - load_cr3(swapper_pg_dir); -} - -/* - * Invalidate call-back - */ -static void smp_invalidate_interrupt(void) -{ - __u8 cpu = smp_processor_id(); - - if (!test_bit(cpu, &smp_invalidate_needed)) - return; - /* This will flood messages. Don't uncomment unless you see - * Problems with cross cpu invalidation - VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n", - smp_processor_id())); - */ - - if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { - if (flush_va == TLB_FLUSH_ALL) - local_flush_tlb(); - else - __flush_tlb_one(flush_va); - } else - voyager_leave_mm(cpu); - } - smp_mb__before_clear_bit(); - clear_bit(cpu, &smp_invalidate_needed); - smp_mb__after_clear_bit(); -} - -/* All the new flush operations for 2.4 */ - -/* This routine is called with a physical cpu mask */ -static void -voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm, - unsigned long va) -{ - int stuck = 50000; - - if (!cpumask) - BUG(); - if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask) - BUG(); - if (cpumask & (1 << smp_processor_id())) - BUG(); - if (!mm) - BUG(); - - spin_lock(&tlbstate_lock); - - flush_mm = mm; - flush_va = va; - atomic_set_mask(cpumask, &smp_invalidate_needed); - /* - * We have to send the CPI only to - * CPUs affected. - */ - send_CPI(cpumask, VIC_INVALIDATE_CPI); - - while (smp_invalidate_needed) { - mb(); - if (--stuck == 0) { - printk("***WARNING*** Stuck doing invalidate CPI " - "(CPU%d)\n", smp_processor_id()); - break; - } - } - - /* Uncomment only to debug invalidation problems - VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu)); - */ - - flush_mm = NULL; - flush_va = 0; - spin_unlock(&tlbstate_lock); -} - -void flush_tlb_current_task(void) -{ - struct mm_struct *mm = current->mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - local_flush_tlb(); - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - - if (current->active_mm == mm) { - if (current->mm) - local_flush_tlb(); - else - voyager_leave_mm(smp_processor_id()); - } - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); - - preempt_enable(); -} - -void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long cpu_mask; - - preempt_disable(); - - cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); - if (current->active_mm == mm) { - if (current->mm) - __flush_tlb_one(va); - else - voyager_leave_mm(smp_processor_id()); - } - - if (cpu_mask) - voyager_flush_tlb_others(cpu_mask, mm, va); - - preempt_enable(); -} - -EXPORT_SYMBOL(flush_tlb_page); - -/* enable the requested IRQs */ -static void smp_enable_irq_interrupt(void) -{ - __u8 irq; - __u8 cpu = get_cpu(); - - VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu, - vic_irq_enable_mask[cpu])); - - spin_lock(&vic_irq_lock); - for (irq = 0; irq < 16; irq++) { - if (vic_irq_enable_mask[cpu] & (1 << irq)) - enable_local_vic_irq(irq); - } - vic_irq_enable_mask[cpu] = 0; - spin_unlock(&vic_irq_lock); - - put_cpu_no_resched(); -} - -/* - * CPU halt call-back - */ -static void smp_stop_cpu_function(void *dummy) -{ - VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id())); - cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); - for (;;) - halt(); -} - -/* execute a thread on a new CPU. The function to be called must be - * previously set up. This is used to schedule a function for - * execution on all CPUs - set up the function then broadcast a - * function_interrupt CPI to come here on each CPU */ -static void smp_call_function_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_interrupt(); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); -} - -static void smp_call_function_single_interrupt(void) -{ - irq_enter(); - generic_smp_call_function_single_interrupt(); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); -} - -/* Sorry about the name. In an APIC based system, the APICs - * themselves are programmed to send a timer interrupt. This is used - * by linux to reschedule the processor. Voyager doesn't have this, - * so we use the system clock to interrupt one processor, which in - * turn, broadcasts a timer CPI to all the others --- we receive that - * CPI here. We don't use this actually for counting so losing - * ticks doesn't matter - * - * FIXME: For those CPUs which actually have a local APIC, we could - * try to use it to trigger this interrupt instead of having to - * broadcast the timer tick. Unfortunately, all my pentium DYADs have - * no local APIC, so I can't do this - * - * This function is currently a placeholder and is unused in the code */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - wrapper_smp_local_timer_interrupt(); - set_irq_regs(old_regs); -} - -/* All of the QUAD interrupt GATES */ -void smp_qic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - ack_QIC_CPI(QIC_TIMER_CPI); - wrapper_smp_local_timer_interrupt(); - set_irq_regs(old_regs); -} - -void smp_qic_invalidate_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_INVALIDATE_CPI); - smp_invalidate_interrupt(); -} - -void smp_qic_reschedule_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_RESCHEDULE_CPI); - smp_reschedule_interrupt(); -} - -void smp_qic_enable_irq_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_ENABLE_IRQ_CPI); - smp_enable_irq_interrupt(); -} - -void smp_qic_call_function_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_CALL_FUNCTION_CPI); - smp_call_function_interrupt(); -} - -void smp_qic_call_function_single_interrupt(struct pt_regs *regs) -{ - ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI); - smp_call_function_single_interrupt(); -} - -void smp_vic_cpi_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - __u8 cpu = smp_processor_id(); - - if (is_cpu_quad()) - ack_QIC_CPI(VIC_CPI_LEVEL0); - else - ack_VIC_CPI(VIC_CPI_LEVEL0); - - if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu])) - wrapper_smp_local_timer_interrupt(); - if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu])) - smp_invalidate_interrupt(); - if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu])) - smp_reschedule_interrupt(); - if (test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu])) - smp_enable_irq_interrupt(); - if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) - smp_call_function_interrupt(); - if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu])) - smp_call_function_single_interrupt(); - set_irq_regs(old_regs); -} - -static void do_flush_tlb_all(void *info) -{ - unsigned long cpu = smp_processor_id(); - - __flush_tlb_all(); - if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) - voyager_leave_mm(cpu); -} - -/* flush the TLB of every active CPU in the system */ -void flush_tlb_all(void) -{ - on_each_cpu(do_flush_tlb_all, 0, 1); -} - -/* send a reschedule CPI to one CPU by physical CPU number*/ -static void voyager_smp_send_reschedule(int cpu) -{ - send_one_CPI(cpu, VIC_RESCHEDULE_CPI); -} - -int hard_smp_processor_id(void) -{ - __u8 i; - __u8 cpumask = inb(VIC_PROC_WHO_AM_I); - if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER) - return cpumask & 0x1F; - - for (i = 0; i < 8; i++) { - if (cpumask & (1 << i)) - return i; - } - printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask); - return 0; -} - -int safe_smp_processor_id(void) -{ - return hard_smp_processor_id(); -} - -/* broadcast a halt to all other CPUs */ -static void voyager_smp_send_stop(void) -{ - smp_call_function(smp_stop_cpu_function, NULL, 1); -} - -/* this function is triggered in time.c when a clock tick fires - * we need to re-broadcast the tick to all CPUs */ -void smp_vic_timer_interrupt(void) -{ - send_CPI_allbutself(VIC_TIMER_CPI); - smp_local_timer_interrupt(); -} - -/* local (per CPU) timer interrupt. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ -void smp_local_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - long weight; - - profile_tick(CPU_PROFILING); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - /* FIXME: need to update the vic timer tick here */ - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - - update_process_times(user_mode_vm(get_irq_regs())); - } - - if (((1 << cpu) & voyager_extended_vic_processors) == 0) - /* only extended VIC processors participate in - * interrupt distribution */ - return; - - /* - * We take the 'long' return path, and there every subsystem - * grabs the appropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ - - if ((++vic_tick[cpu] & 0x7) != 0) - return; - /* get here every 16 ticks (about every 1/6 of a second) */ - - /* Change our priority to give someone else a chance at getting - * the IRQ. The algorithm goes like this: - * - * In the VIC, the dynamically routed interrupt is always - * handled by the lowest priority eligible (i.e. receiving - * interrupts) CPU. If >1 eligible CPUs are equal lowest, the - * lowest processor number gets it. - * - * The priority of a CPU is controlled by a special per-CPU - * VIC priority register which is 3 bits wide 0 being lowest - * and 7 highest priority.. - * - * Therefore we subtract the average number of interrupts from - * the number we've fielded. If this number is negative, we - * lower the activity count and if it is positive, we raise - * it. - * - * I'm afraid this still leads to odd looking interrupt counts: - * the totals are all roughly equal, but the individual ones - * look rather skewed. - * - * FIXME: This algorithm is total crap when mixed with SMP - * affinity code since we now try to even up the interrupt - * counts when an affinity binding is keeping them on a - * particular CPU*/ - weight = (vic_intr_count[cpu] * voyager_extended_cpus - - vic_intr_total) >> 4; - weight += 4; - if (weight > 7) - weight = 7; - if (weight < 0) - weight = 0; - - outb((__u8) weight, VIC_PRIORITY_REGISTER); - -#ifdef VOYAGER_DEBUG - if ((vic_tick[cpu] & 0xFFF) == 0) { - /* print this message roughly every 25 secs */ - printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n", - cpu, vic_tick[cpu], weight); - } -#endif -} - -/* setup the profiling timer */ -int setup_profiling_timer(unsigned int multiplier) -{ - int i; - - if ((!multiplier)) - return -EINVAL; - - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. - */ - for (i = 0; i < nr_cpu_ids; ++i) - per_cpu(prof_multiplier, i) = multiplier; - - return 0; -} - -/* This is a bit of a mess, but forced on us by the genirq changes - * there's no genirq handler that really does what voyager wants - * so hack it up with the simple IRQ handler */ -static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) -{ - before_handle_vic_irq(irq); - handle_simple_irq(irq, desc); - after_handle_vic_irq(irq); -} - -/* The CPIs are handled in the per cpu 8259s, so they must be - * enabled to be received: FIX: enabling the CPIs in the early - * boot sequence interferes with bug checking; enable them later - * on in smp_init */ -#define VIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector)) -#define QIC_SET_GATE(cpi, vector) \ - set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) - -void __init voyager_smp_intr_init(void) -{ - int i; - - /* initialize the per cpu irq mask to all disabled */ - for (i = 0; i < nr_cpu_ids; i++) - vic_irq_mask[i] = 0xFFFF; - - VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); - - VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt); - VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt); - - QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt); - QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt); - QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt); - QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt); - QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt); - - /* now put the VIC descriptor into the first 48 IRQs - * - * This is for later: first 16 correspond to PC IRQs; next 16 - * are Primary MC IRQs and final 16 are Secondary MC IRQs */ - for (i = 0; i < 48; i++) - set_irq_chip_and_handler(i, &vic_chip, handle_vic_irq); -} - -/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per - * processor to receive CPI */ -static void send_CPI(__u32 cpuset, __u8 cpi) -{ - int cpu; - __u32 quad_cpuset = (cpuset & voyager_quad_processors); - - if (cpi < VIC_START_FAKE_CPI) { - /* fake CPI are only used for booting, so send to the - * extended quads as well---Quads must be VIC booted */ - outb((__u8) (cpuset), VIC_CPI_Registers[cpi]); - return; - } - if (quad_cpuset) - send_QIC_CPI(quad_cpuset, cpi); - cpuset &= ~quad_cpuset; - cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */ - if (cpuset == 0) - return; - for_each_online_cpu(cpu) { - if (cpuset & (1 << cpu)) - set_bit(cpi, &vic_cpi_mailbox[cpu]); - } - if (cpuset) - outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]); -} - -/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and - * set the cache line to shared by reading it. - * - * DON'T make this inline otherwise the cache line read will be - * optimised away - * */ -static int ack_QIC_CPI(__u8 cpi) -{ - __u8 cpu = hard_smp_processor_id(); - - cpi &= 7; - - outb(1 << cpi, QIC_INTERRUPT_CLEAR1); - return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi; -} - -static void ack_special_QIC_CPI(__u8 cpi) -{ - switch (cpi) { - case VIC_CMN_INT: - outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0); - break; - case VIC_SYS_INT: - outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0); - break; - } - /* also clear at the VIC, just in case (nop for non-extended proc) */ - ack_VIC_CPI(cpi); -} - -/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */ -static void ack_VIC_CPI(__u8 cpi) -{ -#ifdef VOYAGER_DEBUG - unsigned long flags; - __u16 isr; - __u8 cpu = smp_processor_id(); - - local_irq_save(flags); - isr = vic_read_isr(); - if ((isr & (1 << (cpi & 7))) == 0) { - printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi); - } -#endif - /* send specific EOI; the two system interrupts have - * bit 4 set for a separate vector but behave as the - * corresponding 3 bit intr */ - outb_p(0x60 | (cpi & 7), 0x20); - -#ifdef VOYAGER_DEBUG - if ((vic_read_isr() & (1 << (cpi & 7))) != 0) { - printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi); - } - local_irq_restore(flags); -#endif -} - -/* cribbed with thanks from irq.c */ -#define __byte(x,y) (((unsigned char *)&(y))[x]) -#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu])) -#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu])) - -static unsigned int startup_vic_irq(unsigned int irq) -{ - unmask_vic_irq(irq); - - return 0; -} - -/* The enable and disable routines. This is where we run into - * conflicting architectural philosophy. Fundamentally, the voyager - * architecture does not expect to have to disable interrupts globally - * (the IRQ controllers belong to each CPU). The processor masquerade - * which is used to start the system shouldn't be used in a running OS - * since it will cause great confusion if two separate CPUs drive to - * the same IRQ controller (I know, I've tried it). - * - * The solution is a variant on the NCR lazy SPL design: - * - * 1) To disable an interrupt, do nothing (other than set the - * IRQ_DISABLED flag). This dares the interrupt actually to arrive. - * - * 2) If the interrupt dares to come in, raise the local mask against - * it (this will result in all the CPU masks being raised - * eventually). - * - * 3) To enable the interrupt, lower the mask on the local CPU and - * broadcast an Interrupt enable CPI which causes all other CPUs to - * adjust their masks accordingly. */ - -static void unmask_vic_irq(unsigned int irq) -{ - /* linux doesn't to processor-irq affinity, so enable on - * all CPUs we know about */ - int cpu = smp_processor_id(), real_cpu; - __u16 mask = (1 << irq); - __u32 processorList = 0; - unsigned long flags; - - VDEBUG(("VOYAGER: unmask_vic_irq(%d) CPU%d affinity 0x%lx\n", - irq, cpu, cpu_irq_affinity[cpu])); - spin_lock_irqsave(&vic_irq_lock, flags); - for_each_online_cpu(real_cpu) { - if (!(voyager_extended_vic_processors & (1 << real_cpu))) - continue; - if (!(cpu_irq_affinity[real_cpu] & mask)) { - /* irq has no affinity for this CPU, ignore */ - continue; - } - if (real_cpu == cpu) { - enable_local_vic_irq(irq); - } else if (vic_irq_mask[real_cpu] & mask) { - vic_irq_enable_mask[real_cpu] |= mask; - processorList |= (1 << real_cpu); - } - } - spin_unlock_irqrestore(&vic_irq_lock, flags); - if (processorList) - send_CPI(processorList, VIC_ENABLE_IRQ_CPI); -} - -static void mask_vic_irq(unsigned int irq) -{ - /* lazy disable, do nothing */ -} - -static void enable_local_vic_irq(unsigned int irq) -{ - __u8 cpu = smp_processor_id(); - __u16 mask = ~(1 << irq); - __u16 old_mask = vic_irq_mask[cpu]; - - vic_irq_mask[cpu] &= mask; - if (vic_irq_mask[cpu] == old_mask) - return; - - VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n", - irq, cpu)); - - if (irq & 8) { - outb_p(cached_A1(cpu), 0xA1); - (void)inb_p(0xA1); - } else { - outb_p(cached_21(cpu), 0x21); - (void)inb_p(0x21); - } -} - -static void disable_local_vic_irq(unsigned int irq) -{ - __u8 cpu = smp_processor_id(); - __u16 mask = (1 << irq); - __u16 old_mask = vic_irq_mask[cpu]; - - if (irq == 7) - return; - - vic_irq_mask[cpu] |= mask; - if (old_mask == vic_irq_mask[cpu]) - return; - - VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n", - irq, cpu)); - - if (irq & 8) { - outb_p(cached_A1(cpu), 0xA1); - (void)inb_p(0xA1); - } else { - outb_p(cached_21(cpu), 0x21); - (void)inb_p(0x21); - } -} - -/* The VIC is level triggered, so the ack can only be issued after the - * interrupt completes. However, we do Voyager lazy interrupt - * handling here: It is an extremely expensive operation to mask an - * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If - * this interrupt actually comes in, then we mask and ack here to push - * the interrupt off to another CPU */ -static void before_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_to_desc(irq); - __u8 cpu = smp_processor_id(); - - _raw_spin_lock(&vic_irq_lock); - vic_intr_total++; - vic_intr_count[cpu]++; - - if (!(cpu_irq_affinity[cpu] & (1 << irq))) { - /* The irq is not in our affinity mask, push it off - * onto another CPU */ - VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d " - "on cpu %d\n", irq, cpu)); - disable_local_vic_irq(irq); - /* set IRQ_INPROGRESS to prevent the handler in irq.c from - * actually calling the interrupt routine */ - desc->status |= IRQ_REPLAY | IRQ_INPROGRESS; - } else if (desc->status & IRQ_DISABLED) { - /* Damn, the interrupt actually arrived, do the lazy - * disable thing. The interrupt routine in irq.c will - * not handle a IRQ_DISABLED interrupt, so nothing more - * need be done here */ - VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n", - irq, cpu)); - disable_local_vic_irq(irq); - desc->status |= IRQ_REPLAY; - } else { - desc->status &= ~IRQ_REPLAY; - } - - _raw_spin_unlock(&vic_irq_lock); -} - -/* Finish the VIC interrupt: basically mask */ -static void after_handle_vic_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_to_desc(irq); - - _raw_spin_lock(&vic_irq_lock); - { - unsigned int status = desc->status & ~IRQ_INPROGRESS; -#ifdef VOYAGER_DEBUG - __u16 isr; -#endif - - desc->status = status; - if ((status & IRQ_DISABLED)) - disable_local_vic_irq(irq); -#ifdef VOYAGER_DEBUG - /* DEBUG: before we ack, check what's in progress */ - isr = vic_read_isr(); - if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) { - int i; - __u8 cpu = smp_processor_id(); - __u8 real_cpu; - int mask; /* Um... initialize me??? --RR */ - - printk("VOYAGER SMP: CPU%d lost interrupt %d\n", - cpu, irq); - for_each_possible_cpu(real_cpu, mask) { - - outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu, - VIC_PROCESSOR_ID); - isr = vic_read_isr(); - if (isr & (1 << irq)) { - printk - ("VOYAGER SMP: CPU%d ack irq %d\n", - real_cpu, irq); - ack_vic_irq(irq); - } - outb(cpu, VIC_PROCESSOR_ID); - } - } -#endif /* VOYAGER_DEBUG */ - /* as soon as we ack, the interrupt is eligible for - * receipt by another CPU so everything must be in - * order here */ - ack_vic_irq(irq); - if (status & IRQ_REPLAY) { - /* replay is set if we disable the interrupt - * in the before_handle_vic_irq() routine, so - * clear the in progress bit here to allow the - * next CPU to handle this correctly */ - desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS); - } -#ifdef VOYAGER_DEBUG - isr = vic_read_isr(); - if ((isr & (1 << irq)) != 0) - printk("VOYAGER SMP: after_handle_vic_irq() after " - "ack irq=%d, isr=0x%x\n", irq, isr); -#endif /* VOYAGER_DEBUG */ - } - _raw_spin_unlock(&vic_irq_lock); - - /* All code after this point is out of the main path - the IRQ - * may be intercepted by another CPU if reasserted */ -} - -/* Linux processor - interrupt affinity manipulations. - * - * For each processor, we maintain a 32 bit irq affinity mask. - * Initially it is set to all 1's so every processor accepts every - * interrupt. In this call, we change the processor's affinity mask: - * - * Change from enable to disable: - * - * If the interrupt ever comes in to the processor, we will disable it - * and ack it to push it off to another CPU, so just accept the mask here. - * - * Change from disable to enable: - * - * change the mask and then do an interrupt enable CPI to re-enable on - * the selected processors */ - -void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask) -{ - /* Only extended processors handle interrupts */ - unsigned long real_mask; - unsigned long irq_mask = 1 << irq; - int cpu; - - real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors; - - if (cpus_addr(*mask)[0] == 0) - /* can't have no CPUs to accept the interrupt -- extremely - * bad things will happen */ - return; - - if (irq == 0) - /* can't change the affinity of the timer IRQ. This - * is due to the constraint in the voyager - * architecture that the CPI also comes in on and IRQ - * line and we have chosen IRQ0 for this. If you - * raise the mask on this interrupt, the processor - * will no-longer be able to accept VIC CPIs */ - return; - - if (irq >= 32) - /* You can only have 32 interrupts in a voyager system - * (and 32 only if you have a secondary microchannel - * bus) */ - return; - - for_each_online_cpu(cpu) { - unsigned long cpu_mask = 1 << cpu; - - if (cpu_mask & real_mask) { - /* enable the interrupt for this cpu */ - cpu_irq_affinity[cpu] |= irq_mask; - } else { - /* disable the interrupt for this cpu */ - cpu_irq_affinity[cpu] &= ~irq_mask; - } - } - /* this is magic, we now have the correct affinity maps, so - * enable the interrupt. This will send an enable CPI to - * those CPUs who need to enable it in their local masks, - * causing them to correct for the new affinity . If the - * interrupt is currently globally disabled, it will simply be - * disabled again as it comes in (voyager lazy disable). If - * the affinity map is tightened to disable the interrupt on a - * cpu, it will be pushed off when it comes in */ - unmask_vic_irq(irq); -} - -static void ack_vic_irq(unsigned int irq) -{ - if (irq & 8) { - outb(0x62, 0x20); /* Specific EOI to cascade */ - outb(0x60 | (irq & 7), 0xA0); - } else { - outb(0x60 | (irq & 7), 0x20); - } -} - -/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259 - * but are not vectored by it. This means that the 8259 mask must be - * lowered to receive them */ -static __init void vic_enable_cpi(void) -{ - __u8 cpu = smp_processor_id(); - - /* just take a copy of the current mask (nop for boot cpu) */ - vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id]; - - enable_local_vic_irq(VIC_CPI_LEVEL0); - enable_local_vic_irq(VIC_CPI_LEVEL1); - /* for sys int and cmn int */ - enable_local_vic_irq(7); - - if (is_cpu_quad()) { - outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); - outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); - VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, QIC_CPI_ENABLE)); - } - - VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n", - cpu, vic_irq_mask[cpu])); -} - -void voyager_smp_dump() -{ - int old_cpu = smp_processor_id(), cpu; - - /* dump the interrupt masks of each processor */ - for_each_online_cpu(cpu) { - __u16 imr, isr, irr; - unsigned long flags; - - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); - imr = (inb(0xa1) << 8) | inb(0x21); - outb(0x0a, 0xa0); - irr = inb(0xa0) << 8; - outb(0x0a, 0x20); - irr |= inb(0x20); - outb(0x0b, 0xa0); - isr = inb(0xa0) << 8; - outb(0x0b, 0x20); - isr |= inb(0x20); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n", - cpu, vic_irq_mask[cpu], imr, irr, isr); -#if 0 - /* These lines are put in to try to unstick an un ack'd irq */ - if (isr != 0) { - int irq; - for (irq = 0; irq < 16; irq++) { - if (isr & (1 << irq)) { - printk("\tCPU%d: ack irq %d\n", - cpu, irq); - local_irq_save(flags); - outb(VIC_CPU_MASQUERADE_ENABLE | cpu, - VIC_PROCESSOR_ID); - ack_vic_irq(irq); - outb(old_cpu, VIC_PROCESSOR_ID); - local_irq_restore(flags); - } - } - } -#endif - } -} - -void smp_voyager_power_off(void *dummy) -{ - if (smp_processor_id() == boot_cpu_id) - voyager_power_off(); - else - smp_stop_cpu_function(NULL); -} - -static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) -{ - /* FIXME: ignore max_cpus for now */ - smp_boot_cpus(); -} - -static void __cpuinit voyager_smp_prepare_boot_cpu(void) -{ - int cpu = smp_processor_id(); - switch_to_new_gdt(cpu); - - cpu_set(cpu, cpu_online_map); - cpu_set(cpu, cpu_callout_map); - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); - -} - -static int __cpuinit voyager_cpu_up(unsigned int cpu) -{ - /* This only works at boot for x86. See "rewrite" above. */ - if (cpu_isset(cpu, smp_commenced_mask)) - return -ENOSYS; - - /* In case one didn't come up */ - if (!cpu_isset(cpu, cpu_callin_map)) - return -EIO; - /* Unleash the CPU! */ - cpu_set(cpu, smp_commenced_mask); - while (!cpu_online(cpu)) - mb(); - return 0; -} - -static void __init voyager_smp_cpus_done(unsigned int max_cpus) -{ - zap_low_mappings(); -} - -void __init smp_setup_processor_id(void) -{ - current_thread_info()->cpu = hard_smp_processor_id(); -} - -static void voyager_send_call_func(const struct cpumask *callmask) -{ - __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id()); - send_CPI(mask, VIC_CALL_FUNCTION_CPI); -} - -static void voyager_send_call_func_single(int cpu) -{ - send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI); -} - -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, - .smp_prepare_cpus = voyager_smp_prepare_cpus, - .cpu_up = voyager_cpu_up, - .smp_cpus_done = voyager_smp_cpus_done, - - .smp_send_stop = voyager_smp_send_stop, - .smp_send_reschedule = voyager_smp_send_reschedule, - - .send_call_func_ipi = voyager_send_call_func, - .send_call_func_single_ipi = voyager_send_call_func_single, -}; diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c deleted file mode 100644 index 15464a20fb3..00000000000 --- a/arch/x86/mach-voyager/voyager_thread.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8 -*- */ - -/* Copyright (C) 2001 - * - * Author: J.E.J.Bottomley@HansenPartnership.com - * - * This module provides the machine status monitor thread for the - * voyager architecture. This allows us to monitor the machine - * environment (temp, voltage, fan function) and the front panel and - * internal UPS. If a fault is detected, this thread takes corrective - * action (usually just informing init) - * */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct task_struct *voyager_thread; -static __u8 set_timeout; - -static int execute(const char *string) -{ - int ret; - - char *envp[] = { - "HOME=/", - "TERM=linux", - "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL, - }; - char *argv[] = { - "/bin/bash", - "-c", - (char *)string, - NULL, - }; - - if ((ret = - call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { - printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, - ret); - } - return ret; -} - -static void check_from_kernel(void) -{ - if (voyager_status.switch_off) { - - /* FIXME: This should be configurable via proc */ - execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); - } else if (voyager_status.power_fail) { - VDEBUG(("Voyager daemon detected AC power failure\n")); - - /* FIXME: This should be configureable via proc */ - execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1"); - set_timeout = 1; - } -} - -static void check_continuing_condition(void) -{ - if (voyager_status.power_fail) { - __u8 data; - voyager_cat_psi(VOYAGER_PSI_SUBREAD, - VOYAGER_PSI_AC_FAIL_REG, &data); - if ((data & 0x1f) == 0) { - /* all power restored */ - printk(KERN_NOTICE - "VOYAGER AC power restored, cancelling shutdown\n"); - /* FIXME: should be user configureable */ - execute - ("umask 600; echo O > /etc/powerstatus; kill -PWR 1"); - set_timeout = 0; - } - } -} - -static int thread(void *unused) -{ - printk(KERN_NOTICE "Voyager starting monitor thread\n"); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT); - - VDEBUG(("Voyager Daemon awoken\n")); - if (voyager_status.request_from_kernel == 0) { - /* probably awoken from timeout */ - check_continuing_condition(); - } else { - check_from_kernel(); - voyager_status.request_from_kernel = 0; - } - } -} - -static int __init voyager_thread_start(void) -{ - voyager_thread = kthread_run(thread, NULL, "kvoyagerd"); - if (IS_ERR(voyager_thread)) { - printk(KERN_ERR - "Voyager: Failed to create system monitor thread.\n"); - return PTR_ERR(voyager_thread); - } - return 0; -} - -static void __exit voyager_thread_stop(void) -{ - kthread_stop(voyager_thread); -} - -module_init(voyager_thread_start); -module_exit(voyager_thread_stop); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 87b9ab16642..b83e119fbeb 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -6,7 +6,7 @@ config XEN bool "Xen guest support" select PARAVIRT select PARAVIRT_CLOCK - depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) + depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) depends on X86_CMPXCHG && X86_TSC help This is the Linux Xen port. Enabling this will allow the diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index 76f2b36881c..a3d3cbab359 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig @@ -1,6 +1,6 @@ config LGUEST tristate "Linux hypervisor example code" - depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !X86_VOYAGER + depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX select HVC_DRIVER ---help--- This is a very simple module which allows you to run From d85a881d780cc7aaebe1b7aefcddbcb939acbe2d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Feb 2009 00:29:45 +0100 Subject: [PATCH 573/682] x86: remove various unused subarch hooks Impact: remove dead code Remove: - pre_setup_arch_hook() - mca_nmi_hook() If needed they can be added back via an x86_quirk handler. Signed-off-by: Ingo Molnar --- arch/x86/include/asm/arch_hooks.h | 6 +++--- arch/x86/kernel/mca_32.c | 4 +--- arch/x86/kernel/setup.c | 34 ------------------------------- 3 files changed, 4 insertions(+), 40 deletions(-) diff --git a/arch/x86/include/asm/arch_hooks.h b/arch/x86/include/asm/arch_hooks.h index cbd4957838a..54248172be1 100644 --- a/arch/x86/include/asm/arch_hooks.h +++ b/arch/x86/include/asm/arch_hooks.h @@ -15,12 +15,12 @@ extern void init_ISA_irqs(void); extern irqreturn_t timer_interrupt(int irq, void *dev_id); /* these are the defined hooks */ -extern void intr_init_hook(void); extern void pre_intr_init_hook(void); -extern void pre_setup_arch_hook(void); +extern void intr_init_hook(void); + extern void trap_init_hook(void); + extern void pre_time_init_hook(void); extern void time_init_hook(void); -extern void mca_nmi_hook(void); #endif /* _ASM_X86_ARCH_HOOKS_H */ diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 2dc183758be..f74eef52ab5 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -474,6 +474,4 @@ void __kprobes mca_handle_nmi(void) * adapter was responsible for the error. */ bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); - - mca_nmi_hook(); -} /* mca_handle_nmi */ +} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d699811b3f7..d4de1e4c204 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -668,7 +668,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); - pre_setup_arch_hook(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); #endif @@ -1022,18 +1021,6 @@ void __init intr_init_hook(void) } } -/** - * pre_setup_arch_hook - hook called prior to any setup_arch() execution - * - * Description: - * generally used to activate any machine specific identification - * routines that may be needed before setup_arch() runs. On Voyager - * this is used to get the board revision and type. - **/ -void __init pre_setup_arch_hook(void) -{ -} - /** * trap_init_hook - initialise system specific traps * @@ -1088,25 +1075,4 @@ void __init time_init_hook(void) irq0.mask = cpumask_of_cpu(0); setup_irq(0, &irq0); } - -#ifdef CONFIG_MCA -/** - * mca_nmi_hook - hook into MCA specific NMI chain - * - * Description: - * The MCA (Microchannel Architecture) has an NMI chain for NMI sources - * along the MCA bus. Use this to hook into that chain if you will need - * it. - **/ -void mca_nmi_hook(void) -{ - /* - * If I recall correctly, there's a whole bunch of other things that - * we can do to check for NMI problems, but that's all I know about - * at the moment. - */ - pr_warning("NMI generated from unknown source!\n"); -} -#endif /* CONFIG_MCA */ - #endif /* CONFIG_X86_32 */ From 8e6dafd6c741cd4679b4de3c5d9698851e4fa59c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Feb 2009 00:34:39 +0100 Subject: [PATCH 574/682] x86: refactor x86_quirks support Impact: cleanup Make x86_quirks support more transparent. The highlevel methods are now named: extern void x86_quirk_pre_intr_init(void); extern void x86_quirk_intr_init(void); extern void x86_quirk_trap_init(void); extern void x86_quirk_pre_time_init(void); extern void x86_quirk_time_init(void); This makes it clear that if some platform extension has to do something here that it is considered ... weird, and is discouraged. Also remove arch_hooks.h and move it into setup.h (and other header files where appropriate). Signed-off-by: Ingo Molnar --- arch/x86/include/asm/arch_hooks.h | 26 -------------------------- arch/x86/include/asm/i8259.h | 4 ++++ arch/x86/include/asm/setup.h | 9 +++++++++ arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/apic/apic.c | 1 - arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/i8259.c | 1 - arch/x86/kernel/irqinit_32.c | 13 +++++++------ arch/x86/kernel/kvmclock.c | 1 - arch/x86/kernel/mca_32.c | 1 - arch/x86/kernel/paravirt.c | 1 - arch/x86/kernel/setup.c | 23 ++++++++++++----------- arch/x86/kernel/time_32.c | 6 +++--- arch/x86/kernel/traps.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 1 - arch/x86/kernel/vmiclock_32.c | 1 - 16 files changed, 39 insertions(+), 56 deletions(-) delete mode 100644 arch/x86/include/asm/arch_hooks.h diff --git a/arch/x86/include/asm/arch_hooks.h b/arch/x86/include/asm/arch_hooks.h deleted file mode 100644 index 54248172be1..00000000000 --- a/arch/x86/include/asm/arch_hooks.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _ASM_X86_ARCH_HOOKS_H -#define _ASM_X86_ARCH_HOOKS_H - -#include - -/* - * linux/include/asm/arch_hooks.h - * - * define the architecture specific hooks - */ - -/* these aren't arch hooks, they are generic routines - * that can be used by the hooks */ -extern void init_ISA_irqs(void); -extern irqreturn_t timer_interrupt(int irq, void *dev_id); - -/* these are the defined hooks */ -extern void pre_intr_init_hook(void); -extern void intr_init_hook(void); - -extern void trap_init_hook(void); - -extern void pre_time_init_hook(void); -extern void time_init_hook(void); - -#endif /* _ASM_X86_ARCH_HOOKS_H */ diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091eeb1..1a99e6c092a 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -60,4 +60,8 @@ extern struct irq_chip i8259A_chip; extern void mask_8259A(void); extern void unmask_8259A(void); +#ifdef CONFIG_X86_32 +extern void init_ISA_irqs(void); +#endif + #endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 8029369cd6f..66801cb72f6 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -13,6 +13,7 @@ struct mpc_cpu; struct mpc_bus; struct mpc_oemtable; + struct x86_quirks { int (*arch_pre_time_init)(void); int (*arch_time_init)(void); @@ -33,6 +34,14 @@ struct x86_quirks { int (*update_apic)(void); }; +extern void x86_quirk_pre_intr_init(void); +extern void x86_quirk_intr_init(void); + +extern void x86_quirk_trap_init(void); + +extern void x86_quirk_pre_time_init(void); +extern void x86_quirk_time_init(void); + #endif /* __ASSEMBLY__ */ #ifdef __i386__ diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 2bb6a835c45..a81195eaa2b 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -3,6 +3,7 @@ #include #include #include +#include #define TICK_SIZE (tick_nsec / 1000) @@ -12,6 +13,7 @@ unsigned long native_calibrate_tsc(void); #ifdef CONFIG_X86_32 extern int timer_ack; extern int recalibrate_cpu_khz(void); +extern irqreturn_t timer_interrupt(int irq, void *dev_id); #endif /* CONFIG_X86_32 */ extern int no_timer_check; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c6bd7710585..f9cecdfd05c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -34,7 +34,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index c9ec90742e9..3a730fa574b 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 11d5093eb28..df89102bef8 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -22,7 +22,6 @@ #include #include #include -#include #include /* diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index bf629cadec1..50b8c3a3006 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include @@ -127,8 +127,8 @@ void __init native_init_IRQ(void) { int i; - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); /* * Cover the whole vector space, no vector can escape @@ -188,10 +188,11 @@ void __init native_init_IRQ(void) if (!acpi_ioapic) setup_irq(2, &irq2); - /* setup after call gates are initialised (usually add in - * the architecture specific gates) + /* + * Call quirks after call gates are initialised (usually add in + * the architecture specific gates): */ - intr_init_hook(); + x86_quirk_intr_init(); /* * External FPU? Set up irq13 if so, for diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 652fce6d2cc..137f2e8132d 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index f74eef52ab5..845d80ce1ef 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -51,7 +51,6 @@ #include #include #include -#include static unsigned char which_scsi; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6dc4dca255e..63dd358d8ee 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d4de1e4c204..5b85759e797 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -74,8 +74,9 @@ #include #include #include -#include #include +#include +#include #include #include #include @@ -987,7 +988,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 /** - * pre_intr_init_hook - initialisation prior to setting up interrupt vectors + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors * * Description: * Perform any necessary interrupt initialisation prior to setting up @@ -995,7 +996,7 @@ void __init setup_arch(char **cmdline_p) * interrupts should be initialised here if the machine emulates a PC * in any way. **/ -void __init pre_intr_init_hook(void) +void __init x86_quirk_pre_intr_init(void) { if (x86_quirks->arch_pre_intr_init) { if (x86_quirks->arch_pre_intr_init()) @@ -1005,7 +1006,7 @@ void __init pre_intr_init_hook(void) } /** - * intr_init_hook - post gate setup interrupt initialisation + * x86_quirk_intr_init - post gate setup interrupt initialisation * * Description: * Fill in any interrupts that may have been left out by the general @@ -1013,7 +1014,7 @@ void __init pre_intr_init_hook(void) * than the devices on the I/O bus (like APIC interrupts in intel MP * systems) are started here. **/ -void __init intr_init_hook(void) +void __init x86_quirk_intr_init(void) { if (x86_quirks->arch_intr_init) { if (x86_quirks->arch_intr_init()) @@ -1022,13 +1023,13 @@ void __init intr_init_hook(void) } /** - * trap_init_hook - initialise system specific traps + * x86_quirk_trap_init - initialise system specific traps * * Description: * Called as the final act of trap_init(). Used in VISWS to initialise * the various board specific APIC traps. **/ -void __init trap_init_hook(void) +void __init x86_quirk_trap_init(void) { if (x86_quirks->arch_trap_init) { if (x86_quirks->arch_trap_init()) @@ -1044,23 +1045,23 @@ static struct irqaction irq0 = { }; /** - * pre_time_init_hook - do any specific initialisations before. + * x86_quirk_pre_time_init - do any specific initialisations before. * **/ -void __init pre_time_init_hook(void) +void __init x86_quirk_pre_time_init(void) { if (x86_quirks->arch_pre_time_init) x86_quirks->arch_pre_time_init(); } /** - * time_init_hook - do any specific initialisations for the system timer. + * x86_quirk_time_init - do any specific initialisations for the system timer. * * Description: * Must plug the system timer interrupt source at HZ into the IRQ listed * in irq_vectors.h:TIMER_IRQ **/ -void __init time_init_hook(void) +void __init x86_quirk_time_init(void) { if (x86_quirks->arch_time_init) { /* diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 764c74e871f..5c5d87f0b2e 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -118,7 +118,7 @@ void __init hpet_time_init(void) { if (!hpet_enable()) setup_pit_timer(); - time_init_hook(); + x86_quirk_time_init(); } /* @@ -131,7 +131,7 @@ void __init hpet_time_init(void) */ void __init time_init(void) { - pre_time_init_hook(); + x86_quirk_pre_time_init(); tsc_init(); late_time_init = choose_time_init(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index acb8c0585ab..c8c0a7e530b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -61,7 +61,7 @@ #include #else #include -#include +#include #include #include "cpu/mcheck/mce.h" @@ -1026,6 +1026,6 @@ void __init trap_init(void) cpu_init(); #ifdef CONFIG_X86_32 - trap_init_hook(); + x86_quirk_trap_init(); #endif } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34199d30ff4..191a876e9e8 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -24,7 +24,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 8c48b465059..49b4cd6707f 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -28,7 +28,6 @@ #include #include -#include #include #include #include From 0d3a9cf5ab041c15691fd03dab3af0841af63606 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 22 Feb 2009 14:58:56 -0800 Subject: [PATCH 575/682] acpi: add some missing section markers early_acpi_os_unmap_memory() is an __init function, and acpi_os_unmap_memory() is allowed to access an __init function until acpi_gbl_permanent_mmap is set up. Signed-off-by: Jeremy Fitzhardinge Cc: Len Brown Signed-off-by: Ingo Molnar --- drivers/acpi/osl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index d1dd5160daa..2b6c5902825 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -272,7 +272,7 @@ acpi_os_map_memory(acpi_physical_address phys, acpi_size size) } EXPORT_SYMBOL_GPL(acpi_os_map_memory); -void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) +void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size) { if (acpi_gbl_permanent_mmap) iounmap(virt); @@ -281,7 +281,7 @@ void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) } EXPORT_SYMBOL_GPL(acpi_os_unmap_memory); -void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size) +void __init early_acpi_os_unmap_memory(void __iomem *virt, acpi_size size) { if (!acpi_gbl_permanent_mmap) __acpi_unmap_table(virt, size); From ecda06289f8202d4c6beb235f59ea464f4a91209 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 22 Feb 2009 22:14:56 -0800 Subject: [PATCH 576/682] x86: check mptable physptr with max_low_pfn on 32bit Impact: fix early crash on LinuxBIOS systems Kevin O'Connor reported that Coreboot aka LinuxBIOS tries to put mptable somewhere very high, well above max_low_pfn (below which BIOSes generally put the mptable), causing a panic. The BIOS will probably be changed to be compatible with older Linus versions, but nevertheless the MP-spec does not forbid an MP-table in arbitrary system RAM, so make sure it all works even if the table is in an unexpected place. Check physptr with max_low_pfn * PAGE_SIZE. Reported-by: Kevin O'Connor Signed-off-by: Yinghai Lu Cc: Stefan Reinauer Cc: coreboot@coreboot.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 7f4d2586972..37cb1bda1ba 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -710,13 +710,22 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, * of physical memory; so that simply reserving * PAGE_SIZE from mpf->physptr yields BUG() * in reserve_bootmem. + * also need to make sure physptr is below than + * max_low_pfn + * we don't need reserve the area above max_low_pfn */ unsigned long end = max_low_pfn * PAGE_SIZE; - if (mpf->physptr + size > end) - size = end - mpf->physptr; -#endif + + if (mpf->physptr < end) { + if (mpf->physptr + size > end) + size = end - mpf->physptr; + reserve_bootmem_generic(mpf->physptr, size, + BOOTMEM_DEFAULT); + } +#else reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); +#endif } return 1; From bda3a89745d7bb88d3e735046c0cdc3d0eb2ac24 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Mon, 23 Feb 2009 19:13:07 +0300 Subject: [PATCH 577/682] x86: minor cleanup in the espfix code Impact: Cleanup Checkin be44d2aabce2d62f72d5751d1871b6212bf7a1c7 eliminates the use of a 16-bit stack for espfix. However, at least one instruction remained that only operated on the low 16 bits of %esp. This is not a bug per se because the kernel stack is always an aligned 4K or 8K block. Therefore it cannot cross 64K boundaries; this code, in fact, relies strictly on that fact. However, it's a lot cleaner (and, for that matter, smaller) to operate on the entire 32-bit register. Signed-off-by: Stas Sergeev CC: Zachary Amsden CC: Chuck Ebbert Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 999e827ef9c..899e8938e79 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1359,7 +1359,7 @@ nmi_espfix_stack: CFI_ADJUST_CFA_OFFSET 4 pushl %esp CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) + addl $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 pushl 16(%esp) From cb83b42e23bd6c4bf91793a320fbe83787c13596 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:20 +0900 Subject: [PATCH 578/682] percpu: fix pcpu_chunk_struct_size Impact: fix short allocation leading to memory corruption While dropping rvalue wrapping macros around global parameters, pcpu_chunk_struct_size was set incorrectly resulting in shorter page pointer array. Fix it. Signed-off-by: Tejun Heo --- mm/percpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 997724c2ea2..ed92caa2aa3 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -850,7 +850,7 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_nr_slots = pcpu_size_to_slot(pcpu_unit_size) + 1; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + (1 << pcpu_unit_pages_shift) * sizeof(struct page *); + + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); /* allocate chunk slots */ pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); From c132937556f56ee4b831ef4b23f1846e05fde102 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:20 +0900 Subject: [PATCH 579/682] bootmem: clean up arch-specific bootmem wrapping Impact: cleaner and consistent bootmem wrapping By setting CONFIG_HAVE_ARCH_BOOTMEM_NODE, archs can define arch-specific wrappers for bootmem allocation. However, this is done a bit strangely in that only the high level convenience macros can be changed while lower level, but still exported, interface functions can't be wrapped. This not only is messy but also leads to strange situation where alloc_bootmem() does what the arch wants it to do but the equivalent __alloc_bootmem() call doesn't although they should be able to be used interchangeably. This patch updates bootmem such that archs can override / wrap the backend function - alloc_bootmem_core() instead of the highlevel interface functions to allow simpler and consistent wrapping. Also, HAVE_ARCH_BOOTMEM_NODE is renamed to HAVE_ARCH_BOOTMEM. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- arch/avr32/Kconfig | 2 +- arch/x86/Kconfig | 2 +- arch/x86/include/asm/mmzone_32.h | 43 ++++---------------------------- include/linux/bootmem.h | 10 +++----- mm/bootmem.c | 14 ++++++++--- 5 files changed, 22 insertions(+), 49 deletions(-) diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index b189680d18b..05fe3053dca 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -181,7 +181,7 @@ source "kernel/Kconfig.preempt" config QUICKLIST def_bool y -config HAVE_ARCH_BOOTMEM_NODE +config HAVE_ARCH_BOOTMEM def_bool n config ARCH_HAVE_MEMORY_PRESENT diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d3f6eadfd4b..6fd3b2302ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1111,7 +1111,7 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accomodate various tables. -config HAVE_ARCH_BOOTMEM_NODE +config HAVE_ARCH_BOOTMEM def_bool y depends on X86_32 && NUMA diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 07f1af494ca..1e0fa9e63af 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -93,45 +93,12 @@ static inline int pfn_valid(int pfn) #endif /* CONFIG_DISCONTIGMEM */ #ifdef CONFIG_NEED_MULTIPLE_NODES - -/* - * Following are macros that are specific to this numa platform. - */ -#define reserve_bootmem(addr, size, flags) \ - reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags)) -#define alloc_bootmem(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_nopanic(x) \ - __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ - __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) -#define alloc_bootmem_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_pages_nopanic(x) \ - __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \ - __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define alloc_bootmem_node(pgdat, x) \ +/* always use node 0 for bootmem on this numa platform */ +#define alloc_bootmem_core(__bdata, size, align, goal, limit) \ ({ \ - struct pglist_data __maybe_unused \ - *__alloc_bootmem_node__pgdat = (pgdat); \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ - __pa(MAX_DMA_ADDRESS)); \ -}) -#define alloc_bootmem_pages_node(pgdat, x) \ -({ \ - struct pglist_data __maybe_unused \ - *__alloc_bootmem_node__pgdat = (pgdat); \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ - __pa(MAX_DMA_ADDRESS)); \ -}) -#define alloc_bootmem_low_pages_node(pgdat, x) \ -({ \ - struct pglist_data __maybe_unused \ - *__alloc_bootmem_node__pgdat = (pgdat); \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ + bootmem_data_t __maybe_unused * __abm_bdata_dummy = (__bdata); \ + __alloc_bootmem_core(NODE_DATA(0)->bdata, \ + (size), (align), (goal), (limit)); \ }) #endif /* CONFIG_NEED_MULTIPLE_NODES */ diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 95837bfb525..3a87f93081e 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -69,10 +69,9 @@ extern int reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -#endif - +extern int reserve_bootmem(unsigned long addr, + unsigned long size, + int flags); extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); @@ -94,7 +93,7 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE + #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ @@ -113,7 +112,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, int flags); diff --git a/mm/bootmem.c b/mm/bootmem.c index 51a0ccf61e0..d7140c008ba 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -37,6 +37,16 @@ static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); static int bootmem_debug; +/* + * If an arch needs to apply workarounds to bootmem allocation, it can + * set CONFIG_HAVE_ARCH_BOOTMEM and define a wrapper around + * __alloc_bootmem_core(). + */ +#ifndef CONFIG_HAVE_ARCH_BOOTMEM +#define alloc_bootmem_core(bdata, size, align, goal, limit) \ + __alloc_bootmem_core((bdata), (size), (align), (goal), (limit)) +#endif + static int __init bootmem_debug_setup(char *buf) { bootmem_debug = 1; @@ -382,7 +392,6 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); } -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE /** * reserve_bootmem - mark a page range as usable * @addr: starting address of the range @@ -403,7 +412,6 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, return mark_bootmem(start, end, 1, flags); } -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx, unsigned long step) @@ -428,7 +436,7 @@ static unsigned long align_off(struct bootmem_data *bdata, unsigned long off, return ALIGN(base + off, align) - base; } -static void * __init alloc_bootmem_core(struct bootmem_data *bdata, +static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { From 2d0aae41695257603fc281b519677131ab5a752b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 580/682] bootmem: reorder interface functions and add a missing one Impact: cleanup and addition of missing interface wrapper The interface functions in bootmem.h was ordered in not so orderly manner. Reorder them such that * functions allocating the same area group together - ie. alloc_bootmem group and alloc_bootmem_low group. * functions w/o node parameter come before the ones w/ node parameter. * nopanic variants are immediately below their panicky counterparts. While at it, add alloc_bootmem_pages_node_nopanic() which was missing. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3a87f93081e..455d83219fa 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -65,22 +65,20 @@ extern void free_bootmem(unsigned long addr, unsigned long size); #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) -extern int reserve_bootmem_node(pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size, - int flags); extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -extern void *__alloc_bootmem_nopanic(unsigned long size, +extern int reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size, + int flags); + +extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem(unsigned long size, +extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -89,6 +87,9 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -98,18 +99,21 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low(x) \ - __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_nopanic(x) \ __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + +#define alloc_bootmem_low(x) \ + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) From c0c0a29379b5848aec2e8f1c58d853d3cb7118b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 581/682] vmalloc: add @align to vm_area_register_early() Impact: allow larger alignment for early vmalloc area allocation Some early vmalloc users might want larger alignment, for example, for custom large page mapping. Add @align to vm_area_register_early(). While at it, drop docbook comment on non-existent @size. Signed-off-by: Tejun Heo Cc: Nick Piggin Cc: Ivan Kokshaysky --- arch/alpha/mm/init.c | 2 +- include/linux/vmalloc.h | 2 +- mm/percpu.c | 2 +- mm/vmalloc.c | 11 +++++++---- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index df6df025ded..91eddd8505d 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -200,7 +200,7 @@ callback_init(void * kernel_end) /* register the vm area */ console_remap_vm.flags = VM_ALLOC; console_remap_vm.size = nr_pages << PAGE_SHIFT; - vm_area_register_early(&console_remap_vm); + vm_area_register_early(&console_remap_vm, PAGE_SIZE); vaddr = (unsigned long)consle_remap_vm.addr; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 599ba798431..2f6994fdf0e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -109,6 +109,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern __init void vm_area_register_early(struct vm_struct *vm); +extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/percpu.c b/mm/percpu.c index ed92caa2aa3..41e7a5f5ab1 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -860,7 +860,7 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, /* init and register vm area */ static_vm.flags = VM_ALLOC; static_vm.size = pcpu_chunk_size; - vm_area_register_early(&static_vm); + vm_area_register_early(&static_vm, PAGE_SIZE); /* init static_chunk */ static_chunk = alloc_bootmem(pcpu_chunk_struct_size); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 224eca9650a..366ae9ea6af 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -995,7 +995,7 @@ EXPORT_SYMBOL(vm_map_ram); /** * vm_area_register_early - register vmap area early during boot * @vm: vm_struct to register - * @size: size of area to register + * @align: requested alignment * * This function is used to register kernel vm area before * vmalloc_init() is called. @vm->size and @vm->flags should contain @@ -1004,12 +1004,15 @@ EXPORT_SYMBOL(vm_map_ram); * * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. */ -void __init vm_area_register_early(struct vm_struct *vm) +void __init vm_area_register_early(struct vm_struct *vm, size_t align) { static size_t vm_init_off __initdata; + unsigned long addr; - vm->addr = (void *)VMALLOC_START + vm_init_off; - vm_init_off = PFN_ALIGN(vm_init_off + vm->size); + addr = ALIGN(VMALLOC_START + vm_init_off, align); + vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; + + vm->addr = (void *)addr; vm->next = vmlist; vmlist = vm; From 458a3e644c3327be529393982e24277eda8f1ac7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 582/682] x86: update populate_extra_pte() and add populate_extra_pmd() Impact: minor change to populate_extra_pte() and addition of pmd flavor Update populate_extra_pte() to return pointer to the pte_t for the specified address and add populate_extra_pmd() which only populates till the pmd and returns pointer to the pmd entry for the address. For 64bit, pud/pmd/pte fill functions are separated out from set_pte_vaddr[_pud]() and used for set_pte_vaddr[_pud]() and populate_extra_{pte|pmd}(). Signed-off-by: Tejun Heo --- arch/x86/include/asm/pgtable.h | 3 +- arch/x86/kernel/setup_percpu.c | 7 ++- arch/x86/mm/init_32.c | 13 ++++-- arch/x86/mm/init_64.c | 83 ++++++++++++++++++++-------------- 4 files changed, 67 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dd91c2515c6..46312eb0d68 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -402,7 +402,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); -void populate_extra_pte(unsigned long vaddr); +pmd_t *populate_extra_pmd(unsigned long vaddr); +pte_t *populate_extra_pte(unsigned long vaddr); #ifdef CONFIG_X86_32 extern void native_pagetable_setup_start(pgd_t *base); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 2dce4355821..671e6528a82 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -41,6 +41,11 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static void __init pcpu4k_populate_pte(unsigned long addr) +{ + populate_extra_pte(addr); +} + static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -104,7 +109,7 @@ void __init setup_per_cpu_areas(void) } } - pcpu_unit_size = pcpu_setup_static(populate_extra_pte, pages, size); + pcpu_unit_size = pcpu_setup_static(pcpu4k_populate_pte, pages, size); free_bootmem(__pa(pages), pages_size); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8b1a0ef7f87..84a26883ab4 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,14 +137,21 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) return pte_offset_kernel(pmd, 0); } -void __init populate_extra_pte(unsigned long vaddr) +pmd_t * __init populate_extra_pmd(unsigned long vaddr) { int pgd_idx = pgd_index(vaddr); int pmd_idx = pmd_index(vaddr); + + return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; +} + +pte_t * __init populate_extra_pte(unsigned long vaddr) +{ + int pte_idx = pte_index(vaddr); pmd_t *pmd; - pmd = one_md_table_init(swapper_pg_dir + pgd_idx); - one_page_table_init(pmd + pmd_idx); + pmd = populate_extra_pmd(vaddr); + return one_page_table_init(pmd) + pte_idx; } static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7f91e2cdc4c..7d4e76da336 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -168,34 +168,51 @@ static __ref void *spp_getpage(void) return ptr; } -void -set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) +static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) +{ + if (pgd_none(*pgd)) { + pud_t *pud = (pud_t *)spp_getpage(); + pgd_populate(&init_mm, pgd, pud); + if (pud != pud_offset(pgd, 0)) + printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", + pud, pud_offset(pgd, 0)); + } + return pud_offset(pgd, vaddr); +} + +static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) +{ + if (pud_none(*pud)) { + pmd_t *pmd = (pmd_t *) spp_getpage(); + pud_populate(&init_mm, pud, pmd); + if (pmd != pmd_offset(pud, 0)) + printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", + pmd, pmd_offset(pud, 0)); + } + return pmd_offset(pud, vaddr); +} + +static pte_t * __init fill_pte(pmd_t *pmd, unsigned long vaddr) +{ + if (pmd_none(*pmd)) { + pte_t *pte = (pte_t *) spp_getpage(); + pmd_populate_kernel(&init_mm, pmd, pte); + if (pte != pte_offset_kernel(pmd, 0)) + printk(KERN_ERR "PAGETABLE BUG #02!\n"); + } + return pte_offset_kernel(pmd, vaddr); +} + +void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) { pud_t *pud; pmd_t *pmd; pte_t *pte; pud = pud_page + pud_index(vaddr); - if (pud_none(*pud)) { - pmd = (pmd_t *) spp_getpage(); - pud_populate(&init_mm, pud, pmd); - if (pmd != pmd_offset(pud, 0)) { - printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", - pmd, pmd_offset(pud, 0)); - return; - } - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - pte = (pte_t *) spp_getpage(); - pmd_populate_kernel(&init_mm, pmd, pte); - if (pte != pte_offset_kernel(pmd, 0)) { - printk(KERN_ERR "PAGETABLE BUG #02!\n"); - return; - } - } + pmd = fill_pmd(pud, vaddr); + pte = fill_pte(pmd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); set_pte(pte, new_pte); /* @@ -205,8 +222,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) __flush_tlb_one(vaddr); } -void -set_pte_vaddr(unsigned long vaddr, pte_t pteval) +void set_pte_vaddr(unsigned long vaddr, pte_t pteval) { pgd_t *pgd; pud_t *pud_page; @@ -223,23 +239,22 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval) set_pte_vaddr_pud(pud_page, vaddr, pteval); } -void __init populate_extra_pte(unsigned long vaddr) +pmd_t * __init populate_extra_pmd(unsigned long vaddr) { pgd_t *pgd; pud_t *pud; pgd = pgd_offset_k(vaddr); - if (pgd_none(*pgd)) { - pud = (pud_t *)spp_getpage(); - pgd_populate(&init_mm, pgd, pud); - if (pud != pud_offset(pgd, 0)) { - printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", - pud, pud_offset(pgd, 0)); - return; - } - } + pud = fill_pud(pgd, vaddr); + return fill_pmd(pud, vaddr); +} - set_pte_vaddr_pud((pud_t *)pgd_page_vaddr(*pgd), vaddr, __pte(0)); +pte_t * __init populate_extra_pte(unsigned long vaddr) +{ + pmd_t *pmd; + + pmd = populate_extra_pmd(vaddr); + return fill_pte(pmd, vaddr); } /* From d9b55eeb1d55ef2dc5a4fdbff9604c2c68cb5649 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 583/682] percpu: remove unit_size power-of-2 restriction Impact: allow unit_size to be arbitrary multiple of PAGE_SIZE In dynamic percpu allocator, there is no reason the unit size should be power of two. Remove the restriction. As non-power-of-two unit size means that empty chunks fall into the same slot index as lightly occupied chunks which is bad for reclaming. Reserve an extra slot for empty chunks. Signed-off-by: Tejun Heo --- mm/percpu.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 41e7a5f5ab1..d9e6e5d1dbd 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -67,7 +67,7 @@ #include #include -#define PCPU_MIN_UNIT_PAGES_SHIFT 4 /* also max alloc size */ +#define PCPU_MIN_UNIT_PAGES 16 /* max alloc size in pages */ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ @@ -83,9 +83,7 @@ struct pcpu_chunk { struct page *page[]; /* #cpus * UNIT_PAGES */ }; -static int pcpu_unit_pages_shift; static int pcpu_unit_pages; -static int pcpu_unit_shift; static int pcpu_unit_size; static int pcpu_chunk_size; static int pcpu_nr_slots; @@ -117,12 +115,19 @@ static DEFINE_MUTEX(pcpu_mutex); static struct list_head *pcpu_slot; /* chunk list slots */ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ -static int pcpu_size_to_slot(int size) +static int __pcpu_size_to_slot(int size) { int highbit = fls(size); /* size is in bytes */ return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); } +static int pcpu_size_to_slot(int size) +{ + if (size == pcpu_unit_size) + return pcpu_nr_slots - 1; + return __pcpu_size_to_slot(size); +} + static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) { if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) @@ -133,7 +138,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) static int pcpu_page_idx(unsigned int cpu, int page_idx) { - return (cpu << pcpu_unit_pages_shift) + page_idx; + return cpu * pcpu_unit_pages + page_idx; } static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, @@ -659,7 +664,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) goto err; for_each_possible_cpu(cpu) - memset(chunk->vm->addr + (cpu << pcpu_unit_shift) + off, 0, + memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, size); return 0; @@ -722,7 +727,7 @@ void *__alloc_percpu(size_t size, size_t align) struct pcpu_chunk *chunk; int slot, off; - if (unlikely(!size || size > PAGE_SIZE << PCPU_MIN_UNIT_PAGES_SHIFT || + if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE || align > PAGE_SIZE)) { WARN(true, "illegal size (%zu) or align (%zu) for " "percpu allocation\n", size, align); @@ -840,19 +845,19 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, unsigned int cpu; int err, i; - pcpu_unit_pages_shift = max_t(int, PCPU_MIN_UNIT_PAGES_SHIFT, - order_base_2(cpu_size) - PAGE_SHIFT); + pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size)); pcpu_static_size = cpu_size; - pcpu_unit_pages = 1 << pcpu_unit_pages_shift; - pcpu_unit_shift = PAGE_SHIFT + pcpu_unit_pages_shift; - pcpu_unit_size = 1 << pcpu_unit_shift; + pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; - pcpu_nr_slots = pcpu_size_to_slot(pcpu_unit_size) + 1; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); - /* allocate chunk slots */ + /* + * Allocate chunk slots. The additional last slot is for + * empty chunks. + */ + pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); From 8d408b4be37bc49c9086531f2ebe411cf5731746 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 584/682] percpu: give more latitude to arch specific first chunk initialization Impact: more latitude for first percpu chunk allocation The first percpu chunk serves the kernel static percpu area and may or may not contain extra room for further dynamic allocation. Initialization of the first chunk needs to be done before normal memory allocation service is up, so it has its own init path - pcpu_setup_static(). It seems archs need more latitude while initializing the first chunk for example to take advantage of large page mapping. This patch makes the following changes to allow this. * Define PERCPU_DYNAMIC_RESERVE to give arch hint about how much space to reserve in the first chunk for further dynamic allocation. * Rename pcpu_setup_static() to pcpu_setup_first_chunk(). * Make pcpu_setup_first_chunk() much more flexible by fetching page pointer by callback and adding optional @unit_size, @free_size and @base_addr arguments which allow archs to selectively part of chunk initialization to their likings. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 15 +++- include/linux/percpu.h | 39 ++++++++- mm/percpu.c | 151 +++++++++++++++++++++++++-------- 3 files changed, 168 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 671e6528a82..d928e888720 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -41,6 +41,16 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +static struct page **pcpu4k_pages __initdata; +static int pcpu4k_nr_static_pages __initdata; + +static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) +{ + if (pageno < pcpu4k_nr_static_pages) + return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; + return NULL; +} + static void __init pcpu4k_populate_pte(unsigned long addr) { populate_extra_pte(addr); @@ -109,7 +119,10 @@ void __init setup_per_cpu_areas(void) } } - pcpu_unit_size = pcpu_setup_static(pcpu4k_populate_pte, pages, size); + pcpu4k_pages = pages; + pcpu4k_nr_static_pages = nr_cpu_pages; + pcpu_unit_size = pcpu_setup_first_chunk(pcpu4k_get_page, size, 0, 0, + NULL, pcpu4k_populate_pte); free_bootmem(__pa(pages), pages_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 18080995ff3..910beb0abea 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,12 +78,47 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +/* minimum unit size, also is the maximum supported allocation size */ +#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) + +/* + * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy + * back on the first chunk if arch is manually allocating and mapping + * it for faster access (as a part of large page mapping for example). + * Note that dynamic percpu allocator covers both static and dynamic + * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * + * On typical configuration with modules, the following values leave + * about 8k of free space on the first chunk after boot on both x86_32 + * and 64 when module support is enabled. When module support is + * disabled, it's much tighter. + */ +#ifndef PERCPU_DYNAMIC_RESERVE +# if BITS_PER_LONG > 32 +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# endif +# else +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# endif +# endif +#endif /* PERCPU_DYNAMIC_RESERVE */ + extern void *pcpu_base_addr; +typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); -extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, - struct page **pages, size_t cpu_size); +extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index d9e6e5d1dbd..9ac01980cce 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -48,8 +48,8 @@ * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back * - * - use pcpu_setup_static() during percpu area initialization to - * setup kernel static percpu area + * - use pcpu_setup_first_chunk() during percpu area initialization to + * setup the first chunk containing the kernel static percpu area */ #include @@ -67,7 +67,6 @@ #include #include -#define PCPU_MIN_UNIT_PAGES 16 /* max alloc size in pages */ #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ @@ -80,6 +79,7 @@ struct pcpu_chunk { int map_used; /* # of map entries used */ int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ + bool immutable; /* no [de]population allowed */ struct page *page[]; /* #cpus * UNIT_PAGES */ }; @@ -521,6 +521,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, unsigned int last = num_possible_cpus() - 1; unsigned int cpu; + /* unmap must not be done on immutable chunk */ + WARN_ON(chunk->immutable); + /* * Each flushing trial can be very expensive, issue flush on * the whole region at once rather than doing it for each cpu. @@ -602,6 +605,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) unsigned int cpu; int err; + /* map must not be done on immutable chunk */ + WARN_ON(chunk->immutable); + for_each_possible_cpu(cpu) { err = map_kernel_range_noflush( pcpu_chunk_addr(chunk, cpu, page_start), @@ -727,8 +733,7 @@ void *__alloc_percpu(size_t size, size_t align) struct pcpu_chunk *chunk; int slot, off; - if (unlikely(!size || size > PCPU_MIN_UNIT_PAGES * PAGE_SIZE || - align > PAGE_SIZE)) { + if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { WARN(true, "illegal size (%zu) or align (%zu) for " "percpu allocation\n", size, align); return NULL; @@ -776,6 +781,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); static void pcpu_kill_chunk(struct pcpu_chunk *chunk) { + WARN_ON(chunk->immutable); pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); list_del(&chunk->list); rb_erase(&chunk->rb_node, &pcpu_addr_root); @@ -821,33 +827,73 @@ void free_percpu(void *ptr) EXPORT_SYMBOL_GPL(free_percpu); /** - * pcpu_setup_static - initialize kernel static percpu area - * @populate_pte_fn: callback to allocate pagetable - * @pages: num_possible_cpus() * PFN_UP(cpu_size) pages - * @cpu_size: the size of static percpu area in bytes + * pcpu_setup_first_chunk - initialize the first percpu chunk + * @get_page_fn: callback to fetch page pointer + * @static_size: the size of static percpu area in bytes + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto + * @free_size: free size in bytes, 0 for auto + * @base_addr: mapped address, NULL for auto + * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * - * Initialize kernel static percpu area. The caller should allocate - * all the necessary pages and pass them in @pages. - * @populate_pte_fn() is called on each page to be used for percpu - * mapping and is responsible for making sure all the necessary page - * tables for the page is allocated. + * Initialize the first percpu chunk which contains the kernel static + * perpcu area. This function is to be called from arch percpu area + * setup path. The first two parameters are mandatory. The rest are + * optional. + * + * @get_page_fn() should return pointer to percpu page given cpu + * number and page number. It should at least return enough pages to + * cover the static area. The returned pages for static area should + * have been initialized with valid data. If @unit_size is specified, + * it can also return pages after the static area. NULL return + * indicates end of pages for the cpu. Note that @get_page_fn() must + * return the same number of pages for all cpus. + * + * @unit_size, if non-zero, determines unit size and must be aligned + * to PAGE_SIZE and equal to or larger than @static_size + @free_size. + * + * @free_size determines the number of free bytes after the static + * area in the first chunk. If zero, whatever left is available. + * Specifying non-zero value make percpu leave the area after + * @static_size + @free_size alone. + * + * Non-null @base_addr means that the caller already allocated virtual + * region for the first chunk and mapped it. percpu must not mess + * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL + * @populate_pte_fn doesn't make any sense. + * + * @populate_pte_fn is used to populate the pagetable. NULL means the + * caller already populated the pagetable. * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access. */ -size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, - struct page **pages, size_t cpu_size) +size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct static_vm; struct pcpu_chunk *static_chunk; - int nr_cpu_pages = DIV_ROUND_UP(cpu_size, PAGE_SIZE); unsigned int cpu; + int nr_pages; int err, i; - pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_PAGES, PFN_UP(cpu_size)); + /* santiy checks */ + BUG_ON(!static_size); + BUG_ON(!unit_size && free_size); + BUG_ON(unit_size && unit_size < static_size + free_size); + BUG_ON(unit_size & ~PAGE_MASK); + BUG_ON(base_addr && !unit_size); + BUG_ON(base_addr && populate_pte_fn); - pcpu_static_size = cpu_size; + if (unit_size) + pcpu_unit_pages = unit_size >> PAGE_SHIFT; + else + pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, + PFN_UP(static_size)); + + pcpu_static_size = static_size; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) @@ -862,29 +908,66 @@ size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); - /* init and register vm area */ - static_vm.flags = VM_ALLOC; - static_vm.size = pcpu_chunk_size; - vm_area_register_early(&static_vm, PAGE_SIZE); - /* init static_chunk */ static_chunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&static_chunk->list); static_chunk->vm = &static_vm; - static_chunk->free_size = pcpu_unit_size - pcpu_static_size; + + if (free_size) + static_chunk->free_size = free_size; + else + static_chunk->free_size = pcpu_unit_size - pcpu_static_size; + static_chunk->contig_hint = static_chunk->free_size; - /* assign pages and map them */ - for_each_possible_cpu(cpu) { - for (i = 0; i < nr_cpu_pages; i++) { - *pcpu_chunk_pagep(static_chunk, cpu, i) = *pages++; - populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i)); - } + /* allocate vm address */ + static_vm.flags = VM_ALLOC; + static_vm.size = pcpu_chunk_size; + + if (!base_addr) + vm_area_register_early(&static_vm, PAGE_SIZE); + else { + /* + * Pages already mapped. No need to remap into + * vmalloc area. In this case the static chunk can't + * be mapped or unmapped by percpu and is marked + * immutable. + */ + static_vm.addr = base_addr; + static_chunk->immutable = true; } - err = pcpu_map(static_chunk, 0, nr_cpu_pages); - if (err) - panic("failed to setup static percpu area, err=%d\n", err); + /* assign pages */ + nr_pages = -1; + for_each_possible_cpu(cpu) { + for (i = 0; i < pcpu_unit_pages; i++) { + struct page *page = get_page_fn(cpu, i); + + if (!page) + break; + *pcpu_chunk_pagep(static_chunk, cpu, i) = page; + } + + BUG_ON(i < PFN_UP(pcpu_static_size)); + + if (nr_pages < 0) + nr_pages = i; + else + BUG_ON(nr_pages != i); + } + + /* map them */ + if (populate_pte_fn) { + for_each_possible_cpu(cpu) + for (i = 0; i < nr_pages; i++) + populate_pte_fn(pcpu_chunk_addr(static_chunk, + cpu, i)); + + err = pcpu_map(static_chunk, 0, nr_pages); + if (err) + panic("failed to setup static percpu area, err=%d\n", + err); + } /* link static_chunk in */ pcpu_chunk_relocate(static_chunk, -1); From 5f5d8405d1c50f5cf7e1dbfe9c9b44e2f015c8fd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 585/682] x86: separate out setup_pcpu_4k() from setup_per_cpu_areas() Impact: modularize percpu first chunk allocation x86 is gonna have a few different strategies for the first chunk allocation. Modularize it by separating out the current allocation mechanism into pcpu_alloc_bootmem() and setup_pcpu_4k(). Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 144 +++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d928e888720..4a17c96f4f6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,52 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +/** + * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu + * @cpu: cpu to allocate for + * @size: size allocation in bytes + * @align: alignment + * + * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper + * does the right thing for NUMA regardless of the current + * configuration. + * + * RETURNS: + * Pointer to the allocated area on success, NULL on failure. + */ +static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, + unsigned long align) +{ + const unsigned long goal = __pa(MAX_DMA_ADDRESS); +#ifdef CONFIG_NEED_MULTIPLE_NODES + int node = early_cpu_to_node(cpu); + void *ptr; + + if (!node_online(node) || !NODE_DATA(node)) { + ptr = __alloc_bootmem_nopanic(size, align, goal); + pr_info("cpu %d has no node %d or node-local memory\n", + cpu, node); + pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", + cpu, size, __pa(ptr)); + } else { + ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), + size, align, goal); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at " + "%016lx\n", cpu, size, node, __pa(ptr)); + } + return ptr; +#else + return __alloc_bootmem_nopanic(size, align, goal); +#endif +} + +/* + * 4k page allocator + * + * This is the basic allocator. Static percpu area is allocated + * page-by-page and most of initialization is done by the generic + * setup function. + */ static struct page **pcpu4k_pages __initdata; static int pcpu4k_nr_static_pages __initdata; @@ -56,6 +103,51 @@ static void __init pcpu4k_populate_pte(unsigned long addr) populate_extra_pte(addr); } +static ssize_t __init setup_pcpu_4k(size_t static_size) +{ + size_t pages_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + pcpu4k_nr_static_pages = PFN_UP(static_size); + + /* unaligned allocations can't be freed, round up to page size */ + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() + * sizeof(pcpu4k_pages[0])); + pcpu4k_pages = alloc_bootmem(pages_size); + + /* allocate and copy */ + j = 0; + for_each_possible_cpu(cpu) + for (i = 0; i < pcpu4k_nr_static_pages; i++) { + void *ptr; + + ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); + if (!ptr) + goto enomem; + + memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); + pcpu4k_pages[j++] = virt_to_page(ptr); + } + + /* we're ready, commit */ + pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", + pcpu4k_nr_static_pages, static_size); + + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, + pcpu4k_populate_pte); + goto out_free_ar; + +enomem: + while (--j >= 0) + free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); + ret = -ENOMEM; +out_free_ar: + free_bootmem(__pa(pcpu4k_pages), pages_size); + return ret; +} + static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -76,56 +168,24 @@ static inline void setup_percpu_segment(int cpu) */ void __init setup_per_cpu_areas(void) { - ssize_t size = __per_cpu_end - __per_cpu_start; - unsigned int nr_cpu_pages = DIV_ROUND_UP(size, PAGE_SIZE); - static struct page **pages; - size_t pages_size; - unsigned int cpu, i, j; + size_t static_size = __per_cpu_end - __per_cpu_start; + unsigned int cpu; unsigned long delta; size_t pcpu_unit_size; + ssize_t ret; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - pr_info("PERCPU: Allocating %zd bytes for static per cpu data\n", size); - pages_size = nr_cpu_pages * num_possible_cpus() * sizeof(pages[0]); - pages = alloc_bootmem(pages_size); + /* allocate percpu area */ + ret = setup_pcpu_4k(static_size); + if (ret < 0) + panic("cannot allocate static percpu area (%zu bytes, err=%zd)", + static_size, ret); - j = 0; - for_each_possible_cpu(cpu) { - void *ptr; - - for (i = 0; i < nr_cpu_pages; i++) { -#ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(PAGE_SIZE); -#else - int node = early_cpu_to_node(cpu); - - if (!node_online(node) || !NODE_DATA(node)) { - ptr = alloc_bootmem_pages(PAGE_SIZE); - pr_info("cpu %d has no node %d or node-local " - "memory\n", cpu, node); - pr_debug("per cpu data for cpu%d at %016lx\n", - cpu, __pa(ptr)); - } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(node), - PAGE_SIZE); - pr_debug("per cpu data for cpu%d on node%d " - "at %016lx\n", cpu, node, __pa(ptr)); - } -#endif - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pages[j++] = virt_to_page(ptr); - } - } - - pcpu4k_pages = pages; - pcpu4k_nr_static_pages = nr_cpu_pages; - pcpu_unit_size = pcpu_setup_first_chunk(pcpu4k_get_page, size, 0, 0, - NULL, pcpu4k_populate_pte); - - free_bootmem(__pa(pages), pages_size); + pcpu_unit_size = ret; + /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; From 89c9215165ca609096e845926d9a18f1306176a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: [PATCH 586/682] x86: add embedding percpu first chunk allocator Impact: add better first percpu allocation for !NUMA On !NUMA, we can simply allocate contiguous memory and use it for the first chunk without mapping it into vmalloc area. As the memory area is covered by the large page physical memory mapping, it allows the dynamic perpcu allocator to not add any TLB overhead for the static percpu area and whatever falls into the first chunk and the implementation is very simple too. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 86 +++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4a17c96f4f6..fd4c399675d 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -42,6 +42,35 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +/** + * pcpu_need_numa - determine percpu allocation needs to consider NUMA + * + * If NUMA is not configured or there is only one NUMA node available, + * there is no reason to consider NUMA. This function determines + * whether percpu allocation should consider NUMA or not. + * + * RETURNS: + * true if NUMA should be considered; otherwise, false. + */ +static bool __init pcpu_need_numa(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + pg_data_t *last = NULL; + unsigned int cpu; + + for_each_possible_cpu(cpu) { + int node = early_cpu_to_node(cpu); + + if (node_online(node) && NODE_DATA(node) && + last && last != NODE_DATA(node)) + return true; + + last = NODE_DATA(node); + } +#endif + return false; +} + /** * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu * @cpu: cpu to allocate for @@ -81,6 +110,59 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } +/* + * Embedding allocator + * + * The first chunk is sized to just contain the static area plus + * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using + * bootmem allocator and used as-is without being mapped into vmalloc + * area. This enables the first chunk to piggy back on the linear + * physical PMD mapping and doesn't add any additional pressure to + * TLB. + */ +static void *pcpue_ptr __initdata; +static size_t pcpue_unit_size __initdata; + +static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) +{ + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + + ((size_t)pageno << PAGE_SHIFT)); +} + +static ssize_t __init setup_pcpu_embed(size_t static_size) +{ + unsigned int cpu; + + /* + * If large page isn't supported, there's no benefit in doing + * this. Also, embedding allocation doesn't play well with + * NUMA. + */ + if (!cpu_has_pse || pcpu_need_numa()) + return -EINVAL; + + /* allocate and copy */ + pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpue_unit_size = max(pcpue_unit_size, PCPU_MIN_UNIT_SIZE); + pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, + PAGE_SIZE); + if (!pcpue_ptr) + return -ENOMEM; + + for_each_possible_cpu(cpu) + memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, + static_size); + + /* we're ready, commit */ + pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", + pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); + + return pcpu_setup_first_chunk(pcpue_get_page, static_size, + pcpue_unit_size, + pcpue_unit_size - static_size, pcpue_ptr, + NULL); +} + /* * 4k page allocator * @@ -178,7 +260,9 @@ void __init setup_per_cpu_areas(void) NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); /* allocate percpu area */ - ret = setup_pcpu_4k(static_size); + ret = setup_pcpu_embed(static_size); + if (ret < 0) + ret = setup_pcpu_4k(static_size); if (ret < 0) panic("cannot allocate static percpu area (%zu bytes, err=%zd)", static_size, ret); From 8ac837571491e239e64bd87863c1679d8002e8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:22 +0900 Subject: [PATCH 587/682] x86: add remapping percpu first chunk allocator Impact: add better first percpu allocation for NUMA On NUMA, embedding allocator can't be used as different units can't be made to fall in the correct NUMA nodes. To use large page mapping, each unit needs to be remapped. However, percpu areas are usually much smaller than large page size and unused space hurts a lot as the number of cpus grow. This allocator remaps large pages for each chunk but gives back unused part to the bootmem allocator making the large pages mapped twice. This adds slightly to the TLB pressure but is much better than using 4k mappings while still being NUMA-friendly. Ingo suggested that this would be the correct approach for NUMA. Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 137 ++++++++++++++++++++++++++++++++- 1 file changed, 135 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index fd4c399675d..2d946a8f78b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -110,6 +110,133 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } +/* + * Remap allocator + * + * This allocator uses PMD page as unit. A PMD page is allocated for + * each cpu and each is remapped into vmalloc area using PMD mapping. + * As PMD page is quite large, only part of it is used for the first + * chunk. Unused part is returned to the bootmem allocator. + * + * So, the PMD pages are mapped twice - once to the physical mapping + * and to the vmalloc area for the first percpu chunk. The double + * mapping does add one more PMD TLB entry pressure but still is much + * better than only using 4k mappings while still being NUMA friendly. + */ +#ifdef CONFIG_NEED_MULTIPLE_NODES +static size_t pcpur_size __initdata; +static void **pcpur_ptrs __initdata; + +static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpur_size) + return NULL; + + return virt_to_page(pcpur_ptrs[cpu] + off); +} + +static ssize_t __init setup_pcpu_remap(size_t static_size) +{ + static struct vm_struct vm; + pg_data_t *last; + size_t ptrs_size; + unsigned int cpu; + ssize_t ret; + + /* + * If large page isn't supported, there's no benefit in doing + * this. Also, on non-NUMA, embedding is better. + */ + if (!cpu_has_pse || pcpu_need_numa()) + return -EINVAL; + + last = NULL; + for_each_possible_cpu(cpu) { + int node = early_cpu_to_node(cpu); + + if (node_online(node) && NODE_DATA(node) && + last && last != NODE_DATA(node)) + goto proceed; + + last = NODE_DATA(node); + } + return -EINVAL; + +proceed: + /* + * Currently supports only single page. Supporting multiple + * pages won't be too difficult if it ever becomes necessary. + */ + pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + if (pcpur_size > PMD_SIZE) { + pr_warning("PERCPU: static data is larger than large page, " + "can't use large page\n"); + return -EINVAL; + } + + /* allocate pointer array and alloc large pages */ + ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); + pcpur_ptrs = alloc_bootmem(ptrs_size); + + for_each_possible_cpu(cpu) { + pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); + if (!pcpur_ptrs[cpu]) + goto enomem; + + /* + * Only use pcpur_size bytes and give back the rest. + * + * Ingo: The 2MB up-rounding bootmem is needed to make + * sure the partial 2MB page is still fully RAM - it's + * not well-specified to have a PAT-incompatible area + * (unmapped RAM, device memory, etc.) in that hole. + */ + free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), + PMD_SIZE - pcpur_size); + + memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + } + + /* allocate address and map */ + vm.flags = VM_ALLOC; + vm.size = num_possible_cpus() * PMD_SIZE; + vm_area_register_early(&vm, PMD_SIZE); + + for_each_possible_cpu(cpu) { + pmd_t *pmd; + + pmd = populate_extra_pmd((unsigned long)vm.addr + + cpu * PMD_SIZE); + set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), + PAGE_KERNEL_LARGE)); + } + + /* we're ready, commit */ + pr_info("PERCPU: Remapped at %p with large pages, static data " + "%zu bytes\n", vm.addr, static_size); + + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, + pcpur_size - static_size, vm.addr, NULL); + goto out_free_ar; + +enomem: + for_each_possible_cpu(cpu) + if (pcpur_ptrs[cpu]) + free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); + ret = -ENOMEM; +out_free_ar: + free_bootmem(__pa(pcpur_ptrs), ptrs_size); + return ret; +} +#else +static ssize_t __init setup_pcpu_remap(size_t static_size) +{ + return -EINVAL; +} +#endif + /* * Embedding allocator * @@ -259,8 +386,14 @@ void __init setup_per_cpu_areas(void) pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - /* allocate percpu area */ - ret = setup_pcpu_embed(static_size); + /* + * Allocate percpu area. If PSE is supported, try to make use + * of large page mappings. Please read comments on top of + * each allocator for details. + */ + ret = setup_pcpu_remap(static_size); + if (ret < 0) + ret = setup_pcpu_embed(static_size); if (ret < 0) ret = setup_pcpu_4k(static_size); if (ret < 0) From 40150d37be7f7949b2ec07d511244da856647d84 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 12:32:28 +0900 Subject: [PATCH 588/682] percpu: add __read_mostly to variables which are mostly read only Most global variables in percpu allocator are initialized during boot and read only from that point on. Add __read_mostly as per Rusty's suggestion. Signed-off-by: Tejun Heo Cc: Rusty Russell --- mm/percpu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 9ac01980cce..5954e7a9eb1 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -83,18 +83,18 @@ struct pcpu_chunk { struct page *page[]; /* #cpus * UNIT_PAGES */ }; -static int pcpu_unit_pages; -static int pcpu_unit_size; -static int pcpu_chunk_size; -static int pcpu_nr_slots; -static size_t pcpu_chunk_struct_size; +static int pcpu_unit_pages __read_mostly; +static int pcpu_unit_size __read_mostly; +static int pcpu_chunk_size __read_mostly; +static int pcpu_nr_slots __read_mostly; +static size_t pcpu_chunk_struct_size __read_mostly; /* the address of the first chunk which starts with the kernel static area */ -void *pcpu_base_addr; +void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); /* the size of kernel static area */ -static int pcpu_static_size; +static int pcpu_static_size __read_mostly; /* * One mutex to rule them all. @@ -112,7 +112,7 @@ static int pcpu_static_size; */ static DEFINE_MUTEX(pcpu_mutex); -static struct list_head *pcpu_slot; /* chunk list slots */ +static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ static int __pcpu_size_to_slot(int size) From 30d697fa3a25fed809a873b17531a00282dc1234 Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Mon, 23 Feb 2009 18:03:04 -0800 Subject: [PATCH 589/682] x86: fix performance regression in write() syscall While the introduction of __copy_from_user_nocache (see commit: 0812a579c92fefa57506821fa08e90f47cb6dbdd) may have been an improvement for sufficiently large writes, there is evidence to show that it is deterimental for small writes. Unixbench's fstime test gives the following results for 256 byte writes with MAX_BLOCK of 2000: 2.6.29-rc6 ( 5 samples, each in KB/sec ): 283750, 295200, 294500, 293000, 293300 2.6.29-rc6 + this patch (5 samples, each in KB/sec): 313050, 3106750, 293350, 306300, 307900 2.6.18 395700, 342000, 399100, 366050, 359850 See w_test() in src/fstime.c in unixbench version 4.1.0. Basically, the above test consists of counting how much we can write in this manner: alarm(10); while (!sigalarm) { for (f_blocks = 0; f_blocks < 2000; ++f_blocks) { write(f, buf, 256); } lseek(f, 0L, 0); } Note, there are other components to the write syscall regression that are not addressed here. Signed-off-by: Salman Qazi Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 84210c479fc..987a2c10fe2 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -192,14 +192,26 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { might_sleep(); - return __copy_user_nocache(dst, src, size, 1); + /* + * In practice this limit means that large file write()s + * which get chunked to 4K copies get handled via + * non-temporal stores here. Smaller writes get handled + * via regular __copy_from_user(): + */ + if (likely(size >= PAGE_SIZE)) + return __copy_user_nocache(dst, src, size, 1); + else + return __copy_from_user(dst, src, size); } static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) { - return __copy_user_nocache(dst, src, size, 0); + if (likely(size >= PAGE_SIZE)) + return __copy_user_nocache(dst, src, size, 0); + else + return __copy_from_user_inatomic(dst, src, size); } unsigned long From 2a0b1001116de51f13b2969a9a52bd2bc294644f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 23 Feb 2009 22:56:57 +0300 Subject: [PATCH 590/682] x86: head_64.S - remove useless balign Impact: cleanup NEXT_PAGE already has 'balign' so no need to keep this redundant one. Signed-off-by: Cyrill Gorcunov Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2e648e3a5ea..52c9af429dd 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -329,8 +329,6 @@ early_idt_ripmsg: #endif /* CONFIG_EARLY_PRINTK */ .previous -.balign PAGE_SIZE - #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ ENTRY(name) From 5e112ae23b404ccba0a61630b82ec44f0d084880 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 23 Feb 2009 22:56:58 +0300 Subject: [PATCH 591/682] x86: head_64.S - use IDT_ENTRIES instead of hardcoded number Impact: cleanup Signed-off-by: Cyrill Gorcunov Cc: heukelum@fastmail.fm Cc: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 52c9af429dd..54b29bb24e7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -417,7 +417,7 @@ ENTRY(phys_base) .section .bss, "aw", @nobits .align L1_CACHE_BYTES ENTRY(idt_table) - .skip 256 * 16 + .skip IDT_ENTRIES * 16 .section .bss.page_aligned, "aw", @nobits .align PAGE_SIZE From 57e372932cec8eb141cde039aaeaa91b69fceba2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 23 Feb 2009 22:56:59 +0300 Subject: [PATCH 592/682] x86: invalid_vm86_irq -- use predefined macros Impact: cleanup Signed-off-by: Cyrill Gorcunov Cc: heukelum@fastmail.fm Cc: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index b07278c55e9..8a285f356f8 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -128,7 +128,7 @@ #ifndef __ASSEMBLY__ static inline int invalid_vm86_irq(int irq) { - return irq < 3 || irq > 15; + return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; } #endif From b3baaa138cd4223bffb6ca64b873d25cfb1d7c70 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 23 Feb 2009 22:57:00 +0300 Subject: [PATCH 593/682] x86: entry_64.S - add missing ENDPROC native_usergs_sysret64 is described as extern void native_usergs_sysret64(void) so lets add ENDPROC here Signed-off-by: Cyrill Gorcunov Cc: heukelum@fastmail.fm Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fbcf96b295f..7cf8d798042 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -188,6 +188,7 @@ return_to_handler: ENTRY(native_usergs_sysret64) swapgs sysretq +ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ From bc8b2b9258488b932cd399112e01d5afffc4ee96 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 23 Feb 2009 22:57:01 +0300 Subject: [PATCH 594/682] x86: head_64.S - use GLOBAL macro Impact: cleanup Signed-off-by: Cyrill Gorcunov Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7cf8d798042..8a6c7c63885 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -77,20 +77,17 @@ ENTRY(ftrace_caller) movq 8(%rbp), %rsi subq $MCOUNT_INSN_SIZE, %rdi -.globl ftrace_call -ftrace_call: +GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME #ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: +GLOBAL(ftrace_graph_call) jmp ftrace_stub #endif -.globl ftrace_stub -ftrace_stub: +GLOBAL(ftrace_stub) retq END(ftrace_caller) @@ -110,8 +107,7 @@ ENTRY(mcount) jnz ftrace_graph_caller #endif -.globl ftrace_stub -ftrace_stub: +GLOBAL(ftrace_stub) retq trace: @@ -148,9 +144,7 @@ ENTRY(ftrace_graph_caller) retq END(ftrace_graph_caller) - -.globl return_to_handler -return_to_handler: +GLOBAL(return_to_handler) subq $80, %rsp movq %rax, (%rsp) @@ -634,16 +628,14 @@ tracesys: * Syscall return path ending with IRET. * Has correct top of stack, but partial stack frame. */ - .globl int_ret_from_sys_call - .globl int_with_check -int_ret_from_sys_call: +GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ -int_with_check: +GLOBAL(int_with_check) LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl TI_flags(%rcx),%edx From 9f331119a4f95a44d918fe6d5e85998fabf99b72 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 23 Feb 2009 22:57:02 +0300 Subject: [PATCH 595/682] x86: efi_stub_32,64 - add missing ENDPROCs Signed-off-by: Cyrill Gorcunov Cc: heukelum@fastmail.fm Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi_stub_32.S | 1 + arch/x86/kernel/efi_stub_64.S | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S index ef00bb77d7e..a206b946929 100644 --- a/arch/x86/kernel/efi_stub_32.S +++ b/arch/x86/kernel/efi_stub_32.S @@ -113,6 +113,7 @@ ENTRY(efi_call_phys) movl (%edx), %ecx pushl %ecx ret +ENDPROC(efi_call_phys) .previous .data diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S index 99b47d48c9f..4c07ccab814 100644 --- a/arch/x86/kernel/efi_stub_64.S +++ b/arch/x86/kernel/efi_stub_64.S @@ -41,6 +41,7 @@ ENTRY(efi_call0) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call0) ENTRY(efi_call1) SAVE_XMM @@ -50,6 +51,7 @@ ENTRY(efi_call1) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call1) ENTRY(efi_call2) SAVE_XMM @@ -59,6 +61,7 @@ ENTRY(efi_call2) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call2) ENTRY(efi_call3) SAVE_XMM @@ -69,6 +72,7 @@ ENTRY(efi_call3) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call3) ENTRY(efi_call4) SAVE_XMM @@ -80,6 +84,7 @@ ENTRY(efi_call4) addq $32, %rsp RESTORE_XMM ret +ENDPROC(efi_call4) ENTRY(efi_call5) SAVE_XMM @@ -92,6 +97,7 @@ ENTRY(efi_call5) addq $48, %rsp RESTORE_XMM ret +ENDPROC(efi_call5) ENTRY(efi_call6) SAVE_XMM @@ -107,3 +113,4 @@ ENTRY(efi_call6) addq $48, %rsp RESTORE_XMM ret +ENDPROC(efi_call6) From 10b614eaa847447c2c8646030728309d14bd2d05 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 24 Feb 2009 21:41:32 +0100 Subject: [PATCH 596/682] x86_32: summit_32, use BAD_APICID Use BAD_APICID instead of 0xFF constants in summit_cpu_mask_to_apicid. Also remove bogus comments about what we actually return. Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/summit_32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index cfe7b09015d..d02f4385707 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -303,12 +303,10 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) int cpu; num_bits_set = cpus_weight(*cpumask); - /* Return id to all */ if (num_bits_set >= nr_cpu_ids) - return 0xFF; + return BAD_APICID; /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): + * The cpus in the mask must all be on the apic cluster. */ cpu = first_cpu(*cpumask); apicid = summit_cpu_to_logical_apicid(cpu); @@ -318,9 +316,9 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) int new_apicid = summit_cpu_to_logical_apicid(cpu); if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk ("%s: Not a valid mask!\n", __func__); + printk("%s: Not a valid mask!\n", __func__); - return 0xFF; + return BAD_APICID; } apicid = apicid | new_apicid; cpus_found++; From b5f26d05565d070b7b352dba56b1f96e10021980 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 24 Feb 2009 21:41:33 +0100 Subject: [PATCH 597/682] x86_32: summit_32, de-inline functions The ones which go only into struct genapic are de-inlined by compiler anyway, so remove the inline specifier from them. Afterwards, remove summit_setup_portio_remap completely as it is unused. Remove inline also from summit_cpu_mask_to_apicid, since it's not worth it (it is used in struct genapic too). Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/summit_32.c | 47 ++++++++++++++------------------ 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index d02f4385707..32838b57a94 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -48,7 +48,7 @@ #include #include -static inline unsigned summit_get_apic_id(unsigned long x) +static unsigned summit_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; } @@ -58,7 +58,7 @@ static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector) default_send_IPI_mask_sequence_logical(mask, vector); } -static inline void summit_send_IPI_allbutself(int vector) +static void summit_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; cpu_clear(smp_processor_id(), mask); @@ -67,7 +67,7 @@ static inline void summit_send_IPI_allbutself(int vector) summit_send_IPI_mask(&mask, vector); } -static inline void summit_send_IPI_all(int vector) +static void summit_send_IPI_all(int vector) { summit_send_IPI_mask(&cpu_online_map, vector); } @@ -82,8 +82,8 @@ extern void setup_summit(void); #define setup_summit() {} #endif -static inline int -summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, + char *productid) { if (!strncmp(oem, "IBM ENSW", 8) && (!strncmp(productid, "VIGIL SMP", 9) @@ -98,7 +98,7 @@ summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) } /* Hook from generic ACPI tables.c */ -static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) @@ -186,7 +186,7 @@ static inline int is_WPEG(struct rio_detail *rio){ #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline const cpumask_t *summit_target_cpus(void) +static const cpumask_t *summit_target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing @@ -195,19 +195,18 @@ static inline const cpumask_t *summit_target_cpus(void) return &cpumask_of_cpu(0); } -static inline unsigned long -summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } /* we don't use the phys_cpu_present_map to indicate apicid presence */ -static inline unsigned long summit_check_apicid_present(int bit) +static unsigned long summit_check_apicid_present(int bit) { return 1; } -static inline void summit_init_apic_ldr(void) +static void summit_init_apic_ldr(void) { unsigned long val, id; int count = 0; @@ -234,18 +233,18 @@ static inline void summit_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline int summit_apic_id_registered(void) +static int summit_apic_id_registered(void) { return 1; } -static inline void summit_setup_apic_routing(void) +static void summit_setup_apic_routing(void) { printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", nr_ioapics); } -static inline int summit_apicid_to_node(int logical_apicid) +static int summit_apicid_to_node(int logical_apicid) { #ifdef CONFIG_SMP return apicid_2_node[hard_smp_processor_id()]; @@ -266,7 +265,7 @@ static inline int summit_cpu_to_logical_apicid(int cpu) #endif } -static inline int summit_cpu_present_to_apicid(int mps_cpu) +static int summit_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); @@ -274,28 +273,23 @@ static inline int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t -summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0x0F); } -static inline physid_mask_t summit_apicid_to_cpu_present(int apicid) +static physid_mask_t summit_apicid_to_cpu_present(int apicid) { return physid_mask_of_physid(0); } -static inline void summit_setup_portio_remap(void) -{ -} - -static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) +static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) { return 1; } -static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpus_found = 0; int num_bits_set; @@ -328,8 +322,7 @@ static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) return apicid; } -static inline unsigned int -summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, +static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { int apicid = summit_cpu_to_logical_apicid(0); @@ -354,7 +347,7 @@ summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, * * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. */ -static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb) +static int summit_phys_pkg_id(int cpuid_apic, int index_msb) { return hard_smp_processor_id() >> index_msb; } From 46cb27f5169d37be38be8e5729b9a0100e989fa8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 24 Feb 2009 13:12:43 -0800 Subject: [PATCH 598/682] x86: check range in reserve_early() Impact: cleanup one 32-bit system reports: BIOS-provided physical RAM map: BIOS-e820: 0000000000000000 - 000000000009fc00 (usable) BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved) BIOS-e820: 00000000000f0000 - 0000000000100000 (reserved) BIOS-e820: 0000000000100000 - 000000001c000000 (usable) BIOS-e820: 00000000ffff0000 - 0000000100000000 (reserved) DMI 2.0 present. last_pfn = 0x1c000 max_arch_pfn = 0x100000 kernel direct mapping tables up to 1c000000 @ 7000-c000 .. RAMDISK: 1bc69000 - 1bfef4fa .. 0MB HIGHMEM available. 448MB LOWMEM available. mapped low ram: 0 - 1c000000 low ram: 00000000 - 1c000000 bootmap 00002000 - 00005800 (9 early reservations) ==> bootmem [0000000000 - 001c000000] #0 [0000000000 - 0000001000] BIOS data page ==> [0000000000 - 0000001000] #1 [0000001000 - 0000002000] EX TRAMPOLINE ==> [0000001000 - 0000002000] #2 [0000006000 - 0000007000] TRAMPOLINE ==> [0000006000 - 0000007000] #3 [0000400000 - 00009ed14c] TEXT DATA BSS ==> [0000400000 - 00009ed14c] #4 [001bc69000 - 001bfef4fa] RAMDISK ==> [001bc69000 - 001bfef4fa] #5 [00009ee000 - 00009f2000] INIT_PG_TABLE ==> [00009ee000 - 00009f2000] #6 [000009f400 - 0000100000] BIOS reserved ==> [000009f400 - 0000100000] #7 [0000007000 - 0000007000] PGTABLE #8 [0000002000 - 0000006000] BOOTMAP ==> [0000002000 - 0000006000] Notice the strange blank PGTABLE entry. The reason is init_pg_table is big enough, and zero range is called with init_memory_mapping/reserve_early(). So try to check the range in reserve_early() v2: fix the reversed compare Signed-off-by: Yinghai Lu Cc: nickpiggin@yahoo.com.au Cc: ink@jurassic.park.msu.ru Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e85826829cf..508bec1cee2 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -858,6 +858,9 @@ void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) */ void __init reserve_early(u64 start, u64 end, char *name) { + if (start >= end) + return; + drop_overlaps_that_are_ok(start, end); __reserve_early(start, end, name, 0); } From 1250fbed1471b681f3e75e4938f356a1c0a92d5e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 12 Feb 2009 13:43:20 +0100 Subject: [PATCH 599/682] x86, mce: enable machine checks in 64-bit defconfig Impact: defconfig change Enable MCE in the 64-bit defconfig. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/configs/x86_64_defconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 10728fd91c6..9fd7d156a12 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:57:29 2009 +# Tue Feb 24 15:44:16 2009 # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -268,7 +268,9 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -# CONFIG_X86_MCE is not set +CONFIG_X86_MCE=y +CONFIG_X86_MCE_INTEL=y +CONFIG_X86_MCE_AMD=y # CONFIG_I8K is not set CONFIG_MICROCODE=y CONFIG_MICROCODE_INTEL=y From 15d4fcd615989ed83fe848e6a3c7e9f0361cf0d0 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 24 Feb 2009 15:52:58 -0800 Subject: [PATCH 600/682] x86, mce: enable machine checks in 32-bit defconfig Impact: defconfig change Enable MCE in the 32-bit defconfig. Signed-off-by: H. Peter Anvin --- arch/x86/configs/i386_defconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 8f40a80ccb5..c2cae417fab 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.29-rc4 -# Thu Feb 12 12:57:57 2009 +# Tue Feb 24 15:50:58 2009 # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -268,7 +268,9 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -# CONFIG_X86_MCE is not set +CONFIG_X86_MCE=y +CONFIG_X86_MCE_NONFATAL=y +CONFIG_X86_MCE_P4THERMAL=y CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set From 24ff954233ecfd45801383f831626f88937ebe6f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Feb 2009 10:38:10 +0900 Subject: [PATCH 601/682] x86, percpu: fix minor bugs in setup_percpu.c Recent changes in setup_percpu.c made a now meaningless DBG() statement fail to compile and introduced a comparison-of-different-types warning. Fix them. Compile failure is reported by Ingo Molnar. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 2d946a8f78b..c29f301d388 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -270,7 +270,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) /* allocate and copy */ pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max(pcpue_unit_size, PCPU_MIN_UNIT_SIZE); + pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); if (!pcpue_ptr) @@ -438,8 +438,6 @@ void __init setup_per_cpu_areas(void) */ if (cpu == boot_cpu_id) switch_to_new_gdt(cpu); - - DBG("PERCPU: cpu %4d %p\n", cpu, ptr); } /* indicate the early static arrays will soon be gone */ From d325100504f1d0c296a1fbfef558deaa655e2240 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Feb 2009 11:01:40 +0900 Subject: [PATCH 602/682] x86: convert cacheflush macros inline functions Impact: cleanup Unused macro parameters cause spurious unused variable warnings. Convert all cacheflush macros to inline functions to avoid the warnings and achieve better type checking. Signed-off-by: Tejun Heo --- arch/x86/include/asm/cacheflush.h | 53 +++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 2f8466540fb..5b301b7ff5f 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -5,24 +5,43 @@ #include /* Caches aren't brain-dead on the intel. */ -#define flush_cache_all() do { } while (0) -#define flush_cache_mm(mm) do { } while (0) -#define flush_cache_dup_mm(mm) do { } while (0) -#define flush_cache_range(vma, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) -#define flush_dcache_page(page) do { } while (0) -#define flush_dcache_mmap_lock(mapping) do { } while (0) -#define flush_dcache_mmap_unlock(mapping) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) -#define flush_icache_page(vma, pg) do { } while (0) -#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) -#define flush_cache_vmap(start, end) do { } while (0) -#define flush_cache_vunmap(start, end) do { } while (0) +static inline void flush_cache_all(void) { } +static inline void flush_cache_mm(struct mm_struct *mm) { } +static inline void flush_cache_dup_mm(struct mm_struct *mm) { } +static inline void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { } +static inline void flush_cache_page(struct vm_area_struct *vma, + unsigned long vmaddr, unsigned long pfn) { } +static inline void flush_dcache_page(struct page *page) { } +static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } +static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } +static inline void flush_icache_range(unsigned long start, + unsigned long end) { } +static inline void flush_icache_page(struct vm_area_struct *vma, + struct page *page) { } +static inline void flush_icache_user_range(struct vm_area_struct *vma, + struct page *page, + unsigned long addr, + unsigned long len) { } +static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } +static inline void flush_cache_vunmap(unsigned long start, + unsigned long end) { } -#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ - memcpy((dst), (src), (len)) -#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ - memcpy((dst), (src), (len)) +static inline void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, + void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); +} + +static inline void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, + void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); +} #define PG_non_WB PG_arch_1 PAGEFLAG(NonWB, non_WB) From 3255aa2eb636a508fc82a73fabbb8aaf2ff23c0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 08:21:52 +0100 Subject: [PATCH 603/682] x86, mm: pass in 'total' to __copy_from_user_*nocache() Impact: cleanup, enable future change Add a 'total bytes copied' parameter to __copy_from_user_*nocache(), and update all the callsites. The parameter is not used yet - architecture code can use it to more intelligently decide whether the copy should be cached or non-temporal. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++-- arch/x86/include/asm/uaccess_64.h | 5 ++--- drivers/gpu/drm/i915/i915_gem.c | 2 +- include/linux/uaccess.h | 4 ++-- mm/filemap.c | 10 ++++++---- mm/filemap_xip.c | 2 +- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 5e06259e90e..a0ba6138697 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) } static __always_inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { might_fault(); if (__builtin_constant_p(n)) { @@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, - unsigned long n) + unsigned long n, unsigned long total) { return __copy_from_user_ll_nocache_nozero(to, from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 987a2c10fe2..a748253db0c 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -189,7 +189,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); static inline int __copy_from_user_nocache(void *dst, const void __user *src, - unsigned size) + unsigned size, unsigned long total) { might_sleep(); /* @@ -205,8 +205,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, } static inline int __copy_from_user_inatomic_nocache(void *dst, - const void __user *src, - unsigned size) + const void __user *src, unsigned size, unsigned total) { if (likely(size >= PAGE_SIZE)) return __copy_user_nocache(dst, src, size, 0); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 81857665409..6b209db8370 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -215,7 +215,7 @@ fast_user_write(struct io_mapping *mapping, vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, - user_data, length); + user_data, length, length); io_mapping_unmap_atomic(vaddr_atomic); if (unwritten) return -EFAULT; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145..6f3c603b0d6 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user(to, from, n); } diff --git a/mm/filemap.c b/mm/filemap.c index 23acefe5180..60fd56772cc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1816,14 +1816,14 @@ EXPORT_SYMBOL(file_remove_suid); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { - size_t copied = 0, left = 0; + size_t copied = 0, left = 0, total = bytes; while (bytes) { char __user *buf = iov->iov_base + base; int copy = min(bytes, iov->iov_len - base); base = 0; - left = __copy_from_user_inatomic_nocache(vaddr, buf, copy); + left = __copy_from_user_inatomic_nocache(vaddr, buf, copy, total); copied += copy; bytes -= copy; vaddr += copy; @@ -1851,8 +1851,9 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; + left = __copy_from_user_inatomic_nocache(kaddr + offset, - buf, bytes); + buf, bytes, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, @@ -1880,7 +1881,8 @@ size_t iov_iter_copy_from_user(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - left = __copy_from_user_nocache(kaddr + offset, buf, bytes); + + left = __copy_from_user_nocache(kaddr + offset, buf, bytes, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 0c04615651b..bf54f8a2cf1 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -354,7 +354,7 @@ __xip_file_write(struct file *filp, const char __user *buf, break; copied = bytes - - __copy_from_user_nocache(xip_mem + offset, buf, bytes); + __copy_from_user_nocache(xip_mem + offset, buf, bytes, bytes); if (likely(copied > 0)) { status = copied; From 95108fa34a83ffd97e0af959e4b28d7c62008781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 08:22:20 +0100 Subject: [PATCH 604/682] x86: usercopy: check for total size when deciding non-temporal cutoff Impact: make more types of copies non-temporal This change makes the following simple fix: 30d697f: x86: fix performance regression in write() syscall A bit more sophisticated: we check the 'total' number of bytes written to decide whether to copy in a cached or a non-temporal way. This will for example cause the tail (modulo 4096 bytes) chunk of a large write() to be non-temporal too - not just the page-sized chunks. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index a748253db0c..dcaa0404cf7 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -198,7 +198,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, * non-temporal stores here. Smaller writes get handled * via regular __copy_from_user(): */ - if (likely(size >= PAGE_SIZE)) + if (likely(total >= PAGE_SIZE)) return __copy_user_nocache(dst, src, size, 1); else return __copy_from_user(dst, src, size); @@ -207,7 +207,7 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src, static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size, unsigned total) { - if (likely(size >= PAGE_SIZE)) + if (likely(total >= PAGE_SIZE)) return __copy_user_nocache(dst, src, size, 0); else return __copy_from_user_inatomic(dst, src, size); From 40823f737e5bd186a1156fb1c28f360260e1e084 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:26:18 +0100 Subject: [PATCH 605/682] x86: memtest: reuse test patterns when memtest parameter exceeds number of available patterns Impact: fix unexpected behaviour when pattern number is out of range Current implementation provides 4 patterns for memtest. The code doesn't check whether the memtest parameter value exceeds the maximum pattern number. Instead the memtest code pretends to test with non-existing patterns, e.g. when booting with memtest=10 I've observed the following ... early_memtest: pattern num 10 0000001000 - 0000006000 pattern 0 ... 0000001000 - 0000006000 pattern 1 ... 0000001000 - 0000006000 pattern 2 ... 0000001000 - 0000006000 pattern 3 ... 0000001000 - 0000006000 pattern 4 ... 0000001000 - 0000006000 pattern 5 ... 0000001000 - 0000006000 pattern 6 ... 0000001000 - 0000006000 pattern 7 ... 0000001000 - 0000006000 pattern 8 ... 0000001000 - 0000006000 pattern 9 ... But in fact Linux didn't test anything for patterns > 4 as the default case in memtest() is to leave the function. I suggest to use the memtest parameter as the number of tests to be performed and to re-iterate over all existing patterns. Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 9cab18b0b85..00b8bdc64c3 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -9,6 +9,8 @@ #include +#define _MAX_MEM_PATTERNS 4 + static void __init memtest(unsigned long start_phys, unsigned long size, unsigned pattern) { @@ -21,6 +23,8 @@ static void __init memtest(unsigned long start_phys, unsigned long size, unsigned long count; unsigned long incr; + pattern = pattern % _MAX_MEM_PATTERNS; + switch (pattern) { case 0: val = 0UL; @@ -110,8 +114,9 @@ void __init early_memtest(unsigned long start, unsigned long end) t_size = end - t_start; printk(KERN_CONT "\n %010llx - %010llx pattern %d", - (unsigned long long)t_start, - (unsigned long long)t_start + t_size, pattern); + (unsigned long long)t_start, + (unsigned long long)t_start + t_size, + pattern % _MAX_MEM_PATTERNS); memtest(t_start, t_size, pattern); From 6d74171bf7315257d276aa35400c5a8d6a993f19 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:27:27 +0100 Subject: [PATCH 606/682] x86: memtest: introduce array to select memtest patterns Impact: code cleanup Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 65 +++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 42 deletions(-) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 00b8bdc64c3..827f94044cf 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -9,48 +9,25 @@ #include -#define _MAX_MEM_PATTERNS 4 +static u64 patterns[] __initdata = { + 0, + 0xffffffffffffffffULL, + 0x5555555555555555ULL, + 0xaaaaaaaaaaaaaaaaULL, +}; static void __init memtest(unsigned long start_phys, unsigned long size, - unsigned pattern) + u64 pattern) { unsigned long i; - unsigned long *start; + u64 *start; unsigned long start_bad; unsigned long last_bad; - unsigned long val; unsigned long start_phys_aligned; unsigned long count; unsigned long incr; - pattern = pattern % _MAX_MEM_PATTERNS; - - switch (pattern) { - case 0: - val = 0UL; - break; - case 1: - val = -1UL; - break; - case 2: -#ifdef CONFIG_X86_64 - val = 0x5555555555555555UL; -#else - val = 0x55555555UL; -#endif - break; - case 3: -#ifdef CONFIG_X86_64 - val = 0xaaaaaaaaaaaaaaaaUL; -#else - val = 0xaaaaaaaaUL; -#endif - break; - default: - return; - } - - incr = sizeof(unsigned long); + incr = sizeof(pattern); start_phys_aligned = ALIGN(start_phys, incr); count = (size - (start_phys_aligned - start_phys))/incr; start = __va(start_phys_aligned); @@ -58,15 +35,16 @@ static void __init memtest(unsigned long start_phys, unsigned long size, last_bad = 0; for (i = 0; i < count; i++) - start[i] = val; + start[i] = pattern; for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { - if (*start != val) { + if (*start != pattern) { if (start_phys_aligned == last_bad + incr) { last_bad += incr; } else { if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", - val, start_bad, last_bad + incr); + printk(KERN_CONT "\n %016llx bad mem addr %010lx - %010lx reserved", + (unsigned long long) pattern, + start_bad, last_bad + incr); reserve_early(start_bad, last_bad + incr, "BAD RAM"); } start_bad = last_bad = start_phys_aligned; @@ -74,8 +52,9 @@ static void __init memtest(unsigned long start_phys, unsigned long size, } } if (start_bad) { - printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", - val, start_bad, last_bad + incr); + printk(KERN_CONT "\n %016llx bad mem addr %010lx - %010lx reserved", + (unsigned long long) pattern, start_bad, + last_bad + incr); reserve_early(start_bad, last_bad + incr, "BAD RAM"); } } @@ -95,13 +74,16 @@ early_param("memtest", parse_memtest); void __init early_memtest(unsigned long start, unsigned long end) { u64 t_start, t_size; - unsigned pattern; + unsigned int i; + u64 pattern; if (!memtest_pattern) return; printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); - for (pattern = 0; pattern < memtest_pattern; pattern++) { + for (i = 0; i < memtest_pattern; i++) { + unsigned int idx = i % ARRAY_SIZE(patterns); + pattern = patterns[idx]; t_start = start; t_size = 0; while (t_start < end) { @@ -115,8 +97,7 @@ void __init early_memtest(unsigned long start, unsigned long end) printk(KERN_CONT "\n %010llx - %010llx pattern %d", (unsigned long long)t_start, - (unsigned long long)t_start + t_size, - pattern % _MAX_MEM_PATTERNS); + (unsigned long long)t_start + t_size, idx); memtest(t_start, t_size, pattern); From 7dad169e57eda1f0aa6dc5ac43a898b4b0ced2c7 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:28:07 +0100 Subject: [PATCH 607/682] x86: memtest: cleanup memtest function Impact: code cleanup Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 827f94044cf..01a72d6825b 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -16,6 +16,15 @@ static u64 patterns[] __initdata = { 0xaaaaaaaaaaaaaaaaULL, }; +static void __init reserve_bad_mem(u64 pattern, unsigned long start_bad, + unsigned long end_bad) +{ + printk(KERN_CONT "\n %016llx bad mem addr " + "%010lx - %010lx reserved", + (unsigned long long) pattern, start_bad, end_bad); + reserve_early(start_bad, end_bad, "BAD RAM"); +} + static void __init memtest(unsigned long start_phys, unsigned long size, u64 pattern) { @@ -37,26 +46,18 @@ static void __init memtest(unsigned long start_phys, unsigned long size, for (i = 0; i < count; i++) start[i] = pattern; for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { - if (*start != pattern) { - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - } else { - if (start_bad) { - printk(KERN_CONT "\n %016llx bad mem addr %010lx - %010lx reserved", - (unsigned long long) pattern, - start_bad, last_bad + incr); - reserve_early(start_bad, last_bad + incr, "BAD RAM"); - } - start_bad = last_bad = start_phys_aligned; - } + if (*start == pattern) + continue; + if (start_phys_aligned == last_bad + incr) { + last_bad += incr; + continue; } + if (start_bad) + reserve_bad_mem(pattern, start_bad, last_bad + incr); + start_bad = last_bad = start_phys_aligned; } - if (start_bad) { - printk(KERN_CONT "\n %016llx bad mem addr %010lx - %010lx reserved", - (unsigned long long) pattern, start_bad, - last_bad + incr); - reserve_early(start_bad, last_bad + incr, "BAD RAM"); - } + if (start_bad) + reserve_bad_mem(pattern, start_bad, last_bad + incr); } /* default is disabled */ From 570c9e69aaa84689fb8ed2a3a4af39ca54ba7a47 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:28:58 +0100 Subject: [PATCH 608/682] x86: memtest: adapt log messages - print test pattern instead of pattern number, - show pattern as stored in memory, - use proper priority flags, - consistent use of u64 throughout the code Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 01a72d6825b..3232397783a 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -16,25 +16,22 @@ static u64 patterns[] __initdata = { 0xaaaaaaaaaaaaaaaaULL, }; -static void __init reserve_bad_mem(u64 pattern, unsigned long start_bad, - unsigned long end_bad) +static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) { - printk(KERN_CONT "\n %016llx bad mem addr " - "%010lx - %010lx reserved", - (unsigned long long) pattern, start_bad, end_bad); + printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", + (unsigned long long) pattern, + (unsigned long long) start_bad, + (unsigned long long) end_bad); reserve_early(start_bad, end_bad, "BAD RAM"); } -static void __init memtest(unsigned long start_phys, unsigned long size, - u64 pattern) +static void __init memtest(u64 pattern, u64 start_phys, u64 size) { - unsigned long i; + u64 i, count; u64 *start; - unsigned long start_bad; - unsigned long last_bad; - unsigned long start_phys_aligned; - unsigned long count; - unsigned long incr; + u64 start_bad, last_bad; + u64 start_phys_aligned; + size_t incr; incr = sizeof(pattern); start_phys_aligned = ALIGN(start_phys, incr); @@ -81,7 +78,7 @@ void __init early_memtest(unsigned long start, unsigned long end) if (!memtest_pattern) return; - printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); + printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); for (i = 0; i < memtest_pattern; i++) { unsigned int idx = i % ARRAY_SIZE(patterns); pattern = patterns[idx]; @@ -96,14 +93,13 @@ void __init early_memtest(unsigned long start, unsigned long end) if (t_start + t_size > end) t_size = end - t_start; - printk(KERN_CONT "\n %010llx - %010llx pattern %d", - (unsigned long long)t_start, - (unsigned long long)t_start + t_size, idx); - - memtest(t_start, t_size, pattern); + printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", + (unsigned long long) t_start, + (unsigned long long) t_start + t_size, + (unsigned long long) cpu_to_be64(pattern)); + memtest(pattern, t_start, t_size); t_start += t_size; } } - printk(KERN_CONT "\n"); } From bfb4dc0da45f8fddc76eba7e62919420c7d6dae2 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:30:04 +0100 Subject: [PATCH 609/682] x86: memtest: wipe out test pattern from memory Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 54 +++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 3232397783a..9c52ef19e6f 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -57,6 +57,29 @@ static void __init memtest(u64 pattern, u64 start_phys, u64 size) reserve_bad_mem(pattern, start_bad, last_bad + incr); } +static void __init do_one_pass(u64 pattern, u64 start, u64 end) +{ + u64 size = 0; + + while (start < end) { + start = find_e820_area_size(start, &size, 1); + + /* done ? */ + if (start >= end) + break; + if (start + size > end) + size = end - start; + + printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", + (unsigned long long) start, + (unsigned long long) start + size, + (unsigned long long) cpu_to_be64(pattern)); + memtest(pattern, start, size); + + start += size; + } +} + /* default is disabled */ static int memtest_pattern __initdata; @@ -71,35 +94,22 @@ early_param("memtest", parse_memtest); void __init early_memtest(unsigned long start, unsigned long end) { - u64 t_start, t_size; unsigned int i; - u64 pattern; + unsigned int idx = 0; if (!memtest_pattern) return; printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); for (i = 0; i < memtest_pattern; i++) { - unsigned int idx = i % ARRAY_SIZE(patterns); - pattern = patterns[idx]; - t_start = start; - t_size = 0; - while (t_start < end) { - t_start = find_e820_area_size(t_start, &t_size, 1); + idx = i % ARRAY_SIZE(patterns); + do_one_pass(patterns[idx], start, end); + } - /* done ? */ - if (t_start >= end) - break; - if (t_start + t_size > end) - t_size = end - t_start; - - printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", - (unsigned long long) t_start, - (unsigned long long) t_start + t_size, - (unsigned long long) cpu_to_be64(pattern)); - memtest(pattern, t_start, t_size); - - t_start += t_size; - } + if (idx > 0) { + printk(KERN_INFO "early_memtest: wipe out " + "test pattern from memory\n"); + /* additional test with pattern 0 will do this */ + do_one_pass(0, start, end); } } From 9e5f6cf5f755ca5c52071c317421fae19966a658 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:30:45 +0100 Subject: [PATCH 610/682] x86: update description for memtest boot parameter Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f6d5d5b9b2b..10c4b8b75c9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1308,8 +1308,13 @@ and is between 256 and 4096 characters. It is defined in the file memtest= [KNL,X86] Enable memtest Format: - range: 0,4 : pattern number default : 0 + Specifies the number of memtest passes to be + performed. Each pass selects another test + pattern from a given set of patterns. Memtest + fills the memory with this pattern, validates + memory contents and reserves bad memory + regions that are detected. meye.*= [HW] Set MotionEye Camera parameters See Documentation/video4linux/meye.txt. From 63823126c221dd721ce7351b596b3b73aa943613 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 25 Feb 2009 11:31:49 +0100 Subject: [PATCH 611/682] x86: memtest: add additional (regular) test patterns Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 9c52ef19e6f..0bcd7883d03 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -14,6 +14,19 @@ static u64 patterns[] __initdata = { 0xffffffffffffffffULL, 0x5555555555555555ULL, 0xaaaaaaaaaaaaaaaaULL, + 0x1111111111111111ULL, + 0x2222222222222222ULL, + 0x4444444444444444ULL, + 0x8888888888888888ULL, + 0x3333333333333333ULL, + 0x6666666666666666ULL, + 0x9999999999999999ULL, + 0xccccccccccccccccULL, + 0x7777777777777777ULL, + 0xbbbbbbbbbbbbbbbbULL, + 0xddddddddddddddddULL, + 0xeeeeeeeeeeeeeeeeULL, + 0x7a6c7258554e494cULL, /* yeah ;-) */ }; static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) From 7880f7464546842ee14179bef16a6e14381ea638 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:13 -0800 Subject: [PATCH 612/682] gpu/drm, x86, PAT: routine to keep identity map in sync Add a function to check and keep identity maps in sync, when changing any memory type. One of the follow on patches will also use this routine. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pat.h | 3 +++ arch/x86/mm/pat.c | 46 ++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h index b8493b3b989..abb3c29fc9d 100644 --- a/arch/x86/include/asm/pat.h +++ b/arch/x86/include/asm/pat.h @@ -19,4 +19,7 @@ extern int free_memtype(u64 start, u64 end); extern void pat_disable(char *reason); +extern int kernel_map_sync_memtype(u64 base, unsigned long size, + unsigned long flag); + #endif /* _ASM_X86_PAT_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index aebbf67a79d..399383c6f61 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -624,6 +624,33 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) free_memtype(addr, addr + size); } +/* + * Change the memory type for the physial address range in kernel identity + * mapping space if that range is a part of identity map. + */ +int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) +{ + unsigned long id_sz; + + if (!pat_enabled || base >= __pa(high_memory)) + return 0; + + id_sz = (__pa(high_memory) < base + size) ? + __pa(high_memory) - base : + size; + + if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { + printk(KERN_INFO + "%s:%d ioremap_change_attr failed %s " + "for %Lx-%Lx\n", + current->comm, current->pid, + cattr_name(flags), + base, (unsigned long long)(base + size)); + return -EINVAL; + } + return 0; +} + /* * Internal interface to reserve a range of physical memory with prot. * Reserved non RAM regions only and after successful reserve_memtype, @@ -633,7 +660,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, int strict_prot) { int is_ram = 0; - int id_sz, ret; + int ret; unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); @@ -670,23 +697,8 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, flags); } - /* Need to keep identity mapping in sync */ - if (paddr >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < paddr + size) ? - __pa(high_memory) - paddr : - size; - - if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { + if (kernel_map_sync_memtype(paddr, size, flags) < 0) { free_memtype(paddr, paddr + size); - printk(KERN_ERR - "%s:%d reserve_pfn_range ioremap_change_attr failed %s " - "for %Lx-%Lx\n", - current->comm, current->pid, - cattr_name(flags), - (unsigned long long)paddr, - (unsigned long long)(paddr + size)); return -EINVAL; } return 0; From 17581ad812a9abb0182260374ef2e52d4a808a64 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:14 -0800 Subject: [PATCH 613/682] gpu/drm, x86, PAT: PAT support for io_mapping_* Make io_mapping_create_wc and io_mapping_free go through PAT to make sure that there are no memory type aliases. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iomap.h | 5 ++++- arch/x86/mm/iomap_32.c | 42 ++++++++++++++++++++++++++++++++++-- include/linux/io-mapping.h | 6 ++++-- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 86af26091d6..bd46495ff7d 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -24,7 +24,10 @@ #include int -is_io_mapping_possible(resource_size_t base, unsigned long size); +reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot); + +void +free_io_memtype(u64 base, unsigned long size); void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 6c2b1af1692..94596f794b1 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,13 +21,13 @@ #include #ifdef CONFIG_X86_PAE -int +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { return 1; } #else -int +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { /* There is no way to map greater than 1 << 32 address without PAE */ @@ -38,6 +38,44 @@ is_io_mapping_possible(resource_size_t base, unsigned long size) } #endif +int +reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot) +{ + unsigned long ret_flag; + + if (!is_io_mapping_possible(base, size)) + goto out_err; + + if (!pat_enabled) { + *prot = pgprot_noncached(PAGE_KERNEL); + return 0; + } + + if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &ret_flag)) + goto out_err; + + if (ret_flag == _PAGE_CACHE_WB) + goto out_free; + + if (kernel_map_sync_memtype(base, size, ret_flag)) + goto out_free; + + *prot = __pgprot(__PAGE_KERNEL | ret_flag); + return 0; + +out_free: + free_memtype(base, base + size); +out_err: + return -EINVAL; +} + +void +free_io_memtype(u64 base, unsigned long size) +{ + if (pat_enabled) + free_memtype(base, base + size); +} + /* Map 'pfn' using fixed map 'type' and protections 'prot' */ void * diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index cbc2f0cd631..f1ed66c4378 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,8 +49,9 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; + pgprot_t prot; - if (!is_io_mapping_possible(base, size)) + if (!reserve_io_memtype_wc(base, size, &prot)) return NULL; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); @@ -59,13 +60,14 @@ io_mapping_create_wc(resource_size_t base, unsigned long size) iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { + free_io_memtype(mapping->base, mapping->size); kfree(mapping); } From 0dcec8c27ba44cd11c6e68c46d5fd553818a3837 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 14:07:33 +0100 Subject: [PATCH 614/682] alloc_percpu: add align argument to __alloc_percpu, fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: build fix API was changed, but not all usage sites were converted: net/ipv4/route.c: In function ‘ip_rt_init’: net/ipv4/route.c:3379: error: too few arguments to function ‘__alloc_percpu’ Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97f71153584..bf895401218 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3376,7 +3376,7 @@ int __init ip_rt_init(void) int rc = 0; #ifdef CONFIG_NET_CLS_ROUTE - ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); + ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); #endif From d2b0261506602bd969164879206027b30358ffdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 14:36:45 +0100 Subject: [PATCH 615/682] alloc_percpu: fix UP build Impact: build fix the !SMP branch had a 'gfp' leftover: include/linux/percpu.h: In function '__alloc_percpu': include/linux/percpu.h:160: error: 'gfp' undeclared (first use in this function) include/linux/percpu.h:160: error: (Each undeclared identifier is reported only once include/linux/percpu.h:160: error: for each function it appears in.) Use GFP_KERNEL like the SMP version does. Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 910beb0abea..d8e5a9abbce 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -157,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * percpu sections on SMP for which this path isn't used. */ WARN_ON_ONCE(align > __alignof__(unsigned long long)); - return kzalloc(size, gfp); + return kzalloc(size, GFP_KERNEL); } static inline void free_percpu(void *p) From 34754b69a6f87aa6aa2860525a82f12532f83afd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Feb 2009 16:04:03 +0100 Subject: [PATCH 616/682] x86: make vmap yell louder when it is used under irqs_disabled() Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 6 +++--- mm/vmalloc.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a84ac7b570e..6907b8e85d5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -498,12 +498,12 @@ void *text_poke_early(void *addr, const void *opcode, size_t len) */ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) { - unsigned long flags; char *vaddr; int nr_pages = 2; struct page *pages[2]; int i; + might_sleep(); if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); @@ -517,9 +517,9 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) nr_pages = 1; vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); BUG_ON(!vaddr); - local_irq_save(flags); + local_irq_disable(); memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); + local_irq_enable(); vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4dd2636d0b9..f83a70167b9 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1257,6 +1257,7 @@ EXPORT_SYMBOL(vfree); void vunmap(const void *addr) { BUG_ON(in_interrupt()); + might_sleep(); __vunmap(addr, 0); } EXPORT_SYMBOL(vunmap); @@ -1276,6 +1277,8 @@ void *vmap(struct page **pages, unsigned int count, { struct vm_struct *area; + might_sleep(); + if (count > num_physpages) return NULL; From e317603694bfd17b28a40de9d65e1a4ec12f816e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Feb 2009 10:54:17 +0900 Subject: [PATCH 617/682] percpu: fix too low alignment restriction on UP UP __alloc_percpu() triggered WARN_ON_ONCE() if the requested alignment is larger than that of unsigned long long, which is too small for all the cacheline aligned allocations. Bump it up to SMP_CACHE_BYTES which kmalloc allocations generally guarantee. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d8e5a9abbce..545b068bcb7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -156,7 +156,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * on it. Larger alignment should only be used for module * percpu sections on SMP for which this path isn't used. */ - WARN_ON_ONCE(align > __alignof__(unsigned long long)); + WARN_ON_ONCE(align > SMP_CACHE_BYTES); return kzalloc(size, GFP_KERNEL); } From 13093cb0e59053bf97910de3a24f07cdff71c62c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 03:16:47 +0100 Subject: [PATCH 618/682] gpu/drm, x86, PAT: PAT support for io_mapping_*, export symbols for modules Impact: build fix ERROR: "reserve_io_memtype_wc" [drivers/gpu/drm/i915/i915.ko] undefined! ERROR: "free_io_memtype" [drivers/gpu/drm/i915/i915.ko] undefined! Signed-off-by: Ingo Molnar --- arch/x86/mm/iomap_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 94596f794b1..d5e28424622 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -68,6 +68,7 @@ out_free: out_err: return -EINVAL; } +EXPORT_SYMBOL_GPL(reserve_io_memtype_wc); void free_io_memtype(u64 base, unsigned long size) @@ -75,6 +76,7 @@ free_io_memtype(u64 base, unsigned long size) if (pat_enabled) free_memtype(base, base + size); } +EXPORT_SYMBOL_GPL(free_io_memtype); /* Map 'pfn' using fixed map 'type' and protections 'prot' */ From 2b6163bf5772644068694583816fa41e8474239f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Feb 2009 20:50:49 -0800 Subject: [PATCH 619/682] x86: remove update_apic from x86_quirks Impact: cleanup x86_quirks->update_apic() calling looks crazy. so try to remove it: 1. every apic take wakeup_cpu member directly 2. separate es7000_apic to es7000_apic_cluster 3. use uv_wakeup_cpu directly Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 7 +- arch/x86/include/asm/setup.h | 3 - arch/x86/include/asm/uv/uv.h | 3 - arch/x86/kernel/apic/apic_flat_64.c | 4 +- arch/x86/kernel/apic/bigsmp_32.c | 2 +- arch/x86/kernel/apic/es7000_32.c | 127 ++++++++++++++++++++------ arch/x86/kernel/apic/numaq_32.c | 10 +- arch/x86/kernel/apic/probe_32.c | 16 +--- arch/x86/kernel/apic/probe_64.c | 3 - arch/x86/kernel/apic/summit_32.c | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 2 +- arch/x86/kernel/apic/x2apic_uv_x.c | 11 ++- arch/x86/kernel/setup.c | 14 +-- arch/x86/kernel/smpboot.c | 8 +- 15 files changed, 121 insertions(+), 93 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a6208dc7463..860504178e9 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -325,6 +325,9 @@ struct apic { }; extern struct apic *apic; +extern atomic_t init_deasserted; +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); static inline u32 apic_read(u32 reg) { @@ -384,9 +387,7 @@ static inline unsigned default_get_apic_id(unsigned long x) #define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 #define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 -#ifdef CONFIG_X86_32 -extern void es7000_update_apic_to_cluster(void); -#else +#ifdef CONFIG_X86_64 extern struct apic apic_flat; extern struct apic apic_physflat; extern struct apic apic_x2apic_cluster; diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 66801cb72f6..126877e502e 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -31,7 +31,6 @@ struct x86_quirks { void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, unsigned short oemsize); int (*setup_ioapic_ids)(void); - int (*update_apic)(void); }; extern void x86_quirk_pre_intr_init(void); @@ -77,8 +76,6 @@ static inline void visws_early_detect(void) { } static inline int is_visws_box(void) { return 0; } #endif -extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); -extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); extern struct x86_quirks *x86_quirks; extern unsigned long saved_video_mode; diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 8242bf96581..c0a01b5d985 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -12,7 +12,6 @@ extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); extern void uv_cpu_init(void); extern void uv_system_init(void); -extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, @@ -24,8 +23,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) -{ return 1; } static inline const struct cpumask * uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 3b002995e14..00595bc2da8 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -222,7 +222,7 @@ struct apic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, @@ -373,7 +373,7 @@ struct apic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 0b1093394fd..8c25917b51a 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -256,7 +256,7 @@ struct apic apic_bigsmp = { .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 320f2d2e4e5..9f6102fc87a 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -163,17 +163,12 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) return 0; } -static int __init es7000_update_apic(void) +static int __init es7000_apic_is_cluster(void) { - apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; - /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { - es7000_update_apic_to_cluster(); - apic->wait_for_init_deassert = NULL; - apic->wakeup_cpu = wakeup_secondary_cpu_via_mip; - } + (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) + return 1; return 0; } @@ -192,8 +187,6 @@ static void __init setup_unisys(void) else es7000_plat = ES7000_CLASSIC; ioapic_renumber_irq = es7000_rename_gsi; - - x86_quirks->update_apic = es7000_update_apic; } /* @@ -310,6 +303,8 @@ static int es7000_check_dsdt(void) return 0; } +static int __initdata es7000_acpi_ret; + /* Hook from generic ACPI tables.c */ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { @@ -332,8 +327,19 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) */ unmap_unisys_acpi_oem_table(oem_addr); } - return ret; + + es7000_acpi_ret = ret; + + return ret && !es7000_apic_is_cluster(); } +static int __init es7000_acpi_madt_oem_check_cluster(char *oem_id, + char *oem_table_id) +{ + int ret = es7000_acpi_ret; + + return ret && es7000_apic_is_cluster(); +} + #else /* !CONFIG_ACPI: */ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { @@ -416,11 +422,8 @@ static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask) static void es7000_wait_for_init_deassert(atomic_t *deassert) { -#ifndef CONFIG_ES7000_CLUSTERED_APIC while (!atomic_read(deassert)) cpu_relax(); -#endif - return; } static unsigned int es7000_get_apic_id(unsigned long x) @@ -659,37 +662,103 @@ static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -void __init es7000_update_apic_to_cluster(void) -{ - apic->target_cpus = target_cpus_cluster; - apic->irq_delivery_mode = dest_LowestPrio; - /* logical delivery broadcast to all procs: */ - apic->irq_dest_mode = 1; - - apic->init_apic_ldr = es7000_init_apic_ldr_cluster; - - apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster; -} - static int probe_es7000(void) { /* probed later in mptable/ACPI hooks */ return 0; } +static int __initdata es7000_mps_ret; static __init int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { + int ret = 0; + if (mpc->oemptr) { struct mpc_oemtable *oem_table = (struct mpc_oemtable *)mpc->oemptr; if (!strncmp(oem, "UNISYS", 6)) - return parse_unisys_oem((char *)oem_table); + ret = parse_unisys_oem((char *)oem_table); } - return 0; + + es7000_mps_ret = ret; + + return ret && !es7000_apic_is_cluster(); } +static __init int +es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, char *productid) +{ + int ret = es7000_mps_ret; + + return ret && es7000_apic_is_cluster(); +} + +struct apic apic_es7000_cluster = { + + .name = "es7000", + .probe = probe_es7000, + .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, + .apic_id_registered = es7000_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all procs: */ + .irq_dest_mode = 1, + + .target_cpus = target_cpus_cluster, + .disable_esr = 1, + .dest_logical = 0, + .check_apicid_used = es7000_check_apicid_used, + .check_apicid_present = es7000_check_apicid_present, + + .vector_allocation_domain = es7000_vector_allocation_domain, + .init_apic_ldr = es7000_init_apic_ldr_cluster, + + .ioapic_phys_id_map = es7000_ioapic_phys_id_map, + .setup_apic_routing = es7000_setup_apic_routing, + .multi_timer_check = NULL, + .apicid_to_node = es7000_apicid_to_node, + .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, + .cpu_present_to_apicid = es7000_cpu_present_to_apicid, + .apicid_to_cpu_present = es7000_apicid_to_cpu_present, + .setup_portio_remap = NULL, + .check_phys_apicid_present = es7000_check_phys_apicid_present, + .enable_apic_mode = es7000_enable_apic_mode, + .phys_pkg_id = es7000_phys_pkg_id, + .mps_oem_check = es7000_mps_oem_check_cluster, + + .get_apic_id = es7000_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0xFF << 24, + + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster, + .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, + + .send_IPI_mask = es7000_send_IPI_mask, + .send_IPI_mask_allbutself = NULL, + .send_IPI_allbutself = es7000_send_IPI_allbutself, + .send_IPI_all = es7000_send_IPI_all, + .send_IPI_self = default_send_IPI_self, + + .wakeup_cpu = wakeup_secondary_cpu_via_mip, + + .trampoline_phys_low = 0x467, + .trampoline_phys_high = 0x469, + + .wait_for_init_deassert = NULL, + + /* Nothing to do for most platforms, since cleared by the INIT cycle: */ + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = default_inquire_remote_apic, + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, +}; struct apic apic_es7000 = { @@ -737,7 +806,7 @@ struct apic apic_es7000 = { .send_IPI_all = es7000_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = 0x467, .trampoline_phys_high = 0x469, diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index d9d6d61eed8..c503c1799d6 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -256,13 +256,6 @@ static int __init numaq_setup_ioapic_ids(void) return 1; } -static int __init numaq_update_apic(void) -{ - apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; - - return 0; -} - static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -278,7 +271,6 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, .setup_ioapic_ids = numaq_setup_ioapic_ids, - .update_apic = numaq_update_apic, }; static __init void early_check_numaq(void) @@ -546,7 +538,7 @@ struct apic apic_numaq = { .send_IPI_all = numaq_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_nmi, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 3a730fa574b..13c6fc7dff9 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -138,7 +138,7 @@ struct apic apic_default = { .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, @@ -159,6 +159,7 @@ extern struct apic apic_numaq; extern struct apic apic_summit; extern struct apic apic_bigsmp; extern struct apic apic_es7000; +extern struct apic apic_es7000_cluster; extern struct apic apic_default; struct apic *apic = &apic_default; @@ -176,6 +177,7 @@ static struct apic *apic_probe[] __initdata = { #endif #ifdef CONFIG_X86_ES7000 &apic_es7000, + &apic_es7000_cluster, #endif &apic_default, /* must be last */ NULL, @@ -197,9 +199,6 @@ static int __init parse_apic(char *arg) } } - if (x86_quirks->update_apic) - x86_quirks->update_apic(); - /* Parsed again by __setup for debug/verbose */ return 0; } @@ -218,8 +217,6 @@ void __init generic_bigsmp_probe(void) if (!cmdline_apic && apic == &apic_default) { if (apic_bigsmp.probe()) { apic = &apic_bigsmp; - if (x86_quirks->update_apic) - x86_quirks->update_apic(); printk(KERN_INFO "Overriding APIC driver with %s\n", apic->name); } @@ -240,9 +237,6 @@ void __init generic_apic_probe(void) /* Not visible without early console */ if (!apic_probe[i]) panic("Didn't find an APIC driver"); - - if (x86_quirks->update_apic) - x86_quirks->update_apic(); } printk(KERN_INFO "Using APIC driver %s\n", apic->name); } @@ -262,8 +256,6 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) if (!cmdline_apic) { apic = apic_probe[i]; - if (x86_quirks->update_apic) - x86_quirks->update_apic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", apic->name); } @@ -284,8 +276,6 @@ int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (!cmdline_apic) { apic = apic_probe[i]; - if (x86_quirks->update_apic) - x86_quirks->update_apic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", apic->name); } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index e7c163661c7..8d7748efe6a 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -68,9 +68,6 @@ void __init default_setup_apic_routing(void) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } - - if (x86_quirks->update_apic) - x86_quirks->update_apic(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 32838b57a94..5a75d563f67 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -574,7 +574,7 @@ struct apic apic_summit = { .send_IPI_all = summit_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 354b9c45601..561a6b1042a 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -224,7 +224,7 @@ struct apic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 5bcb174409b..785f8ee4b1d 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -213,7 +213,7 @@ struct apic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 20b4ad07c3a..6d7b9d960dd 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -91,7 +91,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { unsigned long val; int pnode; @@ -99,16 +99,19 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) pnode = uv_apicid_to_pnode(phys_apicid); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | - (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; uv_write_global_mmr64(pnode, UVH_IPI_INT, val); mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | - (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_STARTUP; uv_write_global_mmr64(pnode, UVH_IPI_INT, val); + + atomic_set(&init_deasserted, 1); + return 0; } @@ -285,7 +288,7 @@ struct apic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_self = uv_send_IPI_self, - .wakeup_cpu = NULL, + .wakeup_cpu = uv_wakeup_secondary, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5b85759e797..2280d93c5b9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -600,19 +600,7 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif -static int __init default_update_apic(void) -{ -#ifdef CONFIG_SMP - if (!apic->wakeup_cpu) - apic->wakeup_cpu = wakeup_secondary_cpu_via_init; -#endif - - return 0; -} - -static struct x86_quirks default_x86_quirks __initdata = { - .update_apic = default_update_apic, -}; +static struct x86_quirks default_x86_quirks __initdata; struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9ce666387f3..9b338aa03b4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -112,7 +112,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_core_map); DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); -static atomic_t init_deasserted; +atomic_t init_deasserted; /* Set if we find a B stepping CPU */ @@ -614,12 +614,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; - if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { - send_status = uv_wakeup_secondary(phys_apicid, start_eip); - atomic_set(&init_deasserted, 1); - return send_status; - } - maxlvt = lapic_get_maxlvt(); /* From 129d8bc828e011bda0b7110a097bf3a0167f966e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 Feb 2009 21:20:50 -0800 Subject: [PATCH 620/682] x86: don't compile vsmp_64 for 32bit Impact: cleanup that is only needed when CONFIG_X86_VSMP is defined with 64bit also remove dead code about PCI, because CONFIG_X86_VSMP depends on PCI Signed-off-by: Yinghai Lu Cc: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 7 +++++++ arch/x86/include/asm/setup.h | 4 ++++ arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/setup.c | 2 -- arch/x86/kernel/vsmp_64.c | 12 +----------- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 860504178e9..24e21273e30 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -75,7 +75,14 @@ static inline void default_inquire_remote_apic(int apicid) #define setup_secondary_clock setup_secondary_APIC_clock #endif +#ifdef CONFIG_X86_VSMP extern int is_vsmp_box(void); +#else +static inline int is_vsmp_box(void) +{ + return 0; +} +#endif extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); extern void xapic_icr_write(u32, u32); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 126877e502e..05c6f6b11fd 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -64,7 +64,11 @@ extern void x86_quirk_time_init(void); #include /* Interrupt control for vSMPowered x86_64 systems */ +#ifdef CONFIG_X86_VSMP void vsmp_init(void); +#else +static inline void vsmp_init(void) { } +#endif void setup_bios_corruption_check(void); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index de5657c039e..95f216bbfaf 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o -obj-y += vsmp_64.o +obj-$(CONFIG_X86_VSMP) += vsmp_64.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module_$(BITS).o obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2280d93c5b9..4c54bc0d8ff 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -863,9 +863,7 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); -#ifdef CONFIG_X86_64 vsmp_init(); -#endif io_delay_init(); diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index c609205df59..74de562812c 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -22,7 +22,7 @@ #include #include -#if defined CONFIG_PCI && defined CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT /* * Interrupt control on vSMPowered systems: * ~AC is a shadow of IF. If IF is 'on' AC should be 'off' @@ -114,7 +114,6 @@ static void __init set_vsmp_pv_ops(void) } #endif -#ifdef CONFIG_PCI static int is_vsmp = -1; static void __init detect_vsmp_box(void) @@ -139,15 +138,6 @@ int is_vsmp_box(void) return 0; } } -#else -static void __init detect_vsmp_box(void) -{ -} -int is_vsmp_box(void) -{ - return 0; -} -#endif void __init vsmp_init(void) { From 0917c01f8e793f57a53cf886533d4c75c67f6e89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 12:47:40 +0100 Subject: [PATCH 621/682] x86: remove update_apic from x86_quirks, fix Impact: build fix wakeup_secondary_cpu_via_init(), the default platform method for booting a secondary CPU, is always used on UP due to probe_32.c, if CONFIG_X86_LOCAL_APIC is enabled but SMP is off. So provide a UP wrapper inline as well. Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 24e21273e30..0fbf6f1520f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -331,10 +331,27 @@ struct apic { u32 (*safe_wait_icr_idle)(void); }; +/* + * Pointer to the local APIC driver in use on this system (there's + * always just one such driver in use - the kernel decides via an + * early probing process which one it picks - and then sticks to it): + */ extern struct apic *apic; + +/* + * APIC functionality to boot other CPUs - only used on SMP: + */ +#ifdef CONFIG_SMP extern atomic_t init_deasserted; extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); +#else +static inline int +wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip) +{ + return 0; +} +#endif static inline u32 apic_read(u32 reg) { From 1f5bcabf1b997d6b76a09114b5a79423495a1263 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 13:51:40 +0100 Subject: [PATCH 622/682] x86: apic: simplify secondary CPU wakeup methods Impact: cleanup - rename apic->wakeup_cpu to apic->wakeup_secondary_cpu, to make it apparent that this is an SMP-only method - handle NULL ->wakeup_secondary_cpus to mean the default INIT wakeup sequence - this allows simplification of the APIC driver templates. Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 9 +-------- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/es7000_32.c | 4 +--- arch/x86/kernel/apic/numaq_32.c | 2 +- arch/x86/kernel/apic/probe_32.c | 1 - arch/x86/kernel/apic/summit_32.c | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 1 - arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- arch/x86/kernel/smpboot.c | 11 ++++++++--- 11 files changed, 12 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 0fbf6f1520f..4ef949c1972 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -313,7 +313,7 @@ struct apic { void (*send_IPI_self)(int vector); /* wakeup_secondary_cpu */ - int (*wakeup_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); int trampoline_phys_low; int trampoline_phys_high; @@ -344,13 +344,6 @@ extern struct apic *apic; #ifdef CONFIG_SMP extern atomic_t init_deasserted; extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); -extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); -#else -static inline int -wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip) -{ - return 0; -} #endif static inline u32 apic_read(u32 reg) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 00595bc2da8..f933822dba1 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -222,7 +222,6 @@ struct apic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, @@ -373,7 +372,6 @@ struct apic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_self = apic_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 8c25917b51a..69c512e23a9 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -256,7 +256,6 @@ struct apic apic_bigsmp = { .send_IPI_all = bigsmp_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 9f6102fc87a..b4838ed3f26 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -741,7 +741,7 @@ struct apic apic_es7000_cluster = { .send_IPI_all = es7000_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_mip, + .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip, .trampoline_phys_low = 0x467, .trampoline_phys_high = 0x469, @@ -806,8 +806,6 @@ struct apic apic_es7000 = { .send_IPI_all = es7000_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, - .trampoline_phys_low = 0x467, .trampoline_phys_high = 0x469, diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index c503c1799d6..a7f711f5110 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -538,7 +538,7 @@ struct apic apic_numaq = { .send_IPI_all = numaq_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_nmi, + .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 13c6fc7dff9..141c99a1c26 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -138,7 +138,6 @@ struct apic apic_default = { .send_IPI_all = default_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 5a75d563f67..0a1135c5a6d 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -574,7 +574,6 @@ struct apic apic_summit = { .send_IPI_all = summit_send_IPI_all, .send_IPI_self = default_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 561a6b1042a..8fb87b6dd63 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -224,7 +224,6 @@ struct apic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 785f8ee4b1d..23625b9f98b 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -213,7 +213,6 @@ struct apic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_self = x2apic_send_IPI_self, - .wakeup_cpu = wakeup_secondary_cpu_via_init, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 6d7b9d960dd..7151de74a39 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -288,7 +288,7 @@ struct apic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_self = uv_send_IPI_self, - .wakeup_cpu = uv_wakeup_secondary, + .wakeup_secondary_cpu = uv_wakeup_secondary, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, .wait_for_init_deassert = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9b338aa03b4..249334f5080 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -742,7 +742,8 @@ static void __cpuinit do_fork_idle(struct work_struct *work) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. - * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. + * Returns zero if CPU booted OK, else error code from + * ->wakeup_secondary_cpu. */ static int __cpuinit do_boot_cpu(int apicid, int cpu) { @@ -829,9 +830,13 @@ do_rest: } /* - * Starting actual IPI sequence... + * Kick the secondary CPU. Use the method in the APIC driver + * if it's defined - or use an INIT boot APIC message otherwise: */ - boot_error = apic->wakeup_cpu(apicid, start_ip); + if (apic->wakeup_secondary_cpu) + boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); + else + boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); if (!boot_error) { /* From 0b1da1c8fc1a0cb71f17701efad06855a059f752 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 14:10:10 +0100 Subject: [PATCH 623/682] x86: apic: simplify secondary CPU wakeup methods, fix Impact: build fix init_deasserted is only available on SMP. Make the secondary-wakeup function conditional on SMP. Also clean up the file some. Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7151de74a39..1bd6da1f8fa 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -7,28 +7,28 @@ * * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ - -#include -#include -#include #include +#include +#include +#include +#include +#include #include #include -#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include + #include #include +#include +#include #include +#include +#include +#include +#include DEFINE_PER_CPU(int, x2apic_extra_bits); @@ -93,6 +93,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { +#ifdef CONFIG_SMP unsigned long val; int pnode; @@ -111,7 +112,7 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); atomic_set(&init_deasserted, 1); - +#endif return 0; } @@ -368,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift, paddr = base << shift; bytes = (1UL << shift) * (max_pnode + 1); printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, - paddr + bytes); + paddr + bytes); if (map_type == map_uc) init_extra_mapping_uc(paddr, bytes); else @@ -531,7 +532,7 @@ late_initcall(uv_init_heartbeat); /* * Called on each cpu to initialize the per_cpu UV data area. - * ZZZ hotplug not supported yet + * FIXME: hotplug not supported yet */ void __cpuinit uv_cpu_init(void) { From 3b900d44190c7da8681101c57a5be6b354dab2c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 14:34:08 +0100 Subject: [PATCH 624/682] x86: fix !ACPI build for es7000_32.c arch/x86/kernel/apic/es7000_32.c:702: error: 'es7000_acpi_madt_oem_check_cluster' undeclared here (not in a function) Provide a es7000_acpi_madt_oem_check_cluster() definition in the !ACPI case too. Cc: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index b4838ed3f26..da37e2c59fe 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -332,8 +332,9 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return ret && !es7000_apic_is_cluster(); } -static int __init es7000_acpi_madt_oem_check_cluster(char *oem_id, - char *oem_table_id) + +static int __init +es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) { int ret = es7000_acpi_ret; @@ -345,6 +346,12 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + +static int __init +es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) +{ + return 0; +} #endif /* !CONFIG_ACPI */ static void es7000_spin(int n) From d09375a9ec80d8bae06196bb247460e0829d884c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:52 -0300 Subject: [PATCH 625/682] x86, fixmap: rename __FIXADDR_SIZE and __FIXADDR_BOOT_SIZE Impact: rename Rename __FIXADDR_SIZE to FIXADDR_SIZE and __FIXADDR_BOOT_SIZE to FIXADDR_BOOT_SIZE. Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_32.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index 047d9bab2b3..d3fd0b92b4e 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -106,10 +106,10 @@ extern void reserve_top_address(unsigned long reserve); #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) -#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) -#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_32_H */ From ab93e3c45dbac66c2e8e24fd560c052a3907903e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:53 -0300 Subject: [PATCH 626/682] x86, fixmap: define FIXADDR_BOOT_* and redefine FIX_ADDR_SIZE Impact: new interface, not yet used Now, with these macros, x86_64 code can know where start the permanent and non-permanent fixed mapped address. This patch make these macros equal fixmap_32.h for future x86 integration. Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_64.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 298d9ba3fae..d905de08675 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -69,8 +69,11 @@ enum fixed_addresses { }; #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) -#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) /* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) From fd862dde18c3e360f510780e1d1bf615866b11c2 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:54 -0300 Subject: [PATCH 627/682] x86, fixmap: define reserve_top_address for x86_64 Impact: new interface (not yet use) Define reserve_top_address for x86_64; only for later x86 integration. Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_64.h | 2 ++ arch/x86/mm/pgtable.c | 18 ++++++++++++++++++ arch/x86/mm/pgtable_32.c | 16 ---------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index d905de08675..b6c06dd55f1 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -68,6 +68,8 @@ enum fixed_addresses { __end_of_fixed_addresses }; +extern void reserve_top_address(unsigned long reserve); + #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 86f2ffc43c3..5b7c7c8464f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -313,6 +313,24 @@ int ptep_clear_flush_young(struct vm_area_struct *vma, return young; } +/** + * reserve_top_address - reserves a hole in the top of kernel address space + * @reserve - size of hole to reserve + * + * Can be used to relocate the fixmap area and poke a hole in the top + * of kernel address space to make room for a hypervisor. + */ +void __init reserve_top_address(unsigned long reserve) +{ +#ifdef CONFIG_X86_32 + BUG_ON(fixmaps_set > 0); + printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", + (int)-reserve); + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; +#endif +} + int fixmaps_set; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 0951db9ee51..c3cf6e11576 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -97,22 +97,6 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) unsigned long __FIXADDR_TOP = 0xfffff000; EXPORT_SYMBOL(__FIXADDR_TOP); -/** - * reserve_top_address - reserves a hole in the top of kernel address space - * @reserve - size of hole to reserve - * - * Can be used to relocate the fixmap area and poke a hole in the top - * of kernel address space to make room for a hypervisor. - */ -void __init reserve_top_address(unsigned long reserve) -{ - BUG_ON(fixmaps_set > 0); - printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", - (int)-reserve); - __FIXADDR_TOP = -reserve - PAGE_SIZE; - __VMALLOC_RESERVE += reserve; -} - /* * vmalloc=size forces the vmalloc area to be exactly 'size' * bytes. This can be used to increase (or decrease) the From 2ae38daf25204b7c3725e053052f77469eff62cf Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:55 -0300 Subject: [PATCH 628/682] x86, fixmap: add CONFIG_X86_{LOCAL,IO}_APIC Impact: New fixmap allocations Add CONFIG_X86_{LOCAL,IO}_APIC to enum fixed_address. FIX_APIC_BASE is used only when CONFIG_X86_LOCAL_APIC is enabled and FIX_IO_APIC_BASE_* are used only when CONFIG_X86_IO_APIC is enabled. Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_64.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index b6c06dd55f1..7a518ec8263 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -40,9 +40,13 @@ enum fixed_addresses { VSYSCALL_HPET, FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +#endif FIX_EFI_IO_MAP_LAST_PAGE, FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + MAX_EFI_IO_PAGES - 1, From 5f403fa9de5b3bb1309ec5f72b1e52e5b51321d4 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:56 -0300 Subject: [PATCH 629/682] x86, fixmap: add CONFIG_EFI Impact: new fixmap allocation FIX_EFI_IO_MAP_FIRST_PAGE is used only when EFI is enabled. Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_64.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 7a518ec8263..baff6d0da99 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -16,7 +16,9 @@ #include #include #include +#ifdef CONFIG_EFI #include +#endif /* * Here we define all the compile-time 'special' virtual @@ -47,9 +49,11 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif +#ifdef CONFIG_EFI FIX_EFI_IO_MAP_LAST_PAGE, FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + MAX_EFI_IO_PAGES - 1, +#endif #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif From e365bcd9214db993436de7bc915b76ab8402045f Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:57 -0300 Subject: [PATCH 630/682] x86, fixmap: prepare fixmap_64.h for unification Impact: cleanup Just prepare fixmap for later mechanic unification. No real modification on code. text data bss dec hex filename 4312362 527192 421924 5261478 5048a6 vmlinux-64.after 4312362 527192 421924 5261478 5048a6 vmlinux-64.before Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_64.h | 76 +++++++++++++++++++++++++++++--- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index baff6d0da99..95acae0daa5 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -6,25 +6,63 @@ * for more details. * * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 */ #ifndef _ASM_X86_FIXMAP_64_H #define _ASM_X86_FIXMAP_64_H +#ifndef __ASSEMBLY__ #include #include #include #include +#ifdef CONFIG_X86_32 +#include +#include +#else #include #ifdef CONFIG_EFI #include #endif +#endif + +/* + * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall + * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. + * Because of this, FIXADDR_TOP x86 integration was left as later work. + */ +#ifdef CONFIG_X86_32 +/* used by vmalloc.c, vsyscall.lds.S. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +extern unsigned long __FIXADDR_TOP; +#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) + +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +#else +#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) + +/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +#endif /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at * compile time, but to set the physical address only * in the boot process. + * for x86_32: We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. * * These 'compile-time allocated' memory buffers are * fixed-size 4k pages (or larger if used with an increment @@ -34,12 +72,16 @@ * TLB entries of such buffers will not be flushed across * task switches. */ - enum fixed_addresses { +#ifdef CONFIG_X86_32 + FIX_HOLE, + FIX_VDSO, +#else VSYSCALL_LAST_PAGE, VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, VSYSCALL_HPET, +#endif FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, #ifdef CONFIG_X86_LOCAL_APIC @@ -49,11 +91,32 @@ enum fixed_addresses { FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, #endif +#ifdef CONFIG_X86_64 #ifdef CONFIG_EFI FIX_EFI_IO_MAP_LAST_PAGE, FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + MAX_EFI_IO_PAGES - 1, #endif +#endif +#ifdef CONFIG_X86_VISWS_APIC + FIX_CO_CPU, /* Cobalt timer */ + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ + FIX_LI_PCIA, /* Lithium PCI Bridge A */ + FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_X86_F00F_BUG + FIX_F00F_IDT, /* Virtual mapping for IDT */ +#endif +#ifdef CONFIG_X86_CYCLONE_TIMER + FIX_CYCLONE_TIMER, /*cyclone timer register*/ +#endif +#ifdef CONFIG_X86_32 + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, +#endif +#endif #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif @@ -73,20 +136,19 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +#ifdef CONFIG_X86_32 + FIX_WP_TEST, +#endif __end_of_fixed_addresses }; -extern void reserve_top_address(unsigned long reserve); -#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) +extern void reserve_top_address(unsigned long reserve); #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) #define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) -/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ -#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) - +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_64_H */ From c78f322ce8cea6052bc99513b57283d4d325a13e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:58 -0300 Subject: [PATCH 631/682] x86, fixmap: prepare fixmap_32.h for unification Impact: cleanup Just prepare fixmap for later mechanic unification. No real modification on code. text data bss dec hex filename 3831152 353188 372736 4557076 458914 vmlinux-32.after 3831152 353188 372736 4557076 458914 vmlinux-32.before Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap_32.h | 79 ++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index d3fd0b92b4e..8384df78eee 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -8,50 +8,80 @@ * Copyright (C) 1998 Ingo Molnar * * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 */ #ifndef _ASM_X86_FIXMAP_32_H #define _ASM_X86_FIXMAP_32_H - -/* used by vmalloc.c, vsyscall.lds.S. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap. - */ -extern unsigned long __FIXADDR_TOP; -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) - #ifndef __ASSEMBLY__ #include #include #include #include +#ifdef CONFIG_X86_32 #include #include +#else +#include +#ifdef CONFIG_EFI +#include +#endif +#endif + +/* + * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall + * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. + * Because of this, FIXADDR_TOP x86 integration was left as later work. + */ +#ifdef CONFIG_X86_32 +/* used by vmalloc.c, vsyscall.lds.S. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +extern unsigned long __FIXADDR_TOP; +#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) + +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +#else +#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) + +/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +#endif /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at * compile time, but to set the physical address only - * in the boot process. We allocate these special addresses + * in the boot process. + * for x86_32: We allocate these special addresses * from the end of virtual memory (0xfffff000) backwards. * Also this lets us do fail-safe vmalloc(), we * can guarantee that these special addresses and * vmalloc()-ed addresses never overlap. * - * these 'compile-time allocated' memory buffers are - * fixed-size 4k pages. (or larger if used with an increment - * highger than 1) use fixmap_set(idx,phys) to associate + * These 'compile-time allocated' memory buffers are + * fixed-size 4k pages (or larger if used with an increment + * higher than 1). Use set_fixmap(idx,phys) to associate * physical memory with fixmap indices. * * TLB entries of such buffers will not be flushed across * task switches. */ enum fixed_addresses { +#ifdef CONFIG_X86_32 FIX_HOLE, FIX_VDSO, +#else + VSYSCALL_LAST_PAGE, + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, + VSYSCALL_HPET, +#endif FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, #ifdef CONFIG_X86_LOCAL_APIC @@ -59,7 +89,14 @@ enum fixed_addresses { #endif #ifdef CONFIG_X86_IO_APIC FIX_IO_APIC_BASE_0, - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +#endif +#ifdef CONFIG_X86_64 +#ifdef CONFIG_EFI + FIX_EFI_IO_MAP_LAST_PAGE, + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + + MAX_EFI_IO_PAGES - 1, +#endif #endif #ifdef CONFIG_X86_VISWS_APIC FIX_CO_CPU, /* Cobalt timer */ @@ -73,15 +110,20 @@ enum fixed_addresses { #ifdef CONFIG_X86_CYCLONE_TIMER FIX_CYCLONE_TIMER, /*cyclone timer register*/ #endif +#ifdef CONFIG_X86_32 FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif +#endif #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif __end_of_permanent_fixed_addresses, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif /* * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. @@ -94,18 +136,15 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +#ifdef CONFIG_X86_32 FIX_WP_TEST, -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - FIX_OHCI1394_BASE, #endif __end_of_fixed_addresses }; + extern void reserve_top_address(unsigned long reserve); - -#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) - #define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) #define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) From c577b098f9bf467fb05dc279ba83604cb3f7cea0 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 15 Feb 2009 21:48:59 -0300 Subject: [PATCH 632/682] x86, fixmap: unify fixmap.h Impact: unification This patch unify fixmap_32.h and fixmap_64.h into fixmap.h. Things that we can't merge now are using CONFIG_X86_{32,64} (e.g.:vsyscall and EFI) Signed-off-by: Gustavo F. Padovan Acked-by: Glauber Costa Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/fixmap.h | 149 +++++++++++++++++++++++++++++- arch/x86/include/asm/fixmap_32.h | 154 ------------------------------- arch/x86/include/asm/fixmap_64.h | 154 ------------------------------- 3 files changed, 147 insertions(+), 310 deletions(-) delete mode 100644 arch/x86/include/asm/fixmap_32.h delete mode 100644 arch/x86/include/asm/fixmap_64.h diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 23696d44a0a..dca8f03da5b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -1,11 +1,155 @@ +/* + * fixmap.h: compile-time virtual memory allocation + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 + * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 + */ + #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +#ifndef __ASSEMBLY__ +#include +#include +#include +#include #ifdef CONFIG_X86_32 -# include "fixmap_32.h" +#include +#include #else -# include "fixmap_64.h" +#include +#ifdef CONFIG_EFI +#include #endif +#endif + +/* + * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall + * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. + * Because of this, FIXADDR_TOP x86 integration was left as later work. + */ +#ifdef CONFIG_X86_32 +/* used by vmalloc.c, vsyscall.lds.S. + * + * Leave one empty page between vmalloc'ed areas and + * the start of the fixmap. + */ +extern unsigned long __FIXADDR_TOP; +#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) + +#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +#else +#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) + +/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +#endif + + +/* + * Here we define all the compile-time 'special' virtual + * addresses. The point is to have a constant address at + * compile time, but to set the physical address only + * in the boot process. + * for x86_32: We allocate these special addresses + * from the end of virtual memory (0xfffff000) backwards. + * Also this lets us do fail-safe vmalloc(), we + * can guarantee that these special addresses and + * vmalloc()-ed addresses never overlap. + * + * These 'compile-time allocated' memory buffers are + * fixed-size 4k pages (or larger if used with an increment + * higher than 1). Use set_fixmap(idx,phys) to associate + * physical memory with fixmap indices. + * + * TLB entries of such buffers will not be flushed across + * task switches. + */ +enum fixed_addresses { +#ifdef CONFIG_X86_32 + FIX_HOLE, + FIX_VDSO, +#else + VSYSCALL_LAST_PAGE, + VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, + VSYSCALL_HPET, +#endif + FIX_DBGP_BASE, + FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +#endif +#ifdef CONFIG_X86_IO_APIC + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +#endif +#ifdef CONFIG_X86_64 +#ifdef CONFIG_EFI + FIX_EFI_IO_MAP_LAST_PAGE, + FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE + + MAX_EFI_IO_PAGES - 1, +#endif +#endif +#ifdef CONFIG_X86_VISWS_APIC + FIX_CO_CPU, /* Cobalt timer */ + FIX_CO_APIC, /* Cobalt APIC Redirection Table */ + FIX_LI_PCIA, /* Lithium PCI Bridge A */ + FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_X86_F00F_BUG + FIX_F00F_IDT, /* Virtual mapping for IDT */ +#endif +#ifdef CONFIG_X86_CYCLONE_TIMER + FIX_CYCLONE_TIMER, /*cyclone timer register*/ +#endif +#ifdef CONFIG_X86_32 + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +#ifdef CONFIG_PCI_MMCONFIG + FIX_PCIE_MCFG, +#endif +#endif +#ifdef CONFIG_PARAVIRT + FIX_PARAVIRT_BOOTMAP, +#endif + __end_of_permanent_fixed_addresses, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif + /* + * 256 temporary boot-time mappings, used by early_ioremap(), + * before ioremap() is functional. + * + * We round it up to the next 256 pages boundary so that we + * can have a single pgd entry and a single pte table: + */ +#define NR_FIX_BTMAPS 64 +#define FIX_BTMAPS_SLOTS 4 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - + (__end_of_permanent_fixed_addresses & 255), + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +#ifdef CONFIG_X86_32 + FIX_WP_TEST, +#endif + __end_of_fixed_addresses +}; + + +extern void reserve_top_address(unsigned long reserve); + +#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) extern int fixmaps_set; @@ -69,4 +213,5 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); return __virt_to_fix(vaddr); } +#endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_FIXMAP_H */ diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h deleted file mode 100644 index 8384df78eee..00000000000 --- a/arch/x86/include/asm/fixmap_32.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 - */ - -#ifndef _ASM_X86_FIXMAP_32_H -#define _ASM_X86_FIXMAP_32_H - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#ifdef CONFIG_X86_32 -#include -#include -#else -#include -#ifdef CONFIG_EFI -#include -#endif -#endif - -/* - * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall - * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. - * Because of this, FIXADDR_TOP x86 integration was left as later work. - */ -#ifdef CONFIG_X86_32 -/* used by vmalloc.c, vsyscall.lds.S. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap. - */ -extern unsigned long __FIXADDR_TOP; -#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) - -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#else -#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) - -/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ -#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) -#endif - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. - * for x86_32: We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * These 'compile-time allocated' memory buffers are - * fixed-size 4k pages (or larger if used with an increment - * higher than 1). Use set_fixmap(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ -enum fixed_addresses { -#ifdef CONFIG_X86_32 - FIX_HOLE, - FIX_VDSO, -#else - VSYSCALL_LAST_PAGE, - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE - + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, - VSYSCALL_HPET, -#endif - FIX_DBGP_BASE, - FIX_EARLYCON_MEM_BASE, -#ifdef CONFIG_X86_LOCAL_APIC - FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ -#endif -#ifdef CONFIG_X86_IO_APIC - FIX_IO_APIC_BASE_0, - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, -#endif -#ifdef CONFIG_X86_64 -#ifdef CONFIG_EFI - FIX_EFI_IO_MAP_LAST_PAGE, - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE - + MAX_EFI_IO_PAGES - 1, -#endif -#endif -#ifdef CONFIG_X86_VISWS_APIC - FIX_CO_CPU, /* Cobalt timer */ - FIX_CO_APIC, /* Cobalt APIC Redirection Table */ - FIX_LI_PCIA, /* Lithium PCI Bridge A */ - FIX_LI_PCIB, /* Lithium PCI Bridge B */ -#endif -#ifdef CONFIG_X86_F00F_BUG - FIX_F00F_IDT, /* Virtual mapping for IDT */ -#endif -#ifdef CONFIG_X86_CYCLONE_TIMER - FIX_CYCLONE_TIMER, /*cyclone timer register*/ -#endif -#ifdef CONFIG_X86_32 - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#ifdef CONFIG_PCI_MMCONFIG - FIX_PCIE_MCFG, -#endif -#endif -#ifdef CONFIG_PARAVIRT - FIX_PARAVIRT_BOOTMAP, -#endif - __end_of_permanent_fixed_addresses, -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - FIX_OHCI1394_BASE, -#endif - /* - * 256 temporary boot-time mappings, used by early_ioremap(), - * before ioremap() is functional. - * - * We round it up to the next 256 pages boundary so that we - * can have a single pgd entry and a single pte table: - */ -#define NR_FIX_BTMAPS 64 -#define FIX_BTMAPS_SLOTS 4 - FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - - (__end_of_permanent_fixed_addresses & 255), - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, -#ifdef CONFIG_X86_32 - FIX_WP_TEST, -#endif - __end_of_fixed_addresses -}; - - -extern void reserve_top_address(unsigned long reserve); - -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_X86_FIXMAP_32_H */ diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h deleted file mode 100644 index 95acae0daa5..00000000000 --- a/arch/x86/include/asm/fixmap_64.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * fixmap.h: compile-time virtual memory allocation - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998 Ingo Molnar - * - * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 - * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 - */ - -#ifndef _ASM_X86_FIXMAP_64_H -#define _ASM_X86_FIXMAP_64_H - -#ifndef __ASSEMBLY__ -#include -#include -#include -#include -#ifdef CONFIG_X86_32 -#include -#include -#else -#include -#ifdef CONFIG_EFI -#include -#endif -#endif - -/* - * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall - * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. - * Because of this, FIXADDR_TOP x86 integration was left as later work. - */ -#ifdef CONFIG_X86_32 -/* used by vmalloc.c, vsyscall.lds.S. - * - * Leave one empty page between vmalloc'ed areas and - * the start of the fixmap. - */ -extern unsigned long __FIXADDR_TOP; -#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) - -#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) -#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) -#else -#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) - -/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ -#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) -#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) -#endif - -/* - * Here we define all the compile-time 'special' virtual - * addresses. The point is to have a constant address at - * compile time, but to set the physical address only - * in the boot process. - * for x86_32: We allocate these special addresses - * from the end of virtual memory (0xfffff000) backwards. - * Also this lets us do fail-safe vmalloc(), we - * can guarantee that these special addresses and - * vmalloc()-ed addresses never overlap. - * - * These 'compile-time allocated' memory buffers are - * fixed-size 4k pages (or larger if used with an increment - * higher than 1). Use set_fixmap(idx,phys) to associate - * physical memory with fixmap indices. - * - * TLB entries of such buffers will not be flushed across - * task switches. - */ -enum fixed_addresses { -#ifdef CONFIG_X86_32 - FIX_HOLE, - FIX_VDSO, -#else - VSYSCALL_LAST_PAGE, - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE - + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, - VSYSCALL_HPET, -#endif - FIX_DBGP_BASE, - FIX_EARLYCON_MEM_BASE, -#ifdef CONFIG_X86_LOCAL_APIC - FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ -#endif -#ifdef CONFIG_X86_IO_APIC - FIX_IO_APIC_BASE_0, - FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, -#endif -#ifdef CONFIG_X86_64 -#ifdef CONFIG_EFI - FIX_EFI_IO_MAP_LAST_PAGE, - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE - + MAX_EFI_IO_PAGES - 1, -#endif -#endif -#ifdef CONFIG_X86_VISWS_APIC - FIX_CO_CPU, /* Cobalt timer */ - FIX_CO_APIC, /* Cobalt APIC Redirection Table */ - FIX_LI_PCIA, /* Lithium PCI Bridge A */ - FIX_LI_PCIB, /* Lithium PCI Bridge B */ -#endif -#ifdef CONFIG_X86_F00F_BUG - FIX_F00F_IDT, /* Virtual mapping for IDT */ -#endif -#ifdef CONFIG_X86_CYCLONE_TIMER - FIX_CYCLONE_TIMER, /*cyclone timer register*/ -#endif -#ifdef CONFIG_X86_32 - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#ifdef CONFIG_PCI_MMCONFIG - FIX_PCIE_MCFG, -#endif -#endif -#ifdef CONFIG_PARAVIRT - FIX_PARAVIRT_BOOTMAP, -#endif - __end_of_permanent_fixed_addresses, -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - FIX_OHCI1394_BASE, -#endif - /* - * 256 temporary boot-time mappings, used by early_ioremap(), - * before ioremap() is functional. - * - * We round it up to the next 256 pages boundary so that we - * can have a single pgd entry and a single pte table: - */ -#define NR_FIX_BTMAPS 64 -#define FIX_BTMAPS_SLOTS 4 - FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - - (__end_of_permanent_fixed_addresses & 255), - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, -#ifdef CONFIG_X86_32 - FIX_WP_TEST, -#endif - __end_of_fixed_addresses -}; - - -extern void reserve_top_address(unsigned long reserve); - -#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) - -#endif /* !__ASSEMBLY__ */ -#endif /* _ASM_X86_FIXMAP_64_H */ From 144b0712dd9dd9ebd4e80c4e5388c9f6afc2b497 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 27 Feb 2009 10:27:04 -0800 Subject: [PATCH 633/682] x86: signal: add __user annotation Impact: cleanup Add missing __user annotation to the parameter of get_sigframe(). Also change cast type to void __user * of *fpstate. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7cdcd16885e..e89eaf417e5 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -215,7 +215,7 @@ static const struct { */ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -243,7 +243,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (used_math()) { sp = sp - sig_xstate_size; - *fpstate = (struct _fpstate *) sp; + *fpstate = (void __user *) sp; if (save_i387_xstate(*fpstate) < 0) return (void __user *)-1L; } From 97286a2b64725aac2d584ddd1f94871f9991d5a1 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 27 Feb 2009 10:28:48 -0800 Subject: [PATCH 634/682] x86: signal: intrroduce get_sigframe() and replace get_sigstack() Impact: cleanup Introduce get_sigframe() like 32-bit to replace get_sigstack(). Move the i387 stuff into get_sigframe(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e89eaf417e5..82d37c77b0f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -392,10 +392,13 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Determine which stack to use.. */ static void __user * -get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + void __user **fpstate) { - /* Default to using normal stack - redzone*/ - sp -= 128; + unsigned long sp; + + /* Default to using normal stack - redzone */ + sp = regs->sp - 128; /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { @@ -403,7 +406,18 @@ get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) sp = current->sas_ss_sp + current->sas_ss_size; } - return (void __user *)round_down(sp - size, 64); + if (used_math()) { + sp -= sig_xstate_size; + *fpstate = (void __user *)round_down(sp, 64); + if (save_i387_xstate(*fpstate) < 0) + return (void __user *) -1L; + + sp -= frame_size; + return (void __user *)round_down(sp, 16) - 8; + } + + sp -= frame_size; + return (void __user *)round_down(sp, 64) - 8; } static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, @@ -414,15 +428,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int err = 0; struct task_struct *me = current; - if (used_math()) { - fp = get_stack(ka, regs->sp, sig_xstate_size); - frame = (void __user *)round_down( - (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; - - if (save_i387_xstate(fp) < 0) - return -EFAULT; - } else - frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; + frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; From 36a4526583ad61fe7cb7432f53bce52ea198813a Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 27 Feb 2009 10:29:09 -0800 Subject: [PATCH 635/682] x86: signal: use 16 bytes boundary for rt_sigframe Impact: cleanup Supporting xsave/xrestore introduces 64 bytes boundary for save_i387_xstate(). 16 bytes boundary is OK for rt_sigframe. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 82d37c77b0f..89ef90df985 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -409,15 +409,13 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (used_math()) { sp -= sig_xstate_size; *fpstate = (void __user *)round_down(sp, 64); + if (save_i387_xstate(*fpstate) < 0) return (void __user *) -1L; - - sp -= frame_size; - return (void __user *)round_down(sp, 16) - 8; } sp -= frame_size; - return (void __user *)round_down(sp, 64) - 8; + return (void __user *)round_down(sp, 16) - 8; } static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, From 75779f05264b9968d7ae7ecb4ca5127b08785692 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 27 Feb 2009 10:29:57 -0800 Subject: [PATCH 636/682] x86: signal: unify get_sigframe() Impact: cleanup Unify get_sigframe(). Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 141 +++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 78 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 89ef90df985..53425c681f2 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -187,6 +187,69 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, /* * Set up a signal frame. */ + +/* + * Determine which stack to use.. + */ +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, + void __user **fpstate) +{ + /* Default to using normal stack */ + unsigned long sp = regs->sp; + +#ifdef CONFIG_X86_64 + /* redzone */ + sp -= 128; +#endif /* CONFIG_X86_64 */ + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) + return (void __user *) -1L; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (sas_ss_flags(sp) == 0) + sp = current->sas_ss_sp + current->sas_ss_size; + } else { +#ifdef CONFIG_X86_32 + /* This is the legacy signal stack switching. */ + if ((regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) + sp = (unsigned long) ka->sa.sa_restorer; +#endif /* CONFIG_X86_32 */ + } + + if (used_math()) { + sp -= sig_xstate_size; +#ifdef CONFIG_X86_32 + *fpstate = (void __user *) sp; +#else /* !CONFIG_X86_32 */ + *fpstate = (void __user *)round_down(sp, 64); +#endif /* CONFIG_X86_32 */ + + if (save_i387_xstate(*fpstate) < 0) + return (void __user *)-1L; + } + + sp -= frame_size; +#ifdef CONFIG_X86_32 + /* + * Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. + */ + sp = ((sp + 4) & -16ul) - 4; +#else /* !CONFIG_X86_32 */ + sp = round_down(sp, 16) - 8; +#endif + + return (void __user *) sp; +} + #ifdef CONFIG_X86_32 static const struct { u16 poplmovl; @@ -210,54 +273,6 @@ static const struct { 0 }; -/* - * Determine which stack to use.. - */ -static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void __user **fpstate) -{ - unsigned long sp; - - /* Default to using normal stack */ - sp = regs->sp; - - /* - * If we are on the alternate signal stack and would overflow it, don't. - * Return an always-bogus address instead so we will die with SIGSEGV. - */ - if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) - return (void __user *) -1L; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(sp) == 0) - sp = current->sas_ss_sp + current->sas_ss_size; - } else { - /* This is the legacy signal stack switching. */ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) - sp = (unsigned long) ka->sa.sa_restorer; - } - - if (used_math()) { - sp = sp - sig_xstate_size; - *fpstate = (void __user *) sp; - if (save_i387_xstate(*fpstate) < 0) - return (void __user *)-1L; - } - - sp -= frame_size; - /* - * Align the stack pointer according to the i386 ABI, - * i.e. so that on function entry ((sp + 4) & 15) == 0. - */ - sp = ((sp + 4) & -16ul) - 4; - - return (void __user *) sp; -} - static int __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) @@ -388,36 +403,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; } #else /* !CONFIG_X86_32 */ -/* - * Determine which stack to use.. - */ -static void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void __user **fpstate) -{ - unsigned long sp; - - /* Default to using normal stack - redzone */ - sp = regs->sp - 128; - - /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(sp) == 0) - sp = current->sas_ss_sp + current->sas_ss_size; - } - - if (used_math()) { - sp -= sig_xstate_size; - *fpstate = (void __user *)round_down(sp, 64); - - if (save_i387_xstate(*fpstate) < 0) - return (void __user *) -1L; - } - - sp -= frame_size; - return (void __user *)round_down(sp, 16) - 8; -} - static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { From 1fae0279ce4811fa123001515d1ed3d68c1d557f Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 27 Feb 2009 10:30:32 -0800 Subject: [PATCH 637/682] x86: signal: introduce helper align_sigframe() Impact: cleanup Introduce helper align_sigframe() to align stack pointer for signal frame. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 53425c681f2..dde3f2ae237 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -191,6 +191,20 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, /* * Determine which stack to use.. */ +static unsigned long align_sigframe(unsigned long sp) +{ +#ifdef CONFIG_X86_32 + /* + * Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. + */ + sp = ((sp + 4) & -16ul) - 4; +#else /* !CONFIG_X86_32 */ + sp = round_down(sp, 16) - 8; +#endif + return sp; +} + static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, void __user **fpstate) @@ -236,18 +250,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; } - sp -= frame_size; -#ifdef CONFIG_X86_32 - /* - * Align the stack pointer according to the i386 ABI, - * i.e. so that on function entry ((sp + 4) & 15) == 0. - */ - sp = ((sp + 4) & -16ul) - 4; -#else /* !CONFIG_X86_32 */ - sp = round_down(sp, 16) - 8; -#endif - - return (void __user *) sp; + return (void __user *)align_sigframe(sp - frame_size); } #ifdef CONFIG_X86_32 From 327f4387e39cf7bfe79a673e56dbf5479db3fec9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 28 Feb 2009 18:50:21 +0530 Subject: [PATCH 638/682] x86: remove double copy of show_cpuinfo_core for 32 and 64 bit Impact: unification show_cpuinfo_core is identical for 32 and 64 bit and can be unified, and CONFIG_X86_HT inherently depends on CONFIG_X86_SMP. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/proc.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 01b1244ef1c..d67e0e48bc2 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -7,11 +7,10 @@ /* * Get CPU information for use by the procfs. */ -#ifdef CONFIG_X86_32 static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, unsigned int cpu) { -#ifdef CONFIG_X86_HT +#ifdef CONFIG_SMP if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); seq_printf(m, "siblings\t: %d\n", @@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, #endif } +#ifdef CONFIG_X86_32 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) { /* @@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) c->wp_works_ok ? "yes" : "no"); } #else -static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, - unsigned int cpu) -{ -#ifdef CONFIG_SMP - if (c->x86_max_cores * smp_num_siblings > 1) { - seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", - cpus_weight(per_cpu(cpu_core_map, cpu))); - seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); - seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); - seq_printf(m, "apicid\t\t: %d\n", c->apicid); - seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); - } -#endif -} - static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) { seq_printf(m, From 02d51fdfb2bfcf6bbd776f983177f55868aa0a79 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Mar 2009 15:42:36 +0900 Subject: [PATCH 639/682] percpu: kill compile warning in pcpu_populate_chunk() Impact: remove compile warning Mark local variable map_end in pcpu_populate_chunk() with uninitialized_var(). The variable is always used in tandem with map_start and guaranteed to be initialized before use but gcc doesn't understand that. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- mm/percpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 5954e7a9eb1..3d0f5456827 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -639,7 +639,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); int map_start = -1; - int map_end; + int uninitialized_var(map_end); unsigned int cpu; int i; From af6326d72c95d6e0bbc88c92185c654f57acef3b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Mar 2009 16:03:16 +0900 Subject: [PATCH 640/682] alpha: fix typo in recent early vmalloc change Impact: fix build Add missing 'o' in variable name. Compile tested. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar Cc: Ivan Kokshaysky --- arch/alpha/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 91eddd8505d..af71d38c8e4 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -202,7 +202,7 @@ callback_init(void * kernel_end) console_remap_vm.size = nr_pages << PAGE_SHIFT; vm_area_register_early(&console_remap_vm, PAGE_SIZE); - vaddr = (unsigned long)consle_remap_vm.addr; + vaddr = (unsigned long)console_remap_vm.addr; /* Set up the third level PTEs and update the virtual addresses of the CRB entries. */ From d0c4f570276cb4d2dc4215b90eb7cb6e2bdd4a15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Mar 2009 16:06:56 +0900 Subject: [PATCH 641/682] bootmem, x86: further fixes for arch-specific bootmem wrapping Impact: fix new breakages introduced by previous fix Commit c132937556f56ee4b831ef4b23f1846e05fde102 tried to clean up bootmem arch wrapper but it wasn't quite correct. Before the commit, the followings were broken. * Low level interface functions prefixed with __ ignored arch preference. * reserve_bootmem(...) can't be mapped into reserve_bootmem_node(NODE_DATA(0)->bdata, ...) because the node is not preference here. The region specified MUST fall into the specified region; otherwise, it will panic. After the commit, * If allocation fails for the arch preferred node, it should fallback to whatever is available. Instead, it simply failed allocation. There are too many internal details to allow generic wrapping and still keep things simple for archs. Plus, all that arch wants is a way to prefer certain node over another. This patch drops the generic wrapping around alloc_bootmem_core() and add alloc_bootmem_core() instead. If necessary, arch can define bootmem_arch_referred_node() macro or function which takes all allocation information and returns the preferred node. bootmem generic code will always try the preferred node first and then fallback to other nodes as usual. Breakages noted and changes reviewed by Johannes Weiner. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- arch/x86/include/asm/mmzone_32.h | 8 ++---- mm/bootmem.c | 45 +++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index eeacf67de49..ede6998bd92 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -92,12 +92,8 @@ static inline int pfn_valid(int pfn) #ifdef CONFIG_NEED_MULTIPLE_NODES /* always use node 0 for bootmem on this numa platform */ -#define alloc_bootmem_core(__bdata, size, align, goal, limit) \ -({ \ - bootmem_data_t __maybe_unused * __abm_bdata_dummy = (__bdata); \ - __alloc_bootmem_core(NODE_DATA(0)->bdata, \ - (size), (align), (goal), (limit)); \ -}) +#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \ + (NODE_DATA(0)->bdata) #endif /* CONFIG_NEED_MULTIPLE_NODES */ #endif /* _ASM_X86_MMZONE_32_H */ diff --git a/mm/bootmem.c b/mm/bootmem.c index d7140c008ba..daf92713f7d 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -37,16 +37,6 @@ static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); static int bootmem_debug; -/* - * If an arch needs to apply workarounds to bootmem allocation, it can - * set CONFIG_HAVE_ARCH_BOOTMEM and define a wrapper around - * __alloc_bootmem_core(). - */ -#ifndef CONFIG_HAVE_ARCH_BOOTMEM -#define alloc_bootmem_core(bdata, size, align, goal, limit) \ - __alloc_bootmem_core((bdata), (size), (align), (goal), (limit)) -#endif - static int __init bootmem_debug_setup(char *buf) { bootmem_debug = 1; @@ -436,9 +426,9 @@ static unsigned long align_off(struct bootmem_data *bdata, unsigned long off, return ALIGN(base + off, align) - base; } -static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) +static void * __init alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { unsigned long fallback = 0; unsigned long min, max, start, sidx, midx, step; @@ -538,17 +528,34 @@ find_block: return NULL; } +static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) +{ +#ifdef CONFIG_HAVE_ARCH_BOOTMEM + bootmem_data_t *p_bdata; + + p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); + if (p_bdata) + return alloc_bootmem_core(p_bdata, size, align, goal, limit); +#endif + return NULL; +} + static void * __init ___alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { bootmem_data_t *bdata; + void *region; restart: - list_for_each_entry(bdata, &bdata_list, list) { - void *region; + region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); + if (region) + return region; + list_for_each_entry(bdata, &bdata_list, list) { if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) continue; if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) @@ -626,6 +633,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, { void *ptr; + ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); + if (ptr) + return ptr; + ptr = alloc_bootmem_core(bdata, size, align, goal, limit); if (ptr) return ptr; @@ -682,6 +693,10 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, { void *ptr; + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); + if (ptr) + return ptr; + ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; From f5c1aa1537be39d8b9bb5279b5881d81898fd3cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 1 Mar 2009 12:32:08 +0100 Subject: [PATCH 642/682] Revert "gpu/drm, x86, PAT: PAT support for io_mapping_*" This reverts commit 17581ad812a9abb0182260374ef2e52d4a808a64. Sitsofe Wheeler reported that /dev/dri/card0 is MIA on his EeePC 900 and bisected it to this commit. Graphics card is an i915 in an EeePC 900: 00:02.0 VGA compatible controller [0300]: Intel Corporation Mobile 915GM/GMS/910GML Express Graphics Controller [8086:2592] (rev 04) ( Most likely the ioremap() of the driver failed and hence the card did not initialize. ) Reported-by: Sitsofe Wheeler Bisected-by: Sitsofe Wheeler Cc: Venkatesh Pallipadi Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iomap.h | 5 +--- arch/x86/mm/iomap_32.c | 44 ++---------------------------------- include/linux/io-mapping.h | 6 ++--- 3 files changed, 5 insertions(+), 50 deletions(-) diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index bd46495ff7d..86af26091d6 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -24,10 +24,7 @@ #include int -reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot); - -void -free_io_memtype(u64 base, unsigned long size); +is_io_mapping_possible(resource_size_t base, unsigned long size); void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index d5e28424622..6c2b1af1692 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,13 +21,13 @@ #include #ifdef CONFIG_X86_PAE -static int +int is_io_mapping_possible(resource_size_t base, unsigned long size) { return 1; } #else -static int +int is_io_mapping_possible(resource_size_t base, unsigned long size) { /* There is no way to map greater than 1 << 32 address without PAE */ @@ -38,46 +38,6 @@ is_io_mapping_possible(resource_size_t base, unsigned long size) } #endif -int -reserve_io_memtype_wc(u64 base, unsigned long size, pgprot_t *prot) -{ - unsigned long ret_flag; - - if (!is_io_mapping_possible(base, size)) - goto out_err; - - if (!pat_enabled) { - *prot = pgprot_noncached(PAGE_KERNEL); - return 0; - } - - if (reserve_memtype(base, base + size, _PAGE_CACHE_WC, &ret_flag)) - goto out_err; - - if (ret_flag == _PAGE_CACHE_WB) - goto out_free; - - if (kernel_map_sync_memtype(base, size, ret_flag)) - goto out_free; - - *prot = __pgprot(__PAGE_KERNEL | ret_flag); - return 0; - -out_free: - free_memtype(base, base + size); -out_err: - return -EINVAL; -} -EXPORT_SYMBOL_GPL(reserve_io_memtype_wc); - -void -free_io_memtype(u64 base, unsigned long size) -{ - if (pat_enabled) - free_memtype(base, base + size); -} -EXPORT_SYMBOL_GPL(free_io_memtype); - /* Map 'pfn' using fixed map 'type' and protections 'prot' */ void * diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index f1ed66c4378..cbc2f0cd631 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,9 +49,8 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - pgprot_t prot; - if (!reserve_io_memtype_wc(base, size, &prot)) + if (!is_io_mapping_possible(base, size)) return NULL; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); @@ -60,14 +59,13 @@ io_mapping_create_wc(resource_size_t base, unsigned long size) iomap->base = base; iomap->size = size; - iomap->prot = prot; + iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { - free_io_memtype(mapping->base, mapping->size); kfree(mapping); } From f180053694b43d5714bf56cb95499a3c32ff155c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Mar 2009 11:00:57 +0100 Subject: [PATCH 643/682] x86, mm: dont use non-temporal stores in pagecache accesses Impact: standardize IO on cached ops On modern CPUs it is almost always a bad idea to use non-temporal stores, as the regression in this commit has shown it: 30d697f: x86: fix performance regression in write() syscall The kernel simply has no good information about whether using non-temporal stores is a good idea or not - and trying to add heuristics only increases complexity and inserts fragility. The regression on cached write()s took very long to be found - over two years. So dont take any chances and let the hardware decide how it makes use of its caches. The only exception is drivers/gpu/drm/i915/i915_gem.c: there were we are absolutely sure that another entity (the GPU) will pick up the dirty data immediately and that the CPU will not touch that data before the GPU will. Also, keep the _nocache() primitives to make it easier for people to experiment with these details. There may be more clear-cut cases where non-cached copies can be used, outside of filemap.c. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uaccess_32.h | 4 ++-- arch/x86/include/asm/uaccess_64.h | 25 +++++++------------------ drivers/gpu/drm/i915/i915_gem.c | 2 +- include/linux/uaccess.h | 4 ++-- mm/filemap.c | 11 ++++------- mm/filemap_xip.c | 2 +- 6 files changed, 17 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index a0ba6138697..5e06259e90e 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -157,7 +157,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) } static __always_inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { might_fault(); if (__builtin_constant_p(n)) { @@ -180,7 +180,7 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, - unsigned long n, unsigned long total) + unsigned long n) { return __copy_from_user_ll_nocache_nozero(to, from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index dcaa0404cf7..8cc687326eb 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -188,29 +188,18 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); -static inline int __copy_from_user_nocache(void *dst, const void __user *src, - unsigned size, unsigned long total) +static inline int +__copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { might_sleep(); - /* - * In practice this limit means that large file write()s - * which get chunked to 4K copies get handled via - * non-temporal stores here. Smaller writes get handled - * via regular __copy_from_user(): - */ - if (likely(total >= PAGE_SIZE)) - return __copy_user_nocache(dst, src, size, 1); - else - return __copy_from_user(dst, src, size); + return __copy_user_nocache(dst, src, size, 1); } -static inline int __copy_from_user_inatomic_nocache(void *dst, - const void __user *src, unsigned size, unsigned total) +static inline int +__copy_from_user_inatomic_nocache(void *dst, const void __user *src, + unsigned size) { - if (likely(total >= PAGE_SIZE)) - return __copy_user_nocache(dst, src, size, 0); - else - return __copy_from_user_inatomic(dst, src, size); + return __copy_user_nocache(dst, src, size, 0); } unsigned long diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6b209db8370..81857665409 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -215,7 +215,7 @@ fast_user_write(struct io_mapping *mapping, vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, - user_data, length, length); + user_data, length); io_mapping_unmap_atomic(vaddr_atomic); if (unwritten) return -EFAULT; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6f3c603b0d6..6b58367d145 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user(to, from, n); } diff --git a/mm/filemap.c b/mm/filemap.c index 60fd56772cc..126d3973b3d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1816,14 +1816,14 @@ EXPORT_SYMBOL(file_remove_suid); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { - size_t copied = 0, left = 0, total = bytes; + size_t copied = 0, left = 0; while (bytes) { char __user *buf = iov->iov_base + base; int copy = min(bytes, iov->iov_len - base); base = 0; - left = __copy_from_user_inatomic_nocache(vaddr, buf, copy, total); + left = __copy_from_user_inatomic(vaddr, buf, copy); copied += copy; bytes -= copy; vaddr += copy; @@ -1851,9 +1851,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - - left = __copy_from_user_inatomic_nocache(kaddr + offset, - buf, bytes, bytes); + left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, @@ -1881,8 +1879,7 @@ size_t iov_iter_copy_from_user(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - - left = __copy_from_user_nocache(kaddr + offset, buf, bytes, bytes); + left = __copy_from_user(kaddr + offset, buf, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index bf54f8a2cf1..0c04615651b 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -354,7 +354,7 @@ __xip_file_write(struct file *filp, const char __user *buf, break; copied = bytes - - __copy_from_user_nocache(xip_mem + offset, buf, bytes, bytes); + __copy_from_user_nocache(xip_mem + offset, buf, bytes); if (likely(copied > 0)) { status = copied; From 9694cd6c17582cd6c29bf4a4e4aa767862ff4f4c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 10:53:55 +0100 Subject: [PATCH 644/682] x86_32: apic/bigsmp_32, de-inline functions The ones which go only into struct apic are de-inlined by compiler anyway, so remove the inline specifier from them. Afterwards, remove bigsmp_setup_portio_remap completely as it is unused. Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/bigsmp_32.c | 40 ++++++++++++++------------------ 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 69c512e23a9..d806ecaa948 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -16,17 +16,17 @@ #include #include -static inline unsigned bigsmp_get_apic_id(unsigned long x) +static unsigned bigsmp_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; } -static inline int bigsmp_apic_id_registered(void) +static int bigsmp_apic_id_registered(void) { return 1; } -static inline const cpumask_t *bigsmp_target_cpus(void) +static const cpumask_t *bigsmp_target_cpus(void) { #ifdef CONFIG_SMP return &cpu_online_map; @@ -35,13 +35,12 @@ static inline const cpumask_t *bigsmp_target_cpus(void) #endif } -static inline unsigned long -bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; } -static inline unsigned long bigsmp_check_apicid_present(int bit) +static unsigned long bigsmp_check_apicid_present(int bit) { return 1; } @@ -64,7 +63,7 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static inline void bigsmp_init_apic_ldr(void) +static void bigsmp_init_apic_ldr(void) { unsigned long val; int cpu = smp_processor_id(); @@ -74,19 +73,19 @@ static inline void bigsmp_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static inline void bigsmp_setup_apic_routing(void) +static void bigsmp_setup_apic_routing(void) { printk(KERN_INFO "Enabling APIC mode: Physflat. Using %d I/O APICs\n", nr_ioapics); } -static inline int bigsmp_apicid_to_node(int logical_apicid) +static int bigsmp_apicid_to_node(int logical_apicid) { return apicid_2_node[hard_smp_processor_id()]; } -static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) +static int bigsmp_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); @@ -94,7 +93,7 @@ static inline int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) +static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) { return physid_mask_of_physid(phys_apicid); } @@ -107,29 +106,24 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) { /* For clustered we don't have a good way to do this yet - hack */ return physids_promote(0xFFL); } -static inline void bigsmp_setup_portio_remap(void) -{ -} - -static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) +static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) { return 1; } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) +static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask) { return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask)); } -static inline unsigned int -bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, const struct cpumask *andmask) { int cpu; @@ -148,7 +142,7 @@ bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return BAD_APICID; } -static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) +static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; } @@ -158,12 +152,12 @@ static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) default_send_IPI_mask_sequence_phys(mask, vector); } -static inline void bigsmp_send_IPI_allbutself(int vector) +static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); } -static inline void bigsmp_send_IPI_all(int vector) +static void bigsmp_send_IPI_all(int vector) { bigsmp_send_IPI_mask(cpu_online_mask, vector); } From c2b20cbd057b97e2f440fa3bc90b3df51de324fe Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 10:53:56 +0100 Subject: [PATCH 645/682] x86_32: apic/es7000_32, cpu_mask_to_apicid cleanup Remove es7000_cpu_mask_to_apicid_cluster completely, because it's almost the same as es7000_cpu_mask_to_apicid except 2 code paths. One of them is about to be removed soon, the another should be BAD_APICID (it's a fail path). The _cluster one was not invoked on apic->cpu_mask_to_apicid_and anyway, since there was no _cluster_and variant. Also use newer cpumask functions. Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 50 ++++---------------------------- 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index da37e2c59fe..9b9e86f3cfd 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -575,50 +575,12 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) return 1; } -static unsigned int -es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) -{ - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; - - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return 0xFF; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): - */ - cpu = cpumask_first(cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - WARN(1, "Not a valid mask!"); - - return 0xFF; - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } - return apicid; -} - static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int cpus_found = 0; - int num_bits_set; + unsigned int cpu, num_bits_set, cpus_found = 0; int apicid; - int cpu; - num_bits_set = cpus_weight(*cpumask); + num_bits_set = cpumask_weight(cpumask); /* Return id to all */ if (num_bits_set == nr_cpu_ids) return es7000_cpu_to_logical_apicid(0); @@ -626,14 +588,14 @@ static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of target_cpus(): */ - cpu = first_cpu(*cpumask); + cpu = cpumask_first(cpumask); apicid = es7000_cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = es7000_cpu_to_logical_apicid(cpu); if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); + WARN(1, "Not a valid mask!"); return es7000_cpu_to_logical_apicid(0); } @@ -739,7 +701,7 @@ struct apic apic_es7000_cluster = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster, + .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, .send_IPI_mask = es7000_send_IPI_mask, From 0edc0b324a37c4bf9e13f3194f2f45c677808792 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 10:53:57 +0100 Subject: [PATCH 646/682] x86_32: apic/es7000_32, fix cpu_mask_to_apicid Perform same-cluster checking even for masks with all (nr_cpu_ids) bits set and report BAD_APICID on failure. While at it, convert it to for_each_cpu. Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/es7000_32.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 9b9e86f3cfd..4d8830ffe48 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -577,32 +577,22 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid) static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask) { - unsigned int cpu, num_bits_set, cpus_found = 0; - int apicid; + unsigned int round = 0; + int cpu, uninitialized_var(apicid); - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - return es7000_cpu_to_logical_apicid(0); /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of target_cpus(): + * The cpus in the mask must all be on the apic cluster. */ - cpu = cpumask_first(cpumask); - apicid = es7000_cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); + for_each_cpu(cpu, cpumask) { + int new_apicid = es7000_cpu_to_logical_apicid(cpu); - if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - WARN(1, "Not a valid mask!"); + if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { + WARN(1, "Not a valid mask!"); - return es7000_cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; + return BAD_APICID; } - cpu++; + apicid = new_apicid; + round++; } return apicid; } From fae176d6e03578fee7cfe948ff9ae81bf7ea0ef0 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 10:53:58 +0100 Subject: [PATCH 647/682] x86_32: apic/summit_32, fix cpu_mask_to_apicid Perform same-cluster checking even for masks with all (nr_cpu_ids) bits set and report correct apicid on success instead. While at it, convert it to for_each_cpu and newer cpumask api. Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/summit_32.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 0a1135c5a6d..beda620bee8 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -291,33 +291,21 @@ static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask) { - int cpus_found = 0; - int num_bits_set; - int apicid; - int cpu; + unsigned int round = 0; + int cpu, apicid = 0; - num_bits_set = cpus_weight(*cpumask); - if (num_bits_set >= nr_cpu_ids) - return BAD_APICID; /* * The cpus in the mask must all be on the apic cluster. */ - cpu = first_cpu(*cpumask); - apicid = summit_cpu_to_logical_apicid(cpu); + for_each_cpu(cpu, cpumask) { + int new_apicid = summit_cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, *cpumask)) { - int new_apicid = summit_cpu_to_logical_apicid(cpu); - - if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); - - return BAD_APICID; - } - apicid = apicid | new_apicid; - cpus_found++; + if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { + printk("%s: Not a valid mask!\n", __func__); + return BAD_APICID; } - cpu++; + apicid |= new_apicid; + round++; } return apicid; } From 871d78c6d9fc83b2ad584788a175fcfca07a3666 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 11:34:27 +0100 Subject: [PATCH 648/682] x86_32: apic/es7000_32, fix section mismatch Remove __init section placement for some functions, so that we don't get section mismatch warnings. [v2]: 2 of them were not caught by DEBUG_SECTION_MISMATCH=y magic. Fix it. Signed-off-by: Jiri Slaby Cc: Jiri Slaby Signed-off-by: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/kernel/apic/es7000_32.c | 37 +++++++++++++++----------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 4d8830ffe48..19588f2770e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -163,7 +163,7 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) return 0; } -static int __init es7000_apic_is_cluster(void) +static int es7000_apic_is_cluster(void) { /* MPENTIUMIII */ if (boot_cpu_data.x86 == 6 && @@ -173,7 +173,7 @@ static int __init es7000_apic_is_cluster(void) return 0; } -static void __init setup_unisys(void) +static void setup_unisys(void) { /* * Determine the generation of the ES7000 currently running. @@ -192,7 +192,7 @@ static void __init setup_unisys(void) /* * Parse the OEM Table: */ -static int __init parse_unisys_oem(char *oemptr) +static int parse_unisys_oem(char *oemptr) { int i; int success = 0; @@ -254,7 +254,7 @@ static int __init parse_unisys_oem(char *oemptr) } #ifdef CONFIG_ACPI -static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) +static int find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; struct es7000_oem_table *table; @@ -285,7 +285,7 @@ static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) return 0; } -static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) +static void unmap_unisys_acpi_oem_table(unsigned long oem_addr) { if (!oem_addr) return; @@ -303,10 +303,10 @@ static int es7000_check_dsdt(void) return 0; } -static int __initdata es7000_acpi_ret; +static int es7000_acpi_ret; /* Hook from generic ACPI tables.c */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { unsigned long oem_addr = 0; int check_dsdt; @@ -333,8 +333,7 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return ret && !es7000_apic_is_cluster(); } -static int __init -es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) +static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) { int ret = es7000_acpi_ret; @@ -342,13 +341,12 @@ es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) } #else /* !CONFIG_ACPI: */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } -static int __init -es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) +static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) { return 0; } @@ -362,8 +360,7 @@ static void es7000_spin(int n) rep_nop(); } -static int __init -es7000_mip_write(struct mip_reg *mip_reg) +static int es7000_mip_write(struct mip_reg *mip_reg) { int status = 0; int spin; @@ -396,7 +393,7 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } -static void __init es7000_enable_apic_mode(void) +static void es7000_enable_apic_mode(void) { struct mip_reg es7000_mip_reg; int mip_status; @@ -627,9 +624,9 @@ static int probe_es7000(void) return 0; } -static int __initdata es7000_mps_ret; -static __init int -es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +static int es7000_mps_ret; +static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, + char *productid) { int ret = 0; @@ -646,8 +643,8 @@ es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) return ret && !es7000_apic_is_cluster(); } -static __init int -es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, char *productid) +static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, + char *productid) { int ret = es7000_mps_ret; From 2fcb1f1f38e9b10ee5f339be4e17ba5cad9b4421 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 11:34:28 +0100 Subject: [PATCH 649/682] x86_32: apic/summit_32, fix section mismatch Remove __init section placement for some functions/data, so that we don't get section mismatch warnings. Also make inline function instead of empty setup_summit macro. [v2] One of them was not caught by DEBUG_SECTION_MISMATCH=y magic. Fix it. Signed-off-by: Jiri Slaby Cc: Jiri Slaby Signed-off-by: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/kernel/apic/summit_32.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index beda620bee8..aac52fa873f 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -77,9 +77,9 @@ static void summit_send_IPI_all(int vector) extern int use_cyclone; #ifdef CONFIG_X86_SUMMIT_NUMA -extern void setup_summit(void); +static void setup_summit(void); #else -#define setup_summit() {} +static inline void setup_summit(void) {} #endif static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, @@ -360,15 +360,15 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask) } #ifdef CONFIG_X86_SUMMIT_NUMA -static struct rio_table_hdr *rio_table_hdr __initdata; -static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +static struct rio_table_hdr *rio_table_hdr; +static struct scal_detail *scal_devs[MAX_NUMNODES]; +static struct rio_detail *rio_devs[MAX_NUMNODES*4]; #ifndef CONFIG_X86_NUMAQ -static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; +static int mp_bus_id_to_node[MAX_MP_BUSSES]; #endif -static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) +static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) { int twister = 0, node = 0; int i, bus, num_buses; @@ -430,7 +430,7 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) return bus; } -static int __init build_detail_arrays(void) +static int build_detail_arrays(void) { unsigned long ptr; int i, scal_detail_size, rio_detail_size; @@ -464,7 +464,7 @@ static int __init build_detail_arrays(void) return 1; } -void __init setup_summit(void) +void setup_summit(void) { unsigned long ptr; unsigned short offset; From b6122b3843216f3f8e9624f9e876f4f0514f9205 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Mar 2009 11:34:29 +0100 Subject: [PATCH 650/682] x86_32: apic/numaq_32, fix section mismatch Remove __cpuinitdata section placement for translation_table structure, since it is referenced from a functions within .text. Signed-off-by: Jiri Slaby Cc: Jiri Slaby Signed-off-by: Ingo Molnar Cc: H. Peter Anvin --- arch/x86/kernel/apic/numaq_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index a7f711f5110..ba2fc646553 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -69,7 +69,7 @@ struct mpc_trans { /* x86_quirks member */ static int mpc_record; -static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY]; +static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; int mp_bus_id_to_node[MAX_MP_BUSSES]; int mp_bus_id_to_local[MAX_MP_BUSSES]; From db949bba3c7cf2e664ac12e237c6d4c914f0c69d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:25:21 -0800 Subject: [PATCH 651/682] x86-32: use non-lazy io bitmap context switching Impact: remove 32-bit optimization to prepare unification x86-32 and -64 differ in the way they context-switch tasks with io permission bitmaps. x86-64 simply copies the next tasks io bitmap into place (if any) on context switch. x86-32 invalidates the bitmap on context switch, so that the next IO instruction will fault; at that point it installs the appropriate IO bitmap. This makes context switching IO-bitmap-using tasks a bit more less expensive, at the cost of making the next IO instruction slower due to the extra fault. This tradeoff only makes sense if IO-bitmap-using processes are relatively common, but they don't actually use IO instructions very often. However, in a typical desktop system, the only process likely to be using IO bitmaps is the X server, and nothing at all on a server. Therefore the lazy context switch doesn't really win all that much, and its just a gratuitious difference from 64-bit code. This patch removes the lazy context switch, with a view to unifying this code in a later change. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 6 ----- arch/x86/kernel/ioport.c | 11 -------- arch/x86/kernel/process_32.c | 36 +++++++------------------ arch/x86/kernel/traps.c | 46 -------------------------------- 4 files changed, 9 insertions(+), 90 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7a98f73821..76139506c3e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -248,7 +248,6 @@ struct x86_hw_tss { #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) #define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 struct tss_struct { /* @@ -263,11 +262,6 @@ struct tss_struct { * be within the limit. */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; - /* - * Cache the current maximum and the last task that used the bitmap: - */ - unsigned long io_bitmap_max; - struct thread_struct *io_bitmap_owner; /* * .. and then another 0x100 bytes for the emergency kernel stack: diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index e41980a373a..99c4d308f16 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_max = bytes; -#ifdef CONFIG_X86_32 - /* - * Sets the lazy trigger so that the next I/O operation will - * reload the correct bitmap. - * Reset the owner so that a process switch will not set - * tss->io_bitmap_base to IO_BITMAP_OFFSET. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; - tss->io_bitmap_owner = NULL; -#else /* Update the TSS: */ memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); -#endif put_cpu(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 646da41a620..a59314e877f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -248,11 +248,8 @@ void exit_thread(void) /* * Careful, clear this in the TSS too: */ - memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); + memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; - tss->io_bitmap_owner = NULL; - tss->io_bitmap_max = 0; - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } @@ -458,34 +455,19 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } - if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* - * Disable the bitmap via an invalid offset. We still cache - * the previous bitmap owner and the IO bitmap contents: + * Copy the relevant range of the IO bitmap. + * Normally this is 128 bytes or less: */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - return; - } - - if (likely(next == tss->io_bitmap_owner)) { + memcpy(tss->io_bitmap, next->io_bitmap_ptr, + max(prev->io_bitmap_max, next->io_bitmap_max)); + } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* - * Previous owner of the bitmap (hence the bitmap content) - * matches the next task, we dont have to do anything but - * to set a valid offset in the TSS: + * Clear any possible leftover bits: */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - return; + memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } - /* - * Lazy TSS's I/O bitmap copy. We set an invalid offset here - * and we let the task to get a GPF in case an I/O instruction - * is performed. The handler of the GPF will verify that the - * faulting task has a valid I/O bitmap and, it true, does the - * real copy and restart the instruction. This will save us - * redundant copies when the currently switched task does not - * perform any I/O during its timeslice. - */ - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; } /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index c05430ac1b4..a1d288327ff 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -118,47 +118,6 @@ die_if_kernel(const char *str, struct pt_regs *regs, long err) if (!user_mode_vm(regs)) die(str, regs, err); } - -/* - * Perform the lazy TSS's I/O bitmap copy. If the TSS has an - * invalid offset set (the LAZY one) and the faulting thread has - * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, - * we set the offset field correctly and return 1. - */ -static int lazy_iobitmap_copy(void) -{ - struct thread_struct *thread; - struct tss_struct *tss; - int cpu; - - cpu = get_cpu(); - tss = &per_cpu(init_tss, cpu); - thread = ¤t->thread; - - if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && - thread->io_bitmap_ptr) { - memcpy(tss->io_bitmap, thread->io_bitmap_ptr, - thread->io_bitmap_max); - /* - * If the previously set map was extending to higher ports - * than the current one, pad extra space with 0xff (no access). - */ - if (thread->io_bitmap_max < tss->io_bitmap_max) { - memset((char *) tss->io_bitmap + - thread->io_bitmap_max, 0xff, - tss->io_bitmap_max - thread->io_bitmap_max); - } - tss->io_bitmap_max = thread->io_bitmap_max; - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; - tss->io_bitmap_owner = thread; - put_cpu(); - - return 1; - } - put_cpu(); - - return 0; -} #endif static void __kprobes @@ -309,11 +268,6 @@ do_general_protection(struct pt_regs *regs, long error_code) conditional_sti(regs); #ifdef CONFIG_X86_32 - if (lazy_iobitmap_copy()) { - /* restart the faulting instruction */ - return; - } - if (regs->flags & X86_VM_MASK) goto gp_in_vm86; #endif From 389d1fb11e5f2a16b5e34c547756f0c4dec641f7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:25:28 -0800 Subject: [PATCH 652/682] x86: unify chunks of kernel/process*.c With x86-32 and -64 using the same mechanism for managing the tss io permissions bitmap, large chunks of process*.c are trivially unifyable, including: - exit_thread - flush_thread - __switch_to_xtra (along with tsc enable/disable) and as bonus pickups: - sys_fork - sys_vfork (Note: asmlinkage expands to empty on x86-64) Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 2 + arch/x86/kernel/process.c | 191 +++++++++++++++++++++++++++++++++- arch/x86/kernel/process_32.c | 172 ------------------------------ arch/x86/kernel/process_64.c | 188 --------------------------------- 4 files changed, 192 insertions(+), 361 deletions(-) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index c00bfdbdd45..1a7bf39f72d 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -20,6 +20,8 @@ struct task_struct; /* one of the stranger aspects of C forward declarations */ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 87b69d4fac1..6afa5232dbb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1,8 +1,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -55,6 +58,192 @@ void arch_task_cache_init(void) SLAB_PANIC, NULL); } +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ + struct task_struct *me = current; + struct thread_struct *t = &me->thread; + + if (me->thread.io_bitmap_ptr) { + struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; + clear_thread_flag(TIF_IO_BITMAP); + /* + * Careful, clear this in the TSS too: + */ + memset(tss->io_bitmap, 0xff, t->io_bitmap_max); + t->io_bitmap_max = 0; + put_cpu(); + } + + ds_exit_thread(current); +} + +void flush_thread(void) +{ + struct task_struct *tsk = current; + +#ifdef CONFIG_X86_64 + if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { + clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); + if (test_tsk_thread_flag(tsk, TIF_IA32)) { + clear_tsk_thread_flag(tsk, TIF_IA32); + } else { + set_tsk_thread_flag(tsk, TIF_IA32); + current_thread_info()->status |= TS_COMPAT; + } + } +#endif + + clear_tsk_thread_flag(tsk, TIF_DEBUG); + + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; + tsk->thread.debugreg3 = 0; + tsk->thread.debugreg6 = 0; + tsk->thread.debugreg7 = 0; + memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + /* + * Forget coprocessor state.. + */ + tsk->fpu_counter = 0; + clear_fpu(tsk); + clear_used_math(); +} + +static void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} + +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} + +static void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} + +static void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, + struct tss_struct *tss) +{ + struct thread_struct *prev, *next; + + prev = &prev_p->thread; + next = &next_p->thread; + + if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || + test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) + ds_switch_to(prev_p, next_p); + else if (next->debugctlmsr != prev->debugctlmsr) + update_debugctlmsr(next->debugctlmsr); + + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { + set_debugreg(next->debugreg0, 0); + set_debugreg(next->debugreg1, 1); + set_debugreg(next->debugreg2, 2); + set_debugreg(next->debugreg3, 3); + /* no 4 and 5 */ + set_debugreg(next->debugreg6, 6); + set_debugreg(next->debugreg7, 7); + } + + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } + + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { + /* + * Copy the relevant range of the IO bitmap. + * Normally this is 128 bytes or less: + */ + memcpy(tss->io_bitmap, next->io_bitmap_ptr, + max(prev->io_bitmap_max, next->io_bitmap_max)); + } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { + /* + * Clear any possible leftover bits: + */ + memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); + } +} + +int sys_fork(struct pt_regs *regs) +{ + return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); +} + +/* + * This is trivial, and on the face of it looks like it + * could equally well be done in user mode. + * + * Not so, for quite unobvious reasons - register pressure. + * In user mode vfork() cannot have a stack frame, and if + * done by calling the "clone()" system call directly, you + * do not have enough call-clobbered registers to hold all + * the information you need. + */ +int sys_vfork(struct pt_regs *regs) +{ + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, + NULL, NULL); +} + + /* * Idle related variables and functions */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a59314e877f..14014d766ca 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -230,52 +230,6 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) } EXPORT_SYMBOL(kernel_thread); -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - /* The process may have allocated an io port bitmap... nuke it. */ - if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { - struct task_struct *tsk = current; - struct thread_struct *t = &tsk->thread; - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); - - kfree(t->io_bitmap_ptr); - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, t->io_bitmap_max); - t->io_bitmap_max = 0; - put_cpu(); - } - - ds_exit_thread(current); -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - clear_tsk_thread_flag(tsk, TIF_DEBUG); - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - void release_thread(struct task_struct *dead_task) { BUG_ON(dead_task->mm); @@ -363,112 +317,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); -static void hard_disable_TSC(void) -{ - write_cr4(read_cr4() | X86_CR4_TSD); -} - -void disable_TSC(void) -{ - preempt_disable(); - if (!test_and_set_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ - hard_disable_TSC(); - preempt_enable(); -} - -static void hard_enable_TSC(void) -{ - write_cr4(read_cr4() & ~X86_CR4_TSD); -} - -static void enable_TSC(void) -{ - preempt_disable(); - if (test_and_clear_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ - hard_enable_TSC(); - preempt_enable(); -} - -int get_tsc_mode(unsigned long adr) -{ - unsigned int val; - - if (test_thread_flag(TIF_NOTSC)) - val = PR_TSC_SIGSEGV; - else - val = PR_TSC_ENABLE; - - return put_user(val, (unsigned int __user *)adr); -} - -int set_tsc_mode(unsigned int val) -{ - if (val == PR_TSC_SIGSEGV) - disable_TSC(); - else if (val == PR_TSC_ENABLE) - enable_TSC(); - else - return -EINVAL; - - return 0; -} - -static noinline void -__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) -{ - struct thread_struct *prev, *next; - - prev = &prev_p->thread; - next = &next_p->thread; - - if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || - test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) - ds_switch_to(prev_p, next_p); - else if (next->debugctlmsr != prev->debugctlmsr) - update_debugctlmsr(next->debugctlmsr); - - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } - - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { - /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } -} /* * switch_to(x,yn) should switch tasks from x to y. @@ -582,11 +430,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } -int sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); -} - int sys_clone(struct pt_regs *regs) { unsigned long clone_flags; @@ -602,21 +445,6 @@ int sys_clone(struct pt_regs *regs) return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); } -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -int sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); -} - /* * sys_execve() executes a new program. */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 836ef6575f0..abb7e6a7f0c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -237,61 +237,6 @@ void show_regs(struct pt_regs *regs) show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -/* - * Free current thread data structures etc.. - */ -void exit_thread(void) -{ - struct task_struct *me = current; - struct thread_struct *t = &me->thread; - - if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); - - kfree(t->io_bitmap_ptr); - t->io_bitmap_ptr = NULL; - clear_thread_flag(TIF_IO_BITMAP); - /* - * Careful, clear this in the TSS too: - */ - memset(tss->io_bitmap, 0xff, t->io_bitmap_max); - t->io_bitmap_max = 0; - put_cpu(); - } - - ds_exit_thread(current); -} - -void flush_thread(void) -{ - struct task_struct *tsk = current; - - if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { - clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); - if (test_tsk_thread_flag(tsk, TIF_IA32)) { - clear_tsk_thread_flag(tsk, TIF_IA32); - } else { - set_tsk_thread_flag(tsk, TIF_IA32); - current_thread_info()->status |= TS_COMPAT; - } - } - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; - memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { @@ -425,118 +370,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); -static void hard_disable_TSC(void) -{ - write_cr4(read_cr4() | X86_CR4_TSD); -} - -void disable_TSC(void) -{ - preempt_disable(); - if (!test_and_set_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ - hard_disable_TSC(); - preempt_enable(); -} - -static void hard_enable_TSC(void) -{ - write_cr4(read_cr4() & ~X86_CR4_TSD); -} - -static void enable_TSC(void) -{ - preempt_disable(); - if (test_and_clear_thread_flag(TIF_NOTSC)) - /* - * Must flip the CPU state synchronously with - * TIF_NOTSC in the current running context. - */ - hard_enable_TSC(); - preempt_enable(); -} - -int get_tsc_mode(unsigned long adr) -{ - unsigned int val; - - if (test_thread_flag(TIF_NOTSC)) - val = PR_TSC_SIGSEGV; - else - val = PR_TSC_ENABLE; - - return put_user(val, (unsigned int __user *)adr); -} - -int set_tsc_mode(unsigned int val) -{ - if (val == PR_TSC_SIGSEGV) - disable_TSC(); - else if (val == PR_TSC_ENABLE) - enable_TSC(); - else - return -EINVAL; - - return 0; -} - -/* - * This special macro can be used to load a debugging register - */ -#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) - -static inline void __switch_to_xtra(struct task_struct *prev_p, - struct task_struct *next_p, - struct tss_struct *tss) -{ - struct thread_struct *prev, *next; - - prev = &prev_p->thread, - next = &next_p->thread; - - if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || - test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) - ds_switch_to(prev_p, next_p); - else if (next->debugctlmsr != prev->debugctlmsr) - update_debugctlmsr(next->debugctlmsr); - - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); - /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); - } - - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ - test_tsk_thread_flag(next_p, TIF_NOTSC)) { - /* prev and next are different */ - if (test_tsk_thread_flag(next_p, TIF_NOTSC)) - hard_disable_TSC(); - else - hard_enable_TSC(); - } - - if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { - /* - * Copy the relevant range of the IO bitmap. - * Normally this is 128 bytes or less: - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(prev->io_bitmap_max, next->io_bitmap_max)); - } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { - /* - * Clear any possible leftover bits: - */ - memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); - } -} - /* * switch_to(x,y) should switch tasks from x to y. * @@ -694,11 +527,6 @@ void set_personality_64bit(void) current->personality &= ~READ_IMPLIES_EXEC; } -asmlinkage long sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); -} - asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) @@ -708,22 +536,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); } -/* - * This is trivial, and on the face of it looks like it - * could equally well be done in user mode. - * - * Not so, for quite unobvious reasons - register pressure. - * In user mode vfork() cannot have a stack frame, and if - * done by calling the "clone()" system call directly, you - * do not have enough call-clobbered registers to hold all - * the information you need. - */ -asmlinkage long sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, - NULL, NULL); -} - unsigned long get_wchan(struct task_struct *p) { unsigned long stack; From 2fb6b2a048ed8fa3f049c7d42f7a2dd3f0c8d7a6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:25:33 -0800 Subject: [PATCH 653/682] x86: add forward decl for tss_struct Its the correct thing to do before using the struct in a prototype. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/system.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 1a7bf39f72d..643c59b4bc6 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -20,6 +20,7 @@ struct task_struct; /* one of the stranger aspects of C forward declarations */ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); +struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); From 9976b39b5031bbf76f715893cf080b6a17683881 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 09:19:26 -0800 Subject: [PATCH 654/682] xen: deal with virtually mapped percpu data The virtually mapped percpu space causes us two problems: - for hypercalls which take an mfn, we need to do a full pagetable walk to convert the percpu va into an mfn, and - when a hypercall requires a page to be mapped RO via all its aliases, we need to make sure its RO in both the percpu mapping and in the linear mapping This primarily affects the gdt and the vcpu info structure. Signed-off-by: Jeremy Fitzhardinge Cc: Xen-devel Cc: Gerd Hoffmann Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/page.h | 1 + arch/x86/xen/enlighten.c | 10 ++++++---- arch/x86/xen/mmu.c | 7 +++++++ arch/x86/xen/smp.c | 8 ++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 4bd990ee43d..1a918dde46b 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -164,6 +164,7 @@ static inline pte_t __pte_ma(pteval_t x) xmaddr_t arbitrary_virt_to_machine(void *address); +unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 86497d5f44c..352ea683065 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -103,7 +103,7 @@ static void xen_vcpu_setup(int cpu) vcpup = &per_cpu(xen_vcpu_info, cpu); - info.mfn = virt_to_mfn(vcpup); + info.mfn = arbitrary_virt_to_mfn(vcpup); info.offset = offset_in_page(vcpup); printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", @@ -301,8 +301,10 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames = mcs.args; for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = virt_to_mfn(va); + frames[f] = arbitrary_virt_to_mfn((void *)va); + make_lowmem_page_readonly((void *)va); + make_lowmem_page_readonly(mfn_to_virt(frames[f])); } MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); @@ -314,7 +316,7 @@ static void load_TLS_descriptor(struct thread_struct *t, unsigned int cpu, unsigned int i) { struct desc_struct *gdt = get_cpu_gdt_table(cpu); - xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); struct multicall_space mc = __xen_mc_entry(0); MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); @@ -488,7 +490,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, break; default: { - xmaddr_t maddr = virt_to_machine(&dt[entry]); + xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); xen_mc_flush(); if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 319bd40a57c..cb6afa4ec95 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -276,6 +276,13 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) p2m_top[topidx][idx] = mfn; } +unsigned long arbitrary_virt_to_mfn(void *vaddr) +{ + xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); + + return PFN_DOWN(maddr.maddr); +} + xmaddr_t arbitrary_virt_to_machine(void *vaddr) { unsigned long address = (unsigned long)vaddr; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 035582ae815..8d470562ffc 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -219,6 +219,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) { struct vcpu_guest_context *ctxt; struct desc_struct *gdt; + unsigned long gdt_mfn; if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; @@ -248,9 +249,12 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->ldt_ents = 0; BUG_ON((unsigned long)gdt & ~PAGE_MASK); - make_lowmem_page_readonly(gdt); - ctxt->gdt_frames[0] = virt_to_mfn(gdt); + gdt_mfn = arbitrary_virt_to_mfn(gdt); + make_lowmem_page_readonly(gdt); + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); + + ctxt->gdt_frames[0] = gdt_mfn; ctxt->gdt_ents = GDT_ENTRIES; ctxt->user_regs.cs = __KERNEL_CS; From 2505170211f7630361a852e25b60f4df4c878daa Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 2 Mar 2009 17:20:01 -0800 Subject: [PATCH 655/682] x86, signals: fix xine & firefox bustage Impact: fix bad frame in rt_sigreturn on 64-bit After commit 97286a2b64725aac2d584ddd1f94871f9991d5a1 some applications fail to return from signal handler: [ 145.150133] firefox[3250] bad frame in rt_sigreturn frame:00007f902b44eb28 ip:352e80b307 sp:7f902b44ef70 orax:ffffffffffffffff in libpthread-2.9.so[352e800000+17000] [ 665.519017] firefox[5420] bad frame in rt_sigreturn frame:00007faa8deaeb28 ip:352e80b307 sp:7faa8deaef70 orax:ffffffffffffffff in libpthread-2.9.so[352e800000+17000] The root cause is forgetting to keep 64 byte aligned value of fpstate for next stack pointer calculation. Reported-by: Jaswinder Singh Rajput Reported-by: Mike Galbraith Signed-off-by: Hiroshi Shimamoto LKML-Reference: <49AC85C1.7060600@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index dde3f2ae237..d2cc6428c58 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -240,11 +240,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (used_math()) { sp -= sig_xstate_size; -#ifdef CONFIG_X86_32 - *fpstate = (void __user *) sp; -#else /* !CONFIG_X86_32 */ - *fpstate = (void __user *)round_down(sp, 64); -#endif /* CONFIG_X86_32 */ +#ifdef CONFIG_X86_64 + sp = round_down(sp, 64); +#endif /* CONFIG_X86_64 */ + *fpstate = (void __user *)sp; if (save_i387_xstate(*fpstate) < 0) return (void __user *)-1L; From 2b688dfd0a93cf3b17c38feef693361da47b0606 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 3 Mar 2009 12:55:04 +0200 Subject: [PATCH 656/682] x86: move __VMALLOC_RESERVE to pgtable_32.c Impact: cleanup The __VMALLOC_RESERVE global variable is not used in init_32.c. Move that to pgtable_32.c to reduce the diff between init_32.c and init_64.c. Signed-off-by: Pekka Enberg LKML-Reference: <1236077704.2675.4.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 2 -- arch/x86/mm/pgtable_32.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 06708ee94aa..5b06a2f5dea 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -50,8 +50,6 @@ #include #include -unsigned int __VMALLOC_RESERVE = 128 << 20; - unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 0951db9ee51..e4032c886c3 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -20,6 +20,8 @@ #include #include +unsigned int __VMALLOC_RESERVE = 128 << 20; + /* * Associate a virtual page frame with a given physical page frame * and protection flags for that frame. From fd578f9c0a0a7bf3e460e6f21cdc6f4018949e80 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 3 Mar 2009 12:55:05 +0200 Subject: [PATCH 657/682] x86: use roundup() instead of PAGE_ALIGN() in find_early_table_space() Impact: cleanup This patch changes find_early_table_space() to use roundup() for rounding up tables to page size to unify the common parts of the 32-bit and 64-bit implementations. Signed-off-by: Pekka Enberg LKML-Reference: <1236077705.2675.6.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5b06a2f5dea..1dd6b6334dc 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -845,10 +845,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse) unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = PAGE_ALIGN(puds * sizeof(pud_t)); + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); if (use_pse) { unsigned long extra; @@ -859,10 +859,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse) } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += PAGE_ALIGN(ptes * sizeof(pte_t)); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); /* for fixmap */ - tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); /* * RED-PEN putting page tables only on node 0 could From 05f209e7b936a48e341d36831079116a06658ccc Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 3 Mar 2009 13:15:02 +0200 Subject: [PATCH 658/682] x86: add sanity checks to init_32.c Impact: unification This patch adds sanity checks that are already in init_64.c to init_32.c. Signed-off-by: Pekka Enberg LKML-Reference: <1236078902.2675.16.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1dd6b6334dc..1570a822c18 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1214,18 +1214,21 @@ void mark_rodata_ro(void) void free_init_pages(char *what, unsigned long begin, unsigned long end) { -#ifdef CONFIG_DEBUG_PAGEALLOC + unsigned long addr = begin; + + if (addr >= end) + return; + /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will * create a kernel page fault: */ +#ifdef CONFIG_DEBUG_PAGEALLOC printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", begin, PAGE_ALIGN(end)); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else - unsigned long addr; - /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that @@ -1233,14 +1236,16 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) */ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - for (addr = begin; addr < end; addr += PAGE_SIZE) { + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); + + for (; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + memset((void *)(addr & ~(PAGE_SIZE-1)), + POISON_FREE_INITMEM, PAGE_SIZE); free_page(addr); totalram_pages++; } - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); #endif } From e087edd8c056292191bb989baf49f83ee509e624 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 3 Mar 2009 13:15:04 +0200 Subject: [PATCH 659/682] x86: make sure initmem is writable on 64-bit Impact: unification This patch ports commit 3c1df68b848b39270752ff8d4b956cc4a4dce0f6 ("x86: make sure initmem is writable") to the 64-bit version to unify implementations of free_init_pages(). Signed-off-by: Pekka Enberg Cc: Arjan van de Ven LKML-Reference: <1236078904.2675.17.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e6d36b49025..03da9030d0e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -962,6 +962,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) begin, PAGE_ALIGN(end)); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else + /* + * We just marked the kernel text read only above, now that + * we are going to free part of that, we need to make that + * writeable first. + */ + set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); for (; addr < end; addr += PAGE_SIZE) { From e5b2bb552706ca0e30795ee84caacbb37cec5705 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 3 Mar 2009 13:15:06 +0200 Subject: [PATCH 660/682] x86: unify free_init_pages() and free_initmem() Impact: unification This patch introduces a common arch/x86/mm/init.c and moves the identical free_init_pages() and free_initmem() functions to the file. Signed-off-by: Pekka Enberg LKML-Reference: <1236078906.2675.18.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/Makefile | 2 +- arch/x86/mm/init.c | 49 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/init_32.c | 44 -------------------------------------- arch/x86/mm/init_64.c | 44 -------------------------------------- 4 files changed, 50 insertions(+), 89 deletions(-) create mode 100644 arch/x86/mm/init.c diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 2b938a38491..08537747cb5 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,4 +1,4 @@ -obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ +obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o obj-$(CONFIG_SMP) += tlb.o diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c new file mode 100644 index 00000000000..ce6a722587d --- /dev/null +++ b/arch/x86/mm/init.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include + +void free_init_pages(char *what, unsigned long begin, unsigned long end) +{ + unsigned long addr = begin; + + if (addr >= end) + return; + + /* + * If debugging page accesses then do not free this memory but + * mark them not present - any buggy init-section access will + * create a kernel page fault: + */ +#ifdef CONFIG_DEBUG_PAGEALLOC + printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", + begin, PAGE_ALIGN(end)); + set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +#else + /* + * We just marked the kernel text read only above, now that + * we are going to free part of that, we need to make that + * writeable first. + */ + set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); + + printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); + + for (; addr < end; addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + init_page_count(virt_to_page(addr)); + memset((void *)(addr & ~(PAGE_SIZE-1)), + POISON_FREE_INITMEM, PAGE_SIZE); + free_page(addr); + totalram_pages++; + } +#endif +} + +void free_initmem(void) +{ + free_init_pages("unused kernel memory", + (unsigned long)(&__init_begin), + (unsigned long)(&__init_end)); +} diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1570a822c18..cd8d6732613 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1212,50 +1212,6 @@ void mark_rodata_ro(void) } #endif -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr = begin; - - if (addr >= end) - return; - - /* - * If debugging page accesses then do not free this memory but - * mark them not present - any buggy init-section access will - * create a kernel page fault: - */ -#ifdef CONFIG_DEBUG_PAGEALLOC - printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, PAGE_ALIGN(end)); - set_memory_np(begin, (end - begin) >> PAGE_SHIFT); -#else - /* - * We just marked the kernel text read only above, now that - * we are going to free part of that, we need to make that - * writeable first. - */ - set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - - for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)(addr & ~(PAGE_SIZE-1)), - POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } -#endif -} - -void free_initmem(void) -{ - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); -} - #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 03da9030d0e..aae87456d93 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -945,50 +945,6 @@ void __init mem_init(void) initsize >> 10); } -void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr = begin; - - if (addr >= end) - return; - - /* - * If debugging page accesses then do not free this memory but - * mark them not present - any buggy init-section access will - * create a kernel page fault: - */ -#ifdef CONFIG_DEBUG_PAGEALLOC - printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, PAGE_ALIGN(end)); - set_memory_np(begin, (end - begin) >> PAGE_SHIFT); -#else - /* - * We just marked the kernel text read only above, now that - * we are going to free part of that, we need to make that - * writeable first. - */ - set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); - - printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - - for (; addr < end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - memset((void *)(addr & ~(PAGE_SIZE-1)), - POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } -#endif -} - -void free_initmem(void) -{ - free_init_pages("unused kernel memory", - (unsigned long)(&__init_begin), - (unsigned long)(&__init_end)); -} - #ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); From 867c5b5292583b1e474cbbcb4c77f09bfca3903c Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 3 Mar 2009 14:10:12 +0200 Subject: [PATCH 661/682] x86: set_highmem_pages_init() cleanup Impact: cleanup This patch moves set_highmem_pages_init() to arch/x86/mm/highmem_32.c. The declaration of the function is kept in asm/numa_32.h because asm/highmem.h is included only if CONFIG_HIGHMEM is enabled so we can't put the empty static inline function there. Signed-off-by: Pekka Enberg LKML-Reference: <1236082212.2675.24.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/numa_32.h | 6 +++++- arch/x86/mm/highmem_32.c | 34 ++++++++++++++++++++++++++++++++++ arch/x86/mm/init_32.c | 12 ------------ arch/x86/mm/numa_32.c | 26 -------------------------- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index e9f5db79624..a37229011b5 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h @@ -4,8 +4,12 @@ extern int pxm_to_nid(int pxm); extern void numa_remove_cpu(int cpu); -#ifdef CONFIG_NUMA +#ifdef CONFIG_HIGHMEM extern void set_highmem_pages_init(void); +#else +static inline void set_highmem_pages_init(void) +{ +} #endif #endif /* _ASM_X86_NUMA_32_H */ diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index bcc079c282d..13a823cf564 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -1,5 +1,6 @@ #include #include +#include /* for totalram_pages */ void *kmap(struct page *page) { @@ -156,3 +157,36 @@ EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); EXPORT_SYMBOL(kunmap_atomic); + +#ifdef CONFIG_NUMA +void __init set_highmem_pages_init(void) +{ + struct zone *zone; + int nid; + + for_each_zone(zone) { + unsigned long zone_start_pfn, zone_end_pfn; + + if (!is_highmem(zone)) + continue; + + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + nid = zone_to_nid(zone); + printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, nid, zone_start_pfn, zone_end_pfn); + + add_highpages_with_active_regions(nid, zone_start_pfn, + zone_end_pfn); + } + totalram_pages += totalhigh_pages; +} +#else +static void __init set_highmem_pages_init(void) +{ + add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); + + totalram_pages += totalhigh_pages; +} +#endif /* CONFIG_NUMA */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cd8d6732613..0b087dcd2c1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -467,22 +467,10 @@ void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn, work_with_active_regions(nid, add_highpages_work_fn, &data); } -#ifndef CONFIG_NUMA -static void __init set_highmem_pages_init(void) -{ - add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); - - totalram_pages += totalhigh_pages; -} -#endif /* !CONFIG_NUMA */ - #else static inline void permanent_kmaps_init(pgd_t *pgd_base) { } -static inline void set_highmem_pages_init(void) -{ -} #endif /* CONFIG_HIGHMEM */ void __init native_pagetable_setup_start(pgd_t *base) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d1f7439d173..a04092a8acc 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -423,32 +423,6 @@ void __init initmem_init(unsigned long start_pfn, setup_bootmem_allocator(); } -void __init set_highmem_pages_init(void) -{ -#ifdef CONFIG_HIGHMEM - struct zone *zone; - int nid; - - for_each_zone(zone) { - unsigned long zone_start_pfn, zone_end_pfn; - - if (!is_highmem(zone)) - continue; - - zone_start_pfn = zone->zone_start_pfn; - zone_end_pfn = zone_start_pfn + zone->spanned_pages; - - nid = zone_to_nid(zone); - printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, nid, zone_start_pfn, zone_end_pfn); - - add_highpages_with_active_regions(nid, zone_start_pfn, - zone_end_pfn); - } - totalram_pages += totalhigh_pages; -#endif -} - #ifdef CONFIG_MEMORY_HOTPLUG static int paddr_to_nid(u64 addr) { From 03787ceed8f7bf06af29f3b213017d89f8e9423d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 3 Mar 2009 15:32:24 +0100 Subject: [PATCH 662/682] x86: set_highmem_pages_init() cleanup, fix !CONFIG_NUMA && CONFIG_HIGHMEM=y Impact: build fix arch/x86/mm/highmem_32.c:187: error: static declaration of 'set_highmem_pages_init' follows non-static declaration arch/x86/include/asm/numa_32.h:8: error: previous declaration of 'set_highmem_pages_init' was here Signed-off-by: Pekka Enberg LKML-Reference: <1236082212.2675.24.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/highmem_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 13a823cf564..00f127c80b0 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -183,7 +183,7 @@ void __init set_highmem_pages_init(void) totalram_pages += totalhigh_pages; } #else -static void __init set_highmem_pages_init(void) +void __init set_highmem_pages_init(void) { add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); From 4e8304758cc09a6097dbd2c4f44a5369e5c1edb0 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 3 Mar 2009 11:51:39 -0800 Subject: [PATCH 663/682] x86: remove vestigial fix_ioremap prototypes The function seems to have disappeared at some point, leaving some vestigial prototypes behind... Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 683d0b4c00f..e5383e3d2f8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -172,8 +172,6 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) extern void iounmap(volatile void __iomem *addr); -extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); - #ifdef CONFIG_X86_32 # include "io_32.h" @@ -198,7 +196,6 @@ extern void early_ioremap_reset(void); extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); extern void __iomem *early_memremap(unsigned long offset, unsigned long size); extern void early_iounmap(void __iomem *addr, unsigned long size); -extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define IO_SPACE_LIMIT 0xffff From f254f3909efaf59ca2d0f408de2d044dace60706 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 3 Mar 2009 12:02:57 -0800 Subject: [PATCH 664/682] x86: un-__init fill_pud/pmd/pte They are used by __set_fixmap->set_pte_vaddr_pud, which can be used by arch_setup_additional_pages(), and so is used after init. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 11981fc8570..07f44d491df 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -168,7 +168,7 @@ static __ref void *spp_getpage(void) return ptr; } -static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) +static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr) { if (pgd_none(*pgd)) { pud_t *pud = (pud_t *)spp_getpage(); @@ -180,7 +180,7 @@ static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr) return pud_offset(pgd, vaddr); } -static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) +static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) { if (pud_none(*pud)) { pmd_t *pmd = (pmd_t *) spp_getpage(); @@ -192,7 +192,7 @@ static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr) return pmd_offset(pud, vaddr); } -static pte_t * __init fill_pte(pmd_t *pmd, unsigned long vaddr) +static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) { if (pmd_none(*pmd)) { pte_t *pte = (pte_t *) spp_getpage(); From 6a242909b01120f6f3d571c0b75e20ec61f0d8d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:58 +0900 Subject: [PATCH 665/682] percpu: clean up percpu constants Impact: cleaup Make the following cleanups. * There isn't much arch-specific about PERCPU_MODULE_RESERVE. Always define it whether arch overrides PERCPU_ENOUGH_ROOM or not. * blackfin overrides PERCPU_ENOUGH_ROOM to align static area size. Do it by default. * percpu allocation sizes doesn't have much to do with the page size. Don't use PAGE_SHIFT in their definition. Signed-off-by: Tejun Heo Cc: Bryan Wu --- arch/blackfin/include/asm/percpu.h | 10 ---------- include/linux/percpu.h | 24 +++++++++++++----------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/arch/blackfin/include/asm/percpu.h b/arch/blackfin/include/asm/percpu.h index 797c0c16506..c94c7bc88c7 100644 --- a/arch/blackfin/include/asm/percpu.h +++ b/arch/blackfin/include/asm/percpu.h @@ -3,14 +3,4 @@ #include -#ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 -#else -#define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #endif /* __ARCH_BLACKFIN_PERCPU__ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 545b068bcb7..2d34b038fe7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -5,6 +5,7 @@ #include /* For kmalloc() */ #include #include +#include #include @@ -52,17 +53,18 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ -#ifndef PERCPU_ENOUGH_ROOM +/* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 +#define PERCPU_MODULE_RESERVE (8 << 10) #else -#define PERCPU_MODULE_RESERVE 0 +#define PERCPU_MODULE_RESERVE 0 #endif +#ifndef PERCPU_ENOUGH_ROOM #define PERCPU_ENOUGH_ROOM \ - (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) -#endif /* PERCPU_ENOUGH_ROOM */ + (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ + PERCPU_MODULE_RESERVE) +#endif /* * Must be an lvalue. Since @var must be a simple identifier, @@ -79,7 +81,7 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy @@ -96,15 +98,15 @@ #ifndef PERCPU_DYNAMIC_RESERVE # if BITS_PER_LONG > 32 # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (24 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # endif # else # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (8 << 10) # endif # endif #endif /* PERCPU_DYNAMIC_RESERVE */ From 2441d15c97d498b18f03ae9fba262ffeae42a08b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 666/682] percpu: cosmetic renames in pcpu_setup_first_chunk() Impact: cosmetic, preparation for future changes Make the following renames in pcpur_setup_first_chunk() in preparation for future changes. * s/free_size/dyn_size/ * s/static_vm/first_vm/ * s/static_chunk/schunk/ Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- mm/percpu.c | 58 +++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2d34b038fe7..a0b4ea2a335 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -118,7 +118,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, + size_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* diff --git a/mm/percpu.c b/mm/percpu.c index 3d0f5456827..9531590e6b6 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -831,7 +831,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto - * @free_size: free size in bytes, 0 for auto + * @dyn_size: free size for dynamic allocation in bytes, 0 for auto * @base_addr: mapped address, NULL for auto * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @@ -849,12 +849,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * return the same number of pages for all cpus. * * @unit_size, if non-zero, determines unit size and must be aligned - * to PAGE_SIZE and equal to or larger than @static_size + @free_size. + * to PAGE_SIZE and equal to or larger than @static_size + @dyn_size. * - * @free_size determines the number of free bytes after the static + * @dyn_size determines the number of free bytes after the static * area in the first chunk. If zero, whatever left is available. * Specifying non-zero value make percpu leave the area after - * @static_size + @free_size alone. + * @static_size + @dyn_size alone. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -870,19 +870,19 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, + size_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { - static struct vm_struct static_vm; - struct pcpu_chunk *static_chunk; + static struct vm_struct first_vm; + struct pcpu_chunk *schunk; unsigned int cpu; int nr_pages; int err, i; /* santiy checks */ BUG_ON(!static_size); - BUG_ON(!unit_size && free_size); - BUG_ON(unit_size && unit_size < static_size + free_size); + BUG_ON(!unit_size && dyn_size); + BUG_ON(unit_size && unit_size < static_size + dyn_size); BUG_ON(unit_size & ~PAGE_MASK); BUG_ON(base_addr && !unit_size); BUG_ON(base_addr && populate_pte_fn); @@ -908,24 +908,24 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); - /* init static_chunk */ - static_chunk = alloc_bootmem(pcpu_chunk_struct_size); - INIT_LIST_HEAD(&static_chunk->list); - static_chunk->vm = &static_vm; + /* init static chunk */ + schunk = alloc_bootmem(pcpu_chunk_struct_size); + INIT_LIST_HEAD(&schunk->list); + schunk->vm = &first_vm; - if (free_size) - static_chunk->free_size = free_size; + if (dyn_size) + schunk->free_size = dyn_size; else - static_chunk->free_size = pcpu_unit_size - pcpu_static_size; + schunk->free_size = pcpu_unit_size - pcpu_static_size; - static_chunk->contig_hint = static_chunk->free_size; + schunk->contig_hint = schunk->free_size; /* allocate vm address */ - static_vm.flags = VM_ALLOC; - static_vm.size = pcpu_chunk_size; + first_vm.flags = VM_ALLOC; + first_vm.size = pcpu_chunk_size; if (!base_addr) - vm_area_register_early(&static_vm, PAGE_SIZE); + vm_area_register_early(&first_vm, PAGE_SIZE); else { /* * Pages already mapped. No need to remap into @@ -933,8 +933,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, * be mapped or unmapped by percpu and is marked * immutable. */ - static_vm.addr = base_addr; - static_chunk->immutable = true; + first_vm.addr = base_addr; + schunk->immutable = true; } /* assign pages */ @@ -945,7 +945,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (!page) break; - *pcpu_chunk_pagep(static_chunk, cpu, i) = page; + *pcpu_chunk_pagep(schunk, cpu, i) = page; } BUG_ON(i < PFN_UP(pcpu_static_size)); @@ -960,20 +960,20 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (populate_pte_fn) { for_each_possible_cpu(cpu) for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(static_chunk, + populate_pte_fn(pcpu_chunk_addr(schunk, cpu, i)); - err = pcpu_map(static_chunk, 0, nr_pages); + err = pcpu_map(schunk, 0, nr_pages); if (err) panic("failed to setup static percpu area, err=%d\n", err); } - /* link static_chunk in */ - pcpu_chunk_relocate(static_chunk, -1); - pcpu_chunk_addr_insert(static_chunk); + /* link the first chunk in */ + pcpu_chunk_relocate(schunk, -1); + pcpu_chunk_addr_insert(schunk); /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(static_chunk, 0, 0); + pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); return pcpu_unit_size; } From 61ace7fa2fff9c4b6641c506b6b3f1a9394a1b11 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 667/682] percpu: improve first chunk initial area map handling Impact: no functional change When the first chunk is created, its initial area map is not allocated because kmalloc isn't online yet. The map is allocated and initialized on the first allocation request on the chunk. This works fine but the scattering of initialization logic between the init function and allocation path is a bit confusing. This patch makes the first chunk initialize and use minimal statically allocated map from pcpu_setpu_first_chunk(). The map resizing path still needs to handle this specially but it's more straight-forward and gives more latitude to the init path. This will ease future changes. Signed-off-by: Tejun Heo --- mm/percpu.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 9531590e6b6..503ccad091a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -93,9 +93,6 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* the size of kernel static area */ -static int pcpu_static_size __read_mostly; - /* * One mutex to rule them all. * @@ -316,15 +313,28 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) /* reallocation required? */ if (chunk->map_alloc < target) { - int new_alloc = chunk->map_alloc; + int new_alloc; int *new; + new_alloc = PCPU_DFL_MAP_ALLOC; while (new_alloc < target) new_alloc *= 2; - new = pcpu_realloc(chunk->map, - chunk->map_alloc * sizeof(new[0]), - new_alloc * sizeof(new[0])); + if (chunk->map_alloc < PCPU_DFL_MAP_ALLOC) { + /* + * map_alloc smaller than the default size + * indicates that the chunk is one of the + * first chunks and still using static map. + * Allocate a dynamic one and copy. + */ + new = pcpu_realloc(NULL, 0, new_alloc * sizeof(new[0])); + if (new) + memcpy(new, chunk->map, + chunk->map_alloc * sizeof(new[0])); + } else + new = pcpu_realloc(chunk->map, + chunk->map_alloc * sizeof(new[0]), + new_alloc * sizeof(new[0])); if (!new) return -ENOMEM; @@ -367,22 +377,6 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) int max_contig = 0; int i, off; - /* - * The static chunk initially doesn't have map attached - * because kmalloc wasn't available during init. Give it one. - */ - if (unlikely(!chunk->map)) { - chunk->map = pcpu_realloc(NULL, 0, - PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); - if (!chunk->map) - return -ENOMEM; - - chunk->map_alloc = PCPU_DFL_MAP_ALLOC; - chunk->map[chunk->map_used++] = -pcpu_static_size; - if (chunk->free_size) - chunk->map[chunk->map_used++] = chunk->free_size; - } - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { bool is_last = i + 1 == chunk->map_used; int head, tail; @@ -874,12 +868,14 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; + static int smap[2]; struct pcpu_chunk *schunk; unsigned int cpu; int nr_pages; int err, i; /* santiy checks */ + BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); BUG_ON(!unit_size && dyn_size); BUG_ON(unit_size && unit_size < static_size + dyn_size); @@ -893,7 +889,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, PFN_UP(static_size)); - pcpu_static_size = static_size; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) @@ -912,14 +907,20 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&schunk->list); schunk->vm = &first_vm; + schunk->map = smap; + schunk->map_alloc = ARRAY_SIZE(smap); if (dyn_size) schunk->free_size = dyn_size; else - schunk->free_size = pcpu_unit_size - pcpu_static_size; + schunk->free_size = pcpu_unit_size - static_size; schunk->contig_hint = schunk->free_size; + schunk->map[schunk->map_used++] = -static_size; + if (schunk->free_size) + schunk->map[schunk->map_used++] = schunk->free_size; + /* allocate vm address */ first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; @@ -948,7 +949,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, *pcpu_chunk_pagep(schunk, cpu, i) = page; } - BUG_ON(i < PFN_UP(pcpu_static_size)); + BUG_ON(i < PFN_UP(static_size)); if (nr_pages < 0) nr_pages = i; From cafe8816b217b98dc3f268d3b77445da498beb4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 668/682] percpu: use negative for auto for pcpu_setup_first_chunk() arguments Impact: argument semantic cleanup In pcpu_setup_first_chunk(), zero @unit_size and @dyn_size meant auto-sizing. It's okay for @unit_size as 0 doesn't make sense but 0 dynamic reserve size is valid. Alos, if arch @dyn_size is calculated from other parameters, it might end up passing in 0 @dyn_size and malfunction when the size is automatically adjusted. This patch makes both @unit_size and @dyn_size ssize_t and use -1 for auto sizing. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 2 +- include/linux/percpu.h | 5 ++-- mm/percpu.c | 46 ++++++++++++++++++---------------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c29f301d388..ef3a2cd3fe6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -344,7 +344,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, -1, -1, NULL, pcpu4k_populate_pte); goto out_free_ar; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a0b4ea2a335..a96fc53bbd6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,8 +117,9 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t dyn_size, void *base_addr, + size_t static_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* diff --git a/mm/percpu.c b/mm/percpu.c index 503ccad091a..a84cf9977fa 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -824,8 +824,8 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto - * @dyn_size: free size for dynamic allocation in bytes, 0 for auto + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @base_addr: mapped address, NULL for auto * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @@ -842,13 +842,14 @@ EXPORT_SYMBOL_GPL(free_percpu); * indicates end of pages for the cpu. Note that @get_page_fn() must * return the same number of pages for all cpus. * - * @unit_size, if non-zero, determines unit size and must be aligned - * to PAGE_SIZE and equal to or larger than @static_size + @dyn_size. + * @unit_size, if non-negative, specifies unit size and must be + * aligned to PAGE_SIZE and equal to or larger than @static_size + + * @dyn_size. * - * @dyn_size determines the number of free bytes after the static - * area in the first chunk. If zero, whatever left is available. - * Specifying non-zero value make percpu leave the area after - * @static_size + @dyn_size alone. + * @dyn_size, if non-negative, limits the number of bytes available + * for dynamic allocation in the first chunk. Specifying non-negative + * value make percpu leave alone the area beyond @static_size + + * @dyn_size. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -863,8 +864,9 @@ EXPORT_SYMBOL_GPL(free_percpu); * percpu access. */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t dyn_size, void *base_addr, + size_t static_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; @@ -877,13 +879,17 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - BUG_ON(!unit_size && dyn_size); - BUG_ON(unit_size && unit_size < static_size + dyn_size); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(base_addr && !unit_size); + if (unit_size >= 0) { + BUG_ON(unit_size < static_size + + (dyn_size >= 0 ? dyn_size : 0)); + BUG_ON(unit_size & ~PAGE_MASK); + } else { + BUG_ON(dyn_size >= 0); + BUG_ON(base_addr); + } BUG_ON(base_addr && populate_pte_fn); - if (unit_size) + if (unit_size >= 0) pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, @@ -894,6 +900,9 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + if (dyn_size < 0) + dyn_size = pcpu_unit_size - static_size; + /* * Allocate chunk slots. The additional last slot is for * empty chunks. @@ -909,12 +918,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); - - if (dyn_size) - schunk->free_size = dyn_size; - else - schunk->free_size = pcpu_unit_size - static_size; - + schunk->free_size = dyn_size; schunk->contig_hint = schunk->free_size; schunk->map[schunk->map_used++] = -static_size; From 9a4f8a878b68d5a5d9ee60908a52cf6a55e1b823 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 669/682] x86: make embedding percpu allocator return excessive free space Impact: reduce unnecessary memory usage on certain configurations Embedding percpu allocator allocates unit_size * smp_num_possible_cpus() bytes consecutively and use it for the first chunk. However, if the static area is small, this can result in excessive prellocated free space in the first chunk due to PCPU_MIN_UNIT_SIZE restriction. This patch makes embedding percpu allocator preallocate only what's necessary as described by PERPCU_DYNAMIC_RESERVE and return the leftover to the bootmem allocator. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 44 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef3a2cd3fe6..38e2b2a470a 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -241,24 +241,31 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using - * bootmem allocator and used as-is without being mapped into vmalloc - * area. This enables the first chunk to piggy back on the linear - * physical PMD mapping and doesn't add any additional pressure to - * TLB. + * module and dynamic reserves, and allocated as a contiguous area + * using bootmem allocator and used as-is without being mapped into + * vmalloc area. This enables the first chunk to piggy back on the + * linear physical PMD mapping and doesn't add any additional pressure + * to TLB. Note that if the needed size is smaller than the minimum + * unit size, the leftover is returned to the bootmem allocator. */ static void *pcpue_ptr __initdata; +static size_t pcpue_size __initdata; static size_t pcpue_unit_size __initdata; static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) { - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size - + ((size_t)pageno << PAGE_SHIFT)); + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpue_size) + return NULL; + + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); } static ssize_t __init setup_pcpu_embed(size_t static_size) { unsigned int cpu; + size_t dyn_size; /* * If large page isn't supported, there's no benefit in doing @@ -269,25 +276,30 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) return -EINVAL; /* allocate and copy */ - pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); + pcpue_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); + dyn_size = pcpue_size - static_size; + pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); if (!pcpue_ptr) return -ENOMEM; - for_each_possible_cpu(cpu) - memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, - static_size); + for_each_possible_cpu(cpu) { + void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + + free_bootmem(__pa(ptr + pcpue_size), + pcpue_unit_size - pcpue_size); + memcpy(ptr, __per_cpu_load, static_size); + } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); + pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); return pcpu_setup_first_chunk(pcpue_get_page, static_size, - pcpue_unit_size, - pcpue_unit_size - static_size, pcpue_ptr, - NULL); + pcpue_unit_size, dyn_size, + pcpue_ptr, NULL); } /* From 3e24aa58907c62bc79d1094e941a374568f62522 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 670/682] percpu: add an indirection ptr for chunk page map access Impact: allow sharing page map, no functional difference yet Make chunk->page access indirect by adding a pointer and renaming the actual array to page_ar. This will be used by future changes. Signed-off-by: Tejun Heo --- mm/percpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index a84cf9977fa..5b47d9fe65f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,7 +80,8 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page *page[]; /* #cpus * UNIT_PAGES */ + struct page **page; /* points to page array */ + struct page *page_ar[]; /* #cpus * UNIT_PAGES */ }; static int pcpu_unit_pages __read_mostly; @@ -696,6 +697,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; + chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -918,6 +920,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); + schunk->page = schunk->page_ar; schunk->free_size = dyn_size; schunk->contig_hint = schunk->free_size; From edcb463997ed7b2ffa3bac76e3e75957318f2e01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 671/682] percpu, module: implement reserved allocation and use it for module percpu variables Impact: add reserved allocation functionality and use it for module percpu variables This patch implements reserved allocation from the first chunk. When setting up the first chunk, arch can ask to set aside certain number of bytes right after the core static area which is available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with limited relocation range to ensure that the module perpcu symbols are inside the relocatable range. If reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called static first chunk and the second dynamic first chunk. Although they share the page map, their different area map initializations guarantee they serve disjoint areas according to their purposes. If arch doesn't setup reserved area, reserved allocation is handled like any other allocation. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 8 +- include/linux/percpu.h | 10 ++- kernel/module.c | 2 +- mm/percpu.c | 153 ++++++++++++++++++++++++++++----- 4 files changed, 144 insertions(+), 29 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 38e2b2a470a..dd4eabc747c 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -217,7 +217,7 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE, pcpur_size - static_size, vm.addr, NULL); goto out_free_ar; @@ -297,7 +297,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, + return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0, pcpue_unit_size, dyn_size, pcpue_ptr, NULL); } @@ -356,8 +356,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, -1, -1, NULL, - pcpu4k_populate_pte); + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1, + NULL, pcpu4k_populate_pte); goto out_free_ar; enomem: diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a96fc53bbd6..8ff15153ae2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,10 +117,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -129,6 +129,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +extern void *__alloc_reserved_percpu(size_t size, size_t align); + #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { diff --git a/kernel/module.c b/kernel/module.c index 1f0657ae555..f0e04d6b67d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -381,7 +381,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, align = PAGE_SIZE; } - ptr = __alloc_percpu(size, align); + ptr = __alloc_reserved_percpu(size, align); if (!ptr) printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); diff --git a/mm/percpu.c b/mm/percpu.c index 5b47d9fe65f..ef8e169b773 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -94,6 +94,11 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); +/* optional reserved chunk, only accessible for reserved allocations */ +static struct pcpu_chunk *pcpu_reserved_chunk; +/* offset limit of the reserved chunk */ +static int pcpu_reserved_chunk_limit; + /* * One mutex to rule them all. * @@ -201,13 +206,14 @@ static void *pcpu_realloc(void *p, size_t size, size_t new_size) * * This function is called after an allocation or free changed @chunk. * New slot according to the changed state is determined and @chunk is - * moved to the slot. + * moved to the slot. Note that the reserved chunk is never put on + * chunk slots. */ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) { int nslot = pcpu_chunk_slot(chunk); - if (oslot != nslot) { + if (chunk != pcpu_reserved_chunk && oslot != nslot) { if (oslot < nslot) list_move(&chunk->list, &pcpu_slot[nslot]); else @@ -255,6 +261,15 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) struct rb_node *n, *parent; struct pcpu_chunk *chunk; + /* is it in the reserved chunk? */ + if (pcpu_reserved_chunk) { + void *start = pcpu_reserved_chunk->vm->addr; + + if (addr >= start && addr < start + pcpu_reserved_chunk_limit) + return pcpu_reserved_chunk; + } + + /* nah... search the regular ones */ n = *pcpu_chunk_rb_search(addr, &parent); if (!n) { /* no exactly matching chunk, the parent is the closest */ @@ -713,9 +728,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) } /** - * __alloc_percpu - allocate percpu area + * pcpu_alloc - the percpu allocator * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) + * @reserved: allocate from the reserved chunk if available * * Allocate percpu area of @size bytes aligned at @align. Might * sleep. Might trigger writeouts. @@ -723,7 +739,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -void *__alloc_percpu(size_t size, size_t align) +static void *pcpu_alloc(size_t size, size_t align, bool reserved) { void *ptr = NULL; struct pcpu_chunk *chunk; @@ -737,7 +753,18 @@ void *__alloc_percpu(size_t size, size_t align) mutex_lock(&pcpu_mutex); - /* allocate area */ + /* serve reserved allocations from the reserved chunk if available */ + if (reserved && pcpu_reserved_chunk) { + chunk = pcpu_reserved_chunk; + if (size > chunk->contig_hint) + goto out_unlock; + off = pcpu_alloc_area(chunk, size, align); + if (off >= 0) + goto area_found; + goto out_unlock; + } + + /* search through normal chunks */ for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { list_for_each_entry(chunk, &pcpu_slot[slot], list) { if (size > chunk->contig_hint) @@ -773,8 +800,41 @@ out_unlock: mutex_unlock(&pcpu_mutex); return ptr; } + +/** + * __alloc_percpu - allocate dynamic percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align. Might + * sleep. Might trigger writeouts. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_percpu(size_t size, size_t align) +{ + return pcpu_alloc(size, align, false); +} EXPORT_SYMBOL_GPL(__alloc_percpu); +/** + * __alloc_reserved_percpu - allocate reserved percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Allocate percpu area of @size bytes aligned at @align from reserved + * percpu area if arch has set it up; otherwise, allocation is served + * from the same dynamic area. Might sleep. Might trigger writeouts. + * + * RETURNS: + * Percpu pointer to the allocated area on success, NULL on failure. + */ +void *__alloc_reserved_percpu(size_t size, size_t align) +{ + return pcpu_alloc(size, align, true); +} + static void pcpu_kill_chunk(struct pcpu_chunk *chunk) { WARN_ON(chunk->immutable); @@ -826,6 +886,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @base_addr: mapped address, NULL for auto @@ -844,14 +905,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * indicates end of pages for the cpu. Note that @get_page_fn() must * return the same number of pages for all cpus. * + * @reserved_size, if non-zero, specifies the amount of bytes to + * reserve after the static area in the first chunk. This reserves + * the first chunk such that it's available only through reserved + * percpu allocation. This is primarily used to serve module percpu + * static areas on architectures where the addressing model has + * limited offset range for symbol relocations to guarantee module + * percpu symbols fall inside the relocatable range. + * * @unit_size, if non-negative, specifies unit size and must be * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @dyn_size. + * @reserved_size + @dyn_size. * * @dyn_size, if non-negative, limits the number of bytes available * for dynamic allocation in the first chunk. Specifying non-negative * value make percpu leave alone the area beyond @static_size + - * @dyn_size. + * @reserved_size + @dyn_size. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -861,28 +930,36 @@ EXPORT_SYMBOL_GPL(free_percpu); * @populate_pte_fn is used to populate the pagetable. NULL means the * caller already populated the pagetable. * + * If the first chunk ends up with both reserved and dynamic areas, it + * is served by two chunks - one to serve the core static and reserved + * areas and the other for the dynamic area. They share the same vm + * and page map but uses different area allocation map to stay away + * from each other. The latter chunk is circulated in the chunk slots + * and available for dynamic allocation like any other chunks. + * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access. */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, + size_t static_size, size_t reserved_size, ssize_t unit_size, ssize_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; - static int smap[2]; - struct pcpu_chunk *schunk; + static int smap[2], dmap[2]; + struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; int nr_pages; int err, i; /* santiy checks */ - BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC); + BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || + ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); if (unit_size >= 0) { - BUG_ON(unit_size < static_size + + BUG_ON(unit_size < static_size + reserved_size + (dyn_size >= 0 ? dyn_size : 0)); BUG_ON(unit_size & ~PAGE_MASK); } else { @@ -895,7 +972,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size)); + PFN_UP(static_size + reserved_size)); pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; @@ -903,7 +980,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size; + dyn_size = pcpu_unit_size - static_size - reserved_size; /* * Allocate chunk slots. The additional last slot is for @@ -914,20 +991,49 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); - /* init static chunk */ + /* + * Initialize static chunk. If reserved_size is zero, the + * static chunk covers static area + dynamic allocation area + * in the first chunk. If reserved_size is not zero, it + * covers static area + reserved area (mostly used for module + * static percpu allocation). + */ schunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&schunk->list); schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); schunk->page = schunk->page_ar; - schunk->free_size = dyn_size; + + if (reserved_size) { + schunk->free_size = reserved_size; + pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ + } else { + schunk->free_size = dyn_size; + dyn_size = 0; /* dynamic area covered */ + } schunk->contig_hint = schunk->free_size; schunk->map[schunk->map_used++] = -static_size; if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; + pcpu_reserved_chunk_limit = static_size + schunk->free_size; + + /* init dynamic chunk if necessary */ + if (dyn_size) { + dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + INIT_LIST_HEAD(&dchunk->list); + dchunk->vm = &first_vm; + dchunk->map = dmap; + dchunk->map_alloc = ARRAY_SIZE(dmap); + dchunk->page = schunk->page_ar; /* share page map with schunk */ + + dchunk->contig_hint = dchunk->free_size = dyn_size; + dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; + dchunk->map[dchunk->map_used++] = dchunk->free_size; + } + /* allocate vm address */ first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; @@ -937,12 +1043,14 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, else { /* * Pages already mapped. No need to remap into - * vmalloc area. In this case the static chunk can't - * be mapped or unmapped by percpu and is marked + * vmalloc area. In this case the first chunks can't + * be mapped or unmapped by percpu and are marked * immutable. */ first_vm.addr = base_addr; schunk->immutable = true; + if (dchunk) + dchunk->immutable = true; } /* assign pages */ @@ -978,8 +1086,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, } /* link the first chunk in */ - pcpu_chunk_relocate(schunk, -1); - pcpu_chunk_addr_insert(schunk); + if (!dchunk) { + pcpu_chunk_relocate(schunk, -1); + pcpu_chunk_addr_insert(schunk); + } else { + pcpu_chunk_relocate(dchunk, -1); + pcpu_chunk_addr_insert(dchunk); + } /* we're done */ pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); From 6b19b0c2400437a3c10059ede0e59b517092e1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: [PATCH 672/682] x86, percpu: setup reserved percpu area for x86_64 Impact: fix relocation overflow during module load x86_64 uses 32bit relocations for symbol access and static percpu symbols whether in core or modules must be inside 2GB of the percpu segement base which the dynamic percpu allocator doesn't guarantee. This patch makes x86_64 reserve PERCPU_MODULE_RESERVE bytes in the first chunk so that module percpu areas are always allocated from the first chunk which is always inside the relocatable range. This problem exists for any percpu allocator but is easily triggered when using the embedding allocator because the second chunk is located beyond 2GB on it. This patch also changes the meaning of PERCPU_DYNAMIC_RESERVE such that it only indicates the size of the area to reserve for dynamic allocation as static and dynamic areas can be separate. New PERCPU_DYNAMIC_RESERVED is increased by 4k for both 32 and 64bits as the reserved area separation eats away some allocatable space and having slightly more headroom (currently between 4 and 8k after minimal boot sans module area) makes sense for common case performance. x86_32 can address anywhere from anywhere and doesn't need reserving. Mike Galbraith first reported the problem first and bisected it to the embedding percpu allocator commit. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Reported-by: Jaswinder Singh Rajput --- arch/x86/kernel/setup_percpu.c | 37 +++++++++++++++++++++++++--------- include/linux/percpu.h | 35 +++++++++++--------------------- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index dd4eabc747c..efa615f2bf4 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -42,6 +42,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +/* + * On x86_64 symbols referenced from code should be reachable using + * 32bit relocations. Reserve space for static percpu variables in + * modules so that they are always served from the first chunk which + * is located at the percpu segment base. On x86_32, anything can + * address anywhere. No need to reserve space in the first chunk. + */ +#ifdef CONFIG_X86_64 +#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE +#else +#define PERCPU_FIRST_CHUNK_RESERVE 0 +#endif + /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -141,7 +154,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) { static struct vm_struct vm; pg_data_t *last; - size_t ptrs_size; + size_t ptrs_size, dyn_size; unsigned int cpu; ssize_t ret; @@ -169,12 +182,14 @@ proceed: * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); if (pcpur_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } + dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); @@ -217,8 +232,9 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE, - pcpur_size - static_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, dyn_size, vm.addr, NULL); goto out_free_ar; enomem: @@ -276,9 +292,10 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) return -EINVAL; /* allocate and copy */ - pcpue_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - dyn_size = pcpue_size - static_size; + dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); @@ -297,7 +314,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0, + return pcpu_setup_first_chunk(pcpue_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, pcpue_unit_size, dyn_size, pcpue_ptr, NULL); } @@ -356,8 +374,9 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1, - NULL, pcpu4k_populate_pte); + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, + pcpu4k_populate_pte); goto out_free_ar; enomem: diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8ff15153ae2..54a968b4b92 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -85,31 +85,20 @@ /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk if arch is manually allocating and mapping - * it for faster access (as a part of large page mapping for example). - * Note that dynamic percpu allocator covers both static and dynamic - * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * back on the first chunk for dynamic percpu allocation if arch is + * manually allocating and mapping it for faster access (as a part of + * large page mapping for example). * - * On typical configuration with modules, the following values leave - * about 8k of free space on the first chunk after boot on both x86_32 - * and 64 when module support is enabled. When module support is - * disabled, it's much tighter. + * The following values give between one and two pages of free space + * after typical minimal boot (2-way SMP, single disk and NIC) with + * both defconfig and a distro config on x86_64 and 32. More + * intelligent way to determine this would be nice. */ -#ifndef PERCPU_DYNAMIC_RESERVE -# if BITS_PER_LONG > 32 -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (24 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# endif -# else -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (8 << 10) -# endif -# endif -#endif /* PERCPU_DYNAMIC_RESERVE */ +#if BITS_PER_LONG > 32 +#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#else +#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#endif extern void *pcpu_base_addr; From 1880d93b80acc3171850e9df5048bcb26b75c2f5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Mar 2009 00:44:09 +0900 Subject: [PATCH 673/682] percpu: replace pcpu_realloc() with pcpu_mem_alloc() and pcpu_mem_free() Impact: code reorganization for later changes With static map handling moved to pcpu_split_block(), pcpu_realloc() only clutters the code and it's also unsuitable for scheduled locking changes. Implement and use pcpu_mem_alloc/free() instead. Signed-off-by: Tejun Heo --- mm/percpu.c | 87 ++++++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index ef8e169b773..f1d0e905850 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -164,39 +164,41 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, } /** - * pcpu_realloc - versatile realloc - * @p: the current pointer (can be NULL for new allocations) - * @size: the current size in bytes (can be 0 for new allocations) - * @new_size: the wanted new size in bytes (can be 0 for free) + * pcpu_mem_alloc - allocate memory + * @size: bytes to allocate * - * More robust realloc which can be used to allocate, resize or free a - * memory area of arbitrary size. If the needed size goes over - * PAGE_SIZE, kernel VM is used. + * Allocate @size bytes. If @size is smaller than PAGE_SIZE, + * kzalloc() is used; otherwise, vmalloc() is used. The returned + * memory is always zeroed. * * RETURNS: - * The new pointer on success, NULL on failure. + * Pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_realloc(void *p, size_t size, size_t new_size) +static void *pcpu_mem_alloc(size_t size) { - void *new; - - if (new_size <= PAGE_SIZE) - new = kmalloc(new_size, GFP_KERNEL); - else - new = vmalloc(new_size); - if (new_size && !new) - return NULL; - - memcpy(new, p, min(size, new_size)); - if (new_size > size) - memset(new + size, 0, new_size - size); - if (size <= PAGE_SIZE) - kfree(p); - else - vfree(p); + return kzalloc(size, GFP_KERNEL); + else { + void *ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; + } +} - return new; +/** + * pcpu_mem_free - free memory + * @ptr: memory to free + * @size: size of the area + * + * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). + */ +static void pcpu_mem_free(void *ptr, size_t size) +{ + if (size <= PAGE_SIZE) + kfree(ptr); + else + vfree(ptr); } /** @@ -331,29 +333,27 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) if (chunk->map_alloc < target) { int new_alloc; int *new; + size_t size; new_alloc = PCPU_DFL_MAP_ALLOC; while (new_alloc < target) new_alloc *= 2; - if (chunk->map_alloc < PCPU_DFL_MAP_ALLOC) { - /* - * map_alloc smaller than the default size - * indicates that the chunk is one of the - * first chunks and still using static map. - * Allocate a dynamic one and copy. - */ - new = pcpu_realloc(NULL, 0, new_alloc * sizeof(new[0])); - if (new) - memcpy(new, chunk->map, - chunk->map_alloc * sizeof(new[0])); - } else - new = pcpu_realloc(chunk->map, - chunk->map_alloc * sizeof(new[0]), - new_alloc * sizeof(new[0])); + new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); if (!new) return -ENOMEM; + size = chunk->map_alloc * sizeof(chunk->map[0]); + memcpy(new, chunk->map, size); + + /* + * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the + * chunk is one of the first chunks and still using + * static map. + */ + if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) + pcpu_mem_free(chunk->map, size); + chunk->map_alloc = new_alloc; chunk->map = new; } @@ -696,7 +696,7 @@ static void free_pcpu_chunk(struct pcpu_chunk *chunk) return; if (chunk->vm) free_vm_area(chunk->vm); - pcpu_realloc(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]), 0); + pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); kfree(chunk); } @@ -708,8 +708,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) if (!chunk) return NULL; - chunk->map = pcpu_realloc(NULL, 0, - PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; chunk->page = chunk->page_ar; From 9f7dcf224bd09ec9ebcbfb383bf2c465e0e0b03d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Mar 2009 00:44:09 +0900 Subject: [PATCH 674/682] percpu: move chunk area map extension out of area allocation Impact: code reorganization for later changes Separate out chunk area map extension into a separate function - pcpu_extend_area_map() - and call it directly from pcpu_alloc() such that pcpu_alloc_area() is guaranteed to have enough area map slots on invocation. With this change, pcpu_alloc_area() does only area allocation and the only failure mode is when the chunk doens't have enough room, so there's no need to distinguish it from memory allocation failures. Make it return -1 on such cases instead of hacky -ENOSPC. Signed-off-by: Tejun Heo --- mm/percpu.c | 108 +++++++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 48 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index f1d0e905850..7d9bc35e8ed 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -306,6 +306,50 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) rb_insert_color(&new->rb_node, &pcpu_addr_root); } +/** + * pcpu_extend_area_map - extend area map for allocation + * @chunk: target chunk + * + * Extend area map of @chunk so that it can accomodate an allocation. + * A single allocation can split an area into three areas, so this + * function makes sure that @chunk->map has at least two extra slots. + * + * RETURNS: + * 0 if noop, 1 if successfully extended, -errno on failure. + */ +static int pcpu_extend_area_map(struct pcpu_chunk *chunk) +{ + int new_alloc; + int *new; + size_t size; + + /* has enough? */ + if (chunk->map_alloc >= chunk->map_used + 2) + return 0; + + new_alloc = PCPU_DFL_MAP_ALLOC; + while (new_alloc < chunk->map_used + 2) + new_alloc *= 2; + + new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); + if (!new) + return -ENOMEM; + + size = chunk->map_alloc * sizeof(chunk->map[0]); + memcpy(new, chunk->map, size); + + /* + * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is + * one of the first chunks and still using static map. + */ + if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) + pcpu_mem_free(chunk->map, size); + + chunk->map_alloc = new_alloc; + chunk->map = new; + return 0; +} + /** * pcpu_split_block - split a map block * @chunk: chunk of interest @@ -321,44 +365,16 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) * depending on @head, is reduced by @tail bytes and @tail byte block * is inserted after the target block. * - * RETURNS: - * 0 on success, -errno on failure. + * @chunk->map must have enough free slots to accomodate the split. */ -static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) +static void pcpu_split_block(struct pcpu_chunk *chunk, int i, + int head, int tail) { int nr_extra = !!head + !!tail; - int target = chunk->map_used + nr_extra; - /* reallocation required? */ - if (chunk->map_alloc < target) { - int new_alloc; - int *new; - size_t size; + BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra); - new_alloc = PCPU_DFL_MAP_ALLOC; - while (new_alloc < target) - new_alloc *= 2; - - new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); - if (!new) - return -ENOMEM; - - size = chunk->map_alloc * sizeof(chunk->map[0]); - memcpy(new, chunk->map, size); - - /* - * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the - * chunk is one of the first chunks and still using - * static map. - */ - if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) - pcpu_mem_free(chunk->map, size); - - chunk->map_alloc = new_alloc; - chunk->map = new; - } - - /* insert a new subblock */ + /* insert new subblocks */ memmove(&chunk->map[i + nr_extra], &chunk->map[i], sizeof(chunk->map[0]) * (chunk->map_used - i)); chunk->map_used += nr_extra; @@ -371,7 +387,6 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) chunk->map[i++] -= tail; chunk->map[i] = tail; } - return 0; } /** @@ -384,8 +399,11 @@ static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) * Note that this function only allocates the offset. It doesn't * populate or map the area. * + * @chunk->map must have at least two free slots. + * * RETURNS: - * Allocated offset in @chunk on success, -errno on failure. + * Allocated offset in @chunk on success, -1 if no matching area is + * found. */ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) { @@ -433,8 +451,7 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) /* split if warranted */ if (head || tail) { - if (pcpu_split_block(chunk, i, head, tail)) - return -ENOMEM; + pcpu_split_block(chunk, i, head, tail); if (head) { i++; off += head; @@ -461,14 +478,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) chunk->contig_hint = max_contig; /* fully scanned */ pcpu_chunk_relocate(chunk, oslot); - /* - * Tell the upper layer that this chunk has no area left. - * Note that this is not an error condition but a notification - * to upper layer that it needs to look at other chunks. - * -ENOSPC is chosen as it isn't used in memory subsystem and - * matches the meaning in a way. - */ - return -ENOSPC; + /* tell the upper layer that this chunk has no matching area */ + return -1; } /** @@ -755,7 +766,8 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) /* serve reserved allocations from the reserved chunk if available */ if (reserved && pcpu_reserved_chunk) { chunk = pcpu_reserved_chunk; - if (size > chunk->contig_hint) + if (size > chunk->contig_hint || + pcpu_extend_area_map(chunk) < 0) goto out_unlock; off = pcpu_alloc_area(chunk, size, align); if (off >= 0) @@ -768,11 +780,11 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) list_for_each_entry(chunk, &pcpu_slot[slot], list) { if (size > chunk->contig_hint) continue; + if (pcpu_extend_area_map(chunk) < 0) + goto out_unlock; off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; - if (off != -ENOSPC) - goto out_unlock; } } From a56dbddf06b653ef9c04ca3767f260fd31ccebab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Mar 2009 00:44:11 +0900 Subject: [PATCH 675/682] percpu: move fully free chunk reclamation into a work Impact: code reorganization for later changes Do fully free chunk reclamation using a work. This change is to prepare for locking changes. Signed-off-by: Tejun Heo --- mm/percpu.c | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 7d9bc35e8ed..4c8a419119d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,10 @@ static DEFINE_MUTEX(pcpu_mutex); static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ +/* reclaim work to release fully free chunks, scheduled from free path */ +static void pcpu_reclaim(struct work_struct *work); +static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); + static int __pcpu_size_to_slot(int size) { int highbit = fls(size); /* size is in bytes */ @@ -846,13 +851,37 @@ void *__alloc_reserved_percpu(size_t size, size_t align) return pcpu_alloc(size, align, true); } -static void pcpu_kill_chunk(struct pcpu_chunk *chunk) +/** + * pcpu_reclaim - reclaim fully free chunks, workqueue function + * @work: unused + * + * Reclaim all fully free chunks except for the first one. + */ +static void pcpu_reclaim(struct work_struct *work) { - WARN_ON(chunk->immutable); - pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); - list_del(&chunk->list); - rb_erase(&chunk->rb_node, &pcpu_addr_root); - free_pcpu_chunk(chunk); + LIST_HEAD(todo); + struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1]; + struct pcpu_chunk *chunk, *next; + + mutex_lock(&pcpu_mutex); + + list_for_each_entry_safe(chunk, next, head, list) { + WARN_ON(chunk->immutable); + + /* spare the first one */ + if (chunk == list_first_entry(head, struct pcpu_chunk, list)) + continue; + + rb_erase(&chunk->rb_node, &pcpu_addr_root); + list_move(&chunk->list, &todo); + } + + mutex_unlock(&pcpu_mutex); + + list_for_each_entry_safe(chunk, next, &todo, list) { + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + free_pcpu_chunk(chunk); + } } /** @@ -877,14 +906,13 @@ void free_percpu(void *ptr) pcpu_free_area(chunk, off); - /* the chunk became fully free, kill one if there are other free ones */ + /* if there are more than one fully free chunks, wake up grim reaper */ if (chunk->free_size == pcpu_unit_size) { struct pcpu_chunk *pos; - list_for_each_entry(pos, - &pcpu_slot[pcpu_chunk_slot(chunk)], list) + list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) if (pos != chunk) { - pcpu_kill_chunk(pos); + schedule_work(&pcpu_reclaim_work); break; } } From ccea34b5d0fbab081496d1860f31acee99fa8a6d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 7 Mar 2009 00:44:13 +0900 Subject: [PATCH 676/682] percpu: finer grained locking to break deadlock and allow atomic free Impact: fix deadlock and allow atomic free Percpu allocation always uses GFP_KERNEL and whole alloc/free paths were protected by single mutex. All percpu allocations have been from GFP_KERNEL-safe context and the original allocator had this assumption too. However, by protecting both alloc and free paths with the same mutex, the new allocator creates free -> alloc -> GFP_KERNEL dependency which the original allocator didn't have. This can lead to deadlock if free is called from FS or IO paths. Also, in general, allocators are expected to allow free to be called from atomic context. This patch implements finer grained locking to break the deadlock and allow atomic free. For details, please read the "Synchronization rules" comment. While at it, also add CONTEXT: to function comments to describe which context they expect to be called from and what they do to it. This problem was reported by Thomas Gleixner and Peter Zijlstra. http://thread.gmane.org/gmane.linux.kernel/802384 Signed-off-by: Tejun Heo Reported-by: Thomas Gleixner Reported-by: Peter Zijlstra --- mm/percpu.c | 157 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 122 insertions(+), 35 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 4c8a419119d..bfe6a3afaf4 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -101,20 +102,28 @@ static struct pcpu_chunk *pcpu_reserved_chunk; static int pcpu_reserved_chunk_limit; /* - * One mutex to rule them all. + * Synchronization rules. * - * The following mutex is grabbed in the outermost public alloc/free - * interface functions and released only when the operation is - * complete. As such, every function in this file other than the - * outermost functions are called under pcpu_mutex. + * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former + * protects allocation/reclaim paths, chunks and chunk->page arrays. + * The latter is a spinlock and protects the index data structures - + * chunk slots, rbtree, chunks and area maps in chunks. * - * It can easily be switched to use spinlock such that only the area - * allocation and page population commit are protected with it doing - * actual [de]allocation without holding any lock. However, given - * what this allocator does, I think it's better to let them run - * sequentially. + * During allocation, pcpu_alloc_mutex is kept locked all the time and + * pcpu_lock is grabbed and released as necessary. All actual memory + * allocations are done using GFP_KERNEL with pcpu_lock released. + * + * Free path accesses and alters only the index data structures, so it + * can be safely called from atomic context. When memory needs to be + * returned to the system, free path schedules reclaim_work which + * grabs both pcpu_alloc_mutex and pcpu_lock, unlinks chunks to be + * reclaimed, release both locks and frees the chunks. Note that it's + * necessary to grab both locks to remove a chunk from circulation as + * allocation path might be referencing the chunk with only + * pcpu_alloc_mutex locked. */ -static DEFINE_MUTEX(pcpu_mutex); +static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ +static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ @@ -176,6 +185,9 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, * kzalloc() is used; otherwise, vmalloc() is used. The returned * memory is always zeroed. * + * CONTEXT: + * Does GFP_KERNEL allocation. + * * RETURNS: * Pointer to the allocated area on success, NULL on failure. */ @@ -215,6 +227,9 @@ static void pcpu_mem_free(void *ptr, size_t size) * New slot according to the changed state is determined and @chunk is * moved to the slot. Note that the reserved chunk is never put on * chunk slots. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) { @@ -260,6 +275,9 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr, * searchs for the chunk with the highest start address which isn't * beyond @addr. * + * CONTEXT: + * pcpu_lock. + * * RETURNS: * The address of the found chunk. */ @@ -300,6 +318,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @new: chunk to insert * * Insert @new into address rb tree. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) { @@ -319,6 +340,10 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) * A single allocation can split an area into three areas, so this * function makes sure that @chunk->map has at least two extra slots. * + * CONTEXT: + * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired + * if area map is extended. + * * RETURNS: * 0 if noop, 1 if successfully extended, -errno on failure. */ @@ -332,13 +357,25 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) if (chunk->map_alloc >= chunk->map_used + 2) return 0; + spin_unlock_irq(&pcpu_lock); + new_alloc = PCPU_DFL_MAP_ALLOC; while (new_alloc < chunk->map_used + 2) new_alloc *= 2; new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); - if (!new) + if (!new) { + spin_lock_irq(&pcpu_lock); return -ENOMEM; + } + + /* + * Acquire pcpu_lock and switch to new area map. Only free + * could have happened inbetween, so map_used couldn't have + * grown. + */ + spin_lock_irq(&pcpu_lock); + BUG_ON(new_alloc < chunk->map_used + 2); size = chunk->map_alloc * sizeof(chunk->map[0]); memcpy(new, chunk->map, size); @@ -371,6 +408,9 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) * is inserted after the target block. * * @chunk->map must have enough free slots to accomodate the split. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) @@ -406,6 +446,9 @@ static void pcpu_split_block(struct pcpu_chunk *chunk, int i, * * @chunk->map must have at least two free slots. * + * CONTEXT: + * pcpu_lock. + * * RETURNS: * Allocated offset in @chunk on success, -1 if no matching area is * found. @@ -495,6 +538,9 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) * Free area starting from @freeme to @chunk. Note that this function * only modifies the allocation map. It doesn't depopulate or unmap * the area. + * + * CONTEXT: + * pcpu_lock. */ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) { @@ -580,6 +626,9 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, * For each cpu, depopulate and unmap pages [@page_start,@page_end) * from @chunk. If @flush is true, vcache is flushed before unmapping * and tlb after. + * + * CONTEXT: + * pcpu_alloc_mutex. */ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, bool flush) @@ -658,6 +707,9 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. The area is cleared on return. + * + * CONTEXT: + * pcpu_alloc_mutex, does GFP_KERNEL allocation. */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { @@ -748,15 +800,16 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) * @align: alignment of area (max PAGE_SIZE) * @reserved: allocate from the reserved chunk if available * - * Allocate percpu area of @size bytes aligned at @align. Might - * sleep. Might trigger writeouts. + * Allocate percpu area of @size bytes aligned at @align. + * + * CONTEXT: + * Does GFP_KERNEL allocation. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ static void *pcpu_alloc(size_t size, size_t align, bool reserved) { - void *ptr = NULL; struct pcpu_chunk *chunk; int slot, off; @@ -766,27 +819,37 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) return NULL; } - mutex_lock(&pcpu_mutex); + mutex_lock(&pcpu_alloc_mutex); + spin_lock_irq(&pcpu_lock); /* serve reserved allocations from the reserved chunk if available */ if (reserved && pcpu_reserved_chunk) { chunk = pcpu_reserved_chunk; if (size > chunk->contig_hint || pcpu_extend_area_map(chunk) < 0) - goto out_unlock; + goto fail_unlock; off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; - goto out_unlock; + goto fail_unlock; } +restart: /* search through normal chunks */ for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { list_for_each_entry(chunk, &pcpu_slot[slot], list) { if (size > chunk->contig_hint) continue; - if (pcpu_extend_area_map(chunk) < 0) - goto out_unlock; + + switch (pcpu_extend_area_map(chunk)) { + case 0: + break; + case 1: + goto restart; /* pcpu_lock dropped, restart */ + default: + goto fail_unlock; + } + off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; @@ -794,27 +857,36 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) } /* hmmm... no space left, create a new chunk */ + spin_unlock_irq(&pcpu_lock); + chunk = alloc_pcpu_chunk(); if (!chunk) - goto out_unlock; + goto fail_unlock_mutex; + + spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); pcpu_chunk_addr_insert(chunk); - - off = pcpu_alloc_area(chunk, size, align); - if (off < 0) - goto out_unlock; + goto restart; area_found: + spin_unlock_irq(&pcpu_lock); + /* populate, map and clear the area */ if (pcpu_populate_chunk(chunk, off, size)) { + spin_lock_irq(&pcpu_lock); pcpu_free_area(chunk, off); - goto out_unlock; + goto fail_unlock; } - ptr = __addr_to_pcpu_ptr(chunk->vm->addr + off); -out_unlock: - mutex_unlock(&pcpu_mutex); - return ptr; + mutex_unlock(&pcpu_alloc_mutex); + + return __addr_to_pcpu_ptr(chunk->vm->addr + off); + +fail_unlock: + spin_unlock_irq(&pcpu_lock); +fail_unlock_mutex: + mutex_unlock(&pcpu_alloc_mutex); + return NULL; } /** @@ -825,6 +897,9 @@ out_unlock: * Allocate percpu area of @size bytes aligned at @align. Might * sleep. Might trigger writeouts. * + * CONTEXT: + * Does GFP_KERNEL allocation. + * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ @@ -843,6 +918,9 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); * percpu area if arch has set it up; otherwise, allocation is served * from the same dynamic area. Might sleep. Might trigger writeouts. * + * CONTEXT: + * Does GFP_KERNEL allocation. + * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ @@ -856,6 +934,9 @@ void *__alloc_reserved_percpu(size_t size, size_t align) * @work: unused * * Reclaim all fully free chunks except for the first one. + * + * CONTEXT: + * workqueue context. */ static void pcpu_reclaim(struct work_struct *work) { @@ -863,7 +944,8 @@ static void pcpu_reclaim(struct work_struct *work) struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; - mutex_lock(&pcpu_mutex); + mutex_lock(&pcpu_alloc_mutex); + spin_lock_irq(&pcpu_lock); list_for_each_entry_safe(chunk, next, head, list) { WARN_ON(chunk->immutable); @@ -876,7 +958,8 @@ static void pcpu_reclaim(struct work_struct *work) list_move(&chunk->list, &todo); } - mutex_unlock(&pcpu_mutex); + spin_unlock_irq(&pcpu_lock); + mutex_unlock(&pcpu_alloc_mutex); list_for_each_entry_safe(chunk, next, &todo, list) { pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); @@ -888,18 +971,22 @@ static void pcpu_reclaim(struct work_struct *work) * free_percpu - free percpu area * @ptr: pointer to area to free * - * Free percpu area @ptr. Might sleep. + * Free percpu area @ptr. + * + * CONTEXT: + * Can be called from atomic context. */ void free_percpu(void *ptr) { void *addr = __pcpu_ptr_to_addr(ptr); struct pcpu_chunk *chunk; + unsigned long flags; int off; if (!ptr) return; - mutex_lock(&pcpu_mutex); + spin_lock_irqsave(&pcpu_lock, flags); chunk = pcpu_chunk_addr_search(addr); off = addr - chunk->vm->addr; @@ -917,7 +1004,7 @@ void free_percpu(void *ptr) } } - mutex_unlock(&pcpu_mutex); + spin_unlock_irqrestore(&pcpu_lock, flags); } EXPORT_SYMBOL_GPL(free_percpu); From 3a450de1365d20afde406f0d9b2931a5e4a4fd6a Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 6 Mar 2009 17:30:56 -0600 Subject: [PATCH 677/682] x86: UV: remove uv_flush_tlb_others() WARN_ON In uv_flush_tlb_others() (arch/x86/kernel/tlb_uv.c), the "WARN_ON(!in_atomic())" fails if CONFIG_PREEMPT is not enabled. And CONFIG_PREEMPT is not enabled by default in the distribution that most UV owners will use. We could #ifdef CONFIG_PREEMPT the warning, but that is not good form. And there seems to be no suitable fix to in_atomic() when CONFIG_PREMPT is not on. As Ingo commented: > and we have no proper primitive to test for atomicity. (mainly > because we dont know about atomicity on a non-preempt kernel) So we drop the WARN_ON. Signed-off-by: Cliff Wickman Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f04549afcfe..d038b9c45cf 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -314,8 +314,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, int locals = 0; struct bau_desc *bau_desc; - WARN_ON(!in_atomic()); - cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); uv_cpu = uv_blade_processor_id(); From 19390c4d03688b9940a1836f06b76ec622b9cd6f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: [PATCH 678/682] linker script: define __per_cpu_load on all SMP capable archs Impact: __per_cpu_load available on all SMP capable archs Percpu now requires three symbols to be defined - __per_cpu_load, __per_cpu_start and __per_cpu_end. There were three archs which didn't have it. Update them as follows. * powerpc: can use generic PERCPU() macro. Compile tested for powerpc32, compile/boot tested for powerpc64. * ia64: can use generic PERCPU_VADDR() macro. __phys_per_cpu_start is identical to __per_cpu_load. Compile tested and symbol table looks identical after the change except for the additional __per_cpu_load. * arm: added explicit __per_cpu_load definition. Currently uses unified .init output section so can't use the generic macro. Dunno whether the unified .init ouput section is required by arch peculiarity so I left it alone. Please break it up and use PERCPU() if possible. Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Cc: Pat Gefre Cc: Russell King --- arch/arm/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 12 ++---------- arch/powerpc/kernel/vmlinux.lds.S | 9 +-------- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 85598f7da40..1602373e539 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -64,6 +64,7 @@ SECTIONS __initramfs_end = .; #endif . = ALIGN(4096); + __per_cpu_load = .; __per_cpu_start = .; *(.data.percpu.page_aligned) *(.data.percpu) diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index f45e4e508ec..3765efc5f96 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -213,17 +213,9 @@ SECTIONS { *(.data.cacheline_aligned) } /* Per-cpu data: */ - percpu : { } :percpu . = ALIGN(PERCPU_PAGE_SIZE); - __phys_per_cpu_start = .; - .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) - { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU_VADDR(PERCPU_ADDR, :percpu) + __phys_per_cpu_start = __per_cpu_load; . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits * into percpu page size */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 295ccc5e86b..67f07f45338 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -181,14 +181,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu.page_aligned) - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { From e01009833e22dc87075d770554b34d797843ed23 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: [PATCH 679/682] percpu: make x86 addr <-> pcpu ptr conversion macros generic Impact: generic addr <-> pcpu ptr conversion macros There's nothing arch specific about x86 __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr(). With proper __per_cpu_load and __per_cpu_start defined, they'll do the right thing regardless of actual layout. Move these macros from arch/x86/include/asm/percpu.h to mm/percpu.c and allow archs to override it as necessary. Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 8 -------- mm/percpu.c | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 8f1d2fbec1d..aee103b26d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -43,14 +43,6 @@ #else /* ...!ASSEMBLY */ #include -#include - -#define __addr_to_pcpu_ptr(addr) \ - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ - + (unsigned long)__per_cpu_start) -#define __pcpu_ptr_to_addr(ptr) \ - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ - - (unsigned long)__per_cpu_start) #ifdef CONFIG_SMP #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x diff --git a/mm/percpu.c b/mm/percpu.c index bfe6a3afaf4..c6f38a2afac 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -46,7 +46,8 @@ * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate - * regular address to percpu pointer and back + * regular address to percpu pointer and back if they need to be + * different from the default * * - use pcpu_setup_first_chunk() during percpu area initialization to * setup the first chunk containing the kernel static percpu area @@ -67,11 +68,24 @@ #include #include +#include #include #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ +/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ +#ifndef __addr_to_pcpu_ptr +#define __addr_to_pcpu_ptr(addr) \ + (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ + + (unsigned long)__per_cpu_start) +#endif +#ifndef __pcpu_ptr_to_addr +#define __pcpu_ptr_to_addr(ptr) \ + (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ + - (unsigned long)__per_cpu_start) +#endif + struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ struct rb_node rb_node; /* key is chunk->vm->addr */ From 6074d5b0a319fe8400ff079a3c289406ca024321 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: [PATCH 680/682] percpu: more flexibility for @dyn_size of pcpu_setup_first_chunk() Impact: cleanup, more flexibility for first chunk init Non-negative @dyn_size used to be allowed iff @unit_size wasn't auto. This restriction stemmed from implementation detail and made things a bit less intuitive. This patch allows @dyn_size to be specified regardless of @unit_size and swaps the positions of @dyn_size and @unit_size so that the parameter order makes more sense (static, reserved and dyn sizes followed by enclosing unit_size). While at it, add @unit_size >= PCPU_MIN_UNIT_SIZE sanity check. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 13 ++++++------- include/linux/percpu.h | 2 +- mm/percpu.c | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index efa615f2bf4..e41c51f6ada 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -233,8 +233,8 @@ proceed: "%zu bytes\n", vm.addr, static_size); ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, dyn_size, vm.addr, NULL); + PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, vm.addr, NULL); goto out_free_ar; enomem: @@ -315,9 +315,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); return pcpu_setup_first_chunk(pcpue_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, - pcpue_unit_size, dyn_size, - pcpue_ptr, NULL); + PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + pcpue_unit_size, pcpue_ptr, NULL); } /* @@ -375,8 +374,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pcpu4k_nr_static_pages, static_size); ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, - pcpu4k_populate_pte); + PERCPU_FIRST_CHUNK_RESERVE, -1, + -1, NULL, pcpu4k_populate_pte); goto out_free_ar; enomem: diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 54a968b4b92..fb455dcc59c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -107,7 +107,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, + ssize_t dyn_size, ssize_t unit_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); diff --git a/mm/percpu.c b/mm/percpu.c index c6f38a2afac..2f94661d3e3 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1027,8 +1027,8 @@ EXPORT_SYMBOL_GPL(free_percpu); * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * @base_addr: mapped address, NULL for auto * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @@ -1053,14 +1053,14 @@ EXPORT_SYMBOL_GPL(free_percpu); * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * + * @dyn_size, if non-negative, determines the number of bytes + * available for dynamic allocation in the first chunk. Specifying + * non-negative value makes percpu leave alone the area beyond + * @static_size + @reserved_size + @dyn_size. + * * @unit_size, if non-negative, specifies unit size and must be * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + @dyn_size. - * - * @dyn_size, if non-negative, limits the number of bytes available - * for dynamic allocation in the first chunk. Specifying non-negative - * value make percpu leave alone the area beyond @static_size + - * @reserved_size + @dyn_size. + * @reserved_size + if non-negative, @dyn_size. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -1083,12 +1083,14 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, + ssize_t dyn_size, ssize_t unit_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; static int smap[2], dmap[2]; + size_t size_sum = static_size + reserved_size + + (dyn_size >= 0 ? dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; int nr_pages; @@ -1099,20 +1101,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); if (unit_size >= 0) { - BUG_ON(unit_size < static_size + reserved_size + - (dyn_size >= 0 ? dyn_size : 0)); + BUG_ON(unit_size < size_sum); BUG_ON(unit_size & ~PAGE_MASK); - } else { - BUG_ON(dyn_size >= 0); + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + } else BUG_ON(base_addr); - } BUG_ON(base_addr && populate_pte_fn); if (unit_size >= 0) pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size + reserved_size)); + PFN_UP(size_sum)); pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; From 66c3a75772247c31feabefb724e082220a1ab060 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2009 16:27:48 +0900 Subject: [PATCH 681/682] percpu: generalize embedding first chunk setup helper Impact: code reorganization Separate out embedding first chunk setup helper from x86 embedding first chunk allocator and put it in mm/percpu.c. This will be used by the default percpu first chunk allocator and possibly by other archs. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 54 +++------------------ include/linux/percpu.h | 4 ++ mm/percpu.c | 86 ++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 48 deletions(-) diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e41c51f6ada..400331b50a5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -257,31 +257,13 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * module and dynamic reserves, and allocated as a contiguous area - * using bootmem allocator and used as-is without being mapped into - * vmalloc area. This enables the first chunk to piggy back on the - * linear physical PMD mapping and doesn't add any additional pressure - * to TLB. Note that if the needed size is smaller than the minimum - * unit size, the leftover is returned to the bootmem allocator. + * module and dynamic reserves and embedded into linear physical + * mapping so that it can use PMD mapping without additional TLB + * pressure. */ -static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); -} - static ssize_t __init setup_pcpu_embed(size_t static_size) { - unsigned int cpu; - size_t dyn_size; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; /* * If large page isn't supported, there's no benefit in doing @@ -291,32 +273,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) if (!cpu_has_pse || pcpu_need_numa()) return -EINVAL; - /* allocate and copy */ - pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, - PAGE_SIZE); - if (!pcpue_ptr) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; - - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); - memcpy(ptr, __per_cpu_load, static_size); - } - - /* we're ready, commit */ - pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - pcpue_unit_size, pcpue_ptr, NULL); + return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index fb455dcc59c..ee5615d6521 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -111,6 +111,10 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); +extern ssize_t __init pcpu_embed_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, ssize_t unit_size); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index 2f94661d3e3..1aa5d8fbca1 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1238,3 +1238,89 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); return pcpu_unit_size; } + +/* + * Embedding first chunk setup helper. + */ +static void *pcpue_ptr __initdata; +static size_t pcpue_size __initdata; +static size_t pcpue_unit_size __initdata; + +static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpue_size) + return NULL; + + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); +} + +/** + * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto + * + * This is a helper to ease setting up embedded first percpu chunk and + * can be called where pcpu_setup_first_chunk() is expected. + * + * If this function is used to setup the first chunk, it is allocated + * as a contiguous area using bootmem allocator and used as-is without + * being mapped into vmalloc area. This enables the first chunk to + * piggy back on the linear physical mapping which often uses larger + * page size. + * + * When @dyn_size is positive, dynamic area might be larger than + * specified to fill page alignment. Also, when @dyn_size is auto, + * @dyn_size does not fill the whole first chunk but only what's + * necessary for page alignment after static and reserved areas. + * + * If the needed size is smaller than the minimum or specified unit + * size, the leftover is returned to the bootmem allocator. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, + ssize_t dyn_size, ssize_t unit_size) +{ + unsigned int cpu; + + /* determine parameters and allocate */ + pcpue_size = PFN_ALIGN(static_size + reserved_size + + (dyn_size >= 0 ? dyn_size : 0)); + if (dyn_size != 0) + dyn_size = pcpue_size - static_size - reserved_size; + + if (unit_size >= 0) { + BUG_ON(unit_size < pcpue_size); + pcpue_unit_size = unit_size; + } else + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); + + pcpue_ptr = __alloc_bootmem_nopanic( + num_possible_cpus() * pcpue_unit_size, + PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!pcpue_ptr) + return -ENOMEM; + + /* return the leftover and copy */ + for_each_possible_cpu(cpu) { + void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + + free_bootmem(__pa(ptr + pcpue_size), + pcpue_unit_size - pcpue_size); + memcpy(ptr, __per_cpu_load, static_size); + } + + /* we're ready, commit */ + pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", + pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + + return pcpu_setup_first_chunk(pcpue_get_page, static_size, + reserved_size, dyn_size, + pcpue_unit_size, pcpue_ptr, NULL); +} From 60db56422043aaa455ac7f858ce23c273220f9d9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Mar 2009 14:36:54 +0900 Subject: [PATCH 682/682] percpu: fix spurious alignment WARN in legacy SMP percpu allocator Impact: remove spurious WARN on legacy SMP percpu allocator Commit f2a8205c4ef1af917d175c36a4097ae5587791c8 incorrectly added too tight WARN_ON_ONCE() on alignments for UP and legacy SMP percpu allocator. Commit e317603694bfd17b28a40de9d65e1a4ec12f816e fixed it for UP but legacy SMP allocator was forgotten. Fix it. Signed-off-by: Tejun Heo Reported-by: Sachin P. Sant --- mm/allocpercpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 3653c570232..1882923bc70 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -120,7 +120,7 @@ void *__alloc_percpu(size_t size, size_t align) * on it. Larger alignment should only be used for module * percpu sections on SMP for which this path isn't used. */ - WARN_ON_ONCE(align > __alignof__(unsigned long long)); + WARN_ON_ONCE(align > SMP_CACHE_BYTES); if (unlikely(!pdata)) return NULL;