From a4a6198b80cf82eb8160603c98da218d1bd5e104 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 24 Mar 2006 03:15:54 -0800 Subject: [PATCH] [PATCH] tvec_bases too large for per-cpu data With internal Xen-enabled kernels we see the kernel's static per-cpu data area exceed the limit of 32k on x86-64, and even native x86-64 kernels get fairly close to that limit. I generally question whether it is reasonable to have data structures several kb in size allocated as per-cpu data when the space there is rather limited. The biggest arch-independent consumer is tvec_bases (over 4k on 32-bit archs, over 8k on 64-bit ones), which now gets converted to use dynamically allocated memory instead. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/kernel/timer.c b/kernel/timer.c index 2410c18dbeb..4427e725ccd 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -86,7 +86,8 @@ struct tvec_t_base_s { } ____cacheline_aligned_in_smp; typedef struct tvec_t_base_s tvec_base_t; -static DEFINE_PER_CPU(tvec_base_t, tvec_bases); +static DEFINE_PER_CPU(tvec_base_t *, tvec_bases); +static tvec_base_t boot_tvec_bases; static inline void set_running_timer(tvec_base_t *base, struct timer_list *timer) @@ -157,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base); void fastcall init_timer(struct timer_list *timer) { timer->entry.next = NULL; - timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base; + timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base; } EXPORT_SYMBOL(init_timer); @@ -218,7 +219,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires) ret = 1; } - new_base = &__get_cpu_var(tvec_bases); + new_base = __get_cpu_var(tvec_bases); if (base != &new_base->t_base) { /* @@ -258,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer); */ void add_timer_on(struct timer_list *timer, int cpu) { - tvec_base_t *base = &per_cpu(tvec_bases, cpu); + tvec_base_t *base = per_cpu(tvec_bases, cpu); unsigned long flags; BUG_ON(timer_pending(timer) || !timer->function); @@ -504,7 +505,7 @@ unsigned long next_timer_interrupt(void) } hr_expires += jiffies; - base = &__get_cpu_var(tvec_bases); + base = __get_cpu_var(tvec_bases); spin_lock(&base->t_base.lock); expires = base->timer_jiffies + (LONG_MAX >> 1); list = NULL; @@ -901,7 +902,7 @@ EXPORT_SYMBOL(xtime_lock); */ static void run_timer_softirq(struct softirq_action *h) { - tvec_base_t *base = &__get_cpu_var(tvec_bases); + tvec_base_t *base = __get_cpu_var(tvec_bases); hrtimer_run_queues(); if (time_after_eq(jiffies, base->timer_jiffies)) @@ -1256,12 +1257,32 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info) return 0; } -static void __devinit init_timers_cpu(int cpu) +static int __devinit init_timers_cpu(int cpu) { int j; tvec_base_t *base; - base = &per_cpu(tvec_bases, cpu); + base = per_cpu(tvec_bases, cpu); + if (!base) { + static char boot_done; + + /* + * Cannot do allocation in init_timers as that runs before the + * allocator initializes (and would waste memory if there are + * more possible CPUs than will ever be installed/brought up). + */ + if (boot_done) { + base = kmalloc_node(sizeof(*base), GFP_KERNEL, + cpu_to_node(cpu)); + if (!base) + return -ENOMEM; + memset(base, 0, sizeof(*base)); + } else { + base = &boot_tvec_bases; + boot_done = 1; + } + per_cpu(tvec_bases, cpu) = base; + } spin_lock_init(&base->t_base.lock); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); @@ -1273,6 +1294,7 @@ static void __devinit init_timers_cpu(int cpu) INIT_LIST_HEAD(base->tv1.vec + j); base->timer_jiffies = jiffies; + return 0; } #ifdef CONFIG_HOTPLUG_CPU @@ -1295,8 +1317,8 @@ static void __devinit migrate_timers(int cpu) int i; BUG_ON(cpu_online(cpu)); - old_base = &per_cpu(tvec_bases, cpu); - new_base = &get_cpu_var(tvec_bases); + old_base = per_cpu(tvec_bases, cpu); + new_base = get_cpu_var(tvec_bases); local_irq_disable(); spin_lock(&new_base->t_base.lock); @@ -1326,7 +1348,8 @@ static int __devinit timer_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; switch(action) { case CPU_UP_PREPARE: - init_timers_cpu(cpu); + if (init_timers_cpu(cpu) < 0) + return NOTIFY_BAD; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: