All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@novell.com>
To: <linux-kernel@vger.kernel.org>
Subject: [PATCH] tvec_bases too large for per-cpu data
Date: Wed, 18 Jan 2006 06:11:36 -0700	[thread overview]
Message-ID: <43CE4C98.76F0.0078.0@novell.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 621 bytes --]

From: Jan Beulich <jbeulich@novell.com>

With internal Xen-enabled kernels we see the kernel's static per-cpu data area
exceed the limit of 32k on x86-64, and even native x86-64 kernels get fairly
close to that limit. I generally question whether it is reasonable to have
data structures several kb in size allocated as per-cpu data when the space
there is rather limited.
The biggest arch-independent consumer is tvec_bases (over 4k on 32-bit archs,
over 8k on 64-bit ones), which now gets converted to use dynamically allocated
memory instead.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

(actual patch attached)

[-- Attachment #2: linux-2.6.16-rc1-per-cpu-tvec_bases.patch --]
[-- Type: application/octet-stream, Size: 4159 bytes --]

From: Jan Beulich <jbeulich@novell.com>

With internal Xen-enabled kernels we see the kernel's static per-cpu data area
exceed the limit of 32k on x86-64, and even native x86-64 kernels get fairly
close to that limit. I generally question whether it is reasonable to have
data structures several kb in size allocated as per-cpu data when the space
there is rather limited.
The biggest arch-independent consumer is tvec_bases (over 4k on 32-bit archs,
over 8k on 64-bit ones), which now gets converted to use dynamically allocated
memory instead.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/kernel/timer.c 2.6.16-rc1-per-cpu-tvec_bases/kernel/timer.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/kernel/timer.c	2006-01-18 12:39:13.000000000 +0100
+++ 2.6.16-rc1-per-cpu-tvec_bases/kernel/timer.c	2006-01-18 13:53:28.000000000 +0100
@@ -86,7 +86,8 @@ struct tvec_t_base_s {
 } ____cacheline_aligned_in_smp;
 
 typedef struct tvec_t_base_s tvec_base_t;
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
+static DEFINE_PER_CPU(tvec_base_t *, tvec_bases);
+static tvec_base_t boot_tvec_bases;
 
 static inline void set_running_timer(tvec_base_t *base,
 					struct timer_list *timer)
@@ -157,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base);
 void fastcall init_timer(struct timer_list *timer)
 {
 	timer->entry.next = NULL;
-	timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
+	timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base;
 }
 EXPORT_SYMBOL(init_timer);
 
@@ -218,7 +219,7 @@ int __mod_timer(struct timer_list *timer
 		ret = 1;
 	}
 
-	new_base = &__get_cpu_var(tvec_bases);
+	new_base = __get_cpu_var(tvec_bases);
 
 	if (base != &new_base->t_base) {
 		/*
@@ -258,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-	tvec_base_t *base = &per_cpu(tvec_bases, cpu);
+	tvec_base_t *base = per_cpu(tvec_bases, cpu);
   	unsigned long flags;
 
   	BUG_ON(timer_pending(timer) || !timer->function);
@@ -492,7 +493,7 @@ unsigned long next_timer_interrupt(void)
 	tvec_t *varray[4];
 	int i, j;
 
-	base = &__get_cpu_var(tvec_bases);
+	base = __get_cpu_var(tvec_bases);
 	spin_lock(&base->t_base.lock);
 	expires = base->timer_jiffies + (LONG_MAX >> 1);
 	list = 0;
@@ -856,7 +857,7 @@ EXPORT_SYMBOL(xtime_lock);
  */
 static void run_timer_softirq(struct softirq_action *h)
 {
-	tvec_base_t *base = &__get_cpu_var(tvec_bases);
+	tvec_base_t *base = __get_cpu_var(tvec_bases);
 
  	hrtimer_run_queues();
 	if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1209,12 +1210,31 @@ asmlinkage long sys_sysinfo(struct sysin
 	return 0;
 }
 
-static void __devinit init_timers_cpu(int cpu)
+static int __devinit init_timers_cpu(int cpu)
 {
 	int j;
 	tvec_base_t *base;
 
-	base = &per_cpu(tvec_bases, cpu);
+	base = per_cpu(tvec_bases, cpu);
+	if (likely(!base)) {
+		static char boot_done;
+
+		if (likely(boot_done)) {
+#ifdef CONFIG_NUMA
+			base = kmalloc_node(sizeof(*base), GFP_KERNEL, cpu_to_node(cpu));
+			if (!base)
+#endif
+				base = kmalloc(sizeof(*base), GFP_KERNEL);
+			if (!base)
+				return -ENOMEM;
+			memset(base, 0, sizeof(*base));
+		}
+		else {
+			base = &boot_tvec_bases;
+			boot_done = 1;
+		}
+		per_cpu(tvec_bases, cpu) = base;
+	}
 	spin_lock_init(&base->t_base.lock);
 	for (j = 0; j < TVN_SIZE; j++) {
 		INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1226,6 +1246,7 @@ static void __devinit init_timers_cpu(in
 		INIT_LIST_HEAD(base->tv1.vec + j);
 
 	base->timer_jiffies = jiffies;
+	return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -1248,8 +1269,8 @@ static void __devinit migrate_timers(int
 	int i;
 
 	BUG_ON(cpu_online(cpu));
-	old_base = &per_cpu(tvec_bases, cpu);
-	new_base = &get_cpu_var(tvec_bases);
+	old_base = per_cpu(tvec_bases, cpu);
+	new_base = get_cpu_var(tvec_bases);
 
 	local_irq_disable();
 	spin_lock(&new_base->t_base.lock);
@@ -1279,7 +1300,8 @@ static int __devinit timer_cpu_notify(st
 	long cpu = (long)hcpu;
 	switch(action) {
 	case CPU_UP_PREPARE:
-		init_timers_cpu(cpu);
+		if (init_timers_cpu(cpu) < 0)
+			return NOTIFY_BAD;
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:

             reply	other threads:[~2006-01-18 13:11 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-18 13:11 Jan Beulich [this message]
2006-01-21  7:25 ` [PATCH] tvec_bases too large for per-cpu data Andrew Morton
2006-01-23 10:31   ` Jan Beulich
2006-01-23 10:57     ` Andrew Morton
2006-01-24  8:33       ` Jan Beulich
2006-01-24  8:58         ` Andrew Morton
2006-01-24 14:46           ` [PATCH] [SMP] reduce size of percpudata, and make sure per_cpu(object, not_possible_cpu) cause an invalid memory reference Eric Dumazet
2006-01-24 14:53             ` Andi Kleen
2006-02-01  9:21             ` [PATCH] [SMP] __GENERIC_PER_CPU changes Eric Dumazet
2006-01-30  8:43       ` [PATCH] tvec_bases too large for per-cpu data Jan Beulich
2006-01-31 22:27         ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=43CE4C98.76F0.0078.0@novell.com \
    --to=jbeulich@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.