public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] percpu updates
@ 2002-05-01 22:23 Brian Gerst
  2002-05-01 22:44 ` Andrew Morton
  2002-05-05  4:08 ` Andrew Morton
  0 siblings, 2 replies; 11+ messages in thread
From: Brian Gerst @ 2002-05-01 22:23 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Dave Jones, Linux-Kernel

[-- Attachment #1: Type: text/plain, Size: 121 bytes --]

These patches convert some of the existing arrays based on NR_CPUS to 
use the new per cpu code.

-- 

						Brian Gerst

[-- Attachment #2: percpu-page_states --]
[-- Type: text/plain, Size: 1857 bytes --]

diff -urN linux-2.5.12/include/linux/page-flags.h linux/include/linux/page-flags.h
--- linux-2.5.12/include/linux/page-flags.h	Wed May  1 08:40:14 2002
+++ linux/include/linux/page-flags.h	Wed May  1 11:51:43 2002
@@ -42,6 +42,8 @@
  * address space...
  */
 
+#include <linux/percpu.h>
+
 /*
  * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
  * locked- and dirty-page accounting.  The top eight bits of page->flags are
@@ -69,18 +71,20 @@
 /*
  * Global page accounting.  One instance per CPU.
  */
-extern struct page_state {
+struct page_state {
 	unsigned long nr_dirty;
 	unsigned long nr_locked;
 	unsigned long nr_pagecache;
-} ____cacheline_aligned_in_smp page_states[NR_CPUS];
+};
+
+extern struct page_state __per_cpu_data page_states;
 
 extern void get_page_state(struct page_state *ret);
 
 #define mod_page_state(member, delta)					\
 	do {								\
 		preempt_disable();					\
-		page_states[smp_processor_id()].member += (delta);	\
+		this_cpu(page_states).member += (delta);		\
 		preempt_enable();					\
 	} while (0)
 
diff -urN linux-2.5.12/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.5.12/mm/page_alloc.c	Wed May  1 08:40:14 2002
+++ linux/mm/page_alloc.c	Wed May  1 11:51:05 2002
@@ -576,7 +576,7 @@
  * The result is unavoidably approximate - it can change
  * during and after execution of this function.
  */
-struct page_state page_states[NR_CPUS] __cacheline_aligned;
+struct page_state __per_cpu_data page_states;
 EXPORT_SYMBOL(page_states);
 
 void get_page_state(struct page_state *ret)
@@ -590,7 +590,7 @@
 	for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) {
 		struct page_state *ps;
 
-		ps = &page_states[cpu_logical_map(pcpu)];
+		ps = &per_cpu(page_states,cpu_logical_map(pcpu));
 		ret->nr_dirty += ps->nr_dirty;
 		ret->nr_locked += ps->nr_locked;
 		ret->nr_pagecache += ps->nr_pagecache;

[-- Attachment #3: percpu-ratelimits --]
[-- Type: text/plain, Size: 855 bytes --]

diff -urN linux-2.5.12/mm/page-writeback.c linux/mm/page-writeback.c
--- linux-2.5.12/mm/page-writeback.c	Wed May  1 08:40:14 2002
+++ linux/mm/page-writeback.c	Wed May  1 10:56:24 2002
@@ -20,6 +20,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/sysrq.h>
+#include <linux/percpu.h>
 
 /*
  * Memory thresholds, in percentages
@@ -102,15 +103,11 @@
  */
 void balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
-	static struct rate_limit_struct {
-		int count;
-	} ____cacheline_aligned ratelimits[NR_CPUS];
-	int cpu;
+	static int __per_cpu_data ratelimits;
 
 	preempt_disable();
-	cpu = smp_processor_id();
-	if (ratelimits[cpu].count++ >= 32) {
-		ratelimits[cpu].count = 0;
+	if (this_cpu(ratelimits)++ >= 32) {
+		this_cpu(ratelimits) = 0;
 		preempt_enable();
 		balance_dirty_pages(mapping);
 		return;

[-- Attachment #4: percpu-runqueue --]
[-- Type: text/plain, Size: 964 bytes --]

diff -urN linux-2.5.12/kernel/sched.c linux/kernel/sched.c
--- linux-2.5.12/kernel/sched.c	Wed May  1 08:40:14 2002
+++ linux/kernel/sched.c	Wed May  1 11:53:07 2002
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
+#include <linux/percpu.h>
 
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -154,10 +155,18 @@
 	list_t migration_queue;
 } ____cacheline_aligned;
 
-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
+static struct runqueue __per_cpu_data runqueues;
+
+static inline struct runqueue *cpu_rq(int cpu)
+{
+	return &per_cpu(runqueues, cpu);
+}
+
+static inline struct runqueue *this_rq(void)
+{
+	return &this_cpu(runqueues);
+}
 
-#define cpu_rq(cpu)		(runqueues + (cpu))
-#define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq((p)->thread_info->cpu)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)

[-- Attachment #5: percpu-sockets --]
[-- Type: text/plain, Size: 1393 bytes --]

diff -urN linux-2.5.12-percpu/net/socket.c linux/net/socket.c
--- linux-2.5.12-percpu/net/socket.c	Wed Apr 10 19:59:40 2002
+++ linux/net/socket.c	Wed May  1 11:59:25 2002
@@ -74,6 +74,7 @@
 #include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/percpu.h>
 
 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 #include <linux/kmod.h>
@@ -181,10 +182,7 @@
  *	Statistics counters of the socket lists
  */
 
-static union {
-	int	counter;
-	char	__pad[SMP_CACHE_BYTES];
-} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
+static int __per_cpu_data sockets_in_use;
 
 /*
  *	Support routines. Move socket addresses back and forth across the kernel/user
@@ -498,7 +496,7 @@
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 
-	sockets_in_use[smp_processor_id()].counter++;
+	this_cpu(sockets_in_use)++;
 	return sock;
 }
 
@@ -530,7 +528,7 @@
 	if (sock->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
-	sockets_in_use[smp_processor_id()].counter--;
+	this_cpu(sockets_in_use)--;
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
@@ -1774,7 +1772,7 @@
 	int counter = 0;
 
 	for (cpu=0; cpu<smp_num_cpus; cpu++)
-		counter += sockets_in_use[cpu_logical_map(cpu)].counter;
+		counter += per_cpu(sockets_in_use,cpu_logical_map(cpu));
 
 	/* It can be negative, by the way. 8) */
 	if (counter < 0)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-01 22:23 [PATCH] percpu updates Brian Gerst
@ 2002-05-01 22:44 ` Andrew Morton
  2002-05-01 22:54   ` Brian Gerst
  2002-05-05  4:08 ` Andrew Morton
  1 sibling, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2002-05-01 22:44 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Linus Torvalds, Dave Jones, Linux-Kernel

Brian Gerst wrote:
> 
> These patches convert some of the existing arrays based on NR_CPUS to
> use the new per cpu code.
> 
> ...
> -extern struct page_state {
> +struct page_state {
>         unsigned long nr_dirty;
>         unsigned long nr_locked;
>         unsigned long nr_pagecache;
> -} ____cacheline_aligned_in_smp page_states[NR_CPUS];
> +};
> +
> +extern struct page_state __per_cpu_data page_states;

When I did this a couple of weeks back it failed in
mysterious ways and I ended up parking it.  Failure
symptoms included negative numbers being reported in
/proc/meminfo for "Locked" and "Dirty".

How well has this been tested?  (If the answer
is "not very" then please wait until I've tested
it out...)

-

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-01 22:44 ` Andrew Morton
@ 2002-05-01 22:54   ` Brian Gerst
  2002-05-01 23:05     ` Randy.Dunlap
  0 siblings, 1 reply; 11+ messages in thread
From: Brian Gerst @ 2002-05-01 22:54 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, Dave Jones, Linux-Kernel

Andrew Morton wrote:
> Brian Gerst wrote:
> 
>>These patches convert some of the existing arrays based on NR_CPUS to
>>use the new per cpu code.
>>
>>...
>>-extern struct page_state {
>>+struct page_state {
>>        unsigned long nr_dirty;
>>        unsigned long nr_locked;
>>        unsigned long nr_pagecache;
>>-} ____cacheline_aligned_in_smp page_states[NR_CPUS];
>>+};
>>+
>>+extern struct page_state __per_cpu_data page_states;
> 
> 
> When I did this a couple of weeks back it failed in
> mysterious ways and I ended up parking it.  Failure
> symptoms included negative numbers being reported in
> /proc/meminfo for "Locked" and "Dirty".
> 
> How well has this been tested?  (If the answer
> is "not very" then please wait until I've tested
> it out...)
> 
> -
> 

Well, the answer is not very.  I don't have an SMP machine to do 
thorough testing on.  The best I can do is boot an SMP kernel on a UP 
machine.  I did check the disassembly of vmlinux, and it looked like it 
would work as advertised.

-- 

						Brian Gerst


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-01 22:54   ` Brian Gerst
@ 2002-05-01 23:05     ` Randy.Dunlap
  2002-05-01 23:35       ` Alan Cox
  0 siblings, 1 reply; 11+ messages in thread
From: Randy.Dunlap @ 2002-05-01 23:05 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Andrew Morton, Dave Jones, Linux-Kernel

On Wed, 1 May 2002, Brian Gerst wrote:

| Andrew Morton wrote:
| > Brian Gerst wrote:
| >
| >>These patches convert some of the existing arrays based on NR_CPUS to
| >>use the new per cpu code.
| >>
| > When I did this a couple of weeks back it failed in
| > mysterious ways and I ended up parking it.  Failure
| > symptoms included negative numbers being reported in
| > /proc/meminfo for "Locked" and "Dirty".
| >
| > How well has this been tested?  (If the answer
| > is "not very" then please wait until I've tested
| > it out...)
|
| Well, the answer is not very.  I don't have an SMP machine to do
| thorough testing on.  The best I can do is boot an SMP kernel on a UP
| machine.  I did check the disassembly of vmlinux, and it looked like it
| would work as advertised.

uh, do you know where you could find/use some SMP machines,
gratis ?  maybe OSDL ?  hint hint.

(of course, you could just let akpm do it on his smp system,
as he suggested)

-- 
~Randy


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-01 23:05     ` Randy.Dunlap
@ 2002-05-01 23:35       ` Alan Cox
  2002-05-03 14:59         ` Timothy D. Witham
  0 siblings, 1 reply; 11+ messages in thread
From: Alan Cox @ 2002-05-01 23:35 UTC (permalink / raw)
  To: Randy.Dunlap; +Cc: Brian Gerst, Andrew Morton, Dave Jones, Linux-Kernel

> | machine.  I did check the disassembly of vmlinux, and it looked like it
> | would work as advertised.
> 
> uh, do you know where you could find/use some SMP machines,
> gratis ?  maybe OSDL ?  hint hint.

Dual pentium boxes are < $100 on ebay 8)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-01 23:35       ` Alan Cox
@ 2002-05-03 14:59         ` Timothy D. Witham
  0 siblings, 0 replies; 11+ messages in thread
From: Timothy D. Witham @ 2002-05-03 14:59 UTC (permalink / raw)
  To: Alan Cox; +Cc: Randy Dunlap, Brian Gerst, Andrew Morton, Dave Jones,
	Linux-Kernel

  But the machines at the OSDL cost him $0 and he can go up to 16 way
for his testing. 

Tim

On Wed, 2002-05-01 at 16:35, Alan Cox wrote:
> > | machine.  I did check the disassembly of vmlinux, and it looked like it
> > | would work as advertised.
> > 
> > uh, do you know where you could find/use some SMP machines,
> > gratis ?  maybe OSDL ?  hint hint.
> 
> Dual pentium boxes are < $100 on ebay 8)
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-- 
Timothy D. Witham - Lab Director - wookie@osdlab.org
Open Source Development Lab Inc - A non-profit corporation
15275 SW Koll Parkway - Suite H - Beaverton OR, 97006
(503)-626-2455 x11 (office)    (503)-702-2871     (cell)
(503)-626-2436     (fax)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-01 22:23 [PATCH] percpu updates Brian Gerst
  2002-05-01 22:44 ` Andrew Morton
@ 2002-05-05  4:08 ` Andrew Morton
  2002-05-05 16:38   ` Brian Gerst
  2002-05-06  7:27   ` Rusty Russell
  1 sibling, 2 replies; 11+ messages in thread
From: Andrew Morton @ 2002-05-05  4:08 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Linus Torvalds, Dave Jones, Linux-Kernel, Rusty Russell

Brian Gerst wrote:
> 
> These patches convert some of the existing arrays based on NR_CPUS to
> use the new per cpu code.
> 

Brian, I tested this patch (rediffed against 2.5.13, below)
on the quad Xeon and it failed.

The machine died when bringing up the secondary CPUs
("CPU#3 already started!" and "Unable to handle kernel...")

I backed out the sched.c part and the machine booted.  So
I guess the secondary CPU bringup code uses the scheduler
somehow.

And again, the numbers in /proc/meminfo are whacko:

LowFree:         94724 kB
SwapTotal:     4000040 kB
SwapFree:      3999700 kB
Dirty:            7232 kB
Writeback:    4294967264 kB

Which never happens with the open-coded per-cpu accumulators.
After a normal boot I see:

LowFree:         95804 kB
SwapTotal:     4000040 kB
SwapFree:      3999940 kB
Dirty:            1356 kB
Writeback:           0 kB


Now, it may be that some pages are being marked dirty before
the per-cpu areas are set up, but there's no way in which
any pages will have been marked for writeback by that time, so
that "-32" value is definitely wrong.

'fraid I have to do a whine-and-run on this problem, but
it does still appear that there is something fishy with
the percpu infrastructure.


--- 2.5.13/include/linux/page-flags.h~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/include/linux/page-flags.h	Thu May  2 19:23:11 2002
@@ -42,6 +42,8 @@
  * address space...
  */
 
+#include <linux/percpu.h>
+
 /*
  * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
  * locked- and dirty-page accounting.  The top eight bits of page->flags are
@@ -69,18 +71,20 @@
 /*
  * Global page accounting.  One instance per CPU.
  */
-extern struct page_state {
+struct page_state {
 	unsigned long nr_dirty;
 	unsigned long nr_writeback;
 	unsigned long nr_pagecache;
-} ____cacheline_aligned_in_smp page_states[NR_CPUS];
+};
+
+extern struct page_state __per_cpu_data page_states;
 
 extern void get_page_state(struct page_state *ret);
 
 #define mod_page_state(member, delta)					\
 	do {								\
 		preempt_disable();					\
-		page_states[smp_processor_id()].member += (delta);	\
+		this_cpu(page_states).member += (delta);		\
 		preempt_enable();					\
 	} while (0)
 
--- 2.5.13/kernel/sched.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/kernel/sched.c	Thu May  2 19:21:12 2002
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
+#include <linux/percpu.h>
 
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -154,10 +155,18 @@ struct runqueue {
 	list_t migration_queue;
 } ____cacheline_aligned;
 
-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
+static struct runqueue __per_cpu_data runqueues;
+
+static inline struct runqueue *cpu_rq(int cpu)
+{
+	return &per_cpu(runqueues, cpu);
+}
+
+static inline struct runqueue *this_rq(void)
+{
+	return &this_cpu(runqueues);
+}
 
-#define cpu_rq(cpu)		(runqueues + (cpu))
-#define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq((p)->thread_info->cpu)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
--- 2.5.13/mm/page_alloc.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/mm/page_alloc.c	Thu May  2 19:21:12 2002
@@ -576,7 +576,7 @@ unsigned long nr_buffermem_pages(void)
  * The result is unavoidably approximate - it can change
  * during and after execution of this function.
  */
-struct page_state page_states[NR_CPUS] __cacheline_aligned;
+struct page_state __per_cpu_data page_states;
 EXPORT_SYMBOL(page_states);
 
 void get_page_state(struct page_state *ret)
@@ -590,7 +590,7 @@ void get_page_state(struct page_state *r
 	for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) {
 		struct page_state *ps;
 
-		ps = &page_states[cpu_logical_map(pcpu)];
+		ps = &per_cpu(page_states,cpu_logical_map(pcpu));
 		ret->nr_dirty += ps->nr_dirty;
 		ret->nr_writeback += ps->nr_writeback;
 		ret->nr_pagecache += ps->nr_pagecache;
--- 2.5.13/mm/page-writeback.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/mm/page-writeback.c	Thu May  2 19:22:25 2002
@@ -20,6 +20,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/sysrq.h>
+#include <linux/percpu.h>
 
 /*
  * Memory thresholds, in percentages
@@ -103,15 +104,12 @@ void balance_dirty_pages(struct address_
  */
 void balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
-	static struct rate_limit_struct {
-		int count;
-	} ____cacheline_aligned ratelimits[NR_CPUS];
-	int cpu;
+	static int __per_cpu_data ratelimits;
 
 	preempt_disable();
 	cpu = smp_processor_id();
-	if (ratelimits[cpu].count++ >= 1000) {
-		ratelimits[cpu].count = 0;
+	if (this_cpu(ratelimits)++ >= 1000) {
+		this_cpu(ratelimits) = 0;
 		preempt_enable();
 		balance_dirty_pages(mapping);
 		return;
--- 2.5.13/net/socket.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/net/socket.c	Thu May  2 19:21:12 2002
@@ -74,6 +74,7 @@
 #include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/percpu.h>
 
 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 #include <linux/kmod.h>
@@ -181,10 +182,7 @@ static __inline__ void net_family_read_u
  *	Statistics counters of the socket lists
  */
 
-static union {
-	int	counter;
-	char	__pad[SMP_CACHE_BYTES];
-} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
+static int __per_cpu_data sockets_in_use;
 
 /*
  *	Support routines. Move socket addresses back and forth across the kernel/user
@@ -498,7 +496,7 @@ struct socket *sock_alloc(void)
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 
-	sockets_in_use[smp_processor_id()].counter++;
+	this_cpu(sockets_in_use)++;
 	return sock;
 }
 
@@ -530,7 +528,7 @@ void sock_release(struct socket *sock)
 	if (sock->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
-	sockets_in_use[smp_processor_id()].counter--;
+	this_cpu(sockets_in_use)--;
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
@@ -1774,7 +1772,7 @@ int socket_get_info(char *buffer, char *
 	int counter = 0;
 
 	for (cpu=0; cpu<smp_num_cpus; cpu++)
-		counter += sockets_in_use[cpu_logical_map(cpu)].counter;
+		counter += per_cpu(sockets_in_use,cpu_logical_map(cpu));
 
 	/* It can be negative, by the way. 8) */
 	if (counter < 0)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-05  4:08 ` Andrew Morton
@ 2002-05-05 16:38   ` Brian Gerst
  2002-05-06  8:57     ` Andrew Morton
  2002-05-06  7:27   ` Rusty Russell
  1 sibling, 1 reply; 11+ messages in thread
From: Brian Gerst @ 2002-05-05 16:38 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, Dave Jones, Linux-Kernel, Rusty Russell

[-- Attachment #1: Type: text/plain, Size: 1556 bytes --]

Andrew Morton wrote:
> Brian Gerst wrote:
> 
>>These patches convert some of the existing arrays based on NR_CPUS to
>>use the new per cpu code.
>>
> 
> 
> Brian, I tested this patch (rediffed against 2.5.13, below)
> on the quad Xeon and it failed.
> 
> The machine died when bringing up the secondary CPUs
> ("CPU#3 already started!" and "Unable to handle kernel...")
> 
> I backed out the sched.c part and the machine booted.  So
> I guess the secondary CPU bringup code uses the scheduler
> somehow.
> 
> And again, the numbers in /proc/meminfo are whacko:
> 
> LowFree:         94724 kB
> SwapTotal:     4000040 kB
> SwapFree:      3999700 kB
> Dirty:            7232 kB
> Writeback:    4294967264 kB
> 
> Which never happens with the open-coded per-cpu accumulators.
> After a normal boot I see:
> 
> LowFree:         95804 kB
> SwapTotal:     4000040 kB
> SwapFree:      3999940 kB
> Dirty:            1356 kB
> Writeback:           0 kB
> 
> 
> Now, it may be that some pages are being marked dirty before
> the per-cpu areas are set up, but there's no way in which
> any pages will have been marked for writeback by that time, so
> that "-32" value is definitely wrong.
> 
> 'fraid I have to do a whine-and-run on this problem, but
> it does still appear that there is something fishy with
> the percpu infrastructure.
>

Andrew, could you try this patch?  I suspect something in setup_arch() 
is touching the per cpu area before it gets copied for the other cpus. 
This patch makes certain the boot cpu area is setup ASAP.
-- 

						Brian Gerst

[-- Attachment #2: percpu-boot --]
[-- Type: text/plain, Size: 2164 bytes --]

diff -urN linux-2.5.13/arch/i386/vmlinux.lds linux/arch/i386/vmlinux.lds
--- linux-2.5.13/arch/i386/vmlinux.lds	Thu Mar  7 21:18:16 2002
+++ linux/arch/i386/vmlinux.lds	Sun May  5 11:46:26 2002
@@ -57,10 +57,13 @@
 	*(.initcall7.init)
   }
   __initcall_end = .;
+
   . = ALIGN(32);
   __per_cpu_start = .;
   .data.percpu  : { *(.data.percpu) }
+  . = ALIGN(32);
   __per_cpu_end = .;
+
   . = ALIGN(4096);
   __init_end = .;
 
@@ -70,6 +73,10 @@
   . = ALIGN(32);
   .data.cacheline_aligned : { *(.data.cacheline_aligned) }
 
+  . = ALIGN(32);
+  __cpu0_data = .;
+  .data.cpu0 : { . += SIZEOF(.data.percpu); }
+
   __bss_start = .;		/* BSS */
   .bss : {
 	*(.bss)
diff -urN linux-2.5.13/init/main.c linux/init/main.c
--- linux-2.5.13/init/main.c	Wed May  1 08:40:14 2002
+++ linux/init/main.c	Sun May  5 12:27:38 2002
@@ -272,28 +272,40 @@
 #define smp_init()	do { } while (0)
 #endif
 
+static inline void setup_boot_cpu_area(void) { }
 static inline void setup_per_cpu_areas(void) { }
 
 #else
 
 #ifdef __GENERIC_PER_CPU
+/* Created by linker magic */
+extern char __per_cpu_start[], __per_cpu_end[], __cpu0_data[];
 unsigned long __per_cpu_offset[NR_CPUS];
 
+static void __init setup_boot_cpu_area(void)
+{
+	unsigned long size;
+
+	size = __per_cpu_end - __per_cpu_start;
+	if (!size)
+		return;
+	__per_cpu_offset[0] = __cpu0_data - __per_cpu_start;
+	memcpy(__cpu0_data, __per_cpu_start, size);
+}
+
 static void __init setup_per_cpu_areas(void)
 {
 	unsigned long size, i;
 	char *ptr;
-	/* Created by linker magic */
-	extern char __per_cpu_start[], __per_cpu_end[];
 
 	/* Copy section for each CPU (we discard the original) */
-	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+	size = __per_cpu_end - __per_cpu_start;
 	if (!size)
 		return;
 
 	ptr = alloc_bootmem(size * NR_CPUS);
 
-	for (i = 0; i < NR_CPUS; i++, ptr += size) {
+	for (i = 1; i < NR_CPUS; i++, ptr += size) {
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, size);
 	}
@@ -340,6 +352,7 @@
  * enable them
  */
 	lock_kernel();
+	setup_boot_cpu_area();
 	printk(linux_banner);
 	setup_arch(&command_line);
 	setup_per_cpu_areas();

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-05  4:08 ` Andrew Morton
  2002-05-05 16:38   ` Brian Gerst
@ 2002-05-06  7:27   ` Rusty Russell
  1 sibling, 0 replies; 11+ messages in thread
From: Rusty Russell @ 2002-05-06  7:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: bgerst, torvalds, davej, linux-kernel

On Sat, 04 May 2002 21:08:34 -0700
Andrew Morton <akpm@zip.com.au> wrote:

> And again, the numbers in /proc/meminfo are whacko:
> 
> LowFree:         94724 kB
> SwapTotal:     4000040 kB
> SwapFree:      3999700 kB
> Dirty:            7232 kB
> Writeback:    4294967264 kB

Hmmm.... I've just applied the page-flags.h and page_alloc.c changes,
and I don't get this problem at all on my 2xi386 box on 2.5.13.  I 
even changed the name of "page_states" to "xpage_states" to find any
other references, and inserted a BUG() if it was being dereferenced
before per-cpu offsets were initialized.

Here's the diff: do you see problems when booting with this?
Rusty.
-- 
   there are those who do and those who hang on and you don't see too
   many doers quoting their contemporaries.  -- Larry McVoy

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.13/include/asm-generic/percpu.h working-2.5.13-page-per-cpu/include/asm-generic/percpu.h
--- linux-2.5.13/include/asm-generic/percpu.h	Mon Apr 15 11:47:44 2002
+++ working-2.5.13-page-per-cpu/include/asm-generic/percpu.h	Mon May  6 17:00:55 2002
@@ -5,6 +5,7 @@
 #include <linux/compiler.h>
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
+extern int per_cpu_areas_done;
 
 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*RELOC_HIDE(&var, __per_cpu_offset[cpu]))
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.13/include/linux/page-flags.h working-2.5.13-page-per-cpu/include/linux/page-flags.h
--- linux-2.5.13/include/linux/page-flags.h	Mon May  6 11:12:01 2002
+++ working-2.5.13-page-per-cpu/include/linux/page-flags.h	Mon May  6 17:01:43 2002
@@ -42,6 +42,8 @@
  * address space...
  */
 
+#include <linux/percpu.h>
+
 /*
  * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
  * locked- and dirty-page accounting.  The top eight bits of page->flags are
@@ -69,18 +71,21 @@
 /*
  * Global page accounting.  One instance per CPU.
  */
-extern struct page_state {
+struct page_state {
 	unsigned long nr_dirty;
 	unsigned long nr_writeback;
 	unsigned long nr_pagecache;
-} ____cacheline_aligned_in_smp page_states[NR_CPUS];
+};
+
+extern struct page_state __per_cpu_data xpage_states;
 
 extern void get_page_state(struct page_state *ret);
 
 #define mod_page_state(member, delta)					\
 	do {								\
 		preempt_disable();					\
-		page_states[smp_processor_id()].member += (delta);	\
+		if (!per_cpu_areas_done) BUG();				\
+		this_cpu(xpage_states).member += (delta);		\
 		preempt_enable();					\
 	} while (0)
 
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.13/init/main.c working-2.5.13-page-per-cpu/init/main.c
--- linux-2.5.13/init/main.c	Wed May  1 15:09:29 2002
+++ working-2.5.13-page-per-cpu/init/main.c	Mon May  6 16:55:22 2002
@@ -278,6 +278,7 @@
 
 #ifdef __GENERIC_PER_CPU
 unsigned long __per_cpu_offset[NR_CPUS];
+int per_cpu_areas_done;
 
 static void __init setup_per_cpu_areas(void)
 {
@@ -297,6 +298,7 @@
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, size);
 	}
+	per_cpu_areas_done = 1;
 }
 #endif /* !__GENERIC_PER_CPU */
 
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.13/mm/page_alloc.c working-2.5.13-page-per-cpu/mm/page_alloc.c
--- linux-2.5.13/mm/page_alloc.c	Mon May  6 11:12:01 2002
+++ working-2.5.13-page-per-cpu/mm/page_alloc.c	Mon May  6 17:02:20 2002
@@ -576,8 +576,8 @@
  * The result is unavoidably approximate - it can change
  * during and after execution of this function.
  */
-struct page_state page_states[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(page_states);
+struct page_state __per_cpu_data xpage_states;
+EXPORT_SYMBOL(xpage_states);
 
 void get_page_state(struct page_state *ret)
 {
@@ -590,7 +590,7 @@
 	for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) {
 		struct page_state *ps;
 
-		ps = &page_states[cpu_logical_map(pcpu)];
+		ps = &per_cpu(xpage_states,cpu_logical_map(pcpu));
 		ret->nr_dirty += ps->nr_dirty;
 		ret->nr_writeback += ps->nr_writeback;
 		ret->nr_pagecache += ps->nr_pagecache;

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-05 16:38   ` Brian Gerst
@ 2002-05-06  8:57     ` Andrew Morton
  2002-05-06 12:44       ` Brian Gerst
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2002-05-06  8:57 UTC (permalink / raw)
  To: Brian Gerst; +Cc: Linus Torvalds, Dave Jones, Linux-Kernel, Rusty Russell

Brian Gerst wrote:
> 
> Andrew Morton wrote:
> > Brian Gerst wrote:
> >
> >>These patches convert some of the existing arrays based on NR_CPUS to
> >>use the new per cpu code.
> >>
> ...
> Andrew, could you try this patch?  I suspect something in setup_arch()
> is touching the per cpu area before it gets copied for the other cpus.
> This patch makes certain the boot cpu area is setup ASAP.

This little recidivist is still using gcc-2.91.66.  It is not
placing the percpu data in the correct section.  It is not 
entirely obvious why.

I downgraded to 2.95.3 (build time went from 2:45 to 3:15, giving
nothing in return) and Brian's patch worked OK.

ho hum.  So.  2.91.66, rest in peace.  I shall miss you.


--- linux-2.5.14/init/main.c	Tue Apr 30 17:56:30 2002
+++ 25/init/main.c	Mon May  6 01:55:32 2002
@@ -51,7 +51,7 @@
  * To avoid associated bogus bug reports, we flatly refuse to compile
  * with a gcc that is known to be too old from the very beginning.
  */
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
 #error Sorry, your GCC is too old. It builds incorrect kernels.
 #endif
 

-

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] percpu updates
  2002-05-06  8:57     ` Andrew Morton
@ 2002-05-06 12:44       ` Brian Gerst
  0 siblings, 0 replies; 11+ messages in thread
From: Brian Gerst @ 2002-05-06 12:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linus Torvalds, Dave Jones, Linux-Kernel, Rusty Russell

Andrew Morton wrote:
> Brian Gerst wrote:
> 
>>Andrew Morton wrote:
>>
>>>Brian Gerst wrote:
>>>
>>>
>>>>These patches convert some of the existing arrays based on NR_CPUS to
>>>>use the new per cpu code.
>>>>
>>>
>>...
>>Andrew, could you try this patch?  I suspect something in setup_arch()
>>is touching the per cpu area before it gets copied for the other cpus.
>>This patch makes certain the boot cpu area is setup ASAP.
> 
> 
> This little recidivist is still using gcc-2.91.66.  It is not
> placing the percpu data in the correct section.  It is not 
> entirely obvious why.
> 
> I downgraded to 2.95.3 (build time went from 2:45 to 3:15, giving
> nothing in return) and Brian's patch worked OK.
> 
> ho hum.  So.  2.91.66, rest in peace.  I shall miss you.

Aha.  I was starting to wonder about the compiler.

-- 

						Brian Gerst


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2002-05-06 12:47 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-05-01 22:23 [PATCH] percpu updates Brian Gerst
2002-05-01 22:44 ` Andrew Morton
2002-05-01 22:54   ` Brian Gerst
2002-05-01 23:05     ` Randy.Dunlap
2002-05-01 23:35       ` Alan Cox
2002-05-03 14:59         ` Timothy D. Witham
2002-05-05  4:08 ` Andrew Morton
2002-05-05 16:38   ` Brian Gerst
2002-05-06  8:57     ` Andrew Morton
2002-05-06 12:44       ` Brian Gerst
2002-05-06  7:27   ` Rusty Russell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox