Re: [PATCH] percpu updates

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Andrew Morton <akpm@zip.com.au>
To: Brian Gerst <bgerst@didntduck.org>
Cc: Linus Torvalds <torvalds@transmeta.com>,
	Dave Jones <davej@suse.de>,
	Linux-Kernel <linux-kernel@vger.kernel.org>,
	Rusty Russell <rusty@rustcorp.com.au>
Subject: Re: [PATCH] percpu updates
Date: Sat, 04 May 2002 21:08:34 -0700	[thread overview]
Message-ID: <3CD4B042.A4355FD3@zip.com.au> (raw)
In-Reply-To: <3CD06ACE.1090402@didntduck.org>

Brian Gerst wrote:
> 
> These patches convert some of the existing arrays based on NR_CPUS to
> use the new per cpu code.
> 

Brian, I tested this patch (rediffed against 2.5.13, below)
on the quad Xeon and it failed.

The machine died when bringing up the secondary CPUs
("CPU#3 already started!" and "Unable to handle kernel...")

I backed out the sched.c part and the machine booted.  So
I guess the secondary CPU bringup code uses the scheduler
somehow.

And again, the numbers in /proc/meminfo are whacko:

LowFree:         94724 kB
SwapTotal:     4000040 kB
SwapFree:      3999700 kB
Dirty:            7232 kB
Writeback:    4294967264 kB

That never happens with the open-coded per-cpu accumulators;
after a normal boot with them I see:

LowFree:         95804 kB
SwapTotal:     4000040 kB
SwapFree:      3999940 kB
Dirty:            1356 kB
Writeback:           0 kB


Now, it may be that some pages are being marked dirty before
the per-cpu areas are set up, but there's no way in which
any pages will have been marked for writeback by that time, so
that "-32" value (4294967264 is -32 wrapped around as an unsigned
32-bit number) is definitely wrong.

'fraid I have to do a whine-and-run on this problem, but
it does still appear that there is something fishy with
the percpu infrastructure.


--- 2.5.13/include/linux/page-flags.h~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/include/linux/page-flags.h	Thu May  2 19:23:11 2002
@@ -42,6 +42,8 @@
  * address space...
  */
 
+#include <linux/percpu.h>
+
 /*
  * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
  * locked- and dirty-page accounting.  The top eight bits of page->flags are
@@ -69,18 +71,20 @@
 /*
  * Global page accounting.  One instance per CPU.
  */
-extern struct page_state {
+struct page_state {
 	unsigned long nr_dirty;
 	unsigned long nr_writeback;
 	unsigned long nr_pagecache;
-} ____cacheline_aligned_in_smp page_states[NR_CPUS];
+};
+
+extern struct page_state __per_cpu_data page_states;
 
 extern void get_page_state(struct page_state *ret);
 
 #define mod_page_state(member, delta)					\
 	do {								\
 		preempt_disable();					\
-		page_states[smp_processor_id()].member += (delta);	\
++ 		this_cpu(page_states).member += (delta);		\
 		preempt_enable();					\
 	} while (0)
 
--- 2.5.13/kernel/sched.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/kernel/sched.c	Thu May  2 19:21:12 2002
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
+#include <linux/percpu.h>
 
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -154,10 +155,18 @@ struct runqueue {
 	list_t migration_queue;
 } ____cacheline_aligned;
 
-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
+static struct runqueue __per_cpu_data runqueues;
+
+static inline struct runqueue *cpu_rq(int cpu)
+{
+	return &per_cpu(runqueues, cpu);
+}
+
+static inline struct runqueue *this_rq(void)
+{
+	return &this_cpu(runqueues);
+}
 
-#define cpu_rq(cpu)		(runqueues + (cpu))
-#define this_rq()		cpu_rq(smp_processor_id())
 #define task_rq(p)		cpu_rq((p)->thread_info->cpu)
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
--- 2.5.13/mm/page_alloc.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/mm/page_alloc.c	Thu May  2 19:21:12 2002
@@ -576,7 +576,7 @@ unsigned long nr_buffermem_pages(void)
  * The result is unavoidably approximate - it can change
  * during and after execution of this function.
  */
-struct page_state page_states[NR_CPUS] __cacheline_aligned;
+struct page_state __per_cpu_data page_states;
 EXPORT_SYMBOL(page_states);
 
 void get_page_state(struct page_state *ret)
@@ -590,7 +590,7 @@ void get_page_state(struct page_state *r
 	for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) {
 		struct page_state *ps;
 
-		ps = &page_states[cpu_logical_map(pcpu)];
+		ps = &per_cpu(page_states,cpu_logical_map(pcpu));
 		ret->nr_dirty += ps->nr_dirty;
 		ret->nr_writeback += ps->nr_writeback;
 		ret->nr_pagecache += ps->nr_pagecache;
--- 2.5.13/mm/page-writeback.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/mm/page-writeback.c	Thu May  2 19:22:25 2002
@@ -20,6 +20,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/sysrq.h>
+#include <linux/percpu.h>
 
 /*
  * Memory thresholds, in percentages
@@ -103,15 +104,12 @@ void balance_dirty_pages(struct address_
  */
 void balance_dirty_pages_ratelimited(struct address_space *mapping)
 {
-	static struct rate_limit_struct {
-		int count;
-	} ____cacheline_aligned ratelimits[NR_CPUS];
-	int cpu;
+	static int __per_cpu_data ratelimits;
 
 	preempt_disable();
 	cpu = smp_processor_id();
-	if (ratelimits[cpu].count++ >= 1000) {
-		ratelimits[cpu].count = 0;
+	if (this_cpu(ratelimits)++ >= 1000) {
+		this_cpu(ratelimits) = 0;
 		preempt_enable();
 		balance_dirty_pages(mapping);
 		return;
--- 2.5.13/net/socket.c~bgerst-pcpu	Thu May  2 19:21:12 2002
+++ 2.5.13-akpm/net/socket.c	Thu May  2 19:21:12 2002
@@ -74,6 +74,7 @@
 #include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/highmem.h>
+#include <linux/percpu.h>
 
 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 #include <linux/kmod.h>
@@ -181,10 +182,7 @@ static __inline__ void net_family_read_u
  *	Statistics counters of the socket lists
  */
 
-static union {
-	int	counter;
-	char	__pad[SMP_CACHE_BYTES];
-} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
+static int __per_cpu_data sockets_in_use;
 
 /*
  *	Support routines. Move socket addresses back and forth across the kernel/user
@@ -498,7 +496,7 @@ struct socket *sock_alloc(void)
 	inode->i_uid = current->fsuid;
 	inode->i_gid = current->fsgid;
 
-	sockets_in_use[smp_processor_id()].counter++;
+	this_cpu(sockets_in_use)++;
 	return sock;
 }
 
@@ -530,7 +528,7 @@ void sock_release(struct socket *sock)
 	if (sock->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
-	sockets_in_use[smp_processor_id()].counter--;
+	this_cpu(sockets_in_use)--;
 	if (!sock->file) {
 		iput(SOCK_INODE(sock));
 		return;
@@ -1774,7 +1772,7 @@ int socket_get_info(char *buffer, char *
 	int counter = 0;
 
 	for (cpu=0; cpu<smp_num_cpus; cpu++)
-		counter += sockets_in_use[cpu_logical_map(cpu)].counter;
+		counter += per_cpu(sockets_in_use,cpu_logical_map(cpu));
 
 	/* It can be negative, by the way. 8) */
 	if (counter < 0)

next prev parent reply	other threads:[~2002-05-05  4:06 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-05-01 22:23 [PATCH] percpu updates Brian Gerst
2002-05-01 22:44 ` Andrew Morton
2002-05-01 22:54   ` Brian Gerst
2002-05-01 23:05     ` Randy.Dunlap
2002-05-01 23:35       ` Alan Cox
2002-05-03 14:59         ` Timothy D. Witham
2002-05-05  4:08 ` Andrew Morton [this message]
2002-05-05 16:38   ` Brian Gerst
2002-05-06  8:57     ` Andrew Morton
2002-05-06 12:44       ` Brian Gerst
2002-05-06  7:27   ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3CD4B042.A4355FD3@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=bgerst@didntduck.org \
    --cc=davej@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox