public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Manfred Spraul <manfred@colorfullife.com>
To: Jesse Barnes <jbarnes@engr.sgi.com>
Cc: paulmck@us.ibm.com, "Martin J. Bligh" <mbligh@aracnet.com>,
	hawkes@sgi.com, linux-kernel@vger.kernel.org, wli@holomorphy.com
Subject: Re: kernbench on 512p
Date: Fri, 20 Aug 2004 22:19:26 +0200	[thread overview]
Message-ID: <41265CCE.3070808@colorfullife.com> (raw)
In-Reply-To: <200408201324.32464.jbarnes@engr.sgi.com>

[-- Attachment #1: Type: text/plain, Size: 743 bytes --]

Jesse Barnes wrote:

>Looks like a bit more context has changed.  Manfred, care to respin against 
>-mm3 so I can test?
>
>  
>
The patches are attached. Just boot-tested on a single-cpu system.

Three  changes:
- I've placed the per-group structure into rcu_state. That's simpler but 
wrong: the state should be allocated from node-local memory, not a big 
global array.
- I found a bug/race in the cpu_offline path: When the last cpu of a 
group goes offline then the group must be forced into quiescent state. 
The "&& (!forced)" was missing.
- I've removed the spin_unlock_wait(). It was intended to synchronize 
cpu_online_mask changes with the calculation of ->outstanding. Paul 
convinced me that this is not necessary.

--
    Manfred

[-- Attachment #2: patch-rcu268-01-locking --]
[-- Type: text/plain, Size: 1847 bytes --]

--- 2.6/kernel/rcupdate.c	2004-08-20 19:59:22.000000000 +0200
+++ build-2.6/kernel/rcupdate.c	2004-08-20 20:46:35.952639280 +0200
@@ -237,14 +237,29 @@
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
+			struct rcu_data *rdp, int force)
 {
+	spin_lock(&rsp->lock);
+
+	if (unlikely(rcp->completed == rcp->cur))
+		goto out_unlock;
+	/*
+	 * RCU_quiescbatch/batch.cur and the cpu bitmap can come out of sync
+	 * during cpu startup. Ignore the quiescent state if that happened.
+	 */
+	if (unlikely(rdp->quiescbatch != rcp->cur) && likely(!force))
+		goto out_unlock;
+
 	cpu_clear(cpu, rsp->cpumask);
 	if (cpus_empty(rsp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
 		rcu_start_batch(rcp, rsp, 0);
 	}
+out_unlock:
+	spin_unlock(&rsp->lock);
+
 }
 
 /*
@@ -279,15 +294,7 @@
 		return;
 	rdp->qs_pending = 0;
 
-	spin_lock(&rsp->lock);
-	/*
-	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
-	 * during cpu startup. Ignore the quiescent state.
-	 */
-	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp, rsp);
-
-	spin_unlock(&rsp->lock);
+	cpu_quiet(rdp->cpu, rcp, rsp, rdp, 0);
 }
 
 
@@ -314,10 +321,10 @@
 	 * we can block indefinitely waiting for it, so flush
 	 * it here
 	 */
-	spin_lock_bh(&rsp->lock);
-	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp, rsp);
-	spin_unlock_bh(&rsp->lock);
+	local_bh_disable();
+	cpu_quiet(rdp->cpu, rcp, rsp, rdp, 1);
+	local_bh_enable();
+
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
 

[-- Attachment #3: patch-rcu268-02-groups --]
[-- Type: text/plain, Size: 6691 bytes --]

--- 2.6/kernel/rcupdate.c	2004-08-20 21:52:45.272210984 +0200
+++ build-2.6/kernel/rcupdate.c	2004-08-20 21:52:24.664343856 +0200
@@ -53,17 +53,59 @@
 struct rcu_ctrlblk rcu_bh_ctrlblk =
 	{ .cur = -300, .completed = -300 , .lock = SEQCNT_ZERO };
 
+/* XXX Dummy - should belong into arch XXX */
+#define RCU_HUGE
+#define RCU_GROUP_SIZE	2
+/* XXX End of dummy XXX */
+
+#ifdef RCU_HUGE
+
+#define RCU_GROUPCOUNT		((NR_CPUS+RCU_GROUP_SIZE-1)/RCU_GROUP_SIZE)
+#define RCU_GROUP_CPUMASKLEN	((RCU_GROUP_SIZE+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define RCU_GROUPMASKLEN	((NR_CPUS+RCU_GROUP_SIZE*BITS_PER_LONG-1)/(RCU_GROUP_SIZE*BITS_PER_LONG))
+
+struct rcu_group_state {
+	spinlock_t	lock; /* Guard this struct */
+	long batchnum;	/* batchnum this group is working on. Mitmatch with
+			 * ctrlblk->cur means reinitialize outstanding to
+			 * all active cpus in this group.
+			 */
+	unsigned long outstanding[RCU_GROUP_CPUMASKLEN];
+} ____cacheline_maxaligned_in_smp;
+
+#endif
+
 /* Bookkeeping of the progress of the grace period */
 struct rcu_state {
 	spinlock_t	lock; /* Guard this struct and writes to rcu_ctrlblk */
+#ifdef RCU_HUGE
+	long batchnum;         /* batchnum the system is working on. Mismatch
+				* with rcu_ctrlblk.cur means reinitialize
+				* outstanding to all groups with active cpus
+				*/
+	unsigned long outstanding[RCU_GROUPMASKLEN];
+	struct rcu_group_state groups[RCU_GROUPCOUNT];
+#else
 	cpumask_t	cpumask; /* CPUs that need to switch in order    */
 	                              /* for current batch to proceed.        */
+#endif
 };
 
-struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
+#ifdef RCU_HUGE
+#define RCU_STATE_INITIALIZER 	\
+		{ \
+			.lock = SPIN_LOCK_UNLOCKED, \
+			.batchnum = -400, \
+			.groups = { [0 ... RCU_GROUPCOUNT-1] = \
+					{ .lock = SPIN_LOCK_UNLOCKED, .batchnum = -400 } } \
+		}
+
+#else
+#define RCU_STATE_INITIALIZER 	{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }
+#endif
+
+struct rcu_state rcu_state ____cacheline_maxaligned_in_smp = RCU_STATE_INITIALIZER;
+struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp = RCU_STATE_INITIALIZER;
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -223,8 +265,15 @@
 
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
+#ifdef RCU_HUGE
+		/* Nothing to do: RCU_HUGE uses lazy initialization of the
+		 * outstanding bitmap
+		 */
+#else
+		/* FIXME: what does this comment mean? */
 		/* Can't change, since spin lock held. */
 		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
+#endif
 		write_seqcount_begin(&rcp->lock);
 		rcp->next_pending = 0;
 		rcp->cur++;
@@ -237,6 +286,76 @@
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
+#ifdef RCU_HUGE
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
+			struct rcu_data *rdp, int force)
+{
+	struct rcu_group_state *rgs;
+	long batch;
+	int i;
+
+	batch = rcp->cur;
+
+	rgs = &rsp->groups[cpu/RCU_GROUP_SIZE];
+
+	spin_lock(&rgs->lock);
+	if (rgs->batchnum != batch) {
+		int offset;
+		/* first call for this batch - initialize outstanding */
+		rgs->batchnum = batch;
+		memset(rgs->outstanding, 0, sizeof(rgs->outstanding));
+		offset = (cpu/RCU_GROUP_SIZE)*RCU_GROUP_SIZE;
+		for (i=0;i<RCU_GROUP_SIZE;i++) {
+			if (cpu_online(i+offset) && !cpu_isset(i+offset, nohz_cpu_mask))
+				__set_bit(i, rgs->outstanding);
+		}
+	}
+	if (unlikely(rdp->quiescbatch != rgs->batchnum) && likely(!force))
+       		goto out_unlock_group;
+
+	__clear_bit(cpu%RCU_GROUP_SIZE, rgs->outstanding);
+	for (i=0;i<RCU_GROUP_CPUMASKLEN;i++) {
+		if (rgs->outstanding[i])
+			break;
+	}
+	if (i==RCU_GROUP_CPUMASKLEN) {
+		/* group completed, escalate to global level */
+		spin_lock(&rsp->lock);
+
+		if (rsp->batchnum != rcp->cur) {
+			/* first call for this batch - initialize outstanding */
+			rsp->batchnum = rcp->cur;
+			memset(rsp->outstanding, 0, sizeof(rsp->outstanding));
+
+			for (i=0;i<NR_CPUS;i+=RCU_GROUP_SIZE) {
+				int j;
+				for (j=0;j<RCU_GROUP_SIZE;j++) {
+					if (cpu_online(i+j) && !cpu_isset(i+j, nohz_cpu_mask))
+						break;
+				}
+				if (j != RCU_GROUP_SIZE)
+					__set_bit(i/RCU_GROUP_SIZE, rsp->outstanding);
+			}
+		}
+		if (unlikely(rgs->batchnum != rsp->batchnum) && likely(!force))
+       			goto out_unlock_all;
+		__clear_bit(cpu/RCU_GROUP_SIZE, rsp->outstanding);
+		for (i=0;i<RCU_GROUPMASKLEN;i++) {
+			if (rsp->outstanding[i])
+				break;
+		}
+		if (i==RCU_GROUPMASKLEN) {
+			/* all groups completed, batch completed */
+			rcp->completed = rcp->cur;
+			rcu_start_batch(rcp, rsp, 0);
+		}
+out_unlock_all:
+		spin_unlock(&rcu_state.lock);
+	}
+out_unlock_group:
+	spin_unlock(&rgs->lock);
+}
+#else
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
 			struct rcu_data *rdp, int force)
 {
@@ -261,6 +380,7 @@
 	spin_unlock(&rsp->lock);
 
 }
+#endif
 
 /*
  * Check if the cpu has gone through a quiescent state (say context
@@ -418,8 +538,25 @@
 	tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
 }
 
+#ifdef RCU_HUGE
+static void rcu_update_group(int cpu, struct rcu_ctrlblk *rcp,
+						struct rcu_state *rsp)
+{
+	int i, offset;
+	offset = (cpu/RCU_GROUP_SIZE)*RCU_GROUP_SIZE;
+	for (i=0;i<RCU_GROUP_SIZE;i++) {
+		if (cpu_online(i+offset) && !cpu_isset(i, nohz_cpu_mask))
+			break;
+	}
+	if (i == RCU_GROUP_SIZE) {
+		/* No cpu online from this group. Initialize batchnum. */
+		rsp->groups[cpu/RCU_GROUP_SIZE].batchnum = rcp->completed;
+	}
+}
+#endif
+
 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
-						struct rcu_data *rdp)
+		       		struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	memset(rdp, 0, sizeof(*rdp));
 	rdp->curtail = &rdp->curlist;
@@ -428,6 +565,9 @@
 	rdp->quiescbatch = rcp->completed;
 	rdp->qs_pending = 0;
 	rdp->cpu = cpu;
+#ifdef RCU_HUGE
+	rcu_update_group(cpu, rcp, rsp);
+#endif
 }
 
 static void __devinit rcu_online_cpu(int cpu)
@@ -435,8 +575,8 @@
 	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
 	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
 
-	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
-	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
+	rcu_init_percpu_data(cpu, &rcu_ctrlblk, &rcu_state, rdp);
+	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, &rcu_bh_state, bh_rdp);
 	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
 }
 

  parent reply	other threads:[~2004-08-20 20:21 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-08-19 16:16 kernbench on 512p Jesse Barnes
2004-08-19 16:22 ` William Lee Irwin III
2004-08-19 16:29 ` David Mosberger
2004-08-19 16:37   ` Jesse Barnes
2004-08-19 16:40     ` David Mosberger
2004-08-19 17:55       ` Alan Cox
2004-08-20  7:53         ` David Mosberger
2004-08-19 18:25     ` Martin J. Bligh
2004-08-19 18:42       ` Jesse Barnes
2004-08-19 21:11       ` Jesse Barnes
2004-08-19 21:24         ` Jesse Barnes
2004-08-19 21:56           ` Martin J. Bligh
2004-08-20 19:36             ` Maneesh Soni
2004-08-19 23:38           ` Paul E. McKenney
2004-08-20  0:16             ` Jesse Barnes
2004-08-20 15:57               ` Paul E. McKenney
2004-08-20 17:24                 ` Jesse Barnes
2004-08-20 19:22                   ` Manfred Spraul
2004-08-20 20:19                   ` Manfred Spraul [this message]
2004-08-23 21:23                     ` Jesse Barnes
2004-08-23 23:13                       ` Jesse Barnes
2004-09-10 19:01                     ` Greg Edwards
2004-09-13 18:21                       ` Manfred Spraul
2004-09-14 17:52                         ` Greg Edwards
2004-09-14 18:16                           ` Manfred Spraul
2004-09-14 18:43                             ` Greg Edwards
2004-09-14 19:16                               ` Manfred Spraul
2004-08-19 21:50         ` Ray Bryant
2004-08-19 22:00           ` Jesse Barnes
2004-08-19 23:03 ` William Lee Irwin III
2004-08-20 17:35   ` William Lee Irwin III
2004-08-20  0:56 ` remove dentry_open::file_ra_init_state() duplicated memset was " Marcelo Tosatti
2004-08-20  6:21   ` Andrew Morton
2004-08-20  7:28     ` Marcelo Tosatti
2004-08-20  8:34       ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41265CCE.3070808@colorfullife.com \
    --to=manfred@colorfullife.com \
    --cc=hawkes@sgi.com \
    --cc=jbarnes@engr.sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mbligh@aracnet.com \
    --cc=paulmck@us.ibm.com \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox