All of lore.kernel.org
 help / color / mirror / Atom feed
From: Manfred Spraul <manfred@colorfullife.com>
To: Jesse Barnes <jbarnes@engr.sgi.com>
Cc: paulmck@us.ibm.com, "Martin J. Bligh" <mbligh@aracnet.com>,
	hawkes@sgi.com, linux-kernel@vger.kernel.org, wli@holomorphy.com
Subject: Re: kernbench on 512p
Date: Fri, 20 Aug 2004 22:19:26 +0200	[thread overview]
Message-ID: <41265CCE.3070808@colorfullife.com> (raw)
In-Reply-To: <200408201324.32464.jbarnes@engr.sgi.com>

[-- Attachment #1: Type: text/plain, Size: 743 bytes --]

Jesse Barnes wrote:

>Looks like a bit more context has changed.  Manfred, care to respin against 
>-mm3 so I can test?
>
>  
>
The patches are attached. Just boot-tested on a single-cpu system.

Three changes:
- I've placed the per-group structure into rcu_state. That's simpler but 
wrong: the state should be allocated from node-local memory, not a big 
global array.
- I found a bug/race in the cpu_offline path: when the last cpu of a 
group goes offline, the group must be forced into a quiescent state. 
The "&& likely(!force)" check was missing.
- I've removed the spin_unlock_wait(). It was intended to synchronize 
cpu_online_mask changes with the calculation of ->outstanding. Paul 
convinced me that this is not necessary.

--
    Manfred

[-- Attachment #2: patch-rcu268-01-locking --]
[-- Type: text/plain, Size: 1847 bytes --]

--- 2.6/kernel/rcupdate.c	2004-08-20 19:59:22.000000000 +0200
+++ build-2.6/kernel/rcupdate.c	2004-08-20 20:46:35.952639280 +0200
@@ -237,14 +237,29 @@
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
+			struct rcu_data *rdp, int force)
 {
+	spin_lock(&rsp->lock);
+
+	if (unlikely(rcp->completed == rcp->cur))
+		goto out_unlock;
+	/*
+	 * RCU_quiescbatch/batch.cur and the cpu bitmap can come out of sync
+	 * during cpu startup. Ignore the quiescent state if that happened.
+	 */
+	if (unlikely(rdp->quiescbatch != rcp->cur) && likely(!force))
+		goto out_unlock;
+
 	cpu_clear(cpu, rsp->cpumask);
 	if (cpus_empty(rsp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
 		rcu_start_batch(rcp, rsp, 0);
 	}
+out_unlock:
+	spin_unlock(&rsp->lock);
+
 }
 
 /*
@@ -279,15 +294,7 @@
 		return;
 	rdp->qs_pending = 0;
 
-	spin_lock(&rsp->lock);
-	/*
-	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
-	 * during cpu startup. Ignore the quiescent state.
-	 */
-	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp, rsp);
-
-	spin_unlock(&rsp->lock);
+	cpu_quiet(rdp->cpu, rcp, rsp, rdp, 0);
 }
 
 
@@ -314,10 +321,10 @@
 	 * we can block indefinitely waiting for it, so flush
 	 * it here
 	 */
-	spin_lock_bh(&rsp->lock);
-	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp, rsp);
-	spin_unlock_bh(&rsp->lock);
+	local_bh_disable();
+	cpu_quiet(rdp->cpu, rcp, rsp, rdp, 1);
+	local_bh_enable();
+
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
 

[-- Attachment #3: patch-rcu268-02-groups --]
[-- Type: text/plain, Size: 6691 bytes --]

--- 2.6/kernel/rcupdate.c	2004-08-20 21:52:45.272210984 +0200
+++ build-2.6/kernel/rcupdate.c	2004-08-20 21:52:24.664343856 +0200
@@ -53,17 +53,59 @@
 struct rcu_ctrlblk rcu_bh_ctrlblk =
 	{ .cur = -300, .completed = -300 , .lock = SEQCNT_ZERO };
 
+/* XXX Dummy - should belong into arch XXX */
+#define RCU_HUGE
+#define RCU_GROUP_SIZE	2
+/* XXX End of dummy XXX */
+
+#ifdef RCU_HUGE
+
+#define RCU_GROUPCOUNT		((NR_CPUS+RCU_GROUP_SIZE-1)/RCU_GROUP_SIZE)
+#define RCU_GROUP_CPUMASKLEN	((RCU_GROUP_SIZE+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define RCU_GROUPMASKLEN	((NR_CPUS+RCU_GROUP_SIZE*BITS_PER_LONG-1)/(RCU_GROUP_SIZE*BITS_PER_LONG))
+
+struct rcu_group_state {
+	spinlock_t	lock; /* Guard this struct */
+	long batchnum;	/* batchnum this group is working on. Mismatch with
+			 * ctrlblk->cur means reinitialize outstanding to
+			 * all active cpus in this group.
+			 */
+	unsigned long outstanding[RCU_GROUP_CPUMASKLEN];
+} ____cacheline_maxaligned_in_smp;
+
+#endif
+
 /* Bookkeeping of the progress of the grace period */
 struct rcu_state {
 	spinlock_t	lock; /* Guard this struct and writes to rcu_ctrlblk */
+#ifdef RCU_HUGE
+	long batchnum;         /* batchnum the system is working on. Mismatch
+				* with rcu_ctrlblk.cur means reinitialize
+				* outstanding to all groups with active cpus
+				*/
+	unsigned long outstanding[RCU_GROUPMASKLEN];
+	struct rcu_group_state groups[RCU_GROUPCOUNT];
+#else
 	cpumask_t	cpumask; /* CPUs that need to switch in order    */
 	                              /* for current batch to proceed.        */
+#endif
 };
 
-struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
+#ifdef RCU_HUGE
+#define RCU_STATE_INITIALIZER 	\
+		{ \
+			.lock = SPIN_LOCK_UNLOCKED, \
+			.batchnum = -400, \
+			.groups = { [0 ... RCU_GROUPCOUNT-1] = \
+					{ .lock = SPIN_LOCK_UNLOCKED, .batchnum = -400 } } \
+		}
+
+#else
+#define RCU_STATE_INITIALIZER 	{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE }
+#endif
+
+struct rcu_state rcu_state ____cacheline_maxaligned_in_smp = RCU_STATE_INITIALIZER;
+struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp = RCU_STATE_INITIALIZER;
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -223,8 +265,15 @@
 
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
+#ifdef RCU_HUGE
+		/* Nothing to do: RCU_HUGE uses lazy initialization of the
+		 * outstanding bitmap
+		 */
+#else
+		/* FIXME: what does this comment mean? */
 		/* Can't change, since spin lock held. */
 		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
+#endif
 		write_seqcount_begin(&rcp->lock);
 		rcp->next_pending = 0;
 		rcp->cur++;
@@ -237,6 +286,76 @@
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
+#ifdef RCU_HUGE
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
+			struct rcu_data *rdp, int force)
+{
+	struct rcu_group_state *rgs;
+	long batch = rcp->cur;	/* read before rgs->lock is taken */
+	int i;
+
+	/* cpus [k*RCU_GROUP_SIZE, (k+1)*RCU_GROUP_SIZE) share groups[k] */
+	rgs = &rsp->groups[cpu/RCU_GROUP_SIZE];
+
+	/* level 1: retire this cpu from its group's outstanding bitmap */
+	spin_lock(&rgs->lock);
+	if (rgs->batchnum != batch) {
+		int offset;
+		/* first call for this batch - initialize outstanding */
+		rgs->batchnum = batch;
+		memset(rgs->outstanding, 0, sizeof(rgs->outstanding));
+		offset = (cpu/RCU_GROUP_SIZE)*RCU_GROUP_SIZE;
+		for (i=0;i<RCU_GROUP_SIZE;i++) {
+			if (cpu_online(i+offset) && !cpu_isset(i+offset, nohz_cpu_mask))
+				__set_bit(i, rgs->outstanding);
+		}
+	}
+	if (unlikely(rdp->quiescbatch != rgs->batchnum) && likely(!force))
+		goto out_unlock_group;
+
+	__clear_bit(cpu%RCU_GROUP_SIZE, rgs->outstanding);
+	for (i=0;i<RCU_GROUP_CPUMASKLEN;i++) {
+		if (rgs->outstanding[i])
+			break;
+	}
+	if (i==RCU_GROUP_CPUMASKLEN) {
+		/* group completed, escalate to global level */
+		spin_lock(&rsp->lock);
+
+		if (rsp->batchnum != rcp->cur) {
+			/* first call for this batch - initialize outstanding */
+			rsp->batchnum = rcp->cur;
+			memset(rsp->outstanding, 0, sizeof(rsp->outstanding));
+			/* mark every group that has a cpu to wait for */
+			for (i=0;i<NR_CPUS;i+=RCU_GROUP_SIZE) {
+				int j;
+				for (j=0;j<RCU_GROUP_SIZE;j++) {
+					if (cpu_online(i+j) && !cpu_isset(i+j, nohz_cpu_mask))
+						break;
+				}
+				if (j != RCU_GROUP_SIZE)
+					__set_bit(i/RCU_GROUP_SIZE, rsp->outstanding);
+			}
+		}
+		if (unlikely(rgs->batchnum != rsp->batchnum) && likely(!force))
+			goto out_unlock_all;
+		__clear_bit(cpu/RCU_GROUP_SIZE, rsp->outstanding);
+		for (i=0;i<RCU_GROUPMASKLEN;i++) {
+			if (rsp->outstanding[i])
+				break;
+		}
+		if (i==RCU_GROUPMASKLEN) {
+			/* all groups completed, batch completed */
+			rcp->completed = rcp->cur;
+			rcu_start_batch(rcp, rsp, 0);
+		}
+out_unlock_all:
+		spin_unlock(&rsp->lock);	/* rsp need not be &rcu_state */
+	}
+out_unlock_group:
+	spin_unlock(&rgs->lock);
+}
+#else
 static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
 			struct rcu_data *rdp, int force)
 {
@@ -261,6 +380,7 @@
 	spin_unlock(&rsp->lock);
 
 }
+#endif
 
 /*
  * Check if the cpu has gone through a quiescent state (say context
@@ -418,8 +538,25 @@
 	tasklet_schedule(&per_cpu(rcu_tasklet, cpu));
 }
 
+#ifdef RCU_HUGE
+static void rcu_update_group(int cpu, struct rcu_ctrlblk *rcp,
+						struct rcu_state *rsp)
+{
+	int i, offset = (cpu/RCU_GROUP_SIZE)*RCU_GROUP_SIZE;
+	/* Both masks are indexed by absolute cpu number, hence i+offset. */
+	for (i=0;i<RCU_GROUP_SIZE;i++) {
+		if (cpu_online(i+offset) && !cpu_isset(i+offset, nohz_cpu_mask))
+			break;
+	}
+	if (i == RCU_GROUP_SIZE) {
+		/* No active cpu in this group: set its batchnum to ->completed. */
+		rsp->groups[cpu/RCU_GROUP_SIZE].batchnum = rcp->completed;
+	}
+}
+#endif
+
 static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
-						struct rcu_data *rdp)
+		       		struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	memset(rdp, 0, sizeof(*rdp));
 	rdp->curtail = &rdp->curlist;
@@ -428,6 +565,9 @@
 	rdp->quiescbatch = rcp->completed;
 	rdp->qs_pending = 0;
 	rdp->cpu = cpu;
+#ifdef RCU_HUGE
+	rcu_update_group(cpu, rcp, rsp);
+#endif
 }
 
 static void __devinit rcu_online_cpu(int cpu)
@@ -435,8 +575,8 @@
 	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
 	struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu);
 
-	rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp);
-	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp);
+	rcu_init_percpu_data(cpu, &rcu_ctrlblk, &rcu_state, rdp);
+	rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, &rcu_bh_state, bh_rdp);
 	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
 }
 

  parent reply	other threads:[~2004-08-20 20:21 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-08-19 16:16 kernbench on 512p Jesse Barnes
2004-08-19 16:22 ` William Lee Irwin III
2004-08-19 16:29 ` David Mosberger
2004-08-19 16:37   ` Jesse Barnes
2004-08-19 16:40     ` David Mosberger
2004-08-19 17:55       ` Alan Cox
2004-08-20  7:53         ` David Mosberger
2004-08-19 18:25     ` Martin J. Bligh
2004-08-19 18:42       ` Jesse Barnes
2004-08-19 21:11       ` Jesse Barnes
2004-08-19 21:24         ` Jesse Barnes
2004-08-19 21:56           ` Martin J. Bligh
2004-08-20 19:36             ` Maneesh Soni
2004-08-19 23:38           ` Paul E. McKenney
2004-08-20  0:16             ` Jesse Barnes
2004-08-20 15:57               ` Paul E. McKenney
2004-08-20 17:24                 ` Jesse Barnes
2004-08-20 19:22                   ` Manfred Spraul
2004-08-20 20:19                   ` Manfred Spraul [this message]
2004-08-23 21:23                     ` Jesse Barnes
2004-08-23 23:13                       ` Jesse Barnes
2004-09-10 19:01                     ` Greg Edwards
2004-09-13 18:21                       ` Manfred Spraul
2004-09-14 17:52                         ` Greg Edwards
2004-09-14 18:16                           ` Manfred Spraul
2004-09-14 18:43                             ` Greg Edwards
2004-09-14 19:16                               ` Manfred Spraul
2004-08-19 21:50         ` Ray Bryant
2004-08-19 22:00           ` Jesse Barnes
2004-08-19 23:03 ` William Lee Irwin III
2004-08-20 17:35   ` William Lee Irwin III
2004-08-20  0:56 ` remove dentry_open::file_ra_init_state() duplicated memset was " Marcelo Tosatti
2004-08-20  6:21   ` Andrew Morton
2004-08-20  7:28     ` Marcelo Tosatti
2004-08-20  8:34       ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41265CCE.3070808@colorfullife.com \
    --to=manfred@colorfullife.com \
    --cc=hawkes@sgi.com \
    --cc=jbarnes@engr.sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mbligh@aracnet.com \
    --cc=paulmck@us.ibm.com \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.