public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dipankar Sarma <dipankar@in.ibm.com>
To: tiwai@suse.de
Cc: Andrea Arcangeli <andrea@suse.de>, Robert Love <rml@ximian.com>,
	Andrew Morton <akpm@osdl.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH] RCU for low latency (experimental)
Date: Tue, 23 Mar 2004 15:47:55 +0530	[thread overview]
Message-ID: <20040323101755.GC3676@in.ibm.com> (raw)

Here is the RCU patch for low scheduling latency Andrew was talking
about in the other thread. I had done some measurements with
amlat on a 2.4 GHz P4 xeon box with 256MB memory running dbench
and it reduced worst case scheduling latencies from 800 microseconds
to about 400 microseconds.

It uses per-cpu kernel threads to execute excess callbacks and
pretty much relies on preemption. I added a CONFIG_LOW_LATENCY
option to make this conditional. The amount of callbacks to
invoke in softirq before punting to krcud can be set at boot
time using rcupdate.bhlimit parameter. The whole thing is meant
for experimenting only. The negative side of doing RCU this way
is that we may further delay the grace period with the 
RCU kernel thread  and thus there can be OOM situations.

I would be interested in all issues with this patch including 
latencies and OOM situations.

Dipankar



Reduce bh processing time of rcu callbacks by using tunable per-cpu
krcud daemeons.


 include/linux/rcupdate.h |    4 ++
 include/linux/sched.h    |    1 
 init/Kconfig             |    9 ++++
 kernel/rcupdate.c        |   91 +++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched.c           |    6 +++
 5 files changed, 108 insertions(+), 3 deletions(-)

diff -puN include/linux/rcupdate.h~rcu-low-lat include/linux/rcupdate.h
--- linux-2.6.4-rcu/include/linux/rcupdate.h~rcu-low-lat	2004-03-23 15:20:11.000000000 +0530
+++ linux-2.6.4-rcu-dipankar/include/linux/rcupdate.h	2004-03-23 15:20:11.000000000 +0530
@@ -93,9 +93,11 @@ struct rcu_data {
 	long		qsctr;		 /* User-mode/idle loop etc. */
         long            last_qsctr;	 /* value of qsctr at beginning */
                                          /* of rcu grace period */
+	struct task_struct *krcud;
         long  	       	batch;           /* Batch # for current RCU batch */
         struct list_head  nxtlist;
         struct list_head  curlist;
+        struct list_head  rcudlist;
 };
 
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
@@ -103,9 +105,11 @@ extern struct rcu_ctrlblk rcu_ctrlblk;
 
 #define RCU_qsctr(cpu) 		(per_cpu(rcu_data, (cpu)).qsctr)
 #define RCU_last_qsctr(cpu) 	(per_cpu(rcu_data, (cpu)).last_qsctr)
+#define RCU_krcud(cpu) 		(per_cpu(rcu_data, (cpu)).krcud)
 #define RCU_batch(cpu) 		(per_cpu(rcu_data, (cpu)).batch)
 #define RCU_nxtlist(cpu) 	(per_cpu(rcu_data, (cpu)).nxtlist)
 #define RCU_curlist(cpu) 	(per_cpu(rcu_data, (cpu)).curlist)
+#define RCU_rcudlist(cpu) 	(per_cpu(rcu_data, (cpu)).rcudlist)
 
 #define RCU_QSCTR_INVALID	0
 
diff -puN include/linux/sched.h~rcu-low-lat include/linux/sched.h
--- linux-2.6.4-rcu/include/linux/sched.h~rcu-low-lat	2004-03-23 15:20:11.000000000 +0530
+++ linux-2.6.4-rcu-dipankar/include/linux/sched.h	2004-03-23 15:20:12.000000000 +0530
@@ -552,6 +552,7 @@ extern int task_prio(task_t *p);
 extern int task_nice(task_t *p);
 extern int task_curr(task_t *p);
 extern int idle_cpu(int cpu);
+extern int rq_has_rt_task(int cpu);
 
 void yield(void);
 
diff -puN init/Kconfig~rcu-low-lat init/Kconfig
--- linux-2.6.4-rcu/init/Kconfig~rcu-low-lat	2004-03-23 15:20:11.000000000 +0530
+++ linux-2.6.4-rcu-dipankar/init/Kconfig	2004-03-23 15:20:12.000000000 +0530
@@ -156,6 +156,14 @@ config HOTPLUG
 	  agent" (/sbin/hotplug) to load modules and set up software needed
 	  to use devices as you hotplug them.
 
+config LOW_LATENCY
+	 bool "Enable kernel features for low scheduling latency" if EXPERIMENTAL
+	 default n
+	---help---
+	   This option enables various features in the kernel that
+	   help reduce scheduling latency while potentially sacrificing
+	   throughput.
+
 config IKCONFIG
 	bool "Kernel .config support"
 	---help---
@@ -181,7 +189,6 @@ config IKCONFIG_PROC
 	  This option enables access to kernel configuration file and build
 	  information through /proc/config.gz.
 
-
 menuconfig EMBEDDED
 	bool "Remove kernel features (for embedded systems)"
 	help
diff -puN kernel/rcupdate.c~rcu-low-lat kernel/rcupdate.c
--- linux-2.6.4-rcu/kernel/rcupdate.c~rcu-low-lat	2004-03-23 15:20:11.000000000 +0530
+++ linux-2.6.4-rcu-dipankar/kernel/rcupdate.c	2004-03-23 15:21:12.000000000 +0530
@@ -39,6 +39,7 @@
 #include <asm/atomic.h>
 #include <asm/bitops.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/completion.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
@@ -54,6 +55,11 @@ DEFINE_PER_CPU(struct rcu_data, rcu_data
 /* Fake initialization required by compiler */
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 #define RCU_tasklet(cpu) (per_cpu(rcu_tasklet, cpu))
+#ifdef CONFIG_LOW_LATENCY
+static int bhlimit = 256;
+#else 
+static int bhlimit = 0;
+#endif
 
 /**
  * call_rcu - Queue an RCU update request.
@@ -79,6 +85,13 @@ void fastcall call_rcu(struct rcu_head *
 	local_irq_restore(flags);
 }
 
+static inline unsigned int rcu_bh_callback_limit(int cpu)
+{
+	if (in_softirq() && RCU_krcud(cpu))
+		return bhlimit;
+	return (unsigned int)-1;
+}
+
 /*
  * Invoke the completed RCU callbacks. They are expected to be in
  * a per-cpu list.
@@ -87,13 +100,22 @@ static void rcu_do_batch(struct list_hea
 {
 	struct list_head *entry;
 	struct rcu_head *head;
+	unsigned int count = 0;
+	int cpu = smp_processor_id();
+	unsigned int limit = rcu_bh_callback_limit(cpu);
 
 	while (!list_empty(list)) {
 		entry = list->next;
 		list_del(entry);
 		head = list_entry(entry, struct rcu_head, list);
 		head->func(head->arg);
+		if (++count > limit && rq_has_rt_task(cpu)) {
+			list_splice(list, &RCU_rcudlist(cpu));
+			wake_up_process(RCU_krcud(cpu));
+			break;
+		}
 	}
+
 }
 
 /*
@@ -198,12 +220,67 @@ void rcu_check_callbacks(int cpu, int us
 	tasklet_schedule(&RCU_tasklet(cpu));
 }
 
+static int krcud(void * __bind_cpu)
+{
+	int cpu = (int) (long) __bind_cpu;
+
+	daemonize("krcud/%d", cpu);
+	set_user_nice(current, -19);
+	current->flags |= PF_IOTHREAD;
+
+	/* Migrate to the right CPU */
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	BUG_ON(smp_processor_id() != cpu);
+
+	__set_current_state(TASK_INTERRUPTIBLE);
+	mb();
+
+	RCU_krcud(cpu) = current;
+
+	for (;;) {
+		LIST_HEAD(list);
+
+		if (list_empty(&RCU_rcudlist(cpu)))
+			schedule();
+
+		__set_current_state(TASK_RUNNING);
+
+		local_bh_disable();
+		while (!list_empty(&RCU_rcudlist(cpu))) {
+			list_splice(&RCU_rcudlist(cpu), &list);
+			INIT_LIST_HEAD(&RCU_rcudlist(cpu));
+			local_bh_enable();
+			rcu_do_batch(&list);
+			cond_resched();
+			local_bh_disable();
+		}
+		local_bh_enable();
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+	}
+}
+
+static int start_krcud(int cpu)
+{
+	if (bhlimit) {
+		if (kernel_thread(krcud, (void *)(long)cpu, CLONE_KERNEL) < 0) {
+			printk("krcud for %i failed\n", cpu);
+			return -1;
+		}
+
+		while (!RCU_krcud(cpu))
+			yield();
+	}
+	return 0;
+}
+
 static void __devinit rcu_online_cpu(int cpu)
 {
 	memset(&per_cpu(rcu_data, cpu), 0, sizeof(struct rcu_data));
 	tasklet_init(&RCU_tasklet(cpu), rcu_process_callbacks, 0UL);
 	INIT_LIST_HEAD(&RCU_nxtlist(cpu));
 	INIT_LIST_HEAD(&RCU_curlist(cpu));
+	INIT_LIST_HEAD(&RCU_rcudlist(cpu));
 }
 
 static int __devinit rcu_cpu_notify(struct notifier_block *self, 
@@ -214,6 +291,10 @@ static int __devinit rcu_cpu_notify(stru
 	case CPU_UP_PREPARE:
 		rcu_online_cpu(cpu);
 		break;
+	case CPU_ONLINE:
+		if (start_krcud(cpu) != 0)
+			return NOTIFY_BAD;
+		break;
 	/* Space reserved for CPU_OFFLINE :) */
 	default:
 		break;
@@ -233,12 +314,17 @@ static struct notifier_block __devinitda
  */
 void __init rcu_init(void)
 {
-	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
-			(void *)(long)smp_processor_id());
+	rcu_online_cpu(smp_processor_id());
 	/* Register notifier for non-boot CPUs */
 	register_cpu_notifier(&rcu_nb);
 }
 
+static int __init rcu_late_init(void)
+{
+	return start_krcud(smp_processor_id());
+}
+
+__initcall(rcu_late_init);
 
 /* Because of FASTCALL declaration of complete, we use this wrapper */
 static void wakeme_after_rcu(void *completion)
@@ -262,6 +348,7 @@ void synchronize_kernel(void)
 	wait_for_completion(&completion);
 }
 
+module_param(bhlimit, int, 0);
 
 EXPORT_SYMBOL(call_rcu);
 EXPORT_SYMBOL(synchronize_kernel);
diff -puN kernel/sched.c~rcu-low-lat kernel/sched.c
--- linux-2.6.4-rcu/kernel/sched.c~rcu-low-lat	2004-03-23 15:20:11.000000000 +0530
+++ linux-2.6.4-rcu-dipankar/kernel/sched.c	2004-03-23 15:20:12.000000000 +0530
@@ -341,6 +341,12 @@ static inline void enqueue_task(struct t
 	p->array = array;
 }
 
+int rq_has_rt_task(int cpu)
+{
+        runqueue_t *rq = cpu_rq(cpu);
+        return (sched_find_first_bit(rq->active->bitmap) < MAX_RT_PRIO);
+}
+
 /*
  * effective_prio - return the priority that is based on the static
  * priority but is modified by bonuses/penalties.

_

             reply	other threads:[~2004-03-23 10:18 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-03-23 10:17 Dipankar Sarma [this message]
2004-03-23 10:25 ` [PATCH] RCU for low latency (experimental) Andrew Morton
2004-03-23 10:41   ` Dipankar Sarma
2004-03-23 10:35 ` Arjan van de Ven
2004-03-23 10:45   ` Dipankar Sarma
2004-03-23 12:31   ` Andrea Arcangeli
2004-03-23 12:40     ` Dipankar Sarma
2004-03-23 12:50       ` Andrea Arcangeli
2004-03-24 17:26         ` Paul E. McKenney
2004-03-24 17:51           ` Andrea Arcangeli
2004-03-24 20:02             ` Paul E. McKenney
2004-03-24 23:36               ` Andrea Arcangeli
2004-03-25  0:43                 ` Paul E. McKenney
2004-03-24 21:39             ` Dipankar Sarma
2004-03-24 22:53               ` Andrea Arcangeli
2004-03-24 23:11                 ` Dipankar Sarma
2004-03-24 23:34                   ` Andrea Arcangeli
2004-03-24 23:46                     ` Dipankar Sarma
2004-03-24 23:51                       ` Andrea Arcangeli
2004-03-28 16:53                       ` Takashi Iwai
2004-03-28 17:20                         ` Dipankar Sarma
2004-03-28 17:28                           ` Takashi Iwai
2004-03-29 10:43                             ` Takashi Iwai
2004-03-29 12:20                               ` Dipankar Sarma
2004-03-23 12:40     ` Arjan van de Ven
2004-03-23 12:29 ` Andrea Arcangeli
2004-03-23 12:34   ` Dipankar Sarma
2004-03-23 12:46     ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040323101755.GC3676@in.ibm.com \
    --to=dipankar@in.ibm.com \
    --cc=akpm@osdl.org \
    --cc=andrea@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rml@ximian.com \
    --cc=tiwai@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox