* [PATCH RFC tip/core/rcu 0/3] v1 RCU priority boosting
@ 2010-09-29 0:33 Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 1/3] rcu: add priority-inversion testing to rcutorture Paul E. McKenney
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Paul E. McKenney @ 2010-09-29 0:33 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, Valdis.Kletnieks, dhowells, eric.dumazet
Hello!
This patch set is the first version of RCU priority boosting, consisting
of the following:
1. Add RCU priority inversion testing to rcutorture.
2. Move TINY_RCU grace-period processing from softirq to kthread
to allow boosting.
3. Implement RCU priority boosting for TINY_RCU.
This patch has the following shortcomings, and thus is not yet ready for
inclusion:
o It does not yet include some restructuring of TREE_RCU that
is required to make boosting practical.
o It does not yet include boosting for TREE_RCU.
o Documentation has not yet been updated. In the meantime, it is
important to note that the "rcu_kthread" process must have its
priority set to at least SCHED_FIFO:2, given that rcutorture
uses SCHED_FIFO:1 to do the testing. This priority can be set
using the new RCU_BOOST_PRIO kernel configuration parameter
or by using the "chrt" command, for example, "chrt -p 2 6".
o A great deal more testing is required.
o There are probably quite a few bugs remaining, even in the parts
done thus far.
Thanx, Paul
b/include/linux/init_task.h | 9 +
b/include/linux/rcupdate.h | 1
b/include/linux/rcutiny.h | 8 -
b/include/linux/rcutree.h | 1
b/include/linux/sched.h | 11 +
b/init/Kconfig | 39 ++++++
b/kernel/rcutiny.c | 71 +++++++++--
b/kernel/rcutiny_plugin.h | 15 +-
b/kernel/rcutorture.c | 270 ++++++++++++++++++++++++++++++++++++++++++--
kernel/rcutiny.c | 66 ++++------
kernel/rcutiny_plugin.h | 206 ++++++++++++++++++++++++++++++++-
11 files changed, 608 insertions(+), 89 deletions(-)
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH RFC tip/core/rcu 1/3] rcu: add priority-inversion testing to rcutorture
2010-09-29 0:33 [PATCH RFC tip/core/rcu 0/3] v1 RCU priority boosting Paul E. McKenney
@ 2010-09-29 0:33 ` Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 2/3] rcu: move TINY_RCU from softirq to kthread Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 3/3] rcu: priority boosting for TINY_PREEMPT_RCU Paul E. McKenney
2 siblings, 0 replies; 4+ messages in thread
From: Paul E. McKenney @ 2010-09-29 0:33 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, Valdis.Kletnieks, dhowells, eric.dumazet,
Paul E. McKenney
Add an optional test to force long-term preemption of RCU read-side
critical sections, controlled by new test_boost, test_boost_interval,
and test_boost_duration module parameters. This is to be used to
test RCU priority boosting.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
kernel/rcutorture.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 259 insertions(+), 11 deletions(-)
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9d8e8fb..9ea6050 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -47,6 +47,7 @@
#include <linux/srcu.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
+#include <linux/sched.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
@@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */
static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
static int fqs_holdoff = 0; /* Hold time within burst (us). */
static int fqs_stutter = 3; /* Wait time between bursts (s). */
+static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
+static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
+static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444);
MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
module_param(fqs_stutter, int, 0444);
MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
+module_param(test_boost, int, 0444);
+MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
+module_param(test_boost_interval, int, 0444);
+MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds.");
+module_param(test_boost_duration, int, 0444);
+MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds.");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@@ -109,6 +119,7 @@ static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
static struct task_struct *fqs_task;
+static struct task_struct *boost_tasks[NR_CPUS];
#define RCU_TORTURE_PIPE_LEN 10
@@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail;
static atomic_t n_rcu_torture_free;
static atomic_t n_rcu_torture_mberror;
static atomic_t n_rcu_torture_error;
+static long n_rcu_torture_boost_ktrerror;
+static long n_rcu_torture_boost_rterror;
+static long n_rcu_torture_boost_allocerror;
+static long n_rcu_torture_boost_afferror;
+static long n_rcu_torture_boost_failure;
+static long n_rcu_torture_boosts;
static long n_rcu_torture_timers;
static struct list_head rcu_torture_removed;
static cpumask_var_t shuffle_tmp_mask;
@@ -147,6 +164,16 @@ static int stutter_pause_test;
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
+#ifdef CONFIG_BOOST_RCU
+#define rcu_can_boost() 1
+#else /* #ifdef CONFIG_BOOST_RCU */
+#define rcu_can_boost() 0
+#endif /* #else #ifdef CONFIG_BOOST_RCU */
+
+static unsigned long boost_starttime; /* jiffies of next boost test start. */
+DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
+ /* and boost task create/destroy. */
+
/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
@@ -277,6 +304,7 @@ struct rcu_torture_ops {
void (*fqs)(void);
int (*stats)(char *page);
int irq_capable;
+ int can_boost;
char *name;
};
@@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = {
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
+ .can_boost = rcu_can_boost(),
.name = "rcu"
};
@@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
+ .can_boost = rcu_can_boost(),
.name = "rcu_sync"
};
@@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
+ .can_boost = rcu_can_boost(),
.name = "rcu_expedited"
};
@@ -684,6 +715,110 @@ static struct rcu_torture_ops sched_expedited_ops = {
};
/*
+ * RCU torture priority-boost testing. Runs one real-time thread per
+ * CPU for moderate bursts, repeatedly registering RCU callbacks and
+ * spinning waiting for them to be invoked. If a given callback takes
+ * too long to be invoked, we assume that priority inversion has occurred.
+ */
+
+struct rcu_boost_inflight {
+ struct rcu_head rcu;
+ int inflight;
+};
+
+static void rcu_torture_boost_cb(struct rcu_head *head)
+{
+ struct rcu_boost_inflight *rbip =
+ container_of(head, struct rcu_boost_inflight, rcu);
+
+ smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */
+ rbip->inflight = 0;
+}
+
+static int rcu_torture_boost(void *arg)
+{
+ unsigned long call_rcu_time;
+ unsigned long endtime;
+ unsigned long oldstarttime;
+ struct rcu_boost_inflight rbi = { .inflight = 0 };
+ struct sched_param sp;
+
+ VERBOSE_PRINTK_STRING("rcu_torture_boost started");
+
+ /* Set real-time priority. */
+ sp.sched_priority = 1;
+ if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) {
+ VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!");
+ n_rcu_torture_boost_rterror++;
+ }
+
+ /* Each pass through the following loop does one boost-test cycle. */
+ do {
+ /* Wait for the next test interval. */
+ oldstarttime = boost_starttime;
+ while (jiffies - oldstarttime > ULONG_MAX / 2) {
+ schedule_timeout_uninterruptible(1);
+ rcu_stutter_wait("rcu_torture_boost");
+ if (kthread_should_stop() ||
+ fullstop != FULLSTOP_DONTSTOP)
+ goto checkwait;
+ }
+
+ /* Do one boost-test interval. */
+ endtime = oldstarttime + test_boost_duration * HZ;
+ call_rcu_time = jiffies;
+ while (jiffies - endtime > ULONG_MAX / 2) {
+ /* If we don't have a callback in flight, post one. */
+ if (!rbi.inflight) {
+ smp_mb(); /* RCU core before ->inflight = 1. */
+ rbi.inflight = 1;
+ call_rcu(&rbi.rcu, rcu_torture_boost_cb);
+ if (jiffies - call_rcu_time >
+ test_boost_duration * HZ - HZ / 2) {
+ VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed");
+ n_rcu_torture_boost_failure++;
+ }
+ call_rcu_time = jiffies;
+ }
+ cond_resched();
+ rcu_stutter_wait("rcu_torture_boost");
+ if (kthread_should_stop() ||
+ fullstop != FULLSTOP_DONTSTOP)
+ goto checkwait;
+ }
+
+ /*
+ * Set the start time of the next test interval.
+ * Yes, this is vulnerable to long delays, but such
+ * delays simply cause a false negative for the next
+ * interval. Besides, we are running at RT priority,
+ * so delays should be relatively rare.
+ */
+ while (oldstarttime == boost_starttime) {
+ if (mutex_trylock(&boost_mutex)) {
+ boost_starttime = jiffies +
+ test_boost_interval * HZ;
+ n_rcu_torture_boosts++;
+ mutex_unlock(&boost_mutex);
+ break;
+ }
+ schedule_timeout_uninterruptible(1);
+ }
+
+ /* Go do the stutter. */
+checkwait: rcu_stutter_wait("rcu_torture_boost");
+ } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+
+ /* Clean up and exit. */
+ VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
+ rcutorture_shutdown_absorb("rcu_torture_boost");
+ while (!kthread_should_stop() || rbi.inflight)
+ schedule_timeout_uninterruptible(1);
+ smp_mb(); /* order accesses to ->inflight before stack-frame death. */
+ return 0;
+}
+
+/*
* RCU torture force-quiescent-state kthread. Repeatedly induces
* bursts of calls to force_quiescent_state(), increasing the probability
* of occurrence of some important types of race conditions.
@@ -933,7 +1068,8 @@ rcu_torture_printk(char *page)
cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
cnt += sprintf(&page[cnt],
"rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
- "rtmbe: %d nt: %ld",
+ "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld "
+ "rtbf: %ld rtb: %ld nt: %ld",
rcu_torture_current,
rcu_torture_current_version,
list_empty(&rcu_torture_freelist),
@@ -941,8 +1077,19 @@ rcu_torture_printk(char *page)
atomic_read(&n_rcu_torture_alloc_fail),
atomic_read(&n_rcu_torture_free),
atomic_read(&n_rcu_torture_mberror),
+ n_rcu_torture_boost_ktrerror,
+ n_rcu_torture_boost_rterror,
+ n_rcu_torture_boost_allocerror,
+ n_rcu_torture_boost_afferror,
+ n_rcu_torture_boost_failure,
+ n_rcu_torture_boosts,
n_rcu_torture_timers);
- if (atomic_read(&n_rcu_torture_mberror) != 0)
+ if (atomic_read(&n_rcu_torture_mberror) != 0 ||
+ n_rcu_torture_boost_ktrerror != 0 ||
+ n_rcu_torture_boost_rterror != 0 ||
+ n_rcu_torture_boost_allocerror != 0 ||
+ n_rcu_torture_boost_afferror != 0 ||
+ n_rcu_torture_boost_failure != 0)
cnt += sprintf(&page[cnt], " !!!");
cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
if (i > 1) {
@@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg)
}
static inline void
-rcu_torture_print_module_parms(char *tag)
+rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
{
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
"shuffle_interval=%d stutter=%d irqreader=%d "
- "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
+ "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
+ "test_boost=%d/%d test_boost_interval=%d "
+ "test_boost_duration=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
- stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
+ stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
+ test_boost, cur_ops->can_boost,
+ test_boost_interval, test_boost_duration);
}
-static struct notifier_block rcutorture_nb = {
+static struct notifier_block rcutorture_shutdown_nb = {
.notifier_call = rcutorture_shutdown_notify,
};
+static void rcutorture_booster_cleanup(int cpu)
+{
+ struct task_struct *t;
+
+ if (boost_tasks[cpu] == NULL)
+ return;
+ mutex_lock(&boost_mutex);
+ VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task");
+ t = boost_tasks[cpu];
+ boost_tasks[cpu] = NULL;
+ mutex_unlock(&boost_mutex);
+
+ /* This must be outside of the mutex, otherwise deadlock! */
+ kthread_stop(t);
+}
+
+static int rcutorture_booster_init(int cpu)
+{
+ int retval;
+
+ if (boost_tasks[cpu] != NULL)
+ return 0; /* Already created, nothing more to do. */
+
+ /* Don't allow time recalculation while creating a new task. */
+ mutex_lock(&boost_mutex);
+ VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
+ boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL,
+ "rcu_torture_boost");
+ if (IS_ERR(boost_tasks[cpu])) {
+ retval = PTR_ERR(boost_tasks[cpu]);
+ VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
+ n_rcu_torture_boost_ktrerror++;
+ boost_tasks[cpu] = NULL;
+ mutex_unlock(&boost_mutex);
+ return retval;
+ }
+ kthread_bind(boost_tasks[cpu], cpu);
+ wake_up_process(boost_tasks[cpu]);
+ mutex_unlock(&boost_mutex);
+ return 0;
+}
+
+static int rcutorture_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
+ (void)rcutorture_booster_init(cpu);
+ break;
+ case CPU_DOWN_PREPARE:
+ rcutorture_booster_cleanup(cpu);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block rcutorture_cpu_nb = {
+ .notifier_call = rcutorture_cpu_notify,
+};
+
static void
rcu_torture_cleanup(void)
{
@@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void)
}
fullstop = FULLSTOP_RMMOD;
mutex_unlock(&fullstop_mutex);
- unregister_reboot_notifier(&rcutorture_nb);
+ unregister_reboot_notifier(&rcutorture_shutdown_nb);
if (stutter_task) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
kthread_stop(stutter_task);
@@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void)
kthread_stop(fqs_task);
}
fqs_task = NULL;
+ if ((test_boost == 1 && cur_ops->can_boost) ||
+ test_boost == 2) {
+ unregister_cpu_notifier(&rcutorture_cpu_nb);
+ for_each_possible_cpu(i)
+ rcutorture_booster_cleanup(i);
+ }
/* Wait for all RCU callbacks to fire. */
@@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void)
if (cur_ops->cleanup)
cur_ops->cleanup();
if (atomic_read(&n_rcu_torture_error))
- rcu_torture_print_module_parms("End of test: FAILURE");
+ rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
else
- rcu_torture_print_module_parms("End of test: SUCCESS");
+ rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
}
static int __init
@@ -1242,7 +1464,7 @@ rcu_torture_init(void)
nrealreaders = nreaders;
else
nrealreaders = 2 * num_online_cpus();
- rcu_torture_print_module_parms("Start of test");
+ rcu_torture_print_module_parms(cur_ops, "Start of test");
fullstop = FULLSTOP_DONTSTOP;
/* Set up the freelist. */
@@ -1263,6 +1485,12 @@ rcu_torture_init(void)
atomic_set(&n_rcu_torture_free, 0);
atomic_set(&n_rcu_torture_mberror, 0);
atomic_set(&n_rcu_torture_error, 0);
+ n_rcu_torture_boost_ktrerror = 0;
+ n_rcu_torture_boost_rterror = 0;
+ n_rcu_torture_boost_allocerror = 0;
+ n_rcu_torture_boost_afferror = 0;
+ n_rcu_torture_boost_failure = 0;
+ n_rcu_torture_boosts = 0;
for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
atomic_set(&rcu_torture_wcount[i], 0);
for_each_possible_cpu(cpu) {
@@ -1376,7 +1604,27 @@ rcu_torture_init(void)
goto unwind;
}
}
- register_reboot_notifier(&rcutorture_nb);
+ if (test_boost_interval < 1)
+ test_boost_interval = 1;
+ if (test_boost_duration < 2)
+ test_boost_duration = 2;
+ if ((test_boost == 1 && cur_ops->can_boost) ||
+ test_boost == 2) {
+ int retval;
+
+ boost_starttime = jiffies + test_boost_interval * HZ;
+ register_cpu_notifier(&rcutorture_cpu_nb);
+ for_each_possible_cpu(i) {
+ if (cpu_is_offline(i))
+ continue; /* Heuristic: CPU can go offline. */
+ retval = rcutorture_booster_init(i);
+ if (retval < 0) {
+ firsterr = retval;
+ goto unwind;
+ }
+ }
+ }
+ register_reboot_notifier(&rcutorture_shutdown_nb);
mutex_unlock(&fullstop_mutex);
return 0;
--
1.7.0.6
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH RFC tip/core/rcu 2/3] rcu: move TINY_RCU from softirq to kthread
2010-09-29 0:33 [PATCH RFC tip/core/rcu 0/3] v1 RCU priority boosting Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 1/3] rcu: add priority-inversion testing to rcutorture Paul E. McKenney
@ 2010-09-29 0:33 ` Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 3/3] rcu: priority boosting for TINY_PREEMPT_RCU Paul E. McKenney
2 siblings, 0 replies; 4+ messages in thread
From: Paul E. McKenney @ 2010-09-29 0:33 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, Valdis.Kletnieks, dhowells, eric.dumazet,
Paul E. McKenney
If RCU priority boosting is to be meaningful, callback invocation must
be boosted in addition to preempted RCU readers. Otherwise, in presence
of CPU real-time threads, the grace period ends, but the callbacks don't
get invoked. If the callbacks don't get invoked, the associated memory
doesn't get freed, so the system is still subject to OOM.
But it is not reasonable to priority-boost RCU_SOFTIRQ, so this commit
moves the callback invocations to a kthread, which can be boosted easily.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
include/linux/rcupdate.h | 1 -
include/linux/rcutiny.h | 8 ++---
include/linux/rcutree.h | 1 +
kernel/rcutiny.c | 71 +++++++++++++++++++++++++++++++++++++--------
kernel/rcutiny_plugin.h | 15 +++++----
5 files changed, 70 insertions(+), 26 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 03cda7b..7142ee3 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -118,7 +118,6 @@ static inline int rcu_preempt_depth(void)
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
/* Internal to kernel */
-extern void rcu_init(void);
extern void rcu_sched_qs(int cpu);
extern void rcu_bh_qs(int cpu);
extern void rcu_check_callbacks(int cpu, int user);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 13877cb..ea025a6 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,7 +27,9 @@
#include <linux/cache.h>
-#define rcu_init_sched() do { } while (0)
+static inline void rcu_init(void)
+{
+}
#ifdef CONFIG_TINY_RCU
@@ -125,16 +127,12 @@ static inline void rcu_cpu_stall_reset(void)
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
extern int rcu_scheduler_active __read_mostly;
extern void rcu_scheduler_starting(void);
-
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
static inline void rcu_scheduler_starting(void)
{
}
-
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 95518e6..c0e9683 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,6 +30,7 @@
#ifndef __LINUX_RCUTREE_H
#define __LINUX_RCUTREE_H
+extern void rcu_init(void);
extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
extern void rcu_cpu_stall_reset(void);
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index d806735..86eef29 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,8 +59,15 @@ int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */
+static struct task_struct *rcu_cbs_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq);
+static unsigned long have_rcu_cbs;
+static void invoke_rcu_cbs(void);
+
/* Forward declarations for rcutiny_plugin.h. */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static int rcu_cbs(void *arg);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);
@@ -123,7 +130,7 @@ void rcu_sched_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
- raise_softirq(RCU_SOFTIRQ);
+ invoke_rcu_cbs();
}
/*
@@ -132,7 +139,7 @@ void rcu_sched_qs(int cpu)
void rcu_bh_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
- raise_softirq(RCU_SOFTIRQ);
+ invoke_rcu_cbs();
}
/*
@@ -152,10 +159,10 @@ void rcu_check_callbacks(int cpu, int user)
}
/*
- * Helper function for rcu_process_callbacks() that operates on the
- * specified rcu_ctrlkblk structure.
+ * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
+ * whose grace period has elapsed.
*/
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
struct rcu_head *next, *list;
unsigned long flags;
@@ -180,19 +187,52 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
+ local_bh_disable();
list->func(list);
+ local_bh_enable();
list = next;
}
}
/*
- * Invoke any callbacks whose grace period has completed.
+ * This kthread invokes RCU callbacks whose grace periods have
+ * elapsed. It is awakened as needed, and takes the place of the
+ * RCU_SOFTIRQ that was used previously for this purpose.
+ * This is a kthread, but it is never stopped, at least not until
+ * the system goes down.
+ */
+static int rcu_cbs(void *arg)
+{
+ unsigned long work;
+ unsigned long flags;
+
+ for (;;) {
+ wait_event(rcu_cbs_wq, have_rcu_cbs != 0);
+ local_irq_save(flags);
+ work = have_rcu_cbs;
+ have_rcu_cbs = 0;
+ local_irq_restore(flags);
+ if (work) {
+ rcu_process_callbacks(&rcu_sched_ctrlblk);
+ rcu_process_callbacks(&rcu_bh_ctrlblk);
+ rcu_preempt_process_callbacks();
+ }
+ }
+
+ return 0; /* Not reached, but needed to shut gcc up. */
+}
+
+/*
+ * Wake up rcu_cbs() to process callbacks now eligible for invocation.
*/
-static void rcu_process_callbacks(struct softirq_action *unused)
+static void invoke_rcu_cbs(void)
{
- __rcu_process_callbacks(&rcu_sched_ctrlblk);
- __rcu_process_callbacks(&rcu_bh_ctrlblk);
- rcu_preempt_process_callbacks();
+ unsigned long flags;
+
+ local_irq_save(flags);
+ have_rcu_cbs = 1;
+ wake_up(&rcu_cbs_wq);
+ local_irq_restore(flags);
}
/*
@@ -282,7 +322,12 @@ void rcu_barrier_sched(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-void __init rcu_init(void)
+/*
+ * Spawn the kthread that invokes RCU callbacks.
+ */
+static int __init rcu_spawn_kthreads(void)
{
- open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+ rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs");
+ return 0;
}
+early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 6ceca4f..95f9239 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -22,6 +22,8 @@
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
+#include <linux/kthread.h>
+
#ifdef CONFIG_TINY_PREEMPT_RCU
#include <linux/delay.h>
@@ -164,9 +166,9 @@ static void rcu_preempt_cpu_qs(void)
if (!rcu_preempt_blocked_readers_any())
rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
- /* If there are done callbacks, make RCU_SOFTIRQ process them. */
+ /* If there are done callbacks, cause them to be invoked. */
if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
- raise_softirq(RCU_SOFTIRQ);
+ invoke_rcu_cbs();
}
/*
@@ -374,7 +376,7 @@ static void rcu_preempt_check_callbacks(void)
rcu_preempt_cpu_qs();
if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
rcu_preempt_ctrlblk.rcb.donetail)
- raise_softirq(RCU_SOFTIRQ);
+ invoke_rcu_cbs();
if (rcu_preempt_gp_in_progress() &&
rcu_cpu_blocking_cur_gp() &&
rcu_preempt_running_reader())
@@ -383,7 +385,7 @@ static void rcu_preempt_check_callbacks(void)
/*
* TINY_PREEMPT_RCU has an extra callback-list tail pointer to
- * update, so this is invoked from __rcu_process_callbacks() to
+ * update, so this is invoked from rcu_process_callbacks() to
* handle that case. Of course, it is invoked for all flavors of
* RCU, but RCU callbacks can appear only on one of the lists, and
* neither ->nexttail nor ->donetail can possibly be NULL, so there
@@ -400,7 +402,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
*/
static void rcu_preempt_process_callbacks(void)
{
- __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
+ rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
}
/*
@@ -599,14 +601,13 @@ static void rcu_preempt_process_callbacks(void)
#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
#include <linux/kernel_stat.h>
/*
* During boot, we forgive RCU lockdep issues. After this function is
* invoked, we start taking RCU lockdep issues seriously.
*/
-void rcu_scheduler_starting(void)
+void __init rcu_scheduler_starting(void)
{
WARN_ON(nr_context_switches() > 0);
rcu_scheduler_active = 1;
--
1.7.0.6
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH RFC tip/core/rcu 3/3] rcu: priority boosting for TINY_PREEMPT_RCU
2010-09-29 0:33 [PATCH RFC tip/core/rcu 0/3] v1 RCU priority boosting Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 1/3] rcu: add priority-inversion testing to rcutorture Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 2/3] rcu: move TINY_RCU from softirq to kthread Paul E. McKenney
@ 2010-09-29 0:33 ` Paul E. McKenney
2 siblings, 0 replies; 4+ messages in thread
From: Paul E. McKenney @ 2010-09-29 0:33 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, laijs, dipankar, akpm, mathieu.desnoyers, josh, niv, tglx,
peterz, rostedt, Valdis.Kletnieks, dhowells, eric.dumazet,
Paul E. McKenney
clean compile. FIXME: needs a decent changelog entry.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
include/linux/init_task.h | 9 ++-
include/linux/sched.h | 11 ++-
init/Kconfig | 39 +++++++++
kernel/rcutiny.c | 66 ++++++---------
kernel/rcutiny_plugin.h | 206 +++++++++++++++++++++++++++++++++++++++++++--
5 files changed, 279 insertions(+), 52 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 2fea6c8..69f91aa 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -81,6 +81,12 @@ extern struct group_info init_groups;
*/
# define CAP_INIT_BSET CAP_FULL_SET
+#ifdef CONFIG_RCU_BOOST
+#define INIT_TASK_RCU_BOOST() \
+ .rcu_boost_mutex = NULL,
+#else
+#define INIT_TASK_RCU_BOOST()
+#endif
#ifdef CONFIG_TREE_PREEMPT_RCU
#define INIT_TASK_RCU_TREE_PREEMPT() \
.rcu_blocked_node = NULL,
@@ -92,7 +98,8 @@ extern struct group_info init_groups;
.rcu_read_lock_nesting = 0, \
.rcu_read_unlock_special = 0, \
.rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
- INIT_TASK_RCU_TREE_PREEMPT()
+ INIT_TASK_RCU_TREE_PREEMPT() \
+ INIT_TASK_RCU_BOOST()
#else
#define INIT_TASK_RCU_PREEMPT(tsk)
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e18473f..ed1a9bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1210,6 +1210,9 @@ struct task_struct {
#ifdef CONFIG_TREE_PREEMPT_RCU
struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+ struct rt_mutex *rcu_boost_mutex;
+#endif /* #ifdef CONFIG_RCU_BOOST */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
@@ -1745,7 +1748,8 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#ifdef CONFIG_PREEMPT_RCU
#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
static inline void rcu_copy_process(struct task_struct *p)
{
@@ -1753,7 +1757,10 @@ static inline void rcu_copy_process(struct task_struct *p)
p->rcu_read_unlock_special = 0;
#ifdef CONFIG_TREE_PREEMPT_RCU
p->rcu_blocked_node = NULL;
-#endif
+#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCU_BOOST
+ p->rcu_boost_mutex = NULL;
+#endif /* #ifdef CONFIG_RCU_BOOST */
INIT_LIST_HEAD(&p->rcu_node_entry);
}
diff --git a/init/Kconfig b/init/Kconfig
index a619a1a..8187783 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -450,6 +450,45 @@ config TREE_RCU_TRACE
TREE_PREEMPT_RCU implementations, permitting Makefile to
trivially select kernel/rcutree_trace.c.
+config RCU_BOOST
+ bool "Enable RCU priority boosting"
+ depends on TINY_PREEMPT_RCU
+ default n
+ help
+ This option boosts the priority of preempted RCU readers that
+ block the current preemptible RCU grace period for too long.
+ This option also prevents heavy loads from blocking RCU
+ callback invocation for all flavors of RCU.
+
+ Say Y here if you are working with real-time apps or heavy loads
+ Say N here if you are unsure.
+
+config RCU_BOOST_PRIO
+ int "Real-time priority to boost RCU readers to"
+ range 1 99
+ depends on RCU_BOOST
+ default 1
+ help
+ This option specifies the real-time priority to which preempted
+ RCU readers are to be boosted. If you are working with CPU-bound
+ real-time applications, you should specify a priority higher then
+ the highest-priority CPU-bound application.
+
+ Specify the real-time priority, or take the default if unsure.
+
+config RCU_BOOST_DELAY
+ int "Milliseconds to delay boosting after RCU grace-period start"
+ range 0 3000
+ depends on RCU_BOOST
+ default 500
+ help
+ This option specifies the time to wait after the beginning of
+ a given grace period before priority-boosting preempted RCU
+ readers blocking that grace period. Note that any RCU reader
+ blocking an expedited RCU grace period is boosted immediately.
+
+ Accept the default if unsure.
+
endmenu # "RCU Subsystem"
config IKCONFIG
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 86eef29..93d1665 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -36,38 +36,16 @@
#include <linux/time.h>
#include <linux/cpu.h>
-/* Global control variables for rcupdate callback mechanism. */
-struct rcu_ctrlblk {
- struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
- struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
- struct rcu_head **curtail; /* ->next pointer of last CB. */
-};
-
-/* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
- .donetail = &rcu_sched_ctrlblk.rcucblist,
- .curtail = &rcu_sched_ctrlblk.rcucblist,
-};
-
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
- .donetail = &rcu_bh_ctrlblk.rcucblist,
- .curtail = &rcu_bh_ctrlblk.rcucblist,
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-
-/* Controls for rcu_cbs() kthread, replacing RCU_SOFTIRQ used previously. */
-static struct task_struct *rcu_cbs_task;
-static DECLARE_WAIT_QUEUE_HEAD(rcu_cbs_wq);
-static unsigned long have_rcu_cbs;
-static void invoke_rcu_cbs(void);
+/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */
+static struct task_struct *rcu_kthread_task;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
+static unsigned long have_rcu_kthread_work;
+static void invoke_rcu_kthread(void);
/* Forward declarations for rcutiny_plugin.h. */
+struct rcu_ctrlblk;
static void rcu_process_callbacks(struct rcu_ctrlblk *rcp);
-static int rcu_cbs(void *arg);
+static int rcu_kthread(void *arg);
static void __call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu),
struct rcu_ctrlblk *rcp);
@@ -130,7 +108,7 @@ void rcu_sched_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
- invoke_rcu_cbs();
+ invoke_rcu_kthread();
}
/*
@@ -139,7 +117,7 @@ void rcu_sched_qs(int cpu)
void rcu_bh_qs(int cpu)
{
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
- invoke_rcu_cbs();
+ invoke_rcu_kthread();
}
/*
@@ -201,37 +179,41 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
* This is a kthread, but it is never stopped, at least not until
* the system goes down.
*/
-static int rcu_cbs(void *arg)
+static int rcu_kthread(void *arg)
{
unsigned long work;
+ unsigned long morework;
unsigned long flags;
for (;;) {
- wait_event(rcu_cbs_wq, have_rcu_cbs != 0);
+ wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0);
+ morework = rcu_boost();
local_irq_save(flags);
- work = have_rcu_cbs;
- have_rcu_cbs = 0;
+ work = have_rcu_kthread_work;
+ have_rcu_kthread_work = morework;
local_irq_restore(flags);
if (work) {
rcu_process_callbacks(&rcu_sched_ctrlblk);
rcu_process_callbacks(&rcu_bh_ctrlblk);
rcu_preempt_process_callbacks();
}
+ schedule_timeout_interruptible(1); /* Leave CPU for others. */
}
return 0; /* Not reached, but needed to shut gcc up. */
}
/*
- * Wake up rcu_cbs() to process callbacks now eligible for invocation.
+ * Wake up rcu_kthread() to process callbacks now eligible for invocation
+ * or to boost readers.
*/
-static void invoke_rcu_cbs(void)
+static void invoke_rcu_kthread(void)
{
unsigned long flags;
local_irq_save(flags);
- have_rcu_cbs = 1;
- wake_up(&rcu_cbs_wq);
+ have_rcu_kthread_work = 1;
+ wake_up(&rcu_kthread_wq);
local_irq_restore(flags);
}
@@ -327,7 +309,11 @@ EXPORT_SYMBOL_GPL(rcu_barrier_sched);
*/
static int __init rcu_spawn_kthreads(void)
{
- rcu_cbs_task = kthread_run(rcu_cbs, NULL, "rcu_cbs");
+ struct sched_param sp;
+
+ rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
+ sp.sched_priority = RCU_BOOST_PRIO;
+ sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
return 0;
}
early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 95f9239..36d203c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -24,6 +24,29 @@
#include <linux/kthread.h>
+/* Global control variables for rcupdate callback mechanism. */
+struct rcu_ctrlblk {
+ struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
+ struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
+ struct rcu_head **curtail; /* ->next pointer of last CB. */
+};
+
+/* Definition for rcupdate control block. */
+static struct rcu_ctrlblk rcu_sched_ctrlblk = {
+ .donetail = &rcu_sched_ctrlblk.rcucblist,
+ .curtail = &rcu_sched_ctrlblk.rcucblist,
+};
+
+static struct rcu_ctrlblk rcu_bh_ctrlblk = {
+ .donetail = &rcu_bh_ctrlblk.rcucblist,
+ .curtail = &rcu_bh_ctrlblk.rcucblist,
+};
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
#ifdef CONFIG_TINY_PREEMPT_RCU
#include <linux/delay.h>
@@ -55,10 +78,20 @@ struct rcu_preempt_ctrlblk {
/* if there is no such task. If there */
/* is no current expedited grace period, */
/* then there cannot be any such task. */
+#ifdef CONFIG_RCU_BOOST
+ struct list_head *boost_tasks;
+ /* Pointer to first task that needs to be */
+ /* priority-boosted, or NULL if no priority */
+ /* boosting is needed. If there is no */
+ /* current or expedited grace period, there */
+ /* can be no such task. */
+#endif /* #ifdef CONFIG_RCU_BOOST */
u8 gpnum; /* Current grace period. */
u8 gpcpu; /* Last grace period blocked by the CPU. */
u8 completed; /* Last grace period completed. */
/* If all three are equal, RCU is idle. */
+ s8 boosted_this_gp; /* Has boosting already happened? */
+ unsigned long boost_time; /* When to start boosting (jiffies) */
};
static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
@@ -124,6 +157,130 @@ static int rcu_preempt_gp_in_progress(void)
}
/*
+ * Advance a ->blkd_tasks-list pointer to the next entry, instead
+ * returning NULL if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t)
+{
+ struct list_head *np;
+
+ np = t->rcu_node_entry.next;
+ if (np == &rcu_preempt_ctrlblk.blkd_tasks)
+ np = NULL;
+ return np;
+}
+
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
+ * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
+ */
+static int rcu_boost(void)
+{
+ unsigned long flags;
+ struct rt_mutex mtx;
+ struct list_head *np;
+ struct task_struct *t;
+
+ if (rcu_preempt_ctrlblk.boost_tasks == NULL)
+ return 0; /* Nothing to boost. */
+ raw_local_irq_save(flags);
+ rcu_preempt_ctrlblk.boosted_this_gp++;
+ t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct,
+ rcu_node_entry);
+ np = rcu_next_node_entry(t);
+ rt_mutex_init_proxy_locked(&mtx, t);
+ t->rcu_boost_mutex = &mtx;
+ t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+ raw_local_irq_restore(flags);
+ rt_mutex_lock(&mtx);
+ rt_mutex_unlock(&mtx);
+ return rcu_preempt_ctrlblk.boost_tasks != NULL;
+}
+
+/*
+ * Check to see if it is now time to start boosting RCU readers blocking
+ * the current grace period, and, if so, tell the rcu_kthread_task to
+ * start boosting them. If there is an expedited boost in progress,
+ * we wait for it to complete.
+ */
+static void rcu_initiate_boost(void)
+{
+ if (rcu_preempt_ctrlblk.gp_tasks != NULL &&
+ rcu_preempt_ctrlblk.boost_tasks == NULL &&
+ rcu_preempt_ctrlblk.boosted_this_gp == 0 &&
+ ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) {
+ rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks;
+ invoke_rcu_kthread();
+ }
+}
+
+/*
+ * Initiate boosting for an expedited grace period.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) {
+ rcu_preempt_ctrlblk.boost_tasks =
+ rcu_preempt_ctrlblk.blkd_tasks.next;
+ rcu_preempt_ctrlblk.boosted_this_gp = -1;
+ invoke_rcu_kthread();
+ }
+ raw_local_irq_restore(flags);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000);
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+ rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+ if (rcu_preempt_ctrlblk.boosted_this_gp > 0)
+ rcu_preempt_ctrlblk.boosted_this_gp = 0;
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+/*
+ * If there is no RCU priority boosting, we don't boost.
+ */
+static int rcu_boost(void)
+{
+ return 0;
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate boosting.
+ */
+static void rcu_initiate_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, we don't initiate expedited boosting.
+ */
+static void rcu_initiate_expedited_boost(void)
+{
+}
+
+/*
+ * If there is no RCU priority boosting, nothing to do at grace-period start.
+ */
+static void rcu_preempt_boost_start_gp(void)
+{
+}
+
+#endif /* else #ifdef CONFIG_RCU_BOOST */
+
+/*
* Record a preemptible-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
* in a quiescent state. There might be any number of tasks blocked
@@ -150,12 +307,14 @@ static void rcu_preempt_cpu_qs(void)
rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
- /*
- * If there is no GP, or if blocked readers are still blocking GP,
- * then there is nothing more to do.
- */
+ /* If there is no GP then there is nothing more to do. */
if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
return;
+ /* If there are blocked readers, go check up on boosting. */
+ if (rcu_preempt_blocked_readers_cgp()) {
+ rcu_initiate_boost();
+ return;
+ }
/* Advance callbacks. */
rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
@@ -168,7 +327,7 @@ static void rcu_preempt_cpu_qs(void)
/* If there are done callbacks, cause them to be invoked. */
if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
- invoke_rcu_cbs();
+ invoke_rcu_kthread();
}
/*
@@ -186,6 +345,9 @@ static void rcu_preempt_start_gp(void)
rcu_preempt_ctrlblk.gp_tasks =
rcu_preempt_ctrlblk.blkd_tasks.next;
+ /* Set up for RCU priority boosting. */
+ rcu_preempt_boost_start_gp();
+
/* If there is no running reader, CPU is done with GP. */
if (!rcu_preempt_running_reader())
rcu_preempt_cpu_qs();
@@ -306,14 +468,16 @@ static void rcu_read_unlock_special(struct task_struct *t)
*/
empty = !rcu_preempt_blocked_readers_cgp();
empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
- np = t->rcu_node_entry.next;
- if (np == &rcu_preempt_ctrlblk.blkd_tasks)
- np = NULL;
+ np = rcu_next_node_entry(t);
list_del(&t->rcu_node_entry);
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
rcu_preempt_ctrlblk.gp_tasks = np;
if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
rcu_preempt_ctrlblk.exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+ if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks)
+ rcu_preempt_ctrlblk.boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
INIT_LIST_HEAD(&t->rcu_node_entry);
/*
@@ -333,6 +497,14 @@ static void rcu_read_unlock_special(struct task_struct *t)
if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
rcu_report_exp_done();
}
+#ifdef CONFIG_RCU_BOOST
+ /* Unboost self if was boosted. */
+ if (special & RCU_READ_UNLOCK_BOOSTED) {
+ t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+ rt_mutex_unlock(t->rcu_boost_mutex);
+ t->rcu_boost_mutex = NULL;
+ }
+#endif /* #ifdef CONFIG_RCU_BOOST */
local_irq_restore(flags);
}
@@ -376,7 +548,7 @@ static void rcu_preempt_check_callbacks(void)
rcu_preempt_cpu_qs();
if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
rcu_preempt_ctrlblk.rcb.donetail)
- invoke_rcu_cbs();
+ invoke_rcu_kthread();
if (rcu_preempt_gp_in_progress() &&
rcu_cpu_blocking_cur_gp() &&
rcu_preempt_running_reader())
@@ -534,6 +706,7 @@ void synchronize_rcu_expedited(void)
/* Wait for tail of ->blkd_tasks list to drain. */
if (rcu_preempted_readers_exp())
+ rcu_initiate_expedited_boost();
wait_event(sync_rcu_preempt_exp_wq,
!rcu_preempted_readers_exp());
@@ -575,6 +748,15 @@ void exit_rcu(void)
#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
/*
+ * Because preemptible RCU does not exist, it is never necessary to
+ * boost preempted RCU readers.
+ */
+static int rcu_boost(void)
+{
+ return 0;
+}
+
+/*
* Because preemptible RCU does not exist, it never has any callbacks
* to check.
*/
@@ -614,3 +796,9 @@ void __init rcu_scheduler_starting(void)
}
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+#ifdef CONFIG_RCU_BOOST
+#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
+#else /* #ifdef CONFIG_RCU_BOOST */
+#define RCU_BOOST_PRIO 1
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
--
1.7.0.6
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-09-29 0:34 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-09-29 0:33 [PATCH RFC tip/core/rcu 0/3] v1 RCU priority boosting Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 1/3] rcu: add priority-inversion testing to rcutorture Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 2/3] rcu: move TINY_RCU from softirq to kthread Paul E. McKenney
2010-09-29 0:33 ` [PATCH RFC tip/core/rcu 3/3] rcu: priority boosting for TINY_PREEMPT_RCU Paul E. McKenney
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox