* [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
@ 2007-11-29 18:46 K. Prasad
0 siblings, 0 replies; 20+ messages in thread
From: K. Prasad @ 2007-11-29 18:46 UTC (permalink / raw)
To: linux-kernel; +Cc: dipankar, ego, mathieu.desnoyers, mingo, paulmck
This patch converts the tracing mechanism of Preempt RCU boosting into
markers. The handler functions for these markers are included inside
rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
chosen.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
include/linux/rcupreempt_trace.h | 46 ++++++++
kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
3 files changed, 251 insertions(+), 189 deletions(-)
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
@@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
extern int rcupreempt_mb_flag(int cpu);
extern char *rcupreempt_try_flip_state_name(void);
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+struct preempt_rcu_boost_trace {
+ unsigned long rbs_stat_task_boost_called;
+ unsigned long rbs_stat_task_boosted;
+ unsigned long rbs_stat_boost_called;
+ unsigned long rbs_stat_try_boost;
+ unsigned long rbs_stat_boosted;
+ unsigned long rbs_stat_unboost_called;
+ unsigned long rbs_stat_unboosted;
+ unsigned long rbs_stat_try_boost_readers;
+ unsigned long rbs_stat_boost_readers;
+ unsigned long rbs_stat_try_unboost_readers;
+ unsigned long rbs_stat_unboost_readers;
+ unsigned long rbs_stat_over_taken;
+};
+
+#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
+void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
+ void *private_data, const char *format, ...) \
+{ \
+ va_list ap; \
+ int cpu; \
+ struct preempt_rcu_boost_trace *boost_trace; \
+ va_start(ap, format); \
+ cpu = va_arg(ap, typeof(unsigned int)); \
+ boost_trace = (&per_cpu(boost_trace_data, cpu)); \
+ boost_trace->rbs_stat_##preempt_rcu_boost_var++; \
+ va_end(ap); \
+}
+
+struct preempt_rcu_boost_probe {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+#define INIT_PREEMPT_RCU_BOOST_PROBE(preempt_rcu_boost_probe_worker) \
+{ \
+ .name = __stringify(preempt_rcu_boost_probe_worker), \
+ .format = "%u", \
+ .probe_func = preempt_rcu_boost_probe_worker##_callback \
+}
+
+extern int read_rcu_boost_prio(void);
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
@@ -51,6 +51,163 @@ static char *rcupreempt_trace_buf;
static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
+static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
+static DEFINE_PER_CPU(struct preempt_rcu_boost_trace, boost_trace_data);
+
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(over_taken);
+
+static struct preempt_rcu_boost_probe preempt_rcu_boost_probe_array[] =
+{
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(over_taken)
+};
+
+static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ static DEFINE_MUTEX(mutex);
+ int cnt = 0;
+ int cpu;
+ struct preempt_rcu_boost_trace *prbt;
+ ssize_t bcount;
+ unsigned long task_boost_called = 0;
+ unsigned long task_boosted = 0;
+ unsigned long boost_called = 0;
+ unsigned long try_boost = 0;
+ unsigned long boosted = 0;
+ unsigned long unboost_called = 0;
+ unsigned long unboosted = 0;
+ unsigned long try_boost_readers = 0;
+ unsigned long boost_readers = 0;
+ unsigned long try_unboost_readers = 0;
+ unsigned long unboost_readers = 0;
+ unsigned long over_taken = 0;
+
+ mutex_lock(&mutex);
+
+ for_each_online_cpu(cpu) {
+ prbt = &per_cpu(boost_trace_data, cpu);
+
+ task_boost_called += prbt->rbs_stat_task_boost_called;
+ task_boosted += prbt->rbs_stat_task_boosted;
+ boost_called += prbt->rbs_stat_boost_called;
+ try_boost += prbt->rbs_stat_try_boost;
+ boosted += prbt->rbs_stat_boosted;
+ unboost_called += prbt->rbs_stat_unboost_called;
+ unboosted += prbt->rbs_stat_unboosted;
+ try_boost_readers += prbt->rbs_stat_try_boost_readers;
+ boost_readers += prbt->rbs_stat_boost_readers;
+ try_unboost_readers += prbt->rbs_stat_try_unboost_readers;
+ unboost_readers += prbt->rbs_stat_unboost_readers;
+ over_taken += prbt->rbs_stat_over_taken;
+ }
+
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boost_called = %ld\n",
+ task_boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boosted = %ld\n",
+ task_boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_called = %ld\n",
+ boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost = %ld\n",
+ try_boost);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boosted = %ld\n",
+ boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_called = %ld\n",
+ unboost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboosted = %ld\n",
+ unboosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost_readers = %ld\n",
+ try_boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_readers = %ld\n",
+ boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_unboost_readers = %ld\n",
+ try_unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_readers = %ld\n",
+ unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "over_taken = %ld\n",
+ over_taken);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "rcu_boost_prio = %d\n",
+ read_rcu_boost_prio());
+ bcount = simple_read_from_buffer(buffer, count, ppos,
+ rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
+ mutex_unlock(&mutex);
+
+ return bcount;
+}
+
+static struct file_operations rcuboost_fops = {
+ .read = rcuboost_read,
+};
+
+static struct dentry *rcuboostdir;
+int rcu_trace_boost_create(struct dentry *rcudir)
+{
+ rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
+ NULL, &rcuboost_fops);
+ if (!rcuboostdir)
+ return 0;
+
+ return 1;
+}
+
+void rcu_trace_boost_destroy(void)
+{
+ if (rcuboostdir)
+ debugfs_remove(rcuboostdir);
+ rcuboostdir = NULL;
+}
+
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
{
return &per_cpu(trace_data, cpu);
@@ -350,6 +507,10 @@ static int rcupreempt_debugfs_init(void)
if (!ctrsdir)
goto free_out;
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ if (!rcu_trace_boost_create(rcudir))
+ goto free_out;
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
return 0;
free_out:
if (ctrsdir)
@@ -382,6 +543,22 @@ static int __init rcupreempt_trace_init(
}
printk(KERN_INFO "RCU Preempt markers registered\n");
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++) {
+ struct preempt_rcu_boost_probe *p =
+ &preempt_rcu_boost_probe_array[i];
+ ret = marker_probe_register(p->name, p->format,
+ p->probe_func, p);
+ if (ret)
+ printk(KERN_INFO "Unable to register Preempt RCU Boost "
+ "probe %s\n", p->name);
+ ret = marker_arm(p->name);
+ if (ret)
+ printk(KERN_INFO "Unable to arm Preempt RCU Boost "
+ "markers %s\n", p->name);
+}
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
mutex_init(&rcupreempt_trace_mutex);
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
if (!rcupreempt_trace_buf)
@@ -400,6 +577,12 @@ static void __exit rcupreempt_trace_clea
marker_probe_unregister(rcupreempt_probe_array[i].name);
printk(KERN_INFO "RCU Preempt markers unregistered\n");
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ rcu_trace_boost_destroy();
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++)
+ marker_probe_unregister(preempt_rcu_boost_probe_array[i].name);
+ printk(KERN_INFO "Preempt RCU Boost markers unregistered\n");
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
debugfs_remove(statdir);
debugfs_remove(gpdir);
debugfs_remove(ctrsdir);
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt-boost.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
@@ -40,186 +40,9 @@ struct rcu_boost_dat {
int rbs_prio; /* CPU copy of rcu_boost_prio */
struct list_head rbs_toboost; /* Preempted RCU readers */
struct list_head rbs_boosted; /* RCU readers that have been boosted */
-#ifdef CONFIG_RCU_TRACE
- /* The rest are for statistics */
- unsigned long rbs_stat_task_boost_called;
- unsigned long rbs_stat_task_boosted;
- unsigned long rbs_stat_boost_called;
- unsigned long rbs_stat_try_boost;
- unsigned long rbs_stat_boosted;
- unsigned long rbs_stat_unboost_called;
- unsigned long rbs_stat_unboosted;
- unsigned long rbs_stat_try_boost_readers;
- unsigned long rbs_stat_boost_readers;
- unsigned long rbs_stat_try_unboost_readers;
- unsigned long rbs_stat_unboost_readers;
- unsigned long rbs_stat_over_taken;
-#endif /* CONFIG_RCU_TRACE */
};
static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
-#define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
-
-#ifdef CONFIG_RCU_TRACE
-
-#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
-static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
-
-static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- static DEFINE_MUTEX(mutex);
- int cnt = 0;
- int cpu;
- struct rcu_boost_dat *rbd;
- ssize_t bcount;
- unsigned long task_boost_called = 0;
- unsigned long task_boosted = 0;
- unsigned long boost_called = 0;
- unsigned long try_boost = 0;
- unsigned long boosted = 0;
- unsigned long unboost_called = 0;
- unsigned long unboosted = 0;
- unsigned long try_boost_readers = 0;
- unsigned long boost_readers = 0;
- unsigned long try_unboost_readers = 0;
- unsigned long unboost_readers = 0;
- unsigned long over_taken = 0;
-
- mutex_lock(&mutex);
-
- for_each_online_cpu(cpu) {
- rbd = &per_cpu(rcu_boost_data, cpu);
-
- task_boost_called += rbd->rbs_stat_task_boost_called;
- task_boosted += rbd->rbs_stat_task_boosted;
- boost_called += rbd->rbs_stat_boost_called;
- try_boost += rbd->rbs_stat_try_boost;
- boosted += rbd->rbs_stat_boosted;
- unboost_called += rbd->rbs_stat_unboost_called;
- unboosted += rbd->rbs_stat_unboosted;
- try_boost_readers += rbd->rbs_stat_try_boost_readers;
- boost_readers += rbd->rbs_stat_boost_readers;
- try_unboost_readers += rbd->rbs_stat_try_boost_readers;
- unboost_readers += rbd->rbs_stat_boost_readers;
- over_taken += rbd->rbs_stat_over_taken;
- }
-
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boost_called = %ld\n",
- task_boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boosted = %ld\n",
- task_boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_called = %ld\n",
- boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost = %ld\n",
- try_boost);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boosted = %ld\n",
- boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_called = %ld\n",
- unboost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboosted = %ld\n",
- unboosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost_readers = %ld\n",
- try_boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_readers = %ld\n",
- boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_unboost_readers = %ld\n",
- try_unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_readers = %ld\n",
- unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "over_taken = %ld\n",
- over_taken);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "rcu_boost_prio = %d\n",
- rcu_boost_prio);
- bcount = simple_read_from_buffer(buffer, count, ppos,
- rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
- mutex_unlock(&mutex);
-
- return bcount;
-}
-
-static struct file_operations rcuboost_fops = {
- .read = rcuboost_read,
-};
-
-static struct dentry *rcuboostdir;
-int rcu_trace_boost_create(struct dentry *rcudir)
-{
- rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
- NULL, &rcuboost_fops);
- if (!rcuboostdir)
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
-
-void rcu_trace_boost_destroy(void)
-{
- if (rcuboostdir)
- debugfs_remove(rcuboostdir);
- rcuboostdir = NULL;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
-
-#define RCU_BOOST_TRACE_FUNC_DECL(type) \
- static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd) \
- { \
- rbd->rbs_stat_##type++; \
- }
-RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
-RCU_BOOST_TRACE_FUNC_DECL(boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost)
-RCU_BOOST_TRACE_FUNC_DECL(boosted)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
-RCU_BOOST_TRACE_FUNC_DECL(unboosted)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(over_taken)
-#else /* CONFIG_RCU_TRACE */
-/* These were created by the above macro "RCU_BOOST_TRACE_FUNC_DECL" */
-# define rcu_trace_boost_task_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_task_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost(rbd) do { } while (0)
-# define rcu_trace_boost_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_called(rbd) do { } while (0)
-# define rcu_trace_boost_unboosted(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_try_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_over_taken(rbd) do { } while (0)
-#endif /* CONFIG_RCU_TRACE */
static inline int rcu_is_boosted(struct task_struct *task)
{
@@ -234,10 +57,10 @@ static void rcu_boost_task(struct task_s
WARN_ON(!irqs_disabled());
WARN_ON_SMP(!spin_is_locked(&task->pi_lock));
- rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
+ trace_mark(task_boost_called, "%u", smp_processor_id());
if (task->rcu_prio < task->prio) {
- rcu_trace_boost_task_boosted(RCU_BOOST_ME);
+ trace_mark(task_boosted, "%u", smp_processor_id());
task_setprio(task, task->rcu_prio);
}
}
@@ -261,7 +84,7 @@ void __rcu_preempt_boost(void)
WARN_ON(!current->rcu_read_lock_nesting);
- rcu_trace_boost_boost_called(RCU_BOOST_ME);
+ trace_mark(boost_called, "%u", smp_processor_id());
/* check to see if we are already boosted */
if (unlikely(rcu_is_boosted(curr)))
@@ -279,7 +102,7 @@ void __rcu_preempt_boost(void)
curr->rcub_rbdp = rbd;
- rcu_trace_boost_try_boost(rbd);
+ trace_mark(try_boost, "%u", smp_processor_id());
prio = rt_mutex_getprio(curr);
@@ -288,7 +111,7 @@ void __rcu_preempt_boost(void)
if (prio <= rbd->rbs_prio)
goto out;
- rcu_trace_boost_boosted(curr->rcub_rbdp);
+ trace_mark(boosted, "%u", smp_processor_id());
curr->rcu_prio = rbd->rbs_prio;
rcu_boost_task(curr);
@@ -313,7 +136,7 @@ void __rcu_preempt_unboost(void)
int prio;
unsigned long flags;
- rcu_trace_boost_unboost_called(RCU_BOOST_ME);
+ trace_mark(unboost_called, "%u", smp_processor_id());
/* if not boosted, then ignore */
if (likely(!rcu_is_boosted(curr)))
@@ -351,7 +174,7 @@ void __rcu_preempt_unboost(void)
list_del_init(&curr->rcub_entry);
- rcu_trace_boost_unboosted(rbd);
+ trace_mark(unboosted, "%u", smp_processor_id());
curr->rcu_prio = MAX_PRIO;
@@ -412,7 +235,7 @@ static int __rcu_boost_readers(struct rc
* Another task may have taken over.
*/
if (curr->rcu_preempt_counter != rcu_boost_counter) {
- rcu_trace_boost_over_taken(rbd);
+ trace_mark(over_taken, "%u", smp_processor_id());
return 1;
}
@@ -443,7 +266,7 @@ void rcu_boost_readers(void)
prio = rt_mutex_getprio(curr);
- rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
+ trace_mark(try_boost_readers, "%u", smp_processor_id());
if (prio >= rcu_boost_prio) {
/* already boosted */
@@ -453,7 +276,7 @@ void rcu_boost_readers(void)
rcu_boost_prio = prio;
- rcu_trace_boost_boost_readers(RCU_BOOST_ME);
+ trace_mark(boost_readers, "%u", smp_processor_id());
/* Flag that we are the one to unboost */
curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
spin_lock_irqsave(&rcu_boost_wake_lock, flags);
- rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
+ trace_mark(try_unboost_readers, "%u", smp_processor_id());
if (current->rcu_preempt_counter != rcu_boost_counter)
goto out;
- rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
+ trace_mark(unboost_readers, "%u", smp_processor_id());
/*
* We could also put in something that
@@ -514,6 +337,16 @@ void rcu_unboost_readers(void)
}
/*
+ * This function exports the rcu_boost_prio variable for use by
+ * modules that need it e.g. RCU_TRACE module
+ */
+int read_rcu_boost_prio(void)
+{
+ return rcu_boost_prio;
+}
+EXPORT_SYMBOL_GPL(read_rcu_boost_prio);
+
+/*
* The krcupreemptd wakes up every "rcu_preempt_thread_secs"
* seconds at the minimum priority of 1 to do a
* synchronize_rcu. This ensures that grace periods finish
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
@ 2007-12-31 6:09 K. Prasad
2007-12-31 10:20 ` Ingo Molnar
2008-01-03 16:30 ` Mathieu Desnoyers
0 siblings, 2 replies; 20+ messages in thread
From: K. Prasad @ 2007-12-31 6:09 UTC (permalink / raw)
To: mingo, linux-kernel, linux-rt-users
Cc: dipankar, ego, mathieu.desnoyers, paulmck
This patch converts the tracing mechanism of Preempt RCU boosting into
markers. The handler functions for these markers are included inside
rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
chosen.
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
include/linux/rcupreempt_trace.h | 46 ++++++++
kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
3 files changed, 251 insertions(+), 189 deletions(-)
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
@@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
extern int rcupreempt_mb_flag(int cpu);
extern char *rcupreempt_try_flip_state_name(void);
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+struct preempt_rcu_boost_trace {
+ unsigned long rbs_stat_task_boost_called;
+ unsigned long rbs_stat_task_boosted;
+ unsigned long rbs_stat_boost_called;
+ unsigned long rbs_stat_try_boost;
+ unsigned long rbs_stat_boosted;
+ unsigned long rbs_stat_unboost_called;
+ unsigned long rbs_stat_unboosted;
+ unsigned long rbs_stat_try_boost_readers;
+ unsigned long rbs_stat_boost_readers;
+ unsigned long rbs_stat_try_unboost_readers;
+ unsigned long rbs_stat_unboost_readers;
+ unsigned long rbs_stat_over_taken;
+};
+
+#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
+void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
+ void *private_data, const char *format, ...) \
+{ \
+ va_list ap; \
+ int cpu; \
+ struct preempt_rcu_boost_trace *boost_trace; \
+ va_start(ap, format); \
+ cpu = va_arg(ap, typeof(unsigned int)); \
+ boost_trace = (&per_cpu(boost_trace_data, cpu)); \
+ boost_trace->rbs_stat_##preempt_rcu_boost_var++; \
+ va_end(ap); \
+}
+
+struct preempt_rcu_boost_probe {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+#define INIT_PREEMPT_RCU_BOOST_PROBE(preempt_rcu_boost_probe_worker) \
+{ \
+ .name = __stringify(preempt_rcu_boost_probe_worker), \
+ .format = "%u", \
+ .probe_func = preempt_rcu_boost_probe_worker##_callback \
+}
+
+extern int read_rcu_boost_prio(void);
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
@@ -51,6 +51,163 @@ static char *rcupreempt_trace_buf;
static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
+static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
+static DEFINE_PER_CPU(struct preempt_rcu_boost_trace, boost_trace_data);
+
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(over_taken);
+
+static struct preempt_rcu_boost_probe preempt_rcu_boost_probe_array[] =
+{
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(over_taken)
+};
+
+static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ static DEFINE_MUTEX(mutex);
+ int cnt = 0;
+ int cpu;
+ struct preempt_rcu_boost_trace *prbt;
+ ssize_t bcount;
+ unsigned long task_boost_called = 0;
+ unsigned long task_boosted = 0;
+ unsigned long boost_called = 0;
+ unsigned long try_boost = 0;
+ unsigned long boosted = 0;
+ unsigned long unboost_called = 0;
+ unsigned long unboosted = 0;
+ unsigned long try_boost_readers = 0;
+ unsigned long boost_readers = 0;
+ unsigned long try_unboost_readers = 0;
+ unsigned long unboost_readers = 0;
+ unsigned long over_taken = 0;
+
+ mutex_lock(&mutex);
+
+ for_each_online_cpu(cpu) {
+ prbt = &per_cpu(boost_trace_data, cpu);
+
+ task_boost_called += prbt->rbs_stat_task_boost_called;
+ task_boosted += prbt->rbs_stat_task_boosted;
+ boost_called += prbt->rbs_stat_boost_called;
+ try_boost += prbt->rbs_stat_try_boost;
+ boosted += prbt->rbs_stat_boosted;
+ unboost_called += prbt->rbs_stat_unboost_called;
+ unboosted += prbt->rbs_stat_unboosted;
+ try_boost_readers += prbt->rbs_stat_try_boost_readers;
+ boost_readers += prbt->rbs_stat_boost_readers;
+ try_unboost_readers += prbt->rbs_stat_try_unboost_readers;
+ unboost_readers += prbt->rbs_stat_unboost_readers;
+ over_taken += prbt->rbs_stat_over_taken;
+ }
+
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boost_called = %ld\n",
+ task_boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boosted = %ld\n",
+ task_boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_called = %ld\n",
+ boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost = %ld\n",
+ try_boost);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boosted = %ld\n",
+ boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_called = %ld\n",
+ unboost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboosted = %ld\n",
+ unboosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost_readers = %ld\n",
+ try_boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_readers = %ld\n",
+ boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_unboost_readers = %ld\n",
+ try_unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_readers = %ld\n",
+ unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "over_taken = %ld\n",
+ over_taken);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "rcu_boost_prio = %d\n",
+ read_rcu_boost_prio());
+ bcount = simple_read_from_buffer(buffer, count, ppos,
+ rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
+ mutex_unlock(&mutex);
+
+ return bcount;
+}
+
+static struct file_operations rcuboost_fops = {
+ .read = rcuboost_read,
+};
+
+static struct dentry *rcuboostdir;
+int rcu_trace_boost_create(struct dentry *rcudir)
+{
+ rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
+ NULL, &rcuboost_fops);
+ if (!rcuboostdir)
+ return 0;
+
+ return 1;
+}
+
+void rcu_trace_boost_destroy(void)
+{
+ if (rcuboostdir)
+ debugfs_remove(rcuboostdir);
+ rcuboostdir = NULL;
+}
+
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
{
return &per_cpu(trace_data, cpu);
@@ -350,6 +507,10 @@ static int rcupreempt_debugfs_init(void)
if (!ctrsdir)
goto free_out;
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ if (!rcu_trace_boost_create(rcudir))
+ goto free_out;
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
return 0;
free_out:
if (ctrsdir)
@@ -382,6 +543,22 @@ static int __init rcupreempt_trace_init(
}
printk(KERN_INFO "RCU Preempt markers registered\n");
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++) {
+ struct preempt_rcu_boost_probe *p =
+ &preempt_rcu_boost_probe_array[i];
+ ret = marker_probe_register(p->name, p->format,
+ p->probe_func, p);
+ if (ret)
+ printk(KERN_INFO "Unable to register Preempt RCU Boost "
+ "probe %s\n", p->name);
+ ret = marker_arm(p->name);
+ if (ret)
+ printk(KERN_INFO "Unable to arm Preempt RCU Boost "
+ "markers %s\n", p->name);
+}
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
mutex_init(&rcupreempt_trace_mutex);
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
if (!rcupreempt_trace_buf)
@@ -400,6 +577,12 @@ static void __exit rcupreempt_trace_clea
marker_probe_unregister(rcupreempt_probe_array[i].name);
printk(KERN_INFO "RCU Preempt markers unregistered\n");
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ rcu_trace_boost_destroy();
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++)
+ marker_probe_unregister(preempt_rcu_boost_probe_array[i].name);
+ printk(KERN_INFO "Preempt RCU Boost markers unregistered\n");
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
debugfs_remove(statdir);
debugfs_remove(gpdir);
debugfs_remove(ctrsdir);
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt-boost.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
@@ -40,186 +40,9 @@ struct rcu_boost_dat {
int rbs_prio; /* CPU copy of rcu_boost_prio */
struct list_head rbs_toboost; /* Preempted RCU readers */
struct list_head rbs_boosted; /* RCU readers that have been boosted */
-#ifdef CONFIG_RCU_TRACE
- /* The rest are for statistics */
- unsigned long rbs_stat_task_boost_called;
- unsigned long rbs_stat_task_boosted;
- unsigned long rbs_stat_boost_called;
- unsigned long rbs_stat_try_boost;
- unsigned long rbs_stat_boosted;
- unsigned long rbs_stat_unboost_called;
- unsigned long rbs_stat_unboosted;
- unsigned long rbs_stat_try_boost_readers;
- unsigned long rbs_stat_boost_readers;
- unsigned long rbs_stat_try_unboost_readers;
- unsigned long rbs_stat_unboost_readers;
- unsigned long rbs_stat_over_taken;
-#endif /* CONFIG_RCU_TRACE */
};
static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
-#define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
-
-#ifdef CONFIG_RCU_TRACE
-
-#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
-static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
-
-static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- static DEFINE_MUTEX(mutex);
- int cnt = 0;
- int cpu;
- struct rcu_boost_dat *rbd;
- ssize_t bcount;
- unsigned long task_boost_called = 0;
- unsigned long task_boosted = 0;
- unsigned long boost_called = 0;
- unsigned long try_boost = 0;
- unsigned long boosted = 0;
- unsigned long unboost_called = 0;
- unsigned long unboosted = 0;
- unsigned long try_boost_readers = 0;
- unsigned long boost_readers = 0;
- unsigned long try_unboost_readers = 0;
- unsigned long unboost_readers = 0;
- unsigned long over_taken = 0;
-
- mutex_lock(&mutex);
-
- for_each_online_cpu(cpu) {
- rbd = &per_cpu(rcu_boost_data, cpu);
-
- task_boost_called += rbd->rbs_stat_task_boost_called;
- task_boosted += rbd->rbs_stat_task_boosted;
- boost_called += rbd->rbs_stat_boost_called;
- try_boost += rbd->rbs_stat_try_boost;
- boosted += rbd->rbs_stat_boosted;
- unboost_called += rbd->rbs_stat_unboost_called;
- unboosted += rbd->rbs_stat_unboosted;
- try_boost_readers += rbd->rbs_stat_try_boost_readers;
- boost_readers += rbd->rbs_stat_boost_readers;
- try_unboost_readers += rbd->rbs_stat_try_boost_readers;
- unboost_readers += rbd->rbs_stat_boost_readers;
- over_taken += rbd->rbs_stat_over_taken;
- }
-
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boost_called = %ld\n",
- task_boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boosted = %ld\n",
- task_boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_called = %ld\n",
- boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost = %ld\n",
- try_boost);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boosted = %ld\n",
- boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_called = %ld\n",
- unboost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboosted = %ld\n",
- unboosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost_readers = %ld\n",
- try_boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_readers = %ld\n",
- boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_unboost_readers = %ld\n",
- try_unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_readers = %ld\n",
- unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "over_taken = %ld\n",
- over_taken);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "rcu_boost_prio = %d\n",
- rcu_boost_prio);
- bcount = simple_read_from_buffer(buffer, count, ppos,
- rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
- mutex_unlock(&mutex);
-
- return bcount;
-}
-
-static struct file_operations rcuboost_fops = {
- .read = rcuboost_read,
-};
-
-static struct dentry *rcuboostdir;
-int rcu_trace_boost_create(struct dentry *rcudir)
-{
- rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
- NULL, &rcuboost_fops);
- if (!rcuboostdir)
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
-
-void rcu_trace_boost_destroy(void)
-{
- if (rcuboostdir)
- debugfs_remove(rcuboostdir);
- rcuboostdir = NULL;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
-
-#define RCU_BOOST_TRACE_FUNC_DECL(type) \
- static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd) \
- { \
- rbd->rbs_stat_##type++; \
- }
-RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
-RCU_BOOST_TRACE_FUNC_DECL(boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost)
-RCU_BOOST_TRACE_FUNC_DECL(boosted)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
-RCU_BOOST_TRACE_FUNC_DECL(unboosted)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(over_taken)
-#else /* CONFIG_RCU_TRACE */
-/* These were created by the above macro "RCU_BOOST_TRACE_FUNC_DECL" */
-# define rcu_trace_boost_task_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_task_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost(rbd) do { } while (0)
-# define rcu_trace_boost_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_called(rbd) do { } while (0)
-# define rcu_trace_boost_unboosted(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_try_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_over_taken(rbd) do { } while (0)
-#endif /* CONFIG_RCU_TRACE */
static inline int rcu_is_boosted(struct task_struct *task)
{
@@ -234,10 +57,10 @@ static void rcu_boost_task(struct task_s
WARN_ON(!irqs_disabled());
WARN_ON_SMP(!spin_is_locked(&task->pi_lock));
- rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
+ trace_mark(task_boost_called, "%u", smp_processor_id());
if (task->rcu_prio < task->prio) {
- rcu_trace_boost_task_boosted(RCU_BOOST_ME);
+ trace_mark(task_boosted, "%u", smp_processor_id());
task_setprio(task, task->rcu_prio);
}
}
@@ -261,7 +84,7 @@ void __rcu_preempt_boost(void)
WARN_ON(!current->rcu_read_lock_nesting);
- rcu_trace_boost_boost_called(RCU_BOOST_ME);
+ trace_mark(boost_called, "%u", smp_processor_id());
/* check to see if we are already boosted */
if (unlikely(rcu_is_boosted(curr)))
@@ -279,7 +102,7 @@ void __rcu_preempt_boost(void)
curr->rcub_rbdp = rbd;
- rcu_trace_boost_try_boost(rbd);
+ trace_mark(try_boost, "%u", smp_processor_id());
prio = rt_mutex_getprio(curr);
@@ -288,7 +111,7 @@ void __rcu_preempt_boost(void)
if (prio <= rbd->rbs_prio)
goto out;
- rcu_trace_boost_boosted(curr->rcub_rbdp);
+ trace_mark(boosted, "%u", smp_processor_id());
curr->rcu_prio = rbd->rbs_prio;
rcu_boost_task(curr);
@@ -313,7 +136,7 @@ void __rcu_preempt_unboost(void)
int prio;
unsigned long flags;
- rcu_trace_boost_unboost_called(RCU_BOOST_ME);
+ trace_mark(unboost_called, "%u", smp_processor_id());
/* if not boosted, then ignore */
if (likely(!rcu_is_boosted(curr)))
@@ -351,7 +174,7 @@ void __rcu_preempt_unboost(void)
list_del_init(&curr->rcub_entry);
- rcu_trace_boost_unboosted(rbd);
+ trace_mark(unboosted, "%u", smp_processor_id());
curr->rcu_prio = MAX_PRIO;
@@ -412,7 +235,7 @@ static int __rcu_boost_readers(struct rc
* Another task may have taken over.
*/
if (curr->rcu_preempt_counter != rcu_boost_counter) {
- rcu_trace_boost_over_taken(rbd);
+ trace_mark(over_taken, "%u", smp_processor_id());
return 1;
}
@@ -443,7 +266,7 @@ void rcu_boost_readers(void)
prio = rt_mutex_getprio(curr);
- rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
+ trace_mark(try_boost_readers, "%u", smp_processor_id());
if (prio >= rcu_boost_prio) {
/* already boosted */
@@ -453,7 +276,7 @@ void rcu_boost_readers(void)
rcu_boost_prio = prio;
- rcu_trace_boost_boost_readers(RCU_BOOST_ME);
+ trace_mark(boost_readers, "%u", smp_processor_id());
/* Flag that we are the one to unboost */
curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
spin_lock_irqsave(&rcu_boost_wake_lock, flags);
- rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
+ trace_mark(try_unboost_readers, "%u", smp_processor_id());
if (current->rcu_preempt_counter != rcu_boost_counter)
goto out;
- rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
+ trace_mark(unboost_readers, "%u", smp_processor_id());
/*
* We could also put in something that
@@ -514,6 +337,16 @@ void rcu_unboost_readers(void)
}
/*
+ * This function exports the rcu_boost_prio variable for use by
+ * modules that need it e.g. RCU_TRACE module
+ */
+int read_rcu_boost_prio(void)
+{
+ return rcu_boost_prio;
+}
+EXPORT_SYMBOL_GPL(read_rcu_boost_prio);
+
+/*
* The krcupreemptd wakes up every "rcu_preempt_thread_secs"
* seconds at the minimum priority of 1 to do a
* synchronize_rcu. This ensures that grace periods finish
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2007-12-31 6:09 K. Prasad
@ 2007-12-31 10:20 ` Ingo Molnar
2008-01-02 3:31 ` Frank Ch. Eigler
2008-01-03 19:24 ` Mathieu Desnoyers
2008-01-03 16:30 ` Mathieu Desnoyers
1 sibling, 2 replies; 20+ messages in thread
From: Ingo Molnar @ 2007-12-31 10:20 UTC (permalink / raw)
To: K. Prasad
Cc: linux-kernel, linux-rt-users, dipankar, ego, mathieu.desnoyers,
paulmck, Andrew Morton, Linus Torvalds
* K. Prasad <prasad@linux.vnet.ibm.com> wrote:
> @@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
>
> spin_lock_irqsave(&rcu_boost_wake_lock, flags);
>
> - rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
> + trace_mark(try_unboost_readers, "%u", smp_processor_id());
this isn't directed at you or your patch, it's more directed at Mathieu,
but looking at this actual markers patch submitted to me, i'm still
fundamentally worried about the whole marker approach.
Firstly, why on earth does a full format string have to be passed in for
something as simple as a CPU id? This way we basically codify it forever
that tracing _has_ to be expensive when enabled. The latency-tracer
(which i'd love to convert to markers, if only markers were capable
enough) has shown that tracing _can_ be used to capture performance
data without disturbing the measured system, even at hundreds of
thousands context switches a second per CPU.
Secondly, the inlined overhead of trace_mark() is still WAY too large:
if (unlikely(__mark_##name.state)) { \
preempt_disable(); \
(*__mark_##name.call) \
(&__mark_##name, call_data, \
format, ## args); \
preempt_enable(); \
} \
Whatever became of the obvious suggestion that i outlined years ago, to
have a _single_ trace call instruction and to _patch out_ the damn
marker calls by default? No .state flag checking inlined. No
preempt_disable()/enable() pair. Patching static tracepoints OUT of the
kernel, only leaving a ~5-byte NOP sequence behind them (and some
minimal disturbance to the variables the tracepoint accesses). We've got
all the alternatives.h code patching infrastructure available for such
purposes. Why are we 2 years down the line and _STILL_ arguing about
this?
Thirdly, the patch selects CONFIG_MARKERS:
> config RCU_TRACE
> - bool "Enable tracing for RCU - currently stats in debugfs"
> + tristate "Enable tracing for RCU - currently stats in debugfs"
> select DEBUG_FS
> - default y
> + select MARKERS
Which adds overhead (inlined checks for markers) all around the kernel,
even if all markers are deactivated! Imagine a thousand of them and the
kernel blows up measurably.
Sadly, this whole trace_mark() API seems to have gotten much worse since
i last saw it. It's sub-par when it's turned on and it's sub-par when
it's turned off. It gets us the worst of both worlds.
If it continues like this then i'd much rather see people add printks as
tracing, because there you _know_ that it's high-overhead and people
won't start arguing about trace compatibility either, etc.
Ingo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2007-12-31 10:20 ` Ingo Molnar
@ 2008-01-02 3:31 ` Frank Ch. Eigler
2008-01-02 12:47 ` Ingo Molnar
2008-01-03 19:24 ` Mathieu Desnoyers
1 sibling, 1 reply; 20+ messages in thread
From: Frank Ch. Eigler @ 2008-01-02 3:31 UTC (permalink / raw)
To: Ingo Molnar
Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds
Ingo Molnar <mingo@elte.hu> writes:
> [...] Firstly, why on earth does a full format string have to be
> passed in for something as simple as a CPU id? This way we basically
> codify it forever that tracing _has_ to be expensive when
> enabled. [...]
FWIW, I'm not keen about the format strings either, but they don't
constitute a performance hit beyond an additional parameter. It does
not need to actually get parsed at run time.
>[...]
> Secondly, the inlined overhead of trace_mark() is still WAY too large:
>
> if (unlikely(__mark_##name.state)) { \
> [...]
> } \
Note that this is for the unoptimized case. The immediate-value code
is better. I have still yet to see some good measurements of how much
the overheads of the various variants are, however. It's only fair to
gather these numbers and continue the debate with them in hand.
> Whatever became of the obvious suggestion that i outlined years ago,
> to have a _single_ trace call instruction and to _patch out_ the
> damn marker calls by default? [...] only leaving a ~5-byte NOP
> sequence behind them (and some minimal disturbance to the variables
> the tracepoint accesses). [...]
This has been answered several times before. It's because the marker
parameters have to be (conditionally) evaluated and pushed onto a call
frame. It's not just a call that would need being nop'd, but a whole
function call setup/teardown sequence, which itself can be interleaved
with adjacent code.
- FChE
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 3:31 ` Frank Ch. Eigler
@ 2008-01-02 12:47 ` Ingo Molnar
2008-01-02 16:33 ` Frank Ch. Eigler
0 siblings, 1 reply; 20+ messages in thread
From: Ingo Molnar @ 2008-01-02 12:47 UTC (permalink / raw)
To: Frank Ch. Eigler
Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds
* Frank Ch. Eigler <fche@redhat.com> wrote:
> Ingo Molnar <mingo@elte.hu> writes:
>
> > [...] Firstly, why on earth does a full format string have to be
> > passed in for something as simple as a CPU id? This way we basically
> > codify it forever that tracing _has_ to be expensive when
> > enabled. [...]
>
> FWIW, I'm not keen about the format strings either, but they don't
> constitute a performance hit beyond an additional parameter. It does
> not need to actually get parsed at run time.
"only" an additional parameter. The whole _point_ behind these markers
is for them to have minimal effect!
> >[...]
> > Secondly, the inlined overhead of trace_mark() is still WAY too large:
> >
> > if (unlikely(__mark_##name.state)) { \
> > [...]
> > } \
>
> Note that this is for the unoptimized case. The immediate-value code
> is better. I have still yet to see some good measurements of how much
> the overheads of the various variants are, however. It's only fair to
> gather these numbers and continue the debate with them in hand.
this is a general policy matter. It is _so much easier_ to add markers
if they _can_ have near-zero overhead (as in 1-2 instructions).
Otherwise we'll keep arguing about it, especially if any is added to
performance-critical codepath. (where we are counting instructions)
> > Whatever became of the obvious suggestion that i outlined years ago,
> > to have a _single_ trace call instruction and to _patch out_ the
> > damn marker calls by default? [...] only leaving a ~5-byte NOP
> > sequence behind them (and some minimal disturbance to the variables
> > the tracepoint accesses). [...]
>
> This has been answered several times before. It's because the marker
> parameters have to be (conditionally) evaluated and pushed onto a call
> frame. It's not just a call that would need being nop'd, but a whole
> function call setup/teardown sequence, which itself can be interleaved
> with adjacent code.
you are still missing my point. Firstly, the kernel is regparm built so
there's no call frame to be pushed to in most cases - we pass most
parameters in registers. (hence my stressing of minimizing the number of
parameters to an absolute minimum)
Secondly, the side-effects of a function call is what i referred to via:
> [...] (and some minimal disturbance to the variables the tracepoint
> accesses). [...]
really, a tracepoint obviously accesses data that is readily accessible
in that spot. Worst-case we'll have some additional register constraints
that make the code a bit less optimal.
Yes, if a trace point references data that is _not_ readily available,
then of course the preparation for the function call might not be cheap.
But that can be optimized by placing the tracepoints intelligently.
what cannot be optimized away at all are the conditional instructions
introduced by the probe points, extra parameters and the space overhead
of the function call itself.
Ingo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 12:47 ` Ingo Molnar
@ 2008-01-02 16:33 ` Frank Ch. Eigler
2008-01-02 17:01 ` Ingo Molnar
2008-01-02 23:49 ` Nicholas Miell
0 siblings, 2 replies; 20+ messages in thread
From: Frank Ch. Eigler @ 2008-01-02 16:33 UTC (permalink / raw)
To: Ingo Molnar
Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds
Hi -
On Wed, Jan 02, 2008 at 01:47:34PM +0100, Ingo Molnar wrote:
> [...]
> > FWIW, I'm not keen about the format strings either, but they don't
> > constitute a performance hit beyond an additional parameter. It does
> > not need to actually get parsed at run time.
>
> "only" an additional parameter. The whole _point_ behind these markers
> is for them to have minimal effect!
Agreed. The only alternative I recall seeing proposed was my own
cartesian-product macro suite that encodes parameter types into the
marker function/macro name itself. (Maybe some of that could be
hidden with gcc typeof() magic.) There appeared to be a consensus
that this was more undesirable. Do you agree?
> [...]
> this is a general policy matter. It is _so much easier_ to add markers
> if they _can_ have near-zero overhead (as in 1-2 instructions).
> Otherwise we'll keep arguing about it, especially if any is added to
> performance-critical codepath. (where we are counting instructions)
The effect of the immediate-values patch, combined with gcc
CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
dcache-impact-free instructions. The register saves, parameter
evaluation, the function call, can all be moved out of line.
> > > Whatever became of the obvious suggestion that i outlined years ago,
> > > to have a _single_ trace call instruction and to _patch out_ the
> > > damn marker calls by default? [...] only leaving a ~5-byte NOP
> > > sequence behind them (and some minimal disturbance to the variables
> > > the tracepoint accesses). [...]
> >
> > This has been answered several times before. It's because the marker
> > parameters have to be (conditionally) evaluated and pushed onto a call
> > frame. It's not just a call that would need being nop'd, but a whole
> > function call setup/teardown sequence, which itself can be interleaved
> > with adjacent code.
>
> you are still missing my point. Firstly, the kernel is regparm built so
> there's no call frame to be pushed to in most cases - we pass most
> parameters in registers. [...]
Yes, but those registers will already be used, so their former values
need to be saved or later recomputed as a part of the call sequence.
Even wrapping it all with an asm barrier may not be enough to
completely bound the code with an address range that can be naively
NOP'd out.
> Secondly, the side-effects of a function call is what i referred to via:
>
> > [...] (and some minimal disturbance to the variables the tracepoint
> > accesses). [...]
>
> really, a tracepoint obviously accesses data that is readily accessible
> in that spot. Worst-case we'll have some additional register constraints
> that make the code a bit less optimal. [...]
It's more than that - people will want to pass some dereferenced
fields from a struct*; some old function parameters that were by the
time of the marker call shoved out of registers again. Register
constraints that prohibit general expressions would impede the
utility of the tool.
> what cannot be optimized away at all are the conditional
> instructions introduced by the probe points, extra parameters and
> the space overhead of the function call itself.
Almost all of that can & should be moved out of line.
- FChE
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 16:33 ` Frank Ch. Eigler
@ 2008-01-02 17:01 ` Ingo Molnar
2008-01-02 17:56 ` Frank Ch. Eigler
2008-01-07 18:59 ` Mathieu Desnoyers
2008-01-02 23:49 ` Nicholas Miell
1 sibling, 2 replies; 20+ messages in thread
From: Ingo Molnar @ 2008-01-02 17:01 UTC (permalink / raw)
To: Frank Ch. Eigler
Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds
* Frank Ch. Eigler <fche@redhat.com> wrote:
> > [...] this is a general policy matter. It is _so much easier_ to add
> > markers if they _can_ have near-zero overhead (as in 1-2
> > instructions). Otherwise we'll keep arguing about it, especially if
> > any is added to performance-critical codepath. (where we are
> > counting instructions)
>
> The effect of the immediate-values patch, combined with gcc
> CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> dcache-impact-free instructions. The register saves, parameter
> evaluation, the function call, can all be moved out of line.
well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
we should already be getting that, right?
There's one thing that would make out-of-line tracepoints have a lot
less objectionable to me: right now the 'out of line' area is put to the
end of functions. That splinters the kernel image with inactive, rarely
taken areas of code - blowing up its icache footprint considerably. For
example sched.o has ~100 functions, with the average function size being
200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
already.
It's true that keeping the off-site code within the function keeps total
codesize slightly smaller, because the offsets (and hence the
conditional jumps) are thus 8 bit - but that's below 1% and the
cache-blow-up aspect is more severe in practice at 10-20%.
So it would be nice if we could collect all this offline code and stuff
it away into another portion of the kernel image. (or, into another
portion of the object file - which would still be good enough in
practice)
Ingo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 17:01 ` Ingo Molnar
@ 2008-01-02 17:56 ` Frank Ch. Eigler
2008-01-02 20:10 ` Ingo Molnar
2008-01-07 18:59 ` Mathieu Desnoyers
1 sibling, 1 reply; 20+ messages in thread
From: Frank Ch. Eigler @ 2008-01-02 17:56 UTC (permalink / raw)
To: Ingo Molnar
Cc: Frank Ch. Eigler, K. Prasad, linux-kernel, linux-rt-users,
dipankar, ego, mathieu.desnoyers, paulmck, Andrew Morton,
Linus Torvalds
Hi -
On Wed, Jan 02, 2008 at 06:01:57PM +0100, Ingo Molnar wrote:
> [...]
> well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> we should already be getting that, right?
Right.
> [...] So it would be nice if we could collect all this offline code
> and stuff it away into another portion of the kernel image. (or,
> into another portion of the object file - which would still be good
> enough in practice)
That would be the -freorder-blocks-and-partition flag, as proposed by
Arjan two Februarys ago. I don't see any traces of Andi's overriding
"-fno-reorder-blocks" in the current linus tree, so maybe it's time to
resurrect this one:
http://readlist.com/lists/vger.kernel.org/linux-kernel/39/196123.html
- FChE
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 17:56 ` Frank Ch. Eigler
@ 2008-01-02 20:10 ` Ingo Molnar
0 siblings, 0 replies; 20+ messages in thread
From: Ingo Molnar @ 2008-01-02 20:10 UTC (permalink / raw)
To: Frank Ch. Eigler
Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds
* Frank Ch. Eigler <fche@redhat.com> wrote:
> Hi -
>
> On Wed, Jan 02, 2008 at 06:01:57PM +0100, Ingo Molnar wrote:
> > [...]
> > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> > we should already be getting that, right?
>
> Right.
>
> > [...] So it would be nice if we could collect all this offline code
> > and stuff it away into another portion of the kernel image. (or,
> > into another portion of the object file - which would still be good
> > enough in practice)
>
> That would be the -freorder-blocks-and-partition flag, as proposed by
> Arjan two Februarys ago. I don't see any traces of Andi's overriding
> "-fno-reorder-blocks" in the current linus tree, so maybe it's time to
> resurrect this one:
>
> http://readlist.com/lists/vger.kernel.org/linux-kernel/39/196123.html
hm, that gives:
Forbidden
You don't have permission to access
/lists/vger.kernel.org/linux-kernel/39/196123.html on this server.
but yeah, i had the impression that gcc couldn't yet do this. Not a
showstopper, but it would be nice to initiate the gcc side of things ...
Ingo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 16:33 ` Frank Ch. Eigler
2008-01-02 17:01 ` Ingo Molnar
@ 2008-01-02 23:49 ` Nicholas Miell
1 sibling, 0 replies; 20+ messages in thread
From: Nicholas Miell @ 2008-01-02 23:49 UTC (permalink / raw)
To: Frank Ch. Eigler
Cc: Ingo Molnar, K. Prasad, linux-kernel, linux-rt-users, dipankar,
ego, mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds
On Wed, 2008-01-02 at 11:33 -0500, Frank Ch. Eigler wrote:
> Hi -
>
> On Wed, Jan 02, 2008 at 01:47:34PM +0100, Ingo Molnar wrote:
> > [...]
> > > FWIW, I'm not keen about the format strings either, but they don't
> > > constitute a performance hit beyond an additional parameter. It does
> > > not need to actually get parsed at run time.
> >
> > "only" an additional parameter. The whole _point_ behind these markers
> > is for them to have minimal effect!
>
> Agreed. The only alternative I recall seeing proposed was my own
> cartesian-product macro suite that encodes parameter types into the
> marker function/macro name itself. (Maybe some of that could be
> hidden with gcc typeof() magic.) There appeared to be a consensus
> that this was more undesirable. Do you agree?
>
>
C++ name mangling would be extremely useful here.
Actually, why isn't the DWARF information for the functions sufficient?
--
Nicholas Miell <nmiell@comcast.net>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2007-12-31 6:09 K. Prasad
2007-12-31 10:20 ` Ingo Molnar
@ 2008-01-03 16:30 ` Mathieu Desnoyers
2008-01-04 10:58 ` Gautham R Shenoy
1 sibling, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-03 16:30 UTC (permalink / raw)
To: K. Prasad; +Cc: mingo, linux-kernel, linux-rt-users, dipankar, ego, paulmck
* K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> This patch converts the tracing mechanism of Preempt RCU boosting into
> markers. The handler functions for these markers are included inside
> rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> chosen.
>
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
> include/linux/rcupreempt_trace.h | 46 ++++++++
> kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> 3 files changed, 251 insertions(+), 189 deletions(-)
>
> Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> ===================================================================
> --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> extern int rcupreempt_mb_flag(int cpu);
> extern char *rcupreempt_try_flip_state_name(void);
>
> +#ifdef CONFIG_PREEMPT_RCU_BOOST
> +struct preempt_rcu_boost_trace {
> + unsigned long rbs_stat_task_boost_called;
> + unsigned long rbs_stat_task_boosted;
> + unsigned long rbs_stat_boost_called;
> + unsigned long rbs_stat_try_boost;
> + unsigned long rbs_stat_boosted;
> + unsigned long rbs_stat_unboost_called;
> + unsigned long rbs_stat_unboosted;
> + unsigned long rbs_stat_try_boost_readers;
> + unsigned long rbs_stat_boost_readers;
> + unsigned long rbs_stat_try_unboost_readers;
> + unsigned long rbs_stat_unboost_readers;
> + unsigned long rbs_stat_over_taken;
> +};
> +
> +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> + void *private_data, const char *format, ...) \
> +{ \
> + va_list ap; \
> + int cpu; \
> + struct preempt_rcu_boost_trace *boost_trace; \
> + va_start(ap, format); \
> + cpu = va_arg(ap, typeof(unsigned int)); \
(sorry for late response, I just came back from vacation)
Instead of passing the cpu id as a marker parameter, why don't you
simply use smp_processor_id() right here ?
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2007-12-31 10:20 ` Ingo Molnar
2008-01-02 3:31 ` Frank Ch. Eigler
@ 2008-01-03 19:24 ` Mathieu Desnoyers
1 sibling, 0 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-03 19:24 UTC (permalink / raw)
To: Ingo Molnar
Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego, paulmck,
Andrew Morton, Linus Torvalds, Frank Ch. Eigler
* Ingo Molnar (mingo@elte.hu) wrote:
>
> * K. Prasad <prasad@linux.vnet.ibm.com> wrote:
>
> > @@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
> >
> > spin_lock_irqsave(&rcu_boost_wake_lock, flags);
> >
> > - rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
> > + trace_mark(try_unboost_readers, "%u", smp_processor_id());
>
> this isn't directed at you or your patch, it's more directed at Mathieu,
> but looking at this actual markers patch submitted to me, i'm still
> fundamentally worried about the whole marker approach.
>
Hi Ingo,
Just to note that some of your concerns have been answered by Frank and
that I fully agree with what he said. Sorry for the late reply
(vacation..)
> Firstly, why on earth does a full format string have to be passed in for
> something as simple as a CPU id? This way we basically codify it forever
> that tracing _has_ to be expensive when enabled. The latency-tracer
> (which i'd love to convert to markers, if only markers were capable
> enough) has shown it that tracing _can_ be used to capture performance
> data without disturbing the measured system, even at hundreds of
> thousands context switches a second per CPU.
>
As I replied in my other email, the cpu id does not have to be passed as
an argument. Also, when no argument is passed, the format string does
not have to be parsed at all in the callback.
> Secondly, the inlined overhead of trace_mark() is still WAY too large:
>
> if (unlikely(__mark_##name.state)) { \
> preempt_disable(); \
> (*__mark_##name.call) \
> (&__mark_##name, call_data, \
> format, ## args); \
> preempt_enable(); \
> } \
>
To get the full version of my trace_mark, you would have to take a few
bits from the -mm tree, which includes the "multiple probes support"
(this one moves the preempt disable/enable completely out of line) and
you should also get the "markers use immediate values" patch which I
submitted a few weeks ago : it uses code patching to modify a byte in a
mov instruction that is used to jump over the entire function call
(memory references, stack setup and the call itself)
> Whatever became of the obvious suggestion that i outlined years ago, to
> have a _single_ trace call instruction and to _patch out_ the damn
> marker calls by default? No .state flag checking inlined. No
> preempt_disable()/enable() pair. Patching static tracepoints OUT of the
> kernel, only leaving a ~5-byte NOP sequence behind them (and some
> minimal disturbance to the variables the tracepoint accesses). We've got
> all the alternatives.h code patching infrastructure available for such
> purposes. Why are we 2 years down the line and _STILL_ arguing about
> this?
>
Frank replied appropriately to this. It's mostly because of stack setup
and/or move to registers to prepare the call.
> Thirdly, the patch selects CONFIG_MARKERS:
>
> > config RCU_TRACE
> > - bool "Enable tracing for RCU - currently stats in debugfs"
> > + tristate "Enable tracing for RCU - currently stats in debugfs"
> > select DEBUG_FS
> > - default y
> > + select MARKERS
>
> Which adds overhead (inlined checks for markers) all around the kernel,
> even if all markers are deactivated! Imagine a thousand of them and the
> kernel blows up measurably.
>
> Sadly, this whole trace_mark() API seems to have gotten much worse since
> i last saw it. It's sub-par when it's turned on and it's sub-par when
> it's turned off. It gets us the worst of both worlds.
>
> If it continues like this then i'd much rather see people add printks as
> tracing, because there you _know_ that it's high-overhead and people
> won't start arguing about trace compatibility either, etc.
>
I think Frank's response explains things in enough depth.
> Ingo
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-03 16:30 ` Mathieu Desnoyers
@ 2008-01-04 10:58 ` Gautham R Shenoy
2008-01-05 12:46 ` Mathieu Desnoyers
0 siblings, 1 reply; 20+ messages in thread
From: Gautham R Shenoy @ 2008-01-04 10:58 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: K. Prasad, mingo, linux-kernel, linux-rt-users, dipankar, paulmck
On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> * K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> > This patch converts the tracing mechanism of Preempt RCU boosting into
> > markers. The handler functions for these markers are included inside
> > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > chosen.
> >
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> > include/linux/rcupreempt_trace.h | 46 ++++++++
> > kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> > kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> > 3 files changed, 251 insertions(+), 189 deletions(-)
> >
> > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > ===================================================================
> > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > extern int rcupreempt_mb_flag(int cpu);
> > extern char *rcupreempt_try_flip_state_name(void);
> >
> > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > +struct preempt_rcu_boost_trace {
> > + unsigned long rbs_stat_task_boost_called;
> > + unsigned long rbs_stat_task_boosted;
> > + unsigned long rbs_stat_boost_called;
> > + unsigned long rbs_stat_try_boost;
> > + unsigned long rbs_stat_boosted;
> > + unsigned long rbs_stat_unboost_called;
> > + unsigned long rbs_stat_unboosted;
> > + unsigned long rbs_stat_try_boost_readers;
> > + unsigned long rbs_stat_boost_readers;
> > + unsigned long rbs_stat_try_unboost_readers;
> > + unsigned long rbs_stat_unboost_readers;
> > + unsigned long rbs_stat_over_taken;
> > +};
> > +
> > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > + void *private_data, const char *format, ...) \
> > +{ \
> > + va_list ap; \
> > + int cpu; \
> > + struct preempt_rcu_boost_trace *boost_trace; \
> > + va_start(ap, format); \
> > + cpu = va_arg(ap, typeof(unsigned int)); \
>
> (sorry for late response, I just came back from vacation)
>
> Instead of passing the cpu id as a marker parameter, why don't you
> simply use smp_processor_id() right here ?
>
Agreed.
Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(),
we need not pass the parameter and simply use smp_processor_id().
It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
Thanks and Regards
gautham.
> --
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
--
Gautham R Shenoy
Linux Technology Center
IBM India.
"Freedom comes with a price tag of responsibility, which is still a bargain,
because Freedom is priceless!"
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-04 10:58 ` Gautham R Shenoy
@ 2008-01-05 12:46 ` Mathieu Desnoyers
2008-01-07 19:43 ` K. Prasad
0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-05 12:46 UTC (permalink / raw)
To: Gautham R Shenoy
Cc: K. Prasad, mingo, linux-kernel, linux-rt-users, dipankar, paulmck
* Gautham R Shenoy (ego@in.ibm.com) wrote:
> On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> > * K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> > > This patch converts the tracing mechanism of Preempt RCU boosting into
> > > markers. The handler functions for these markers are included inside
> > > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > > chosen.
> > >
> > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > ---
> > > include/linux/rcupreempt_trace.h | 46 ++++++++
> > > kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> > > kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> > > 3 files changed, 251 insertions(+), 189 deletions(-)
> > >
> > > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > ===================================================================
> > > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > > extern int rcupreempt_mb_flag(int cpu);
> > > extern char *rcupreempt_try_flip_state_name(void);
> > >
> > > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > > +struct preempt_rcu_boost_trace {
> > > + unsigned long rbs_stat_task_boost_called;
> > > + unsigned long rbs_stat_task_boosted;
> > > + unsigned long rbs_stat_boost_called;
> > > + unsigned long rbs_stat_try_boost;
> > > + unsigned long rbs_stat_boosted;
> > > + unsigned long rbs_stat_unboost_called;
> > > + unsigned long rbs_stat_unboosted;
> > > + unsigned long rbs_stat_try_boost_readers;
> > > + unsigned long rbs_stat_boost_readers;
> > > + unsigned long rbs_stat_try_unboost_readers;
> > > + unsigned long rbs_stat_unboost_readers;
> > > + unsigned long rbs_stat_over_taken;
> > > +};
> > > +
> > > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > > + void *private_data, const char *format, ...) \
> > > +{ \
> > > + va_list ap; \
> > > + int cpu; \
> > > + struct preempt_rcu_boost_trace *boost_trace; \
> > > + va_start(ap, format); \
> > > + cpu = va_arg(ap, typeof(unsigned int)); \
> >
> > (sorry for late response, I just came back from vacation)
> >
> > Instead of passing the cpu id as a marker parameter, why don't you
> > simply use smp_processor_id() right here ?
> >
>
> Agreed.
>
> Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(),
> we need not pass the parameter and simply use smp_processor_id().
>
> It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
>
And it would help if you declare the format string with something like :
"cpuid %u" instead of "%u"
This way, if a generic tracer like LTTng dumps the traces and later a
plugin in an analysis tool like LTTV have to hook on a specific field of
your event, it can access it by specifying the marker name and field
name. And even if fields are added to this event, it won't break
compatibility as long as this field is there and has the same name.
Mathieu
>
> Thanks and Regards
> gautham.
>
>
> > --
> > Mathieu Desnoyers
> > Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
>
> --
> Gautham R Shenoy
> Linux Technology Center
> IBM India.
> "Freedom comes with a price tag of responsibility, which is still a bargain,
> because Freedom is priceless!"
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-02 17:01 ` Ingo Molnar
2008-01-02 17:56 ` Frank Ch. Eigler
@ 2008-01-07 18:59 ` Mathieu Desnoyers
2008-01-13 18:07 ` Pavel Machek
1 sibling, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-07 18:59 UTC (permalink / raw)
To: Ingo Molnar
Cc: Frank Ch. Eigler, K. Prasad, linux-kernel, linux-rt-users,
dipankar, ego, paulmck, Andrew Morton, Linus Torvalds
* Ingo Molnar (mingo@elte.hu) wrote:
>
> * Frank Ch. Eigler <fche@redhat.com> wrote:
>
> > > [...] this is a general policy matter. It is _so much easier_ to add
> > > markers if they _can_ have near-zero overhead (as in 1-2
> > > instructions). Otherwise we'll keep arguing about it, especially if
> > > any is added to performance-critical codepath. (where we are
> > > counting instructions)
> >
> > The effect of the immediate-values patch, combined with gcc
> > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> > dcache-impact-free instructions. The register saves, parameter
> > evaluation, the function call, can all be moved out of line.
>
> well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> we should already be getting that, right?
>
> There's one thing that would make out-of-line tracepoints have a lot
> less objectionable to me: right now the 'out of line' area is put to the
> end of functions. That splinters the kernel image with inactive, rarely
> taken areas of code - blowing up its icache footprint considerably. For
> example sched.o has ~100 functions, with the average function size being
> 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
> already.
Hrm, I agree this can be a problem on architectures with more standard
associative icaches, but aren't most x86_64 machines (and modern x86_32)
using an instruction trace cache instead ? This makes the problem
irrelevant.
But I agree that, as Frank proposed, -freorder-blocks-and-partition
could help us in that matter for the architectures using an associative
L1 icache.
Mathieu
>
> It's true that keeping the off-site code within the function keeps total
> codesize slightly smaller, because the offsets (and hence the
> conditional jumps) are thus 8 bit - but that's below 1% and the
> cache-blow-up aspect is more severe in practice at 10-20%.
>
> So it would be nice if we could collect all this offline code and stuff
> it away into another portion of the kernel image. (or, into another
> portion of the object file - which would still be good enough in
> practice)
>
> Ingo
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-05 12:46 ` Mathieu Desnoyers
@ 2008-01-07 19:43 ` K. Prasad
0 siblings, 0 replies; 20+ messages in thread
From: K. Prasad @ 2008-01-07 19:43 UTC (permalink / raw)
To: Mathieu Desnoyers, linux-kernel
Cc: K. Prasad, mingo, linux-rt-users, dipankar, paulmck, rostedt
On Sat, Jan 05, 2008 at 07:46:32AM -0500, Mathieu Desnoyers wrote:
> * Gautham R Shenoy (ego@in.ibm.com) wrote:
> > On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> > > * K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> > > > This patch converts the tracing mechanism of Preempt RCU boosting into
> > > > markers. The handler functions for these markers are included inside
> > > > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > > > chosen.
> > > >
> > > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > > ---
> > > > include/linux/rcupreempt_trace.h | 46 ++++++++
> > > > kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> > > > kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> > > > 3 files changed, 251 insertions(+), 189 deletions(-)
> > > >
> > > > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > > ===================================================================
> > > > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > > > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > > > extern int rcupreempt_mb_flag(int cpu);
> > > > extern char *rcupreempt_try_flip_state_name(void);
> > > >
> > > > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > > > +struct preempt_rcu_boost_trace {
> > > > + unsigned long rbs_stat_task_boost_called;
> > > > + unsigned long rbs_stat_task_boosted;
> > > > + unsigned long rbs_stat_boost_called;
> > > > + unsigned long rbs_stat_try_boost;
> > > > + unsigned long rbs_stat_boosted;
> > > > + unsigned long rbs_stat_unboost_called;
> > > > + unsigned long rbs_stat_unboosted;
> > > > + unsigned long rbs_stat_try_boost_readers;
> > > > + unsigned long rbs_stat_boost_readers;
> > > > + unsigned long rbs_stat_try_unboost_readers;
> > > > + unsigned long rbs_stat_unboost_readers;
> > > > + unsigned long rbs_stat_over_taken;
> > > > +};
> > > > +
> > > > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > > > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > > > + void *private_data, const char *format, ...) \
> > > > +{ \
> > > > + va_list ap; \
> > > > + int cpu; \
> > > > + struct preempt_rcu_boost_trace *boost_trace; \
> > > > + va_start(ap, format); \
> > > > + cpu = va_arg(ap, typeof(unsigned int)); \
> > >
> > > (sorry for late response, I just came back from vacation)
> > >
> > > Instead of passing the cpu id as a marker parameter, why don't you
> > > simply use smp_processor_id() right here ?
> > >
> >
> > Agreed.
> >
> > Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(),
> > we need not pass the parameter and simply use smp_processor_id().
> >
> > It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
> >
>
>
> And it would help if you declare the format string with something like :
>
> "cpuid %u" instead of "%u"
>
> This way, if a generic tracer like LTTng dumps the traces and later a
> plugin in an analysis tool like LTTV have to hook on a specific field of
> your event, it can access it by specifying the marker name and field
> name. And even if fields are added to this event, it won't break
> compatibility as long as this field is there and has the same name.
>
> Mathieu
Hi Mathieu,
Thanks for your suggestions.
Even in functions such as rcu_check_callbacks_rt, rcu_advance_callbacks_rt and
__rcu_advance_callbacks, where a call to RCU_TRACE_RDP() is made, the
'cpu' variable is populated at an early stage with the
return value of smp_processor_id().
__rcu_advance_callbacks() <-- rcu_check_callbacks_rt() <--
rcu_check_callbacks() <-- update_process_times().
update_process_times() [if understood correctly, is called at timer
interrupt level] populates the 'cpu' with smp_processor_id().
There doesn't seem to be a need to pass down the 'cpu' parameter for
the above RCU functions including respective trace_mark() calls.
I am re-sending the patches with your suggested changes in subsequent
mails.
Thanks,
K.Prasad
>
> >
> > Thanks and Regards
> > gautham.
> >
> >
> > > --
> > > Mathieu Desnoyers
> > > Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> > > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
> >
> > --
> > Gautham R Shenoy
> > Linux Technology Center
> > IBM India.
> > "Freedom comes with a price tag of responsibility, which is still a bargain,
> > because Freedom is priceless!"
>
> --
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-07 18:59 ` Mathieu Desnoyers
@ 2008-01-13 18:07 ` Pavel Machek
2008-01-14 15:35 ` Mathieu Desnoyers
0 siblings, 1 reply; 20+ messages in thread
From: Pavel Machek @ 2008-01-13 18:07 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Ingo Molnar, Frank Ch. Eigler, K. Prasad, linux-kernel,
linux-rt-users, dipankar, ego, paulmck, Andrew Morton,
Linus Torvalds
On Mon 2008-01-07 13:59:54, Mathieu Desnoyers wrote:
> * Ingo Molnar (mingo@elte.hu) wrote:
> >
> > * Frank Ch. Eigler <fche@redhat.com> wrote:
> >
> > > > [...] this is a general policy matter. It is _so much easier_ to add
> > > > markers if they _can_ have near-zero overhead (as in 1-2
> > > > instructions). Otherwise we'll keep arguing about it, especially if
> > > > any is added to performance-critical codepath. (where we are
> > > > counting instructions)
> > >
> > > The effect of the immediate-values patch, combined with gcc
> > > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> > > dcache-impact-free instructions. The register saves, parameter
> > > evaluation, the function call, can all be moved out of line.
> >
> > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> > we should already be getting that, right?
> >
> > There's one thing that would make out-of-line tracepoints have a lot
> > less objectionable to me: right now the 'out of line' area is put to the
> > end of functions. That splinters the kernel image with inactive, rarely
> > taken areas of code - blowing up its icache footprint considerably. For
> > example sched.o has ~100 functions, with the average function size being
> > 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
> > already.
>
> Hrm, I agree this can be a problem on architectures with more standard
> associative icaches, but aren't most x86_64 machines (and modern x86_32)
> using an instruction trace cache instead ? This makes the problem
> irrelevant.
>
> But I agree that, as Frank proposed, -freorder-blocks-and-partition
> could help us in that matter for the architectures using an associative
> L1 icache.
I thought trace cache died with P4?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-13 18:07 ` Pavel Machek
@ 2008-01-14 15:35 ` Mathieu Desnoyers
2008-01-14 16:30 ` Linus Torvalds
0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-14 15:35 UTC (permalink / raw)
To: Pavel Machek
Cc: Ingo Molnar, Frank Ch. Eigler, K. Prasad, linux-kernel,
linux-rt-users, dipankar, ego, paulmck, Andrew Morton,
Linus Torvalds
* Pavel Machek (pavel@ucw.cz) wrote:
> On Mon 2008-01-07 13:59:54, Mathieu Desnoyers wrote:
> > * Ingo Molnar (mingo@elte.hu) wrote:
> > >
> > > * Frank Ch. Eigler <fche@redhat.com> wrote:
> > >
> > > > > [...] this is a general policy matter. It is _so much easier_ to add
> > > > > markers if they _can_ have near-zero overhead (as in 1-2
> > > > > instructions). Otherwise we'll keep arguing about it, especially if
> > > > > any is added to performance-critical codepath. (where we are
> > > > > counting instructions)
> > > >
> > > > The effect of the immediate-values patch, combined with gcc
> > > > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> > > > dcache-impact-free instructions. The register saves, parameter
> > > > evaluation, the function call, can all be moved out of line.
> > >
> > > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> > > we should already be getting that, right?
> > >
> > > There's one thing that would make out-of-line tracepoints have a lot
> > > less objectionable to me: right now the 'out of line' area is put to the
> > > end of functions. That splinters the kernel image with inactive, rarely
> > > taken areas of code - blowing up its icache footprint considerably. For
> > > example sched.o has ~100 functions, with the average function size being
> > > 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
> > > already.
> >
> > Hrm, I agree this can be a problem on architectures with more standard
> > associative icaches, but aren't most x86_64 machines (and modern x86_32)
> > using an instruction trace cache instead ? This makes the problem
> > irrelevant.
> >
> > But I agree that, as Frank proposed, -freorder-blocks-and-partition
> > could help us in that matter for the architectures using an associative
> > L1 icache.
>
> I thought trace cache died with P4?
> --
And you are absolutely right.
We would have to figure out if enabling -freorder-blocks-and-partition
makes sense kernel-wide.
Mathieu
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-14 15:35 ` Mathieu Desnoyers
@ 2008-01-14 16:30 ` Linus Torvalds
2008-01-14 19:36 ` Mathieu Desnoyers
0 siblings, 1 reply; 20+ messages in thread
From: Linus Torvalds @ 2008-01-14 16:30 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Pavel Machek, Ingo Molnar, Frank Ch. Eigler, K. Prasad,
linux-kernel, linux-rt-users, dipankar, ego, paulmck,
Andrew Morton
On Mon, 14 Jan 2008, Mathieu Desnoyers wrote:
>
> We would have to figure out if enabling -freorder-blocks-and-partition
> makes sense kernel-wide.
Last I saw, it generates crappy code, with lots more jumps back and forth,
and the image just blows up.
There's a reason we use -Os, and that's that small footprint I$ is
generally more important than fake compiler optimizations that don't
actually help except on microbenchmarks where everything fits in the
cache.
Taking a branch instruction from two bytes to five is almost always a
mistake, unless you *know* that the code it jumps to will effectively
never be done at all (which is not necessarily the case at all). It also
makes debugging much nastier, because if now things like backtraces
probably look like crap too!
Don't go there. The *best* we can do is to just use the optimizations that
generate good-looking code that humans can read. The rest is just compiler
masturbation.
Linus
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
2008-01-14 16:30 ` Linus Torvalds
@ 2008-01-14 19:36 ` Mathieu Desnoyers
0 siblings, 0 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-14 19:36 UTC (permalink / raw)
To: Linus Torvalds
Cc: Pavel Machek, Ingo Molnar, Frank Ch. Eigler, K. Prasad,
linux-kernel, linux-rt-users, dipankar, ego, paulmck,
Andrew Morton
* Linus Torvalds (torvalds@linux-foundation.org) wrote:
>
>
> On Mon, 14 Jan 2008, Mathieu Desnoyers wrote:
> >
> > We would have to figure out if enabling -freorder-blocks-and-partition
> > makes sense kernel-wide.
>
> Last I saw, it generates crappy code, with lots more jumps back and forth,
> and the image just blows up.
>
> There's a reason we use -Os, and that's that small footprint I$ is
> generally more important than fake compiler optimizations that don't
> actually help except on microbenchmarks where everything fits in the
> cache.
>
> Taking a branch instruction from two bytes to five is almost always a
> mistake, unless you *know* that the code it jumps to will effectively
> never be done at all (which is not necessarily the case at all). It also
> makes debugging much nastier, because if now things like backtraces
> probably look like crap too!
>
> Don't go there. The *best* we can do is to just use the optimizations that
> generate good-looking code that humans can read. The rest is just compiler
> masturbation.
>
I agree that turning this flag on does not seem like an interesting
solution.
Well, I wonder how important this issue of not sharing L1 instruction
cachelines with scheduler code is. If we care as much about it as Ingo
states, I wonder why we leave about 22 BUG() macros in sched.c
(calculated from the number of ud2 instructions generated on x86), which
adds up to 7 bytes at the end of many scheduler functions (7 bytes
coming from ud2, jmp and .p2align on x86).
And 22 markers in sched.c is already much more than needed. I actually
propose only 5 in my patchset.
Mathieu
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 20+ messages in thread
end of thread, other threads:[~2008-01-14 19:42 UTC | newest]
Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-29 18:46 [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing K. Prasad
-- strict thread matches above, loose matches on Subject: below --
2007-12-31 6:09 K. Prasad
2007-12-31 10:20 ` Ingo Molnar
2008-01-02 3:31 ` Frank Ch. Eigler
2008-01-02 12:47 ` Ingo Molnar
2008-01-02 16:33 ` Frank Ch. Eigler
2008-01-02 17:01 ` Ingo Molnar
2008-01-02 17:56 ` Frank Ch. Eigler
2008-01-02 20:10 ` Ingo Molnar
2008-01-07 18:59 ` Mathieu Desnoyers
2008-01-13 18:07 ` Pavel Machek
2008-01-14 15:35 ` Mathieu Desnoyers
2008-01-14 16:30 ` Linus Torvalds
2008-01-14 19:36 ` Mathieu Desnoyers
2008-01-02 23:49 ` Nicholas Miell
2008-01-03 19:24 ` Mathieu Desnoyers
2008-01-03 16:30 ` Mathieu Desnoyers
2008-01-04 10:58 ` Gautham R Shenoy
2008-01-05 12:46 ` Mathieu Desnoyers
2008-01-07 19:43 ` K. Prasad
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox