public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
@ 2007-11-29 18:46 K. Prasad
  0 siblings, 0 replies; 20+ messages in thread
From: K. Prasad @ 2007-11-29 18:46 UTC (permalink / raw)
  To: linux-kernel; +Cc: dipankar, ego, mathieu.desnoyers, mingo, paulmck

This patch converts the tracing mechanism of Preempt RCU boosting into
markers. The handler functions for these markers are included inside
rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
chosen.

Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
 include/linux/rcupreempt_trace.h |   46 ++++++++
 kernel/rcupreempt-boost.c        |  211 ++++-----------------------------------
 kernel/rcupreempt_trace.c        |  183 +++++++++++++++++++++++++++++++++
 3 files changed, 251 insertions(+), 189 deletions(-)

Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
@@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
 extern int rcupreempt_mb_flag(int cpu);
 extern char *rcupreempt_try_flip_state_name(void);
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+struct preempt_rcu_boost_trace {
+	unsigned long rbs_stat_task_boost_called;
+	unsigned long rbs_stat_task_boosted;
+	unsigned long rbs_stat_boost_called;
+	unsigned long rbs_stat_try_boost;
+	unsigned long rbs_stat_boosted;
+	unsigned long rbs_stat_unboost_called;
+	unsigned long rbs_stat_unboosted;
+	unsigned long rbs_stat_try_boost_readers;
+	unsigned long rbs_stat_boost_readers;
+	unsigned long rbs_stat_try_unboost_readers;
+	unsigned long rbs_stat_unboost_readers;
+	unsigned long rbs_stat_over_taken;
+};
+
+#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
+void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
+				void *private_data, const char *format, ...) \
+{ \
+	va_list ap; \
+	int cpu; \
+	struct preempt_rcu_boost_trace *boost_trace; \
+	va_start(ap, format); \
+	cpu = va_arg(ap, typeof(unsigned int)); \
+	boost_trace = (&per_cpu(boost_trace_data, cpu)); \
+	boost_trace->rbs_stat_##preempt_rcu_boost_var++; \
+	va_end(ap); \
+}
+
+struct preempt_rcu_boost_probe {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+#define INIT_PREEMPT_RCU_BOOST_PROBE(preempt_rcu_boost_probe_worker) \
+{ \
+	.name = __stringify(preempt_rcu_boost_probe_worker), \
+	.format = "%u", \
+	.probe_func = preempt_rcu_boost_probe_worker##_callback \
+}
+
+extern int read_rcu_boost_prio(void);
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
@@ -51,6 +51,163 @@ static char *rcupreempt_trace_buf;
 
 static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
+static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
+static DEFINE_PER_CPU(struct preempt_rcu_boost_trace, boost_trace_data);
+
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(over_taken);
+
+static struct preempt_rcu_boost_probe preempt_rcu_boost_probe_array[] =
+{
+	INIT_PREEMPT_RCU_BOOST_PROBE(task_boost_called),
+	INIT_PREEMPT_RCU_BOOST_PROBE(task_boosted),
+	INIT_PREEMPT_RCU_BOOST_PROBE(boost_called),
+	INIT_PREEMPT_RCU_BOOST_PROBE(try_boost),
+	INIT_PREEMPT_RCU_BOOST_PROBE(boosted),
+	INIT_PREEMPT_RCU_BOOST_PROBE(unboost_called),
+	INIT_PREEMPT_RCU_BOOST_PROBE(unboosted),
+	INIT_PREEMPT_RCU_BOOST_PROBE(try_boost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(boost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(try_unboost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(unboost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(over_taken)
+};
+
+static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
+				size_t count, loff_t *ppos)
+{
+	static DEFINE_MUTEX(mutex);
+	int cnt = 0;
+	int cpu;
+	struct preempt_rcu_boost_trace *prbt;
+	ssize_t bcount;
+	unsigned long task_boost_called = 0;
+	unsigned long task_boosted = 0;
+	unsigned long boost_called = 0;
+	unsigned long try_boost = 0;
+	unsigned long boosted = 0;
+	unsigned long unboost_called = 0;
+	unsigned long unboosted = 0;
+	unsigned long try_boost_readers = 0;
+	unsigned long boost_readers = 0;
+	unsigned long try_unboost_readers = 0;
+	unsigned long unboost_readers = 0;
+	unsigned long over_taken = 0;
+
+	mutex_lock(&mutex);
+
+	for_each_online_cpu(cpu) {
+		prbt = &per_cpu(boost_trace_data, cpu);
+
+		task_boost_called += prbt->rbs_stat_task_boost_called;
+		task_boosted += prbt->rbs_stat_task_boosted;
+		boost_called += prbt->rbs_stat_boost_called;
+		try_boost += prbt->rbs_stat_try_boost;
+		boosted += prbt->rbs_stat_boosted;
+		unboost_called += prbt->rbs_stat_unboost_called;
+		unboosted += prbt->rbs_stat_unboosted;
+		try_boost_readers += prbt->rbs_stat_try_boost_readers;
+		boost_readers += prbt->rbs_stat_boost_readers;
+		try_unboost_readers += prbt->rbs_stat_try_unboost_readers;
+		unboost_readers += prbt->rbs_stat_unboost_readers;
+		over_taken += prbt->rbs_stat_over_taken;
+	}
+
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"task_boost_called = %ld\n",
+			task_boost_called);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"task_boosted = %ld\n",
+			task_boosted);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"boost_called = %ld\n",
+			boost_called);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"try_boost = %ld\n",
+			try_boost);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"boosted = %ld\n",
+			boosted);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"unboost_called = %ld\n",
+			unboost_called);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"unboosted = %ld\n",
+			unboosted);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"try_boost_readers = %ld\n",
+			try_boost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"boost_readers = %ld\n",
+			boost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"try_unboost_readers = %ld\n",
+			try_unboost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"unboost_readers = %ld\n",
+			unboost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"over_taken = %ld\n",
+			over_taken);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"rcu_boost_prio = %d\n",
+			read_rcu_boost_prio());
+	bcount = simple_read_from_buffer(buffer, count, ppos,
+		rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
+	mutex_unlock(&mutex);
+
+	return bcount;
+}
+
+static struct file_operations rcuboost_fops = {
+	.read = rcuboost_read,
+};
+
+static struct dentry  *rcuboostdir;
+int rcu_trace_boost_create(struct dentry *rcudir)
+{
+	rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
+					  NULL, &rcuboost_fops);
+	if (!rcuboostdir)
+		return 0;
+
+	return 1;
+}
+
+void rcu_trace_boost_destroy(void)
+{
+	if (rcuboostdir)
+		debugfs_remove(rcuboostdir);
+	rcuboostdir = NULL;
+}
+
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
 struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
 {
 	return &per_cpu(trace_data, cpu);
@@ -350,6 +507,10 @@ static int rcupreempt_debugfs_init(void)
 	if (!ctrsdir)
 		goto free_out;
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	if (!rcu_trace_boost_create(rcudir))
+		goto free_out;
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
 	return 0;
 free_out:
 	if (ctrsdir)
@@ -382,6 +543,22 @@ static int __init rcupreempt_trace_init(
 	}
 	printk(KERN_INFO "RCU Preempt markers registered\n");
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++) {
+		struct preempt_rcu_boost_probe *p = \
+					&preempt_rcu_boost_probe_array[i];
+		ret = marker_probe_register(p->name, p->format,
+					    p->probe_func, p);
+		if (ret)
+			printk(KERN_INFO "Unable to register Preempt RCU Boost "
+			       "probe %s\n", preempt_rcu_boost_probe_array[i].name);
+		ret = marker_arm(p->name);
+		if (ret)
+			printk(KERN_INFO "Unable to arm Preempt RCU Boost "
+			       "markers %s\n", p->name);
+	}
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
 	mutex_init(&rcupreempt_trace_mutex);
 	rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
 	if (!rcupreempt_trace_buf)
@@ -400,6 +577,12 @@ static void __exit rcupreempt_trace_clea
 		marker_probe_unregister(rcupreempt_probe_array[i].name);
 	printk(KERN_INFO "RCU Preempt markers unregistered\n");
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	rcu_trace_boost_destroy();
+	for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++)
+		marker_probe_unregister(preempt_rcu_boost_probe_array[i].name);
+	printk(KERN_INFO "Preempt RCU Boost markers unregistered\n");
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
 	debugfs_remove(statdir);
 	debugfs_remove(gpdir);
 	debugfs_remove(ctrsdir);
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt-boost.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
@@ -40,186 +40,9 @@ struct rcu_boost_dat {
 	int rbs_prio;			/* CPU copy of rcu_boost_prio  */
 	struct list_head rbs_toboost;	/* Preempted RCU readers       */
 	struct list_head rbs_boosted;	/* RCU readers that have been boosted */
-#ifdef CONFIG_RCU_TRACE
-	/* The rest are for statistics */
-	unsigned long rbs_stat_task_boost_called;
-	unsigned long rbs_stat_task_boosted;
-	unsigned long rbs_stat_boost_called;
-	unsigned long rbs_stat_try_boost;
-	unsigned long rbs_stat_boosted;
-	unsigned long rbs_stat_unboost_called;
-	unsigned long rbs_stat_unboosted;
-	unsigned long rbs_stat_try_boost_readers;
-	unsigned long rbs_stat_boost_readers;
-	unsigned long rbs_stat_try_unboost_readers;
-	unsigned long rbs_stat_unboost_readers;
-	unsigned long rbs_stat_over_taken;
-#endif /* CONFIG_RCU_TRACE */
 };
 
 static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
-#define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
-
-#ifdef CONFIG_RCU_TRACE
-
-#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
-static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
-
-static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
-				size_t count, loff_t *ppos)
-{
-	static DEFINE_MUTEX(mutex);
-	int cnt = 0;
-	int cpu;
-	struct rcu_boost_dat *rbd;
-	ssize_t bcount;
-	unsigned long task_boost_called = 0;
-	unsigned long task_boosted = 0;
-	unsigned long boost_called = 0;
-	unsigned long try_boost = 0;
-	unsigned long boosted = 0;
-	unsigned long unboost_called = 0;
-	unsigned long unboosted = 0;
-	unsigned long try_boost_readers = 0;
-	unsigned long boost_readers = 0;
-	unsigned long try_unboost_readers = 0;
-	unsigned long unboost_readers = 0;
-	unsigned long over_taken = 0;
-
-	mutex_lock(&mutex);
-
-	for_each_online_cpu(cpu) {
-		rbd = &per_cpu(rcu_boost_data, cpu);
-
-		task_boost_called += rbd->rbs_stat_task_boost_called;
-		task_boosted += rbd->rbs_stat_task_boosted;
-		boost_called += rbd->rbs_stat_boost_called;
-		try_boost += rbd->rbs_stat_try_boost;
-		boosted += rbd->rbs_stat_boosted;
-		unboost_called += rbd->rbs_stat_unboost_called;
-		unboosted += rbd->rbs_stat_unboosted;
-		try_boost_readers += rbd->rbs_stat_try_boost_readers;
-		boost_readers += rbd->rbs_stat_boost_readers;
-		try_unboost_readers += rbd->rbs_stat_try_boost_readers;
-		unboost_readers += rbd->rbs_stat_boost_readers;
-		over_taken += rbd->rbs_stat_over_taken;
-	}
-
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"task_boost_called = %ld\n",
-			task_boost_called);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"task_boosted = %ld\n",
-			task_boosted);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"boost_called = %ld\n",
-			boost_called);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"try_boost = %ld\n",
-			try_boost);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"boosted = %ld\n",
-			boosted);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"unboost_called = %ld\n",
-			unboost_called);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"unboosted = %ld\n",
-			unboosted);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"try_boost_readers = %ld\n",
-			try_boost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"boost_readers = %ld\n",
-			boost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"try_unboost_readers = %ld\n",
-			try_unboost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"unboost_readers = %ld\n",
-			unboost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"over_taken = %ld\n",
-			over_taken);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"rcu_boost_prio = %d\n",
-			rcu_boost_prio);
-	bcount = simple_read_from_buffer(buffer, count, ppos,
-			rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
-	mutex_unlock(&mutex);
-
-	return bcount;
-}
-
-static struct file_operations rcuboost_fops = {
-	.read = rcuboost_read,
-};
-
-static struct dentry  *rcuboostdir;
-int rcu_trace_boost_create(struct dentry *rcudir)
-{
-	rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
-					  NULL, &rcuboost_fops);
-	if (!rcuboostdir)
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
-
-void rcu_trace_boost_destroy(void)
-{
-	if (rcuboostdir)
-		debugfs_remove(rcuboostdir);
-	rcuboostdir = NULL;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
-
-#define RCU_BOOST_TRACE_FUNC_DECL(type)			      \
-	static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd)	\
-	{								\
-		rbd->rbs_stat_##type++;					\
-	}
-RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
-RCU_BOOST_TRACE_FUNC_DECL(boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost)
-RCU_BOOST_TRACE_FUNC_DECL(boosted)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
-RCU_BOOST_TRACE_FUNC_DECL(unboosted)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(over_taken)
-#else /* CONFIG_RCU_TRACE */
-/* These were created by the above macro "RCU_BOOST_TRACE_FUNC_DECL" */
-# define rcu_trace_boost_task_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_task_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost(rbd) do { } while (0)
-# define rcu_trace_boost_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_called(rbd) do { } while (0)
-# define rcu_trace_boost_unboosted(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_try_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_over_taken(rbd) do { } while (0)
-#endif /* CONFIG_RCU_TRACE */
 
 static inline int rcu_is_boosted(struct task_struct *task)
 {
@@ -234,10 +57,10 @@ static void rcu_boost_task(struct task_s
 	WARN_ON(!irqs_disabled());
 	WARN_ON_SMP(!spin_is_locked(&task->pi_lock));
 
-	rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
+	trace_mark(task_boost_called, "%u", smp_processor_id());
 
 	if (task->rcu_prio < task->prio) {
-		rcu_trace_boost_task_boosted(RCU_BOOST_ME);
+		trace_mark(task_boosted, "%u", smp_processor_id());
 		task_setprio(task, task->rcu_prio);
 	}
 }
@@ -261,7 +84,7 @@ void __rcu_preempt_boost(void)
 
 	WARN_ON(!current->rcu_read_lock_nesting);
 
-	rcu_trace_boost_boost_called(RCU_BOOST_ME);
+	trace_mark(boost_called, "%u", smp_processor_id());
 
 	/* check to see if we are already boosted */
 	if (unlikely(rcu_is_boosted(curr)))
@@ -279,7 +102,7 @@ void __rcu_preempt_boost(void)
 
 	curr->rcub_rbdp = rbd;
 
-	rcu_trace_boost_try_boost(rbd);
+	trace_mark(try_boost, "%u", smp_processor_id());
 
 	prio = rt_mutex_getprio(curr);
 
@@ -288,7 +111,7 @@ void __rcu_preempt_boost(void)
 	if (prio <= rbd->rbs_prio)
 		goto out;
 
-	rcu_trace_boost_boosted(curr->rcub_rbdp);
+	trace_mark(boosted, "%u", smp_processor_id());
 
 	curr->rcu_prio = rbd->rbs_prio;
 	rcu_boost_task(curr);
@@ -313,7 +136,7 @@ void __rcu_preempt_unboost(void)
 	int prio;
 	unsigned long flags;
 
-	rcu_trace_boost_unboost_called(RCU_BOOST_ME);
+	trace_mark(unboost_called, "%u", smp_processor_id());
 
 	/* if not boosted, then ignore */
 	if (likely(!rcu_is_boosted(curr)))
@@ -351,7 +174,7 @@ void __rcu_preempt_unboost(void)
 
 	list_del_init(&curr->rcub_entry);
 
-	rcu_trace_boost_unboosted(rbd);
+	trace_mark(unboosted, "%u", smp_processor_id());
 
 	curr->rcu_prio = MAX_PRIO;
 
@@ -412,7 +235,7 @@ static int __rcu_boost_readers(struct rc
 		 * Another task may have taken over.
 		 */
 		if (curr->rcu_preempt_counter != rcu_boost_counter) {
-			rcu_trace_boost_over_taken(rbd);
+			trace_mark(over_taken, "%u", smp_processor_id());
 			return 1;
 		}
 
@@ -443,7 +266,7 @@ void rcu_boost_readers(void)
 
 	prio = rt_mutex_getprio(curr);
 
-	rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
+	trace_mark(try_boost_readers, "%u", smp_processor_id());
 
 	if (prio >= rcu_boost_prio) {
 		/* already boosted */
@@ -453,7 +276,7 @@ void rcu_boost_readers(void)
 
 	rcu_boost_prio = prio;
 
-	rcu_trace_boost_boost_readers(RCU_BOOST_ME);
+	trace_mark(boost_readers, "%u", smp_processor_id());
 
 	/* Flag that we are the one to unboost */
 	curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
 
 	spin_lock_irqsave(&rcu_boost_wake_lock, flags);
 
-	rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
+	trace_mark(try_unboost_readers, "%u", smp_processor_id());
 
 	if (current->rcu_preempt_counter != rcu_boost_counter)
 		goto out;
 
-	rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
+	trace_mark(unboost_readers, "%u", smp_processor_id());
 
 	/*
 	 * We could also put in something that
@@ -514,6 +337,16 @@ void rcu_unboost_readers(void)
 }
 
 /*
+ * This function exports the rcu_boost_prio variable for use by
+ * modules that need it e.g. RCU_TRACE module
+ */
+int read_rcu_boost_prio(void)
+{
+	return rcu_boost_prio;
+}
+EXPORT_SYMBOL_GPL(read_rcu_boost_prio);
+
+/*
  * The krcupreemptd wakes up every "rcu_preempt_thread_secs"
  * seconds at the minimum priority of 1 to do a
  * synchronize_rcu. This ensures that grace periods finish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
@ 2007-12-31  6:09 K. Prasad
  2007-12-31 10:20 ` Ingo Molnar
  2008-01-03 16:30 ` Mathieu Desnoyers
  0 siblings, 2 replies; 20+ messages in thread
From: K. Prasad @ 2007-12-31  6:09 UTC (permalink / raw)
  To: mingo, linux-kernel, linux-rt-users
  Cc: dipankar, ego, mathieu.desnoyers, paulmck

This patch converts the tracing mechanism of Preempt RCU boosting into
markers. The handler functions for these markers are included inside
rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
chosen.
 
Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
---
 include/linux/rcupreempt_trace.h |   46 ++++++++
 kernel/rcupreempt-boost.c        |  211 ++++-----------------------------------
 kernel/rcupreempt_trace.c        |  183 +++++++++++++++++++++++++++++++++
 3 files changed, 251 insertions(+), 189 deletions(-)
 
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
@@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
 extern int rcupreempt_mb_flag(int cpu);
 extern char *rcupreempt_try_flip_state_name(void);
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+struct preempt_rcu_boost_trace {
+	unsigned long rbs_stat_task_boost_called;
+	unsigned long rbs_stat_task_boosted;
+	unsigned long rbs_stat_boost_called;
+	unsigned long rbs_stat_try_boost;
+	unsigned long rbs_stat_boosted;
+	unsigned long rbs_stat_unboost_called;
+	unsigned long rbs_stat_unboosted;
+	unsigned long rbs_stat_try_boost_readers;
+	unsigned long rbs_stat_boost_readers;
+	unsigned long rbs_stat_try_unboost_readers;
+	unsigned long rbs_stat_unboost_readers;
+	unsigned long rbs_stat_over_taken;
+};
+
+#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
+void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
+				void *private_data, const char *format, ...) \
+{ \
+	va_list ap; \
+	int cpu; \
+	struct preempt_rcu_boost_trace *boost_trace; \
+	va_start(ap, format); \
+	cpu = va_arg(ap, typeof(unsigned int)); \
+	boost_trace = (&per_cpu(boost_trace_data, cpu)); \
+	boost_trace->rbs_stat_##preempt_rcu_boost_var++; \
+	va_end(ap); \
+}
+
+struct preempt_rcu_boost_probe {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+#define INIT_PREEMPT_RCU_BOOST_PROBE(preempt_rcu_boost_probe_worker) \
+{ \
+	.name = __stringify(preempt_rcu_boost_probe_worker), \
+	.format = "%u", \
+	.probe_func = preempt_rcu_boost_probe_worker##_callback \
+}
+
+extern int read_rcu_boost_prio(void);
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
@@ -51,6 +51,163 @@ static char *rcupreempt_trace_buf;
 
 static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
+static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
+static DEFINE_PER_CPU(struct preempt_rcu_boost_trace, boost_trace_data);
+
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(over_taken);
+
+static struct preempt_rcu_boost_probe preempt_rcu_boost_probe_array[] =
+{
+	INIT_PREEMPT_RCU_BOOST_PROBE(task_boost_called),
+	INIT_PREEMPT_RCU_BOOST_PROBE(task_boosted),
+	INIT_PREEMPT_RCU_BOOST_PROBE(boost_called),
+	INIT_PREEMPT_RCU_BOOST_PROBE(try_boost),
+	INIT_PREEMPT_RCU_BOOST_PROBE(boosted),
+	INIT_PREEMPT_RCU_BOOST_PROBE(unboost_called),
+	INIT_PREEMPT_RCU_BOOST_PROBE(unboosted),
+	INIT_PREEMPT_RCU_BOOST_PROBE(try_boost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(boost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(try_unboost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(unboost_readers),
+	INIT_PREEMPT_RCU_BOOST_PROBE(over_taken)
+};
+
+static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
+				size_t count, loff_t *ppos)
+{
+	static DEFINE_MUTEX(mutex);
+	int cnt = 0;
+	int cpu;
+	struct preempt_rcu_boost_trace *prbt;
+	ssize_t bcount;
+	unsigned long task_boost_called = 0;
+	unsigned long task_boosted = 0;
+	unsigned long boost_called = 0;
+	unsigned long try_boost = 0;
+	unsigned long boosted = 0;
+	unsigned long unboost_called = 0;
+	unsigned long unboosted = 0;
+	unsigned long try_boost_readers = 0;
+	unsigned long boost_readers = 0;
+	unsigned long try_unboost_readers = 0;
+	unsigned long unboost_readers = 0;
+	unsigned long over_taken = 0;
+
+	mutex_lock(&mutex);
+
+	for_each_online_cpu(cpu) {
+		prbt = &per_cpu(boost_trace_data, cpu);
+
+		task_boost_called += prbt->rbs_stat_task_boost_called;
+		task_boosted += prbt->rbs_stat_task_boosted;
+		boost_called += prbt->rbs_stat_boost_called;
+		try_boost += prbt->rbs_stat_try_boost;
+		boosted += prbt->rbs_stat_boosted;
+		unboost_called += prbt->rbs_stat_unboost_called;
+		unboosted += prbt->rbs_stat_unboosted;
+		try_boost_readers += prbt->rbs_stat_try_boost_readers;
+		boost_readers += prbt->rbs_stat_boost_readers;
+		try_unboost_readers += prbt->rbs_stat_try_unboost_readers;
+		unboost_readers += prbt->rbs_stat_unboost_readers;
+		over_taken += prbt->rbs_stat_over_taken;
+	}
+
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"task_boost_called = %ld\n",
+			task_boost_called);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"task_boosted = %ld\n",
+			task_boosted);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"boost_called = %ld\n",
+			boost_called);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"try_boost = %ld\n",
+			try_boost);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"boosted = %ld\n",
+			boosted);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"unboost_called = %ld\n",
+			unboost_called);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"unboosted = %ld\n",
+			unboosted);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"try_boost_readers = %ld\n",
+			try_boost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"boost_readers = %ld\n",
+			boost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"try_unboost_readers = %ld\n",
+			try_unboost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"unboost_readers = %ld\n",
+			unboost_readers);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"over_taken = %ld\n",
+			over_taken);
+	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+			"rcu_boost_prio = %d\n",
+			read_rcu_boost_prio());
+	bcount = simple_read_from_buffer(buffer, count, ppos,
+		rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
+	mutex_unlock(&mutex);
+
+	return bcount;
+}
+
+static struct file_operations rcuboost_fops = {
+	.read = rcuboost_read,
+};
+
+static struct dentry  *rcuboostdir;
+int rcu_trace_boost_create(struct dentry *rcudir)
+{
+	rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
+					  NULL, &rcuboost_fops);
+	if (!rcuboostdir)
+		return 0;
+
+	return 1;
+}
+
+void rcu_trace_boost_destroy(void)
+{
+	if (rcuboostdir)
+		debugfs_remove(rcuboostdir);
+	rcuboostdir = NULL;
+}
+
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
 struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
 {
 	return &per_cpu(trace_data, cpu);
@@ -350,6 +507,10 @@ static int rcupreempt_debugfs_init(void)
 	if (!ctrsdir)
 		goto free_out;
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	if (!rcu_trace_boost_create(rcudir))
+		goto free_out;
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
 	return 0;
 free_out:
 	if (ctrsdir)
@@ -382,6 +543,22 @@ static int __init rcupreempt_trace_init(
 	}
 	printk(KERN_INFO "RCU Preempt markers registered\n");
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++) {
+		struct preempt_rcu_boost_probe *p = \
+					&preempt_rcu_boost_probe_array[i];
+		ret = marker_probe_register(p->name, p->format,
+					    p->probe_func, p);
+		if (ret)
+			printk(KERN_INFO "Unable to register Preempt RCU Boost "
+			       "probe %s\n", preempt_rcu_boost_probe_array[i].name);
+		ret = marker_arm(p->name);
+		if (ret)
+			printk(KERN_INFO "Unable to arm Preempt RCU Boost "
+			       "markers %s\n", p->name);
+	}
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
 	mutex_init(&rcupreempt_trace_mutex);
 	rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
 	if (!rcupreempt_trace_buf)
@@ -400,6 +577,12 @@ static void __exit rcupreempt_trace_clea
 		marker_probe_unregister(rcupreempt_probe_array[i].name);
 	printk(KERN_INFO "RCU Preempt markers unregistered\n");
 
+#ifdef CONFIG_PREEMPT_RCU_BOOST
+	rcu_trace_boost_destroy();
+	for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++)
+		marker_probe_unregister(preempt_rcu_boost_probe_array[i].name);
+	printk(KERN_INFO "Preempt RCU Boost markers unregistered\n");
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
 	debugfs_remove(statdir);
 	debugfs_remove(gpdir);
 	debugfs_remove(ctrsdir);
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt-boost.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
@@ -40,186 +40,9 @@ struct rcu_boost_dat {
 	int rbs_prio;			/* CPU copy of rcu_boost_prio  */
 	struct list_head rbs_toboost;	/* Preempted RCU readers       */
 	struct list_head rbs_boosted;	/* RCU readers that have been boosted */
-#ifdef CONFIG_RCU_TRACE
-	/* The rest are for statistics */
-	unsigned long rbs_stat_task_boost_called;
-	unsigned long rbs_stat_task_boosted;
-	unsigned long rbs_stat_boost_called;
-	unsigned long rbs_stat_try_boost;
-	unsigned long rbs_stat_boosted;
-	unsigned long rbs_stat_unboost_called;
-	unsigned long rbs_stat_unboosted;
-	unsigned long rbs_stat_try_boost_readers;
-	unsigned long rbs_stat_boost_readers;
-	unsigned long rbs_stat_try_unboost_readers;
-	unsigned long rbs_stat_unboost_readers;
-	unsigned long rbs_stat_over_taken;
-#endif /* CONFIG_RCU_TRACE */
 };
 
 static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
-#define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
-
-#ifdef CONFIG_RCU_TRACE
-
-#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
-static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
-
-static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
-				size_t count, loff_t *ppos)
-{
-	static DEFINE_MUTEX(mutex);
-	int cnt = 0;
-	int cpu;
-	struct rcu_boost_dat *rbd;
-	ssize_t bcount;
-	unsigned long task_boost_called = 0;
-	unsigned long task_boosted = 0;
-	unsigned long boost_called = 0;
-	unsigned long try_boost = 0;
-	unsigned long boosted = 0;
-	unsigned long unboost_called = 0;
-	unsigned long unboosted = 0;
-	unsigned long try_boost_readers = 0;
-	unsigned long boost_readers = 0;
-	unsigned long try_unboost_readers = 0;
-	unsigned long unboost_readers = 0;
-	unsigned long over_taken = 0;
-
-	mutex_lock(&mutex);
-
-	for_each_online_cpu(cpu) {
-		rbd = &per_cpu(rcu_boost_data, cpu);
-
-		task_boost_called += rbd->rbs_stat_task_boost_called;
-		task_boosted += rbd->rbs_stat_task_boosted;
-		boost_called += rbd->rbs_stat_boost_called;
-		try_boost += rbd->rbs_stat_try_boost;
-		boosted += rbd->rbs_stat_boosted;
-		unboost_called += rbd->rbs_stat_unboost_called;
-		unboosted += rbd->rbs_stat_unboosted;
-		try_boost_readers += rbd->rbs_stat_try_boost_readers;
-		boost_readers += rbd->rbs_stat_boost_readers;
-		try_unboost_readers += rbd->rbs_stat_try_boost_readers;
-		unboost_readers += rbd->rbs_stat_boost_readers;
-		over_taken += rbd->rbs_stat_over_taken;
-	}
-
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"task_boost_called = %ld\n",
-			task_boost_called);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"task_boosted = %ld\n",
-			task_boosted);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"boost_called = %ld\n",
-			boost_called);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"try_boost = %ld\n",
-			try_boost);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"boosted = %ld\n",
-			boosted);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"unboost_called = %ld\n",
-			unboost_called);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"unboosted = %ld\n",
-			unboosted);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"try_boost_readers = %ld\n",
-			try_boost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"boost_readers = %ld\n",
-			boost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"try_unboost_readers = %ld\n",
-			try_unboost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"unboost_readers = %ld\n",
-			unboost_readers);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"over_taken = %ld\n",
-			over_taken);
-	cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
-			RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
-			"rcu_boost_prio = %d\n",
-			rcu_boost_prio);
-	bcount = simple_read_from_buffer(buffer, count, ppos,
-			rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
-	mutex_unlock(&mutex);
-
-	return bcount;
-}
-
-static struct file_operations rcuboost_fops = {
-	.read = rcuboost_read,
-};
-
-static struct dentry  *rcuboostdir;
-int rcu_trace_boost_create(struct dentry *rcudir)
-{
-	rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
-					  NULL, &rcuboost_fops);
-	if (!rcuboostdir)
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
-
-void rcu_trace_boost_destroy(void)
-{
-	if (rcuboostdir)
-		debugfs_remove(rcuboostdir);
-	rcuboostdir = NULL;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
-
-#define RCU_BOOST_TRACE_FUNC_DECL(type)			      \
-	static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd)	\
-	{								\
-		rbd->rbs_stat_##type++;					\
-	}
-RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
-RCU_BOOST_TRACE_FUNC_DECL(boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost)
-RCU_BOOST_TRACE_FUNC_DECL(boosted)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
-RCU_BOOST_TRACE_FUNC_DECL(unboosted)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(over_taken)
-#else /* CONFIG_RCU_TRACE */
-/* These were created by the above macro "RCU_BOOST_TRACE_FUNC_DECL" */
-# define rcu_trace_boost_task_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_task_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost(rbd) do { } while (0)
-# define rcu_trace_boost_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_called(rbd) do { } while (0)
-# define rcu_trace_boost_unboosted(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_try_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_over_taken(rbd) do { } while (0)
-#endif /* CONFIG_RCU_TRACE */
 
 static inline int rcu_is_boosted(struct task_struct *task)
 {
@@ -234,10 +57,10 @@ static void rcu_boost_task(struct task_s
 	WARN_ON(!irqs_disabled());
 	WARN_ON_SMP(!spin_is_locked(&task->pi_lock));
 
-	rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
+	trace_mark(task_boost_called, "%u", smp_processor_id());
 
 	if (task->rcu_prio < task->prio) {
-		rcu_trace_boost_task_boosted(RCU_BOOST_ME);
+		trace_mark(task_boosted, "%u", smp_processor_id());
 		task_setprio(task, task->rcu_prio);
 	}
 }
@@ -261,7 +84,7 @@ void __rcu_preempt_boost(void)
 
 	WARN_ON(!current->rcu_read_lock_nesting);
 
-	rcu_trace_boost_boost_called(RCU_BOOST_ME);
+	trace_mark(boost_called, "%u", smp_processor_id());
 
 	/* check to see if we are already boosted */
 	if (unlikely(rcu_is_boosted(curr)))
@@ -279,7 +102,7 @@ void __rcu_preempt_boost(void)
 
 	curr->rcub_rbdp = rbd;
 
-	rcu_trace_boost_try_boost(rbd);
+	trace_mark(try_boost, "%u", smp_processor_id());
 
 	prio = rt_mutex_getprio(curr);
 
@@ -288,7 +111,7 @@ void __rcu_preempt_boost(void)
 	if (prio <= rbd->rbs_prio)
 		goto out;
 
-	rcu_trace_boost_boosted(curr->rcub_rbdp);
+	trace_mark(boosted, "%u", smp_processor_id());
 
 	curr->rcu_prio = rbd->rbs_prio;
 	rcu_boost_task(curr);
@@ -313,7 +136,7 @@ void __rcu_preempt_unboost(void)
 	int prio;
 	unsigned long flags;
 
-	rcu_trace_boost_unboost_called(RCU_BOOST_ME);
+	trace_mark(unboost_called, "%u", smp_processor_id());
 
 	/* if not boosted, then ignore */
 	if (likely(!rcu_is_boosted(curr)))
@@ -351,7 +174,7 @@ void __rcu_preempt_unboost(void)
 
 	list_del_init(&curr->rcub_entry);
 
-	rcu_trace_boost_unboosted(rbd);
+	trace_mark(unboosted, "%u", smp_processor_id());
 
 	curr->rcu_prio = MAX_PRIO;
 
@@ -412,7 +235,7 @@ static int __rcu_boost_readers(struct rc
 		 * Another task may have taken over.
 		 */
 		if (curr->rcu_preempt_counter != rcu_boost_counter) {
-			rcu_trace_boost_over_taken(rbd);
+			trace_mark(over_taken, "%u", smp_processor_id());
 			return 1;
 		}
 
@@ -443,7 +266,7 @@ void rcu_boost_readers(void)
 
 	prio = rt_mutex_getprio(curr);
 
-	rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
+	trace_mark(try_boost_readers, "%u", smp_processor_id());
 
 	if (prio >= rcu_boost_prio) {
 		/* already boosted */
@@ -453,7 +276,7 @@ void rcu_boost_readers(void)
 
 	rcu_boost_prio = prio;
 
-	rcu_trace_boost_boost_readers(RCU_BOOST_ME);
+	trace_mark(boost_readers, "%u", smp_processor_id());
 
 	/* Flag that we are the one to unboost */
 	curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
 
 	spin_lock_irqsave(&rcu_boost_wake_lock, flags);
 
-	rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
+	trace_mark(try_unboost_readers, "%u", smp_processor_id());
 
 	if (current->rcu_preempt_counter != rcu_boost_counter)
 		goto out;
 
-	rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
+	trace_mark(unboost_readers, "%u", smp_processor_id());
 
 	/*
 	 * We could also put in something that
@@ -514,6 +337,16 @@ void rcu_unboost_readers(void)
 }
 
 /*
+ * This function exports the rcu_boost_prio variable for use by
+ * modules that need it e.g. RCU_TRACE module
+ */
+int read_rcu_boost_prio(void)
+{
+	return rcu_boost_prio;
+}
+EXPORT_SYMBOL_GPL(read_rcu_boost_prio);
+
+/*
  * The krcupreemptd wakes up every "rcu_preempt_thread_secs"
  * seconds at the minimum priority of 1 to do a
  * synchronize_rcu. This ensures that grace periods finish

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2007-12-31  6:09 K. Prasad
@ 2007-12-31 10:20 ` Ingo Molnar
  2008-01-02  3:31   ` Frank Ch. Eigler
  2008-01-03 19:24   ` Mathieu Desnoyers
  2008-01-03 16:30 ` Mathieu Desnoyers
  1 sibling, 2 replies; 20+ messages in thread
From: Ingo Molnar @ 2007-12-31 10:20 UTC (permalink / raw)
  To: K. Prasad
  Cc: linux-kernel, linux-rt-users, dipankar, ego, mathieu.desnoyers,
	paulmck, Andrew Morton, Linus Torvalds


* K. Prasad <prasad@linux.vnet.ibm.com> wrote:

> @@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
>  
>  	spin_lock_irqsave(&rcu_boost_wake_lock, flags);
>  
> -	rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
> +	trace_mark(try_unboost_readers, "%u", smp_processor_id());

this isn't directed at you or your patch, it's more directed at Mathieu, 
but looking at this actual markers patch submitted to me, i'm still 
fundamentally worried about the whole marker approach.

Firstly, why on earth does a full format string have to be passed in for 
something as simple as a CPU id? This way we basically codify it forever 
that tracing _has_ to be expensive when enabled. The latency-tracer 
(which i'd love to convert to markers, if only markers were capable 
enough) has shown that tracing _can_ be used to capture performance 
data without disturbing the measured system, even at hundreds of 
thousands context switches a second per CPU.

Secondly, the inlined overhead of trace_mark() is still WAY too large:

                if (unlikely(__mark_##name.state)) {                    \
                        preempt_disable();                              \
                        (*__mark_##name.call)                           \
                                (&__mark_##name, call_data,             \
                                format, ## args);                       \
                        preempt_enable();                               \
                }                                                       \

Whatever became of the obvious suggestion that i outlined years ago, to 
have a _single_ trace call instruction and to _patch out_ the damn 
marker calls by default? No .state flag checking inlined. No 
preempt_disable()/enable() pair. Patching static tracepoints OUT of the 
kernel, only leaving a ~5-byte NOP sequence behind them (and some 
minimal disturbance to the variables the tracepoint accesses). We've got 
all the alternatives.h code patching infrastructure available for such 
purposes. Why are we 2 years down the line and _STILL_ arguing about 
this?

Thirdly, the patch selects CONFIG_MARKERS:

>  config RCU_TRACE
> -       bool "Enable tracing for RCU - currently stats in debugfs"
> +       tristate "Enable tracing for RCU - currently stats in debugfs"
>         select DEBUG_FS
> -       default y
> +       select MARKERS

Which adds overhead (inlined checks for markers) all around the kernel, 
even if all markers are deactivated! Imagine a thousand of them and the 
kernel blows up measurably.

Sadly, this whole trace_mark() API seems to have gotten much worse since 
i last saw it. It's sub-par when it's turned on and it's sub-par when 
it's turned off. It gets us the worst of both worlds.

If it continues like this then i'd much rather see people add printks as 
tracing, because there you _know_ that it's high-overhead and people 
won't start arguing about trace compatibility either, etc.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2007-12-31 10:20 ` Ingo Molnar
@ 2008-01-02  3:31   ` Frank Ch. Eigler
  2008-01-02 12:47     ` Ingo Molnar
  2008-01-03 19:24   ` Mathieu Desnoyers
  1 sibling, 1 reply; 20+ messages in thread
From: Frank Ch. Eigler @ 2008-01-02  3:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
	mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds

Ingo Molnar <mingo@elte.hu> writes:

> [...]  Firstly, why on earth does a full format string have to be
> passed in for something as simple as a CPU id? This way we basically
> codify it forever that tracing _has_ to be expensive when
> enabled. [...]

FWIW, I'm not keen about the format strings either, but they don't
constitute a performance hit beyond an additional parameter.  It does
not need to actually get parsed at run time.


>[...]
> Secondly, the inlined overhead of trace_mark() is still WAY too large:
>
>                 if (unlikely(__mark_##name.state)) {                    \
> [...]
>                 }                                                       \

Note that this is for the unoptimized case.  The immediate-value code
is better.  I have still yet to see some good measurements of how much
the overheads of the various variants are, however.  It's only fair to
gather these numbers and continue the debate with them in hand.


> Whatever became of the obvious suggestion that i outlined years ago,
> to have a _single_ trace call instruction and to _patch out_ the
> damn marker calls by default?  [...]  only leaving a ~5-byte NOP
> sequence behind them (and some minimal disturbance to the variables
> the tracepoint accesses). [...]

This has been answered several times before.  It's because the marker
parameters have to be (conditionally) evaluated and pushed onto a call
frame.  It's not just a call that would need being nop'd, but a whole
function call setup/teardown sequence, which itself can be interleaved
with adjacent code.


- FChE

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02  3:31   ` Frank Ch. Eigler
@ 2008-01-02 12:47     ` Ingo Molnar
  2008-01-02 16:33       ` Frank Ch. Eigler
  0 siblings, 1 reply; 20+ messages in thread
From: Ingo Molnar @ 2008-01-02 12:47 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
	mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds


* Frank Ch. Eigler <fche@redhat.com> wrote:

> Ingo Molnar <mingo@elte.hu> writes:
> 
> > [...]  Firstly, why on earth does a full format string have to be
> > passed in for something as simple as a CPU id? This way we basically
> > codify it forever that tracing _has_ to be expensive when
> > enabled. [...]
> 
> FWIW, I'm not keen about the format strings either, but they don't 
> constitute a performance hit beyond an additional parameter.  It does 
> not need to actually get parsed at run time.

"only" an additional parameter. The whole _point_ behind these markers 
is for them to have minimal effect!

> >[...]
> > Secondly, the inlined overhead of trace_mark() is still WAY too large:
> >
> >                 if (unlikely(__mark_##name.state)) {                    \
> > [...]
> >                 }                                                       \
> 
> Note that this is for the unoptimized case.  The immediate-value code 
> is better.  I have still yet to see some good measurements of how much 
> the overheads of the various variants are, however.  It's only fair to 
> gather these numbers and continue the debate with them in hand.

this is a general policy matter. It is _so much easier_ to add markers 
if they _can_ have near-zero overhead (as in 1-2 instructions). 
Otherwise we'll keep arguing about it, especially if any is added to 
performance-critical codepath. (where we are counting instructions)

> > Whatever became of the obvious suggestion that i outlined years ago, 
> > to have a _single_ trace call instruction and to _patch out_ the 
> > damn marker calls by default?  [...]  only leaving a ~5-byte NOP 
> > sequence behind them (and some minimal disturbance to the variables 
> > the tracepoint accesses). [...]
> 
> This has been answered several times before.  It's because the marker 
> parameters have to be (conditionally) evaluated and pushed onto a call 
> frame.  It's not just a call that would need being nop'd, but a whole 
> function call setup/teardown sequence, which itself can be interleaved 
> with adjacent code.

you are still missing my point. Firstly, the kernel is regparm built so 
there's no call frame to be pushed to in most cases - we pass most 
parameters in registers. (hence my stressing of minimizing the number of 
parameters to an absolute minimum)

Secondly, the side-effects of a function call is what i referred to via:

> [...]  (and some minimal disturbance to the variables the tracepoint 
> accesses). [...]

really, a tracepoint obviously accesses data that is readily accessible 
in that spot. Worst-case we'll have some additional register constraints 
that make the code a bit less optimal.

Yes, if a trace point references data that is _not_ readily available, 
then of course the preparation for the function call might not be cheap. 
But that can be optimized by placing the tracepoints intelligently.

what cannot be optimized away at all are the conditional instructions 
introduced by the probe points, extra parameters and the space overhead 
of the function call itself.

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02 12:47     ` Ingo Molnar
@ 2008-01-02 16:33       ` Frank Ch. Eigler
  2008-01-02 17:01         ` Ingo Molnar
  2008-01-02 23:49         ` Nicholas Miell
  0 siblings, 2 replies; 20+ messages in thread
From: Frank Ch. Eigler @ 2008-01-02 16:33 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
	mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds

Hi -

On Wed, Jan 02, 2008 at 01:47:34PM +0100, Ingo Molnar wrote:
> [...]
> > FWIW, I'm not keen about the format strings either, but they don't 
> > constitute a performance hit beyond an additional parameter.  It does 
> > not need to actually get parsed at run time.
> 
> "only" an additional parameter. The whole _point_ behind these markers 
> is for them to have minimal effect!

Agreed.  The only alternative I recall seeing proposed was my own
cartesian-product macro suite that encodes parameter types into the
marker function/macro name itself.  (Maybe some of that could be
hidden with gcc typeof() magic.)  There appeared to be a consensus
that this was more undesirable.  Do you agree?


> [...]
> this is a general policy matter. It is _so much easier_ to add markers 
> if they _can_ have near-zero overhead (as in 1-2 instructions). 
> Otherwise we'll keep arguing about it, especially if any is added to 
> performance-critical codepath. (where we are counting instructions)

The effect of the immediate-values patch, combined with gcc
CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
dcache-impact-free instructions.  The register saves, parameter
evaluation, the function call, can all be moved out of line.


> > > Whatever became of the obvious suggestion that i outlined years ago, 
> > > to have a _single_ trace call instruction and to _patch out_ the 
> > > damn marker calls by default?  [...]  only leaving a ~5-byte NOP 
> > > sequence behind them (and some minimal disturbance to the variables 
> > > the tracepoint accesses). [...]
> > 
> > This has been answered several times before.  It's because the marker 
> > parameters have to be (conditionally) evaluated and pushed onto a call 
> > frame.  It's not just a call that would need being nop'd, but a whole 
> > function call setup/teardown sequence, which itself can be interleaved 
> > with adjacent code.
> 
> you are still missing my point. Firstly, the kernel is regparm built so 
> there's no call frame to be pushed to in most cases - we pass most 
> parameters in registers. [...]

Yes, but those registers will already be used, so their former values
need to be saved or later recomputed as a part of the call sequence.
Even wrapping it all with an asm barrier may not be enough to
completely bound the code with an address range that can be naively
NOP'd out.


> Secondly, the side-effects of a function call is what i referred to via:
> 
> > [...]  (and some minimal disturbance to the variables the tracepoint 
> > accesses). [...]
> 
> really, a tracepoint obviously accesses data that is readily accessible 
> in that spot. Worst-case we'll have some additional register constraints 
> that make the code a bit less optimal. [...]

It's more than that - people will want to pass some dereferenced
fields from a struct*; some old function parameters that were by the
time of the marker call shoved out of registers again.  Register
constraints that prohibit general expressions would impede the
utility of the tool.


> what cannot be optimized away at all are the conditional
> instructions introduced by the probe points, extra parameters and
> the space overhead of the function call itself.

Almost all of that can & should be moved out of line.


- FChE

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02 16:33       ` Frank Ch. Eigler
@ 2008-01-02 17:01         ` Ingo Molnar
  2008-01-02 17:56           ` Frank Ch. Eigler
  2008-01-07 18:59           ` Mathieu Desnoyers
  2008-01-02 23:49         ` Nicholas Miell
  1 sibling, 2 replies; 20+ messages in thread
From: Ingo Molnar @ 2008-01-02 17:01 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
	mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds


* Frank Ch. Eigler <fche@redhat.com> wrote:

> > [...] this is a general policy matter. It is _so much easier_ to add 
> > markers if they _can_ have near-zero overhead (as in 1-2 
> > instructions). Otherwise we'll keep arguing about it, especially if 
> > any is added to performance-critical codepath. (where we are 
> > counting instructions)
> 
> The effect of the immediate-values patch, combined with gcc 
> CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2 
> dcache-impact-free instructions.  The register saves, parameter 
> evaluation, the function call, can all be moved out of line.

well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so 
we should already be getting that, right?

There's one thing that would make out-of-line tracepoints a lot 
less objectionable to me: right now the 'out of line' area is put to the 
end of functions. That splinters the kernel image with inactive, rarely 
taken areas of code - blowing up its icache footprint considerably. For 
example sched.o has ~100 functions, with the average function size being 
200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste 
already.

It's true that keeping the off-site code within the function keeps total 
codesize slightly smaller, because the offsets (and hence the 
conditional jumps) are thus 8 bit - but that's below 1% and the 
cache-blow-up aspect is more severe in practice at 10-20%.

So it would be nice if we could collect all this offline code and stuff 
it away into another portion of the kernel image. (or, into another 
portion of the object file - which would still be good enough in 
practice)

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02 17:01         ` Ingo Molnar
@ 2008-01-02 17:56           ` Frank Ch. Eigler
  2008-01-02 20:10             ` Ingo Molnar
  2008-01-07 18:59           ` Mathieu Desnoyers
  1 sibling, 1 reply; 20+ messages in thread
From: Frank Ch. Eigler @ 2008-01-02 17:56 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frank Ch. Eigler, K. Prasad, linux-kernel, linux-rt-users,
	dipankar, ego, mathieu.desnoyers, paulmck, Andrew Morton,
	Linus Torvalds

Hi -

On Wed, Jan 02, 2008 at 06:01:57PM +0100, Ingo Molnar wrote:
> [...]
> well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so 
> we should already be getting that, right?

Right.

> [...]  So it would be nice if we could collect all this offline code
> and stuff it away into another portion of the kernel image. (or,
> into another portion of the object file - which would still be good
> enough in practice)

That would be the -freorder-blocks-and-partition flag, as proposed by
Arjan two Februarys ago.  I don't see any traces of Andi's overriding
"-fno-reorder-blocks" in the current linus tree, so maybe it's time to
resurrect this one:

http://readlist.com/lists/vger.kernel.org/linux-kernel/39/196123.html

- FChE

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02 17:56           ` Frank Ch. Eigler
@ 2008-01-02 20:10             ` Ingo Molnar
  0 siblings, 0 replies; 20+ messages in thread
From: Ingo Molnar @ 2008-01-02 20:10 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego,
	mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds


* Frank Ch. Eigler <fche@redhat.com> wrote:

> Hi -
> 
> On Wed, Jan 02, 2008 at 06:01:57PM +0100, Ingo Molnar wrote:
> > [...]
> > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so 
> > we should already be getting that, right?
> 
> Right.
> 
> > [...]  So it would be nice if we could collect all this offline code
> > and stuff it away into another portion of the kernel image. (or,
> > into another portion of the object file - which would still be good
> > enough in practice)
> 
> That would be the -freorder-blocks-and-partition flag, as proposed by 
> Arjan two Februarys ago.  I don't see any traces of Andi's overriding 
> "-fno-reorder-blocks" in the current linus tree, so maybe it's time to 
> resurrect this one:
> 
> http://readlist.com/lists/vger.kernel.org/linux-kernel/39/196123.html

hm, that gives:

                                   Forbidden

   You don't have permission to access
   /lists/vger.kernel.org/linux-kernel/39/196123.html on this server.

but yeah, i had the impression that gcc couldn't yet do this. Not a 
showstopper, but it would be nice to initiate the gcc side of things ...

	Ingo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02 16:33       ` Frank Ch. Eigler
  2008-01-02 17:01         ` Ingo Molnar
@ 2008-01-02 23:49         ` Nicholas Miell
  1 sibling, 0 replies; 20+ messages in thread
From: Nicholas Miell @ 2008-01-02 23:49 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Ingo Molnar, K. Prasad, linux-kernel, linux-rt-users, dipankar,
	ego, mathieu.desnoyers, paulmck, Andrew Morton, Linus Torvalds


On Wed, 2008-01-02 at 11:33 -0500, Frank Ch. Eigler wrote:
> Hi -
> 
> On Wed, Jan 02, 2008 at 01:47:34PM +0100, Ingo Molnar wrote:
> > [...]
> > > FWIW, I'm not keen about the format strings either, but they don't 
> > > constitute a performance hit beyond an additional parameter.  It does 
> > > not need to actually get parsed at run time.
> > 
> > "only" an additional parameter. The whole _point_ behind these markers 
> > is for them to have minimal effect!
> 
> Agreed.  The only alternative I recall seeing proposed was my own
> cartesian-product macro suite that encodes parameter types into the
> marker function/macro name itself.  (Maybe some of that could be
> hidden with gcc typeof() magic.)  There appeared to be a consensus
> that this was more undesirable.  Do you agree?
> 
> 

C++ name mangling would be extremely useful here.


Actually, why isn't the DWARF information for the functions sufficient?

-- 
Nicholas Miell <nmiell@comcast.net>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2007-12-31  6:09 K. Prasad
  2007-12-31 10:20 ` Ingo Molnar
@ 2008-01-03 16:30 ` Mathieu Desnoyers
  2008-01-04 10:58   ` Gautham R Shenoy
  1 sibling, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-03 16:30 UTC (permalink / raw)
  To: K. Prasad; +Cc: mingo, linux-kernel, linux-rt-users, dipankar, ego, paulmck

* K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> This patch converts the tracing mechanism of Preempt RCU boosting into
> markers. The handler functions for these markers are included inside
> rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> chosen.
>  
> Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> ---
>  include/linux/rcupreempt_trace.h |   46 ++++++++
>  kernel/rcupreempt-boost.c        |  211 ++++-----------------------------------
>  kernel/rcupreempt_trace.c        |  183 +++++++++++++++++++++++++++++++++
>  3 files changed, 251 insertions(+), 189 deletions(-)
>  
> Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> ===================================================================
> --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
>  extern int rcupreempt_mb_flag(int cpu);
>  extern char *rcupreempt_try_flip_state_name(void);
>  
> +#ifdef CONFIG_PREEMPT_RCU_BOOST
> +struct preempt_rcu_boost_trace {
> +	unsigned long rbs_stat_task_boost_called;
> +	unsigned long rbs_stat_task_boosted;
> +	unsigned long rbs_stat_boost_called;
> +	unsigned long rbs_stat_try_boost;
> +	unsigned long rbs_stat_boosted;
> +	unsigned long rbs_stat_unboost_called;
> +	unsigned long rbs_stat_unboosted;
> +	unsigned long rbs_stat_try_boost_readers;
> +	unsigned long rbs_stat_boost_readers;
> +	unsigned long rbs_stat_try_unboost_readers;
> +	unsigned long rbs_stat_unboost_readers;
> +	unsigned long rbs_stat_over_taken;
> +};
> +
> +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> +				void *private_data, const char *format, ...) \
> +{ \
> +	va_list ap; \
> +	int cpu; \
> +	struct preempt_rcu_boost_trace *boost_trace; \
> +	va_start(ap, format); \
> +	cpu = va_arg(ap, typeof(unsigned int)); \

(sorry for late response, I just came back from vacation)

Instead of passing the cpu id as a marker parameter, why don't you
simply use smp_processor_id() right here ?

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2007-12-31 10:20 ` Ingo Molnar
  2008-01-02  3:31   ` Frank Ch. Eigler
@ 2008-01-03 19:24   ` Mathieu Desnoyers
  1 sibling, 0 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-03 19:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K. Prasad, linux-kernel, linux-rt-users, dipankar, ego, paulmck,
	Andrew Morton, Linus Torvalds, Frank Ch. Eigler

* Ingo Molnar (mingo@elte.hu) wrote:
> 
> * K. Prasad <prasad@linux.vnet.ibm.com> wrote:
> 
> > @@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
> >  
> >  	spin_lock_irqsave(&rcu_boost_wake_lock, flags);
> >  
> > -	rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
> > +	trace_mark(try_unboost_readers, "%u", smp_processor_id());
> 
> this isnt directed at you or your patch, it's more directed at Mathieu, 
> but looking at this actual markers patch submitted to me, i'm still 
> fundamentally worried about the whole marker approach.
> 

Hi Ingo,

Just to note that some of your concerns have been answered by Frank and
that I fully agree with what he said. Sorry for the late reply
(vacation..)

> Firstly, why on earth does a full format string have to be passed in for 
> something as simple as a CPU id? This way we basically codify it forever 
> that tracing _has_ to be expensive when enabled. The latency-tracer 
> (which i'd love to convert to markers, if only markers were capable 
> enough) has shown it that tracing _can_ be used to capture performance 
> data without disturbing the measured system, even at hundreds of 
> thousands context switches a second per CPU.
> 

As I replied in my other email, the cpu id does not have to be passed as
an argument. Also, when no argument is passed, the format string does
not have to be parsed at all in the callback.

> Secondly, the inlined overhead of trace_mark() is still WAY too large:
> 
>                 if (unlikely(__mark_##name.state)) {                    \
>                         preempt_disable();                              \
>                         (*__mark_##name.call)                           \
>                                 (&__mark_##name, call_data,             \
>                                 format, ## args);                       \
>                         preempt_enable();                               \
>                 }                                                       \
> 

To get the full version of my trace_mark, you would have to take a few
bits from the -mm tree, which includes the "multiple probes support"
(this one moves the preempt disable/enable completely out of line) and
you should also get the "markers use immediate values" patch which I
submitted a few weeks ago : it uses code patching to modify a byte in a
mov instruction that is used to jump over the entire function call
(memory references, stack setup and the call itself)

> Whatever became of the obvious suggestion that i outlined years ago, to 
> have a _single_ trace call instruction and to _patch out_ the damn 
> marker calls by default? No .state flag checking inlined. No 
> preempt_disable()/enable() pair. Patching static tracepoints OUT of the 
> kernel, only leaving a ~5-byte NOP sequence behind them (and some 
> minimal disturbance to the variables the tracepoint accesses). We've got 
> all the alternatives.h code patching infrastructure available for such 
> purposes. Why are we 2 years down the line and _STILL_ arguing about 
> this?
> 

Frank replied appropriately to this. It's mostly because of stack setup
and/or move to registers to prepare the call.

> Thirdly, the patch selects CONFIG_MARKERS:
> 
> >  config RCU_TRACE
> > -       bool "Enable tracing for RCU - currently stats in debugfs"
> > +       tristate "Enable tracing for RCU - currently stats in debugfs"
> >         select DEBUG_FS
> > -       default y
> > +       select MARKERS
> 
> Which adds overhead (inlined checks for markers) all around the kernel, 
> even if all markers are deactivated! Imagine a thousand of them and the 
> kernel blows up measurably.
> 
> Sadly, this whole trace_mark() API seems to have gotten much worse since 
> i last saw it. It's sub-par when it's turned on and it's sub-par when 
> it's turned off. It gets us the worst of both worlds.
> 
> If it continues like this then i'd much rather see people add printks as 
> tracing, because there you _know_ that it's high-overhead and people 
> wont start arguing about trace compatibility either, etc.
> 

I think Frank's response explains things in enough depth.

> 	Ingo

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-03 16:30 ` Mathieu Desnoyers
@ 2008-01-04 10:58   ` Gautham R Shenoy
  2008-01-05 12:46     ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Gautham R Shenoy @ 2008-01-04 10:58 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: K. Prasad, mingo, linux-kernel, linux-rt-users, dipankar, paulmck

On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> * K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> > This patch converts the tracing mechanism of Preempt RCU boosting into
> > markers. The handler functions for these markers are included inside
> > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > chosen.
> >  
> > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > ---
> >  include/linux/rcupreempt_trace.h |   46 ++++++++
> >  kernel/rcupreempt-boost.c        |  211 ++++-----------------------------------
> >  kernel/rcupreempt_trace.c        |  183 +++++++++++++++++++++++++++++++++
> >  3 files changed, 251 insertions(+), 189 deletions(-)
> >  
> > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > ===================================================================
> > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> >  extern int rcupreempt_mb_flag(int cpu);
> >  extern char *rcupreempt_try_flip_state_name(void);
> >  
> > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > +struct preempt_rcu_boost_trace {
> > +	unsigned long rbs_stat_task_boost_called;
> > +	unsigned long rbs_stat_task_boosted;
> > +	unsigned long rbs_stat_boost_called;
> > +	unsigned long rbs_stat_try_boost;
> > +	unsigned long rbs_stat_boosted;
> > +	unsigned long rbs_stat_unboost_called;
> > +	unsigned long rbs_stat_unboosted;
> > +	unsigned long rbs_stat_try_boost_readers;
> > +	unsigned long rbs_stat_boost_readers;
> > +	unsigned long rbs_stat_try_unboost_readers;
> > +	unsigned long rbs_stat_unboost_readers;
> > +	unsigned long rbs_stat_over_taken;
> > +};
> > +
> > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > +				void *private_data, const char *format, ...) \
> > +{ \
> > +	va_list ap; \
> > +	int cpu; \
> > +	struct preempt_rcu_boost_trace *boost_trace; \
> > +	va_start(ap, format); \
> > +	cpu = va_arg(ap, typeof(unsigned int)); \
> 
> (sorry for late response, I just came back from vacation)
> 
> Instead of passing the cpu id as a marker parameter, why don't you
> simply use smp_processor_id() right here ?
> 

Agreed. 

Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(), 
we need not pass the parameter and simply use smp_processor_id().

It's only the RCU_TRACE_RDP() that requires cpuid to be passed.


Thanks and Regards
gautham.


> -- 
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

-- 
Gautham R Shenoy
Linux Technology Center
IBM India.
"Freedom comes with a price tag of responsibility, which is still a bargain,
because Freedom is priceless!"

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-04 10:58   ` Gautham R Shenoy
@ 2008-01-05 12:46     ` Mathieu Desnoyers
  2008-01-07 19:43       ` K. Prasad
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-05 12:46 UTC (permalink / raw)
  To: Gautham R Shenoy
  Cc: K. Prasad, mingo, linux-kernel, linux-rt-users, dipankar, paulmck

* Gautham R Shenoy (ego@in.ibm.com) wrote:
> On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> > * K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> > > This patch converts the tracing mechanism of Preempt RCU boosting into
> > > markers. The handler functions for these markers are included inside
> > > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > > chosen.
> > >  
> > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > ---
> > >  include/linux/rcupreempt_trace.h |   46 ++++++++
> > >  kernel/rcupreempt-boost.c        |  211 ++++-----------------------------------
> > >  kernel/rcupreempt_trace.c        |  183 +++++++++++++++++++++++++++++++++
> > >  3 files changed, 251 insertions(+), 189 deletions(-)
> > >  
> > > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > ===================================================================
> > > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > >  extern int rcupreempt_mb_flag(int cpu);
> > >  extern char *rcupreempt_try_flip_state_name(void);
> > >  
> > > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > > +struct preempt_rcu_boost_trace {
> > > +	unsigned long rbs_stat_task_boost_called;
> > > +	unsigned long rbs_stat_task_boosted;
> > > +	unsigned long rbs_stat_boost_called;
> > > +	unsigned long rbs_stat_try_boost;
> > > +	unsigned long rbs_stat_boosted;
> > > +	unsigned long rbs_stat_unboost_called;
> > > +	unsigned long rbs_stat_unboosted;
> > > +	unsigned long rbs_stat_try_boost_readers;
> > > +	unsigned long rbs_stat_boost_readers;
> > > +	unsigned long rbs_stat_try_unboost_readers;
> > > +	unsigned long rbs_stat_unboost_readers;
> > > +	unsigned long rbs_stat_over_taken;
> > > +};
> > > +
> > > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > > +				void *private_data, const char *format, ...) \
> > > +{ \
> > > +	va_list ap; \
> > > +	int cpu; \
> > > +	struct preempt_rcu_boost_trace *boost_trace; \
> > > +	va_start(ap, format); \
> > > +	cpu = va_arg(ap, typeof(unsigned int)); \
> > 
> > (sorry for late response, I just came back from vacation)
> > 
> > Instead of passing the cpu id as a marker parameter, why don't you
> > simply use smp_processor_id() right here ?
> > 
> 
> Agreed. 
> 
> Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(), 
> we need not pass the parameter and simply use smp_processor_id().
> 
> It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
> 


And it would help if you declare the format string with something like :

"cpuid %u" instead of "%u"

This way, if a generic tracer like LTTng dumps the traces and later a
plugin in an analysis tool like LTTV has to hook on a specific field of
your event, it can access it by specifying the marker name and field
name. And even if fields are added to this event, it won't break
compatibility as long as this field is there and has the same name.

Mathieu

> 
> Thanks and Regards
> gautham.
> 
> 
> > -- 
> > Mathieu Desnoyers
> > Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
> 
> -- 
> Gautham R Shenoy
> Linux Technology Center
> IBM India.
> "Freedom comes with a price tag of responsibility, which is still a bargain,
> because Freedom is priceless!"

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-02 17:01         ` Ingo Molnar
  2008-01-02 17:56           ` Frank Ch. Eigler
@ 2008-01-07 18:59           ` Mathieu Desnoyers
  2008-01-13 18:07             ` Pavel Machek
  1 sibling, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-07 18:59 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frank Ch. Eigler, K. Prasad, linux-kernel, linux-rt-users,
	dipankar, ego, paulmck, Andrew Morton, Linus Torvalds

* Ingo Molnar (mingo@elte.hu) wrote:
> 
> * Frank Ch. Eigler <fche@redhat.com> wrote:
> 
> > > [...] this is a general policy matter. It is _so much easier_ to add 
> > > markers if they _can_ have near-zero overhead (as in 1-2 
> > > instructions). Otherwise we'll keep arguing about it, especially if 
> > > any is added to performance-critical codepath. (where we are 
> > > counting instructions)
> > 
> > The effect of the immediate-values patch, combined with gcc 
> > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2 
> > dcache-impact-free instructions.  The register saves, parameter 
> > evaluation, the function call, can all be moved out of line.
> 
> well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so 
> we should already be getting that, right?
> 
> There's one thing that would make out-of-line tracepoints have a lot 
> less objectionable to me: right now the 'out of line' area is put to the 
> end of functions. That splinters the kernel image with inactive, rarely 
> taken areas of code - blowing up its icache footprint considerably. For 
> example sched.o has ~100 functions, with the average function size being 
> 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste 
> already.

Hrm, I agree this can be a problem on architectures with more standard
associative icaches, but aren't most x86_64 machines (and modern x86_32)
using an instruction trace cache instead ? This makes the problem
irrelevant.

But I agree that, as Frank proposed, -freorder-blocks-and-partition
could help us in that matter for the architectures using an associative
L1 icache.

Mathieu

> 
> It's true that keeping the off-site code within the function keeps total 
> codesize slightly smaller, because the offsets (and hence the 
> conditional jumps) are thus 8 bit - but that's below 1% and the 
> cache-blow-up aspect is more severe in practice at 10-20%.
> 
> So it would be nice if we could collect all this offline code and stuff 
> it away into another portion of the kernel image. (or, into another 
> portion of the object file - which would still be good enough in 
> practice)
> 
> 	Ingo

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-05 12:46     ` Mathieu Desnoyers
@ 2008-01-07 19:43       ` K. Prasad
  0 siblings, 0 replies; 20+ messages in thread
From: K. Prasad @ 2008-01-07 19:43 UTC (permalink / raw)
  To: Mathieu Desnoyers, linux-kernel
  Cc: K. Prasad, mingo, linux-rt-users, dipankar, paulmck, rostedt

On Sat, Jan 05, 2008 at 07:46:32AM -0500, Mathieu Desnoyers wrote:
> * Gautham R Shenoy (ego@in.ibm.com) wrote:
> > On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> > > * K. Prasad (prasad@linux.vnet.ibm.com) wrote:
> > > > This patch converts the tracing mechanism of Preempt RCU boosting into
> > > > markers. The handler functions for these markers are included inside
> > > > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > > > chosen.
> > > >  
> > > > Signed-off-by: K.Prasad <prasad@linux.vnet.ibm.com>
> > > > ---
> > > >  include/linux/rcupreempt_trace.h |   46 ++++++++
> > > >  kernel/rcupreempt-boost.c        |  211 ++++-----------------------------------
> > > >  kernel/rcupreempt_trace.c        |  183 +++++++++++++++++++++++++++++++++
> > > >  3 files changed, 251 insertions(+), 189 deletions(-)
> > > >  
> > > > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > > ===================================================================
> > > > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > > > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > > >  extern int rcupreempt_mb_flag(int cpu);
> > > >  extern char *rcupreempt_try_flip_state_name(void);
> > > >  
> > > > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > > > +struct preempt_rcu_boost_trace {
> > > > +	unsigned long rbs_stat_task_boost_called;
> > > > +	unsigned long rbs_stat_task_boosted;
> > > > +	unsigned long rbs_stat_boost_called;
> > > > +	unsigned long rbs_stat_try_boost;
> > > > +	unsigned long rbs_stat_boosted;
> > > > +	unsigned long rbs_stat_unboost_called;
> > > > +	unsigned long rbs_stat_unboosted;
> > > > +	unsigned long rbs_stat_try_boost_readers;
> > > > +	unsigned long rbs_stat_boost_readers;
> > > > +	unsigned long rbs_stat_try_unboost_readers;
> > > > +	unsigned long rbs_stat_unboost_readers;
> > > > +	unsigned long rbs_stat_over_taken;
> > > > +};
> > > > +
> > > > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > > > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > > > +				void *private_data, const char *format, ...) \
> > > > +{ \
> > > > +	va_list ap; \
> > > > +	int cpu; \
> > > > +	struct preempt_rcu_boost_trace *boost_trace; \
> > > > +	va_start(ap, format); \
> > > > +	cpu = va_arg(ap, typeof(unsigned int)); \
> > > 
> > > (sorry for late response, I just came back from vacation)
> > > 
> > > Instead of passing the cpu id as a marker parameter, why don't you
> > > simply use smp_processor_id() right here ?
> > > 
> > 
> > Agreed. 
> > 
> > Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(), 
> > we need not pass the parameter and simply use smp_processor_id().
> > 
> > It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
> > 
> 
> 
> And it would help if you declare the format string with something like :
> 
> "cpuid %u" instead of "%u"
> 
> This way, if a generic tracer like LTTng dumps the traces and later a
> plugin in an analysis tool like LTTV has to hook on a specific field of
> your event, it can access it by specifying the marker name and field
> name. And even if fields are added to this event, it won't break
> compatibility as long as this field is there and has the same name.
> 
> Mathieu

Hi Mathieu,
	Thanks for your suggestions.

Even in functions such as rcu_check_callbacks_rt, rcu_advance_callbacks_rt and
__rcu_advance_callbacks, where a call to RCU_TRACE_RDP() is made, the
'cpu' variable is populated at an early stage with the
return value of smp_processor_id().

__rcu_advance_callbacks() <-- rcu_check_callbacks_rt() <--
rcu_check_callbacks() <-- update_process_times().

update_process_times() [if understood correctly, is called at timer
interrupt level] populates the 'cpu' with smp_processor_id().

There doesn't seem to be a need to pass down the 'cpu' parameter for
the above RCU functions including respective trace_mark() calls.

I am re-sending the patches with your suggested changes in subsequent
mails.

Thanks,
K.Prasad

> 
> > 
> > Thanks and Regards
> > gautham.
> > 
> > 
> > > -- 
> > > Mathieu Desnoyers
> > > Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> > > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68
> > 
> > -- 
> > Gautham R Shenoy
> > Linux Technology Center
> > IBM India.
> > "Freedom comes with a price tag of responsibility, which is still a bargain,
> > because Freedom is priceless!"
> 
> -- 
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-07 18:59           ` Mathieu Desnoyers
@ 2008-01-13 18:07             ` Pavel Machek
  2008-01-14 15:35               ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Pavel Machek @ 2008-01-13 18:07 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Ingo Molnar, Frank Ch. Eigler, K. Prasad, linux-kernel,
	linux-rt-users, dipankar, ego, paulmck, Andrew Morton,
	Linus Torvalds

On Mon 2008-01-07 13:59:54, Mathieu Desnoyers wrote:
> * Ingo Molnar (mingo@elte.hu) wrote:
> > 
> > * Frank Ch. Eigler <fche@redhat.com> wrote:
> > 
> > > > [...] this is a general policy matter. It is _so much easier_ to add 
> > > > markers if they _can_ have near-zero overhead (as in 1-2 
> > > > instructions). Otherwise we'll keep arguing about it, especially if 
> > > > any is added to performance-critical codepath. (where we are 
> > > > counting instructions)
> > > 
> > > The effect of the immediate-values patch, combined with gcc 
> > > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2 
> > > dcache-impact-free instructions.  The register saves, parameter 
> > > evaluation, the function call, can all be moved out of line.
> > 
> > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so 
> > we should already be getting that, right?
> > 
> > There's one thing that would make out-of-line tracepoints have a lot 
> > less objectionable to me: right now the 'out of line' area is put to the 
> > end of functions. That splinters the kernel image with inactive, rarely 
> > taken areas of code - blowing up its icache footprint considerably. For 
> > example sched.o has ~100 functions, with the average function size being 
> > 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste 
> > already.
> 
> Hrm, I agree this can be a problem on architectures with more standard
> associative icaches, but aren't most x86_64 machines (and modern x86_32)
> using an instruction trace cache instead ? This makes the problem
> irrelevant.
> 
> But I agree that, as Frank proposed, -freorder-blocks-and-partition
> could help us in that matter for the architectures using an associative
> L1 icache.

I thought trace cache died with P4?
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-13 18:07             ` Pavel Machek
@ 2008-01-14 15:35               ` Mathieu Desnoyers
  2008-01-14 16:30                 ` Linus Torvalds
  0 siblings, 1 reply; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-14 15:35 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Ingo Molnar, Frank Ch. Eigler, K. Prasad, linux-kernel,
	linux-rt-users, dipankar, ego, paulmck, Andrew Morton,
	Linus Torvalds

* Pavel Machek (pavel@ucw.cz) wrote:
> On Mon 2008-01-07 13:59:54, Mathieu Desnoyers wrote:
> > * Ingo Molnar (mingo@elte.hu) wrote:
> > > 
> > > * Frank Ch. Eigler <fche@redhat.com> wrote:
> > > 
> > > > > [...] this is a general policy matter. It is _so much easier_ to add 
> > > > > markers if they _can_ have near-zero overhead (as in 1-2 
> > > > > instructions). Otherwise we'll keep arguing about it, especially if 
> > > > > any is added to performance-critical codepath. (where we are 
> > > > > counting instructions)
> > > > 
> > > > The effect of the immediate-values patch, combined with gcc 
> > > > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2 
> > > > dcache-impact-free instructions.  The register saves, parameter 
> > > > evaluation, the function call, can all be moved out of line.
> > > 
> > > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so 
> > > we should already be getting that, right?
> > > 
> > > There's one thing that would make out-of-line tracepoints have a lot 
> > > less objectionable to me: right now the 'out of line' area is put to the 
> > > end of functions. That splinters the kernel image with inactive, rarely 
> > > taken areas of code - blowing up its icache footprint considerably. For 
> > > example sched.o has ~100 functions, with the average function size being 
> > > 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste 
> > > already.
> > 
> > Hrm, I agree this can be a problem on architectures with more standard
> > associative icaches, but aren't most x86_64 machines (and modern x86_32)
> > using an instruction trace cache instead ? This makes the problem
> > irrelevant.
> > 
> > But I agree that, as Frank proposed, -freorder-blocks-and-partition
> > could help us in that matter for the architectures using an associative
> > L1 icache.
> 
> I thought trace cache died with P4?
> -- 

And you are absolutely right.

We would have to figure out if enabling -freorder-blocks-and-partition
makes sense kernel-wide.

Mathieu


-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-14 15:35               ` Mathieu Desnoyers
@ 2008-01-14 16:30                 ` Linus Torvalds
  2008-01-14 19:36                   ` Mathieu Desnoyers
  0 siblings, 1 reply; 20+ messages in thread
From: Linus Torvalds @ 2008-01-14 16:30 UTC (permalink / raw)
  To: Mathieu Desnoyers
  Cc: Pavel Machek, Ingo Molnar, Frank Ch. Eigler, K. Prasad,
	linux-kernel, linux-rt-users, dipankar, ego, paulmck,
	Andrew Morton



On Mon, 14 Jan 2008, Mathieu Desnoyers wrote:
> 
> We would have to figure out if enabling -freorder-blocks-and-partition
> makes sense kernel-wide.

Last I saw, it generates crappy code, with lots more jumps back and forth, 
and the image just blows up.

There's a reason we use -Os, and that's that small footprint I$ is 
generally more important than fake compiler optimizations that don't 
actually help except on microbenchmarks where everything fits in the 
cache.

Taking a branch instruction from two bytes to five is almost always a 
mistake, unless you *know* that the code it jumps to will effectively 
never be done at all (which is not necessarily the case at all). It also 
makes debugging much nastier, because now things like backtraces 
probably look like crap too!

Don't go there. The *best* we can do is to just use the optimizations that 
generate good-looking code that humans can read. The rest is just compiler 
masturbation.

			Linus

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing
  2008-01-14 16:30                 ` Linus Torvalds
@ 2008-01-14 19:36                   ` Mathieu Desnoyers
  0 siblings, 0 replies; 20+ messages in thread
From: Mathieu Desnoyers @ 2008-01-14 19:36 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Pavel Machek, Ingo Molnar, Frank Ch. Eigler, K. Prasad,
	linux-kernel, linux-rt-users, dipankar, ego, paulmck,
	Andrew Morton

* Linus Torvalds (torvalds@linux-foundation.org) wrote:
> 
> 
> On Mon, 14 Jan 2008, Mathieu Desnoyers wrote:
> > 
> > We would have to figure out if enabling -freorder-blocks-and-partition
> > makes sense kernel-wide.
> 
> Last I saw, it generates crappy code, with lots more jumps back and forth, 
> and the image just blows up.
> 
> There's a reason we use -Os, and that's that small footprint I$ is 
> generally more important than fake compiler optimizations that don't 
> actually help except on microbenchmarks where everything fits in the 
> cache.
> 
> Taking a branch instruction from two bytes to five is almost always a 
> mistake, unless you *know* that the code it jumps to will effectively 
> never be done at all (which is not necessarily the case at all). It also 
> makes debugging much nastier, because if now things like backtraces 
> probably look like crap too!
> 
> Don't go there. The *best* we can do is to just use the optimizations that 
> generate good-looking code that humans can read. The rest is just compiler 
> masturbation.
> 

I agree that turning this flag on does not seem like an interesting
solution.

Well, I wonder how important this issue of not sharing L1 instruction
cachelines with scheduler code is. If we care as much about it as Ingo
states, I wonder why we leave about 22 BUG() macros in sched.c
(calculated from the number of ud2 instructions generated on x86), which
adds up to 7 bytes at the end of many scheduler functions (7 bytes
coming from ud2, jmp and .p2align on x86).

And 22 markers in sched.c is already much more than needed. I actually
propose only 5 in my patchset.

Mathieu

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2008-01-14 19:42 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-11-29 18:46 [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing K. Prasad
  -- strict thread matches above, loose matches on Subject: below --
2007-12-31  6:09 K. Prasad
2007-12-31 10:20 ` Ingo Molnar
2008-01-02  3:31   ` Frank Ch. Eigler
2008-01-02 12:47     ` Ingo Molnar
2008-01-02 16:33       ` Frank Ch. Eigler
2008-01-02 17:01         ` Ingo Molnar
2008-01-02 17:56           ` Frank Ch. Eigler
2008-01-02 20:10             ` Ingo Molnar
2008-01-07 18:59           ` Mathieu Desnoyers
2008-01-13 18:07             ` Pavel Machek
2008-01-14 15:35               ` Mathieu Desnoyers
2008-01-14 16:30                 ` Linus Torvalds
2008-01-14 19:36                   ` Mathieu Desnoyers
2008-01-02 23:49         ` Nicholas Miell
2008-01-03 19:24   ` Mathieu Desnoyers
2008-01-03 16:30 ` Mathieu Desnoyers
2008-01-04 10:58   ` Gautham R Shenoy
2008-01-05 12:46     ` Mathieu Desnoyers
2008-01-07 19:43       ` K. Prasad

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox