* [RFC 1/3] Linux Kernel Markers - Support Multiple Probes
2007-11-13 19:03 [RFC 0/3] Linux Kernel Markers new features Mathieu Desnoyers
@ 2007-11-13 19:03 ` Mathieu Desnoyers
2007-11-13 19:03 ` [RFC 2/3] Linux Kernel Markers - Create modpost file Mathieu Desnoyers
2007-11-13 19:03 ` [RFC 3/3] Linux Kernel Markers - Use Immediate Values Mathieu Desnoyers
2 siblings, 0 replies; 4+ messages in thread
From: Mathieu Desnoyers @ 2007-11-13 19:03 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers
[-- Attachment #1: markers-support-multiple-probes.patch --]
[-- Type: text/plain, Size: 37095 bytes --]
RCU style multiple probes support for the Linux Kernel Markers.
Common case (one probe) is still fast and does not require dynamic allocation
or a supplementary pointer dereference on the fast path.
- Move preempt disable from the marker site to the callback.
Since we now have an internal callback, move the preempt disable/enable to the
callback instead of the marker site.
Since the callback change is done asynchronously (passing from a handler that
supports arguments to a handler that does not setup the arguments is no
arguments are passed), we can safely update it even if it is outside the preempt
disable section.
- Move probe arm to probe connection. Now, a connected probe is automatically
armed.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
include/linux/marker.h | 51 +--
include/linux/module.h | 2
kernel/marker.c | 663 +++++++++++++++++++++++++++++-----------
kernel/module.c | 7
samples/markers/probe-example.c | 33 +
5 files changed, 546 insertions(+), 210 deletions(-)
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/marker.h 2007-11-13 09:48:35.000000000 -0500
+++ linux-2.6-lttng/include/linux/marker.h 2007-11-13 09:48:41.000000000 -0500
@@ -19,16 +19,23 @@ struct marker;
/**
* marker_probe_func - Type of a marker probe function
- * @mdata: pointer of type struct marker
- * @private_data: caller site private data
+ * @probe_data: probe private data
+ * @private_data: call site private data
* @fmt: format string
- * @...: variable argument list
+ * @args: variable argument list pointer. Use a pointer to overcome C's
+ * inability to pass this around as a pointer in a portable manner in
+ * the callee otherwise.
*
* Type of marker probe functions. They receive the mdata and need to parse the
* format string to recover the variable argument list.
*/
-typedef void marker_probe_func(const struct marker *mdata,
- void *private_data, const char *fmt, ...);
+typedef void marker_probe_func(void *probe_data, void *call_data,
+ const char *fmt, va_list *args);
+
+struct marker_probe_closure {
+ marker_probe_func *func; /* Callback */
+ void *private; /* Private probe data */
+};
struct marker {
const char *name; /* Marker name */
@@ -36,8 +43,11 @@ struct marker {
* variable argument list.
*/
char state; /* Marker state. */
- marker_probe_func *call;/* Probe handler function pointer */
- void *private; /* Private probe data */
+ char ptype; /* probe type : 0 : single, 1 : multi */
+ void (*call)(const struct marker *mdata, /* Probe wrapper */
+ void *private_data, const char *fmt, ...);
+ struct marker_probe_closure single;
+ struct marker_probe_closure *multi;
} __attribute__((aligned(8)));
#ifdef CONFIG_MARKERS
@@ -60,24 +70,23 @@ struct marker {
static struct marker __mark_##name \
__attribute__((section("__markers"), aligned(8))) = \
{ __mstrtab_name_##name, __mstrtab_format_##name, \
- 0, __mark_empty_function, NULL }; \
+ 0, 0, marker_probe_cb, \
+ { __mark_empty_function, NULL}, NULL }; \
__mark_check_format(format, ## args); \
if (unlikely(__mark_##name.state)) { \
- preempt_disable(); \
(*__mark_##name.call) \
(&__mark_##name, call_data, \
format, ## args); \
- preempt_enable(); \
} \
} while (0)
extern void marker_update_probe_range(struct marker *begin,
- struct marker *end, struct module *probe_module, int *refcount);
+ struct marker *end);
#else /* !CONFIG_MARKERS */
#define __trace_mark(name, call_data, format, args...) \
__mark_check_format(format, ## args)
static inline void marker_update_probe_range(struct marker *begin,
- struct marker *end, struct module *probe_module, int *refcount)
+ struct marker *end)
{ }
#endif /* CONFIG_MARKERS */
@@ -92,8 +101,6 @@ static inline void marker_update_probe_r
#define trace_mark(name, format, args...) \
__trace_mark(name, NULL, format, ## args)
-#define MARK_MAX_FORMAT_LEN 1024
-
/**
* MARK_NOARGS - Format string for a marker with no argument.
*/
@@ -106,6 +113,11 @@ static inline void __printf(1, 2) __mark
extern marker_probe_func __mark_empty_function;
+extern void marker_probe_cb(const struct marker *mdata, void *private,
+ const char *fmt, ...);
+extern void marker_probe_cb_noarg(const struct marker *mdata,
+ void *private, const char *fmt, ...);
+
/*
* Connect a probe to a marker.
* private data pointer must be a valid allocated memory address, or NULL.
@@ -116,14 +128,15 @@ extern int marker_probe_register(const c
/*
* Returns the private data given to marker_probe_register.
*/
-extern void *marker_probe_unregister(const char *name);
+extern int marker_probe_unregister(const char *name,
+ marker_probe_func *probe, void *private);
/*
* Unregister a marker by providing the registered private data.
*/
-extern void *marker_probe_unregister_private_data(void *private);
+extern int marker_probe_unregister_private_data(marker_probe_func *probe,
+ void *private);
-extern int marker_arm(const char *name);
-extern int marker_disarm(const char *name);
-extern void *marker_get_private_data(const char *name);
+extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
+ int num);
#endif
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- linux-2.6-lttng.orig/kernel/marker.c 2007-11-13 09:48:36.000000000 -0500
+++ linux-2.6-lttng/kernel/marker.c 2007-11-13 09:48:41.000000000 -0500
@@ -27,35 +27,41 @@
extern struct marker __start___markers[];
extern struct marker __stop___markers[];
+/* Set to 1 to enable marker debug output */
+const int marker_debug;
+
/*
* markers_mutex nests inside module_mutex. Markers mutex protects the builtin
- * and module markers, the hash table and deferred_sync.
+ * and module markers and the hash table.
*/
static DEFINE_MUTEX(markers_mutex);
/*
- * Marker deferred synchronization.
- * Upon marker probe_unregister, we delay call to synchronize_sched() to
- * accelerate mass unregistration (only when there is no more reference to a
- * given module do we call synchronize_sched()). However, we need to make sure
- * every critical region has ended before we re-arm a marker that has been
- * unregistered and then registered back with a different probe data.
- */
-static int deferred_sync;
-
-/*
* Marker hash table, containing the active markers.
* Protected by module_mutex.
*/
#define MARKER_HASH_BITS 6
#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+/*
+ * Note about RCU :
+ * It is used to make sure every handler has finished using its private data
+ * between two consecutive operation (add or remove) on a given marker. It is
+ * also used to delay the free of multiple probes array until a quiescent state
+ * is reached.
+ */
struct marker_entry {
struct hlist_node hlist;
char *format;
- marker_probe_func *probe;
- void *private;
+ void (*call)(const struct marker *mdata, /* Probe wrapper */
+ void *private_data, const char *fmt, ...);
+ struct marker_probe_closure single;
+ struct marker_probe_closure *multi;
int refcount; /* Number of times armed. 0 if disarmed. */
+ struct rcu_head rcu;
+ void *oldptr;
+ char rcu_pending:1;
+ char ptype:1;
char name[0]; /* Contains name'\0'format'\0' */
};
@@ -63,7 +69,8 @@ static struct hlist_head marker_table[MA
/**
* __mark_empty_function - Empty probe callback
- * @mdata: pointer of type const struct marker
+ * @probe_data: probe private data
+ * @call_call: call site private data
* @fmt: format string
* @...: variable argument list
*
@@ -72,13 +79,258 @@ static struct hlist_head marker_table[MA
* though the function pointer change and the marker enabling are two distinct
* operations that modifies the execution flow of preemptible code.
*/
-void __mark_empty_function(const struct marker *mdata, void *private,
- const char *fmt, ...)
+void __mark_empty_function(void *probe_data, void *call_data,
+ const char *fmt, va_list *args)
{
}
EXPORT_SYMBOL_GPL(__mark_empty_function);
/*
+ * marker_probe_cb Callback that prepares the variable argument list for probes.
+ * @mdata: pointer of type struct marker
+ * @private: caller site private data
+ * @fmt: format string
+ * @...: Variable argument list.
+ *
+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
+ * need to put a full smp_rmb() in this branch. This is why we do not use
+ * rcu_dereference() for the pointer read.
+ */
+void marker_probe_cb(const struct marker *mdata, void *private,
+ const char *fmt, ...)
+{
+ va_list args;
+ char ptype;
+
+ preempt_disable();
+ ptype = ACCESS_ONCE(mdata->ptype);
+ if (likely(!ptype)) {
+ marker_probe_func *func;
+ /* Must read the ptype before ptr. They are not data dependant,
+ * so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ func = ACCESS_ONCE(mdata->single.func);
+ /* Must read the ptr before private data. They are not data
+ * dependant, so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ va_start(args, fmt);
+ func(mdata->single.private, private, fmt, &args);
+ va_end(args);
+ } else {
+ struct marker_probe_closure *multi;
+ int i;
+ /*
+ * multi points to an array, therefore accessing the array
+ * depends on reading multi. However, even in this case,
+ * we must insure that the pointer is read _before_ the array
+ * data. Same as rcu_dereference, but we need a full smp_rmb()
+ * in the fast path, so put the explicit barrier here.
+ */
+ smp_read_barrier_depends();
+ multi = ACCESS_ONCE(mdata->multi);
+ for (i = 0; multi[i].func; i++) {
+ va_start(args, fmt);
+ multi[i].func(multi[i].private, private, fmt, &args);
+ va_end(args);
+ }
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb);
+
+/*
+ * marker_probe_cb Callback that does not prepare the variable argument list.
+ * @mdata: pointer of type struct marker
+ * @private: caller site private data
+ * @fmt: format string
+ * @...: Variable argument list.
+ *
+ * Should be connected to markers "MARK_NOARGS".
+ */
+
+void marker_probe_cb_noarg(const struct marker *mdata,
+ void *private, const char *fmt, ...)
+{
+ va_list args; /* not initialized */
+ char ptype;
+
+ preempt_disable();
+ ptype = ACCESS_ONCE(mdata->ptype);
+ if (likely(!ptype)) {
+ marker_probe_func *func;
+ /* Must read the ptype before ptr. They are not data dependant,
+ * so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ func = ACCESS_ONCE(mdata->single.func);
+ /* Must read the ptr before private data. They are not data
+ * dependant, so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ func(mdata->single.private, private, fmt, &args);
+ } else {
+ struct marker_probe_closure *multi;
+ int i;
+ /*
+ * multi points to an array, therefore accessing the array
+ * depends on reading multi. However, even in this case,
+ * we must insure that the pointer is read _before_ the array
+ * data. Same as rcu_dereference, but we need a full smp_rmb()
+ * in the fast path, so put the explicit barrier here.
+ */
+ smp_read_barrier_depends();
+ multi = ACCESS_ONCE(mdata->multi);
+ for (i = 0; multi[i].func; i++)
+ multi[i].func(multi[i].private, private, fmt, &args);
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb_noarg);
+
+static void free_old_closure(struct rcu_head *head)
+{
+ struct marker_entry *entry = container_of(head,
+ struct marker_entry, rcu);
+ kfree(entry->oldptr);
+ /* Make sure we free the data before setting the pending flag to 0 */
+ smp_wmb();
+ entry->rcu_pending = 0;
+}
+
+static inline void debug_print_probes(struct marker_entry *entry)
+{
+ int i;
+
+ if (!marker_debug)
+ return;
+
+ if (!entry->ptype) {
+ printk(KERN_DEBUG "Single probe : %p %p\n",
+ entry->single.func,
+ entry->single.private);
+ } else {
+ for (i = 0; entry->multi[i].func; i++)
+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
+ entry->multi[i].func, entry->multi[i].private);
+ }
+}
+
+static struct marker_probe_closure *
+marker_entry_add_probe(struct marker_entry *entry,
+ marker_probe_func *probe, void *private)
+{
+ int nr_probes = 0;
+ struct marker_probe_closure *old, *new;
+
+ WARN_ON(!probe);
+
+ debug_print_probes(entry);
+ old = entry->multi;
+ if (!entry->ptype) {
+ if (entry->single.func == probe &&
+ entry->single.private == private)
+ return ERR_PTR(-EBUSY);
+ if (entry->single.func == __mark_empty_function) {
+ /* 0 -> 1 probes */
+ entry->single.func = probe;
+ entry->single.private = private;
+ entry->refcount = 1;
+ entry->ptype = 0;
+ debug_print_probes(entry);
+ return NULL;
+ } else {
+ /* 1 -> 2 probes */
+ nr_probes = 1;
+ old = NULL;
+ }
+ } else {
+ /* (N -> N+1), (N != 0, 1) probes */
+ for (nr_probes = 0; old[nr_probes].func; nr_probes++)
+ if (old[nr_probes].func == probe &&
+ old[nr_probes].private == private)
+ return ERR_PTR(-EBUSY);
+ }
+ /* + 2 : one for new probe, one for NULL func */
+ new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure),
+ GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (!old)
+ new[0] = entry->single;
+ else
+ memcpy(new, old,
+ nr_probes * sizeof(struct marker_probe_closure));
+ new[nr_probes].func = probe;
+ new[nr_probes].private = private;
+ entry->refcount = nr_probes + 1;
+ entry->multi = new;
+ entry->ptype = 1;
+ debug_print_probes(entry);
+ return old;
+}
+
+static struct marker_probe_closure *
+marker_entry_remove_probe(struct marker_entry *entry,
+ marker_probe_func *probe, void *private)
+{
+ int nr_probes = 0, nr_del = 0, i;
+ struct marker_probe_closure *old, *new;
+
+ old = entry->multi;
+
+ debug_print_probes(entry);
+ if (!entry->ptype) {
+ /* 0 -> N is an error */
+ WARN_ON(entry->single.func == __mark_empty_function);
+ /* 1 -> 0 probes */
+ WARN_ON(probe && entry->single.func != probe);
+ WARN_ON(entry->single.private != private);
+ entry->single.func = __mark_empty_function;
+ entry->refcount = 0;
+ entry->ptype = 0;
+ debug_print_probes(entry);
+ return NULL;
+ } else {
+ /* (N -> M), (N > 1, M >= 0) probes */
+ for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
+ if ((!probe || old[nr_probes].func == probe)
+ && old[nr_probes].private == private)
+ nr_del++;
+ }
+ }
+
+ if (nr_probes - nr_del == 0) {
+ /* N -> 0, (N > 1) */
+ entry->single.func = __mark_empty_function;
+ entry->refcount = 0;
+ entry->ptype = 0;
+ } else if (nr_probes - nr_del == 1) {
+ /* N -> 1, (N > 1) */
+ for (i = 0; old[i].func; i++)
+ if ((probe && old[i].func != probe) ||
+ old[i].private != private)
+ entry->single = old[i];
+ entry->refcount = 1;
+ entry->ptype = 0;
+ } else {
+ int j = 0;
+ /* N -> M, (N > 1, M > 1) */
+ /* + 1 for NULL */
+ new = kzalloc((nr_probes - nr_del + 1)
+ * sizeof(struct marker_probe_closure), GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; old[i].func; i++)
+ if ((probe && old[i].func != probe) ||
+ old[i].private != private)
+ new[j++] = old[i];
+ entry->refcount = nr_probes - nr_del;
+ entry->ptype = 1;
+ entry->multi = new;
+ }
+ debug_print_probes(entry);
+ return old;
+}
+
+/*
* Get marker if the marker is present in the marker hash table.
* Must be called with markers_mutex held.
* Returns NULL if not present.
@@ -102,8 +354,7 @@ static struct marker_entry *get_marker(c
* Add the marker to the marker hash table. Must be called with markers_mutex
* held.
*/
-static int add_marker(const char *name, const char *format,
- marker_probe_func *probe, void *private)
+static struct marker_entry *add_marker(const char *name, const char *format)
{
struct hlist_head *head;
struct hlist_node *node;
@@ -118,9 +369,8 @@ static int add_marker(const char *name,
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name)) {
printk(KERN_NOTICE
- "Marker %s busy, probe %p already installed\n",
- name, e->probe);
- return -EBUSY; /* Already there */
+ "Marker %s busy\n", name);
+ return ERR_PTR(-EBUSY); /* Already there */
}
}
/*
@@ -130,34 +380,42 @@ static int add_marker(const char *name,
e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
GFP_KERNEL);
if (!e)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
memcpy(&e->name[0], name, name_len);
if (format) {
e->format = &e->name[name_len];
memcpy(e->format, format, format_len);
+ if (strcmp(e->format, MARK_NOARGS) == 0)
+ e->call = marker_probe_cb_noarg;
+ else
+ e->call = marker_probe_cb;
trace_mark(core_marker_format, "name %s format %s",
e->name, e->format);
- } else
+ } else {
e->format = NULL;
- e->probe = probe;
- e->private = private;
+ e->call = marker_probe_cb;
+ }
+ e->single.func = __mark_empty_function;
+ e->single.private = NULL;
+ e->multi = NULL;
+ e->ptype = 0;
e->refcount = 0;
+ e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
- return 0;
+ return e;
}
/*
* Remove the marker from the marker hash table. Must be called with mutex_lock
* held.
*/
-static void *remove_marker(const char *name)
+static int remove_marker(const char *name)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
int found = 0;
size_t len = strlen(name) + 1;
- void *private = NULL;
u32 hash = jhash(name, len-1, 0);
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
@@ -167,12 +425,16 @@ static void *remove_marker(const char *n
break;
}
}
- if (found) {
- private = e->private;
- hlist_del(&e->hlist);
- kfree(e);
- }
- return private;
+ if (!found)
+ return -ENOENT;
+ if (e->single.func != __mark_empty_function)
+ return -EBUSY;
+ hlist_del(&e->hlist);
+ /* Make sure the call_rcu has been executed */
+ if (e->rcu_pending)
+ rcu_barrier();
+ kfree(e);
+ return 0;
}
/*
@@ -184,6 +446,7 @@ static int marker_set_format(struct mark
size_t name_len = strlen((*entry)->name) + 1;
size_t format_len = strlen(format) + 1;
+
e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
GFP_KERNEL);
if (!e)
@@ -191,11 +454,20 @@ static int marker_set_format(struct mark
memcpy(&e->name[0], (*entry)->name, name_len);
e->format = &e->name[name_len];
memcpy(e->format, format, format_len);
- e->probe = (*entry)->probe;
- e->private = (*entry)->private;
+ if (strcmp(e->format, MARK_NOARGS) == 0)
+ e->call = marker_probe_cb_noarg;
+ else
+ e->call = marker_probe_cb;
+ e->single = (*entry)->single;
+ e->multi = (*entry)->multi;
+ e->ptype = (*entry)->ptype;
e->refcount = (*entry)->refcount;
+ e->rcu_pending = 0;
hlist_add_before(&e->hlist, &(*entry)->hlist);
hlist_del(&(*entry)->hlist);
+ /* Make sure the call_rcu has been executed */
+ if ((*entry)->rcu_pending)
+ rcu_barrier();
kfree(*entry);
*entry = e;
trace_mark(core_marker_format, "name %s format %s",
@@ -206,7 +478,8 @@ static int marker_set_format(struct mark
/*
* Sets the probe callback corresponding to one marker.
*/
-static int set_marker(struct marker_entry **entry, struct marker *elem)
+static int set_marker(struct marker_entry **entry, struct marker *elem,
+ int active)
{
int ret;
WARN_ON(strcmp((*entry)->name, elem->name) != 0);
@@ -226,9 +499,43 @@ static int set_marker(struct marker_entr
if (ret)
return ret;
}
- elem->call = (*entry)->probe;
- elem->private = (*entry)->private;
- elem->state = 1;
+
+ /*
+ * probe_cb setup (statically known) is done here. It is
+ * asynchronous with the rest of execution, therefore we only
+ * pass from a "safe" callback (with argument) to an "unsafe"
+ * callback (does not set arguments).
+ */
+ elem->call = (*entry)->call;
+ /*
+ * Sanity check :
+ * We only update the single probe private data when the ptr is
+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
+ */
+ WARN_ON(elem->single.func != __mark_empty_function
+ && elem->single.private
+ != (*entry)->single.private &&
+ !elem->ptype);
+ elem->single.private = (*entry)->single.private;
+ /*
+ * Make sure the private data is valid when we update the
+ * single probe ptr.
+ */
+ smp_wmb();
+ elem->single.func = (*entry)->single.func;
+ /*
+ * We also make sure that the new probe callbacks array is consistent
+ * before setting a pointer to it.
+ */
+ rcu_assign_pointer(elem->multi, (*entry)->multi);
+ /*
+ * Update the function or multi probe array pointer before setting the
+ * ptype.
+ */
+ smp_wmb();
+ elem->ptype = (*entry)->ptype;
+ elem->state = active;
+
return 0;
}
@@ -240,8 +547,12 @@ static int set_marker(struct marker_entr
*/
static void disable_marker(struct marker *elem)
{
+ /* leave "call" as is. It is known statically. */
elem->state = 0;
- elem->call = __mark_empty_function;
+ elem->single.func = __mark_empty_function;
+ /* Update the function before setting the ptype */
+ smp_wmb();
+ elem->ptype = 0; /* single probe */
/*
* Leave the private data and id there, because removal is racy and
* should be done only after a synchronize_sched(). These are never used
@@ -253,14 +564,11 @@ static void disable_marker(struct marker
* marker_update_probe_range - Update a probe range
* @begin: beginning of the range
* @end: end of the range
- * @probe_module: module address of the probe being updated
- * @refcount: number of references left to the given probe_module (out)
*
* Updates the probe callback corresponding to a range of markers.
*/
void marker_update_probe_range(struct marker *begin,
- struct marker *end, struct module *probe_module,
- int *refcount)
+ struct marker *end)
{
struct marker *iter;
struct marker_entry *mark_entry;
@@ -268,15 +576,12 @@ void marker_update_probe_range(struct ma
mutex_lock(&markers_mutex);
for (iter = begin; iter < end; iter++) {
mark_entry = get_marker(iter->name);
- if (mark_entry && mark_entry->refcount) {
- set_marker(&mark_entry, iter);
+ if (mark_entry) {
+ set_marker(&mark_entry, iter,
+ !!mark_entry->refcount);
/*
* ignore error, continue
*/
- if (probe_module)
- if (probe_module ==
- __module_text_address((unsigned long)mark_entry->probe))
- (*refcount)++;
} else {
disable_marker(iter);
}
@@ -289,20 +594,27 @@ void marker_update_probe_range(struct ma
* Issues a synchronize_sched() when no reference to the module passed
* as parameter is found in the probes so the probe module can be
* safely unloaded from now on.
+ *
+ * Internal callback only changed before the first probe is connected to it.
+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
+ * transitions. All other transitions will leave the old private data valid.
+ * This makes the non-atomicity of the callback/private data updates valid.
+ *
+ * "special case" updates :
+ * 0 -> 1 callback
+ * 1 -> 0 callback
+ * 1 -> 2 callbacks
+ * 2 -> 1 callbacks
+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
+ * Site effect : marker_set_format may delete the marker entry (creating a
+ * replacement).
*/
-static void marker_update_probes(struct module *probe_module)
+static void marker_update_probes(void)
{
- int refcount = 0;
-
/* Core kernel markers */
- marker_update_probe_range(__start___markers,
- __stop___markers, probe_module, &refcount);
+ marker_update_probe_range(__start___markers, __stop___markers);
/* Markers in modules. */
- module_update_markers(probe_module, &refcount);
- if (probe_module && refcount == 0) {
- synchronize_sched();
- deferred_sync = 0;
- }
+ module_update_markers();
}
/**
@@ -314,29 +626,45 @@ static void marker_update_probes(struct
*
* private data must be a valid allocated memory address, or NULL.
* Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
*/
int marker_probe_register(const char *name, const char *format,
marker_probe_func *probe, void *private)
{
struct marker_entry *entry;
int ret = 0;
+ struct marker_probe_closure *old;
mutex_lock(&markers_mutex);
entry = get_marker(name);
- if (entry && entry->refcount) {
- ret = -EBUSY;
- goto end;
- }
- if (deferred_sync) {
- synchronize_sched();
- deferred_sync = 0;
+ if (!entry) {
+ entry = add_marker(name, format);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry);
+ goto end;
+ }
}
- ret = add_marker(name, format, probe, private);
- if (ret)
+ /*
+ * If we detect that a call_rcu is pending for this marker,
+ * make sure it's executed now.
+ */
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = marker_entry_add_probe(entry, probe, private);
+ if (IS_ERR(old)) {
+ ret = PTR_ERR(old);
goto end;
+ }
mutex_unlock(&markers_mutex);
- marker_update_probes(NULL);
- return ret;
+ marker_update_probes(); /* may update entry */
+ mutex_lock(&markers_mutex);
+ entry = get_marker(name);
+ WARN_ON(!entry);
+ entry->oldptr = old;
+ entry->rcu_pending = 1;
+ /* write rcu_pending before calling the RCU callback */
+ smp_wmb();
+ call_rcu(&entry->rcu, free_old_closure);
end:
mutex_unlock(&markers_mutex);
return ret;
@@ -346,171 +674,166 @@ EXPORT_SYMBOL_GPL(marker_probe_register)
/**
* marker_probe_unregister - Disconnect a probe from a marker
* @name: marker name
+ * @probe: probe function pointer
+ * @private: probe private data
*
* Returns the private data given to marker_probe_register, or an ERR_PTR().
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
*/
-void *marker_probe_unregister(const char *name)
+int marker_probe_unregister(const char *name,
+ marker_probe_func *probe, void *private)
{
- struct module *probe_module;
struct marker_entry *entry;
- void *private;
+ struct marker_probe_closure *old;
+ int ret = 0;
mutex_lock(&markers_mutex);
entry = get_marker(name);
if (!entry) {
- private = ERR_PTR(-ENOENT);
+ ret = -ENOENT;
goto end;
}
- entry->refcount = 0;
- /* In what module is the probe handler ? */
- probe_module = __module_text_address((unsigned long)entry->probe);
- private = remove_marker(name);
- deferred_sync = 1;
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = marker_entry_remove_probe(entry, probe, private);
mutex_unlock(&markers_mutex);
- marker_update_probes(probe_module);
- return private;
+ marker_update_probes(); /* may update entry */
+ mutex_lock(&markers_mutex);
+ entry = get_marker(name);
+ entry->oldptr = old;
+ entry->rcu_pending = 1;
+ /* write rcu_pending before calling the RCU callback */
+ smp_wmb();
+ call_rcu(&entry->rcu, free_old_closure);
+ remove_marker(name); /* Ignore busy error message */
end:
mutex_unlock(&markers_mutex);
- return private;
+ return ret;
}
EXPORT_SYMBOL_GPL(marker_probe_unregister);
-/**
- * marker_probe_unregister_private_data - Disconnect a probe from a marker
- * @private: probe private data
- *
- * Unregister a marker by providing the registered private data.
- * Returns the private data given to marker_probe_register, or an ERR_PTR().
- */
-void *marker_probe_unregister_private_data(void *private)
+static struct marker_entry *
+get_marker_from_private_data(marker_probe_func *probe, void *private)
{
- struct module *probe_module;
- struct hlist_head *head;
- struct hlist_node *node;
struct marker_entry *entry;
- int found = 0;
unsigned int i;
+ struct hlist_head *head;
+ struct hlist_node *node;
- mutex_lock(&markers_mutex);
for (i = 0; i < MARKER_TABLE_SIZE; i++) {
head = &marker_table[i];
hlist_for_each_entry(entry, node, head, hlist) {
- if (entry->private == private) {
- found = 1;
- goto iter_end;
+ if (!entry->ptype) {
+ if (entry->single.func == probe
+ && entry->single.private
+ == private)
+ return entry;
+ } else {
+ struct marker_probe_closure *closure;
+ closure = entry->multi;
+ for (i = 0; closure[i].func; i++) {
+ if (closure[i].func == probe &&
+ closure[i].private
+ == private)
+ return entry;
+ }
}
}
}
-iter_end:
- if (!found) {
- private = ERR_PTR(-ENOENT);
- goto end;
- }
- entry->refcount = 0;
- /* In what module is the probe handler ? */
- probe_module = __module_text_address((unsigned long)entry->probe);
- private = remove_marker(entry->name);
- deferred_sync = 1;
- mutex_unlock(&markers_mutex);
- marker_update_probes(probe_module);
- return private;
-end:
- mutex_unlock(&markers_mutex);
- return private;
+ return NULL;
}
-EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
/**
- * marker_arm - Arm a marker
- * @name: marker name
+ * marker_probe_unregister_private_data - Disconnect a probe from a marker
+ * @probe: probe function
+ * @private: probe private data
*
- * Activate a marker. It keeps a reference count of the number of
- * arming/disarming done.
- * Returns 0 if ok, error value on error.
+ * Unregister a probe by providing the registered private data.
+ * Only removes the first marker found in hash table.
+ * Return 0 on success or error value.
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
*/
-int marker_arm(const char *name)
+int marker_probe_unregister_private_data(marker_probe_func *probe,
+ void *private)
{
struct marker_entry *entry;
int ret = 0;
+ struct marker_probe_closure *old;
mutex_lock(&markers_mutex);
- entry = get_marker(name);
+ entry = get_marker_from_private_data(probe, private);
if (!entry) {
ret = -ENOENT;
goto end;
}
- /*
- * Only need to update probes when refcount passes from 0 to 1.
- */
- if (entry->refcount++)
- goto end;
-end:
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = marker_entry_remove_probe(entry, NULL, private);
mutex_unlock(&markers_mutex);
- marker_update_probes(NULL);
- return ret;
-}
-EXPORT_SYMBOL_GPL(marker_arm);
-
-/**
- * marker_disarm - Disarm a marker
- * @name: marker name
- *
- * Disarm a marker. It keeps a reference count of the number of arming/disarming
- * done.
- * Returns 0 if ok, error value on error.
- */
-int marker_disarm(const char *name)
-{
- struct marker_entry *entry;
- int ret = 0;
-
+ marker_update_probes(); /* may update entry */
mutex_lock(&markers_mutex);
- entry = get_marker(name);
- if (!entry) {
- ret = -ENOENT;
- goto end;
- }
- /*
- * Only permit decrement refcount if higher than 0.
- * Do probe update only on 1 -> 0 transition.
- */
- if (entry->refcount) {
- if (--entry->refcount)
- goto end;
- } else {
- ret = -EPERM;
- goto end;
- }
+ entry = get_marker_from_private_data(probe, private);
+ WARN_ON(!entry);
+ entry->oldptr = old;
+ entry->rcu_pending = 1;
+ /* write rcu_pending before calling the RCU callback */
+ smp_wmb();
+ call_rcu(&entry->rcu, free_old_closure);
+ remove_marker(entry->name); /* Ignore busy error message */
end:
mutex_unlock(&markers_mutex);
- marker_update_probes(NULL);
return ret;
}
-EXPORT_SYMBOL_GPL(marker_disarm);
+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
/**
* marker_get_private_data - Get a marker's probe private data
* @name: marker name
+ * @probe: probe to match
+ * @num: get the nth matching probe's private data
*
+ * Returns the nth private data pointer (starting from 0) matching, or an
+ * ERR_PTR.
* Returns the private data pointer, or an ERR_PTR.
* The private data pointer should _only_ be dereferenced if the caller is the
* owner of the data, or its content could vanish. This is mostly used to
* confirm that a caller is the owner of a registered probe.
*/
-void *marker_get_private_data(const char *name)
+void *marker_get_private_data(const char *name, marker_probe_func *probe,
+ int num)
{
struct hlist_head *head;
struct hlist_node *node;
struct marker_entry *e;
size_t name_len = strlen(name) + 1;
u32 hash = jhash(name, name_len-1, 0);
- int found = 0;
+ int i;
head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
hlist_for_each_entry(e, node, head, hlist) {
if (!strcmp(name, e->name)) {
- found = 1;
- return e->private;
+ if (!e->ptype) {
+ if (num == 0 && e->single.func == probe)
+ return e->single.private;
+ else
+ break;
+ } else {
+ struct marker_probe_closure *closure;
+ int match = 0;
+ closure = e->multi;
+ for (i = 0; closure[i].func; i++) {
+ if (closure[i].func != probe)
+ continue;
+ if (match++ == num)
+ return closure[i].private;
+ }
+ }
}
}
return ERR_PTR(-ENOENT);
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c 2007-11-13 09:48:36.000000000 -0500
+++ linux-2.6-lttng/kernel/module.c 2007-11-13 09:48:41.000000000 -0500
@@ -2000,7 +2000,7 @@ static struct module *load_module(void _
if (!mod->taints) {
#ifdef CONFIG_MARKERS
marker_update_probe_range(mod->markers,
- mod->markers + mod->num_markers, NULL, NULL);
+ mod->markers + mod->num_markers);
#endif
#ifdef CONFIG_IMMEDIATE
immediate_update_range(mod->immediate,
@@ -2600,7 +2600,7 @@ EXPORT_SYMBOL(struct_module);
#endif
#ifdef CONFIG_MARKERS
-void module_update_markers(struct module *probe_module, int *refcount)
+void module_update_markers(void)
{
struct module *mod;
@@ -2608,8 +2608,7 @@ void module_update_markers(struct module
list_for_each_entry(mod, &modules, list)
if (!mod->taints)
marker_update_probe_range(mod->markers,
- mod->markers + mod->num_markers,
- probe_module, refcount);
+ mod->markers + mod->num_markers);
mutex_unlock(&module_mutex);
}
#endif
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h 2007-11-13 09:48:36.000000000 -0500
+++ linux-2.6-lttng/include/linux/module.h 2007-11-13 09:48:41.000000000 -0500
@@ -467,7 +467,7 @@ int unregister_module_notifier(struct no
extern void print_modules(void);
-extern void module_update_markers(struct module *probe_module, int *refcount);
+extern void module_update_markers(void);
extern void _module_immediate_update(void);
extern void module_immediate_update(void);
Index: linux-2.6-lttng/samples/markers/probe-example.c
===================================================================
--- linux-2.6-lttng.orig/samples/markers/probe-example.c 2007-11-13 09:48:36.000000000 -0500
+++ linux-2.6-lttng/samples/markers/probe-example.c 2007-11-13 09:48:41.000000000 -0500
@@ -20,31 +20,35 @@ struct probe_data {
marker_probe_func *probe_func;
};
-void probe_subsystem_event(const struct marker *mdata, void *private,
- const char *format, ...)
+void __attribute__((__aligned__(2)))
+probe_subsystem_event(void *probe_data, void *call_data,
+ const char *format, va_list *args);
+void
+probe_subsystem_event(void *probe_data, void *call_data,
+ const char *format, va_list *args)
{
- va_list ap;
/* Declare args */
unsigned int value;
const char *mystr;
/* Assign args */
- va_start(ap, format);
- value = va_arg(ap, typeof(value));
- mystr = va_arg(ap, typeof(mystr));
+ value = va_arg(*args, typeof(value));
+ mystr = va_arg(*args, typeof(mystr));
/* Call printk */
- printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
+ printk(KERN_INFO "Value %u, string %s\n", value, mystr);
/* or count, check rights, serialize data in a buffer */
-
- va_end(ap);
}
atomic_t eventb_count = ATOMIC_INIT(0);
-void probe_subsystem_eventb(const struct marker *mdata, void *private,
- const char *format, ...)
+void
+probe_subsystem_eventb(void *probe_data, void *call_data,
+ const char *format, va_list *args) __attribute__((aligned(2)));
+void
+probe_subsystem_eventb(void *probe_data, void *call_data,
+ const char *format, va_list *args)
{
/* Increment counter */
atomic_inc(&eventb_count);
@@ -72,10 +76,6 @@ static int __init probe_init(void)
if (result)
printk(KERN_INFO "Unable to register probe %s\n",
probe_array[i].name);
- result = marker_arm(probe_array[i].name);
- if (result)
- printk(KERN_INFO "Unable to arm probe %s\n",
- probe_array[i].name);
}
return 0;
}
@@ -85,7 +85,8 @@ static void __exit probe_fini(void)
int i;
for (i = 0; i < ARRAY_SIZE(probe_array); i++)
- marker_probe_unregister(probe_array[i].name);
+ marker_probe_unregister(probe_array[i].name,
+ probe_array[i].probe_func, &probe_array[i]);
printk(KERN_INFO "Number of event b : %u\n",
atomic_read(&eventb_count));
}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 4+ messages in thread* [RFC 2/3] Linux Kernel Markers - Create modpost file
2007-11-13 19:03 [RFC 0/3] Linux Kernel Markers new features Mathieu Desnoyers
2007-11-13 19:03 ` [RFC 1/3] Linux Kernel Markers - Support Multiple Probes Mathieu Desnoyers
@ 2007-11-13 19:03 ` Mathieu Desnoyers
2007-11-13 19:03 ` [RFC 3/3] Linux Kernel Markers - Use Immediate Values Mathieu Desnoyers
2 siblings, 0 replies; 4+ messages in thread
From: Mathieu Desnoyers @ 2007-11-13 19:03 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Roland McGrath, Mathieu Desnoyers
[-- Attachment #1: linux-kernel-markers-create-modpost-file.patch --]
[-- Type: text/plain, Size: 11179 bytes --]
This adds some new magic in the MODPOST phase for CONFIG_MARKERS.
Analogous to the Module.symvers file, the build will now write a
Module.markers file when CONFIG_MARKERS=y is set. This file lists
the name, defining module, and format string of each marker,
separated by \t characters. This simple text file can be used by
offline build procedures for instrumentation code, analogous to
how System.map and Module.symvers can be useful to have for
kernels other than the one you are running right now.
The strings are made easy to extract by having the __trace_mark macro
define the name and format together in a single array called __mstrtab_*
in the __markers_strings section. This is straightforward and reliable
as long as the marker structs are always defined by this macro. It is
an unreasonable amount of hairy work to extract the string pointers from
the __markers section structs, which entails handling a relocation type
for every machine under the sun.
Mathieu :
- Ran through checkpatch.pl
Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
include/linux/marker.h | 9 --
scripts/Makefile.modpost | 11 +++
scripts/mod/modpost.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++-
scripts/mod/modpost.h | 3
4 files changed, 180 insertions(+), 7 deletions(-)
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/marker.h 2007-11-13 09:38:34.000000000 -0500
+++ linux-2.6-lttng/include/linux/marker.h 2007-11-13 09:38:36.000000000 -0500
@@ -61,15 +61,12 @@ struct marker {
*/
#define __trace_mark(name, call_data, format, args...) \
do { \
- static const char __mstrtab_name_##name[] \
+ static const char __mstrtab_##name[] \
__attribute__((section("__markers_strings"))) \
- = #name; \
- static const char __mstrtab_format_##name[] \
- __attribute__((section("__markers_strings"))) \
- = format; \
+ = #name "\0" format; \
static struct marker __mark_##name \
__attribute__((section("__markers"), aligned(8))) = \
- { __mstrtab_name_##name, __mstrtab_format_##name, \
+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
0, 0, marker_probe_cb, \
{ __mark_empty_function, NULL}, NULL }; \
__mark_check_format(format, ## args); \
Index: linux-2.6-lttng/scripts/Makefile.modpost
===================================================================
--- linux-2.6-lttng.orig/scripts/Makefile.modpost 2007-11-13 09:25:30.000000000 -0500
+++ linux-2.6-lttng/scripts/Makefile.modpost 2007-11-13 09:38:36.000000000 -0500
@@ -13,6 +13,7 @@
# 2) modpost is then used to
# 3) create one <module>.mod.c file pr. module
# 4) create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
# 5) compile all <module>.mod.c files
# 6) final link of the module to a <module.ko> file
@@ -45,6 +46,10 @@ include scripts/Makefile.lib
kernelsymfile := $(objtree)/Module.symvers
modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
# Step 1), find all modules listed in $(MODVERDIR)/
__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -62,6 +67,8 @@ modpost = scripts/mod/modpost
$(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \
$(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \
$(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
$(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w)
quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules
@@ -81,6 +88,10 @@ vmlinux.o: FORCE
$(symverfile): __modpost ;
$(modules:.ko=.mod.c): __modpost ;
+ifdef CONFIG_MARKERS
+$(markersfile): __modpost ;
+endif
+
# Step 5), compile all *.mod.c files
Index: linux-2.6-lttng/scripts/mod/modpost.c
===================================================================
--- linux-2.6-lttng.orig/scripts/mod/modpost.c 2007-11-13 09:25:30.000000000 -0500
+++ linux-2.6-lttng/scripts/mod/modpost.c 2007-11-13 09:41:23.000000000 -0500
@@ -11,6 +11,8 @@
* Usage: modpost vmlinux module1.o module2.o ...
*/
+#define _GNU_SOURCE
+#include <stdio.h>
#include <ctype.h>
#include "modpost.h"
#include "../../include/linux/license.h"
@@ -424,6 +426,8 @@ static int parse_elf(struct elf_info *in
info->export_unused_gpl_sec = i;
else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
info->export_gpl_future_sec = i;
+ else if (strcmp(secname, "__markers_strings") == 0)
+ info->markers_strings_sec = i;
if (sechdrs[i].sh_type != SHT_SYMTAB)
continue;
@@ -1249,6 +1253,62 @@ static int exit_section_ref_ok(const cha
return 0;
}
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+ const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec];
+ const char *strings = (const char *) info->hdr + sh->sh_offset;
+ const Elf_Sym *sym, *first_sym, *last_sym;
+ size_t n;
+
+ if (!info->markers_strings_sec)
+ return;
+
+ /*
+ * First count the strings. We look for all the symbols defined
+ * in the __markers_strings section named __mstrtab_*. For
+ * these local names, the compiler puts a random .NNN suffix on,
+ * so the names don't correspond exactly.
+ */
+ first_sym = last_sym = NULL;
+ n = 0;
+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+ sym->st_shndx == info->markers_strings_sec &&
+ !strncmp(info->strtab + sym->st_name,
+ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+ if (first_sym == NULL)
+ first_sym = sym;
+ last_sym = sym;
+ ++n;
+ }
+
+ if (n == 0)
+ return;
+
+ /*
+ * Now collect each name and format into a line for the output.
+ * Lines look like:
+ * marker_name vmlinux marker %s format %d
+ * The format string after the second \t can use whitespace.
+ */
+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
+ mod->nmarkers = n;
+
+ n = 0;
+ for (sym = first_sym; sym <= last_sym; sym++)
+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+ sym->st_shndx == info->markers_strings_sec &&
+ !strncmp(info->strtab + sym->st_name,
+ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+ const char *name = strings + sym->st_value;
+ const char *fmt = strchr(name, '\0') + 1;
+ char *line = NULL;
+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+ NOFAIL(line);
+ mod->markers[n++] = line;
+ }
+}
+
static void read_symbols(char *modname)
{
const char *symname;
@@ -1301,6 +1361,8 @@ static void read_symbols(char *modname)
get_src_version(modname, mod->srcversion,
sizeof(mod->srcversion)-1);
+ get_markers(&info, mod);
+
parse_elf_finish(&info);
/* Our trick to get versioning for struct_module - it's
@@ -1649,6 +1711,92 @@ static void write_dump(const char *fname
write_if_changed(&buf, fname);
}
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+ char *line = NULL;
+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+ NOFAIL(line);
+
+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+ sizeof mod->markers[0])));
+ mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+ unsigned long size, pos = 0;
+ void *file = grab_file(fname, &size);
+ char *line;
+
+ if (!file) /* No old markers, silently ignore */
+ return;
+
+ while ((line = get_next_line(&pos, file, size))) {
+ char *marker, *modname, *fmt;
+ struct module *mod;
+
+ marker = line;
+ modname = strchr(marker, '\t');
+ if (!modname)
+ goto fail;
+ *modname++ = '\0';
+ fmt = strchr(modname, '\t');
+ if (!fmt)
+ goto fail;
+ *fmt++ = '\0';
+ if (*marker == '\0' || *modname == '\0')
+ goto fail;
+
+ mod = find_module(modname);
+ if (!mod) {
+ if (is_vmlinux(modname))
+ have_vmlinux = 1;
+ mod = new_module(NOFAIL(strdup(modname)));
+ mod->skip = 1;
+ }
+
+ add_marker(mod, marker, fmt);
+ }
+ return;
+fail:
+ fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+ return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+ struct buffer buf = { };
+ struct module *mod;
+ size_t i;
+
+ for (mod = modules; mod; mod = mod->next)
+ if ((!external_module || !mod->skip) && mod->markers != NULL) {
+ /*
+ * Sort the strings so we can skip duplicates when
+ * we write them out.
+ */
+ qsort(mod->markers, mod->nmarkers,
+ sizeof mod->markers[0], &compare_strings);
+ for (i = 0; i < mod->nmarkers; ++i) {
+ char *line = mod->markers[i];
+ buf_write(&buf, line, strlen(line));
+ while (i + 1 < mod->nmarkers &&
+ !strcmp(mod->markers[i],
+ mod->markers[i + 1]))
+ free(mod->markers[i++]);
+ free(mod->markers[i]);
+ }
+ free(mod->markers);
+ mod->markers = NULL;
+ }
+
+ write_if_changed(&buf, fname);
+}
+
int main(int argc, char **argv)
{
struct module *mod;
@@ -1656,10 +1804,12 @@ int main(int argc, char **argv)
char fname[SZ];
char *kernel_read = NULL, *module_read = NULL;
char *dump_write = NULL;
+ char *markers_read = NULL;
+ char *markers_write = NULL;
int opt;
int err;
- while ((opt = getopt(argc, argv, "i:I:mso:aw")) != -1) {
+ while ((opt = getopt(argc, argv, "i:I:mso:awM:K:")) != -1) {
switch(opt) {
case 'i':
kernel_read = optarg;
@@ -1683,6 +1833,12 @@ int main(int argc, char **argv)
case 'w':
warn_unresolved = 1;
break;
+ case 'M':
+ markers_write = optarg;
+ break;
+ case 'K':
+ markers_read = optarg;
+ break;
default:
exit(1);
}
@@ -1724,5 +1880,11 @@ int main(int argc, char **argv)
if (dump_write)
write_dump(dump_write);
+ if (markers_read)
+ read_markers(markers_read);
+
+ if (markers_write)
+ write_markers(markers_write);
+
return err;
}
Index: linux-2.6-lttng/scripts/mod/modpost.h
===================================================================
--- linux-2.6-lttng.orig/scripts/mod/modpost.h 2007-11-13 09:25:30.000000000 -0500
+++ linux-2.6-lttng/scripts/mod/modpost.h 2007-11-13 09:38:36.000000000 -0500
@@ -110,6 +110,8 @@ struct module {
int has_init;
int has_cleanup;
struct buffer dev_table_buf;
+ char **markers;
+ size_t nmarkers;
char srcversion[25];
};
@@ -124,6 +126,7 @@ struct elf_info {
Elf_Section export_gpl_sec;
Elf_Section export_unused_gpl_sec;
Elf_Section export_gpl_future_sec;
+ Elf_Section markers_strings_sec;
const char *strtab;
char *modinfo;
unsigned int modinfo_len;
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 4+ messages in thread* [RFC 3/3] Linux Kernel Markers - Use Immediate Values
2007-11-13 19:03 [RFC 0/3] Linux Kernel Markers new features Mathieu Desnoyers
2007-11-13 19:03 ` [RFC 1/3] Linux Kernel Markers - Support Multiple Probes Mathieu Desnoyers
2007-11-13 19:03 ` [RFC 2/3] Linux Kernel Markers - Create modpost file Mathieu Desnoyers
@ 2007-11-13 19:03 ` Mathieu Desnoyers
2 siblings, 0 replies; 4+ messages in thread
From: Mathieu Desnoyers @ 2007-11-13 19:03 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers
[-- Attachment #1: linux-kernel-markers-immediate-values.patch --]
[-- Type: text/plain, Size: 7839 bytes --]
Make markers use immediate values.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
Documentation/markers.txt | 17 +++++++++++++----
include/linux/marker.h | 42 ++++++++++++++++++++++++++++++++----------
kernel/marker.c | 8 ++++++--
kernel/module.c | 1 +
4 files changed, 52 insertions(+), 16 deletions(-)
Index: linux-2.6-lttng/include/linux/marker.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/marker.h 2007-11-13 09:49:15.000000000 -0500
+++ linux-2.6-lttng/include/linux/marker.h 2007-11-13 09:49:16.000000000 -0500
@@ -12,6 +12,7 @@
* See the file COPYING for more details.
*/
+#include <linux/immediate.h>
#include <linux/types.h>
struct module;
@@ -42,7 +43,7 @@ struct marker {
const char *format; /* Marker format string, describing the
* variable argument list.
*/
- char state; /* Marker state. */
+ DEFINE_IMMEDIATE(char, state); /* Immediate value state. */
char ptype; /* probe type : 0 : single, 1 : multi */
void (*call)(const struct marker *mdata, /* Probe wrapper */
void *private_data, const char *fmt, ...);
@@ -53,13 +54,14 @@ struct marker {
#ifdef CONFIG_MARKERS
/*
+ * Generic marker flavor always available.
* Note : the empty asm volatile with read constraint is used here instead of a
* "used" attribute to fix a gcc 4.1.x bug.
* Make sure the alignment of the structure in the __markers section will
* not add unwanted padding between the beginning of the section and the
* structure. Force alignment to the same alignment as the section start.
*/
-#define __trace_mark(name, call_data, format, args...) \
+#define __trace_mark(generic, name, call_data, format, args...) \
do { \
static const char __mstrtab_##name[] \
__attribute__((section("__markers_strings"))) \
@@ -70,17 +72,23 @@ struct marker {
0, 0, marker_probe_cb, \
{ __mark_empty_function, NULL}, NULL }; \
__mark_check_format(format, ## args); \
- if (unlikely(__mark_##name.state)) { \
- (*__mark_##name.call) \
- (&__mark_##name, call_data, \
- format, ## args); \
+ if (!generic) { \
+ if (unlikely(immediate_read(__mark_##name.state))) \
+ (*__mark_##name.call) \
+ (&__mark_##name, call_data, \
+ format, ## args); \
+ } else { \
+ if (unlikely(_immediate_read(__mark_##name.state))) \
+ (*__mark_##name.call) \
+ (&__mark_##name, call_data, \
+ format, ## args); \
} \
} while (0)
extern void marker_update_probe_range(struct marker *begin,
struct marker *end);
#else /* !CONFIG_MARKERS */
-#define __trace_mark(name, call_data, format, args...) \
+#define __trace_mark(generic, name, call_data, format, args...) \
__mark_check_format(format, ## args)
static inline void marker_update_probe_range(struct marker *begin,
struct marker *end)
@@ -88,15 +96,29 @@ static inline void marker_update_probe_r
#endif /* CONFIG_MARKERS */
/**
- * trace_mark - Marker
+ * trace_mark - Marker using code patching
* @name: marker name, not quoted.
* @format: format string
* @args...: variable argument list
*
- * Places a marker.
+ * Places a marker using optimized code patching technique (immediate_read())
+ * to be enabled.
*/
#define trace_mark(name, format, args...) \
- __trace_mark(name, NULL, format, ## args)
+ __trace_mark(0, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_immediate_read()) to be
+ * enabled. Should be used for markers in __init and __exit functions and in
+ * lockdep code.
+ */
+#define _trace_mark(name, format, args...) \
+ __trace_mark(1, name, NULL, format, ## args)
/**
* MARK_NOARGS - Format string for a marker with no argument.
Index: linux-2.6-lttng/kernel/marker.c
===================================================================
--- linux-2.6-lttng.orig/kernel/marker.c 2007-11-13 09:48:41.000000000 -0500
+++ linux-2.6-lttng/kernel/marker.c 2007-11-13 09:49:16.000000000 -0500
@@ -23,6 +23,7 @@
#include <linux/rcupdate.h>
#include <linux/marker.h>
#include <linux/err.h>
+#include <linux/immediate.h>
extern struct marker __start___markers[];
extern struct marker __stop___markers[];
@@ -534,7 +535,7 @@ static int set_marker(struct marker_entr
*/
smp_wmb();
elem->ptype = (*entry)->ptype;
- elem->state = active;
+ elem->state__immediate = active;
return 0;
}
@@ -548,7 +549,7 @@ static int set_marker(struct marker_entr
static void disable_marker(struct marker *elem)
{
/* leave "call" as is. It is known statically. */
- elem->state = 0;
+ elem->state__immediate = 0;
elem->single.func = __mark_empty_function;
/* Update the function before setting the ptype */
smp_wmb();
@@ -615,6 +616,9 @@ static void marker_update_probes(void)
marker_update_probe_range(__start___markers, __stop___markers);
/* Markers in modules. */
module_update_markers();
+ /* Update immediate values */
+ core_immediate_update();
+ module_immediate_update();
}
/**
Index: linux-2.6-lttng/Documentation/markers.txt
===================================================================
--- linux-2.6-lttng.orig/Documentation/markers.txt 2007-11-13 09:30:13.000000000 -0500
+++ linux-2.6-lttng/Documentation/markers.txt 2007-11-13 09:49:16.000000000 -0500
@@ -15,10 +15,12 @@ provide at runtime. A marker can be "on"
(no probe is attached). When a marker is "off" it has no effect, except for
adding a tiny time penalty (checking a condition for a branch) and space
penalty (adding a few bytes for the function call at the end of the
-instrumented function and adds a data structure in a separate section). When a
-marker is "on", the function you provide is called each time the marker is
-executed, in the execution context of the caller. When the function provided
-ends its execution, it returns to the caller (continuing from the marker site).
+instrumented function and adds a data structure in a separate section). The
+immediate values are used to minimize the impact on data cache, encoding the
+condition in the instruction stream. When a marker is "on", the function you
+provide is called each time the marker is executed, in the execution context of
+the caller. When the function provided ends its execution, it returns to the
+caller (continuing from the marker site).
You can put markers at important locations in the code. Markers are
lightweight hooks that can pass an arbitrary number of parameters,
@@ -69,6 +71,13 @@ a printk warning which identifies the in
"Format mismatch for probe probe_name (format), marker (format)"
+* Optimization for a given architecture
+
+To force use of a non-optimized version of the markers, _trace_mark() should be
+used. It takes the same parameters as the normal markers, but it does not use
+the immediate values based on code patching.
+
+
* Probe / marker example
See the example provided in samples/markers/src
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c 2007-11-13 09:48:41.000000000 -0500
+++ linux-2.6-lttng/kernel/module.c 2007-11-13 09:49:16.000000000 -0500
@@ -2003,6 +2003,7 @@ static struct module *load_module(void _
mod->markers + mod->num_markers);
#endif
#ifdef CONFIG_IMMEDIATE
+ /* Immediate values must be updated after markers */
immediate_update_range(mod->immediate,
mod->immediate + mod->num_immediate);
#endif
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 4+ messages in thread