* [PATCH] Relay CPU Hotplug support
@ 2006-12-21 0:31 Mathieu Desnoyers
2006-12-21 1:13 ` Tom Zanussi
2006-12-21 7:23 ` Andrew Morton
0 siblings, 2 replies; 10+ messages in thread
From: Mathieu Desnoyers @ 2006-12-21 0:31 UTC (permalink / raw)
To: linux-kernel, David Wilder, Tom Zanussi, Andrew Morton,
Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig
Cc: ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh,
Thomas Gleixner
Hi,
Here is a patch, result of the combined work of Tom Zanussi and myself, to add
CPU hotplug support to Relay.
This patch applies on 2.6.20-rc1-git7.
Signed-off-by : Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -24,7 +24,7 @@ #define FIX_SIZE(x) ((((x) - 1) & PAGE_M
/*
* Tracks changes to rchan/rchan_buf structs
*/
-#define RELAYFS_CHANNEL_VERSION 6
+#define RELAYFS_CHANNEL_VERSION 7
/*
* Per-cpu relay channel buffer
@@ -64,6 +64,10 @@ struct rchan
void *private_data; /* for user-defined data */
size_t last_toobig; /* tried to log event > subbuf size */
struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
+ int is_global; /* One global buffer ? */
+ struct list_head list; /* for channel list */
+ struct dentry *parent; /* parent dentry passed to open */
+ char base_filename[NAME_MAX]; /* saved base filename */
};
/*
@@ -162,7 +166,8 @@ struct rchan *relay_open(const char *bas
struct dentry *parent,
size_t subbuf_size,
size_t n_subbufs,
- struct rchan_callbacks *cb);
+ struct rchan_callbacks *cb,
+ void *private_data);
extern void relay_close(struct rchan *chan);
extern void relay_flush(struct rchan *chan);
extern void relay_subbufs_consumed(struct rchan *chan,
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -7,6 +7,8 @@
* Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com)
*
* Moved to kernel/relay.c by Paul Mundt, 2006.
+ * November 2006 - CPU hotplug support by Mathieu Desnoyers
+ * (mathieu.desnoyers@polymtl.ca)
*
* This file is released under the GPL.
*/
@@ -18,6 +20,11 @@ #include <linux/string.h>
#include <linux/relay.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
+#include <linux/cpu.h>
+
+/* list of open channels, for cpu hotplug */
+static DEFINE_MUTEX(relay_channels_mutex);
+static LIST_HEAD(relay_channels);
/*
* close() vm_op implementation for relay file mapping.
@@ -187,6 +194,7 @@ void relay_destroy_buf(struct rchan_buf
__free_page(buf->page_array[i]);
kfree(buf->page_array);
}
+ chan->buf[buf->cpu] = NULL;
kfree(buf->padding);
kfree(buf);
kref_put(&chan->kref, relay_destroy_channel);
@@ -362,58 +370,76 @@ static inline void __relay_reset(struct
void relay_reset(struct rchan *chan)
{
unsigned int i;
- struct rchan_buf *prev = NULL;
if (!chan)
return;
- for (i = 0; i < NR_CPUS; i++) {
- if (!chan->buf[i] || chan->buf[i] == prev)
- break;
- __relay_reset(chan->buf[i], 0);
- prev = chan->buf[i];
+ if (chan->is_global && chan->buf[0]) {
+ __relay_reset(chan->buf[0], 0);
+ return;
}
+
+ lock_cpu_hotplug();
+ for_each_online_cpu(i)
+ if (chan->buf[i])
+ __relay_reset(chan->buf[i], 0);
+ unlock_cpu_hotplug();
}
EXPORT_SYMBOL_GPL(relay_reset);
/*
* relay_open_buf - create a new relay channel buffer
*
- * Internal - used by relay_open().
+ * used by relay_open() and CPU hotplug.
*/
-static struct rchan_buf *relay_open_buf(struct rchan *chan,
- const char *filename,
- struct dentry *parent,
- int *is_global)
+static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu)
{
- struct rchan_buf *buf;
+ struct rchan_buf *buf = NULL;
struct dentry *dentry;
+ char *tmpname;
- if (*is_global)
+ if (chan->is_global)
return chan->buf[0];
+ tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL);
+ if (!tmpname)
+ goto end;
+ snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu);
+
buf = relay_create_buf(chan);
if (!buf)
- return NULL;
+ goto free_name;
- /* Create file in fs */
- dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
- buf, is_global);
- if (!dentry) {
- relay_destroy_buf(buf);
- return NULL;
- }
+ buf->cpu = cpu;
+ __relay_reset(buf, 1);
+ /* Create file in fs */
+ dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR,
+ buf, &chan->is_global);
+ if (!dentry)
+ goto free_buf;
+
buf->dentry = dentry;
- __relay_reset(buf, 1);
+ if(chan->is_global) {
+ chan->buf[0] = buf;
+ buf->cpu = 0;
+ }
+
+ goto free_name;
+
+free_buf:
+ relay_destroy_buf(buf);
+free_name:
+ kfree(tmpname);
+end:
return buf;
}
/**
* relay_close_buf - close a channel buffer
* @buf: channel buffer
- *
+ *
* Marks the buffer finalized and restores the default callbacks.
* The channel buffer and channel buffer data structure are then freed
* automatically when the last reference is given up.
@@ -448,12 +474,54 @@ static inline void setup_callbacks(struc
}
/**
+ *
+ * relay_hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int hotcpu = (unsigned long)hcpu;
+ struct rchan *chan;
+
+ switch(action) {
+ case CPU_UP_PREPARE:
+ mutex_lock(&relay_channels_mutex);
+ list_for_each_entry(chan, &relay_channels, list) {
+ if (chan->buf[hotcpu])
+ continue;
+ chan->buf[hotcpu] = relay_open_buf(chan, hotcpu);
+ if(!chan->buf[hotcpu]) {
+ printk(KERN_ERR
+ "relay_hotcpu_callback: cpu %d buffer "
+ "creation failed\n", hotcpu);
+ mutex_unlock(&relay_channels_mutex);
+ return NOTIFY_BAD;
+ }
+ }
+ mutex_unlock(&relay_channels_mutex);
+ break;
+ case CPU_DEAD:
+ /* No need to flush the cpu : will be flushed upon
+ * final relay_flush() call. */
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+/**
* relay_open - create a new relay channel
* @base_filename: base name of files to create
* @parent: dentry of parent directory, %NULL for root directory
* @subbuf_size: size of sub-buffers
* @n_subbufs: number of sub-buffers
* @cb: client callback functions
+ * @private_data: user-defined data
*
* Returns channel pointer if successful, %NULL otherwise.
*
@@ -466,13 +534,11 @@ struct rchan *relay_open(const char *bas
struct dentry *parent,
size_t subbuf_size,
size_t n_subbufs,
- struct rchan_callbacks *cb)
+ struct rchan_callbacks *cb,
+ void *private_data)
{
unsigned int i;
struct rchan *chan;
- char *tmpname;
- int is_global = 0;
-
if (!base_filename)
return NULL;
@@ -487,38 +553,34 @@ struct rchan *relay_open(const char *bas
chan->n_subbufs = n_subbufs;
chan->subbuf_size = subbuf_size;
chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
+ chan->parent = parent;
+ chan->private_data = private_data;
+ strlcpy(chan->base_filename, base_filename, NAME_MAX);
setup_callbacks(chan, cb);
kref_init(&chan->kref);
- tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
- if (!tmpname)
- goto free_chan;
-
+ lock_cpu_hotplug();
for_each_online_cpu(i) {
- sprintf(tmpname, "%s%d", base_filename, i);
- chan->buf[i] = relay_open_buf(chan, tmpname, parent,
- &is_global);
+ chan->buf[i] = relay_open_buf(chan, i);
if (!chan->buf[i])
goto free_bufs;
-
- chan->buf[i]->cpu = i;
}
+ mutex_lock(&relay_channels_mutex);
+ list_add(&chan->list, &relay_channels);
+ mutex_unlock(&relay_channels_mutex);
+ unlock_cpu_hotplug();
- kfree(tmpname);
return chan;
free_bufs:
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_online_cpu(i) {
if (!chan->buf[i])
break;
relay_close_buf(chan->buf[i]);
- if (is_global)
- break;
}
- kfree(tmpname);
-free_chan:
kref_put(&chan->kref, relay_destroy_channel);
+ unlock_cpu_hotplug();
return NULL;
}
EXPORT_SYMBOL_GPL(relay_open);
@@ -619,24 +681,28 @@ EXPORT_SYMBOL_GPL(relay_subbufs_consumed
void relay_close(struct rchan *chan)
{
unsigned int i;
- struct rchan_buf *prev = NULL;
if (!chan)
return;
- for (i = 0; i < NR_CPUS; i++) {
- if (!chan->buf[i] || chan->buf[i] == prev)
- break;
- relay_close_buf(chan->buf[i]);
- prev = chan->buf[i];
- }
+ lock_cpu_hotplug();
+ if (chan->is_global && chan->buf[0])
+ relay_close_buf(chan->buf[0]);
+ else
+ for_each_possible_cpu(i)
+ if (chan->buf[i])
+ relay_close_buf(chan->buf[i]);
if (chan->last_toobig)
printk(KERN_WARNING "relay: one or more items not logged "
"[item size (%Zd) > sub-buffer size (%Zd)]\n",
chan->last_toobig, chan->subbuf_size);
+ mutex_lock(&relay_channels_mutex);
+ list_del(&chan->list);
+ mutex_unlock(&relay_channels_mutex);
kref_put(&chan->kref, relay_destroy_channel);
+ unlock_cpu_hotplug();
}
EXPORT_SYMBOL_GPL(relay_close);
@@ -649,17 +715,20 @@ EXPORT_SYMBOL_GPL(relay_close);
void relay_flush(struct rchan *chan)
{
unsigned int i;
- struct rchan_buf *prev = NULL;
if (!chan)
return;
- for (i = 0; i < NR_CPUS; i++) {
- if (!chan->buf[i] || chan->buf[i] == prev)
- break;
- relay_switch_subbuf(chan->buf[i], 0);
- prev = chan->buf[i];
+ if (chan->is_global && chan->buf[0]) {
+ relay_switch_subbuf(chan->buf[0], 0);
+ return;
}
+
+ lock_cpu_hotplug();
+ for_each_possible_cpu(i)
+ if (chan->buf[i])
+ relay_switch_subbuf(chan->buf[i], 0);
+ unlock_cpu_hotplug();
}
EXPORT_SYMBOL_GPL(relay_flush);
@@ -1023,3 +1092,12 @@ const struct file_operations relay_file_
.sendfile = relay_file_sendfile,
};
EXPORT_SYMBOL_GPL(relay_file_operations);
+
+static __init int relay_init(void)
+{
+
+ hotcpu_notifier(relay_hotcpu_callback, 0);
+ return 0;
+}
+
+module_init(relay_init);
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -363,10 +363,9 @@ static int blk_trace_setup(request_queue
if (!bt->dropped_file)
goto err;
- bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks);
+ bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks, bt);
if (!bt->rchan)
goto err;
- bt->rchan->private_data = bt;
bt->act_mask = buts.act_mask;
if (!bt->act_mask)
OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] Relay CPU Hotplug support 2006-12-21 0:31 [PATCH] Relay CPU Hotplug support Mathieu Desnoyers @ 2006-12-21 1:13 ` Tom Zanussi 2006-12-21 3:22 ` Mathieu Desnoyers 2006-12-21 7:23 ` Andrew Morton 1 sibling, 1 reply; 10+ messages in thread From: Tom Zanussi @ 2006-12-21 1:13 UTC (permalink / raw) To: Mathieu Desnoyers Cc: linux-kernel, David Wilder, Tom Zanussi, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner Mathieu Desnoyers writes: > Hi, > > Here is a patch, result of the combined work of Tom Zanussi and myself, to add > CPU hotplug support to Relay. > > This patch applies on 2.6.20-rc1-git7. > > Signed-off-by : Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> > Hi Mathieu, It looks like you forgot to include the Documentation update with this. Other than that, it looks fine to me. Acked-by: Tom Zanussi <zanussi@us.ibm.com> ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-21 1:13 ` Tom Zanussi @ 2006-12-21 3:22 ` Mathieu Desnoyers 0 siblings, 0 replies; 10+ messages in thread From: Mathieu Desnoyers @ 2006-12-21 3:22 UTC (permalink / raw) To: Tom Zanussi Cc: linux-kernel, David Wilder, Andrew Morton, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner * Tom Zanussi (zanussi@us.ibm.com) wrote: > Mathieu Desnoyers writes: > > Hi, > > > > Here is a patch, result of the combined work of Tom Zanussi and myself, to add > > CPU hotplug support to Relay. > > > > This patch applies on 2.6.20-rc1-git7. > > > > Signed-off-by : Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> > > > > Hi Mathieu, > > It looks like you forgot to include the Documentation update with > this. Other than that, it looks fine to me. > > Acked-by: Tom Zanussi <zanussi@us.ibm.com> > > Here is the missing part of the patch for documentation. Thanks for pointing it out. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt @@ -157,7 +157,7 @@ TBD(curr. line MT:/API/) channel management functions: relay_open(base_filename, parent, subbuf_size, n_subbufs, - callbacks) + callbacks, private_data) relay_close(chan) relay_flush(chan) relay_reset(chan) @@ -251,7 +251,7 @@ static struct rchan_callbacks relay_call And an example relay_open() invocation using them: - chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks); + chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL); If the create_buf_file() callback fails, or isn't defined, channel creation and thus relay_open() will fail. @@ -289,6 +289,11 @@ they use the proper locking for such a b writes in a spinlock, or by copying a write function from relay.h and creating a local version that internally does the proper locking. +The private_data passed into relay_open() allows clients to associate +user-defined data with a channel, and is immediately available +(including in create_buf_file()) via chan->private_data or +buf->chan->private_data. + Channel 'modes' --------------- -- OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-21 0:31 [PATCH] Relay CPU Hotplug support Mathieu Desnoyers 2006-12-21 1:13 ` Tom Zanussi @ 2006-12-21 7:23 ` Andrew Morton 2006-12-21 7:04 ` Tom Zanussi 2006-12-22 10:37 ` Gautham R Shenoy 1 sibling, 2 replies; 10+ messages in thread From: Andrew Morton @ 2006-12-21 7:23 UTC (permalink / raw) To: Mathieu Desnoyers Cc: linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner On Wed, 20 Dec 2006 19:31:01 -0500 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote: > Hi, > > Here is a patch, result of the combined work of Tom Zanussi and myself, to add > CPU hotplug support to Relay. > > ... > > + > + lock_cpu_hotplug(); > + for_each_online_cpu(i) > + if (chan->buf[i]) > + __relay_reset(chan->buf[i], 0); > + unlock_cpu_hotplug(); __relay_reset() runs flush_scheduled_work(). If one of the queued works takes lock_cpu_hoplug() (and some do), thou art most deadest meat. I think the core design problem you have here is that you are using cpu_online_map to record the presence or absence of resources which belong to the relay driver. Why do that - you don't own cpu_online_map (but you do get some notifications when it wants to change, that's all). Perhaps a better approach would be to teach the relay driver to maintain its own resources (already there, in chan->buf[]). So relay.c has a record of which per-cpu buffers are present and which are not. That information gets changed under a lock which the relay driver owns and controls. You already have such a lock: relay_channels_mutex. So some code in here is using lock_cpu_hotplug() to protect relay's resources while other code is using relay_channels_mutex. Which is it? Your proposed change apparently chooses to not release per-cpu resources on cpu-hotunplug. I think. That's the sort of thing which should be communicated in the (presently non-existent) patch changelog. The changelog should also tell us *why* this patch was written. Right now it's in "why on earth should we merge this" territory. Meanwhile, let's shrink 10% off of relay.o's .text, shall we? --- a/kernel/relay.c~relay-remove-inlining +++ a/kernel/relay.c @@ -322,7 +322,7 @@ static void wakeup_readers(struct work_s * * See relay_reset for description of effect. */ -static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) +static void __relay_reset(struct rchan_buf *buf, unsigned int init) { size_t i; @@ -418,7 +418,7 @@ static struct rchan_buf *relay_open_buf( * The channel buffer and channel buffer data structure are then freed * automatically when the last reference is given up. */ -static inline void relay_close_buf(struct rchan_buf *buf) +static void relay_close_buf(struct rchan_buf *buf) { buf->finalized = 1; cancel_delayed_work(&buf->wake_readers); @@ -426,7 +426,7 @@ static inline void relay_close_buf(struc kref_put(&buf->kref, relay_remove_buf); } -static inline void setup_callbacks(struct rchan *chan, +static void setup_callbacks(struct rchan *chan, struct rchan_callbacks *cb) { if (!cb) { @@ -946,11 +946,10 @@ typedef int (*subbuf_actor_t) (size_t re /* * relay_file_read_subbufs - read count bytes, bridging subbuf boundaries */ -static inline ssize_t relay_file_read_subbufs(struct file *filp, - loff_t *ppos, - subbuf_actor_t subbuf_actor, - read_actor_t actor, - read_descriptor_t *desc) +static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos, + subbuf_actor_t subbuf_actor, + read_actor_t actor, + read_descriptor_t *desc) { struct rchan_buf *buf = filp->private_data; size_t read_start, avail; _ ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-21 7:23 ` Andrew Morton @ 2006-12-21 7:04 ` Tom Zanussi 2006-12-22 10:37 ` Gautham R Shenoy 1 sibling, 0 replies; 10+ messages in thread From: Tom Zanussi @ 2006-12-21 7:04 UTC (permalink / raw) To: Andrew Morton Cc: Mathieu Desnoyers, linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner Andrew Morton writes: > On Wed, 20 Dec 2006 19:31:01 -0500 > Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> wrote: > > > Hi, > > > > Here is a patch, result of the combined work of Tom Zanussi and myself, to add > > CPU hotplug support to Relay. > > > > ... > > > > + > > + lock_cpu_hotplug(); > > + for_each_online_cpu(i) > > + if (chan->buf[i]) > > + __relay_reset(chan->buf[i], 0); > > + unlock_cpu_hotplug(); > > __relay_reset() runs flush_scheduled_work(). If one of the queued works > takes lock_cpu_hoplug() (and some do), thou art most deadest meat. > > I think the core design problem you have here is that you are using > cpu_online_map to record the presence or absence of resources which belong > to the relay driver. Why do that - you don't own cpu_online_map (but you > do get some notifications when it wants to change, that's all). > > Perhaps a better approach would be to teach the relay driver to maintain > its own resources (already there, in chan->buf[]). So relay.c has a record > of which per-cpu buffers are present and which are not. That information > gets changed under a lock which the relay driver owns and controls. > > You already have such a lock: relay_channels_mutex. So some code in here > is using lock_cpu_hotplug() to protect relay's resources while other code > is using relay_channels_mutex. Which is it? > Yes, I think we should be able to get rid of lock_cpu_hotplug() and just use relay_channels_mutex instead i.e., we can't be adding a buffer for a new cpu if we're iterating the cpu_online_map and vice versa; the below patch makes that change. > > Your proposed change apparently chooses to not release per-cpu resources on > cpu-hotunplug. I think. That's the sort of thing which should be > communicated in the (presently non-existent) patch changelog. > Yes, you're right - unplug isn't supported by this patch. The thought was that at minumum a new buffer needs to be added when a cpu comes up, but it wasn't worth the effort to remove buffers on cpu down since they'd be freed soon anyway when the channel was closed. > The changelog should also tell us *why* this patch was written. Right now > it's in "why on earth should we merge this" territory. > Mathieu originally needed to add this for tracing Xen, but it's something that's needed for any application that can be tracing while cpus are added. Tom diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt index d6788da..7fbb6ff 100644 --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt @@ -157,7 +157,7 @@ TBD(curr. line MT:/API/) channel management functions: relay_open(base_filename, parent, subbuf_size, n_subbufs, - callbacks) + callbacks, private_data) relay_close(chan) relay_flush(chan) relay_reset(chan) @@ -251,7 +251,7 @@ static struct rchan_callbacks relay_call And an example relay_open() invocation using them: - chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks); + chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL); If the create_buf_file() callback fails, or isn't defined, channel creation and thus relay_open() will fail. @@ -289,6 +289,11 @@ they use the proper locking for such a b writes in a spinlock, or by copying a write function from relay.h and creating a local version that internally does the proper locking. +The private_data passed into relay_open() allows clients to associate +user-defined data with a channel, and is immediately available +(including in create_buf_file()) via chan->private_data or +buf->chan->private_data. + Channel 'modes' --------------- diff --git a/block/blktrace.c b/block/blktrace.c index 2a248ec..1230354 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -335,10 +335,9 @@ static int blk_trace_setup(request_queue if (!bt->dropped_file) goto err; - bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks); + bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks, bt); if (!bt->rchan) goto err; - bt->rchan->private_data = bt; bt->act_mask = buts.act_mask; if (!bt->act_mask) diff --git a/include/linux/relay.h b/include/linux/relay.h index 24accb4..c275e89 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -24,7 +24,7 @@ /* * Tracks changes to rchan/rchan_buf structs */ -#define RELAYFS_CHANNEL_VERSION 6 +#define RELAYFS_CHANNEL_VERSION 7 /* * Per-cpu relay channel buffer @@ -64,6 +64,10 @@ struct rchan void *private_data; /* for user-defined data */ size_t last_toobig; /* tried to log event > subbuf size */ struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ + int is_global; /* One global buffer ? */ + struct list_head list; /* for channel list */ + struct dentry *parent; /* parent dentry passed to open */ + char base_filename[NAME_MAX]; /* saved base filename */ }; /* @@ -162,7 +166,8 @@ struct rchan *relay_open(const char *bas struct dentry *parent, size_t subbuf_size, size_t n_subbufs, - struct rchan_callbacks *cb); + struct rchan_callbacks *cb, + void *private_data); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, diff --git a/kernel/relay.c b/kernel/relay.c index f04bbdb..df4de52 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -7,6 +7,8 @@ * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) * * Moved to kernel/relay.c by Paul Mundt, 2006. + * November 2006 - CPU hotplug support by Mathieu Desnoyers + * (mathieu.desnoyers@polymtl.ca) * * This file is released under the GPL. */ @@ -18,6 +20,11 @@ #include <linux/relay.h> #include <linux/vmalloc.h> #include <linux/mm.h> +#include <linux/cpu.h> + +/* list of open channels, for cpu hotplug */ +static DEFINE_MUTEX(relay_channels_mutex); +static LIST_HEAD(relay_channels); /* * close() vm_op implementation for relay file mapping. @@ -187,6 +194,7 @@ void relay_destroy_buf(struct rchan_buf __free_page(buf->page_array[i]); kfree(buf->page_array); } + chan->buf[buf->cpu] = NULL; kfree(buf->padding); kfree(buf); kref_put(&chan->kref, relay_destroy_channel); @@ -361,58 +369,76 @@ static inline void __relay_reset(struct void relay_reset(struct rchan *chan) { unsigned int i; - struct rchan_buf *prev = NULL; if (!chan) return; - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - __relay_reset(chan->buf[i], 0); - prev = chan->buf[i]; + if (chan->is_global && chan->buf[0]) { + __relay_reset(chan->buf[0], 0); + return; } + + mutex_lock(&relay_channels_mutex); + for_each_online_cpu(i) + if (chan->buf[i]) + __relay_reset(chan->buf[i], 0); + mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_reset); /* * relay_open_buf - create a new relay channel buffer * - * Internal - used by relay_open(). + * used by relay_open() and CPU hotplug. */ -static struct rchan_buf *relay_open_buf(struct rchan *chan, - const char *filename, - struct dentry *parent, - int *is_global) +static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) { - struct rchan_buf *buf; + struct rchan_buf *buf = NULL; struct dentry *dentry; + char *tmpname; - if (*is_global) + if (chan->is_global) return chan->buf[0]; + tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); + if (!tmpname) + goto end; + snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); + buf = relay_create_buf(chan); if (!buf) - return NULL; + goto free_name; + + buf->cpu = cpu; + __relay_reset(buf, 1); /* Create file in fs */ - dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR, - buf, is_global); - if (!dentry) { - relay_destroy_buf(buf); - return NULL; - } + dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, + buf, &chan->is_global); + if (!dentry) + goto free_buf; buf->dentry = dentry; - __relay_reset(buf, 1); + if(chan->is_global) { + chan->buf[0] = buf; + buf->cpu = 0; + } + + goto free_name; + +free_buf: + relay_destroy_buf(buf); +free_name: + kfree(tmpname); +end: return buf; } /** * relay_close_buf - close a channel buffer * @buf: channel buffer - * + * * Marks the buffer finalized and restores the default callbacks. * The channel buffer and channel buffer data structure are then freed * automatically when the last reference is given up. @@ -447,12 +473,54 @@ static inline void setup_callbacks(struc } /** + * + * relay_hotcpu_callback - CPU hotplug callback + * @nb: notifier block + * @action: hotplug action to take + * @hcpu: CPU number + * + * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD) + */ +static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, + unsigned long action, + void *hcpu) +{ + unsigned int hotcpu = (unsigned long)hcpu; + struct rchan *chan; + + switch(action) { + case CPU_UP_PREPARE: + mutex_lock(&relay_channels_mutex); + list_for_each_entry(chan, &relay_channels, list) { + if (chan->buf[hotcpu]) + continue; + chan->buf[hotcpu] = relay_open_buf(chan, hotcpu); + if(!chan->buf[hotcpu]) { + printk(KERN_ERR + "relay_hotcpu_callback: cpu %d buffer " + "creation failed\n", hotcpu); + mutex_unlock(&relay_channels_mutex); + return NOTIFY_BAD; + } + } + mutex_unlock(&relay_channels_mutex); + break; + case CPU_DEAD: + /* No need to flush the cpu : will be flushed upon + * final relay_flush() call. */ + break; + } + return NOTIFY_OK; +} + +/** * relay_open - create a new relay channel * @base_filename: base name of files to create * @parent: dentry of parent directory, %NULL for root directory * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions + * @private_data: user-defined data * * Returns channel pointer if successful, %NULL otherwise. * @@ -465,13 +533,11 @@ struct rchan *relay_open(const char *bas struct dentry *parent, size_t subbuf_size, size_t n_subbufs, - struct rchan_callbacks *cb) + struct rchan_callbacks *cb, + void *private_data) { unsigned int i; struct rchan *chan; - char *tmpname; - int is_global = 0; - if (!base_filename) return NULL; @@ -486,38 +552,32 @@ struct rchan *relay_open(const char *bas chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs); + chan->parent = parent; + chan->private_data = private_data; + strlcpy(chan->base_filename, base_filename, NAME_MAX); setup_callbacks(chan, cb); kref_init(&chan->kref); - tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL); - if (!tmpname) - goto free_chan; - + mutex_lock(&relay_channels_mutex); for_each_online_cpu(i) { - sprintf(tmpname, "%s%d", base_filename, i); - chan->buf[i] = relay_open_buf(chan, tmpname, parent, - &is_global); + chan->buf[i] = relay_open_buf(chan, i); if (!chan->buf[i]) goto free_bufs; - - chan->buf[i]->cpu = i; } + list_add(&chan->list, &relay_channels); + mutex_unlock(&relay_channels_mutex); - kfree(tmpname); return chan; free_bufs: - for (i = 0; i < NR_CPUS; i++) { + for_each_online_cpu(i) { if (!chan->buf[i]) break; relay_close_buf(chan->buf[i]); - if (is_global) - break; } - kfree(tmpname); -free_chan: kref_put(&chan->kref, relay_destroy_channel); + mutex_unlock(&relay_channels_mutex); return NULL; } EXPORT_SYMBOL_GPL(relay_open); @@ -617,24 +677,26 @@ EXPORT_SYMBOL_GPL(relay_subbufs_consumed void relay_close(struct rchan *chan) { unsigned int i; - struct rchan_buf *prev = NULL; if (!chan) return; - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - relay_close_buf(chan->buf[i]); - prev = chan->buf[i]; - } + mutex_lock(&relay_channels_mutex); + if (chan->is_global && chan->buf[0]) + relay_close_buf(chan->buf[0]); + else + for_each_possible_cpu(i) + if (chan->buf[i]) + relay_close_buf(chan->buf[i]); if (chan->last_toobig) printk(KERN_WARNING "relay: one or more items not logged " "[item size (%Zd) > sub-buffer size (%Zd)]\n", chan->last_toobig, chan->subbuf_size); + list_del(&chan->list); kref_put(&chan->kref, relay_destroy_channel); + mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_close); @@ -647,17 +709,20 @@ EXPORT_SYMBOL_GPL(relay_close); void relay_flush(struct rchan *chan) { unsigned int i; - struct rchan_buf *prev = NULL; if (!chan) return; - for (i = 0; i < NR_CPUS; i++) { - if (!chan->buf[i] || chan->buf[i] == prev) - break; - relay_switch_subbuf(chan->buf[i], 0); - prev = chan->buf[i]; + if (chan->is_global && chan->buf[0]) { + relay_switch_subbuf(chan->buf[0], 0); + return; } + + mutex_lock(&relay_channels_mutex); + for_each_possible_cpu(i) + if (chan->buf[i]) + relay_switch_subbuf(chan->buf[i], 0); + mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_flush); @@ -1021,3 +1086,12 @@ struct file_operations relay_file_operat .sendfile = relay_file_sendfile, }; EXPORT_SYMBOL_GPL(relay_file_operations); + +static __init int relay_init(void) +{ + + hotcpu_notifier(relay_hotcpu_callback, 0); + return 0; +} + +module_init(relay_init); ^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-21 7:23 ` Andrew Morton 2006-12-21 7:04 ` Tom Zanussi @ 2006-12-22 10:37 ` Gautham R Shenoy 2006-12-22 10:44 ` Andrew Morton 1 sibling, 1 reply; 10+ messages in thread From: Gautham R Shenoy @ 2006-12-22 10:37 UTC (permalink / raw) To: Andrew Morton Cc: Mathieu Desnoyers, linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner, kiran, venkatesh.pallipadi, dipankar, vatsa, torvalds, davej Hi Andrew, While we are at this per-subsystem cpuhotplug "locking", here's a proposal that might put an end to the workqueue deadlock woes. I'm yet to cook up a patch for this, but here's the idea in brief. On Wed, Dec 20, 2006 at 11:23:50PM -0800, Andrew Morton wrote: > to the relay driver. Why do that - you don't own cpu_online_map (but you > do get some notifications when it wants to change, that's all). How about: Let each hot-cpu-aware subsystem maintain it's own online_cpus mask. Thus we can eliminate the global online_cpus mask and also have a clear picture of what data the per-subsystem mutexes are protecting :) In kenel/cpu.c _cpu_down() { send_all_pre_cpu_down_notifications(); . . . send_notifications_to_lock_per_subsystem_mutexes(); __stop_machine_run(); send_notifications_to_update_per_subsystem_online_cpus_mask(); send_notifications_to_release_per_subsystem_mutexes(); . . . send_all_post_cpu_down_notifications(); } Ditto for _cpu_up(). This will not only reduce the lock-contention , but will also allow the pre/post hotplug notifications handlers to make calls to function which are cpu-hotplug-aware (like create_workqueue, destroy_workqueues etc) without ending up in a recursive deadlock as the persubsystem mutexes would have been released by then. And since we are sending notifications to update per_subsystem_cpus_mask before sending the post_cpu_hotplug_notifications, the post_notification handlers will be executing with the consistent value of the online_cpus mask. Does anybody see a problem with this "update_now-cleanup_later" approach ? Thanks and Regards gautham. -- Gautham R Shenoy Linux Technology Center IBM India. "Freedom comes with a price tag of responsibility, which is still a bargain, because Freedom is priceless!" ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-22 10:37 ` Gautham R Shenoy @ 2006-12-22 10:44 ` Andrew Morton 2006-12-22 16:20 ` Oleg Nesterov 2006-12-22 20:01 ` Gautham R Shenoy 0 siblings, 2 replies; 10+ messages in thread From: Andrew Morton @ 2006-12-22 10:44 UTC (permalink / raw) To: ego, Oleg Nesterov Cc: Mathieu Desnoyers, linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner, kiran, venkatesh.pallipadi, dipankar, vatsa, torvalds, davej On Fri, 22 Dec 2006 16:07:24 +0530 Gautham R Shenoy <ego@in.ibm.com> wrote: > While we are at this per-subsystem cpuhotplug "locking", here's a > proposal that might put an end to the workqueue deadlock woes. Oleg is working on some patches which will permit us to cancel or wait upon a particular work_struct, rather than upon all pending work_structs. This will fix the problem where we accidentlly wait upon some unrelated work_struct which takes a lock which is related to one which we already hold. I hope. It'll be a bit tricky to implement: if some foreign work_struct is running right now, we cannot wait upon it - we must non-blockingly dequeue the work_struct which we want to kill before it gets to run. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-22 10:44 ` Andrew Morton @ 2006-12-22 16:20 ` Oleg Nesterov 2006-12-22 16:42 ` Oleg Nesterov 2006-12-22 20:01 ` Gautham R Shenoy 1 sibling, 1 reply; 10+ messages in thread From: Oleg Nesterov @ 2006-12-22 16:20 UTC (permalink / raw) To: Andrew Morton Cc: ego, Mathieu Desnoyers, linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner, kiran, venkatesh.pallipadi, dipankar, vatsa, torvalds, davej On 12/22, Andrew Morton wrote: > > On Fri, 22 Dec 2006 16:07:24 +0530 > Gautham R Shenoy <ego@in.ibm.com> wrote: > > > While we are at this per-subsystem cpuhotplug "locking", here's a > > proposal that might put an end to the workqueue deadlock woes. > > Oleg is working on some patches which will permit us to cancel or wait upon > a particular work_struct, rather than upon all pending work_structs. I hope there are completed. I am waiting for the next -mm release to send a "final" patch, I need too look at set_wq_data/set_wq_data when workqueue.c will be in sync with Linus's changes. > This will fix the problem where we accidentlly wait upon some unrelated > work_struct which takes a lock which is related to one which we already > hold. > > I hope. It'll be a bit tricky to implement: if some foreign work_struct is > running right now, we cannot wait upon it - we must non-blockingly dequeue > the work_struct which we want to kill before it gets to run. The previous patch I sent [PATCH, RFC rc1-mm1] implement flush_work() http://marc.theaimsgroup.com/?l=linux-kernel&m=116647310413104 has a race. +static void wait_on_work(struct cpu_workqueue_struct *cwq, + struct work_struct *work) +{ + struct wq_barrier barr; + int running = 0; + + spin_lock_irq(&cwq->lock); + if (get_wq_data(work) == cwq) { + list_del_init(&work->entry); + work_release(work); + } If that work is pending on CPU 1 it, and this CPU goes down, it may be moved to CPU 0 after flush_work() already checked CPU 0. I think we can do this: static void wait_on_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) { struct wq_barrier barr; int running = 0; spin_lock_irq(&cwq->lock); if (unlikely(cwq->current_work == work)) { init_wq_barrier(&barr); insert_work(cwq, &barr.work, 0); running = 1; } spin_unlock_irq(&cwq->lock); if (unlikely(running)) { mutex_unlock(&workqueue_mutex); wait_for_completion(&barr.done); mutex_lock(&workqueue_mutex); } } void flush_work(struct workqueue_struct *wq, struct work_struct *work) { struct cpu_workqueue_struct *cwq; cwq = get_wq_data(work); if (!cwq) return; spin_lock_irq(&cwq->lock); list_del_init(&work->entry); work_release(work); spin_unlock_irq(&cwq->lock); mutex_lock(&workqueue_mutex); if (is_single_threaded(wq)) { /* Always use first cpu's area. */ wait_on_work(per_cpu_ptr(wq->cpu_wq, singlethread_cpu), work); } else { int cpu; for_each_online_cpu(cpu) wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } mutex_unlock(&workqueue_mutex); } Do you see any problems? When wait_on_work() unlocks workqueue_mutex (or whatever we choose to protect against CPU hotplug), CPU may go away. But in that case take_over_work() will move a barrier we queued to another CPU, it will be fired sometime, and wait_on_work() will be woken. Actually, we are doing cleanup_workqueue_thread()->kthread_stop() before take_over_work(), so cwq->thread should complete its ->worklist (and thus the barrier), because currently we don't check kthread_should_stop() in run_workqueue(). But even if we did, everything looks safe to me. Oleg. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-22 16:20 ` Oleg Nesterov @ 2006-12-22 16:42 ` Oleg Nesterov 0 siblings, 0 replies; 10+ messages in thread From: Oleg Nesterov @ 2006-12-22 16:42 UTC (permalink / raw) To: Andrew Morton Cc: ego, Mathieu Desnoyers, linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner, kiran, venkatesh.pallipadi, dipankar, vatsa, torvalds, davej On 12/22, Oleg Nesterov wrote: > > void flush_work(struct workqueue_struct *wq, struct work_struct *work) > { > struct cpu_workqueue_struct *cwq; > > cwq = get_wq_data(work); > if (!cwq) > return; > > spin_lock_irq(&cwq->lock); > list_del_init(&work->entry); > work_release(work); > spin_unlock_irq(&cwq->lock); Err, forgot to mention, this should be done under workqueue_mutex or we should re-check cwq == get_wq_data(), I didn't decide yet. Sorry for extra noise. Oleg. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Relay CPU Hotplug support 2006-12-22 10:44 ` Andrew Morton 2006-12-22 16:20 ` Oleg Nesterov @ 2006-12-22 20:01 ` Gautham R Shenoy 1 sibling, 0 replies; 10+ messages in thread From: Gautham R Shenoy @ 2006-12-22 20:01 UTC (permalink / raw) To: Andrew Morton Cc: ego, Oleg Nesterov, Mathieu Desnoyers, linux-kernel, David Wilder, Tom Zanussi, Ingo Molnar, Greg Kroah-Hartman, Christoph Hellwig, ltt-dev, systemtap, Douglas Niehaus, Martin J. Bligh, Thomas Gleixner, kiran, venkatesh.pallipadi, dipankar, vatsa, torvalds, davej On Fri, Dec 22, 2006 at 02:44:58AM -0800, Andrew Morton wrote: > On Fri, 22 Dec 2006 16:07:24 +0530 > Gautham R Shenoy <ego@in.ibm.com> wrote: > > > While we are at this per-subsystem cpuhotplug "locking", here's a > > proposal that might put an end to the workqueue deadlock woes. > > Oleg is working on some patches which will permit us to cancel or wait upon > a particular work_struct, rather than upon all pending work_structs. > Oh! I was refering to the other set of workqueue deadlock woes. The ones caused when subsystems (like cpufreq) try to create/destroy workqueue from the cpuhotplug callback path. Creation/destruction of workqueue requires us to take workqueue_mutex, which would have already been taken during CPU_LOCK_ACQUIRE. More often than not, the cpu hotplug protection that we need is while accessing either cpu_online_map OR one of it's persubsystem mirrors like policy->cpus. So it makes more sense to have all the persubsystem mutexes held only during the cpu-hotplug operation (i.e stop_machine_run and __cpu_up) and release them immediately after sending notifications to update the persubsystem online_cpu map. Thanks and Regards gautham. -- Gautham R Shenoy Linux Technology Center IBM India. "Freedom comes with a price tag of responsibility, which is still a bargain, because Freedom is priceless!" ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2006-12-22 19:59 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-12-21 0:31 [PATCH] Relay CPU Hotplug support Mathieu Desnoyers 2006-12-21 1:13 ` Tom Zanussi 2006-12-21 3:22 ` Mathieu Desnoyers 2006-12-21 7:23 ` Andrew Morton 2006-12-21 7:04 ` Tom Zanussi 2006-12-22 10:37 ` Gautham R Shenoy 2006-12-22 10:44 ` Andrew Morton 2006-12-22 16:20 ` Oleg Nesterov 2006-12-22 16:42 ` Oleg Nesterov 2006-12-22 20:01 ` Gautham R Shenoy
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.