* [PATCH 1/3] jbd2,rcu: correctly use RCU
@ 2011-06-16 1:47 Lai Jiangshan
2011-06-16 1:47 ` [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU Lai Jiangshan
` (3 more replies)
0 siblings, 4 replies; 7+ messages in thread
From: Lai Jiangshan @ 2011-06-16 1:47 UTC (permalink / raw)
To: Paul E. McKenney, Steven Rostedt, Ingo Molnar, Theodore Tso,
linux-ext4, linux-k
Cc: Lai Jiangshan
In read site, we need to use local_ptr = rcu_dereference(),
and then use this local_ptr to read the content.
In update site, we should assign/publish the new object/pointer after
the content of the new object/pointer is fully initialized,
and we can't not touch the object after the pointer assignment.
rcu_assign_pointer() is need for the assignement.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
fs/jbd2/journal.c | 25 +++++++++++++------------
1 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9267291..1c45b6a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2439,7 +2439,7 @@ struct devname_cache {
char devname[BDEVNAME_SIZE];
};
#define CACHE_SIZE_BITS 6
-static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
+static struct devname_cache __rcu *devcache[1 << CACHE_SIZE_BITS];
static DEFINE_SPINLOCK(devname_cache_lock);
const char *jbd2_dev_to_name(dev_t device)
@@ -2447,24 +2447,25 @@ const char *jbd2_dev_to_name(dev_t device)
int i = hash_32(device, CACHE_SIZE_BITS);
char *ret;
struct block_device *bd;
- static struct devname_cache *new_dev;
+ static struct devname_cache *cache;
rcu_read_lock();
- if (devcache[i] && devcache[i]->device == device) {
- ret = devcache[i]->devname;
+ cache = rcu_dereference(devcache[i]);
+ if (cache && cache->device == device) {
+ ret = cache->devname;
rcu_read_unlock();
return ret;
}
rcu_read_unlock();
- new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
- if (!new_dev)
+ cache = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+ if (!cache)
return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
bd = bdget(device);
spin_lock(&devname_cache_lock);
if (devcache[i]) {
if (devcache[i]->device == device) {
- kfree(new_dev);
+ kfree(cache);
bdput(bd);
ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock);
@@ -2472,14 +2473,14 @@ const char *jbd2_dev_to_name(dev_t device)
}
kfree_rcu(devcache[i], rcu);
}
- devcache[i] = new_dev;
- devcache[i]->device = device;
+ cache->device = device;
if (bd) {
- bdevname(bd, devcache[i]->devname);
+ bdevname(bd, cache->devname);
bdput(bd);
} else
- __bdevname(device, devcache[i]->devname);
- ret = devcache[i]->devname;
+ __bdevname(device, cache->devname);
+ rcu_assign_pointer(devcache[i], cache);
+ ret = cache->devname;
spin_unlock(&devname_cache_lock);
return ret;
}
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU
2011-06-16 1:47 [PATCH 1/3] jbd2,rcu: correctly use RCU Lai Jiangshan
@ 2011-06-16 1:47 ` Lai Jiangshan
2011-07-08 2:06 ` Steven Rostedt
2011-06-16 1:47 ` [PATCH 3/3] jbd2,rcu: simpify jbd2_dev_to_name() Lai Jiangshan
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2011-06-16 1:47 UTC (permalink / raw)
To: Paul E. McKenney, Steven Rostedt, Ingo Molnar, Theodore Tso,
linux-ext4, linux-k
Cc: Lai Jiangshan
TP_printk() is not inside the RCU critical section, so the return value
of jbd2_dev_to_name() may be invalid. This fix pass a devname argument to
store the result name to void this problem.
And ftrace_print_bdevname() is introduced to wrap
trace_seq_reserve() and jbd2_dev_to_name().
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
fs/jbd2/journal.c | 27 ++++++++++++---------------
include/linux/ftrace_event.h | 10 ++++++++++
include/linux/jbd2.h | 6 +++---
include/trace/events/jbd2.h | 15 ++++++++-------
4 files changed, 33 insertions(+), 25 deletions(-)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 1c45b6a..c37597a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2427,11 +2427,8 @@ static void __exit journal_exit(void)
* jbd2_dev_to_name is a utility function used by the jbd2 and ext4
* tracing infrastructure to map a dev_t to a device name.
*
- * The caller should use rcu_read_lock() in order to make sure the
- * device name stays valid until its done with it. We use
- * rcu_read_lock() as well to make sure we're safe in case the caller
- * gets sloppy, and because rcu_read_lock() is cheap and can be safely
- * nested.
+ * We use caches to cache the results and use rcu_read_lock()
+ * to protect the caches.
*/
struct devname_cache {
struct rcu_head rcu;
@@ -2442,34 +2439,35 @@ struct devname_cache {
static struct devname_cache __rcu *devcache[1 << CACHE_SIZE_BITS];
static DEFINE_SPINLOCK(devname_cache_lock);
-const char *jbd2_dev_to_name(dev_t device)
+void jbd2_dev_to_name(dev_t device, char *devname)
{
int i = hash_32(device, CACHE_SIZE_BITS);
- char *ret;
struct block_device *bd;
static struct devname_cache *cache;
rcu_read_lock();
cache = rcu_dereference(devcache[i]);
if (cache && cache->device == device) {
- ret = cache->devname;
+ memcpy(devname, cache->devname, BDEVNAME_SIZE);
rcu_read_unlock();
- return ret;
+ return;
}
rcu_read_unlock();
cache = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
- if (!cache)
- return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+ if (!cache) {
+ __bdevname(device, devname);
+ return;
+ }
bd = bdget(device);
spin_lock(&devname_cache_lock);
if (devcache[i]) {
if (devcache[i]->device == device) {
kfree(cache);
bdput(bd);
- ret = devcache[i]->devname;
+ memcpy(devname, devcache[i]->devname, BDEVNAME_SIZE);
spin_unlock(&devname_cache_lock);
- return ret;
+ return;
}
kfree_rcu(devcache[i], rcu);
}
@@ -2480,9 +2478,8 @@ const char *jbd2_dev_to_name(dev_t device)
} else
__bdevname(device, cache->devname);
rcu_assign_pointer(devcache[i], cache);
- ret = cache->devname;
+ memcpy(devname, cache->devname, BDEVNAME_SIZE);
spin_unlock(&devname_cache_lock);
- return ret;
}
EXPORT_SYMBOL(jbd2_dev_to_name);
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 59d3ef1..2959ead 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -6,6 +6,7 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/perf_event.h>
+#include <linux/jbd2.h>
struct trace_array;
struct tracer;
@@ -28,6 +29,15 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
const struct trace_print_flags *symbol_array);
+static inline const char *ftrace_print_bdevname(struct trace_seq *p, dev_t device)
+{
+ char *bdevname = trace_seq_reserve(p, BDEVNAME_SIZE);
+
+ jbd2_dev_to_name(device, bdevname);
+
+ return bdevname;
+}
+
#if BITS_PER_LONG == 32
const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
unsigned long long val,
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 4ecb7b1..964f375 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1331,11 +1331,11 @@ extern int jbd_blocks_per_page(struct inode *inode);
#define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
#define JBUFFER_TRACE(jh, info) do {} while (0)
-/*
- * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
+/*
+ * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
* tracing infrastructure to map a dev_t to a device name.
*/
-extern const char *jbd2_dev_to_name(dev_t device);
+extern void jbd2_dev_to_name(dev_t device, char *bdevname);
#endif /* __KERNEL__ */
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index bf16545..a9c8b44 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -27,7 +27,7 @@ TRACE_EVENT(jbd2_checkpoint,
),
TP_printk("dev %s result %d",
- jbd2_dev_to_name(__entry->dev), __entry->result)
+ ftrace_print_bdevname(p, __entry->dev), __entry->result)
);
DECLARE_EVENT_CLASS(jbd2_commit,
@@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
),
TP_printk("dev %s transaction %d sync %d",
- jbd2_dev_to_name(__entry->dev), __entry->transaction,
+ ftrace_print_bdevname(p, __entry->dev), __entry->transaction,
__entry->sync_commit)
);
@@ -101,7 +101,7 @@ TRACE_EVENT(jbd2_end_commit,
),
TP_printk("dev %s transaction %d sync %d head %d",
- jbd2_dev_to_name(__entry->dev), __entry->transaction,
+ ftrace_print_bdevname(p, __entry->dev), __entry->transaction,
__entry->sync_commit, __entry->head)
);
@@ -121,7 +121,8 @@ TRACE_EVENT(jbd2_submit_inode_data,
),
TP_printk("dev %s ino %lu",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
+ ftrace_print_bdevname(p, __entry->dev),
+ (unsigned long) __entry->ino)
);
TRACE_EVENT(jbd2_run_stats,
@@ -158,7 +159,7 @@ TRACE_EVENT(jbd2_run_stats,
TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
"logging %u handle_count %u blocks %u blocks_logged %u",
- jbd2_dev_to_name(__entry->dev), __entry->tid,
+ ftrace_print_bdevname(p, __entry->dev), __entry->tid,
jiffies_to_msecs(__entry->wait),
jiffies_to_msecs(__entry->running),
jiffies_to_msecs(__entry->locked),
@@ -194,7 +195,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
"written %u dropped %u",
- jbd2_dev_to_name(__entry->dev), __entry->tid,
+ ftrace_print_bdevname(p, __entry->dev), __entry->tid,
jiffies_to_msecs(__entry->chp_time),
__entry->forced_to_close, __entry->written, __entry->dropped)
);
@@ -223,7 +224,7 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
),
TP_printk("dev %s from %u to %u offset %lu freed %lu",
- jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
+ ftrace_print_bdevname(p, __entry->dev), __entry->tail_sequence,
__entry->first_tid, __entry->block_nr, __entry->freed)
);
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/3] jbd2,rcu: simpify jbd2_dev_to_name()
2011-06-16 1:47 [PATCH 1/3] jbd2,rcu: correctly use RCU Lai Jiangshan
2011-06-16 1:47 ` [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU Lai Jiangshan
@ 2011-06-16 1:47 ` Lai Jiangshan
2011-07-11 0:25 ` [PATCH 1/3] jbd2,rcu: correctly use RCU Ted Ts'o
2011-07-11 1:24 ` Ted Ts'o
3 siblings, 0 replies; 7+ messages in thread
From: Lai Jiangshan @ 2011-06-16 1:47 UTC (permalink / raw)
To: Paul E. McKenney, Steven Rostedt, Ingo Molnar, Theodore Tso,
linux-ext4, linux-k
Cc: Lai Jiangshan
Simpify jbd2_dev_to_name() and use xchg() to make the update side
wait-free and remove the spinlock.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
fs/jbd2/journal.c | 42 +++++++++++++++++++-----------------------
1 files changed, 19 insertions(+), 23 deletions(-)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c37597a..7d5b6ac 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2437,7 +2437,6 @@ struct devname_cache {
};
#define CACHE_SIZE_BITS 6
static struct devname_cache __rcu *devcache[1 << CACHE_SIZE_BITS];
-static DEFINE_SPINLOCK(devname_cache_lock);
void jbd2_dev_to_name(dev_t device, char *devname)
{
@@ -2445,6 +2444,7 @@ void jbd2_dev_to_name(dev_t device, char *devname)
struct block_device *bd;
static struct devname_cache *cache;
+ /* get the name from the cache */
rcu_read_lock();
cache = rcu_dereference(devcache[i]);
if (cache && cache->device == device) {
@@ -2454,32 +2454,28 @@ void jbd2_dev_to_name(dev_t device, char *devname)
}
rcu_read_unlock();
- cache = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
- if (!cache) {
+ /* slowly get the name */
+ bd = bdget(device);
+ if (bd) {
+ bdevname(bd, devname);
+ bdput(bd);
+ } else {
__bdevname(device, devname);
return;
}
- bd = bdget(device);
- spin_lock(&devname_cache_lock);
- if (devcache[i]) {
- if (devcache[i]->device == device) {
- kfree(cache);
- bdput(bd);
- memcpy(devname, devcache[i]->devname, BDEVNAME_SIZE);
- spin_unlock(&devname_cache_lock);
- return;
- }
- kfree_rcu(devcache[i], rcu);
- }
+
+ /* put the name to the cache */
+ cache = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+ if (!cache)
+ return;
+
+ memcpy(cache->devname, devname, BDEVNAME_SIZE);
cache->device = device;
- if (bd) {
- bdevname(bd, cache->devname);
- bdput(bd);
- } else
- __bdevname(device, cache->devname);
- rcu_assign_pointer(devcache[i], cache);
- memcpy(devname, cache->devname, BDEVNAME_SIZE);
- spin_unlock(&devname_cache_lock);
+ cache = xchg(&devcache[i], cache);
+
+ /* free old cache by RCU */
+ if (cache)
+ kfree_rcu(cache, rcu);
}
EXPORT_SYMBOL(jbd2_dev_to_name);
--
1.7.4.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU
2011-06-16 1:47 ` [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU Lai Jiangshan
@ 2011-07-08 2:06 ` Steven Rostedt
2011-07-08 16:02 ` Paul E. McKenney
0 siblings, 1 reply; 7+ messages in thread
From: Steven Rostedt @ 2011-07-08 2:06 UTC (permalink / raw)
To: Lai Jiangshan
Cc: Paul E. McKenney, Ingo Molnar, Theodore Tso, linux-ext4,
linux-kernel
On Thu, 2011-06-16 at 09:47 +0800, Lai Jiangshan wrote:
> TP_printk() is not inside the RCU critical section, so the return value
> of jbd2_dev_to_name() may be invalid. This fix pass a devname argument to
> store the result name to void this problem.
>
> And ftrace_print_bdevname() is introduced to wrap
> trace_seq_reserve() and jbd2_dev_to_name().
>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Ted,
Are you going to take these patches? Probably too late for 3.0, but
maybe can get them into 3.1?
-- Steve
> ---
> fs/jbd2/journal.c | 27 ++++++++++++---------------
> include/linux/ftrace_event.h | 10 ++++++++++
> include/linux/jbd2.h | 6 +++---
> include/trace/events/jbd2.h | 15 ++++++++-------
> 4 files changed, 33 insertions(+), 25 deletions(-)
>
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 1c45b6a..c37597a 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -2427,11 +2427,8 @@ static void __exit journal_exit(void)
> * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
> * tracing infrastructure to map a dev_t to a device name.
> *
> - * The caller should use rcu_read_lock() in order to make sure the
> - * device name stays valid until its done with it. We use
> - * rcu_read_lock() as well to make sure we're safe in case the caller
> - * gets sloppy, and because rcu_read_lock() is cheap and can be safely
> - * nested.
> + * We use caches to cache the results and use rcu_read_lock()
> + * to protect the caches.
> */
> struct devname_cache {
> struct rcu_head rcu;
> @@ -2442,34 +2439,35 @@ struct devname_cache {
> static struct devname_cache __rcu *devcache[1 << CACHE_SIZE_BITS];
> static DEFINE_SPINLOCK(devname_cache_lock);
>
> -const char *jbd2_dev_to_name(dev_t device)
> +void jbd2_dev_to_name(dev_t device, char *devname)
> {
> int i = hash_32(device, CACHE_SIZE_BITS);
> - char *ret;
> struct block_device *bd;
> static struct devname_cache *cache;
>
> rcu_read_lock();
> cache = rcu_dereference(devcache[i]);
> if (cache && cache->device == device) {
> - ret = cache->devname;
> + memcpy(devname, cache->devname, BDEVNAME_SIZE);
> rcu_read_unlock();
> - return ret;
> + return;
> }
> rcu_read_unlock();
>
> cache = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
> - if (!cache)
> - return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
> + if (!cache) {
> + __bdevname(device, devname);
> + return;
> + }
> bd = bdget(device);
> spin_lock(&devname_cache_lock);
> if (devcache[i]) {
> if (devcache[i]->device == device) {
> kfree(cache);
> bdput(bd);
> - ret = devcache[i]->devname;
> + memcpy(devname, devcache[i]->devname, BDEVNAME_SIZE);
> spin_unlock(&devname_cache_lock);
> - return ret;
> + return;
> }
> kfree_rcu(devcache[i], rcu);
> }
> @@ -2480,9 +2478,8 @@ const char *jbd2_dev_to_name(dev_t device)
> } else
> __bdevname(device, cache->devname);
> rcu_assign_pointer(devcache[i], cache);
> - ret = cache->devname;
> + memcpy(devname, cache->devname, BDEVNAME_SIZE);
> spin_unlock(&devname_cache_lock);
> - return ret;
> }
> EXPORT_SYMBOL(jbd2_dev_to_name);
>
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index 59d3ef1..2959ead 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -6,6 +6,7 @@
> #include <linux/percpu.h>
> #include <linux/hardirq.h>
> #include <linux/perf_event.h>
> +#include <linux/jbd2.h>
>
> struct trace_array;
> struct tracer;
> @@ -28,6 +29,15 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
> const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
> const struct trace_print_flags *symbol_array);
>
> +static inline const char *ftrace_print_bdevname(struct trace_seq *p, dev_t device)
> +{
> + char *bdevname = trace_seq_reserve(p, BDEVNAME_SIZE);
> +
> + jbd2_dev_to_name(device, bdevname);
> +
> + return bdevname;
> +}
> +
> #if BITS_PER_LONG == 32
> const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
> unsigned long long val,
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 4ecb7b1..964f375 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -1331,11 +1331,11 @@ extern int jbd_blocks_per_page(struct inode *inode);
> #define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
> #define JBUFFER_TRACE(jh, info) do {} while (0)
>
> -/*
> - * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
> +/*
> + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
> * tracing infrastructure to map a dev_t to a device name.
> */
> -extern const char *jbd2_dev_to_name(dev_t device);
> +extern void jbd2_dev_to_name(dev_t device, char *bdevname);
>
> #endif /* __KERNEL__ */
>
> diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
> index bf16545..a9c8b44 100644
> --- a/include/trace/events/jbd2.h
> +++ b/include/trace/events/jbd2.h
> @@ -27,7 +27,7 @@ TRACE_EVENT(jbd2_checkpoint,
> ),
>
> TP_printk("dev %s result %d",
> - jbd2_dev_to_name(__entry->dev), __entry->result)
> + ftrace_print_bdevname(p, __entry->dev), __entry->result)
> );
>
> DECLARE_EVENT_CLASS(jbd2_commit,
> @@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
> ),
>
> TP_printk("dev %s transaction %d sync %d",
> - jbd2_dev_to_name(__entry->dev), __entry->transaction,
> + ftrace_print_bdevname(p, __entry->dev), __entry->transaction,
> __entry->sync_commit)
> );
>
> @@ -101,7 +101,7 @@ TRACE_EVENT(jbd2_end_commit,
> ),
>
> TP_printk("dev %s transaction %d sync %d head %d",
> - jbd2_dev_to_name(__entry->dev), __entry->transaction,
> + ftrace_print_bdevname(p, __entry->dev), __entry->transaction,
> __entry->sync_commit, __entry->head)
> );
>
> @@ -121,7 +121,8 @@ TRACE_EVENT(jbd2_submit_inode_data,
> ),
>
> TP_printk("dev %s ino %lu",
> - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
> + ftrace_print_bdevname(p, __entry->dev),
> + (unsigned long) __entry->ino)
> );
>
> TRACE_EVENT(jbd2_run_stats,
> @@ -158,7 +159,7 @@ TRACE_EVENT(jbd2_run_stats,
>
> TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
> "logging %u handle_count %u blocks %u blocks_logged %u",
> - jbd2_dev_to_name(__entry->dev), __entry->tid,
> + ftrace_print_bdevname(p, __entry->dev), __entry->tid,
> jiffies_to_msecs(__entry->wait),
> jiffies_to_msecs(__entry->running),
> jiffies_to_msecs(__entry->locked),
> @@ -194,7 +195,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
>
> TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
> "written %u dropped %u",
> - jbd2_dev_to_name(__entry->dev), __entry->tid,
> + ftrace_print_bdevname(p, __entry->dev), __entry->tid,
> jiffies_to_msecs(__entry->chp_time),
> __entry->forced_to_close, __entry->written, __entry->dropped)
> );
> @@ -223,7 +224,7 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
> ),
>
> TP_printk("dev %s from %u to %u offset %lu freed %lu",
> - jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
> + ftrace_print_bdevname(p, __entry->dev), __entry->tail_sequence,
> __entry->first_tid, __entry->block_nr, __entry->freed)
> );
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU
2011-07-08 2:06 ` Steven Rostedt
@ 2011-07-08 16:02 ` Paul E. McKenney
0 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2011-07-08 16:02 UTC (permalink / raw)
To: Steven Rostedt
Cc: Lai Jiangshan, Ingo Molnar, Theodore Tso, linux-ext4,
linux-kernel
On Thu, Jul 07, 2011 at 10:06:24PM -0400, Steven Rostedt wrote:
> On Thu, 2011-06-16 at 09:47 +0800, Lai Jiangshan wrote:
> > TP_printk() is not inside the RCU critical section, so the return value
> > of jbd2_dev_to_name() may be invalid. This fix pass a devname argument to
> > store the result name to void this problem.
> >
> > And ftrace_print_bdevname() is introduced to wrap
> > trace_seq_reserve() and jbd2_dev_to_name().
> >
> > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>
> Acked-by: Steven Rostedt <rostedt@goodmis.org>
>
> Ted,
>
> Are you going to take these patches? Probably too late for 3.0, but
> maybe can get them into 3.1?
Me too.
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
> -- Steve
>
> > ---
> > fs/jbd2/journal.c | 27 ++++++++++++---------------
> > include/linux/ftrace_event.h | 10 ++++++++++
> > include/linux/jbd2.h | 6 +++---
> > include/trace/events/jbd2.h | 15 ++++++++-------
> > 4 files changed, 33 insertions(+), 25 deletions(-)
> >
> > diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> > index 1c45b6a..c37597a 100644
> > --- a/fs/jbd2/journal.c
> > +++ b/fs/jbd2/journal.c
> > @@ -2427,11 +2427,8 @@ static void __exit journal_exit(void)
> > * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
> > * tracing infrastructure to map a dev_t to a device name.
> > *
> > - * The caller should use rcu_read_lock() in order to make sure the
> > - * device name stays valid until its done with it. We use
> > - * rcu_read_lock() as well to make sure we're safe in case the caller
> > - * gets sloppy, and because rcu_read_lock() is cheap and can be safely
> > - * nested.
> > + * We use caches to cache the results and use rcu_read_lock()
> > + * to protect the caches.
> > */
> > struct devname_cache {
> > struct rcu_head rcu;
> > @@ -2442,34 +2439,35 @@ struct devname_cache {
> > static struct devname_cache __rcu *devcache[1 << CACHE_SIZE_BITS];
> > static DEFINE_SPINLOCK(devname_cache_lock);
> >
> > -const char *jbd2_dev_to_name(dev_t device)
> > +void jbd2_dev_to_name(dev_t device, char *devname)
> > {
> > int i = hash_32(device, CACHE_SIZE_BITS);
> > - char *ret;
> > struct block_device *bd;
> > static struct devname_cache *cache;
> >
> > rcu_read_lock();
> > cache = rcu_dereference(devcache[i]);
> > if (cache && cache->device == device) {
> > - ret = cache->devname;
> > + memcpy(devname, cache->devname, BDEVNAME_SIZE);
> > rcu_read_unlock();
> > - return ret;
> > + return;
> > }
> > rcu_read_unlock();
> >
> > cache = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
> > - if (!cache)
> > - return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
> > + if (!cache) {
> > + __bdevname(device, devname);
> > + return;
> > + }
> > bd = bdget(device);
> > spin_lock(&devname_cache_lock);
> > if (devcache[i]) {
> > if (devcache[i]->device == device) {
> > kfree(cache);
> > bdput(bd);
> > - ret = devcache[i]->devname;
> > + memcpy(devname, devcache[i]->devname, BDEVNAME_SIZE);
> > spin_unlock(&devname_cache_lock);
> > - return ret;
> > + return;
> > }
> > kfree_rcu(devcache[i], rcu);
> > }
> > @@ -2480,9 +2478,8 @@ const char *jbd2_dev_to_name(dev_t device)
> > } else
> > __bdevname(device, cache->devname);
> > rcu_assign_pointer(devcache[i], cache);
> > - ret = cache->devname;
> > + memcpy(devname, cache->devname, BDEVNAME_SIZE);
> > spin_unlock(&devname_cache_lock);
> > - return ret;
> > }
> > EXPORT_SYMBOL(jbd2_dev_to_name);
> >
> > diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> > index 59d3ef1..2959ead 100644
> > --- a/include/linux/ftrace_event.h
> > +++ b/include/linux/ftrace_event.h
> > @@ -6,6 +6,7 @@
> > #include <linux/percpu.h>
> > #include <linux/hardirq.h>
> > #include <linux/perf_event.h>
> > +#include <linux/jbd2.h>
> >
> > struct trace_array;
> > struct tracer;
> > @@ -28,6 +29,15 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
> > const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
> > const struct trace_print_flags *symbol_array);
> >
> > +static inline const char *ftrace_print_bdevname(struct trace_seq *p, dev_t device)
> > +{
> > + char *bdevname = trace_seq_reserve(p, BDEVNAME_SIZE);
> > +
> > + jbd2_dev_to_name(device, bdevname);
> > +
> > + return bdevname;
> > +}
> > +
> > #if BITS_PER_LONG == 32
> > const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
> > unsigned long long val,
> > diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> > index 4ecb7b1..964f375 100644
> > --- a/include/linux/jbd2.h
> > +++ b/include/linux/jbd2.h
> > @@ -1331,11 +1331,11 @@ extern int jbd_blocks_per_page(struct inode *inode);
> > #define BUFFER_TRACE2(bh, bh2, info) do {} while (0)
> > #define JBUFFER_TRACE(jh, info) do {} while (0)
> >
> > -/*
> > - * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
> > +/*
> > + * jbd2_dev_to_name is a utility function used by the jbd2 and ext4
> > * tracing infrastructure to map a dev_t to a device name.
> > */
> > -extern const char *jbd2_dev_to_name(dev_t device);
> > +extern void jbd2_dev_to_name(dev_t device, char *bdevname);
> >
> > #endif /* __KERNEL__ */
> >
> > diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
> > index bf16545..a9c8b44 100644
> > --- a/include/trace/events/jbd2.h
> > +++ b/include/trace/events/jbd2.h
> > @@ -27,7 +27,7 @@ TRACE_EVENT(jbd2_checkpoint,
> > ),
> >
> > TP_printk("dev %s result %d",
> > - jbd2_dev_to_name(__entry->dev), __entry->result)
> > + ftrace_print_bdevname(p, __entry->dev), __entry->result)
> > );
> >
> > DECLARE_EVENT_CLASS(jbd2_commit,
> > @@ -49,7 +49,7 @@ DECLARE_EVENT_CLASS(jbd2_commit,
> > ),
> >
> > TP_printk("dev %s transaction %d sync %d",
> > - jbd2_dev_to_name(__entry->dev), __entry->transaction,
> > + ftrace_print_bdevname(p, __entry->dev), __entry->transaction,
> > __entry->sync_commit)
> > );
> >
> > @@ -101,7 +101,7 @@ TRACE_EVENT(jbd2_end_commit,
> > ),
> >
> > TP_printk("dev %s transaction %d sync %d head %d",
> > - jbd2_dev_to_name(__entry->dev), __entry->transaction,
> > + ftrace_print_bdevname(p, __entry->dev), __entry->transaction,
> > __entry->sync_commit, __entry->head)
> > );
> >
> > @@ -121,7 +121,8 @@ TRACE_EVENT(jbd2_submit_inode_data,
> > ),
> >
> > TP_printk("dev %s ino %lu",
> > - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
> > + ftrace_print_bdevname(p, __entry->dev),
> > + (unsigned long) __entry->ino)
> > );
> >
> > TRACE_EVENT(jbd2_run_stats,
> > @@ -158,7 +159,7 @@ TRACE_EVENT(jbd2_run_stats,
> >
> > TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
> > "logging %u handle_count %u blocks %u blocks_logged %u",
> > - jbd2_dev_to_name(__entry->dev), __entry->tid,
> > + ftrace_print_bdevname(p, __entry->dev), __entry->tid,
> > jiffies_to_msecs(__entry->wait),
> > jiffies_to_msecs(__entry->running),
> > jiffies_to_msecs(__entry->locked),
> > @@ -194,7 +195,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
> >
> > TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
> > "written %u dropped %u",
> > - jbd2_dev_to_name(__entry->dev), __entry->tid,
> > + ftrace_print_bdevname(p, __entry->dev), __entry->tid,
> > jiffies_to_msecs(__entry->chp_time),
> > __entry->forced_to_close, __entry->written, __entry->dropped)
> > );
> > @@ -223,7 +224,7 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
> > ),
> >
> > TP_printk("dev %s from %u to %u offset %lu freed %lu",
> > - jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
> > + ftrace_print_bdevname(p, __entry->dev), __entry->tail_sequence,
> > __entry->first_tid, __entry->block_nr, __entry->freed)
> > );
> >
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] jbd2,rcu: correctly use RCU
2011-06-16 1:47 [PATCH 1/3] jbd2,rcu: correctly use RCU Lai Jiangshan
2011-06-16 1:47 ` [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU Lai Jiangshan
2011-06-16 1:47 ` [PATCH 3/3] jbd2,rcu: simpify jbd2_dev_to_name() Lai Jiangshan
@ 2011-07-11 0:25 ` Ted Ts'o
2011-07-11 1:24 ` Ted Ts'o
3 siblings, 0 replies; 7+ messages in thread
From: Ted Ts'o @ 2011-07-11 0:25 UTC (permalink / raw)
To: Lai Jiangshan
Cc: Paul E. McKenney, Steven Rostedt, Ingo Molnar, linux-ext4,
linux-kernel
On Thu, Jun 16, 2011 at 09:47:04AM +0800, Lai Jiangshan wrote:
> In read site, we need to use local_ptr = rcu_dereference(),
> and then use this local_ptr to read the content.
>
> In update site, we should assign/publish the new object/pointer after
> the content of the new object/pointer is fully initialized,
> and we can't not touch the object after the pointer assignment.
> rcu_assign_pointer() is need for the assignement.
>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Thanks, applied.
> @@ -2447,24 +2447,25 @@ const char *jbd2_dev_to_name(dev_t device)
> int i = hash_32(device, CACHE_SIZE_BITS);
> char *ret;
> struct block_device *bd;
> - static struct devname_cache *new_dev;
> + static struct devname_cache *cache;
I also removed the static modifier to the struct devname_cache
pointer. It's pointless, and in fact introduces a bug if two CPU's
try to run jbd2_dev_to_name() at the same time.
- Ted
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] jbd2,rcu: correctly use RCU
2011-06-16 1:47 [PATCH 1/3] jbd2,rcu: correctly use RCU Lai Jiangshan
` (2 preceding siblings ...)
2011-07-11 0:25 ` [PATCH 1/3] jbd2,rcu: correctly use RCU Ted Ts'o
@ 2011-07-11 1:24 ` Ted Ts'o
3 siblings, 0 replies; 7+ messages in thread
From: Ted Ts'o @ 2011-07-11 1:24 UTC (permalink / raw)
To: Lai Jiangshan
Cc: Paul E. McKenney, Steven Rostedt, Ingo Molnar, linux-ext4,
linux-kernel
On Thu, Jun 16, 2011 at 09:47:04AM +0800, Lai Jiangshan wrote:
> In read site, we need to use local_ptr = rcu_dereference(),
> and then use this local_ptr to read the content.
>
> In update site, we should assign/publish the new object/pointer after
> the content of the new object/pointer is fully initialized,
> and we can't not touch the object after the pointer assignment.
> rcu_assign_pointer() is need for the assignement.
>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Actually, after thinking about this some more, I think I'm going to
just change the jbd2 events to simply print MAJOR(__entry->dev) and
MINOR(__entry->dev). Otherwise, the perf tool stops dies and stops
interpreting the trace points when it tries to interpret
"jbd2_dev_to_name(REC->dev)" in the print format.
So I'll just drop jbd2_dev_to_name() completely for the jbd2
tracepoints. It's not worth it.
- Ted
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2011-07-11 1:24 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-06-16 1:47 [PATCH 1/3] jbd2,rcu: correctly use RCU Lai Jiangshan
2011-06-16 1:47 ` [PATCH 2/3] jbd2,tracing,rcu: jbd2_dev_to_name() is not protect by RCU Lai Jiangshan
2011-07-08 2:06 ` Steven Rostedt
2011-07-08 16:02 ` Paul E. McKenney
2011-06-16 1:47 ` [PATCH 3/3] jbd2,rcu: simpify jbd2_dev_to_name() Lai Jiangshan
2011-07-11 0:25 ` [PATCH 1/3] jbd2,rcu: correctly use RCU Ted Ts'o
2011-07-11 1:24 ` Ted Ts'o
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).