* [PATCH 01/16] blktrace: split do_blk_trace_setup into two functions
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
@ 2025-09-09 11:05 ` Johannes Thumshirn
2025-09-19 15:06 ` Christoph Hellwig
2025-09-09 11:05 ` [PATCH 02/16] blktrace: add definitions for blk_user_trace_setup2 Johannes Thumshirn
` (14 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:05 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Split do_blk_trace_setup into two functions. This is done to prepare for
an incoming new BLKTRACESETUP2 ioctl(2), which can receive extended
parameters from user-space.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 95 ++++++++++++++++++++++++-----------------
1 file changed, 57 insertions(+), 38 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6941145b5058..487eabfaf70e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -494,9 +494,10 @@ static void blk_trace_setup_lba(struct blk_trace *bt,
/*
* Setup everything required to start tracing
*/
-static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct block_device *bdev,
- struct blk_user_trace_setup *buts)
+static struct blk_trace *blk_trace_setup_prepare(struct request_queue *q,
+ char *name, dev_t dev,
+ u32 buf_size, u32 buf_nr,
+ struct block_device *bdev)
{
struct blk_trace *bt = NULL;
struct dentry *dir = NULL;
@@ -504,31 +505,19 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
lockdep_assert_held(&q->debugfs_mutex);
- if (!buts->buf_size || !buts->buf_nr)
- return -EINVAL;
-
- strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE);
-
- /*
- * some device names have larger paths - convert the slashes
- * to underscores for this to work as expected
- */
- strreplace(buts->name, '/', '_');
-
/*
* bdev can be NULL, as with scsi-generic, this is a helpful as
* we can be.
*/
if (rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->debugfs_mutex))) {
- pr_warn("Concurrent blktraces are not allowed on %s\n",
- buts->name);
- return -EBUSY;
+ pr_warn("Concurrent blktraces are not allowed on %s\n", name);
+ return ERR_PTR(-EBUSY);
}
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
bt->sequence = alloc_percpu(unsigned long);
@@ -548,7 +537,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (bdev && !bdev_is_partition(bdev))
dir = q->debugfs_dir;
else
- bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
+ bt->dir = dir = debugfs_create_dir(name, blk_debugfs_root);
/*
* As blktrace relies on debugfs for its interface the debugfs directory
@@ -556,8 +545,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
* files or directories.
*/
if (IS_ERR_OR_NULL(dir)) {
- pr_warn("debugfs_dir not present for %s so skipping\n",
- buts->name);
+ pr_warn("debugfs_dir not present for %s so skipping\n", name);
ret = -ENOENT;
goto err;
}
@@ -569,17 +557,39 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
- bt->rchan = relay_open("trace", dir, buts->buf_size,
- buts->buf_nr, &blk_relay_callbacks, bt);
+ bt->rchan = relay_open("trace", dir, buf_size, buf_nr,
+ &blk_relay_callbacks, bt);
if (!bt->rchan)
goto err;
+ blk_trace_setup_lba(bt, bdev);
+
+ return bt;
+
+err:
+ if (ret)
+ blk_trace_free(q, bt);
+
+ return ERR_PTR(ret);
+}
+
+static void blk_trace_setup_finalize(struct request_queue *q,
+ char *name, struct blk_trace *bt,
+ struct blk_user_trace_setup *buts)
+
+{
+ strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE);
+
+ /*
+ * some device names have larger paths - convert the slashes
+ * to underscores for this to work as expected
+ */
+ strreplace(buts->name, '/', '_');
+
bt->act_mask = buts->act_mask;
if (!bt->act_mask)
bt->act_mask = (u16) -1;
- blk_trace_setup_lba(bt, bdev);
-
/* overwrite with user settings */
if (buts->start_lba)
bt->start_lba = buts->start_lba;
@@ -591,12 +601,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
-
- ret = 0;
-err:
- if (ret)
- blk_trace_free(q, bt);
- return ret;
}
int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
@@ -604,17 +608,25 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
char __user *arg)
{
struct blk_user_trace_setup buts;
+ struct blk_trace *bt;
int ret;
ret = copy_from_user(&buts, arg, sizeof(buts));
if (ret)
return -EFAULT;
+ if (!buts.buf_size || !buts.buf_nr)
+ return -EINVAL;
+
mutex_lock(&q->debugfs_mutex);
- ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
+ bdev);
+ if (IS_ERR(bt)) {
+ mutex_unlock(&q->debugfs_mutex);
+ return PTR_ERR(bt);
+ }
+ blk_trace_setup_finalize(q, name, bt, &buts);
mutex_unlock(&q->debugfs_mutex);
- if (ret)
- return ret;
if (copy_to_user(arg, &buts, sizeof(buts))) {
blk_trace_remove(q);
@@ -631,11 +643,14 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
{
struct blk_user_trace_setup buts;
struct compat_blk_user_trace_setup cbuts;
- int ret;
+ struct blk_trace *bt;
if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
return -EFAULT;
+ if (!cbuts.buf_size || !cbuts.buf_nr)
+ return -EINVAL;
+
buts = (struct blk_user_trace_setup) {
.act_mask = cbuts.act_mask,
.buf_size = cbuts.buf_size,
@@ -646,10 +661,14 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
};
mutex_lock(&q->debugfs_mutex);
- ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
+ bdev);
+ if (IS_ERR(bt)) {
+ mutex_unlock(&q->debugfs_mutex);
+ return PTR_ERR(bt);
+ }
+ blk_trace_setup_finalize(q, name, bt, &buts);
mutex_unlock(&q->debugfs_mutex);
- if (ret)
- return ret;
if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
blk_trace_remove(q);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 01/16] blktrace: split do_blk_trace_setup into two functions
2025-09-09 11:05 ` [PATCH 01/16] blktrace: split do_blk_trace_setup into two functions Johannes Thumshirn
@ 2025-09-19 15:06 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:06 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
On Tue, Sep 09, 2025 at 01:05:56PM +0200, Johannes Thumshirn wrote:
> Split do_blk_trace_setup into two functions, this is done to prepare for
> an incoming new BLKTRACESETUP2 ioctl(2) which can receive extended
> parameters form user-space.
This does not just split the function, but also moves some of the logic
to the caller. That looks fine, but it should be documented here or,
even better, split out into a separate patch.
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 02/16] blktrace: add definitions for blk_user_trace_setup2
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
2025-09-09 11:05 ` [PATCH 01/16] blktrace: split do_blk_trace_setup into two functions Johannes Thumshirn
@ 2025-09-09 11:05 ` Johannes Thumshirn
2025-09-19 15:07 ` Christoph Hellwig
2025-09-09 11:05 ` [PATCH 03/16] blktrace: pass blk_user_trace2 to setup functions Johannes Thumshirn
` (13 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:05 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Add definitions for a version 2 of the blk_user_trace_setup ioctl. This
new version will enable a different struct layout of the binary data passed
to user-space when using a new version of the blktrace utility requesting the
new struct layout.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/uapi/linux/blktrace_api.h | 14 ++++++++++++++
include/uapi/linux/fs.h | 1 +
2 files changed, 15 insertions(+)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 1bfb635e309b..ba61374f90d8 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -143,4 +143,18 @@ struct blk_user_trace_setup {
__u32 pid;
};
+/*
+ * User setup structure passed with BLKTRACESETUP2
+ */
+struct blk_user_trace_setup2 {
+ char name[32]; /* output */
+ __u64 act_mask; /* input */
+ __u32 buf_size; /* input */
+ __u32 buf_nr; /* input */
+ __u64 start_lba;
+ __u64 end_lba;
+ __u32 pid;
+ __u32 reserved; /* for future use */
+};
+
#endif /* _UAPIBLKTRACE_H */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 0bd678a4a10e..a85d0b52a3f6 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -300,6 +300,7 @@ struct file_attr {
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
+#define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2)
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 02/16] blktrace: add definitions for blk_user_trace_setup2
2025-09-09 11:05 ` [PATCH 02/16] blktrace: add definitions for blk_user_trace_setup2 Johannes Thumshirn
@ 2025-09-19 15:07 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:07 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
>
> +/*
> + * User setup structure passed with BLKTRACESETUP2
> + */
> +struct blk_user_trace_setup2 {
> + char name[32]; /* output */
> + __u64 act_mask; /* input */
> + __u32 buf_size; /* input */
> + __u32 buf_nr; /* input */
> + __u64 start_lba;
> + __u64 end_lba;
> + __u32 pid;
> + __u32 reserved; /* for futute use */
I'd rename reserved to flags, check that it is zero, and then
add a few more __u64 fields for extensibility.
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 03/16] blktrace: pass blk_user_trace2 to setup functions
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
2025-09-09 11:05 ` [PATCH 01/16] blktrace: split do_blk_trace_setup into two functions Johannes Thumshirn
2025-09-09 11:05 ` [PATCH 02/16] blktrace: add definitions for blk_user_trace_setup2 Johannes Thumshirn
@ 2025-09-09 11:05 ` Johannes Thumshirn
2025-09-19 15:07 ` Christoph Hellwig
2025-09-09 11:05 ` [PATCH 04/16] blktrace: add definitions for struct blk_io_trace2 Johannes Thumshirn
` (12 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:05 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Pass struct blk_user_trace_setup2 to blk_trace_setup_finalize(). This
prepares for the incoming extension of the blktrace protocol with a 64-bit
act_mask.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/linux/blktrace_api.h | 3 ++-
kernel/trace/blktrace.c | 27 ++++++++++++++++++++-------
2 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 122c62e561fc..05c8754456aa 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -14,11 +14,12 @@
#include <linux/sysfs.h>
struct blk_trace {
+ int version;
int trace_state;
struct rchan *rchan;
unsigned long __percpu *sequence;
unsigned char __percpu *msg_data;
- u16 act_mask;
+ u64 act_mask;
u64 start_lba;
u64 end_lba;
u32 pid;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 487eabfaf70e..4a642a812854 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -575,7 +575,7 @@ static struct blk_trace *blk_trace_setup_prepare(struct request_queue *q,
static void blk_trace_setup_finalize(struct request_queue *q,
char *name, struct blk_trace *bt,
- struct blk_user_trace_setup *buts)
+ struct blk_user_trace_setup2 *buts)
{
strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE);
@@ -607,6 +607,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
struct block_device *bdev,
char __user *arg)
{
+ struct blk_user_trace_setup2 buts2;
struct blk_user_trace_setup buts;
struct blk_trace *bt;
int ret;
@@ -618,6 +619,15 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!buts.buf_size || !buts.buf_nr)
return -EINVAL;
+ buts2 = (struct blk_user_trace_setup2) {
+ .act_mask = buts.act_mask,
+ .buf_size = buts.buf_size,
+ .buf_nr = buts.buf_nr,
+ .start_lba = buts.start_lba,
+ .end_lba = buts.end_lba,
+ .pid = buts.pid,
+ };
+
mutex_lock(&q->debugfs_mutex);
bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
bdev);
@@ -625,7 +635,9 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
mutex_unlock(&q->debugfs_mutex);
return PTR_ERR(bt);
}
- blk_trace_setup_finalize(q, name, bt, &buts);
+ bt->version = 1;
+ blk_trace_setup_finalize(q, name, bt, &buts2);
+ strcpy(buts.name, buts2.name);
mutex_unlock(&q->debugfs_mutex);
if (copy_to_user(arg, &buts, sizeof(buts))) {
@@ -641,7 +653,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
dev_t dev, struct block_device *bdev,
char __user *arg)
{
- struct blk_user_trace_setup buts;
+ struct blk_user_trace_setup2 buts2;
struct compat_blk_user_trace_setup cbuts;
struct blk_trace *bt;
@@ -651,7 +663,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
if (!cbuts.buf_size || !cbuts.buf_nr)
return -EINVAL;
- buts = (struct blk_user_trace_setup) {
+ buts2 = (struct blk_user_trace_setup2) {
.act_mask = cbuts.act_mask,
.buf_size = cbuts.buf_size,
.buf_nr = cbuts.buf_nr,
@@ -661,16 +673,17 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
};
mutex_lock(&q->debugfs_mutex);
- bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
+ bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
bdev);
if (IS_ERR(bt)) {
mutex_unlock(&q->debugfs_mutex);
return PTR_ERR(bt);
}
- blk_trace_setup_finalize(q, name, bt, &buts);
+ bt->version = 1;
+ blk_trace_setup_finalize(q, name, bt, &buts2);
mutex_unlock(&q->debugfs_mutex);
- if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
+ if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) {
blk_trace_remove(q);
return -EFAULT;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 03/16] blktrace: pass blk_user_trace2 to setup functions
2025-09-09 11:05 ` [PATCH 03/16] blktrace: pass blk_user_trace2 to setup functions Johannes Thumshirn
@ 2025-09-19 15:07 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:07 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
Looks fine:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 04/16] blktrace: add definitions for struct blk_io_trace2
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (2 preceding siblings ...)
2025-09-09 11:05 ` [PATCH 03/16] blktrace: pass blk_user_trace2 to setup functions Johannes Thumshirn
@ 2025-09-09 11:05 ` Johannes Thumshirn
2025-09-19 15:08 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 05/16] blktrace: factor out recording a blktrace event Johannes Thumshirn
` (11 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:05 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Add definitions for the extended version of the blktrace protocol using a
wider action type to be able to record new actions in the kernel.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/uapi/linux/blktrace_api.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index ba61374f90d8..01779f84d09f 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -94,6 +94,7 @@ enum blktrace_notify {
#define BLK_IO_TRACE_MAGIC 0x65617400
#define BLK_IO_TRACE_VERSION 0x07
+#define BLK_IO_TRACE2_VERSION 0x08
/*
* The trace itself
@@ -113,6 +114,20 @@ struct blk_io_trace {
/* cgroup id will be stored here if exists */
};
+struct blk_io_trace2 {
+ __u32 magic; /* MAGIC << 8 | BLK_IO_TRACE2_VERSION */
+ __u32 sequence; /* event number */
+ __u64 time; /* in nanoseconds */
+ __u64 sector; /* disk offset */
+ __u32 bytes; /* transfer length */
+ __u32 pid; /* who did it */
+ __u64 action; /* what happened */
+ __u32 device; /* device number */
+ __u32 cpu; /* on what cpu did it happen */
+ __u16 error; /* completion error */
+ __u16 pdu_len; /* length of data after this trace */
+ /* cgroup id will be stored here if exists */
+};
/*
* The remap event
*/
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 04/16] blktrace: add definitions for struct blk_io_trace2
2025-09-09 11:05 ` [PATCH 04/16] blktrace: add definitions for struct blk_io_trace2 Johannes Thumshirn
@ 2025-09-19 15:08 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:08 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
On Tue, Sep 09, 2025 at 01:05:59PM +0200, Johannes Thumshirn wrote:
> +struct blk_io_trace2 {
> + __u32 magic; /* MAGIC << 8 | BLK_IO_TRACE2_VERSION */
> + __u32 sequence; /* event number */
> + __u64 time; /* in nanoseconds */
> + __u64 sector; /* disk offset */
> + __u32 bytes; /* transfer length */
> + __u32 pid; /* who did it */
> + __u64 action; /* what happened */
> + __u32 device; /* device number */
> + __u32 cpu; /* on what cpu did it happen */
> + __u16 error; /* completion error */
> + __u16 pdu_len; /* length of data after this trace */
> + /* cgroup id will be stored here if exists */
> +};
This structure is not u64-aligned, which means it will have different
sizes for x86-32 vs all other architectures, making it a pain to handle.
Also maybe add some extra padding so that we can extend this?
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 05/16] blktrace: factor out recording a blktrace event
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (3 preceding siblings ...)
2025-09-09 11:05 ` [PATCH 04/16] blktrace: add definitions for struct blk_io_trace2 Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:09 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 06/16] blktrace: only calculate trace length once Johannes Thumshirn
` (10 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Factor out the recording of a blktrace event into its own function,
deduplicating the code.
This also enables recording different versions of the blktrace protocol
later on.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 89 +++++++++++++++++++++++------------------
1 file changed, 49 insertions(+), 40 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 4a642a812854..5db0c1a4ef5e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -63,6 +63,34 @@ static int blk_probes_ref;
static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
+static void record_blktrace_event(struct blk_io_trace *t, pid_t pid, int cpu,
+ sector_t sector, int bytes, u32 what,
+ dev_t dev, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+
+{
+ /*
+ * These two are not needed in ftrace as they are in the
+ * generic trace_entry, filled by tracing_generic_entry_update,
+ * but for the trace_event->bin() synthesizer benefit we do it
+ * here too.
+ */
+ t->cpu = cpu;
+ t->pid = pid;
+
+ t->sector = sector;
+ t->bytes = bytes;
+ t->action = what;
+ t->device = dev;
+ t->error = error;
+ t->pdu_len = pdu_len + cgid_len;
+
+ if (cgid_len)
+ memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
+ if (pdu_len)
+ memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
+}
+
/*
* Send out a notify message.
*/
@@ -86,7 +114,12 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (!event)
return;
t = ring_buffer_event_data(event);
- goto record_it;
+ record_blktrace_event(t, pid, cpu, 0, 0,
+ action | (cgid ? __BLK_TN_CGROUP : 0),
+ bt->dev, 0, cgid, cgid_len, (void *)data,
+ len);
+ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
+ return;
}
if (!bt->rchan)
@@ -96,18 +129,11 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (t) {
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->time = ktime_to_ns(ktime_get());
-record_it:
- t->device = bt->dev;
- t->action = action | (cgid ? __BLK_TN_CGROUP : 0);
- t->pid = pid;
- t->cpu = cpu;
- t->pdu_len = len + cgid_len;
- if (cgid_len)
- memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
- memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
-
- if (blk_tracer)
- trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
+
+ record_blktrace_event(t, pid, cpu, 0, 0,
+ action | (cgid ? __BLK_TN_CGROUP : 0),
+ bt->dev, 0, cgid, cgid_len, (void *)data,
+ len);
}
}
@@ -261,7 +287,12 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
if (!event)
return;
t = ring_buffer_event_data(event);
- goto record_it;
+
+ record_blktrace_event(t, pid, cpu, sector, bytes, what, bt->dev,
+ error, cgid, cgid_len, pdu_data, pdu_len);
+
+ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
+ return;
}
if (unlikely(tsk->btrace_seq != blktrace_seq))
@@ -280,32 +311,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->sequence = ++(*sequence);
t->time = ktime_to_ns(ktime_get());
-record_it:
- /*
- * These two are not needed in ftrace as they are in the
- * generic trace_entry, filled by tracing_generic_entry_update,
- * but for the trace_event->bin() synthesizer benefit we do it
- * here too.
- */
- t->cpu = cpu;
- t->pid = pid;
-
- t->sector = sector;
- t->bytes = bytes;
- t->action = what;
- t->device = bt->dev;
- t->error = error;
- t->pdu_len = pdu_len + cgid_len;
-
- if (cgid_len)
- memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
- if (pdu_len)
- memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
-
- if (blk_tracer) {
- trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
- return;
- }
+
+ record_blktrace_event(t, pid, cpu, sector, bytes, what,
+ bt->dev, error, cgid, cgid_len,
+ pdu_data, pdu_len);
}
local_irq_restore(flags);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 05/16] blktrace: factor out recording a blktrace event
2025-09-09 11:06 ` [PATCH 05/16] blktrace: factor out recording a blktrace event Johannes Thumshirn
@ 2025-09-19 15:09 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:09 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 06/16] blktrace: only calculate trace length once
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (4 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 05/16] blktrace: factor out recording a blktrace event Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:09 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 07/16] blktrace: split out relaying a blktrace event Johannes Thumshirn
` (9 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
De-duplicate the calculation of the trace length instead of doing the
calculation twice, once for calling trace_buffer_lock_reserve() and once
for calling relay_reserve().
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 5db0c1a4ef5e..d06519957c25 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -104,13 +104,14 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
int cpu = smp_processor_id();
bool blk_tracer = blk_tracer_enabled;
ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
+ size_t trace_len;
+ trace_len = sizeof(*t) + cgid_len + len;
if (blk_tracer) {
buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0);
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
- sizeof(*t) + len + cgid_len,
- trace_ctx);
+ trace_len, trace_ctx);
if (!event)
return;
t = ring_buffer_event_data(event);
@@ -125,7 +126,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (!bt->rchan)
return;
- t = relay_reserve(bt->rchan, sizeof(*t) + len + cgid_len);
+ t = relay_reserve(bt->rchan, trace_len);
if (t) {
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->time = ktime_to_ns(ktime_get());
@@ -254,6 +255,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
bool blk_tracer = blk_tracer_enabled;
ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
const enum req_op op = opf & REQ_OP_MASK;
+ size_t trace_len;
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
return;
@@ -276,14 +278,14 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
cpu = raw_smp_processor_id();
+ trace_len = sizeof(*t) + pdu_len + cgid_len;
if (blk_tracer) {
tracing_record_cmdline(current);
buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0);
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
- sizeof(*t) + pdu_len + cgid_len,
- trace_ctx);
+ trace_len, trace_ctx);
if (!event)
return;
t = ring_buffer_event_data(event);
@@ -304,7 +306,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
* from coming in and stepping on our toes.
*/
local_irq_save(flags);
- t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len + cgid_len);
+ t = relay_reserve(bt->rchan, trace_len);
if (t) {
sequence = per_cpu_ptr(bt->sequence, cpu);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 06/16] blktrace: only calculate trace length once
2025-09-09 11:06 ` [PATCH 06/16] blktrace: only calculate trace length once Johannes Thumshirn
@ 2025-09-19 15:09 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:09 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
On Tue, Sep 09, 2025 at 01:06:01PM +0200, Johannes Thumshirn wrote:
> De-duplicate the calculation of the trace length instead of doing the
> calculation twice, once for calling trace_buffer_lock_reserve() and once
> for calling relay_reserve().
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
It would be nice if these cleanups were at the beginning of the series,
before adding new data structures, though.
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 07/16] blktrace: split out relaying a blktrace event
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (5 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 06/16] blktrace: only calculate trace length once Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:10 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 08/16] blktrace: change the internal action to 64bit Johannes Thumshirn
` (8 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Split out the code relaying a blktrace event to user-space using relayfs.
This enables adding a second version supporting a new version of the
protocol.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 50 ++++++++++++++++++++++-------------------
1 file changed, 27 insertions(+), 23 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d06519957c25..24eef7b116b5 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -91,6 +91,26 @@ static void record_blktrace_event(struct blk_io_trace *t, pid_t pid, int cpu,
memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
}
+static void relay_blktrace_event(struct blk_trace *bt, unsigned long sequence,
+ pid_t pid, int cpu, sector_t sector, int bytes,
+ u32 what, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+{
+ struct blk_io_trace *t;
+ size_t trace_len = sizeof(*t) + pdu_len + cgid_len;
+
+ t = relay_reserve(bt->rchan, trace_len);
+ if (!t)
+ return;
+
+ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
+ t->sequence = sequence;
+ t->time = ktime_to_ns(ktime_get());
+
+ record_blktrace_event(t, pid, cpu, sector, bytes, what, bt->dev, error,
+ cgid, cgid_len, pdu_data, pdu_len);
+}
+
/*
* Send out a notify message.
*/
@@ -126,16 +146,9 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (!bt->rchan)
return;
- t = relay_reserve(bt->rchan, trace_len);
- if (t) {
- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
- t->time = ktime_to_ns(ktime_get());
-
- record_blktrace_event(t, pid, cpu, 0, 0,
- action | (cgid ? __BLK_TN_CGROUP : 0),
- bt->dev, 0, cgid, cgid_len, (void *)data,
- len);
- }
+ relay_blktrace_event(bt, 0, pid, cpu, 0, 0,
+ action | (cgid ? __BLK_TN_CGROUP : 0), 0, cgid,
+ cgid_len, (void *)data, len);
}
/*
@@ -306,19 +319,10 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
* from coming in and stepping on our toes.
*/
local_irq_save(flags);
- t = relay_reserve(bt->rchan, trace_len);
- if (t) {
- sequence = per_cpu_ptr(bt->sequence, cpu);
-
- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
- t->sequence = ++(*sequence);
- t->time = ktime_to_ns(ktime_get());
-
- record_blktrace_event(t, pid, cpu, sector, bytes, what,
- bt->dev, error, cgid, cgid_len,
- pdu_data, pdu_len);
- }
-
+ sequence = per_cpu_ptr(bt->sequence, cpu);
+ (*sequence)++;
+ relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes, what,
+ error, cgid, cgid_len, pdu_data, pdu_len);
local_irq_restore(flags);
}
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 07/16] blktrace: split out relaying a blktrace event
2025-09-09 11:06 ` [PATCH 07/16] blktrace: split out relaying a blktrace event Johannes Thumshirn
@ 2025-09-19 15:10 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:10 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
On Tue, Sep 09, 2025 at 01:06:02PM +0200, Johannes Thumshirn wrote:
> Split out the code relaying a blktrace event to user-space using relayfs.
>
> This enables adding a second version supporting a new version of the
> protocol.
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 08/16] blktrace: change the internal action to 64bit
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (6 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 07/16] blktrace: split out relaying a blktrace event Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:10 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 09/16] blktrace: remove struct blk_io_trace from __blk_add_trace Johannes Thumshirn
` (7 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Change the internal use of the action in blktrace to 64bit, although for
now only the lower 32bits will be used.
With the upcoming version 2 of the blktrace user-space protocol the upper
32bit will also be utilized.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 24eef7b116b5..5b97dc5e2cfd 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -127,6 +127,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
size_t trace_len;
trace_len = sizeof(*t) + cgid_len + len;
+ action = lower_32_bits(action | (cgid ? __BLK_TN_CGROUP : 0));
if (blk_tracer) {
buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0);
@@ -136,9 +137,8 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
return;
t = ring_buffer_event_data(event);
record_blktrace_event(t, pid, cpu, 0, 0,
- action | (cgid ? __BLK_TN_CGROUP : 0),
- bt->dev, 0, cgid, cgid_len, (void *)data,
- len);
+ action, bt->dev, 0, cgid, cgid_len,
+ (void *)data, len);
trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
return;
}
@@ -146,8 +146,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (!bt->rchan)
return;
- relay_blktrace_event(bt, 0, pid, cpu, 0, 0,
- action | (cgid ? __BLK_TN_CGROUP : 0), 0, cgid,
+ relay_blktrace_event(bt, 0, pid, cpu, 0, 0, action, 0, cgid,
cgid_len, (void *)data, len);
}
@@ -222,7 +221,7 @@ void __blk_trace_note_message(struct blk_trace *bt,
}
EXPORT_SYMBOL_GPL(__blk_trace_note_message);
-static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
+static int act_log_check(struct blk_trace *bt, u64 what, sector_t sector,
pid_t pid)
{
if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
@@ -253,7 +252,7 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
* blk_io_trace structure and places it in a per-cpu subbuffer.
*/
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
- const blk_opf_t opf, u32 what, int error,
+ const blk_opf_t opf, u64 what, int error,
int pdu_len, void *pdu_data, u64 cgid)
{
struct task_struct *tsk = current;
@@ -303,8 +302,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
t = ring_buffer_event_data(event);
- record_blktrace_event(t, pid, cpu, sector, bytes, what, bt->dev,
- error, cgid, cgid_len, pdu_data, pdu_len);
+ record_blktrace_event(t, pid, cpu, sector, bytes,
+ lower_32_bits(what), bt->dev, error,
+ cgid, cgid_len, pdu_data, pdu_len);
trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx);
return;
@@ -321,8 +321,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
local_irq_save(flags);
sequence = per_cpu_ptr(bt->sequence, cpu);
(*sequence)++;
- relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes, what,
- error, cgid, cgid_len, pdu_data, pdu_len);
+ relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes,
+ lower_32_bits(what), error, cgid, cgid_len,
+ pdu_data, pdu_len);
local_irq_restore(flags);
}
@@ -841,7 +842,7 @@ blk_trace_request_get_cgid(struct request *rq)
*
**/
static void blk_add_trace_rq(struct request *rq, blk_status_t error,
- unsigned int nr_bytes, u32 what, u64 cgid)
+ unsigned int nr_bytes, u64 what, u64 cgid)
{
struct blk_trace *bt;
@@ -905,7 +906,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
*
**/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
- u32 what, int error)
+ u64 what, int error)
{
struct blk_trace *bt;
@@ -971,7 +972,7 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
bt = rcu_dereference(q->blk_trace);
if (bt) {
__be64 rpdu = cpu_to_be64(depth);
- u32 what;
+ u64 what;
if (explicit)
what = BLK_TA_UNPLUG_IO;
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 08/16] blktrace: change the internal action to 64bit
2025-09-09 11:06 ` [PATCH 08/16] blktrace: change the internal action to 64bit Johannes Thumshirn
@ 2025-09-19 15:10 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:10 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 09/16] blktrace: remove struct blk_io_trace from __blk_add_trace
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (7 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 08/16] blktrace: change the internal action to 64bit Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:11 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 10/16] blktrace: differentiate between blk_io_trace versions Johannes Thumshirn
` (6 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Now that relaying the blktrace protocol information via relayfs has been
removed from __blk_add_trace(), it only uses 'struct blk_io_trace' for the
ftrace portion of the function.
Directly pass in the reserved area of the ftrace ring buffer to
record_blktrace_event().
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 5b97dc5e2cfd..14fb4e7296cf 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -258,7 +258,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
struct task_struct *tsk = current;
struct ring_buffer_event *event = NULL;
struct trace_buffer *buffer = NULL;
- struct blk_io_trace *t;
unsigned long flags = 0;
unsigned long *sequence;
unsigned int trace_ctx = 0;
@@ -290,19 +289,19 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
cpu = raw_smp_processor_id();
- trace_len = sizeof(*t) + pdu_len + cgid_len;
if (blk_tracer) {
tracing_record_cmdline(current);
buffer = blk_tr->array_buffer.buffer;
trace_ctx = tracing_gen_ctx_flags(0);
+ trace_len = sizeof(struct blk_io_trace) + pdu_len + cgid_len;
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
trace_len, trace_ctx);
if (!event)
return;
- t = ring_buffer_event_data(event);
- record_blktrace_event(t, pid, cpu, sector, bytes,
+ record_blktrace_event(ring_buffer_event_data(event),
+ pid, cpu, sector, bytes,
lower_32_bits(what), bt->dev, error,
cgid, cgid_len, pdu_data, pdu_len);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 09/16] blktrace: remove struct blk_io_trace from __blk_add_trace
2025-09-09 11:06 ` [PATCH 09/16] blktrace: remove struct blk_io_trace from __blk_add_trace Johannes Thumshirn
@ 2025-09-19 15:11 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:11 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
On Tue, Sep 09, 2025 at 01:06:04PM +0200, Johannes Thumshirn wrote:
> Now that relaying the blktrace protocol information via relayfs has been
> removed from __blk_add_trace(), it only uses 'struct blk_io_trace' for the
> ftrace portion of the function.
>
> Directly pass in the reserved area of the ftrace ring buffer to
> record_blktrace_event().
Shouldn't this be part of the patch splitting out record_blktrace_event?
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 10/16] blktrace: differentiate between blk_io_trace versions
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (8 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 09/16] blktrace: remove struct blk_io_trace from __blk_add_trace Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:12 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 11/16] blktrace: untangle if/else sequence in __blk_add_trace Johannes Thumshirn
` (5 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Differentiate between blk_io_trace and blk_io_trace2 when relaying to
user-space depending on which version has been requested by the blktrace
utility.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 62 +++++++++++++++++++++++++++++++++++++----
1 file changed, 57 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 14fb4e7296cf..6dc7396c26c2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -91,6 +91,29 @@ static void record_blktrace_event(struct blk_io_trace *t, pid_t pid, int cpu,
memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
}
+static void record_blktrace_event2(struct blk_io_trace2 *t2, pid_t pid, int cpu,
+ sector_t sector, int bytes, u64 what,
+ dev_t dev, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data,
+ int pdu_len)
+
+{
+ t2->pid = pid;
+ t2->cpu = cpu;
+
+ t2->sector = sector;
+ t2->bytes = bytes;
+ t2->action = what;
+ t2->device = dev;
+ t2->error = error;
+ t2->pdu_len = pdu_len + cgid_len;
+
+ if (cgid_len)
+ memcpy((void *)t2 + sizeof(*t2), &cgid, cgid_len);
+ if (pdu_len)
+ memcpy((void *)t2 + sizeof(*t2) + cgid_len, pdu_data, pdu_len);
+}
+
static void relay_blktrace_event(struct blk_trace *bt, unsigned long sequence,
pid_t pid, int cpu, sector_t sector, int bytes,
u32 what, int error, u64 cgid,
@@ -111,6 +134,26 @@ static void relay_blktrace_event(struct blk_trace *bt, unsigned long sequence,
cgid, cgid_len, pdu_data, pdu_len);
}
+static void relay_blktrace_event2(struct blk_trace *bt, unsigned long sequence,
+ pid_t pid, int cpu, sector_t sector,
+ int bytes, u64 what, int error, u64 cgid,
+ ssize_t cgid_len, void *pdu_data, int pdu_len)
+{
+ struct blk_io_trace2 *t;
+ size_t trace_len = sizeof(struct blk_io_trace2) + pdu_len + cgid_len;
+
+ t = relay_reserve(bt->rchan, trace_len);
+ if (!t)
+ return;
+
+ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE2_VERSION;
+ t->sequence = sequence;
+ t->time = ktime_to_ns(ktime_get());
+
+ record_blktrace_event2(t, pid, cpu, sector, bytes, what, bt->dev, error,
+ cgid, cgid_len, pdu_data, pdu_len);
+}
+
/*
* Send out a notify message.
*/
@@ -146,8 +189,12 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
if (!bt->rchan)
return;
- relay_blktrace_event(bt, 0, pid, cpu, 0, 0, action, 0, cgid,
- cgid_len, (void *)data, len);
+ if (bt->version == 1)
+ relay_blktrace_event(bt, 0, pid, cpu, 0, 0, action, 0, cgid,
+ cgid_len, (void *)data, len);
+ else
+ relay_blktrace_event2(bt, 0, pid, cpu, 0, 0, action, 0, cgid,
+ cgid_len, (void *)data, len);
}
/*
@@ -320,9 +367,14 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
local_irq_save(flags);
sequence = per_cpu_ptr(bt->sequence, cpu);
(*sequence)++;
- relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes,
- lower_32_bits(what), error, cgid, cgid_len,
- pdu_data, pdu_len);
+ if (bt->version == 1)
+ relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes,
+ lower_32_bits(what), error, cgid,
+ cgid_len, pdu_data, pdu_len);
+ else
+ relay_blktrace_event2(bt, *sequence, pid, cpu, sector, bytes,
+ what, error, cgid, cgid_len, pdu_data,
+ pdu_len);
local_irq_restore(flags);
}
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 10/16] blktrace: differentiate between blk_io_trace versions
2025-09-09 11:06 ` [PATCH 10/16] blktrace: differentiate between blk_io_trace versions Johannes Thumshirn
@ 2025-09-19 15:12 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:12 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
Looks fine:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 11/16] blktrace: untangle if/else sequence in __blk_add_trace
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (9 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 10/16] blktrace: differentiate between blk_io_trace versions Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:12 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 12/16] blktrace: add block trace commands for zone operations Johannes Thumshirn
` (4 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Untangle the if/else sequence setting the trace action in
__blk_add_trace() and turn it into a switch statement for better
extensibility.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
kernel/trace/blktrace.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6dc7396c26c2..82ad626d6202 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -324,10 +324,19 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(opf, META);
what |= MASK_TC_BIT(opf, PREFLUSH);
what |= MASK_TC_BIT(opf, FUA);
- if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
+
+ switch (op) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
what |= BLK_TC_ACT(BLK_TC_DISCARD);
- if (op == REQ_OP_FLUSH)
+ break;
+ case REQ_OP_FLUSH:
what |= BLK_TC_ACT(BLK_TC_FLUSH);
+ break;
+ default:
+ break;
+ }
+
if (cgid)
what |= __BLK_TA_CGROUP;
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 11/16] blktrace: untangle if/else sequence in __blk_add_trace
2025-09-09 11:06 ` [PATCH 11/16] blktrace: untangle if/else sequence in __blk_add_trace Johannes Thumshirn
@ 2025-09-19 15:12 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:12 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
Looks good:
Reviewed-by: Christoph Hellwig <hch@lst.de>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 12/16] blktrace: add block trace commands for zone operations
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (10 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 11/16] blktrace: untangle if/else sequence in __blk_add_trace Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-19 15:13 ` Christoph Hellwig
2025-09-09 11:06 ` [PATCH 13/16] blktrace: expose ZONE APPEND completions to blktrace Johannes Thumshirn
` (3 subsequent siblings)
15 siblings, 1 reply; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Add block trace commands for zone operations. These are added as a
separate set of 'block trace commands' shifted by 32bit so that they do
not interfere with the old 16bit wide trace command field in 'struct
blk_io_trace' action.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/uapi/linux/blktrace_api.h | 11 +++++++++++
kernel/trace/blktrace.c | 18 ++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 01779f84d09f..d5047467c8ee 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -29,8 +29,19 @@ enum blktrace_cat {
BLK_TC_END = 1 << 15, /* we've run out of bits! */
};
+enum blktrace_cat2 {
+ BLK_TC_ZONE_APPEND = 1 << 1ull, /* zone append */
+ BLK_TC_ZONE_RESET = 1 << 2ull, /* zone reset */
+ BLK_TC_ZONE_RESET_ALL = 1 << 3ull, /* zone reset all */
+ BLK_TC_ZONE_FINISH = 1 << 4ull, /* zone finish */
+ BLK_TC_ZONE_OPEN = 1 << 5ull, /* zone open */
+ BLK_TC_ZONE_CLOSE = 1 << 6ull, /* zone close */
+};
+
#define BLK_TC_SHIFT (16)
#define BLK_TC_ACT(act) ((act) << BLK_TC_SHIFT)
+#define BLK_TC_SHIFT2 (32)
+#define BLK_TC_ACT2(act) ((u64)(act) << BLK_TC_SHIFT2)
/*
* Basic trace actions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 82ad626d6202..62f6cfcee4f6 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -333,6 +333,24 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
case REQ_OP_FLUSH:
what |= BLK_TC_ACT(BLK_TC_FLUSH);
break;
+ case REQ_OP_ZONE_APPEND:
+ what |= BLK_TC_ACT2(BLK_TC_ZONE_APPEND);
+ break;
+ case REQ_OP_ZONE_RESET:
+ what |= BLK_TC_ACT2(BLK_TC_ZONE_RESET);
+ break;
+ case REQ_OP_ZONE_RESET_ALL:
+ what |= BLK_TC_ACT2(BLK_TC_ZONE_RESET_ALL);
+ break;
+ case REQ_OP_ZONE_FINISH:
+ what |= BLK_TC_ACT2(BLK_TC_ZONE_FINISH);
+ break;
+ case REQ_OP_ZONE_OPEN:
+ what |= BLK_TC_ACT2(BLK_TC_ZONE_OPEN);
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ what |= BLK_TC_ACT2(BLK_TC_ZONE_CLOSE);
+ break;
default:
break;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 12/16] blktrace: add block trace commands for zone operations
2025-09-09 11:06 ` [PATCH 12/16] blktrace: add block trace commands for zone operations Johannes Thumshirn
@ 2025-09-19 15:13 ` Christoph Hellwig
0 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2025-09-19 15:13 UTC (permalink / raw)
To: Johannes Thumshirn
Cc: Jens Axboe, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
linux-block, linux-kernel, linux-trace-kernel, linux-btrace,
John Garry, Hannes Reinecke, Damien Le Moal, Christoph Hellwig,
Naohiro Aota, Shinichiro Kawasaki, Chaitanya Kulkarni,
Martin K . Petersen
On Tue, Sep 09, 2025 at 01:06:07PM +0200, Johannes Thumshirn wrote:
> Add block trace commands for zone operations. These are added as a
> separate set of 'block trace commands' shifted by 32bit so that they do
> not interfere with the old 16bit wide trace command field in 'struct
> blk_io_trace' action.
This is very confusing. Why not have a single enum with the actual
values with a clearly marked cutoff for v1?
^ permalink raw reply [flat|nested] 29+ messages in thread
* [PATCH 13/16] blktrace: expose ZONE APPEND completions to blktrace
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (11 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 12/16] blktrace: add block trace commands for zone operations Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-09 11:06 ` [PATCH 14/16] blktrace: trace zone management operations Johannes Thumshirn
` (2 subsequent siblings)
15 siblings, 0 replies; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Expose ZONE APPEND completions as a block trace completion action to
blktrace.
As tracing of zoned block commands needs the upper 32bit of the widened
64bit action, only add traces to blktrace if user-space has requested
version 2 of the blktrace protocol.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/uapi/linux/blktrace_api.h | 3 +++
kernel/trace/blktrace.c | 21 +++++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index d5047467c8ee..c75ae82b2dbc 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -99,6 +99,9 @@ enum blktrace_notify {
#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
+#define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\
+ BLK_TC_ACT2(BLK_TC_ZONE_APPEND))
+
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_MESSAGE (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 62f6cfcee4f6..fea6e63ee27c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -972,6 +972,22 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
blk_trace_request_get_cgid(rq));
}
+static void blk_add_trace_zone_update_request(void *ignore, struct request *rq)
+{
+ struct blk_trace *bt;
+
+ rcu_read_lock();
+ bt = rcu_dereference(rq->q->blk_trace);
+ if (likely(!bt) || bt->version < 2) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ZONE_APPEND,
+ blk_trace_request_get_cgid(rq));
+}
+
/**
* blk_add_trace_bio - Add a trace for a bio oriented action
* @q: queue the io is for
@@ -1202,6 +1218,9 @@ static void blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_getrq(blk_add_trace_getrq, NULL);
WARN_ON(ret);
+ ret = register_trace_blk_zone_append_update_request_bio(
+ blk_add_trace_zone_update_request, NULL);
+ WARN_ON(ret);
ret = register_trace_block_plug(blk_add_trace_plug, NULL);
WARN_ON(ret);
ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
@@ -1221,6 +1240,8 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
unregister_trace_block_plug(blk_add_trace_plug, NULL);
+ unregister_trace_blk_zone_append_update_request_bio(
+ blk_add_trace_zone_update_request, NULL);
unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL);
unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 14/16] blktrace: trace zone management operations
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (12 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 13/16] blktrace: expose ZONE APPEND completions to blktrace Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-09 11:06 ` [PATCH 15/16] blktrace: trace zone write plugging operations Johannes Thumshirn
2025-09-09 11:06 ` [PATCH 16/16] blktrace: handle BLKTRACESETUP2 ioctl Johannes Thumshirn
15 siblings, 0 replies; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Trace zone management operations on block devices.
As tracing of zoned block commands needs the upper 32bit of the widened
64bit action, only add traces to blktrace if user-space has requested
version 2 of the blktrace protocol.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/uapi/linux/blktrace_api.h | 2 ++
kernel/trace/blktrace.c | 20 ++++++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index c75ae82b2dbc..074c4de62c3e 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -64,6 +64,7 @@ enum blktrace_act {
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
+ __BLK_TA_ZONE_MGMT, /* zone management command was issued */
__BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/
};
@@ -101,6 +102,7 @@ enum blktrace_notify {
#define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\
BLK_TC_ACT2(BLK_TC_ZONE_APPEND))
+#define BLK_TA_ZONE_MGMT __BLK_TA_ZONE_MGMT
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fea6e63ee27c..13424efbb2f6 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1046,6 +1046,22 @@ static void blk_add_trace_getrq(void *ignore, struct bio *bio)
blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_GETRQ, 0);
}
+static void blk_add_trace_blkdev_zone_mgmt(void *ignore, struct bio *bio,
+ sector_t nr_sectors)
+{
+ struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+ struct blk_trace *bt;
+
+ rcu_read_lock();
+ bt = rcu_dereference(q->blk_trace);
+ if (unlikely(!bt) || bt->version < 2) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ blk_add_trace_bio(q, bio, BLK_TA_ZONE_MGMT, 0);
+}
+
static void blk_add_trace_plug(void *ignore, struct request_queue *q)
{
struct blk_trace *bt;
@@ -1221,6 +1237,9 @@ static void blk_register_tracepoints(void)
ret = register_trace_blk_zone_append_update_request_bio(
blk_add_trace_zone_update_request, NULL);
WARN_ON(ret);
+ ret = register_trace_blkdev_zone_mgmt(blk_add_trace_blkdev_zone_mgmt,
+ NULL);
+ WARN_ON(ret);
ret = register_trace_block_plug(blk_add_trace_plug, NULL);
WARN_ON(ret);
ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
@@ -1240,6 +1259,7 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
unregister_trace_block_plug(blk_add_trace_plug, NULL);
+ unregister_trace_blkdev_zone_mgmt(blk_add_trace_blkdev_zone_mgmt, NULL);
unregister_trace_blk_zone_append_update_request_bio(
blk_add_trace_zone_update_request, NULL);
unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* [PATCH 15/16] blktrace: trace zone write plugging operations
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (13 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 14/16] blktrace: trace zone management operations Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
2025-09-09 11:06 ` [PATCH 16/16] blktrace: handle BLKTRACESETUP2 ioctl Johannes Thumshirn
15 siblings, 0 replies; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Trace zone write plugging operations on block devices.
As tracing of zoned block commands needs the upper 32 bits of the widened
64-bit action, only add traces to blktrace if user-space has requested
version 2 of the blktrace protocol.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
include/uapi/linux/blktrace_api.h | 5 ++++
kernel/trace/blktrace.c | 39 +++++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
diff --git a/include/uapi/linux/blktrace_api.h b/include/uapi/linux/blktrace_api.h
index 074c4de62c3e..dc1aa8e4d787 100644
--- a/include/uapi/linux/blktrace_api.h
+++ b/include/uapi/linux/blktrace_api.h
@@ -64,6 +64,8 @@ enum blktrace_act {
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
__BLK_TA_DRV_DATA, /* driver-specific binary data */
+ __BLK_TA_ZONE_PLUG, /* zone write plug was plugged */
+ __BLK_TA_ZONE_UNPLUG, /* zone write plug was unplugged */
__BLK_TA_ZONE_MGMT, /* zone management command was issued */
__BLK_TA_CGROUP = 1 << 8, /* from a cgroup*/
};
@@ -103,6 +105,9 @@ enum blktrace_notify {
#define BLK_TA_ZONE_APPEND (__BLK_TA_COMPLETE |\
BLK_TC_ACT2(BLK_TC_ZONE_APPEND))
#define BLK_TA_ZONE_MGMT __BLK_TA_ZONE_MGMT
+#define BLK_TA_ZONE_PLUG (__BLK_TA_ZONE_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_ZONE_UNPLUG (__BLK_TA_ZONE_UNPLUG |\
+ BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 13424efbb2f6..3e7cd8f46c0c 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1094,6 +1094,37 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
rcu_read_unlock();
}
+/*
+ * Probe attached to the disk_zone_wplug_add_bio tracepoint: log a
+ * BLK_TA_ZONE_PLUG event covering @sectors sectors starting at @sector.
+ * Nothing is logged unless @q has a trace with version >= 2.
+ * @zno is part of the probe signature and is not used here.
+ */
+static void blk_add_trace_zone_plug(void *ignore, struct request_queue *q,
+				    unsigned int zno, sector_t sector,
+				    unsigned int sectors)
+{
+	unsigned int bytes = sectors << SECTOR_SHIFT;
+	struct blk_trace *bt;
+
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (!bt || bt->version < 2)
+		goto out_unlock;
+
+	__blk_add_trace(bt, sector, bytes, 0, BLK_TA_ZONE_PLUG, 0, 0, NULL, 0);
+out_unlock:
+	rcu_read_unlock();
+}
+
+/*
+ * Probe attached to the blk_zone_wplug_bio tracepoint: log a
+ * BLK_TA_ZONE_UNPLUG event covering @sectors sectors starting at @sector.
+ * Nothing is logged unless @q has a trace with version >= 2.
+ * @zno is part of the probe signature and is not used here.
+ */
+static void blk_add_trace_zone_unplug(void *ignore, struct request_queue *q,
+				      unsigned int zno, sector_t sector,
+				      unsigned int sectors)
+{
+	unsigned int bytes = sectors << SECTOR_SHIFT;
+	struct blk_trace *bt;
+
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (!bt || bt->version < 2)
+		goto out_unlock;
+
+	__blk_add_trace(bt, sector, bytes, 0, BLK_TA_ZONE_UNPLUG, 0, 0, NULL, 0);
+out_unlock:
+	rcu_read_unlock();
+}
+
static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu)
{
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
@@ -1240,6 +1271,12 @@ static void blk_register_tracepoints(void)
ret = register_trace_blkdev_zone_mgmt(blk_add_trace_blkdev_zone_mgmt,
NULL);
WARN_ON(ret);
+ ret = register_trace_disk_zone_wplug_add_bio(blk_add_trace_zone_plug,
+ NULL);
+ WARN_ON(ret);
+ ret = register_trace_blk_zone_wplug_bio(blk_add_trace_zone_unplug,
+ NULL);
+ WARN_ON(ret);
ret = register_trace_block_plug(blk_add_trace_plug, NULL);
WARN_ON(ret);
ret = register_trace_block_unplug(blk_add_trace_unplug, NULL);
@@ -1259,6 +1296,8 @@ static void blk_unregister_tracepoints(void)
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug(blk_add_trace_unplug, NULL);
unregister_trace_block_plug(blk_add_trace_plug, NULL);
+ unregister_trace_blk_zone_wplug_bio(blk_add_trace_zone_unplug, NULL);
+ unregister_trace_disk_zone_wplug_add_bio(blk_add_trace_zone_plug, NULL);
unregister_trace_blkdev_zone_mgmt(blk_add_trace_blkdev_zone_mgmt, NULL);
unregister_trace_blk_zone_append_update_request_bio(
blk_add_trace_zone_update_request, NULL);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* [PATCH 16/16] blktrace: handle BLKTRACESETUP2 ioctl
2025-09-09 11:05 [PATCH 00/16] block: add blktrace support for zoned block device commands Johannes Thumshirn
` (14 preceding siblings ...)
2025-09-09 11:06 ` [PATCH 15/16] blktrace: trace zone write plugging operations Johannes Thumshirn
@ 2025-09-09 11:06 ` Johannes Thumshirn
15 siblings, 0 replies; 29+ messages in thread
From: Johannes Thumshirn @ 2025-09-09 11:06 UTC (permalink / raw)
To: Jens Axboe
Cc: Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, linux-block,
linux-kernel, linux-trace-kernel, linux-btrace, John Garry,
Hannes Reinecke, Damien Le Moal, Christoph Hellwig, Naohiro Aota,
Shinichiro Kawasaki, Chaitanya Kulkarni, Martin K . Petersen,
Johannes Thumshirn
Handle the BLKTRACESETUP2 ioctl, requesting an extended version of the
blktrace protocol from user-space.
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
block/ioctl.c | 1 +
kernel/trace/blktrace.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/block/ioctl.c b/block/ioctl.c
index f7b0006ca45d..e7f83a58c8ae 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -691,6 +691,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
/* Incompatible alignment on i386 */
case BLKTRACESETUP:
+ case BLKTRACESETUP2:
return blk_trace_ioctl(bdev, cmd, argp);
default:
break;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 3e7cd8f46c0c..e16a3dbed527 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -742,6 +742,38 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
EXPORT_SYMBOL_GPL(blk_trace_setup);
+/*
+ * Set up a version 2 trace on @q from the struct blk_user_trace_setup2 that
+ * user space passed at @arg (reached via the BLKTRACESETUP2 ioctl), then copy
+ * the structure back to @arg.
+ *
+ * Returns 0 on success, -EFAULT if @arg cannot be read or written,
+ * -EINVAL if buf_size or buf_nr is zero, or the error carried by the
+ * pointer returned from blk_trace_setup_prepare().
+ */
+static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev,
+			    struct block_device *bdev, char __user *arg)
+{
+	struct blk_user_trace_setup2 buts2;
+	struct blk_trace *bt;
+	int ret = 0;
+
+	if (copy_from_user(&buts2, arg, sizeof(buts2)))
+		return -EFAULT;
+
+	if (!buts2.buf_size || !buts2.buf_nr)
+		return -EINVAL;
+
+	mutex_lock(&q->debugfs_mutex);
+	bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
+				     bdev);
+	if (IS_ERR(bt)) {
+		ret = PTR_ERR(bt);
+		goto out_unlock;
+	}
+
+	/* Tag the trace as version 2 before handing it to finalize. */
+	bt->version = 2;
+	blk_trace_setup_finalize(q, name, bt, &buts2);
+out_unlock:
+	mutex_unlock(&q->debugfs_mutex);
+	if (ret)
+		return ret;
+
+	/* The trace exists but user space cannot be told: remove it again. */
+	if (copy_to_user(arg, &buts2, sizeof(buts2))) {
+		blk_trace_remove(q);
+		return -EFAULT;
+	}
+	return 0;
+}
+
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
static int compat_blk_trace_setup(struct request_queue *q, char *name,
dev_t dev, struct block_device *bdev,
@@ -833,6 +865,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
char b[BDEVNAME_SIZE];
switch (cmd) {
+ case BLKTRACESETUP2:
+ snprintf(b, sizeof(b), "%pg", bdev);
+ ret = blk_trace_setup2(q, b, bdev->bd_dev, bdev, arg);
+ break;
case BLKTRACESETUP:
snprintf(b, sizeof(b), "%pg", bdev);
ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
--
2.51.0
^ permalink raw reply related [flat|nested] 29+ messages in thread