* [PATCH v3 0/4] fuse: Improve ftraces, per-cpu req unique and code dup removal
From: Bernd Schubert @ 2025-04-03 20:22 UTC (permalink / raw)
  To: Miklos Szeredi, Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez
  Cc: linux-fsdevel, Joanne Koong, Josef Bacik, Bernd Schubert

This series is mainly about improved ftrace events to determine
latencies between queues and to allow building latency
histograms - so far the request unique was missing in
trace_fuse_request_send.
A few preparation patches come first.

Scripts to enable tracing and to create histograms are here:
https://github.com/libfuse/libfuse/pull/1186

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
Changes in v3:
- Added missing free_percpu(), and new fuse_iqueue_destroy() (Joanne)
- Removed unneeded headers (Joanne)
- Removed '+ 1' for current cpu (Joanne)
- Link to v2: https://lore.kernel.org/r/20250403-fuse-io-uring-trace-points-v2-0-bd04f2b22f91@ddn.com

Changes in v2:
- Link to v1: https://lore.kernel.org/r/20250402-fuse-io-uring-trace-points-v1-0-11b0211fa658@ddn.com

---
Bernd Schubert (4):
      fuse: Make the fuse unique value a per-cpu counter
      fuse: Set request unique on allocation
      fuse: {io-uring} Avoid _send code dup
      fuse: fine-grained request ftraces

 fs/fuse/dev.c        | 37 ++++++++++------------------------
 fs/fuse/dev_uring.c  | 44 ++++++++++++++++------------------------
 fs/fuse/fuse_dev_i.h |  4 ----
 fs/fuse/fuse_i.h     | 21 ++++++++++++++-----
 fs/fuse/fuse_trace.h | 57 +++++++++++++++++++++++++++++++++++++---------------
 fs/fuse/inode.c      |  7 +++++++
 fs/fuse/virtio_fs.c  |  3 ---
 7 files changed, 92 insertions(+), 81 deletions(-)
---
base-commit: 08733088b566b58283f0f12fb73f5db6a9a9de30
change-id: 20250402-fuse-io-uring-trace-points-690154bb72c7

Best regards,
-- 
Bernd Schubert <bschubert@ddn.com>



* [PATCH v3 1/4] fuse: Make the fuse unique value a per-cpu counter
From: Bernd Schubert @ 2025-04-03 20:22 UTC (permalink / raw)
  To: Miklos Szeredi, Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez
  Cc: linux-fsdevel, Joanne Koong, Josef Bacik, Bernd Schubert

No need to take a lock; the counter can be per-cpu, with
the current CPU added in as an offset.

fuse-io-uring and virtiofs especially benefit from this,
as they don't need the fiq lock at all.
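
For illustration, a minimal userspace sketch of the ID arithmetic;
FUSE_REQ_ID_STEP and NR_CPUS use assumed demo values, and task_cpu()
and this_cpu_inc_return() are stood in by plain variables:

#include <stdint.h>
#include <stdio.h>

#define FUSE_REQ_ID_STEP 2ULL	/* ordinary requests get even IDs */
#define NR_CPUS          4ULL	/* assumed CPU count for the demo */

/* Model of fuse_get_unique(): cpu plays task_cpu(), cntr plays
 * this_cpu_inc_return(*fiq->reqctr). */
static uint64_t demo_get_unique(uint64_t cpu, uint64_t cntr)
{
	uint64_t step = FUSE_REQ_ID_STEP * cpu;

	return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;
}

int main(void)
{
	uint64_t cpu, cntr;

	/* Two requests per CPU: every ID is even and no two CPUs collide. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		for (cntr = 1; cntr <= 2; cntr++)
			printf("cpu %llu cntr %llu -> unique %llu\n",
			       (unsigned long long)cpu,
			       (unsigned long long)cntr,
			       (unsigned long long)demo_get_unique(cpu, cntr));
	return 0;
}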

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev.c        | 24 +++---------------------
 fs/fuse/fuse_dev_i.h |  4 ----
 fs/fuse/fuse_i.h     | 21 ++++++++++++++++-----
 fs/fuse/inode.c      |  7 +++++++
 4 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 51e31df4c54613280a9c295f530b18e1d461a974..e9592ab092b948bacb5034018bd1f32c917d5c9f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -204,24 +204,6 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 }
 EXPORT_SYMBOL_GPL(fuse_len_args);
 
-static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
-{
-	fiq->reqctr += FUSE_REQ_ID_STEP;
-	return fiq->reqctr;
-}
-
-u64 fuse_get_unique(struct fuse_iqueue *fiq)
-{
-	u64 ret;
-
-	spin_lock(&fiq->lock);
-	ret = fuse_get_unique_locked(fiq);
-	spin_unlock(&fiq->lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(fuse_get_unique);
-
 unsigned int fuse_req_hash(u64 unique)
 {
 	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
@@ -278,7 +260,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 	spin_lock(&fiq->lock);
 	if (fiq->connected) {
 		if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
-			req->in.h.unique = fuse_get_unique_locked(fiq);
+			req->in.h.unique = fuse_get_unique(fiq);
 		list_add_tail(&req->list, &fiq->pending);
 		fuse_dev_wake_and_unlock(fiq);
 	} else {
@@ -1177,7 +1159,7 @@ __releases(fiq->lock)
 	struct fuse_in_header ih = {
 		.opcode = FUSE_FORGET,
 		.nodeid = forget->forget_one.nodeid,
-		.unique = fuse_get_unique_locked(fiq),
+		.unique = fuse_get_unique(fiq),
 		.len = sizeof(ih) + sizeof(arg),
 	};
 
@@ -1208,7 +1190,7 @@ __releases(fiq->lock)
 	struct fuse_batch_forget_in arg = { .count = 0 };
 	struct fuse_in_header ih = {
 		.opcode = FUSE_BATCH_FORGET,
-		.unique = fuse_get_unique_locked(fiq),
+		.unique = fuse_get_unique(fiq),
 		.len = sizeof(ih) + sizeof(arg),
 	};
 
diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
index 3b2bfe1248d3573abe3b144a6d4bf6a502f56a40..e0afd837a8024450bab77312c7eebdcc7a39bd36 100644
--- a/fs/fuse/fuse_dev_i.h
+++ b/fs/fuse/fuse_dev_i.h
@@ -8,10 +8,6 @@
 
 #include <linux/types.h>
 
-/* Ordinary requests have even IDs, while interrupts IDs are odd */
-#define FUSE_INT_REQ_BIT (1ULL << 0)
-#define FUSE_REQ_ID_STEP (1ULL << 1)
-
 struct fuse_arg;
 struct fuse_args;
 struct fuse_pqueue;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fee96fe7887b30cd57b8a6bbda11447a228cf446..80a526eaba38aa97f6a6faa60e5276fcd7f2668f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,10 @@
 /** Number of dentries for each connection in the control filesystem */
 #define FUSE_CTL_NUM_DENTRIES 5
 
+/* Ordinary requests have even IDs, while interrupts IDs are odd */
+#define FUSE_INT_REQ_BIT (1ULL << 0)
+#define FUSE_REQ_ID_STEP (1ULL << 1)
+
 /** Maximum of max_pages received in init_out */
 extern unsigned int fuse_max_pages_limit;
 
@@ -490,7 +494,7 @@ struct fuse_iqueue {
 	wait_queue_head_t waitq;
 
 	/** The next unique request id */
-	u64 reqctr;
+	u64 __percpu *reqctr;
 
 	/** The list of pending requests */
 	struct list_head pending;
@@ -1065,6 +1069,17 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
 	rcu_read_unlock();
 }
 
+/**
+ * Get the next unique ID for a request
+ */
+static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
+{
+	int step = FUSE_REQ_ID_STEP * (task_cpu(current));
+	u64 cntr = this_cpu_inc_return(*fiq->reqctr);
+
+	return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;
+}
+
 /** Device operations */
 extern const struct file_operations fuse_dev_operations;
 
@@ -1415,10 +1430,6 @@ int fuse_readdir(struct file *file, struct dir_context *ctx);
  */
 unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
 
-/**
- * Get the next unique ID for a request
- */
-u64 fuse_get_unique(struct fuse_iqueue *fiq);
 void fuse_free_conn(struct fuse_conn *fc);
 
 /* dax.c */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e9db2cb8c150878634728685af0fa15e7ade628f..d2d850cca4c7bc3cd7158e773c5e602e15afe4e3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -930,6 +930,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
 	memset(fiq, 0, sizeof(struct fuse_iqueue));
 	spin_lock_init(&fiq->lock);
 	init_waitqueue_head(&fiq->waitq);
+	fiq->reqctr = alloc_percpu(u64);
 	INIT_LIST_HEAD(&fiq->pending);
 	INIT_LIST_HEAD(&fiq->interrupts);
 	fiq->forget_list_tail = &fiq->forget_list_head;
@@ -938,6 +939,11 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
 	fiq->priv = priv;
 }
 
+static void fuse_iqueue_destroy(struct fuse_iqueue *fiq)
+{
+	free_percpu(fiq->reqctr);
+}
+
 void fuse_pqueue_init(struct fuse_pqueue *fpq)
 {
 	unsigned int i;
@@ -994,6 +1000,7 @@ static void delayed_release(struct rcu_head *p)
 	struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
 
 	fuse_uring_destruct(fc);
+	fuse_iqueue_destroy(&fc->iq);
 
 	put_user_ns(fc->user_ns);
 	fc->release(fc);

-- 
2.43.0



* [PATCH v3 2/4] fuse: Set request unique on allocation
From: Bernd Schubert @ 2025-04-03 20:22 UTC (permalink / raw)
  To: Miklos Szeredi, Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez
  Cc: linux-fsdevel, Joanne Koong, Josef Bacik, Bernd Schubert

This is especially needed for better ftrace analysis, for
example to build histograms. So far the request unique was
missing from the first trace message, because it was only
assigned afterwards.

Request IDs might not be assigned perfectly sequentially
anymore, but especially with cloned devices or io-uring that
was not guaranteed anyway.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev.c       | 8 +++-----
 fs/fuse/dev_uring.c | 3 ---
 fs/fuse/virtio_fs.c | 3 ---
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e9592ab092b948bacb5034018bd1f32c917d5c9f..1ccf5a9c61ae2b11bc1d0b799c08e6da908a9782 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -259,8 +259,6 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
 	spin_lock(&fiq->lock);
 	if (fiq->connected) {
-		if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
-			req->in.h.unique = fuse_get_unique(fiq);
 		list_add_tail(&req->list, &fiq->pending);
 		fuse_dev_wake_and_unlock(fiq);
 	} else {
@@ -508,6 +506,9 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
 		req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8;
 	if (args->end)
 		__set_bit(FR_ASYNC, &req->flags);
+
+	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+		req->in.h.unique = fuse_get_unique(&req->fm->fc->iq);
 }
 
 ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
@@ -555,9 +556,6 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
 static bool fuse_request_queue_background_uring(struct fuse_conn *fc,
 					       struct fuse_req *req)
 {
-	struct fuse_iqueue *fiq = &fc->iq;
-
-	req->in.h.unique = fuse_get_unique(fiq);
 	req->in.h.len = sizeof(struct fuse_in_header) +
 		fuse_len_args(req->args->in_numargs,
 			      (struct fuse_arg *) req->args->in_args);
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 82bf458fa9db5b2357ae2d1cf5621ed4db978892..5a05b76249d6fe6214e948955f23eed1e40bb751 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -1230,9 +1230,6 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 	if (!queue)
 		goto err;
 
-	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
-		req->in.h.unique = fuse_get_unique(fiq);
-
 	spin_lock(&queue->lock);
 	err = -ENOTCONN;
 	if (unlikely(queue->stopped))
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 82afe78ec542358e2db6f4d955d521652ae363ec..ea13d57133c335554acae33b22e1604424886ac9 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1482,9 +1482,6 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 	struct virtio_fs_vq *fsvq;
 	int ret;
 
-	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
-		req->in.h.unique = fuse_get_unique(fiq);
-
 	clear_bit(FR_PENDING, &req->flags);
 
 	fs = fiq->priv;

-- 
2.43.0



* [PATCH v3 3/4] fuse: {io-uring} Avoid _send code dup
From: Bernd Schubert @ 2025-04-03 20:22 UTC (permalink / raw)
  To: Miklos Szeredi, Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez
  Cc: linux-fsdevel, Joanne Koong, Josef Bacik, Bernd Schubert

fuse_uring_send_next_to_ring() can just call into fuse_uring_send()
and avoid code duplication.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev_uring.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 5a05b76249d6fe6214e948955f23eed1e40bb751..c5cb2aea75af523e22f539c8e18cfd0d6e771ffc 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -667,6 +667,20 @@ static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
 	return err;
 }
 
+static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
+			    ssize_t ret, unsigned int issue_flags)
+{
+	struct fuse_ring_queue *queue = ent->queue;
+
+	spin_lock(&queue->lock);
+	ent->state = FRRS_USERSPACE;
+	list_move(&ent->list, &queue->ent_in_userspace);
+	ent->cmd = NULL;
+	spin_unlock(&queue->lock);
+
+	io_uring_cmd_done(cmd, ret, 0, issue_flags);
+}
+
 /*
  * Write data to the ring buffer and send the request to userspace,
  * userspace will read it
@@ -676,22 +690,13 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
 					struct fuse_req *req,
 					unsigned int issue_flags)
 {
-	struct fuse_ring_queue *queue = ent->queue;
 	int err;
-	struct io_uring_cmd *cmd;
 
 	err = fuse_uring_prepare_send(ent, req);
 	if (err)
 		return err;
 
-	spin_lock(&queue->lock);
-	cmd = ent->cmd;
-	ent->cmd = NULL;
-	ent->state = FRRS_USERSPACE;
-	list_move(&ent->list, &queue->ent_in_userspace);
-	spin_unlock(&queue->lock);
-
-	io_uring_cmd_done(cmd, 0, 0, issue_flags);
+	fuse_uring_send(ent, ent->cmd, 0, issue_flags);
 	return 0;
 }
 
@@ -1151,20 +1156,6 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	return -EIOCBQUEUED;
 }
 
-static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
-			    ssize_t ret, unsigned int issue_flags)
-{
-	struct fuse_ring_queue *queue = ent->queue;
-
-	spin_lock(&queue->lock);
-	ent->state = FRRS_USERSPACE;
-	list_move(&ent->list, &queue->ent_in_userspace);
-	ent->cmd = NULL;
-	spin_unlock(&queue->lock);
-
-	io_uring_cmd_done(cmd, ret, 0, issue_flags);
-}
-
 /*
  * This prepares and sends the ring request in fuse-uring task context.
  * User buffers are not mapped yet - the application does not have permission

-- 
2.43.0



* [PATCH v3 4/4] fuse: fine-grained request ftraces
From: Bernd Schubert @ 2025-04-03 20:22 UTC (permalink / raw)
  To: Miklos Szeredi, Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez
  Cc: linux-fsdevel, Joanne Koong, Josef Bacik, Bernd Schubert

Rename trace_fuse_request_send to trace_fuse_request_enqueue
Add trace_fuse_request_send
Add trace_fuse_request_bg_enqueue

This helps to track the entire request time and the time spent
in different queues.
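
For illustration, a minimal userspace sketch (separate from the libfuse
scripts linked in the cover letter) of pairing the new events by request
unique; the uniques and timestamps are made-up sample values, only the
pairing logic matters:

#include <stdint.h>
#include <stdio.h>

struct sample {
	uint64_t unique;	/* req->in.h.unique from the trace event */
	double ts;		/* trace timestamp in seconds */
};

/* Hypothetical fuse_request_enqueue / fuse_request_send samples */
static const struct sample enq[]     = { { 8, 1.000 }, { 16, 1.002 } };
static const struct sample send_ev[] = { { 8, 1.005 }, { 16, 1.010 } };

int main(void)
{
	unsigned int i, j;

	/* Queue latency = send timestamp - enqueue timestamp, matched by unique. */
	for (i = 0; i < 2; i++)
		for (j = 0; j < 2; j++)
			if (enq[i].unique == send_ev[j].unique)
				printf("req %llu spent %.3fs queued\n",
				       (unsigned long long)enq[i].unique,
				       send_ev[j].ts - enq[i].ts);
	return 0;
}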

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev.c        |  7 ++++++-
 fs/fuse/dev_uring.c  |  2 ++
 fs/fuse/fuse_trace.h | 57 +++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1ccf5a9c61ae2b11bc1d0b799c08e6da908a9782..8e1a95f80e5454d1351ecb90beacbb35779731bb 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -281,7 +281,9 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
 	req->in.h.len = sizeof(struct fuse_in_header) +
 		fuse_len_args(req->args->in_numargs,
 			      (struct fuse_arg *) req->args->in_args);
-	trace_fuse_request_send(req);
+
+	/* enqueue, as it is sent to the "fiq->ops" queue */
+	trace_fuse_request_enqueue(req);
 	fiq->ops->send_req(fiq, req);
 }
 
@@ -580,6 +582,8 @@ static int fuse_request_queue_background(struct fuse_req *req)
 	}
 	__set_bit(FR_ISREPLY, &req->flags);
 
+	trace_fuse_request_bg_enqueue(req);
+
 #ifdef CONFIG_FUSE_IO_URING
 	if (fuse_uring_ready(fc))
 		return fuse_request_queue_background_uring(fc, req);
@@ -1314,6 +1318,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 	clear_bit(FR_PENDING, &req->flags);
 	list_del_init(&req->list);
 	spin_unlock(&fiq->lock);
+	trace_fuse_request_send(req);
 
 	args = req->args;
 	reqsize = req->in.h.len;
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index c5cb2aea75af523e22f539c8e18cfd0d6e771ffc..e5ed146b990e12c6cc2a18aaa9b527c276870aba 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -7,6 +7,7 @@
 #include "fuse_i.h"
 #include "dev_uring_i.h"
 #include "fuse_dev_i.h"
+#include "fuse_trace.h"
 
 #include <linux/fs.h>
 #include <linux/io_uring/cmd.h>
@@ -678,6 +679,7 @@ static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
 	ent->cmd = NULL;
 	spin_unlock(&queue->lock);
 
+	trace_fuse_request_send(ent->fuse_req);
 	io_uring_cmd_done(cmd, ret, 0, issue_flags);
 }
 
diff --git a/fs/fuse/fuse_trace.h b/fs/fuse/fuse_trace.h
index bbe9ddd8c71696ddcbca055f6c4c451661bb4444..393c630e7726356da16add7da4b5913b9f725b25 100644
--- a/fs/fuse/fuse_trace.h
+++ b/fs/fuse/fuse_trace.h
@@ -77,30 +77,55 @@ OPCODES
 #define EM(a, b)	{a, b},
 #define EMe(a, b)	{a, b}
 
-TRACE_EVENT(fuse_request_send,
+#define FUSE_REQ_TRACE_FIELDS				\
+	__field(dev_t,			connection)	\
+	__field(uint64_t,		unique)		\
+	__field(enum fuse_opcode,	opcode)		\
+	__field(uint32_t,		len)		\
+
+#define FUSE_REQ_TRACE_ASSIGN(req)				\
+	do {							\
+		__entry->connection	= req->fm->fc->dev;	\
+		__entry->unique		= req->in.h.unique;	\
+		__entry->opcode		= req->in.h.opcode;	\
+		__entry->len		= req->in.h.len;	\
+	} while (0)
+
+
+TRACE_EVENT(fuse_request_enqueue,
 	TP_PROTO(const struct fuse_req *req),
-
 	TP_ARGS(req),
-
-	TP_STRUCT__entry(
-		__field(dev_t,			connection)
-		__field(uint64_t,		unique)
-		__field(enum fuse_opcode,	opcode)
-		__field(uint32_t,		len)
-	),
-
-	TP_fast_assign(
-		__entry->connection	=	req->fm->fc->dev;
-		__entry->unique		=	req->in.h.unique;
-		__entry->opcode		=	req->in.h.opcode;
-		__entry->len		=	req->in.h.len;
-	),
+	TP_STRUCT__entry(FUSE_REQ_TRACE_FIELDS),
+	TP_fast_assign(FUSE_REQ_TRACE_ASSIGN(req)),
 
 	TP_printk("connection %u req %llu opcode %u (%s) len %u ",
 		  __entry->connection, __entry->unique, __entry->opcode,
 		  __print_symbolic(__entry->opcode, OPCODES), __entry->len)
 );
 
+TRACE_EVENT(fuse_request_bg_enqueue,
+	TP_PROTO(const struct fuse_req *req),
+	TP_ARGS(req),
+	TP_STRUCT__entry(FUSE_REQ_TRACE_FIELDS),
+	TP_fast_assign(FUSE_REQ_TRACE_ASSIGN(req)),
+
+	TP_printk("connection %u req %llu opcode %u (%s) len %u ",
+		  __entry->connection, __entry->unique, __entry->opcode,
+		  __print_symbolic(__entry->opcode, OPCODES), __entry->len)
+);
+
+TRACE_EVENT(fuse_request_send,
+	TP_PROTO(const struct fuse_req *req),
+	TP_ARGS(req),
+	TP_STRUCT__entry(FUSE_REQ_TRACE_FIELDS),
+	TP_fast_assign(FUSE_REQ_TRACE_ASSIGN(req)),
+
+	TP_printk("connection %u req %llu opcode %u (%s) len %u ",
+		  __entry->connection, __entry->unique, __entry->opcode,
+		  __print_symbolic(__entry->opcode, OPCODES), __entry->len)
+);
+
+
 TRACE_EVENT(fuse_request_end,
 	TP_PROTO(const struct fuse_req *req),
 

-- 
2.43.0



* Re: [PATCH v3 1/4] fuse: Make the fuse unique value a per-cpu counter
From: Miklos Szeredi @ 2025-04-04 12:43 UTC (permalink / raw)
  To: Bernd Schubert
  Cc: Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez, linux-fsdevel,
	Joanne Koong, Josef Bacik

On Thu, 3 Apr 2025 at 22:23, Bernd Schubert <bschubert@ddn.com> wrote:

> +/**
> + * Get the next unique ID for a request
> + */
> +static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
> +{
> +       int step = FUSE_REQ_ID_STEP * (task_cpu(current));
> +       u64 cntr = this_cpu_inc_return(*fiq->reqctr);
> +
> +       return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;

Thinking a bit... this looks wrong.

The reason is that the task could be migrated to a different CPU
between the task_cpu() and the this_cpu_inc_return(), resulting in a
possibly duplicated value.

This could be fixed with a preempt_disable()/preempt_enable() pair,
but I think it would be cleaner to go with my original idea and
initialize the percpu counters to  CPUID and increment by NR_CPU *
FUSE_REQ_ID_STEP when fetching a new value.
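
For illustration, a userspace sketch of that race with the v3 scheme,
using assumed values (FUSE_REQ_ID_STEP = 2, NR_CPUS = 4); task_cpu()
and the per-cpu counters are modelled by plain variables:

#include <stdint.h>
#include <stdio.h>

#define FUSE_REQ_ID_STEP 2ULL
#define NR_CPUS          4ULL

static uint64_t reqctr[NR_CPUS];	/* per-cpu counters, all start at 0 */

/* seen_cpu is what task_cpu() returned, inc_cpu is where the task ends
 * up running this_cpu_inc_return() after a migration. */
static uint64_t make_unique(unsigned int seen_cpu, unsigned int inc_cpu)
{
	uint64_t step = FUSE_REQ_ID_STEP * seen_cpu;
	uint64_t cntr = ++reqctr[inc_cpu];

	return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;
}

int main(void)
{
	/* Task A reads task_cpu() == 0, is migrated, increments CPU1's counter. */
	uint64_t a = make_unique(0, 1);
	/* Task B stays on CPU0 for both steps. */
	uint64_t b = make_unique(0, 0);

	/* Both end up as 1 * 2 * 4 + 0 == 8: a duplicated unique. */
	printf("a=%llu b=%llu duplicate=%s\n",
	       (unsigned long long)a, (unsigned long long)b,
	       a == b ? "yes" : "no");
	return 0;
}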

Thanks,
Miklos


* Re: [PATCH v3 1/4] fuse: Make the fuse unique value a per-cpu counter
From: Bernd Schubert @ 2025-04-04 13:19 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Vivek Goyal, Stefan Hajnoczi, Eugenio Pérez,
	linux-fsdevel@vger.kernel.org, Joanne Koong, Josef Bacik

On 4/4/25 14:43, Miklos Szeredi wrote:
> On Thu, 3 Apr 2025 at 22:23, Bernd Schubert <bschubert@ddn.com> wrote:
> 
>> +/**
>> + * Get the next unique ID for a request
>> + */
>> +static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
>> +{
>> +       int step = FUSE_REQ_ID_STEP * (task_cpu(current));
>> +       u64 cntr = this_cpu_inc_return(*fiq->reqctr);
>> +
>> +       return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;
> 
> Thinking a bit... this looks wrong.
> 
> The reason is that the task could be migrated to a different CPU
> between the task_cpu() and the this_cpu_inc_return(), resulting in a
> possibly duplicated value.
> 
> This could be fixed with a preempt_disable()/preempt_enable() pair,
> but I think it would be cleaner to go with my original idea and
> initialize the percpu counters to the CPU ID and increment by
> NR_CPUS * FUSE_REQ_ID_STEP when fetching a new value.
> 

Oh right, I guess something like this

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 80a526eaba38..eac26ee654ca 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1074,10 +1074,7 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
  */
 static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
 {
-       int step = FUSE_REQ_ID_STEP * (task_cpu(current));
-       u64 cntr = this_cpu_inc_return(*fiq->reqctr);
-
-       return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step;
+       return this_cpu_add_return(*fiq->reqctr, FUSE_REQ_ID_STEP * NR_CPUS);
 }
 
 /** Device operations */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7e1066c174d0..463cf7797e1b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -930,7 +930,13 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
        memset(fiq, 0, sizeof(struct fuse_iqueue));
        spin_lock_init(&fiq->lock);
        init_waitqueue_head(&fiq->waitq);
+       int cpu;
+
        fiq->reqctr = alloc_percpu(u64);
+       for_each_possible_cpu(cpu) {
+               *per_cpu_ptr(fiq->reqctr, cpu) = cpu * FUSE_REQ_ID_STEP;
+       }
+
        INIT_LIST_HEAD(&fiq->pending);
        INIT_LIST_HEAD(&fiq->interrupts);
        fiq->forget_list_tail = &fiq->forget_list_head;




I first need to test and think about it again, and I'm currently busy
with something else - a new version follows later.


Thanks,
Bernd

