linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] fuse: cleanup request queuing towards virtiofs
@ 2024-05-29 15:52 Miklos Szeredi
  2024-05-29 18:32 ` Stefan Hajnoczi
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Miklos Szeredi @ 2024-05-29 15:52 UTC (permalink / raw)
  To: linux-fsdevel
  Cc: Peter-Jan Gootzen, Jingbo Xu, Stefan Hajnoczi, Yoray Zack,
	Vivek Goyal, virtualization

Virtiofs has its own queuing mechanism, but still requests are first queued
on fiq->pending to be immediately dequeued and queued onto the virtio
queue.

The queuing on fiq->pending is unnecessary and might even have some
performance impact due to being a contention point.

Forget requests are handled similarly.

Move the queuing of requests and forgets into the fiq->ops->*.
fuse_iqueue_ops are renamed to reflect the new semantics.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c       | 159 ++++++++++++++++++++++++--------------------
 fs/fuse/fuse_i.h    |  19 ++----
 fs/fuse/virtio_fs.c |  41 ++++--------
 3 files changed, 106 insertions(+), 113 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 9eb191b5c4de..a4f510f1b1a4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -192,10 +192,22 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 }
 EXPORT_SYMBOL_GPL(fuse_len_args);
 
-u64 fuse_get_unique(struct fuse_iqueue *fiq)
+static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
 {
 	fiq->reqctr += FUSE_REQ_ID_STEP;
 	return fiq->reqctr;
+
+}
+
+u64 fuse_get_unique(struct fuse_iqueue *fiq)
+{
+	u64 ret;
+
+	spin_lock(&fiq->lock);
+	ret = fuse_get_unique_locked(fiq);
+	spin_unlock(&fiq->lock);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(fuse_get_unique);
 
@@ -215,22 +227,68 @@ __releases(fiq->lock)
 	spin_unlock(&fiq->lock);
 }
 
+static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
+{
+	spin_lock(&fiq->lock);
+	if (fiq->connected) {
+		fiq->forget_list_tail->next = forget;
+		fiq->forget_list_tail = forget;
+		fuse_dev_wake_and_unlock(fiq);
+	} else {
+		kfree(forget);
+		spin_unlock(&fiq->lock);
+	}
+}
+
+static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
+{
+	spin_lock(&fiq->lock);
+	if (list_empty(&req->intr_entry)) {
+		list_add_tail(&req->intr_entry, &fiq->interrupts);
+		/*
+		 * Pairs with smp_mb() implied by test_and_set_bit()
+		 * from fuse_request_end().
+		 */
+		smp_mb();
+		if (test_bit(FR_FINISHED, &req->flags)) {
+			list_del_init(&req->intr_entry);
+			spin_unlock(&fiq->lock);
+			return;
+		}
+		fuse_dev_wake_and_unlock(fiq);
+	} else {
+		spin_unlock(&fiq->lock);
+	}
+}
+
+static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
+{
+	spin_lock(&fiq->lock);
+	if (fiq->connected) {
+		if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+			req->in.h.unique = fuse_get_unique_locked(fiq);
+		list_add_tail(&req->list, &fiq->pending);
+		fuse_dev_wake_and_unlock(fiq);
+	} else {
+		spin_unlock(&fiq->lock);
+		req->out.h.error = -ENOTCONN;
+		fuse_request_end(req);
+	}
+}
+
 const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
-	.wake_forget_and_unlock		= fuse_dev_wake_and_unlock,
-	.wake_interrupt_and_unlock	= fuse_dev_wake_and_unlock,
-	.wake_pending_and_unlock	= fuse_dev_wake_and_unlock,
+	.send_forget	= fuse_dev_queue_forget,
+	.send_interrupt	= fuse_dev_queue_interrupt,
+	.send_req	= fuse_dev_queue_req,
 };
 EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
 
-static void queue_request_and_unlock(struct fuse_iqueue *fiq,
-				     struct fuse_req *req)
-__releases(fiq->lock)
+static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
 	req->in.h.len = sizeof(struct fuse_in_header) +
 		fuse_len_args(req->args->in_numargs,
 			      (struct fuse_arg *) req->args->in_args);
-	list_add_tail(&req->list, &fiq->pending);
-	fiq->ops->wake_pending_and_unlock(fiq);
+	fiq->ops->send_req(fiq, req);
 }
 
 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
@@ -241,15 +298,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 	forget->forget_one.nodeid = nodeid;
 	forget->forget_one.nlookup = nlookup;
 
-	spin_lock(&fiq->lock);
-	if (fiq->connected) {
-		fiq->forget_list_tail->next = forget;
-		fiq->forget_list_tail = forget;
-		fiq->ops->wake_forget_and_unlock(fiq);
-	} else {
-		kfree(forget);
-		spin_unlock(&fiq->lock);
-	}
+	fiq->ops->send_forget(fiq, forget);
 }
 
 static void flush_bg_queue(struct fuse_conn *fc)
@@ -263,9 +312,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
 		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
 		list_del(&req->list);
 		fc->active_background++;
-		spin_lock(&fiq->lock);
-		req->in.h.unique = fuse_get_unique(fiq);
-		queue_request_and_unlock(fiq, req);
+		fuse_send_one(fiq, req);
 	}
 }
 
@@ -335,29 +382,12 @@ static int queue_interrupt(struct fuse_req *req)
 {
 	struct fuse_iqueue *fiq = &req->fm->fc->iq;
 
-	spin_lock(&fiq->lock);
 	/* Check for we've sent request to interrupt this req */
-	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
-		spin_unlock(&fiq->lock);
+	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
 		return -EINVAL;
-	}
 
-	if (list_empty(&req->intr_entry)) {
-		list_add_tail(&req->intr_entry, &fiq->interrupts);
-		/*
-		 * Pairs with smp_mb() implied by test_and_set_bit()
-		 * from fuse_request_end().
-		 */
-		smp_mb();
-		if (test_bit(FR_FINISHED, &req->flags)) {
-			list_del_init(&req->intr_entry);
-			spin_unlock(&fiq->lock);
-			return 0;
-		}
-		fiq->ops->wake_interrupt_and_unlock(fiq);
-	} else {
-		spin_unlock(&fiq->lock);
-	}
+	fiq->ops->send_interrupt(fiq, req);
+
 	return 0;
 }
 
@@ -412,21 +442,15 @@ static void __fuse_request_send(struct fuse_req *req)
 	struct fuse_iqueue *fiq = &req->fm->fc->iq;
 
 	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
-	spin_lock(&fiq->lock);
-	if (!fiq->connected) {
-		spin_unlock(&fiq->lock);
-		req->out.h.error = -ENOTCONN;
-	} else {
-		req->in.h.unique = fuse_get_unique(fiq);
-		/* acquire extra reference, since request is still needed
-		   after fuse_request_end() */
-		__fuse_get_request(req);
-		queue_request_and_unlock(fiq, req);
 
-		request_wait_answer(req);
-		/* Pairs with smp_wmb() in fuse_request_end() */
-		smp_rmb();
-	}
+	/* acquire extra reference, since request is still needed after
+	   fuse_request_end() */
+	__fuse_get_request(req);
+	fuse_send_one(fiq, req);
+
+	request_wait_answer(req);
+	/* Pairs with smp_wmb() in fuse_request_end() */
+	smp_rmb();
 }
 
 static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
@@ -581,7 +605,6 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
 {
 	struct fuse_req *req;
 	struct fuse_iqueue *fiq = &fm->fc->iq;
-	int err = 0;
 
 	req = fuse_get_req(fm, false);
 	if (IS_ERR(req))
@@ -592,16 +615,9 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
 
 	fuse_args_to_req(req, args);
 
-	spin_lock(&fiq->lock);
-	if (fiq->connected) {
-		queue_request_and_unlock(fiq, req);
-	} else {
-		err = -ENODEV;
-		spin_unlock(&fiq->lock);
-		fuse_put_request(req);
-	}
+	fuse_send_one(fiq, req);
 
-	return err;
+	return 0;
 }
 
 /*
@@ -1076,9 +1092,9 @@ __releases(fiq->lock)
 	return err ? err : reqsize;
 }
 
-struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
-					     unsigned int max,
-					     unsigned int *countp)
+static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
+						    unsigned int max,
+						    unsigned int *countp)
 {
 	struct fuse_forget_link *head = fiq->forget_list_head.next;
 	struct fuse_forget_link **newhead = &head;
@@ -1097,7 +1113,6 @@ struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
 
 	return head;
 }
-EXPORT_SYMBOL(fuse_dequeue_forget);
 
 static int fuse_read_single_forget(struct fuse_iqueue *fiq,
 				   struct fuse_copy_state *cs,
@@ -1112,7 +1127,7 @@ __releases(fiq->lock)
 	struct fuse_in_header ih = {
 		.opcode = FUSE_FORGET,
 		.nodeid = forget->forget_one.nodeid,
-		.unique = fuse_get_unique(fiq),
+		.unique = fuse_get_unique_locked(fiq),
 		.len = sizeof(ih) + sizeof(arg),
 	};
 
@@ -1143,7 +1158,7 @@ __releases(fiq->lock)
 	struct fuse_batch_forget_in arg = { .count = 0 };
 	struct fuse_in_header ih = {
 		.opcode = FUSE_BATCH_FORGET,
-		.unique = fuse_get_unique(fiq),
+		.unique = fuse_get_unique_locked(fiq),
 		.len = sizeof(ih) + sizeof(arg),
 	};
 
@@ -1822,7 +1837,7 @@ static void fuse_resend(struct fuse_conn *fc)
 	spin_lock(&fiq->lock);
 	/* iq and pq requests are both oldest to newest */
 	list_splice(&to_queue, &fiq->pending);
-	fiq->ops->wake_pending_and_unlock(fiq);
+	fuse_dev_wake_and_unlock(fiq);
 }
 
 static int fuse_notify_resend(struct fuse_conn *fc)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index f23919610313..33b21255817e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -449,22 +449,19 @@ struct fuse_iqueue;
  */
 struct fuse_iqueue_ops {
 	/**
-	 * Signal that a forget has been queued
+	 * Send one forget
 	 */
-	void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
-		__releases(fiq->lock);
+	void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link);
 
 	/**
-	 * Signal that an INTERRUPT request has been queued
+	 * Send interrupt for request
 	 */
-	void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
-		__releases(fiq->lock);
+	void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req);
 
 	/**
-	 * Signal that a request has been queued
+	 * Send one request
 	 */
-	void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
-		__releases(fiq->lock);
+	void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req);
 
 	/**
 	 * Clean up when fuse_iqueue is destroyed
@@ -1053,10 +1050,6 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
 
 struct fuse_forget_link *fuse_alloc_forget(void);
 
-struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
-					     unsigned int max,
-					     unsigned int *countp);
-
 /*
  * Initialize READ or READDIR request
  */
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 1a52a51b6b07..690e508dbc4d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1089,22 +1089,13 @@ static struct virtio_driver virtio_fs_driver = {
 #endif
 };
 
-static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
-__releases(fiq->lock)
+static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
 {
-	struct fuse_forget_link *link;
 	struct virtio_fs_forget *forget;
 	struct virtio_fs_forget_req *req;
-	struct virtio_fs *fs;
-	struct virtio_fs_vq *fsvq;
-	u64 unique;
-
-	link = fuse_dequeue_forget(fiq, 1, NULL);
-	unique = fuse_get_unique(fiq);
-
-	fs = fiq->priv;
-	fsvq = &fs->vqs[VQ_HIPRIO];
-	spin_unlock(&fiq->lock);
+	struct virtio_fs *fs = fiq->priv;
+	struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
+	u64 unique = fuse_get_unique(fiq);
 
 	/* Allocate a buffer for the request */
 	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
@@ -1124,8 +1115,7 @@ __releases(fiq->lock)
 	kfree(link);
 }
 
-static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
-__releases(fiq->lock)
+static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
 	/*
 	 * TODO interrupts.
@@ -1134,7 +1124,6 @@ __releases(fiq->lock)
 	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
 	 * with shared lock between host and guest.
 	 */
-	spin_unlock(&fiq->lock);
 }
 
 /* Count number of scatter-gather elements required */
@@ -1339,21 +1328,17 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 	return ret;
 }
 
-static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
-__releases(fiq->lock)
+static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 {
 	unsigned int queue_id;
 	struct virtio_fs *fs;
-	struct fuse_req *req;
 	struct virtio_fs_vq *fsvq;
 	int ret;
 
-	WARN_ON(list_empty(&fiq->pending));
-	req = list_last_entry(&fiq->pending, struct fuse_req, list);
+	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+		req->in.h.unique = fuse_get_unique(fiq);
+
 	clear_bit(FR_PENDING, &req->flags);
-	list_del_init(&req->list);
-	WARN_ON(!list_empty(&fiq->pending));
-	spin_unlock(&fiq->lock);
 
 	fs = fiq->priv;
 	queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
@@ -1393,10 +1378,10 @@ __releases(fiq->lock)
 }
 
 static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
-	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
-	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
-	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
-	.release			= virtio_fs_fiq_release,
+	.send_forget	= virtio_fs_send_forget,
+	.send_interrupt	= virtio_fs_send_interrupt,
+	.send_req	= virtio_fs_send_req,
+	.release	= virtio_fs_fiq_release,
 };
 
 static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
-- 
2.45.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-29 15:52 [PATCH] fuse: cleanup request queuing towards virtiofs Miklos Szeredi
@ 2024-05-29 18:32 ` Stefan Hajnoczi
  2024-05-30  9:06   ` Miklos Szeredi
                     ` (2 more replies)
  2024-05-30  3:20 ` Jingbo Xu
  2024-09-23 10:33 ` Lai, Yi
  2 siblings, 3 replies; 15+ messages in thread
From: Stefan Hajnoczi @ 2024-05-29 18:32 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: linux-fsdevel, Peter-Jan Gootzen, Jingbo Xu, Yoray Zack,
	Vivek Goyal, virtualization

[-- Attachment #1: Type: text/plain, Size: 1076 bytes --]

On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> Virtiofs has its own queing mechanism, but still requests are first queued
> on fiq->pending to be immediately dequeued and queued onto the virtio
> queue.
> 
> The queuing on fiq->pending is unnecessary and might even have some
> performance impact due to being a contention point.
> 
> Forget requests are handled similarly.
> 
> Move the queuing of requests and forgets into the fiq->ops->*.
> fuse_iqueue_ops are renamed to reflect the new semantics.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++--------------------
>  fs/fuse/fuse_i.h    |  19 ++----
>  fs/fuse/virtio_fs.c |  41 ++++--------
>  3 files changed, 106 insertions(+), 113 deletions(-)

This is a little scary but I can't think of a scenario where directly
dispatching requests to virtqueues is a problem.

Is there someone who can run single and multiqueue virtiofs performance
benchmarks?

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-29 15:52 [PATCH] fuse: cleanup request queuing towards virtiofs Miklos Szeredi
  2024-05-29 18:32 ` Stefan Hajnoczi
@ 2024-05-30  3:20 ` Jingbo Xu
  2024-05-30  9:00   ` Miklos Szeredi
  2024-09-23 10:33 ` Lai, Yi
  2 siblings, 1 reply; 15+ messages in thread
From: Jingbo Xu @ 2024-05-30  3:20 UTC (permalink / raw)
  To: Miklos Szeredi, linux-fsdevel
  Cc: Peter-Jan Gootzen, Stefan Hajnoczi, Yoray Zack, Vivek Goyal,
	virtualization



On 5/29/24 11:52 PM, Miklos Szeredi wrote:
> Virtiofs has its own queing mechanism, but still requests are first queued
> on fiq->pending to be immediately dequeued and queued onto the virtio
> queue.
> 
> The queuing on fiq->pending is unnecessary and might even have some
> performance impact due to being a contention point.
> 
> Forget requests are handled similarly.
> 
> Move the queuing of requests and forgets into the fiq->ops->*.
> fuse_iqueue_ops are renamed to reflect the new semantics.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---

[...]

> +static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
> +{
> +	spin_lock(&fiq->lock);
> +	if (list_empty(&req->intr_entry)) {
> +		list_add_tail(&req->intr_entry, &fiq->interrupts);
> +		/*
> +		 * Pairs with smp_mb() implied by test_and_set_bit()
> +		 * from fuse_request_end().
> +		 */
> +		smp_mb();
> +		if (test_bit(FR_FINISHED, &req->flags)) {
> +			list_del_init(&req->intr_entry);
> +			spin_unlock(&fiq->lock			^
		missing "return" here?

> +		}
> +		fuse_dev_wake_and_unlock(fiq);
> +	} else {
> +		spin_unlock(&fiq->lock);
> +	}
> +}

[...]

>  static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
> @@ -581,7 +605,6 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
>  {
>  	struct fuse_req *req;
>  	struct fuse_iqueue *fiq = &fm->fc->iq;
> -	int err = 0;
>  
>  	req = fuse_get_req(fm, false);
>  	if (IS_ERR(req))
> @@ -592,16 +615,9 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
>  
>  	fuse_args_to_req(req, args);
>  
> -	spin_lock(&fiq->lock);
> -	if (fiq->connected) {
> -		queue_request_and_unlock(fiq, req);
> -	} else {
> -		err = -ENODEV;
> -		spin_unlock(&fiq->lock);
> -		fuse_put_request(req);
> -	}
> +	fuse_send_one(fiq, req);
>  
> -	return err;
> +	return 0;
>  }

There's a minor changed behavior visible to users.  Prior to the patch,
the FUSE_NOTIFY_RETRIEVE will returns -ENODEV when the connection is
aborted, but now it returns 0.

It seems only example/notify_store_retrieve.c has used
FUSE_NOTIFY_RETRIEVE in libfuse.  I'm not sure if this change really
matters.

Maybe we could check req->out.h.error after fuse_send_one() returns?


-- 
Thanks,
Jingbo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-30  3:20 ` Jingbo Xu
@ 2024-05-30  9:00   ` Miklos Szeredi
  2024-05-30 15:36     ` Jingbo Xu
  2024-05-30 17:07     ` Bernd Schubert
  0 siblings, 2 replies; 15+ messages in thread
From: Miklos Szeredi @ 2024-05-30  9:00 UTC (permalink / raw)
  To: Jingbo Xu
  Cc: Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen, Stefan Hajnoczi,
	Yoray Zack, Vivek Goyal, virtualization

On Thu, 30 May 2024 at 05:20, Jingbo Xu <jefflexu@linux.alibaba.com> wrote:

> > +             if (test_bit(FR_FINISHED, &req->flags)) {
> > +                     list_del_init(&req->intr_entry);
> > +                     spin_unlock(&fiq->lock                  ^
>                 missing "return" here?

Well spotted.  Thanks.

> > -             err = -ENODEV;
> > -             spin_unlock(&fiq->lock);
> > -             fuse_put_request(req);
> > -     }
> > +     fuse_send_one(fiq, req);
> >
> > -     return err;
> > +     return 0;
> >  }
>
> There's a minor changed behavior visible to users.  Prior to the patch,
> the FUSE_NOTIFY_RETRIEVE will returns -ENODEV when the connection is
> aborted, but now it returns 0.
>
> It seems only example/notify_store_retrieve.c has used
> FUSE_NOTIFY_RETRIEVE in libfuse.  I'm not sure if this change really
> matters.

It will return -ENOTCONN from  fuse_simple_notify_reply() ->
fuse_get_req().  The -ENODEV would be a very short transient error
during the abort, so it doesn't matter.

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-29 18:32 ` Stefan Hajnoczi
@ 2024-05-30  9:06   ` Miklos Szeredi
  2024-05-30 13:38   ` Peter-Jan Gootzen
  2024-06-05 10:40   ` Peter-Jan Gootzen
  2 siblings, 0 replies; 15+ messages in thread
From: Miklos Szeredi @ 2024-05-30  9:06 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen, Jingbo Xu,
	Yoray Zack, Vivek Goyal, virtualization

On Wed, 29 May 2024 at 20:32, Stefan Hajnoczi <stefanha@redhat.com> wrote:

> This is a little scary but I can't think of a scenario where directly
> dispatching requests to virtqueues is a problem.

It would be scary if it wasn't a simple transformation:

insert element on empty list, remove only element from list -> do nothing

Thanks,
Miklos

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-29 18:32 ` Stefan Hajnoczi
  2024-05-30  9:06   ` Miklos Szeredi
@ 2024-05-30 13:38   ` Peter-Jan Gootzen
  2024-06-05 10:40   ` Peter-Jan Gootzen
  2 siblings, 0 replies; 15+ messages in thread
From: Peter-Jan Gootzen @ 2024-05-30 13:38 UTC (permalink / raw)
  To: stefanha@redhat.com, mszeredi@redhat.com
  Cc: vgoyal@redhat.com, jefflexu@linux.alibaba.com,
	linux-fsdevel@vger.kernel.org, Yoray Zack,
	virtualization@lists.linux.dev

On Wed, 2024-05-29 at 14:32 -0400, Stefan Hajnoczi wrote:
> On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> > Virtiofs has its own queing mechanism, but still requests are first
> > queued
> > on fiq->pending to be immediately dequeued and queued onto the
> > virtio
> > queue.
> > 
> > The queuing on fiq->pending is unnecessary and might even have some
> > performance impact due to being a contention point.
> > 
> > Forget requests are handled similarly.
> > 
> > Move the queuing of requests and forgets into the fiq->ops->*.
> > fuse_iqueue_ops are renamed to reflect the new semantics.
> > 
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++-----------------
> > ---
> >  fs/fuse/fuse_i.h    |  19 ++----
> >  fs/fuse/virtio_fs.c |  41 ++++--------
> >  3 files changed, 106 insertions(+), 113 deletions(-)
> 
> This is a little scary but I can't think of a scenario where directly
> dispatching requests to virtqueues is a problem.
> 
> Is there someone who can run single and multiqueue virtiofs
> performance
> benchmarks?

Yes we can provide that with our BlueField DPU setup. I will review,
test and perform some experiments on the patch and get back to you all
on this with some numbers.

> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

- Peter-Jan


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-30  9:00   ` Miklos Szeredi
@ 2024-05-30 15:36     ` Jingbo Xu
  2024-05-30 17:07     ` Bernd Schubert
  1 sibling, 0 replies; 15+ messages in thread
From: Jingbo Xu @ 2024-05-30 15:36 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen, Stefan Hajnoczi,
	Yoray Zack, Vivek Goyal, virtualization



On 5/30/24 5:00 PM, Miklos Szeredi wrote:
> On Thu, 30 May 2024 at 05:20, Jingbo Xu <jefflexu@linux.alibaba.com> wrote:
> 
>>> +             if (test_bit(FR_FINISHED, &req->flags)) {
>>> +                     list_del_init(&req->intr_entry);
>>> +                     spin_unlock(&fiq->lock                  ^
>>                 missing "return" here?
> 
> Well spotted.  Thanks.
> 
>>> -             err = -ENODEV;
>>> -             spin_unlock(&fiq->lock);
>>> -             fuse_put_request(req);
>>> -     }
>>> +     fuse_send_one(fiq, req);
>>>
>>> -     return err;
>>> +     return 0;
>>>  }
>>
>> There's a minor changed behavior visible to users.  Prior to the patch,
>> the FUSE_NOTIFY_RETRIEVE will returns -ENODEV when the connection is
>> aborted, but now it returns 0.
>>
>> It seems only example/notify_store_retrieve.c has used
>> FUSE_NOTIFY_RETRIEVE in libfuse.  I'm not sure if this change really
>> matters.
> 
> It will return -ENOTCONN from  fuse_simple_notify_reply() ->
> fuse_get_req().  The -ENODEV would be a very short transient error
> during the abort, so it doesn't matter.

Okay, fair enough.  Feel free to add:

Reviewed-by: Jingbo Xu <jefflexu@linux.alibaba.com>


-- 
Thanks,
Jingbo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-30  9:00   ` Miklos Szeredi
  2024-05-30 15:36     ` Jingbo Xu
@ 2024-05-30 17:07     ` Bernd Schubert
  1 sibling, 0 replies; 15+ messages in thread
From: Bernd Schubert @ 2024-05-30 17:07 UTC (permalink / raw)
  To: Miklos Szeredi, Jingbo Xu
  Cc: Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen, Stefan Hajnoczi,
	Yoray Zack, Vivek Goyal, virtualization



On 5/30/24 11:00, Miklos Szeredi wrote:
> On Thu, 30 May 2024 at 05:20, Jingbo Xu <jefflexu@linux.alibaba.com> wrote:
> 
>>> +             if (test_bit(FR_FINISHED, &req->flags)) {
>>> +                     list_del_init(&req->intr_entry);
>>> +                     spin_unlock(&fiq->lock                  ^
>>                 missing "return" here?
> 
> Well spotted.  Thanks.
> 
>>> -             err = -ENODEV;
>>> -             spin_unlock(&fiq->lock);
>>> -             fuse_put_request(req);
>>> -     }
>>> +     fuse_send_one(fiq, req);
>>>
>>> -     return err;
>>> +     return 0;
>>>  }
>>
>> There's a minor changed behavior visible to users.  Prior to the patch,
>> the FUSE_NOTIFY_RETRIEVE will returns -ENODEV when the connection is
>> aborted, but now it returns 0.
>>
>> It seems only example/notify_store_retrieve.c has used
>> FUSE_NOTIFY_RETRIEVE in libfuse.  I'm not sure if this change really
>> matters.
> 
> It will return -ENOTCONN from  fuse_simple_notify_reply() ->
> fuse_get_req().  The -ENODEV would be a very short transient error
> during the abort, so it doesn't matter.

example/notify_store_retrieve and example/notify_inval_inode actually
get EBADF on umount. Issue is that FUSE_DESTROY is sent very late only,
but inodes already released. If server side sends a notification in the
wrong moment, it gets EBADF. We would need something like
FUSE_PREPARE_DESTROY that is being send when umount processing starts to
stop notifications. I'm not sure if that exists in the VFS and didn't
have time yet to check.


Thanks,
Bernd

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-29 18:32 ` Stefan Hajnoczi
  2024-05-30  9:06   ` Miklos Szeredi
  2024-05-30 13:38   ` Peter-Jan Gootzen
@ 2024-06-05 10:40   ` Peter-Jan Gootzen
  2024-06-05 11:04     ` Stefan Hajnoczi
  2 siblings, 1 reply; 15+ messages in thread
From: Peter-Jan Gootzen @ 2024-06-05 10:40 UTC (permalink / raw)
  To: stefanha@redhat.com, mszeredi@redhat.com
  Cc: Idan Zach, virtualization@lists.linux.dev,
	jefflexu@linux.alibaba.com, linux-fsdevel@vger.kernel.org,
	Max Gurtovoy, vgoyal@redhat.com, Oren Duer, Yoray Zack

On Wed, 2024-05-29 at 14:32 -0400, Stefan Hajnoczi wrote:
> On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> > Virtiofs has its own queing mechanism, but still requests are first
> > queued
> > on fiq->pending to be immediately dequeued and queued onto the
> > virtio
> > queue.
> > 
> > The queuing on fiq->pending is unnecessary and might even have some
> > performance impact due to being a contention point.
> > 
> > Forget requests are handled similarly.
> > 
> > Move the queuing of requests and forgets into the fiq->ops->*.
> > fuse_iqueue_ops are renamed to reflect the new semantics.
> > 
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++-----------------
> > ---
> >  fs/fuse/fuse_i.h    |  19 ++----
> >  fs/fuse/virtio_fs.c |  41 ++++--------
> >  3 files changed, 106 insertions(+), 113 deletions(-)
> 
> This is a little scary but I can't think of a scenario where directly
> dispatching requests to virtqueues is a problem.
> 
> Is there someone who can run single and multiqueue virtiofs
> performance
> benchmarks?
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

I ran some tests and experiments on the patch (on top of v6.10-rc2) with
our multi-queue capable virtio-fs device. No issues were found.

Experimental system setup (which is not the fastest possible setup nor
the most optimized setup!):
# Host:
   - Dell PowerEdge R7525
   - CPU: 2x AMD EPYC 7413 24-Core
   - VM: QEMU KVM with 24 cores, vCPUs locked to the NUMA nodes on which
the DPU is attached. VFIO-pci device to passthrough the DPU.           
Running a default x86_64 ext4 buildroot with fio installed.
# Virtio-fs device:
   - BlueField-3 DPU
   - CPU: ARM Cortex-A78AE, 16 cores
   - One thread per queue, each busy polling on one request queue
   - Each queue is 1024 descriptors deep
# Workload (deviations are specified in the table):
   - fio 3.34
   - sequential read
   - ioengine=io_uring, single 4GiB file, iodepth=128, bs=256KiB,    
runtime=30s, ramp_time=10s, direct=1
   - T is the number of threads (numjobs=T with thread=1)
   - Q is the number of request queues

| Workload           | Before patch | After patch |
| ------------------ | ------------ | ----------- |
| T=1 Q=1            | 9216MiB/s    | 9201MiB/s   |
| T=2 Q=2            | 10.8GiB/s    | 10.7GiB/s   |
| T=4 Q=4            | 12.6GiB/s    | 12.2GiB/s   |
| T=8 Q=8            | 19.5GiB/s    | 19.7GiB/s   |
| T=16 Q=1           | 9451MiB/s    | 9558MiB/s   |
| T=16 Q=2           | 13.5GiB/s    | 13.4GiB/s   |
| T=16 Q=4           | 11.8GiB/s    | 11.4GiB/s   |
| T=16 Q=8           | 11.1GiB/s    | 10.8GiB/s   |
| T=24 Q=24          | 26.5GiB/s    | 26.5GiB/s   |
| T=24 Q=24 24 files | 26.5GiB/s    | 26.6GiB/s   |
| T=24 Q=24 4k       | 948MiB/s     | 955MiB/s    |

Averaging out those results, the difference is within a reasonable
margin of error (less than 1%). So in this setup's
case we see no difference in performance.
However if the virtio-fs device was more optimized, e.g. if it didn't
copy the data to its memory, then the bottleneck could possibly be on
the driver-side and this patch could show some benefit at those higher
message rates.

So although I would have hoped for some performance increase already
with this setup, I still think this is a good patch and a logical
optimization for high performance virtio-fs devices that might show a
benefit in the future.

Tested-by: Peter-Jan Gootzen <pgootzen@nvidia.com>
Reviewed-by: Peter-Jan Gootzen <pgootzen@nvidia.com>


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-06-05 10:40   ` Peter-Jan Gootzen
@ 2024-06-05 11:04     ` Stefan Hajnoczi
  0 siblings, 0 replies; 15+ messages in thread
From: Stefan Hajnoczi @ 2024-06-05 11:04 UTC (permalink / raw)
  To: Peter-Jan Gootzen
  Cc: mszeredi@redhat.com, Idan Zach, virtualization@lists.linux.dev,
	jefflexu@linux.alibaba.com, linux-fsdevel@vger.kernel.org,
	Max Gurtovoy, vgoyal@redhat.com, Oren Duer, Yoray Zack

[-- Attachment #1: Type: text/plain, Size: 3916 bytes --]

On Wed, Jun 05, 2024 at 10:40:44AM +0000, Peter-Jan Gootzen wrote:
> On Wed, 2024-05-29 at 14:32 -0400, Stefan Hajnoczi wrote:
> > On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> > > Virtiofs has its own queing mechanism, but still requests are first
> > > queued
> > > on fiq->pending to be immediately dequeued and queued onto the
> > > virtio
> > > queue.
> > > 
> > > The queuing on fiq->pending is unnecessary and might even have some
> > > performance impact due to being a contention point.
> > > 
> > > Forget requests are handled similarly.
> > > 
> > > Move the queuing of requests and forgets into the fiq->ops->*.
> > > fuse_iqueue_ops are renamed to reflect the new semantics.
> > > 
> > > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > > ---
> > >  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++-----------------
> > > ---
> > >  fs/fuse/fuse_i.h    |  19 ++----
> > >  fs/fuse/virtio_fs.c |  41 ++++--------
> > >  3 files changed, 106 insertions(+), 113 deletions(-)
> > 
> > This is a little scary but I can't think of a scenario where directly
> > dispatching requests to virtqueues is a problem.
> > 
> > Is there someone who can run single and multiqueue virtiofs
> > performance
> > benchmarks?
> > 
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> 
> I ran some tests and experiments on the patch (on top of v6.10-rc2) with
> our multi-queue capable virtio-fs device. No issues were found.
> 
> Experimental system setup (which is not the fastest possible setup nor
> the most optimized setup!):
> # Host:
>    - Dell PowerEdge R7525
>    - CPU: 2x AMD EPYC 7413 24-Core
>    - VM: QEMU KVM with 24 cores, vCPUs locked to the NUMA nodes on which
> the DPU is attached. VFIO-pci device to passthrough the DPU.           
> Running a default x86_64 ext4 buildroot with fio installed.
> # Virtio-fs device:
>    - BlueField-3 DPU
>    - CPU: ARM Cortex-A78AE, 16 cores
>    - One thread per queue, each busy polling on one request queue
>    - Each queue is 1024 descriptors deep
> # Workload (deviations are specified in the table):
>    - fio 3.34
>    - sequential read
>    - ioengine=io_uring, single 4GiB file, iodepth=128, bs=256KiB,    
> runtime=30s, ramp_time=10s, direct=1
>    - T is the number of threads (numjobs=T with thread=1)
>    - Q is the number of request queues
> 
> | Workload           | Before patch | After patch |
> | ------------------ | ------------ | ----------- |
> | T=1 Q=1            | 9216MiB/s    | 9201MiB/s   |
> | T=2 Q=2            | 10.8GiB/s    | 10.7GiB/s   |
> | T=4 Q=4            | 12.6GiB/s    | 12.2GiB/s   |
> | T=8 Q=8            | 19.5GiB/s    | 19.7GiB/s   |
> | T=16 Q=1           | 9451MiB/s    | 9558MiB/s   |
> | T=16 Q=2           | 13.5GiB/s    | 13.4GiB/s   |
> | T=16 Q=4           | 11.8GiB/s    | 11.4GiB/s   |
> | T=16 Q=8           | 11.1GiB/s    | 10.8GiB/s   |
> | T=24 Q=24          | 26.5GiB/s    | 26.5GiB/s   |
> | T=24 Q=24 24 files | 26.5GiB/s    | 26.6GiB/s   |
> | T=24 Q=24 4k       | 948MiB/s     | 955MiB/s    |
> 
> Averaging out those results, the difference is within a reasonable
> margin of a error (less than 1%). So in this setup's
> case we see no difference in performance.
> However if the virtio-fs device was more optimized, e.g. if it didn't
> copy the data to its memory, then the bottleneck could possibly be on
> the driver-side and this patch could show some benefit at those higher
> message rates.
> 
> So although I would have hoped for some performance increase already
> with this setup, I still think this is a good patch and a logical
> optimization for high performance virtio-fs devices that might show a
> benefit in the future.
> 
> Tested-by: Peter-Jan Gootzen <pgootzen@nvidia.com>
> Reviewed-by: Peter-Jan Gootzen <pgootzen@nvidia.com>

Thank you!

Stefan


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-05-29 15:52 [PATCH] fuse: cleanup request queuing towards virtiofs Miklos Szeredi
  2024-05-29 18:32 ` Stefan Hajnoczi
  2024-05-30  3:20 ` Jingbo Xu
@ 2024-09-23 10:33 ` Lai, Yi
  2024-09-23 22:48   ` Joanne Koong
  2 siblings, 1 reply; 15+ messages in thread
From: Lai, Yi @ 2024-09-23 10:33 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: linux-fsdevel, Peter-Jan Gootzen, Jingbo Xu, Stefan Hajnoczi,
	Yoray Zack, Vivek Goyal, virtualization, yi1.lai

Hi Miklos Szeredi,

Greetings!

I used Syzkaller and found that there is WARNING in fuse_request_end in Linux-next tree - next-20240918.

After bisection and the first bad commit is:
"
5de8acb41c86 fuse: cleanup request queuing towards virtiofs
"

All detailed info can be found at:
https://github.com/laifryiee/syzkaller_logs/tree/main/240922_114402_fuse_request_end
Syzkaller repro code:
https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.c
Syzkaller repro syscall steps:
https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.prog
Syzkaller report:
https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.report
Kconfig(make olddefconfig):
https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/kconfig_origin
Bisect info:
https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/bisect_info.log
bzImage:
https://github.com/laifryiee/syzkaller_logs/raw/main/240922_114402_fuse_request_end/bzImage_55bcd2e0d04c1171d382badef1def1fd04ef66c5
Issue dmesg:
https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/55bcd2e0d04c1171d382badef1def1fd04ef66c5_dmesg.log

"
[   31.577123] ------------[ cut here ]------------
[   31.578842] WARNING: CPU: 1 PID: 1186 at fs/fuse/dev.c:373 fuse_request_end+0x7d2/0x910
[   31.581269] Modules linked in:
[   31.582553] CPU: 1 UID: 0 PID: 1186 Comm: repro Not tainted 6.11.0-next-20240918-55bcd2e0d04c #1
[   31.584332] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[   31.586281] RIP: 0010:fuse_request_end+0x7d2/0x910
[   31.587001] Code: ff 48 8b 7d d0 e8 ae 0f 72 ff e9 c2 fb ff ff e8 a4 0f 72 ff e9 e7 fb ff ff e8 3a 3b 0a ff 0f 0b e9 17 fa ff ff e8 2e 3b 0a ff <0f> 0b e9 c1 f9 ff ff 4c 89 ff e8 af 0f 72 ff e9 82 f8 ff ff e8 a5
[   31.589442] RSP: 0018:ffff88802141f640 EFLAGS: 00010293
[   31.590198] RAX: 0000000000000000 RBX: 0000000000000201 RCX: ffffffff825d5bb2
[   31.591137] RDX: ffff888010b2a500 RSI: ffffffff825d61f2 RDI: 0000000000000001
[   31.592072] RBP: ffff88802141f680 R08: 0000000000000000 R09: ffffed100356f28e
[   31.593010] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88801ab79440
[   31.594062] R13: ffff88801ab79470 R14: ffff88801dcaa000 R15: ffff88800d71fa00
[   31.594820] FS:  00007f812eca2640(0000) GS:ffff88806c500000(0000) knlGS:0000000000000000
[   31.595670] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   31.596299] CR2: 000055b7baa16b20 CR3: 00000000109b0002 CR4: 0000000000770ef0
[   31.597054] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   31.597850] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400
[   31.598595] PKRU: 55555554
[   31.598902] Call Trace:
[   31.599180]  <TASK>
[   31.599439]  ? show_regs+0x6d/0x80
[   31.599805]  ? __warn+0xf3/0x380
[   31.600137]  ? report_bug+0x25e/0x4b0
[   31.600521]  ? fuse_request_end+0x7d2/0x910
[   31.600885]  ? report_bug+0x2cb/0x4b0
[   31.601204]  ? fuse_request_end+0x7d2/0x910
[   31.601564]  ? fuse_request_end+0x7d3/0x910
[   31.601956]  ? handle_bug+0xf1/0x190
[   31.602275]  ? exc_invalid_op+0x3c/0x80
[   31.602595]  ? asm_exc_invalid_op+0x1f/0x30
[   31.602959]  ? fuse_request_end+0x192/0x910
[   31.603301]  ? fuse_request_end+0x7d2/0x910
[   31.603643]  ? fuse_request_end+0x7d2/0x910
[   31.603988]  ? do_raw_spin_unlock+0x15c/0x210
[   31.604366]  fuse_dev_queue_req+0x23c/0x2b0
[   31.604714]  fuse_send_one+0x1d1/0x360
[   31.605031]  fuse_simple_request+0x348/0xd30
[   31.605385]  ? lockdep_hardirqs_on+0x89/0x110
[   31.605755]  fuse_send_open+0x234/0x2f0
[   31.606126]  ? __pfx_fuse_send_open+0x10/0x10
[   31.606487]  ? kasan_save_track+0x18/0x40
[   31.606834]  ? lockdep_init_map_type+0x2df/0x810
[   31.607227]  ? __kasan_check_write+0x18/0x20
[   31.607591]  fuse_file_open+0x2bc/0x770
[   31.607921]  fuse_do_open+0x5d/0xe0
[   31.608215]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[   31.608681]  fuse_dir_open+0x138/0x220
[   31.609005]  do_dentry_open+0x6be/0x1390
[   31.609358]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[   31.609861]  ? __pfx_fuse_dir_open+0x10/0x10
[   31.610240]  vfs_open+0x87/0x3f0
[   31.610523]  ? may_open+0x205/0x430
[   31.610834]  path_openat+0x23b7/0x32d0
[   31.611161]  ? __pfx_path_openat+0x10/0x10
[   31.611502]  ? lock_acquire.part.0+0x152/0x390
[   31.611874]  ? __this_cpu_preempt_check+0x21/0x30
[   31.612266]  ? lock_is_held_type+0xef/0x150
[   31.612611]  ? __this_cpu_preempt_check+0x21/0x30
[   31.613002]  do_filp_open+0x1cc/0x420
[   31.613316]  ? __pfx_do_filp_open+0x10/0x10
[   31.613669]  ? lock_release+0x441/0x870
[   31.614043]  ? __pfx_lock_release+0x10/0x10
[   31.614404]  ? do_raw_spin_unlock+0x15c/0x210
[   31.614784]  do_sys_openat2+0x185/0x1f0
[   31.615105]  ? __pfx_do_sys_openat2+0x10/0x10
[   31.615470]  ? __this_cpu_preempt_check+0x21/0x30
[   31.615854]  ? seqcount_lockdep_reader_access.constprop.0+0xb4/0xd0
[   31.616370]  ? lockdep_hardirqs_on+0x89/0x110
[   31.616736]  __x64_sys_openat+0x17a/0x240
[   31.617067]  ? __pfx___x64_sys_openat+0x10/0x10
[   31.617447]  ? __audit_syscall_entry+0x39c/0x500
[   31.617870]  x64_sys_call+0x1a52/0x20d0
[   31.618194]  do_syscall_64+0x6d/0x140
[   31.618504]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   31.618917] RIP: 0033:0x7f812eb3e8c4
[   31.619225] Code: 24 20 eb 8f 66 90 44 89 54 24 0c e8 76 d3 f5 ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 89 44 24 0c e8 c8 d3 f5 ff 8b 44
[   31.620656] RSP: 002b:00007f812eca1b90 EFLAGS: 00000293 ORIG_RAX: 0000000000000101
[   31.621255] RAX: ffffffffffffffda RBX: 00007f812eca2640 RCX: 00007f812eb3e8c4
[   31.621864] RDX: 0000000000010000 RSI: 0000000020002080 RDI: 00000000ffffff9c
[   31.622428] RBP: 0000000020002080 R08: 0000000000000000 R09: 0000000000000000
[   31.622987] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000010000
[   31.623549] R13: 0000000000000006 R14: 00007f812ea9f560 R15: 0000000000000000
[   31.624123]  </TASK>
[   31.624316] irq event stamp: 1655
[   31.624595] hardirqs last  enabled at (1663): [<ffffffff8145cb85>] __up_console_sem+0x95/0xb0
[   31.625310] hardirqs last disabled at (1670): [<ffffffff8145cb6a>] __up_console_sem+0x7a/0xb0
[   31.626039] softirqs last  enabled at (1466): [<ffffffff8128a889>] __irq_exit_rcu+0xa9/0x120
[   31.626726] softirqs last disabled at (1449): [<ffffffff8128a889>] __irq_exit_rcu+0xa9/0x120
[   31.627405] ---[ end trace 0000000000000000 ]---
"

I hope you find it useful.

Regards,
Yi Lai

---

If you don't need the following environment to reproduce the problem, or if you
already have a reproduction environment, please ignore the following information.

How to reproduce:
git clone https://gitlab.com/xupengfe/repro_vm_env.git
cd repro_vm_env
tar -xvf repro_vm_env.tar.gz
cd repro_vm_env; ./start3.sh  // it needs qemu-system-x86_64 and I used v7.1.0
  // start3.sh will load bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
  // You could change the bzImage_xxx as you want
  // Maybe you need to remove line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for different qemu version
You could use below command to log in, there is no password for root.
ssh -p 10023 root@localhost

After login vm(virtual machine) successfully, you could transfer reproduced
binary to the vm by below way, and reproduce the problem in vm:
gcc -pthread -o repro repro.c
scp -P 10023 repro root@localhost:/root/

Get the bzImage for target kernel:
Please use target kconfig and copy it to kernel_src/.config
make olddefconfig
make -jx bzImage           //x should equal or less than cpu num your pc has

Fill the bzImage file into above start3.sh to load the target kernel in vm.

Tips:
If you already have qemu-system-x86_64, please ignore below info.
If you want to install qemu v7.1.0 version:
git clone https://github.com/qemu/qemu.git
cd qemu
git checkout -f v7.1.0
mkdir build
cd build
yum install -y ninja-build.x86_64
yum -y install libslirp-devel.x86_64
../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
make
make install 

On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> Virtiofs has its own queing mechanism, but still requests are first queued
> on fiq->pending to be immediately dequeued and queued onto the virtio
> queue.
> 
> The queuing on fiq->pending is unnecessary and might even have some
> performance impact due to being a contention point.
> 
> Forget requests are handled similarly.
> 
> Move the queuing of requests and forgets into the fiq->ops->*.
> fuse_iqueue_ops are renamed to reflect the new semantics.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++--------------------
>  fs/fuse/fuse_i.h    |  19 ++----
>  fs/fuse/virtio_fs.c |  41 ++++--------
>  3 files changed, 106 insertions(+), 113 deletions(-)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 9eb191b5c4de..a4f510f1b1a4 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -192,10 +192,22 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
>  }
>  EXPORT_SYMBOL_GPL(fuse_len_args);
>  
> -u64 fuse_get_unique(struct fuse_iqueue *fiq)
> +static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
>  {
>  	fiq->reqctr += FUSE_REQ_ID_STEP;
>  	return fiq->reqctr;
> +
> +}
> +
> +u64 fuse_get_unique(struct fuse_iqueue *fiq)
> +{
> +	u64 ret;
> +
> +	spin_lock(&fiq->lock);
> +	ret = fuse_get_unique_locked(fiq);
> +	spin_unlock(&fiq->lock);
> +
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(fuse_get_unique);
>  
> @@ -215,22 +227,67 @@ __releases(fiq->lock)
>  	spin_unlock(&fiq->lock);
>  }
>  
> +static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
> +{
> +	spin_lock(&fiq->lock);
> +	if (fiq->connected) {
> +		fiq->forget_list_tail->next = forget;
> +		fiq->forget_list_tail = forget;
> +		fuse_dev_wake_and_unlock(fiq);
> +	} else {
> +		kfree(forget);
> +		spin_unlock(&fiq->lock);
> +	}
> +}
> +
> +static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
> +{
> +	spin_lock(&fiq->lock);
> +	if (list_empty(&req->intr_entry)) {
> +		list_add_tail(&req->intr_entry, &fiq->interrupts);
> +		/*
> +		 * Pairs with smp_mb() implied by test_and_set_bit()
> +		 * from fuse_request_end().
> +		 */
> +		smp_mb();
> +		if (test_bit(FR_FINISHED, &req->flags)) {
> +			list_del_init(&req->intr_entry);
> +			spin_unlock(&fiq->lock);
> +		}
> +		fuse_dev_wake_and_unlock(fiq);
> +	} else {
> +		spin_unlock(&fiq->lock);
> +	}
> +}
> +
> +static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
> +{
> +	spin_lock(&fiq->lock);
> +	if (fiq->connected) {
> +		if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
> +			req->in.h.unique = fuse_get_unique_locked(fiq);
> +		list_add_tail(&req->list, &fiq->pending);
> +		fuse_dev_wake_and_unlock(fiq);
> +	} else {
> +		spin_unlock(&fiq->lock);
> +		req->out.h.error = -ENOTCONN;
> +		fuse_request_end(req);
> +	}
> +}
> +
>  const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
> -	.wake_forget_and_unlock		= fuse_dev_wake_and_unlock,
> -	.wake_interrupt_and_unlock	= fuse_dev_wake_and_unlock,
> -	.wake_pending_and_unlock	= fuse_dev_wake_and_unlock,
> +	.send_forget	= fuse_dev_queue_forget,
> +	.send_interrupt	= fuse_dev_queue_interrupt,
> +	.send_req	= fuse_dev_queue_req,
>  };
>  EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
>  
> -static void queue_request_and_unlock(struct fuse_iqueue *fiq,
> -				     struct fuse_req *req)
> -__releases(fiq->lock)
> +static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
>  {
>  	req->in.h.len = sizeof(struct fuse_in_header) +
>  		fuse_len_args(req->args->in_numargs,
>  			      (struct fuse_arg *) req->args->in_args);
> -	list_add_tail(&req->list, &fiq->pending);
> -	fiq->ops->wake_pending_and_unlock(fiq);
> +	fiq->ops->send_req(fiq, req);
>  }
>  
>  void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
> @@ -241,15 +298,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
>  	forget->forget_one.nodeid = nodeid;
>  	forget->forget_one.nlookup = nlookup;
>  
> -	spin_lock(&fiq->lock);
> -	if (fiq->connected) {
> -		fiq->forget_list_tail->next = forget;
> -		fiq->forget_list_tail = forget;
> -		fiq->ops->wake_forget_and_unlock(fiq);
> -	} else {
> -		kfree(forget);
> -		spin_unlock(&fiq->lock);
> -	}
> +	fiq->ops->send_forget(fiq, forget);
>  }
>  
>  static void flush_bg_queue(struct fuse_conn *fc)
> @@ -263,9 +312,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
>  		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
>  		list_del(&req->list);
>  		fc->active_background++;
> -		spin_lock(&fiq->lock);
> -		req->in.h.unique = fuse_get_unique(fiq);
> -		queue_request_and_unlock(fiq, req);
> +		fuse_send_one(fiq, req);
>  	}
>  }
>  
> @@ -335,29 +382,12 @@ static int queue_interrupt(struct fuse_req *req)
>  {
>  	struct fuse_iqueue *fiq = &req->fm->fc->iq;
>  
> -	spin_lock(&fiq->lock);
>  	/* Check for we've sent request to interrupt this req */
> -	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
> -		spin_unlock(&fiq->lock);
> +	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
>  		return -EINVAL;
> -	}
>  
> -	if (list_empty(&req->intr_entry)) {
> -		list_add_tail(&req->intr_entry, &fiq->interrupts);
> -		/*
> -		 * Pairs with smp_mb() implied by test_and_set_bit()
> -		 * from fuse_request_end().
> -		 */
> -		smp_mb();
> -		if (test_bit(FR_FINISHED, &req->flags)) {
> -			list_del_init(&req->intr_entry);
> -			spin_unlock(&fiq->lock);
> -			return 0;
> -		}
> -		fiq->ops->wake_interrupt_and_unlock(fiq);
> -	} else {
> -		spin_unlock(&fiq->lock);
> -	}
> +	fiq->ops->send_interrupt(fiq, req);
> +
>  	return 0;
>  }
>  
> @@ -412,21 +442,15 @@ static void __fuse_request_send(struct fuse_req *req)
>  	struct fuse_iqueue *fiq = &req->fm->fc->iq;
>  
>  	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
> -	spin_lock(&fiq->lock);
> -	if (!fiq->connected) {
> -		spin_unlock(&fiq->lock);
> -		req->out.h.error = -ENOTCONN;
> -	} else {
> -		req->in.h.unique = fuse_get_unique(fiq);
> -		/* acquire extra reference, since request is still needed
> -		   after fuse_request_end() */
> -		__fuse_get_request(req);
> -		queue_request_and_unlock(fiq, req);
>  
> -		request_wait_answer(req);
> -		/* Pairs with smp_wmb() in fuse_request_end() */
> -		smp_rmb();
> -	}
> +	/* acquire extra reference, since request is still needed after
> +	   fuse_request_end() */
> +	__fuse_get_request(req);
> +	fuse_send_one(fiq, req);
> +
> +	request_wait_answer(req);
> +	/* Pairs with smp_wmb() in fuse_request_end() */
> +	smp_rmb();
>  }
>  
>  static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
> @@ -581,7 +605,6 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
>  {
>  	struct fuse_req *req;
>  	struct fuse_iqueue *fiq = &fm->fc->iq;
> -	int err = 0;
>  
>  	req = fuse_get_req(fm, false);
>  	if (IS_ERR(req))
> @@ -592,16 +615,9 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
>  
>  	fuse_args_to_req(req, args);
>  
> -	spin_lock(&fiq->lock);
> -	if (fiq->connected) {
> -		queue_request_and_unlock(fiq, req);
> -	} else {
> -		err = -ENODEV;
> -		spin_unlock(&fiq->lock);
> -		fuse_put_request(req);
> -	}
> +	fuse_send_one(fiq, req);
>  
> -	return err;
> +	return 0;
>  }
>  
>  /*
> @@ -1076,9 +1092,9 @@ __releases(fiq->lock)
>  	return err ? err : reqsize;
>  }
>  
> -struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> -					     unsigned int max,
> -					     unsigned int *countp)
> +static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> +						    unsigned int max,
> +						    unsigned int *countp)
>  {
>  	struct fuse_forget_link *head = fiq->forget_list_head.next;
>  	struct fuse_forget_link **newhead = &head;
> @@ -1097,7 +1113,6 @@ struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
>  
>  	return head;
>  }
> -EXPORT_SYMBOL(fuse_dequeue_forget);
>  
>  static int fuse_read_single_forget(struct fuse_iqueue *fiq,
>  				   struct fuse_copy_state *cs,
> @@ -1112,7 +1127,7 @@ __releases(fiq->lock)
>  	struct fuse_in_header ih = {
>  		.opcode = FUSE_FORGET,
>  		.nodeid = forget->forget_one.nodeid,
> -		.unique = fuse_get_unique(fiq),
> +		.unique = fuse_get_unique_locked(fiq),
>  		.len = sizeof(ih) + sizeof(arg),
>  	};
>  
> @@ -1143,7 +1158,7 @@ __releases(fiq->lock)
>  	struct fuse_batch_forget_in arg = { .count = 0 };
>  	struct fuse_in_header ih = {
>  		.opcode = FUSE_BATCH_FORGET,
> -		.unique = fuse_get_unique(fiq),
> +		.unique = fuse_get_unique_locked(fiq),
>  		.len = sizeof(ih) + sizeof(arg),
>  	};
>  
> @@ -1822,7 +1837,7 @@ static void fuse_resend(struct fuse_conn *fc)
>  	spin_lock(&fiq->lock);
>  	/* iq and pq requests are both oldest to newest */
>  	list_splice(&to_queue, &fiq->pending);
> -	fiq->ops->wake_pending_and_unlock(fiq);
> +	fuse_dev_wake_and_unlock(fiq);
>  }
>  
>  static int fuse_notify_resend(struct fuse_conn *fc)
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index f23919610313..33b21255817e 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -449,22 +449,19 @@ struct fuse_iqueue;
>   */
>  struct fuse_iqueue_ops {
>  	/**
> -	 * Signal that a forget has been queued
> +	 * Send one forget
>  	 */
> -	void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
> -		__releases(fiq->lock);
> +	void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link);
>  
>  	/**
> -	 * Signal that an INTERRUPT request has been queued
> +	 * Send interrupt for request
>  	 */
> -	void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
> -		__releases(fiq->lock);
> +	void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req);
>  
>  	/**
> -	 * Signal that a request has been queued
> +	 * Send one request
>  	 */
> -	void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
> -		__releases(fiq->lock);
> +	void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req);
>  
>  	/**
>  	 * Clean up when fuse_iqueue is destroyed
> @@ -1053,10 +1050,6 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
>  
>  struct fuse_forget_link *fuse_alloc_forget(void);
>  
> -struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> -					     unsigned int max,
> -					     unsigned int *countp);
> -
>  /*
>   * Initialize READ or READDIR request
>   */
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 1a52a51b6b07..690e508dbc4d 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -1089,22 +1089,13 @@ static struct virtio_driver virtio_fs_driver = {
>  #endif
>  };
>  
> -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
> -__releases(fiq->lock)
> +static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
>  {
> -	struct fuse_forget_link *link;
>  	struct virtio_fs_forget *forget;
>  	struct virtio_fs_forget_req *req;
> -	struct virtio_fs *fs;
> -	struct virtio_fs_vq *fsvq;
> -	u64 unique;
> -
> -	link = fuse_dequeue_forget(fiq, 1, NULL);
> -	unique = fuse_get_unique(fiq);
> -
> -	fs = fiq->priv;
> -	fsvq = &fs->vqs[VQ_HIPRIO];
> -	spin_unlock(&fiq->lock);
> +	struct virtio_fs *fs = fiq->priv;
> +	struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
> +	u64 unique = fuse_get_unique(fiq);
>  
>  	/* Allocate a buffer for the request */
>  	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
> @@ -1124,8 +1115,7 @@ __releases(fiq->lock)
>  	kfree(link);
>  }
>  
> -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
> -__releases(fiq->lock)
> +static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
>  {
>  	/*
>  	 * TODO interrupts.
> @@ -1134,7 +1124,6 @@ __releases(fiq->lock)
>  	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
>  	 * with shared lock between host and guest.
>  	 */
> -	spin_unlock(&fiq->lock);
>  }
>  
>  /* Count number of scatter-gather elements required */
> @@ -1339,21 +1328,17 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
>  	return ret;
>  }
>  
> -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
> -__releases(fiq->lock)
> +static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
>  {
>  	unsigned int queue_id;
>  	struct virtio_fs *fs;
> -	struct fuse_req *req;
>  	struct virtio_fs_vq *fsvq;
>  	int ret;
>  
> -	WARN_ON(list_empty(&fiq->pending));
> -	req = list_last_entry(&fiq->pending, struct fuse_req, list);
> +	if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
> +		req->in.h.unique = fuse_get_unique(fiq);
> +
>  	clear_bit(FR_PENDING, &req->flags);
> -	list_del_init(&req->list);
> -	WARN_ON(!list_empty(&fiq->pending));
> -	spin_unlock(&fiq->lock);
>  
>  	fs = fiq->priv;
>  	queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
> @@ -1393,10 +1378,10 @@ __releases(fiq->lock)
>  }
>  
>  static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
> -	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
> -	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
> -	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
> -	.release			= virtio_fs_fiq_release,
> +	.send_forget	= virtio_fs_send_forget,
> +	.send_interrupt	= virtio_fs_send_interrupt,
> +	.send_req	= virtio_fs_send_req,
> +	.release	= virtio_fs_fiq_release,
>  };
>  
>  static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
> -- 
> 2.45.1
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-09-23 10:33 ` Lai, Yi
@ 2024-09-23 22:48   ` Joanne Koong
  2024-09-23 23:47     ` Joanne Koong
  0 siblings, 1 reply; 15+ messages in thread
From: Joanne Koong @ 2024-09-23 22:48 UTC (permalink / raw)
  To: Lai, Yi
  Cc: Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen, Jingbo Xu,
	Stefan Hajnoczi, Yoray Zack, Vivek Goyal, virtualization, yi1.lai

On Mon, Sep 23, 2024 at 3:34 AM Lai, Yi <yi1.lai@linux.intel.com> wrote:
>
> Hi Miklos Szeredi,
>
> Greetings!
>
> I used Syzkaller and found that there is WARNING in fuse_request_end in Linux-next tree - next-20240918.
>
> After bisection and the first bad commit is:
> "
> 5de8acb41c86 fuse: cleanup request queuing towards virtiofs
> "
>
> All detailed info can be found at:
> https://github.com/laifryiee/syzkaller_logs/tree/main/240922_114402_fuse_request_end
> Syzkaller repro code:
> https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.c
> Syzkaller repro syscall steps:
> https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.prog
> Syzkaller report:
> https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.report
> Kconfig(make olddefconfig):
> https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/kconfig_origin
> Bisect info:
> https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/bisect_info.log
> bzImage:
> https://github.com/laifryiee/syzkaller_logs/raw/main/240922_114402_fuse_request_end/bzImage_55bcd2e0d04c1171d382badef1def1fd04ef66c5
> Issue dmesg:
> https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/55bcd2e0d04c1171d382badef1def1fd04ef66c5_dmesg.log
>
> "
> [   31.577123] ------------[ cut here ]------------
> [   31.578842] WARNING: CPU: 1 PID: 1186 at fs/fuse/dev.c:373 fuse_request_end+0x7d2/0x910
> [   31.581269] Modules linked in:
> [   31.582553] CPU: 1 UID: 0 PID: 1186 Comm: repro Not tainted 6.11.0-next-20240918-55bcd2e0d04c #1
> [   31.584332] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
> [   31.586281] RIP: 0010:fuse_request_end+0x7d2/0x910
> [   31.587001] Code: ff 48 8b 7d d0 e8 ae 0f 72 ff e9 c2 fb ff ff e8 a4 0f 72 ff e9 e7 fb ff ff e8 3a 3b 0a ff 0f 0b e9 17 fa ff ff e8 2e 3b 0a ff <0f> 0b e9 c1 f9 ff ff 4c 89 ff e8 af 0f 72 ff e9 82 f8 ff ff e8 a5
> [   31.589442] RSP: 0018:ffff88802141f640 EFLAGS: 00010293
> [   31.590198] RAX: 0000000000000000 RBX: 0000000000000201 RCX: ffffffff825d5bb2
> [   31.591137] RDX: ffff888010b2a500 RSI: ffffffff825d61f2 RDI: 0000000000000001
> [   31.592072] RBP: ffff88802141f680 R08: 0000000000000000 R09: ffffed100356f28e
> [   31.593010] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88801ab79440
> [   31.594062] R13: ffff88801ab79470 R14: ffff88801dcaa000 R15: ffff88800d71fa00
> [   31.594820] FS:  00007f812eca2640(0000) GS:ffff88806c500000(0000) knlGS:0000000000000000
> [   31.595670] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   31.596299] CR2: 000055b7baa16b20 CR3: 00000000109b0002 CR4: 0000000000770ef0
> [   31.597054] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [   31.597850] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400
> [   31.598595] PKRU: 55555554
> [   31.598902] Call Trace:
> [   31.599180]  <TASK>
> [   31.599439]  ? show_regs+0x6d/0x80
> [   31.599805]  ? __warn+0xf3/0x380
> [   31.600137]  ? report_bug+0x25e/0x4b0
> [   31.600521]  ? fuse_request_end+0x7d2/0x910
> [   31.600885]  ? report_bug+0x2cb/0x4b0
> [   31.601204]  ? fuse_request_end+0x7d2/0x910
> [   31.601564]  ? fuse_request_end+0x7d3/0x910
> [   31.601956]  ? handle_bug+0xf1/0x190
> [   31.602275]  ? exc_invalid_op+0x3c/0x80
> [   31.602595]  ? asm_exc_invalid_op+0x1f/0x30
> [   31.602959]  ? fuse_request_end+0x192/0x910
> [   31.603301]  ? fuse_request_end+0x7d2/0x910
> [   31.603643]  ? fuse_request_end+0x7d2/0x910
> [   31.603988]  ? do_raw_spin_unlock+0x15c/0x210
> [   31.604366]  fuse_dev_queue_req+0x23c/0x2b0
> [   31.604714]  fuse_send_one+0x1d1/0x360
> [   31.605031]  fuse_simple_request+0x348/0xd30
> [   31.605385]  ? lockdep_hardirqs_on+0x89/0x110
> [   31.605755]  fuse_send_open+0x234/0x2f0
> [   31.606126]  ? __pfx_fuse_send_open+0x10/0x10
> [   31.606487]  ? kasan_save_track+0x18/0x40
> [   31.606834]  ? lockdep_init_map_type+0x2df/0x810
> [   31.607227]  ? __kasan_check_write+0x18/0x20
> [   31.607591]  fuse_file_open+0x2bc/0x770
> [   31.607921]  fuse_do_open+0x5d/0xe0
> [   31.608215]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
> [   31.608681]  fuse_dir_open+0x138/0x220
> [   31.609005]  do_dentry_open+0x6be/0x1390
> [   31.609358]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
> [   31.609861]  ? __pfx_fuse_dir_open+0x10/0x10
> [   31.610240]  vfs_open+0x87/0x3f0
> [   31.610523]  ? may_open+0x205/0x430
> [   31.610834]  path_openat+0x23b7/0x32d0
> [   31.611161]  ? __pfx_path_openat+0x10/0x10
> [   31.611502]  ? lock_acquire.part.0+0x152/0x390
> [   31.611874]  ? __this_cpu_preempt_check+0x21/0x30
> [   31.612266]  ? lock_is_held_type+0xef/0x150
> [   31.612611]  ? __this_cpu_preempt_check+0x21/0x30
> [   31.613002]  do_filp_open+0x1cc/0x420
> [   31.613316]  ? __pfx_do_filp_open+0x10/0x10
> [   31.613669]  ? lock_release+0x441/0x870
> [   31.614043]  ? __pfx_lock_release+0x10/0x10
> [   31.614404]  ? do_raw_spin_unlock+0x15c/0x210
> [   31.614784]  do_sys_openat2+0x185/0x1f0
> [   31.615105]  ? __pfx_do_sys_openat2+0x10/0x10
> [   31.615470]  ? __this_cpu_preempt_check+0x21/0x30
> [   31.615854]  ? seqcount_lockdep_reader_access.constprop.0+0xb4/0xd0
> [   31.616370]  ? lockdep_hardirqs_on+0x89/0x110
> [   31.616736]  __x64_sys_openat+0x17a/0x240
> [   31.617067]  ? __pfx___x64_sys_openat+0x10/0x10
> [   31.617447]  ? __audit_syscall_entry+0x39c/0x500
> [   31.617870]  x64_sys_call+0x1a52/0x20d0
> [   31.618194]  do_syscall_64+0x6d/0x140
> [   31.618504]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [   31.618917] RIP: 0033:0x7f812eb3e8c4
> [   31.619225] Code: 24 20 eb 8f 66 90 44 89 54 24 0c e8 76 d3 f5 ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 89 44 24 0c e8 c8 d3 f5 ff 8b 44
> [   31.620656] RSP: 002b:00007f812eca1b90 EFLAGS: 00000293 ORIG_RAX: 0000000000000101
> [   31.621255] RAX: ffffffffffffffda RBX: 00007f812eca2640 RCX: 00007f812eb3e8c4
> [   31.621864] RDX: 0000000000010000 RSI: 0000000020002080 RDI: 00000000ffffff9c
> [   31.622428] RBP: 0000000020002080 R08: 0000000000000000 R09: 0000000000000000
> [   31.622987] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000010000
> [   31.623549] R13: 0000000000000006 R14: 00007f812ea9f560 R15: 0000000000000000
> [   31.624123]  </TASK>
> [   31.624316] irq event stamp: 1655
> [   31.624595] hardirqs last  enabled at (1663): [<ffffffff8145cb85>] __up_console_sem+0x95/0xb0
> [   31.625310] hardirqs last disabled at (1670): [<ffffffff8145cb6a>] __up_console_sem+0x7a/0xb0
> [   31.626039] softirqs last  enabled at (1466): [<ffffffff8128a889>] __irq_exit_rcu+0xa9/0x120
> [   31.626726] softirqs last disabled at (1449): [<ffffffff8128a889>] __irq_exit_rcu+0xa9/0x120
> [   31.627405] ---[ end trace 0000000000000000 ]---
> "
>
> I hope you find it useful.
>
> Regards,
> Yi Lai
>
> ---
>
> If you don't need the following environment to reproduce the problem or if you
> already have one reproduced environment, please ignore the following information.
>
> How to reproduce:
> git clone https://gitlab.com/xupengfe/repro_vm_env.git
> cd repro_vm_env
> tar -xvf repro_vm_env.tar.gz
> cd repro_vm_env; ./start3.sh  // it needs qemu-system-x86_64 and I used v7.1.0
>   // start3.sh will load bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
>   // You could change the bzImage_xxx as you want
>   // Maybe you need to remove line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for different qemu version
> You could use below command to log in, there is no password for root.
> ssh -p 10023 root@localhost
>
> After login vm(virtual machine) successfully, you could transfer reproduced
> binary to the vm by below way, and reproduce the problem in vm:
> gcc -pthread -o repro repro.c
> scp -P 10023 repro root@localhost:/root/
>
> Get the bzImage for target kernel:
> Please use target kconfig and copy it to kernel_src/.config
> make olddefconfig
> make -jx bzImage           //x should equal or less than cpu num your pc has
>
> Fill the bzImage file into above start3.sh to load the target kernel in vm.
>
> Tips:
> If you already have qemu-system-x86_64, please ignore below info.
> If you want to install qemu v7.1.0 version:
> git clone https://github.com/qemu/qemu.git
> cd qemu
> git checkout -f v7.1.0
> mkdir build
> cd build
> yum install -y ninja-build.x86_64
> yum -y install libslirp-devel.x86_64
> ../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
> make
> make install
>
> On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> > Virtiofs has its own queuing mechanism, but still requests are first queued
> > on fiq->pending to be immediately dequeued and queued onto the virtio
> > queue.
> >
> > The queuing on fiq->pending is unnecessary and might even have some
> > performance impact due to being a contention point.
> >
> > Forget requests are handled similarly.
> >
> > Move the queuing of requests and forgets into the fiq->ops->*.
> > fuse_iqueue_ops are renamed to reflect the new semantics.
> >
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++--------------------
> >  fs/fuse/fuse_i.h    |  19 ++----
> >  fs/fuse/virtio_fs.c |  41 ++++--------
> >  3 files changed, 106 insertions(+), 113 deletions(-)
> >
> > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> > index 9eb191b5c4de..a4f510f1b1a4 100644
> > --- a/fs/fuse/dev.c
> > +++ b/fs/fuse/dev.c
> > @@ -192,10 +192,22 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
> >  }
> >  EXPORT_SYMBOL_GPL(fuse_len_args);
> >
> > -u64 fuse_get_unique(struct fuse_iqueue *fiq)
> > +static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
> >  {
> >       fiq->reqctr += FUSE_REQ_ID_STEP;
> >       return fiq->reqctr;
> > +
> > +}
> > +
> > +u64 fuse_get_unique(struct fuse_iqueue *fiq)
> > +{
> > +     u64 ret;
> > +
> > +     spin_lock(&fiq->lock);
> > +     ret = fuse_get_unique_locked(fiq);
> > +     spin_unlock(&fiq->lock);
> > +
> > +     return ret;
> >  }
> >  EXPORT_SYMBOL_GPL(fuse_get_unique);
> >
> > @@ -215,22 +227,67 @@ __releases(fiq->lock)
> >       spin_unlock(&fiq->lock);
> >  }
> >
> > +static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
> > +{
> > +     spin_lock(&fiq->lock);
> > +     if (fiq->connected) {
> > +             fiq->forget_list_tail->next = forget;
> > +             fiq->forget_list_tail = forget;
> > +             fuse_dev_wake_and_unlock(fiq);
> > +     } else {
> > +             kfree(forget);
> > +             spin_unlock(&fiq->lock);
> > +     }
> > +}
> > +
> > +static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
> > +{
> > +     spin_lock(&fiq->lock);
> > +     if (list_empty(&req->intr_entry)) {
> > +             list_add_tail(&req->intr_entry, &fiq->interrupts);
> > +             /*
> > +              * Pairs with smp_mb() implied by test_and_set_bit()
> > +              * from fuse_request_end().
> > +              */
> > +             smp_mb();
> > +             if (test_bit(FR_FINISHED, &req->flags)) {
> > +                     list_del_init(&req->intr_entry);
> > +                     spin_unlock(&fiq->lock);
> > +             }
> > +             fuse_dev_wake_and_unlock(fiq);
> > +     } else {
> > +             spin_unlock(&fiq->lock);
> > +     }
> > +}
> > +
> > +static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
> > +{
> > +     spin_lock(&fiq->lock);
> > +     if (fiq->connected) {
> > +             if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
> > +                     req->in.h.unique = fuse_get_unique_locked(fiq);
> > +             list_add_tail(&req->list, &fiq->pending);
> > +             fuse_dev_wake_and_unlock(fiq);
> > +     } else {
> > +             spin_unlock(&fiq->lock);
> > +             req->out.h.error = -ENOTCONN;
> > +             fuse_request_end(req);

in the case where the connection has been aborted, this request will
still have the FR_PENDING flag set on it when it calls
fuse_request_end(). I think we can just call fuse_put_request() here
instead.

> > +     }
> > +}
> > +
> >  const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
> > -     .wake_forget_and_unlock         = fuse_dev_wake_and_unlock,
> > -     .wake_interrupt_and_unlock      = fuse_dev_wake_and_unlock,
> > -     .wake_pending_and_unlock        = fuse_dev_wake_and_unlock,
> > +     .send_forget    = fuse_dev_queue_forget,
> > +     .send_interrupt = fuse_dev_queue_interrupt,
> > +     .send_req       = fuse_dev_queue_req,
> >  };
> >  EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
> >
> > -static void queue_request_and_unlock(struct fuse_iqueue *fiq,
> > -                                  struct fuse_req *req)
> > -__releases(fiq->lock)
> > +static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
> >  {
> >       req->in.h.len = sizeof(struct fuse_in_header) +
> >               fuse_len_args(req->args->in_numargs,
> >                             (struct fuse_arg *) req->args->in_args);
> > -     list_add_tail(&req->list, &fiq->pending);
> > -     fiq->ops->wake_pending_and_unlock(fiq);
> > +     fiq->ops->send_req(fiq, req);
> >  }
> >
> >  void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
> > @@ -241,15 +298,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
> >       forget->forget_one.nodeid = nodeid;
> >       forget->forget_one.nlookup = nlookup;
> >
> > -     spin_lock(&fiq->lock);
> > -     if (fiq->connected) {
> > -             fiq->forget_list_tail->next = forget;
> > -             fiq->forget_list_tail = forget;
> > -             fiq->ops->wake_forget_and_unlock(fiq);
> > -     } else {
> > -             kfree(forget);
> > -             spin_unlock(&fiq->lock);
> > -     }
> > +     fiq->ops->send_forget(fiq, forget);
> >  }
> >
> >  static void flush_bg_queue(struct fuse_conn *fc)
> > @@ -263,9 +312,7 @@ static void flush_bg_queue(struct fuse_conn *fc)
> >               req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
> >               list_del(&req->list);
> >               fc->active_background++;
> > -             spin_lock(&fiq->lock);
> > -             req->in.h.unique = fuse_get_unique(fiq);
> > -             queue_request_and_unlock(fiq, req);
> > +             fuse_send_one(fiq, req);
> >       }
> >  }
> >
> > @@ -335,29 +382,12 @@ static int queue_interrupt(struct fuse_req *req)
> >  {
> >       struct fuse_iqueue *fiq = &req->fm->fc->iq;
> >
> > -     spin_lock(&fiq->lock);
> >       /* Check for we've sent request to interrupt this req */
> > -     if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
> > -             spin_unlock(&fiq->lock);
> > +     if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
> >               return -EINVAL;
> > -     }
> >
> > -     if (list_empty(&req->intr_entry)) {
> > -             list_add_tail(&req->intr_entry, &fiq->interrupts);
> > -             /*
> > -              * Pairs with smp_mb() implied by test_and_set_bit()
> > -              * from fuse_request_end().
> > -              */
> > -             smp_mb();
> > -             if (test_bit(FR_FINISHED, &req->flags)) {
> > -                     list_del_init(&req->intr_entry);
> > -                     spin_unlock(&fiq->lock);
> > -                     return 0;
> > -             }
> > -             fiq->ops->wake_interrupt_and_unlock(fiq);
> > -     } else {
> > -             spin_unlock(&fiq->lock);
> > -     }
> > +     fiq->ops->send_interrupt(fiq, req);
> > +
> >       return 0;
> >  }
> >
> > @@ -412,21 +442,15 @@ static void __fuse_request_send(struct fuse_req *req)
> >       struct fuse_iqueue *fiq = &req->fm->fc->iq;
> >
> >       BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
> > -     spin_lock(&fiq->lock);
> > -     if (!fiq->connected) {
> > -             spin_unlock(&fiq->lock);
> > -             req->out.h.error = -ENOTCONN;
> > -     } else {
> > -             req->in.h.unique = fuse_get_unique(fiq);
> > -             /* acquire extra reference, since request is still needed
> > -                after fuse_request_end() */
> > -             __fuse_get_request(req);
> > -             queue_request_and_unlock(fiq, req);
> >
> > -             request_wait_answer(req);
> > -             /* Pairs with smp_wmb() in fuse_request_end() */
> > -             smp_rmb();
> > -     }
> > +     /* acquire extra reference, since request is still needed after
> > +        fuse_request_end() */
> > +     __fuse_get_request(req);
> > +     fuse_send_one(fiq, req);
> > +
> > +     request_wait_answer(req);
> > +     /* Pairs with smp_wmb() in fuse_request_end() */
> > +     smp_rmb();
> >  }
> >
> >  static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
> > @@ -581,7 +605,6 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
> >  {
> >       struct fuse_req *req;
> >       struct fuse_iqueue *fiq = &fm->fc->iq;
> > -     int err = 0;
> >
> >       req = fuse_get_req(fm, false);
> >       if (IS_ERR(req))
> > @@ -592,16 +615,9 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm,
> >
> >       fuse_args_to_req(req, args);
> >
> > -     spin_lock(&fiq->lock);
> > -     if (fiq->connected) {
> > -             queue_request_and_unlock(fiq, req);
> > -     } else {
> > -             err = -ENODEV;
> > -             spin_unlock(&fiq->lock);
> > -             fuse_put_request(req);
> > -     }
> > +     fuse_send_one(fiq, req);
> >
> > -     return err;
> > +     return 0;
> >  }
> >
> >  /*
> > @@ -1076,9 +1092,9 @@ __releases(fiq->lock)
> >       return err ? err : reqsize;
> >  }
> >
> > -struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> > -                                          unsigned int max,
> > -                                          unsigned int *countp)
> > +static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> > +                                                 unsigned int max,
> > +                                                 unsigned int *countp)
> >  {
> >       struct fuse_forget_link *head = fiq->forget_list_head.next;
> >       struct fuse_forget_link **newhead = &head;
> > @@ -1097,7 +1113,6 @@ struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> >
> >       return head;
> >  }
> > -EXPORT_SYMBOL(fuse_dequeue_forget);
> >
> >  static int fuse_read_single_forget(struct fuse_iqueue *fiq,
> >                                  struct fuse_copy_state *cs,
> > @@ -1112,7 +1127,7 @@ __releases(fiq->lock)
> >       struct fuse_in_header ih = {
> >               .opcode = FUSE_FORGET,
> >               .nodeid = forget->forget_one.nodeid,
> > -             .unique = fuse_get_unique(fiq),
> > +             .unique = fuse_get_unique_locked(fiq),
> >               .len = sizeof(ih) + sizeof(arg),
> >       };
> >
> > @@ -1143,7 +1158,7 @@ __releases(fiq->lock)
> >       struct fuse_batch_forget_in arg = { .count = 0 };
> >       struct fuse_in_header ih = {
> >               .opcode = FUSE_BATCH_FORGET,
> > -             .unique = fuse_get_unique(fiq),
> > +             .unique = fuse_get_unique_locked(fiq),
> >               .len = sizeof(ih) + sizeof(arg),
> >       };
> >
> > @@ -1822,7 +1837,7 @@ static void fuse_resend(struct fuse_conn *fc)
> >       spin_lock(&fiq->lock);
> >       /* iq and pq requests are both oldest to newest */
> >       list_splice(&to_queue, &fiq->pending);
> > -     fiq->ops->wake_pending_and_unlock(fiq);
> > +     fuse_dev_wake_and_unlock(fiq);
> >  }
> >
> >  static int fuse_notify_resend(struct fuse_conn *fc)
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index f23919610313..33b21255817e 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -449,22 +449,19 @@ struct fuse_iqueue;
> >   */
> >  struct fuse_iqueue_ops {
> >       /**
> > -      * Signal that a forget has been queued
> > +      * Send one forget
> >        */
> > -     void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq)
> > -             __releases(fiq->lock);
> > +     void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link);
> >
> >       /**
> > -      * Signal that an INTERRUPT request has been queued
> > +      * Send interrupt for request
> >        */
> > -     void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq)
> > -             __releases(fiq->lock);
> > +     void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req);
> >
> >       /**
> > -      * Signal that a request has been queued
> > +      * Send one request
> >        */
> > -     void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
> > -             __releases(fiq->lock);
> > +     void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req);
> >
> >       /**
> >        * Clean up when fuse_iqueue is destroyed
> > @@ -1053,10 +1050,6 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
> >
> >  struct fuse_forget_link *fuse_alloc_forget(void);
> >
> > -struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
> > -                                          unsigned int max,
> > -                                          unsigned int *countp);
> > -
> >  /*
> >   * Initialize READ or READDIR request
> >   */
> > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> > index 1a52a51b6b07..690e508dbc4d 100644
> > --- a/fs/fuse/virtio_fs.c
> > +++ b/fs/fuse/virtio_fs.c
> > @@ -1089,22 +1089,13 @@ static struct virtio_driver virtio_fs_driver = {
> >  #endif
> >  };
> >
> > -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
> > -__releases(fiq->lock)
> > +static void virtio_fs_send_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *link)
> >  {
> > -     struct fuse_forget_link *link;
> >       struct virtio_fs_forget *forget;
> >       struct virtio_fs_forget_req *req;
> > -     struct virtio_fs *fs;
> > -     struct virtio_fs_vq *fsvq;
> > -     u64 unique;
> > -
> > -     link = fuse_dequeue_forget(fiq, 1, NULL);
> > -     unique = fuse_get_unique(fiq);
> > -
> > -     fs = fiq->priv;
> > -     fsvq = &fs->vqs[VQ_HIPRIO];
> > -     spin_unlock(&fiq->lock);
> > +     struct virtio_fs *fs = fiq->priv;
> > +     struct virtio_fs_vq *fsvq = &fs->vqs[VQ_HIPRIO];
> > +     u64 unique = fuse_get_unique(fiq);
> >
> >       /* Allocate a buffer for the request */
> >       forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
> > @@ -1124,8 +1115,7 @@ __releases(fiq->lock)
> >       kfree(link);
> >  }
> >
> > -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
> > -__releases(fiq->lock)
> > +static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
> >  {
> >       /*
> >        * TODO interrupts.
> > @@ -1134,7 +1124,6 @@ __releases(fiq->lock)
> >        * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
> >        * with shared lock between host and guest.
> >        */
> > -     spin_unlock(&fiq->lock);
> >  }
> >
> >  /* Count number of scatter-gather elements required */
> > @@ -1339,21 +1328,17 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
> >       return ret;
> >  }
> >
> > -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
> > -__releases(fiq->lock)
> > +static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
> >  {
> >       unsigned int queue_id;
> >       struct virtio_fs *fs;
> > -     struct fuse_req *req;
> >       struct virtio_fs_vq *fsvq;
> >       int ret;
> >
> > -     WARN_ON(list_empty(&fiq->pending));
> > -     req = list_last_entry(&fiq->pending, struct fuse_req, list);
> > +     if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
> > +             req->in.h.unique = fuse_get_unique(fiq);
> > +
> >       clear_bit(FR_PENDING, &req->flags);
> > -     list_del_init(&req->list);
> > -     WARN_ON(!list_empty(&fiq->pending));
> > -     spin_unlock(&fiq->lock);
> >
> >       fs = fiq->priv;
> >       queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
> > @@ -1393,10 +1378,10 @@ __releases(fiq->lock)
> >  }
> >
> >  static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
> > -     .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
> > -     .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
> > -     .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
> > -     .release                        = virtio_fs_fiq_release,
> > +     .send_forget    = virtio_fs_send_forget,
> > +     .send_interrupt = virtio_fs_send_interrupt,
> > +     .send_req       = virtio_fs_send_req,
> > +     .release        = virtio_fs_fiq_release,
> >  };
> >
> >  static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
> > --
> > 2.45.1
> >
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-09-23 22:48   ` Joanne Koong
@ 2024-09-23 23:47     ` Joanne Koong
  2024-09-24  8:58       ` Miklos Szeredi
  0 siblings, 1 reply; 15+ messages in thread
From: Joanne Koong @ 2024-09-23 23:47 UTC (permalink / raw)
  To: Lai, Yi
  Cc: Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen, Jingbo Xu,
	Stefan Hajnoczi, Yoray Zack, Vivek Goyal, virtualization, yi1.lai

On Mon, Sep 23, 2024 at 3:48 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> On Mon, Sep 23, 2024 at 3:34 AM Lai, Yi <yi1.lai@linux.intel.com> wrote:
> >
> > Hi Miklos Szeredi,
> >
> > Greetings!
> >
> > I used Syzkaller and found that there is WARNING in fuse_request_end in Linux-next tree - next-20240918.
> >
> > After bisection and the first bad commit is:
> > "
> > 5de8acb41c86 fuse: cleanup request queuing towards virtiofs
> > "
> >
> > All detailed into can be found at:
> > https://github.com/laifryiee/syzkaller_logs/tree/main/240922_114402_fuse_request_end
> > Syzkaller repro code:
> > https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.c
> > Syzkaller repro syscall steps:
> > https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.prog
> > Syzkaller report:
> > https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/repro.report
> > Kconfig(make olddefconfig):
> > https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/kconfig_origin
> > Bisect info:
> > https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/bisect_info.log
> > bzImage:
> > https://github.com/laifryiee/syzkaller_logs/raw/main/240922_114402_fuse_request_end/bzImage_55bcd2e0d04c1171d382badef1def1fd04ef66c5
> > Issue dmesg:
> > https://github.com/laifryiee/syzkaller_logs/blob/main/240922_114402_fuse_request_end/55bcd2e0d04c1171d382badef1def1fd04ef66c5_dmesg.log
> >
> > "
> > [   31.577123] ------------[ cut here ]------------
> > [   31.578842] WARNING: CPU: 1 PID: 1186 at fs/fuse/dev.c:373 fuse_request_end+0x7d2/0x910
> > [   31.581269] Modules linked in:
> > [   31.582553] CPU: 1 UID: 0 PID: 1186 Comm: repro Not tainted 6.11.0-next-20240918-55bcd2e0d04c #1
> > [   31.584332] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
> > [   31.586281] RIP: 0010:fuse_request_end+0x7d2/0x910
> > [   31.587001] Code: ff 48 8b 7d d0 e8 ae 0f 72 ff e9 c2 fb ff ff e8 a4 0f 72 ff e9 e7 fb ff ff e8 3a 3b 0a ff 0f 0b e9 17 fa ff ff e8 2e 3b 0a ff <0f> 0b e9 c1 f9 ff ff 4c 89 ff e8 af 0f 72 ff e9 82 f8 ff ff e8 a5
> > [   31.589442] RSP: 0018:ffff88802141f640 EFLAGS: 00010293
> > [   31.590198] RAX: 0000000000000000 RBX: 0000000000000201 RCX: ffffffff825d5bb2
> > [   31.591137] RDX: ffff888010b2a500 RSI: ffffffff825d61f2 RDI: 0000000000000001
> > [   31.592072] RBP: ffff88802141f680 R08: 0000000000000000 R09: ffffed100356f28e
> > [   31.593010] R10: 0000000000000001 R11: 0000000000000001 R12: ffff88801ab79440
> > [   31.594062] R13: ffff88801ab79470 R14: ffff88801dcaa000 R15: ffff88800d71fa00
> > [   31.594820] FS:  00007f812eca2640(0000) GS:ffff88806c500000(0000) knlGS:0000000000000000
> > [   31.595670] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [   31.596299] CR2: 000055b7baa16b20 CR3: 00000000109b0002 CR4: 0000000000770ef0
> > [   31.597054] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [   31.597850] DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400
> > [   31.598595] PKRU: 55555554
> > [   31.598902] Call Trace:
> > [   31.599180]  <TASK>
> > [   31.599439]  ? show_regs+0x6d/0x80
> > [   31.599805]  ? __warn+0xf3/0x380
> > [   31.600137]  ? report_bug+0x25e/0x4b0
> > [   31.600521]  ? fuse_request_end+0x7d2/0x910
> > [   31.600885]  ? report_bug+0x2cb/0x4b0
> > [   31.601204]  ? fuse_request_end+0x7d2/0x910
> > [   31.601564]  ? fuse_request_end+0x7d3/0x910
> > [   31.601956]  ? handle_bug+0xf1/0x190
> > [   31.602275]  ? exc_invalid_op+0x3c/0x80
> > [   31.602595]  ? asm_exc_invalid_op+0x1f/0x30
> > [   31.602959]  ? fuse_request_end+0x192/0x910
> > [   31.603301]  ? fuse_request_end+0x7d2/0x910
> > [   31.603643]  ? fuse_request_end+0x7d2/0x910
> > [   31.603988]  ? do_raw_spin_unlock+0x15c/0x210
> > [   31.604366]  fuse_dev_queue_req+0x23c/0x2b0
> > [   31.604714]  fuse_send_one+0x1d1/0x360
> > [   31.605031]  fuse_simple_request+0x348/0xd30
> > [   31.605385]  ? lockdep_hardirqs_on+0x89/0x110
> > [   31.605755]  fuse_send_open+0x234/0x2f0
> > [   31.606126]  ? __pfx_fuse_send_open+0x10/0x10
> > [   31.606487]  ? kasan_save_track+0x18/0x40
> > [   31.606834]  ? lockdep_init_map_type+0x2df/0x810
> > [   31.607227]  ? __kasan_check_write+0x18/0x20
> > [   31.607591]  fuse_file_open+0x2bc/0x770
> > [   31.607921]  fuse_do_open+0x5d/0xe0
> > [   31.608215]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
> > [   31.608681]  fuse_dir_open+0x138/0x220
> > [   31.609005]  do_dentry_open+0x6be/0x1390
> > [   31.609358]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
> > [   31.609861]  ? __pfx_fuse_dir_open+0x10/0x10
> > [   31.610240]  vfs_open+0x87/0x3f0
> > [   31.610523]  ? may_open+0x205/0x430
> > [   31.610834]  path_openat+0x23b7/0x32d0
> > [   31.611161]  ? __pfx_path_openat+0x10/0x10
> > [   31.611502]  ? lock_acquire.part.0+0x152/0x390
> > [   31.611874]  ? __this_cpu_preempt_check+0x21/0x30
> > [   31.612266]  ? lock_is_held_type+0xef/0x150
> > [   31.612611]  ? __this_cpu_preempt_check+0x21/0x30
> > [   31.613002]  do_filp_open+0x1cc/0x420
> > [   31.613316]  ? __pfx_do_filp_open+0x10/0x10
> > [   31.613669]  ? lock_release+0x441/0x870
> > [   31.614043]  ? __pfx_lock_release+0x10/0x10
> > [   31.614404]  ? do_raw_spin_unlock+0x15c/0x210
> > [   31.614784]  do_sys_openat2+0x185/0x1f0
> > [   31.615105]  ? __pfx_do_sys_openat2+0x10/0x10
> > [   31.615470]  ? __this_cpu_preempt_check+0x21/0x30
> > [   31.615854]  ? seqcount_lockdep_reader_access.constprop.0+0xb4/0xd0
> > [   31.616370]  ? lockdep_hardirqs_on+0x89/0x110
> > [   31.616736]  __x64_sys_openat+0x17a/0x240
> > [   31.617067]  ? __pfx___x64_sys_openat+0x10/0x10
> > [   31.617447]  ? __audit_syscall_entry+0x39c/0x500
> > [   31.617870]  x64_sys_call+0x1a52/0x20d0
> > [   31.618194]  do_syscall_64+0x6d/0x140
> > [   31.618504]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> > [   31.618917] RIP: 0033:0x7f812eb3e8c4
> > [   31.619225] Code: 24 20 eb 8f 66 90 44 89 54 24 0c e8 76 d3 f5 ff 44 8b 54 24 0c 44 89 e2 48 89 ee 41 89 c0 bf 9c ff ff ff b8 01 01 00 00 0f 05 <48> 3d 00 f0 ff ff 77 34 44 89 c7 89 44 24 0c e8 c8 d3 f5 ff 8b 44
> > [   31.620656] RSP: 002b:00007f812eca1b90 EFLAGS: 00000293 ORIG_RAX: 0000000000000101
> > [   31.621255] RAX: ffffffffffffffda RBX: 00007f812eca2640 RCX: 00007f812eb3e8c4
> > [   31.621864] RDX: 0000000000010000 RSI: 0000000020002080 RDI: 00000000ffffff9c
> > [   31.622428] RBP: 0000000020002080 R08: 0000000000000000 R09: 0000000000000000
> > [   31.622987] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000010000
> > [   31.623549] R13: 0000000000000006 R14: 00007f812ea9f560 R15: 0000000000000000
> > [   31.624123]  </TASK>
> > [   31.624316] irq event stamp: 1655
> > [   31.624595] hardirqs last  enabled at (1663): [<ffffffff8145cb85>] __up_console_sem+0x95/0xb0
> > [   31.625310] hardirqs last disabled at (1670): [<ffffffff8145cb6a>] __up_console_sem+0x7a/0xb0
> > [   31.626039] softirqs last  enabled at (1466): [<ffffffff8128a889>] __irq_exit_rcu+0xa9/0x120
> > [   31.626726] softirqs last disabled at (1449): [<ffffffff8128a889>] __irq_exit_rcu+0xa9/0x120
> > [   31.627405] ---[ end trace 0000000000000000 ]---
> > "
> >
> > I hope you find it useful.
> >
> > Regards,
> > Yi Lai
> >
> > ---
> >
> > If you don't need the following environment to reproduce the problem or if you
> > already have one reproduced environment, please ignore the following information.
> >
> > How to reproduce:
> > git clone https://gitlab.com/xupengfe/repro_vm_env.git
> > cd repro_vm_env
> > tar -xvf repro_vm_env.tar.gz
> > cd repro_vm_env; ./start3.sh  // it needs qemu-system-x86_64 and I used v7.1.0
> >   // start3.sh will load bzImage_2241ab53cbb5cdb08a6b2d4688feb13971058f65 v6.2-rc5 kernel
> >   // You could change the bzImage_xxx as you want
> >   // Maybe you need to remove line "-drive if=pflash,format=raw,readonly=on,file=./OVMF_CODE.fd \" for different qemu version
> > You could use below command to log in, there is no password for root.
> > ssh -p 10023 root@localhost
> >
> > After login vm(virtual machine) successfully, you could transfer reproduced
> > binary to the vm by below way, and reproduce the problem in vm:
> > gcc -pthread -o repro repro.c
> > scp -P 10023 repro root@localhost:/root/
> >
> > Get the bzImage for target kernel:
> > Please use target kconfig and copy it to kernel_src/.config
> > make olddefconfig
> > make -jx bzImage           //x should equal or less than cpu num your pc has
> >
> > Fill the bzImage file into above start3.sh to load the target kernel in vm.
> >
> > Tips:
> > If you already have qemu-system-x86_64, please ignore below info.
> > If you want to install qemu v7.1.0 version:
> > git clone https://github.com/qemu/qemu.git
> > cd qemu
> > git checkout -f v7.1.0
> > mkdir build
> > cd build
> > yum install -y ninja-build.x86_64
> > yum -y install libslirp-devel.x86_64
> > ../configure --target-list=x86_64-softmmu --enable-kvm --enable-vnc --enable-gtk --enable-sdl --enable-usb-redir --enable-slirp
> > make
> > make install
> >
> > On Wed, May 29, 2024 at 05:52:07PM +0200, Miklos Szeredi wrote:
> > > Virtiofs has its own queuing mechanism, but still requests are first queued
> > > on fiq->pending to be immediately dequeued and queued onto the virtio
> > > queue.
> > >
> > > The queuing on fiq->pending is unnecessary and might even have some
> > > performance impact due to being a contention point.
> > >
> > > Forget requests are handled similarly.
> > >
> > > Move the queuing of requests and forgets into the fiq->ops->*.
> > > fuse_iqueue_ops are renamed to reflect the new semantics.
> > >
> > > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > > ---
> > >  fs/fuse/dev.c       | 159 ++++++++++++++++++++++++--------------------
> > >  fs/fuse/fuse_i.h    |  19 ++----
> > >  fs/fuse/virtio_fs.c |  41 ++++--------
> > >  3 files changed, 106 insertions(+), 113 deletions(-)
> > >
> > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> > > index 9eb191b5c4de..a4f510f1b1a4 100644
> > > --- a/fs/fuse/dev.c
> > > +++ b/fs/fuse/dev.c
> > > @@ -192,10 +192,22 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
> > >  }
> > >  EXPORT_SYMBOL_GPL(fuse_len_args);
> > >
> > > -u64 fuse_get_unique(struct fuse_iqueue *fiq)
> > > +static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
> > >  {
> > >       fiq->reqctr += FUSE_REQ_ID_STEP;
> > >       return fiq->reqctr;
> > > +
> > > +}
> > > +
> > > +u64 fuse_get_unique(struct fuse_iqueue *fiq)
> > > +{
> > > +     u64 ret;
> > > +
> > > +     spin_lock(&fiq->lock);
> > > +     ret = fuse_get_unique_locked(fiq);
> > > +     spin_unlock(&fiq->lock);
> > > +
> > > +     return ret;
> > >  }
> > >  EXPORT_SYMBOL_GPL(fuse_get_unique);
> > >
> > > @@ -215,22 +227,67 @@ __releases(fiq->lock)
> > >       spin_unlock(&fiq->lock);
> > >  }
> > >
> > > +static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
> > > +{
> > > +     spin_lock(&fiq->lock);
> > > +     if (fiq->connected) {
> > > +             fiq->forget_list_tail->next = forget;
> > > +             fiq->forget_list_tail = forget;
> > > +             fuse_dev_wake_and_unlock(fiq);
> > > +     } else {
> > > +             kfree(forget);
> > > +             spin_unlock(&fiq->lock);
> > > +     }
> > > +}
> > > +
> > > +static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
> > > +{
> > > +     spin_lock(&fiq->lock);
> > > +     if (list_empty(&req->intr_entry)) {
> > > +             list_add_tail(&req->intr_entry, &fiq->interrupts);
> > > +             /*
> > > +              * Pairs with smp_mb() implied by test_and_set_bit()
> > > +              * from fuse_request_end().
> > > +              */
> > > +             smp_mb();
> > > +             if (test_bit(FR_FINISHED, &req->flags)) {
> > > +                     list_del_init(&req->intr_entry);
> > > +                     spin_unlock(&fiq->lock);
> > > +             }
> > > +             fuse_dev_wake_and_unlock(fiq);
> > > +     } else {
> > > +             spin_unlock(&fiq->lock);
> > > +     }
> > > +}
> > > +
> > > +static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
> > > +{
> > > +     spin_lock(&fiq->lock);
> > > +     if (fiq->connected) {
> > > +             if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
> > > +                     req->in.h.unique = fuse_get_unique_locked(fiq);
> > > +             list_add_tail(&req->list, &fiq->pending);
> > > +             fuse_dev_wake_and_unlock(fiq);
> > > +     } else {
> > > +             spin_unlock(&fiq->lock);
> > > +             req->out.h.error = -ENOTCONN;
> > > +             fuse_request_end(req);
>
> in the case where the connection has been aborted, this request will
> still have the FR_PENDING flag set on it when it calls
> fuse_request_end(). I think we can just call fuse_put_request() here
> instead.

actually, after looking more at this, I missed that this patch changes
the logic to now call request_wait_answer() (for non-background
requests) even if the connection was aborted.
So maybe just clear_bit(FR_PENDING, &req->flags) before we call
fuse_request_end() is the best.

>
> > > +     }
> > > +}
> > > +

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-09-23 23:47     ` Joanne Koong
@ 2024-09-24  8:58       ` Miklos Szeredi
  2024-09-24  9:52         ` Lai, Yi
  0 siblings, 1 reply; 15+ messages in thread
From: Miklos Szeredi @ 2024-09-24  8:58 UTC (permalink / raw)
  To: Joanne Koong
  Cc: Lai, Yi, Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen,
	Jingbo Xu, Stefan Hajnoczi, Yoray Zack, Vivek Goyal,
	virtualization, yi1.lai

[-- Attachment #1: Type: text/plain, Size: 261 bytes --]

On Tue, 24 Sept 2024 at 01:48, Joanne Koong <joannelkoong@gmail.com> wrote:
> So maybe just clear_bit(FR_PENDING, &req->flags) before we call
> fuse_request_end() is the best.

Agreed.   Attached patch should fix it.

Yi, can you please verify?

Thanks,
Miklos

[-- Attachment #2: 0001-fuse-clear-FR_PENDING-if-abort-is-detected-when-send.patch --]
[-- Type: text/x-patch, Size: 992 bytes --]

From fcd2d9e1fdcd7cada612f2e8737fb13a2bce7d0e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Tue, 24 Sep 2024 10:47:23 +0200
Subject: fuse: clear FR_PENDING if abort is detected when sending request

The (!fiq->connected) check was moved into the queuing method, resulting in
the request being completed via fuse_request_end() while the FR_PENDING flag
is still set, which triggers the WARN_ON in fuse_request_end().  Clear
FR_PENDING before completing the request in the disconnected case.

Fixes: 5de8acb41c86 ("fuse: cleanup request queuing towards virtiofs")
Reported-by: Lai, Yi <yi1.lai@linux.intel.com>
Closes: https://lore.kernel.org/all/ZvFEAM6JfrBKsOU0@ly-workstation/
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cdf925e0b317..53c4569d85a4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -295,6 +295,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 	} else {
 		spin_unlock(&fiq->lock);
 		req->out.h.error = -ENOTCONN;
+		clear_bit(FR_PENDING, &req->flags);
 		fuse_request_end(req);
 	}
 }
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] fuse: cleanup request queuing towards virtiofs
  2024-09-24  8:58       ` Miklos Szeredi
@ 2024-09-24  9:52         ` Lai, Yi
  0 siblings, 0 replies; 15+ messages in thread
From: Lai, Yi @ 2024-09-24  9:52 UTC (permalink / raw)
  To: Miklos Szeredi
  Cc: Joanne Koong, Miklos Szeredi, linux-fsdevel, Peter-Jan Gootzen,
	Jingbo Xu, Stefan Hajnoczi, Yoray Zack, Vivek Goyal,
	virtualization, yi1.lai

Hi Miklos,

After applying your attached patch on top of the linux-next next-20240924
tag, the issue can no longer be reproduced with the same repro binary.

Regards,
Yi Lai

On Tue, Sep 24, 2024 at 10:58:31AM +0200, Miklos Szeredi wrote:
> On Tue, 24 Sept 2024 at 01:48, Joanne Koong <joannelkoong@gmail.com> wrote:
> > So maybe just clear_bit(FR_PENDING, &req->flags) before we call
> > fuse_request_end() is the best.
> 
> Agreed.   Attached patch should fix it.
> 
> Yi, can you please verify?
> 
> Thanks,
> Miklos

> From fcd2d9e1fdcd7cada612f2e8737fb13a2bce7d0e Mon Sep 17 00:00:00 2001
> From: Miklos Szeredi <mszeredi@redhat.com>
> Date: Tue, 24 Sep 2024 10:47:23 +0200
> Subject: fuse: clear FR_PENDING if abort is detected when sending request
> 
> The (!fiq->connected) check was moved into the queuing method resulting in
> the following:
> 
> Fixes: 5de8acb41c86 ("fuse: cleanup request queuing towards virtiofs")
> Reported-by: Lai, Yi <yi1.lai@linux.intel.com>
> Closes: https://lore.kernel.org/all/ZvFEAM6JfrBKsOU0@ly-workstation/
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  fs/fuse/dev.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index cdf925e0b317..53c4569d85a4 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -295,6 +295,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
>  	} else {
>  		spin_unlock(&fiq->lock);
>  		req->out.h.error = -ENOTCONN;
> +		clear_bit(FR_PENDING, &req->flags);
>  		fuse_request_end(req);
>  	}
>  }
> -- 
> 2.46.0
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2024-09-24  9:53 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-05-29 15:52 [PATCH] fuse: cleanup request queuing towards virtiofs Miklos Szeredi
2024-05-29 18:32 ` Stefan Hajnoczi
2024-05-30  9:06   ` Miklos Szeredi
2024-05-30 13:38   ` Peter-Jan Gootzen
2024-06-05 10:40   ` Peter-Jan Gootzen
2024-06-05 11:04     ` Stefan Hajnoczi
2024-05-30  3:20 ` Jingbo Xu
2024-05-30  9:00   ` Miklos Szeredi
2024-05-30 15:36     ` Jingbo Xu
2024-05-30 17:07     ` Bernd Schubert
2024-09-23 10:33 ` Lai, Yi
2024-09-23 22:48   ` Joanne Koong
2024-09-23 23:47     ` Joanne Koong
2024-09-24  8:58       ` Miklos Szeredi
2024-09-24  9:52         ` Lai, Yi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).