* [PATCH V3] nbd: fix partial sending
@ 2024-10-29  1:19 Ming Lei
  2024-11-05 14:03 ` Kevin Wolf
  2025-01-13 14:46 ` Jens Axboe
  0 siblings, 2 replies; 5+ messages in thread
From: Ming Lei @ 2024-10-29  1:19 UTC (permalink / raw)
  To: Jens Axboe, linux-block
  Cc: josef, nbd, eblake, Ming Lei, vincent.chen, Leon Schuermann,
	Bart Van Assche, Kevin Wolf

The nbd driver sends the request header and payload with multiple calls
of sock_sendmsg, so a partial send can't be avoided. However, the nbd
driver returns BLK_STS_RESOURCE to the block core in this situation.
This causes one issue: request->tag may change in the next run of
nbd_queue_rq(), but the original tag has already been sent as part of
the header cookie, which confuses the nbd reply handling, since the real
request can no longer be retrieved with the obsolete old tag.
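
For context, the tag travels to the server inside the 64-bit handle
("cookie") that nbd puts into every request header and that the server
echoes back in its reply. A simplified sketch of that existing scheme,
abridged from drivers/block/nbd.c for illustration only:

#define NBD_COOKIE_BITS 32

/* The tag is baked into the handle sent on the wire ... */
static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	u32 tag = blk_mq_unique_tag(req);
	u64 cookie = cmd->cmd_cookie;

	return (cookie << NBD_COOKIE_BITS) | tag;
}

/* ... and reply handling recovers the tag from the echoed handle. */
static u32 nbd_handle_to_tag(u64 handle)
{
	return (u32)handle;
}

Once the request has been re-dispatched with a new tag, the tag echoed
back by the server no longer resolves to the in-flight request.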

Fix it by retrying the send directly in the per-socket work function,
and meanwhile return BLK_STS_OK to the block layer core.

Cc: vincent.chen@sifive.com
Cc: Leon Schuermann <leon@is.currently.online>
Cc: Bart Van Assche <bvanassche@acm.org>
Reported-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V3:
	- rename nbd_run_pending_work() (Kevin)
	- warn on double schedule (Kevin)
	- cover requeueing in the pending work function

V2:
	- move the pending retry to the socket work function and return BLK_STS_OK,
	  so that userspace gets a chance to handle the signal (Kevin)


 drivers/block/nbd.c | 95 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 85 insertions(+), 10 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b852050d8a96..a14a454ba0e8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -62,6 +62,7 @@ struct nbd_sock {
 	bool dead;
 	int fallback_index;
 	int cookie;
+	struct work_struct work;
 };
 
 struct recv_thread_args {
@@ -141,6 +142,9 @@ struct nbd_device {
  */
 #define NBD_CMD_INFLIGHT	2
 
+/* Only part of the request header or data payload was sent successfully */
+#define NBD_CMD_PARTIAL_SEND	3
+
 struct nbd_cmd {
 	struct nbd_device *nbd;
 	struct mutex lock;
@@ -466,6 +470,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
+	/* partial send is handled in nbd_sock's work function */
+	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
+		mutex_unlock(&cmd->lock);
+		return BLK_EH_RESET_TIMER;
+	}
+
 	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
@@ -614,6 +624,30 @@ static inline int was_interrupted(int result)
 	return result == -ERESTARTSYS || result == -EINTR;
 }
 
+/*
+ * We've already sent the header or part of the data payload, so we have
+ * no choice but to mark the request pending and schedule the work item.
+ *
+ * We also have to return BLK_STS_OK to the block core, otherwise this
+ * same request may be re-dispatched with a different tag, while our
+ * header has already gone out with the old tag, confusing reply handling.
+ */
+static void nbd_sched_pending_work(struct nbd_device *nbd,
+				   struct nbd_sock *nsock,
+				   struct nbd_cmd *cmd, int sent)
+{
+	struct request *req = blk_mq_rq_from_pdu(cmd);
+
+	/* pending work should be scheduled only once */
+	WARN_ON_ONCE(test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags));
+
+	nsock->pending = req;
+	nsock->sent = sent;
+	set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+	refcount_inc(&nbd->config_refs);
+	schedule_work(&nsock->work);
+}
+
 /*
  * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
  * Returns BLK_STS_IOERR if sending failed.
@@ -699,8 +733,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 			 * completely done.
 			 */
 			if (sent) {
-				nsock->pending = req;
-				nsock->sent = sent;
+				nbd_sched_pending_work(nbd, nsock, cmd, sent);
+				return BLK_STS_OK;
 			}
 			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
 			return BLK_STS_RESOURCE;
@@ -737,14 +771,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
 			if (result < 0) {
 				if (was_interrupted(result)) {
-					/* We've already sent the header, we
-					 * have no choice but to set pending and
-					 * return BUSY.
-					 */
-					nsock->pending = req;
-					nsock->sent = sent;
-					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
-					return BLK_STS_RESOURCE;
+					nbd_sched_pending_work(nbd, nsock, cmd, sent);
+					return BLK_STS_OK;
 				}
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -770,6 +798,14 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 	return BLK_STS_OK;
 
 requeue:
+	/*
+	 * Can't requeue if we are dealing with a partial send.
+	 *
+	 * The request must be finished from the pending work function.
+	 */
+	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
+		return BLK_STS_OK;
+
 	/* retry on a different socket */
 	dev_err_ratelimited(disk_to_dev(nbd->disk),
 			    "Request send failed, requeueing\n");
@@ -778,6 +814,44 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 	return BLK_STS_OK;
 }
 
+/* Finish a partial send from the per-socket work function */
+static void nbd_pending_cmd_work(struct work_struct *work)
+{
+	struct nbd_sock *nsock = container_of(work, struct nbd_sock, work);
+	struct request *req = nsock->pending;
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+	struct nbd_device *nbd = cmd->nbd;
+	unsigned long deadline = READ_ONCE(req->deadline);
+	unsigned int wait_ms = 2;
+
+	mutex_lock(&cmd->lock);
+
+	WARN_ON_ONCE(test_bit(NBD_CMD_REQUEUED, &cmd->flags));
+	if (WARN_ON_ONCE(!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)))
+		goto out;
+
+	mutex_lock(&nsock->tx_lock);
+	while (true) {
+		nbd_send_cmd(nbd, cmd, cmd->index);
+		if (!nsock->pending)
+			break;
+
+		/* don't bother timeout handler for partial sending */
+		if (READ_ONCE(jiffies) + msecs_to_jiffies(wait_ms) >= deadline) {
+			cmd->status = BLK_STS_IOERR;
+			blk_mq_complete_request(req);
+			break;
+		}
+		msleep(wait_ms);
+		wait_ms *= 2;
+	}
+	mutex_unlock(&nsock->tx_lock);
+	clear_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+out:
+	mutex_unlock(&cmd->lock);
+	nbd_config_put(nbd);
+}
+
 static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
 			  struct nbd_reply *reply)
 {
@@ -1224,6 +1298,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	nsock->pending = NULL;
 	nsock->sent = 0;
 	nsock->cookie = 0;
+	INIT_WORK(&nsock->work, nbd_pending_cmd_work);
 	socks[config->num_connections++] = nsock;
 	atomic_inc(&config->live_connections);
 	blk_mq_unfreeze_queue(nbd->disk->queue);
-- 
2.44.0



* Re: [PATCH V3] nbd: fix partial sending
  2024-10-29  1:19 [PATCH V3] nbd: fix partial sending Ming Lei
@ 2024-11-05 14:03 ` Kevin Wolf
  2025-01-08 18:04   ` Kevin Wolf
  2025-01-13  1:14   ` Ming Lei
  2025-01-13 14:46 ` Jens Axboe
  1 sibling, 2 replies; 5+ messages in thread
From: Kevin Wolf @ 2024-11-05 14:03 UTC (permalink / raw)
  To: Ming Lei
  Cc: Jens Axboe, linux-block, josef, nbd, eblake, vincent.chen,
	Leon Schuermann, Bart Van Assche

On 29.10.2024 at 02:19, Ming Lei wrote:
> The nbd driver sends the request header and payload with multiple calls
> of sock_sendmsg, so a partial send can't be avoided. However, the nbd
> driver returns BLK_STS_RESOURCE to the block core in this situation.
> This causes one issue: request->tag may change in the next run of
> nbd_queue_rq(), but the original tag has already been sent as part of
> the header cookie, which confuses the nbd reply handling, since the real
> request can no longer be retrieved with the obsolete old tag.
> 
> Fix it by retrying the send directly in the per-socket work function,
> and meanwhile return BLK_STS_OK to the block layer core.
> 
> Cc: vincent.chen@sifive.com
> Cc: Leon Schuermann <leon@is.currently.online>
> Cc: Bart Van Assche <bvanassche@acm.org>
> Reported-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>

> @@ -770,6 +798,14 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
>  	return BLK_STS_OK;
>  
>  requeue:
> +	/*
> +	 * Can't requeue if we are dealing with a partial send.
> +	 *
> +	 * The request must be finished from the pending work function.
> +	 */
> +	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
> +		return BLK_STS_OK;
> +
>  	/* retry on a different socket */
>  	dev_err_ratelimited(disk_to_dev(nbd->disk),
>  			    "Request send failed, requeueing\n");

This hunk doesn't feel ideal: The assumption in the normal code path
here is that the socket is dead, i.e. the error isn't recoverable. With
this way to handle it, nbd_pending_cmd_work() will keep retrying until
the request finally times out. We could probably return an error right
away.
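
Failing fast could look something like this (an untested sketch on top
of this patch, just to make the idea concrete; real code would also
need the usual NBD_CMD_INFLIGHT bookkeeping before completing):

requeue:
	/*
	 * The socket is considered dead at this point, so a partially
	 * sent command can never be completed by the server; fail it
	 * instead of letting the work function retry until timeout.
	 */
	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
		cmd->status = BLK_STS_IOERR;
		blk_mq_complete_request(blk_mq_rq_from_pdu(cmd));
		return BLK_STS_OK;
	}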

In fact, I think even requeuing (and ideally still completing the
request successfully in the end) would be fine in this case because
we'll shut down the socket and never send any additional data on it, so
the server will never see a complete command. We would just have to make
sure that nbd_pending_cmd_work() doesn't try to complete sending the
command any more.

But even though this error path isn't optimal, I feel it might be
acceptable. Let's see if someone else has an opinion on it.

Tested-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>



* Re: [PATCH V3] nbd: fix partial sending
  2024-11-05 14:03 ` Kevin Wolf
@ 2025-01-08 18:04   ` Kevin Wolf
  2025-01-13  1:14   ` Ming Lei
  1 sibling, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2025-01-08 18:04 UTC (permalink / raw)
  To: Ming Lei
  Cc: Jens Axboe, linux-block, josef, nbd, eblake, vincent.chen,
	Leon Schuermann, Bart Van Assche

On 05.11.2024 at 15:03, Kevin Wolf wrote:
> On 29.10.2024 at 02:19, Ming Lei wrote:
> > The nbd driver sends the request header and payload with multiple calls
> > of sock_sendmsg, so a partial send can't be avoided. However, the nbd
> > driver returns BLK_STS_RESOURCE to the block core in this situation.
> > This causes one issue: request->tag may change in the next run of
> > nbd_queue_rq(), but the original tag has already been sent as part of
> > the header cookie, which confuses the nbd reply handling, since the real
> > request can no longer be retrieved with the obsolete old tag.
> > 
> > Fix it by retrying the send directly in the per-socket work function,
> > and meanwhile return BLK_STS_OK to the block layer core.
> > 
> > Cc: vincent.chen@sifive.com
> > Cc: Leon Schuermann <leon@is.currently.online>
> > Cc: Bart Van Assche <bvanassche@acm.org>
> > Reported-by: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> 
> > @@ -770,6 +798,14 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
> >  	return BLK_STS_OK;
> >  
> >  requeue:
> > +	/*
> > +	 * Can't requeue if we are dealing with a partial send.
> > +	 *
> > +	 * The request must be finished from the pending work function.
> > +	 */
> > +	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
> > +		return BLK_STS_OK;
> > +
> >  	/* retry on a different socket */
> >  	dev_err_ratelimited(disk_to_dev(nbd->disk),
> >  			    "Request send failed, requeueing\n");
> 
> This hunk doesn't feel ideal: The assumption in the normal code path
> here is that the socket is dead, i.e. the error isn't recoverable. With
> this way to handle it, nbd_pending_cmd_work() will keep retrying until
> the request finally times out. We could probably return an error right
> away.
> 
> In fact, I think even requeuing (and ideally still completing the
> request successfully in the end) would be fine in this case because
> we'll shut down the socket and never send any additional data on it, so
> the server will never see a complete command. We would just have to make
> sure that nbd_pending_cmd_work() doesn't try to complete sending the
> command any more.
> 
> But even though this error path isn't optimal, I feel it might be
> acceptable. Let's see if someone else has an opinion on it.
> 
> Tested-by: Kevin Wolf <kwolf@redhat.com>
> Reviewed-by: Kevin Wolf <kwolf@redhat.com>

What is the status of this fix? I don't see any further comments on it,
but it also doesn't seem to be merged yet. Am I missing a follow-up
thread?

Kevin



* Re: [PATCH V3] nbd: fix partial sending
  2024-11-05 14:03 ` Kevin Wolf
  2025-01-08 18:04   ` Kevin Wolf
@ 2025-01-13  1:14   ` Ming Lei
  1 sibling, 0 replies; 5+ messages in thread
From: Ming Lei @ 2025-01-13  1:14 UTC (permalink / raw)
  To: Kevin Wolf, Jens Axboe
  Cc: linux-block, josef, nbd, eblake, vincent.chen, Leon Schuermann,
	Bart Van Assche

On Tue, Nov 05, 2024 at 03:03:06PM +0100, Kevin Wolf wrote:
> On 29.10.2024 at 02:19, Ming Lei wrote:
> > The nbd driver sends the request header and payload with multiple calls
> > of sock_sendmsg, so a partial send can't be avoided. However, the nbd
> > driver returns BLK_STS_RESOURCE to the block core in this situation.
> > This causes one issue: request->tag may change in the next run of
> > nbd_queue_rq(), but the original tag has already been sent as part of
> > the header cookie, which confuses the nbd reply handling, since the real
> > request can no longer be retrieved with the obsolete old tag.
> > 
> > Fix it by retrying the send directly in the per-socket work function,
> > and meanwhile return BLK_STS_OK to the block layer core.
> > 
> > Cc: vincent.chen@sifive.com
> > Cc: Leon Schuermann <leon@is.currently.online>
> > Cc: Bart Van Assche <bvanassche@acm.org>
> > Reported-by: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> 
> > @@ -770,6 +798,14 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
> >  	return BLK_STS_OK;
> >  
> >  requeue:
> > +	/*
> > +	 * Can't requeue if we are dealing with a partial send.
> > +	 *
> > +	 * The request must be finished from the pending work function.
> > +	 */
> > +	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
> > +		return BLK_STS_OK;
> > +
> >  	/* retry on a different socket */
> >  	dev_err_ratelimited(disk_to_dev(nbd->disk),
> >  			    "Request send failed, requeueing\n");
> 
> This hunk doesn't feel ideal: The assumption in the normal code path
> here is that the socket is dead, i.e. the error isn't recoverable. With
> this way to handle it, nbd_pending_cmd_work() will keep retrying until
> the request finally times out. We could probably return an error right
> away.
> 
> In fact, I think even requeuing (and ideally still completing the
> request successfully in the end) would be fine in this case because
> we'll shut down the socket and never send any additional data on it, so
> the server will never see a complete command. We would just have to make
> sure that nbd_pending_cmd_work() doesn't try to complete sending the
> command any more.
> 
> But even though this error path isn't optimal, I feel it might be
> acceptable. Let's see if someone else has an opinion on it.
> 
> Tested-by: Kevin Wolf <kwolf@redhat.com>
> Reviewed-by: Kevin Wolf <kwolf@redhat.com>
 
Hello Jens,

Can you pick this fix up for v6.14 if you are fine with it?


Thanks,
Ming



* Re: [PATCH V3] nbd: fix partial sending
  2024-10-29  1:19 [PATCH V3] nbd: fix partial sending Ming Lei
  2024-11-05 14:03 ` Kevin Wolf
@ 2025-01-13 14:46 ` Jens Axboe
  1 sibling, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2025-01-13 14:46 UTC (permalink / raw)
  To: linux-block, Ming Lei
  Cc: josef, nbd, eblake, vincent.chen, Leon Schuermann,
	Bart Van Assche, Kevin Wolf


On Tue, 29 Oct 2024 09:19:41 +0800, Ming Lei wrote:
> The nbd driver sends the request header and payload with multiple calls
> of sock_sendmsg, so a partial send can't be avoided. However, the nbd
> driver returns BLK_STS_RESOURCE to the block core in this situation.
> This causes one issue: request->tag may change in the next run of
> nbd_queue_rq(), but the original tag has already been sent as part of
> the header cookie, which confuses the nbd reply handling, since the real
> request can no longer be retrieved with the obsolete old tag.
> 
> [...]

Applied, thanks!

[1/1] nbd: fix partial sending
      commit: 8337b029f788272f5273887ccefb8226404658ce

Best regards,
-- 
Jens Axboe




