* [PATCH V2] nbd: fix partial sending
@ 2024-10-18 14:08 Ming Lei
  2024-10-22 18:46 ` Kevin Wolf
  0 siblings, 1 reply; 3+ messages in thread
From: Ming Lei @ 2024-10-18 14:08 UTC (permalink / raw)
  To: Jens Axboe, linux-block
  Cc: josef, nbd, eblake, Ming Lei, vincent.chen, Leon Schuermann,
	Bart Van Assche, Kevin Wolf

The nbd driver sends the request header and payload with multiple calls
of sock_sendmsg, so partial sends can't be avoided. However, the nbd
driver returns BLK_STS_RESOURCE to the block core in this situation.
This causes a problem: request->tag may change in the next run of
nbd_queue_rq(), but the original tag has already been sent as part of
the header cookie. This confuses nbd reply handling, since the real
request can no longer be retrieved with the obsolete tag.
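
For reference, the handle sent on the wire packs the tag roughly like
this (simplified sketch of what nbd_cmd_handle() does):

	handle = (cmd->cmd_cookie << NBD_COOKIE_BITS) | blk_mq_unique_tag(req);

so once the header is out, the reply can only be matched against the tag
that was current at send time.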

Fix it by retrying the send directly in a per-socket work function,
while returning BLK_STS_OK to the block layer core.

Cc: vincent.chen@sifive.com
Cc: Leon Schuermann <leon@is.currently.online>
Cc: Bart Van Assche <bvanassche@acm.org>
Reported-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V2:
	- move the pending retry to the socket work function and return BLK_STS_OK,
	  so that userspace gets a chance to handle the signal (Kevin)

 drivers/block/nbd.c | 89 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 77 insertions(+), 12 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index b852050d8a96..855f4a79e37c 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -62,6 +62,7 @@ struct nbd_sock {
 	bool dead;
 	int fallback_index;
 	int cookie;
+	struct work_struct work;
 };
 
 struct recv_thread_args {
@@ -141,6 +142,9 @@ struct nbd_device {
  */
 #define NBD_CMD_INFLIGHT	2
 
+/* Just part of request header or data payload is sent successfully */
+#define NBD_CMD_PARTIAL_SEND	3
+
 struct nbd_cmd {
 	struct nbd_device *nbd;
 	struct mutex lock;
@@ -466,6 +470,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
+	/* partial send is handled in nbd_sock's work function */
+	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
+		mutex_unlock(&cmd->lock);
+		return BLK_EH_RESET_TIMER;
+	}
+
 	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
@@ -614,6 +624,27 @@ static inline int was_interrupted(int result)
 	return result == -ERESTARTSYS || result == -EINTR;
 }
 
+/*
+ * We've already sent header or part of data payload, have no choice but
+ * to set pending and schedule it in work.
+ *
+ * And we have to return BLK_STS_OK to block core, otherwise this same
+ * request may be re-dispatched with different tag, but our header has
+ * been sent out with old tag, and this way does confuse reply handling.
+ */
+static void nbd_run_pending_work(struct nbd_device *nbd,
+				 struct nbd_sock *nsock,
+				 struct nbd_cmd *cmd, int sent)
+{
+	struct request *req = blk_mq_rq_from_pdu(cmd);
+
+	nsock->pending = req;
+	nsock->sent = sent;
+	set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+	refcount_inc(&nbd->config_refs);
+	schedule_work(&nsock->work);
+}
+
 /*
  * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
  * Returns BLK_STS_IOERR if sending failed.
@@ -699,11 +730,12 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 			 * completely done.
 			 */
 			if (sent) {
-				nsock->pending = req;
-				nsock->sent = sent;
+				nbd_run_pending_work(nbd, nsock, cmd, sent);
+				return BLK_STS_OK;
+			} else {
+				set_bit(NBD_CMD_REQUEUED, &cmd->flags);
+				return BLK_STS_RESOURCE;
 			}
-			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
-			return BLK_STS_RESOURCE;
 		}
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
@@ -737,14 +769,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
 			if (result < 0) {
 				if (was_interrupted(result)) {
-					/* We've already sent the header, we
-					 * have no choice but to set pending and
-					 * return BUSY.
-					 */
-					nsock->pending = req;
-					nsock->sent = sent;
-					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
-					return BLK_STS_RESOURCE;
+					nbd_run_pending_work(nbd, nsock, cmd, sent);
+					return BLK_STS_OK;
 				}
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -778,6 +804,44 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
 	return BLK_STS_OK;
 }
 
+/* handle partial sending */
+static void nbd_pending_cmd_work(struct work_struct *work)
+{
+	struct nbd_sock *nsock = container_of(work, struct nbd_sock, work);
+	struct request *req = nsock->pending;
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+	struct nbd_device *nbd = cmd->nbd;
+	unsigned long deadline = READ_ONCE(req->deadline);
+	unsigned int wait_ms = 2;
+
+	mutex_lock(&cmd->lock);
+
+	WARN_ON_ONCE(test_bit(NBD_CMD_REQUEUED, &cmd->flags));
+	if (!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
+		goto out;
+
+	mutex_lock(&nsock->tx_lock);
+	while (true) {
+		nbd_send_cmd(nbd, cmd, cmd->index);
+		if (!nsock->pending)
+			break;
+
+		/* don't bother timeout handler for partial sending */
+		if (READ_ONCE(jiffies) + msecs_to_jiffies(wait_ms) >= deadline) {
+			cmd->status = BLK_STS_IOERR;
+			blk_mq_complete_request(req);
+			break;
+		}
+		msleep(wait_ms);
+		wait_ms *= 2;
+	}
+	mutex_unlock(&nsock->tx_lock);
+	clear_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
+out:
+	mutex_unlock(&cmd->lock);
+	nbd_config_put(nbd);
+}
+
 static int nbd_read_reply(struct nbd_device *nbd, struct socket *sock,
 			  struct nbd_reply *reply)
 {
@@ -1224,6 +1288,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	nsock->pending = NULL;
 	nsock->sent = 0;
 	nsock->cookie = 0;
+	INIT_WORK(&nsock->work, nbd_pending_cmd_work);
 	socks[config->num_connections++] = nsock;
 	atomic_inc(&config->live_connections);
 	blk_mq_unfreeze_queue(nbd->disk->queue);
-- 
2.44.0



* Re: [PATCH V2] nbd: fix partial sending
  2024-10-18 14:08 [PATCH V2] nbd: fix partial sending Ming Lei
@ 2024-10-22 18:46 ` Kevin Wolf
  2024-10-23  2:03   ` Ming Lei
  0 siblings, 1 reply; 3+ messages in thread
From: Kevin Wolf @ 2024-10-22 18:46 UTC (permalink / raw)
  To: Ming Lei
  Cc: Jens Axboe, linux-block, josef, nbd, eblake, vincent.chen,
	Leon Schuermann, Bart Van Assche

On 18.10.2024 at 16:08, Ming Lei wrote:
> The nbd driver sends the request header and payload with multiple calls
> of sock_sendmsg, so partial sends can't be avoided. However, the nbd
> driver returns BLK_STS_RESOURCE to the block core in this situation.
> This causes a problem: request->tag may change in the next run of
> nbd_queue_rq(), but the original tag has already been sent as part of
> the header cookie. This confuses nbd reply handling, since the real
> request can no longer be retrieved with the obsolete tag.
> 
> Fix it by retrying the send directly in a per-socket work function,
> while returning BLK_STS_OK to the block layer core.
> 
> Cc: vincent.chen@sifive.com
> Cc: Leon Schuermann <leon@is.currently.online>
> Cc: Bart Van Assche <bvanassche@acm.org>
> Reported-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> V2:
> 	- move the pending retry to the socket work function and return BLK_STS_OK,
> 	  so that userspace gets a chance to handle the signal (Kevin)

So first of all: This seems to work.

But looking through the code I have a few comments:

>  drivers/block/nbd.c | 89 +++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 77 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index b852050d8a96..855f4a79e37c 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -62,6 +62,7 @@ struct nbd_sock {
>  	bool dead;
>  	int fallback_index;
>  	int cookie;
> +	struct work_struct work;
>  };
>  
>  struct recv_thread_args {
> @@ -141,6 +142,9 @@ struct nbd_device {
>   */
>  #define NBD_CMD_INFLIGHT	2
>  
> +/* Just part of request header or data payload is sent successfully */
> +#define NBD_CMD_PARTIAL_SEND	3
> +
>  struct nbd_cmd {
>  	struct nbd_device *nbd;
>  	struct mutex lock;
> @@ -466,6 +470,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
>  	if (!mutex_trylock(&cmd->lock))
>  		return BLK_EH_RESET_TIMER;
>  
> +	/* partial send is handled in nbd_sock's work function */
> +	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
> +		mutex_unlock(&cmd->lock);
> +		return BLK_EH_RESET_TIMER;
> +	}
> +
>  	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>  		mutex_unlock(&cmd->lock);
>  		return BLK_EH_DONE;
> @@ -614,6 +624,27 @@ static inline int was_interrupted(int result)
>  	return result == -ERESTARTSYS || result == -EINTR;
>  }
>  
> +/*
> + * We've already sent header or part of data payload, have no choice but
> + * to set pending and schedule it in work.
> + *
> + * And we have to return BLK_STS_OK to block core, otherwise this same
> + * request may be re-dispatched with different tag, but our header has
> + * been sent out with old tag, and this way does confuse reply handling.
> + */
> +static void nbd_run_pending_work(struct nbd_device *nbd,
> +				 struct nbd_sock *nsock,
> +				 struct nbd_cmd *cmd, int sent)

The name of this function is a bit confusing, we don't actually run
anything here. Maybe nbd_schedule_pending_work()? Or something else with
"schedule" and "send".

> +{
> +	struct request *req = blk_mq_rq_from_pdu(cmd);
> +
> +	nsock->pending = req;
> +	nsock->sent = sent;
> +	set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
> +	refcount_inc(&nbd->config_refs);
> +	schedule_work(&nsock->work);
> +}

Important point about this function: It doesn't check if we already have
scheduled the work. You seem to have some code in nbd_pending_cmd_work()
that can cope with being scheduled multiple times, but allowing the
situation makes the control flow hard to follow. It would probably be
better to avoid this and call refcount_inc() and schedule_work() only if
NBD_CMD_PARTIAL_SEND isn't already set.
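
Something along these lines is what I have in mind (untested sketch, all
names taken from your patch):

	nsock->pending = req;
	nsock->sent = sent;
	if (!test_and_set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
		refcount_inc(&nbd->config_refs);
		schedule_work(&nsock->work);
	}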

> +
>  /*
>   * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
>   * Returns BLK_STS_IOERR if sending failed.
> @@ -699,11 +730,12 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
>  			 * completely done.
>  			 */
>  			if (sent) {
> -				nsock->pending = req;
> -				nsock->sent = sent;
> +				nbd_run_pending_work(nbd, nsock, cmd, sent);
> +				return BLK_STS_OK;

Now the question is if requeuing is already implicitly avoided, because
returning EINTR to a worker doesn't seem to make a lot of sense to me,
and so we might actually never hit this code path from a worker. But I'm
not completely sure. Maybe better to detect the situation in
nbd_run_pending_work() anyway.
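
One way to detect it might be something like this (untested, just an
idea):

	/* we should never end up here again from the send work itself */
	WARN_ON_ONCE(current_work() == &nsock->work);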

> +			} else {
> +				set_bit(NBD_CMD_REQUEUED, &cmd->flags);
> +				return BLK_STS_RESOURCE;
>  			}
> -			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
> -			return BLK_STS_RESOURCE;

This is an unrelated style change.

>  		}
>  		dev_err_ratelimited(disk_to_dev(nbd->disk),
>  			"Send control failed (result %d)\n", result);
> @@ -737,14 +769,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
>  			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
>  			if (result < 0) {
>  				if (was_interrupted(result)) {
> -					/* We've already sent the header, we
> -					 * have no choice but to set pending and
> -					 * return BUSY.
> -					 */
> -					nsock->pending = req;
> -					nsock->sent = sent;
> -					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
> -					return BLK_STS_RESOURCE;
> +					nbd_run_pending_work(nbd, nsock, cmd, sent);
> +					return BLK_STS_OK;
>  				}
>  				dev_err(disk_to_dev(nbd->disk),
>  					"Send data failed (result %d)\n",
> @@ -778,6 +804,44 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
>  	return BLK_STS_OK;
>  }
>  
> +/* handle partial sending */
> +static void nbd_pending_cmd_work(struct work_struct *work)
> +{
> +	struct nbd_sock *nsock = container_of(work, struct nbd_sock, work);
> +	struct request *req = nsock->pending;
> +	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
> +	struct nbd_device *nbd = cmd->nbd;
> +	unsigned long deadline = READ_ONCE(req->deadline);
> +	unsigned int wait_ms = 2;
> +
> +	mutex_lock(&cmd->lock);
> +
> +	WARN_ON_ONCE(test_bit(NBD_CMD_REQUEUED, &cmd->flags));
> +	if (!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
> +		goto out;

I believe this check is only for detecting if the work was scheduled
multiple times? If we avoid that above, we probably can make this a
WARN_ON_ONCE(), too.
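
i.e. something like this (untested):

	WARN_ON_ONCE(!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags));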

> +
> +	mutex_lock(&nsock->tx_lock);
> +	while (true) {
> +		nbd_send_cmd(nbd, cmd, cmd->index);
> +		if (!nsock->pending)
> +			break;

If it's true that we can never get EINTR or ERESTARTSYS in a worker,
then this condition would always be true and the loop and sleeping logic
is unnecessary.

If it actually is necessary, I wonder if it wouldn't be better to just
let nbd_send_cmd() reschedule the work without a loop and sleeping here.
I'm not entirely sure what EINTR/ERESTARTSYS should even mean in this
context, but like in the .queue_rq() path, the thing that clears these
error conditions would probably be returning, not sleeping?
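
Roughly this shape is what I was thinking of (untested, ignoring the
config_refs accounting, just to show the idea):

	mutex_lock(&nsock->tx_lock);
	nbd_send_cmd(nbd, cmd, cmd->index);
	if (nsock->pending)
		schedule_work(&nsock->work);	/* retry later instead of sleeping */
	mutex_unlock(&nsock->tx_lock);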

> +
> +		/* don't bother timeout handler for partial sending */
> +		if (READ_ONCE(jiffies) + msecs_to_jiffies(wait_ms) >= deadline) {
> +			cmd->status = BLK_STS_IOERR;
> +			blk_mq_complete_request(req);
> +			break;
> +		}
> +		msleep(wait_ms);
> +		wait_ms *= 2;
> +	}
> +	mutex_unlock(&nsock->tx_lock);
> +	clear_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
> +out:
> +	mutex_unlock(&cmd->lock);
> +	nbd_config_put(nbd);
> +}

Kevin



* Re: [PATCH V2] nbd: fix partial sending
  2024-10-22 18:46 ` Kevin Wolf
@ 2024-10-23  2:03   ` Ming Lei
  0 siblings, 0 replies; 3+ messages in thread
From: Ming Lei @ 2024-10-23  2:03 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: Jens Axboe, linux-block, josef, nbd, eblake, vincent.chen,
	Leon Schuermann, Bart Van Assche

On Tue, Oct 22, 2024 at 08:46:07PM +0200, Kevin Wolf wrote:
> On 18.10.2024 at 16:08, Ming Lei wrote:
> > The nbd driver sends the request header and payload with multiple calls
> > of sock_sendmsg, so partial sends can't be avoided. However, the nbd
> > driver returns BLK_STS_RESOURCE to the block core in this situation.
> > This causes a problem: request->tag may change in the next run of
> > nbd_queue_rq(), but the original tag has already been sent as part of
> > the header cookie. This confuses nbd reply handling, since the real
> > request can no longer be retrieved with the obsolete tag.
> > 
> > Fix it by retrying the send directly in a per-socket work function,
> > while returning BLK_STS_OK to the block layer core.
> > 
> > Cc: vincent.chen@sifive.com
> > Cc: Leon Schuermann <leon@is.currently.online>
> > Cc: Bart Van Assche <bvanassche@acm.org>
> > Reported-by: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> > V2:
> > 	- move the pending retry to the socket work function and return BLK_STS_OK,
> > 	  so that userspace gets a chance to handle the signal (Kevin)
> 
> So first of all: This seems to work.
> 
> But looking through the code I have a few comments:
> 
> >  drivers/block/nbd.c | 89 +++++++++++++++++++++++++++++++++++++++------
> >  1 file changed, 77 insertions(+), 12 deletions(-)
> > 
> > diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> > index b852050d8a96..855f4a79e37c 100644
> > --- a/drivers/block/nbd.c
> > +++ b/drivers/block/nbd.c
> > @@ -62,6 +62,7 @@ struct nbd_sock {
> >  	bool dead;
> >  	int fallback_index;
> >  	int cookie;
> > +	struct work_struct work;
> >  };
> >  
> >  struct recv_thread_args {
> > @@ -141,6 +142,9 @@ struct nbd_device {
> >   */
> >  #define NBD_CMD_INFLIGHT	2
> >  
> > +/* Just part of request header or data payload is sent successfully */
> > +#define NBD_CMD_PARTIAL_SEND	3
> > +
> >  struct nbd_cmd {
> >  	struct nbd_device *nbd;
> >  	struct mutex lock;
> > @@ -466,6 +470,12 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req)
> >  	if (!mutex_trylock(&cmd->lock))
> >  		return BLK_EH_RESET_TIMER;
> >  
> > +	/* partial send is handled in nbd_sock's work function */
> > +	if (test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags)) {
> > +		mutex_unlock(&cmd->lock);
> > +		return BLK_EH_RESET_TIMER;
> > +	}
> > +
> >  	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
> >  		mutex_unlock(&cmd->lock);
> >  		return BLK_EH_DONE;
> > @@ -614,6 +624,27 @@ static inline int was_interrupted(int result)
> >  	return result == -ERESTARTSYS || result == -EINTR;
> >  }
> >  
> > +/*
> > + * We've already sent header or part of data payload, have no choice but
> > + * to set pending and schedule it in work.
> > + *
> > + * And we have to return BLK_STS_OK to block core, otherwise this same
> > + * request may be re-dispatched with different tag, but our header has
> > + * been sent out with old tag, and this way does confuse reply handling.
> > + */
> > +static void nbd_run_pending_work(struct nbd_device *nbd,
> > +				 struct nbd_sock *nsock,
> > +				 struct nbd_cmd *cmd, int sent)
> 
> The name of this function is a bit confusing, we don't actually run
> anything here. Maybe nbd_schedule_pending_work()? Or something else with
> "schedule" and "send".

Fine, but blk-mq does have blk_mq_run_hw_queue().

> 
> > +{
> > +	struct request *req = blk_mq_rq_from_pdu(cmd);
> > +
> > +	nsock->pending = req;
> > +	nsock->sent = sent;
> > +	set_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags);
> > +	refcount_inc(&nbd->config_refs);
> > +	schedule_work(&nsock->work);
> > +}
> 
> Important point about this function: It doesn't check if we already have
> scheduled the work. You seem to have some code in nbd_pending_cmd_work()
> that can cope with being scheduled multiple times, but allowing the
> situation makes the control flow hard to follow. It would probably be
> better to avoid this and call refcount_inc() and schedule_work() only if
> NBD_CMD_PARTIAL_SEND isn't already set.

The function can be called only once because BLK_STS_OK means the request
ownership is transferred to nbd now.

But we can add

	WARN_ON_ONCE(test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags));

here.

> 
> > +
> >  /*
> >   * Returns BLK_STS_RESOURCE if the caller should retry after a delay.
> >   * Returns BLK_STS_IOERR if sending failed.
> > @@ -699,11 +730,12 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
> >  			 * completely done.
> >  			 */
> >  			if (sent) {
> > -				nsock->pending = req;
> > -				nsock->sent = sent;
> > +				nbd_run_pending_work(nbd, nsock, cmd, sent);
> > +				return BLK_STS_OK;
> 
> Now the question is if requeuing is already implicitly avoided, because

Yes, requeue is supposed to be exclusive with handling the pending request,
but V2 may still allow a requeue from the pending work context; will fix it.

> returning EINTR to a worker doesn't seem to make a lot of sense to me,
> and so we might actually never hit this code path from a worker. But I'm
> not completely sure. Maybe better to detect the situation in
> nbd_run_pending_work() anyway.
> 
> > +			} else {
> > +				set_bit(NBD_CMD_REQUEUED, &cmd->flags);
> > +				return BLK_STS_RESOURCE;
> >  			}
> > -			set_bit(NBD_CMD_REQUEUED, &cmd->flags);
> > -			return BLK_STS_RESOURCE;
> 
> This is an unrelated style change.

Fine.

> 
> >  		}
> >  		dev_err_ratelimited(disk_to_dev(nbd->disk),
> >  			"Send control failed (result %d)\n", result);
> > @@ -737,14 +769,8 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
> >  			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
> >  			if (result < 0) {
> >  				if (was_interrupted(result)) {
> > -					/* We've already sent the header, we
> > -					 * have no choice but to set pending and
> > -					 * return BUSY.
> > -					 */
> > -					nsock->pending = req;
> > -					nsock->sent = sent;
> > -					set_bit(NBD_CMD_REQUEUED, &cmd->flags);
> > -					return BLK_STS_RESOURCE;
> > +					nbd_run_pending_work(nbd, nsock, cmd, sent);
> > +					return BLK_STS_OK;
> >  				}
> >  				dev_err(disk_to_dev(nbd->disk),
> >  					"Send data failed (result %d)\n",
> > @@ -778,6 +804,44 @@ static blk_status_t nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd,
> >  	return BLK_STS_OK;
> >  }
> >  
> > +/* handle partial sending */
> > +static void nbd_pending_cmd_work(struct work_struct *work)
> > +{
> > +	struct nbd_sock *nsock = container_of(work, struct nbd_sock, work);
> > +	struct request *req = nsock->pending;
> > +	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
> > +	struct nbd_device *nbd = cmd->nbd;
> > +	unsigned long deadline = READ_ONCE(req->deadline);
> > +	unsigned int wait_ms = 2;
> > +
> > +	mutex_lock(&cmd->lock);
> > +
> > +	WARN_ON_ONCE(test_bit(NBD_CMD_REQUEUED, &cmd->flags));
> > +	if (!test_bit(NBD_CMD_PARTIAL_SEND, &cmd->flags))
> > +		goto out;
> 
> I believe this check is only for detecting if the work was scheduled
> multiple times? If we avoid that above, we probably can make this a
> WARN_ON_ONCE(), too.

The check can be killed or changed to WARN_ON_ONCE().

> 
> > +
> > +	mutex_lock(&nsock->tx_lock);
> > +	while (true) {
> > +		nbd_send_cmd(nbd, cmd, cmd->index);
> > +		if (!nsock->pending)
> > +			break;
> 
> If it's true that we can never get EINTR or ERESTARTSYS in a worker,
> then this condition would always be true and the loop and sleeping logic
> is unnecessary.

A kernel thread is capable of handling signals, but it doesn't seem to
make sense to get EINTR or ERESTARTSYS in kworker context.

But it doesn't matter here.

> 
> If it actually is necessary, I wonder if it wouldn't be better to just
> let nbd_send_cmd() reschedule the work without a loop and sleeping here.
> I'm not entirely sure what EINTR/ERESTARTSYS should even mean in this
> context, but like in the .queue_rq() path, the thing that clears these
> error conditions would probably be returning, not sleeping?

Yeah, it is necessary because nbd_pending_cmd_work() may not make
progress and the pending condition may still hold, for example when the
send socket buffer is full; then we still need to wait and retry.

V2 doesn't handle this case, so a requeue may be triggered, which is
wrong.


Thanks, 
Ming

