All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
	linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling
Date: Wed, 26 Jun 2024 14:13:46 +0200	[thread overview]
Message-ID: <20240626121347.1116-7-hare@kernel.org> (raw)
In-Reply-To: <20240626121347.1116-1-hare@kernel.org>

When there is no write space on the socket we shouldn't try to
push more data onto it; it'll stall anyway and lead to higher CPU
utilisation. So check sock_wspace() before queueing new
requests and let the socket's write_space() handler restart the
submission.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
 drivers/nvme/host/tcp.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 599d4ebf888f..d78cca2f05d4 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -147,6 +147,7 @@ enum nvme_tcp_recv_state {
 struct nvme_tcp_ctrl;
 struct nvme_tcp_queue {
 	struct socket		*sock;
+	struct blk_mq_hw_ctx	*hctx;
 	struct work_struct	io_work;
 	int			io_cpu;
 
@@ -381,6 +382,15 @@ static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
 		nvme_tcp_queue_has_pending(queue);
 }
 
+static inline void nvme_tcp_queue_work(struct nvme_tcp_queue *queue)
+{
+	set_bit(SOCK_NOSPACE, &queue->sock->flags);
+	if (!sock_wspace(queue->sock->sk))
+		return;
+	clear_bit(SOCK_NOSPACE, &queue->sock->flags);
+	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+}
+
 static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 		bool sync, bool last)
 {
@@ -402,7 +412,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
 	}
 
 	if (last && nvme_tcp_queue_has_pending(queue))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		nvme_tcp_queue_work(queue);
 }
 
 static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -550,6 +560,7 @@ static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
 
 	hctx->driver_data = queue;
+	queue->hctx = hctx;
 	return 0;
 }
 
@@ -1004,7 +1015,10 @@ static void nvme_tcp_write_space(struct sock *sk)
 	queue = sk->sk_user_data;
 	if (likely(queue && sk_stream_is_writeable(sk))) {
 		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		if (sock_wspace(sk))
+			queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		if (queue->hctx)
+			blk_mq_start_hw_queue(queue->hctx);
 	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1317,7 +1331,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 
 	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
-	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+	nvme_tcp_queue_work(queue);
 }
 
 static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -1863,6 +1877,7 @@ static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
 
 static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 {
+	queue->hctx = NULL;
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
 	nvme_tcp_restore_sock_ops(queue);
 	cancel_work_sync(&queue->io_work);
@@ -2614,7 +2629,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 
 	if (!llist_empty(&queue->req_list))
-		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+		nvme_tcp_queue_work(queue);
 }
 
 static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2630,6 +2645,13 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
 		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
+	set_bit(SOCK_NOSPACE, &queue->sock->flags);
+	if (!sock_wspace(queue->sock->sk)) {
+		blk_mq_stop_hw_queue(hctx);
+		return BLK_STS_DEV_RESOURCE;
+	}
+	clear_bit(SOCK_NOSPACE, &queue->sock->flags);
+
 	ret = nvme_tcp_setup_cmd_pdu(ns, rq);
 	if (unlikely(ret))
 		return ret;
-- 
2.35.3



  parent reply	other threads:[~2024-06-26 12:14 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-26 12:13 [PATCH 0/7] nvme-tcp scalability improvements Hannes Reinecke
2024-06-26 12:13 ` [PATCH 1/7] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-06-26 12:13 ` [PATCH 2/7] nvme-tcp: distribute queue affinity Hannes Reinecke
2024-06-26 13:38   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 3/7] net: micro-optimize skb_datagram_iter Hannes Reinecke
2024-06-26 13:38   ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 4/7] nvme-tcp: receive data in softirq Hannes Reinecke
2024-06-26 12:13 ` [PATCH 5/7] nvmet-tcp: add wq_unbound module parameter Hannes Reinecke
2024-06-26 13:44   ` Sagi Grimberg
2024-06-26 12:13 ` Hannes Reinecke [this message]
2024-06-26 13:45   ` [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling Sagi Grimberg
2024-06-26 12:13 ` [PATCH 7/7] nvme-tcp: make softirq_rx the default Hannes Reinecke
2024-06-26 13:46   ` Sagi Grimberg
2024-06-26 13:37 ` [PATCH 0/7] nvme-tcp scalability improvements Sagi Grimberg
2024-06-26 14:27   ` Hannes Reinecke

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240626121347.1116-7-hare@kernel.org \
    --to=hare@kernel.org \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.