From: Hannes Reinecke <hare@kernel.org>
To: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>, Keith Busch <kbusch@kernel.org>,
linux-nvme@lists.infradead.org, Hannes Reinecke <hare@kernel.org>
Subject: [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling
Date: Wed, 26 Jun 2024 14:13:46 +0200 [thread overview]
Message-ID: <20240626121347.1116-7-hare@kernel.org> (raw)
In-Reply-To: <20240626121347.1116-1-hare@kernel.org>
When there is no write space on the socket, we shouldn't try to
push more data onto it; it'll stall anyway and lead to higher CPU
utilisation. So check for sock_wspace() before queueing new
requests and let the sock write_space() handler restart the
submission.
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
drivers/nvme/host/tcp.c | 30 ++++++++++++++++++++++++++----
1 file changed, 26 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 599d4ebf888f..d78cca2f05d4 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -147,6 +147,7 @@ enum nvme_tcp_recv_state {
struct nvme_tcp_ctrl;
struct nvme_tcp_queue {
struct socket *sock;
+ struct blk_mq_hw_ctx *hctx;
struct work_struct io_work;
int io_cpu;
@@ -381,6 +382,15 @@ static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
nvme_tcp_queue_has_pending(queue);
}
+static inline void nvme_tcp_queue_work(struct nvme_tcp_queue *queue)
+{
+ set_bit(SOCK_NOSPACE, &queue->sock->flags);
+ if (!sock_wspace(queue->sock->sk))
+ return;
+ clear_bit(SOCK_NOSPACE, &queue->sock->flags);
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+}
+
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync, bool last)
{
@@ -402,7 +412,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
}
if (last && nvme_tcp_queue_has_pending(queue))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ nvme_tcp_queue_work(queue);
}
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -550,6 +560,7 @@ static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
hctx->driver_data = queue;
+ queue->hctx = hctx;
return 0;
}
@@ -1004,7 +1015,10 @@ static void nvme_tcp_write_space(struct sock *sk)
queue = sk->sk_user_data;
if (likely(queue && sk_stream_is_writeable(sk))) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ if (sock_wspace(sk))
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ if (queue->hctx)
+ blk_mq_start_hw_queue(queue->hctx);
}
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1317,7 +1331,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
} while (!time_after(jiffies, deadline)); /* quota is exhausted */
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ nvme_tcp_queue_work(queue);
}
static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -1863,6 +1877,7 @@ static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
+ queue->hctx = NULL;
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
nvme_tcp_restore_sock_ops(queue);
cancel_work_sync(&queue->io_work);
@@ -2614,7 +2629,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_tcp_queue *queue = hctx->driver_data;
if (!llist_empty(&queue->req_list))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ nvme_tcp_queue_work(queue);
}
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2630,6 +2645,13 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ set_bit(SOCK_NOSPACE, &queue->sock->flags);
+ if (!sock_wspace(queue->sock->sk)) {
+ blk_mq_stop_hw_queue(hctx);
+ return BLK_STS_DEV_RESOURCE;
+ }
+ clear_bit(SOCK_NOSPACE, &queue->sock->flags);
+
ret = nvme_tcp_setup_cmd_pdu(ns, rq);
if (unlikely(ret))
return ret;
--
2.35.3
next prev parent reply other threads:[~2024-06-26 12:14 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-26 12:13 [PATCH 0/7] nvme-tcp scalability improvements Hannes Reinecke
2024-06-26 12:13 ` [PATCH 1/7] nvme-tcp: align I/O cpu with blk-mq mapping Hannes Reinecke
2024-06-26 12:13 ` [PATCH 2/7] nvme-tcp: distribute queue affinity Hannes Reinecke
2024-06-26 13:38 ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 3/7] net: micro-optimize skb_datagram_iter Hannes Reinecke
2024-06-26 13:38 ` Sagi Grimberg
2024-06-26 12:13 ` [PATCH 4/7] nvme-tcp: receive data in softirq Hannes Reinecke
2024-06-26 12:13 ` [PATCH 5/7] nvmet-tcp: add wq_unbound module parameter Hannes Reinecke
2024-06-26 13:44 ` Sagi Grimberg
2024-06-26 12:13 ` Hannes Reinecke [this message]
2024-06-26 13:45 ` [PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling Sagi Grimberg
2024-06-26 12:13 ` [PATCH 7/7] nvme-tcp: make softirq_rx the default Hannes Reinecke
2024-06-26 13:46 ` Sagi Grimberg
2024-06-26 13:37 ` [PATCH 0/7] nvme-tcp scalability improvements Sagi Grimberg
2024-06-26 14:27 ` Hannes Reinecke
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240626121347.1116-7-hare@kernel.org \
--to=hare@kernel.org \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox