From: Guixin Liu <kanie@linux.alibaba.com>
To: Keith Busch <kbusch@kernel.org>, Jens Axboe <axboe@kernel.dk>,
Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>
Cc: linux-nvme@lists.infradead.org
Subject: [PATCH v3 1/2] nvme-multipath: introduce service-time iopolicy
Date: Fri, 8 Nov 2024 16:54:48 +0800 [thread overview]
Message-ID: <20241108085449.49972-2-kanie@linux.alibaba.com> (raw)
In-Reply-To: <20241108085449.49972-1-kanie@linux.alibaba.com>
In scenarios with varying random I/O sizes, the different I/O sizes
being processed on each path can lead to slower processing and higher
latency on paths under heavy load.
The service-time policy can dispatch I/O to the path with the lowest
total amount of currently processed I/O, ensuring that new I/O can be
sent to less-loaded paths when some paths are overloaded, thereby
achieving lower latency and higher throughput.
Introduce an atomic64_t inflight_size to record the total I/O size
that the path is processing, and choose the path with the lowest
inflight_size to send the I/O.
Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
---
drivers/nvme/host/multipath.c | 53 ++++++++++++++++++++++++++++++++++-
drivers/nvme/host/nvme.h | 3 ++
2 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 6a15873055b9..fcd3b2108152 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -18,6 +18,7 @@ static const char *nvme_iopolicy_names[] = {
[NVME_IOPOLICY_NUMA] = "numa",
[NVME_IOPOLICY_RR] = "round-robin",
[NVME_IOPOLICY_QD] = "queue-depth",
+ [NVME_IOPOLICY_ST] = "service-time",
};
static int iopolicy = NVME_IOPOLICY_NUMA;
@@ -32,6 +33,8 @@ static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
iopolicy = NVME_IOPOLICY_RR;
else if (!strncmp(val, "queue-depth", 11))
iopolicy = NVME_IOPOLICY_QD;
+ else if (!strncmp(val, "service-time", 12))
+ iopolicy = NVME_IOPOLICY_ST;
else
return -EINVAL;
@@ -46,7 +49,7 @@ static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
&iopolicy, 0644);
MODULE_PARM_DESC(iopolicy,
- "Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");
+ "Default multipath I/O policy; 'numa' (default), 'round-robin', 'queue-depth' or 'service-time'");
void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
@@ -136,6 +139,11 @@ void nvme_mpath_start_request(struct request *rq)
nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE;
}
+ if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_ST) {
+ atomic64_add(blk_rq_bytes(rq), &ns->ctrl->inflight_size);
+ nvme_req(rq)->flags |= NVME_MPATH_CNT_IOSIZE;
+ }
+
if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
return;
@@ -152,6 +160,9 @@ void nvme_mpath_end_request(struct request *rq)
if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)
atomic_dec_if_positive(&ns->ctrl->nr_active);
+ if (nvme_req(rq)->flags & NVME_MPATH_CNT_IOSIZE)
+ atomic64_sub(blk_rq_bytes(rq), &ns->ctrl->inflight_size);
+
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
@@ -405,9 +416,48 @@ static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head)
return ns;
}
+/*
+ * Service-time path selector: pick the usable path whose controller has the
+ * fewest bytes currently in flight (ctrl->inflight_size).
+ *
+ * Walks head->list and skips paths for which nvme_path_is_disabled() is true.
+ * The least-loaded ANA-optimized path is preferred; the least-loaded
+ * non-optimized path is only used as a fallback when no optimized path was
+ * found.  Paths in any other ANA state are ignored.  Returns NULL when
+ * neither kind of path is available.
+ *
+ * NOTE(review): iterates with list_for_each_entry_rcu(), so the caller
+ * presumably holds the matching read-side lock -- confirm against the
+ * callers of nvme_find_path().
+ */
+static struct nvme_ns *nvme_service_time_path(struct nvme_ns_head *head)
+{
+	struct nvme_ns *opt = NULL, *nonopt = NULL, *ns;
+	/*
+	 * inflight_size is an atomic64_t byte counter.  Keep every local that
+	 * holds it 64 bits wide: with 32-bit locals a controller with 4 GiB or
+	 * more in flight would have its load truncated and could look idle.
+	 */
+	u64 min_inflight_nonopt = U64_MAX;
+	u64 min_inflight_opt = U64_MAX;
+	u64 inflight;
+
+	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		if (nvme_path_is_disabled(ns))
+			continue;
+
+		inflight = atomic64_read(&ns->ctrl->inflight_size);
+
+		switch (ns->ana_state) {
+		case NVME_ANA_OPTIMIZED:
+			if (inflight < min_inflight_opt) {
+				min_inflight_opt = inflight;
+				opt = ns;
+			}
+			break;
+		case NVME_ANA_NONOPTIMIZED:
+			if (inflight < min_inflight_nonopt) {
+				min_inflight_nonopt = inflight;
+				nonopt = ns;
+			}
+			break;
+		default:
+			break;
+		}
+
+		/* An idle optimized path cannot be beaten; stop searching. */
+		if (min_inflight_opt == 0)
+			return opt;
+	}
+
+	/* Fall back to a non-optimized path only if no optimized one exists. */
+	return opt ? opt : nonopt;
+}
+
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
switch (READ_ONCE(head->subsys->iopolicy)) {
+ case NVME_IOPOLICY_ST:
+ return nvme_service_time_path(head);
case NVME_IOPOLICY_QD:
return nvme_queue_depth_path(head);
case NVME_IOPOLICY_RR:
@@ -1040,6 +1090,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
/* initialize this in the identify path to cover controller resets */
atomic_set(&ctrl->nr_active, 0);
+ atomic64_set(&ctrl->inflight_size, 0);
if (!ctrl->max_namespaces ||
ctrl->max_namespaces > le32_to_cpu(id->nn)) {
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 093cb423f536..bf6c74fdc9ba 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -202,6 +202,7 @@ enum {
NVME_REQ_USERCMD = (1 << 1),
NVME_MPATH_IO_STATS = (1 << 2),
NVME_MPATH_CNT_ACTIVE = (1 << 3),
+ NVME_MPATH_CNT_IOSIZE = (1 << 4),
};
static inline struct nvme_request *nvme_req(struct request *req)
@@ -367,6 +368,7 @@ struct nvme_ctrl {
struct timer_list anatt_timer;
struct work_struct ana_work;
atomic_t nr_active;
+ atomic64_t inflight_size;
#endif
#ifdef CONFIG_NVME_HOST_AUTH
@@ -416,6 +418,7 @@ enum nvme_iopolicy {
NVME_IOPOLICY_NUMA,
NVME_IOPOLICY_RR,
NVME_IOPOLICY_QD,
+ NVME_IOPOLICY_ST,
};
struct nvme_subsystem {
--
2.43.0
next prev parent reply other threads:[~2024-11-08 9:05 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-08 8:54 [PATCH v3 0/2] Introduce service-time multipath policy and document Guixin Liu
2024-11-08 8:54 ` Guixin Liu [this message]
2024-11-08 8:54 ` [PATCH v3 2/2] docs, nvme: add a nvme-multipath document Guixin Liu
2024-11-11 10:41 ` [PATCH v3 0/2] Introduce service-time multipath policy and document Guixin Liu
2024-11-13 10:22 ` Guixin Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241108085449.49972-2-kanie@linux.alibaba.com \
--to=kanie@linux.alibaba.com \
--cc=axboe@kernel.dk \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox