From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-nvme@lists.infradead.org
Cc: hare@suse.de, kbusch@kernel.org, hch@lst.de, sagi@grimberg.me,
dwagner@suse.de, axboe@kernel.dk, gjoyce@ibm.com
Subject: [RFC PATCHv3 3/6] nvme: add sysfs attribute adp_ewma_shift
Date: Mon, 27 Oct 2025 14:59:37 +0530 [thread overview]
Message-ID: <20251027092949.961287-4-nilay@linux.ibm.com> (raw)
In-Reply-To: <20251027092949.961287-1-nilay@linux.ibm.com>
By default, the EWMA (Exponentially Weighted Moving Average) shift
value, used to smooth latency samples for the adaptive iopolicy, is set
to 3. The EWMA is calculated using the following formula:
ewma = (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
The default value of 3 assigns ~87.5% weight to the existing EWMA value
and ~12.5% weight to the new latency sample. This provides a stable
average that smooths out short-term variations.
However, different workloads may require faster or slower adaptation to
changing conditions. This commit introduces a new sysfs attribute,
adp_ewma_shift, allowing users to tune the weighting factor.
For example:
- adp_ewma_shift = 1 => 50% old, 50% new
- adp_ewma_shift = 0 => 0% old, 100% new
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
drivers/nvme/host/core.c | 10 ++++++++-
drivers/nvme/host/multipath.c | 38 +++++++++++++++++++++++++++++++----
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/sysfs.c | 1 +
4 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 284a7c9c5d1d..ab09b9724674 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3240,7 +3240,15 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
return -EINVAL;
}
nvme_mpath_default_iopolicy(subsys);
-
+#ifdef CONFIG_NVME_MULTIPATH
+ /*
+ * Default value of ewma_shift is set to 3 so that we can assign ~87.5%
+ * weight to the existing ewma and ~12.5% weight to the new latency
+ * sample. This default could be changed through sysfs. This value is
+ * used while adding latency sample for adaptive iopolicy.
+ */
+ subsys->adp_ewma_shift = NVME_DEFAULT_ADP_EWMA_SHIFT;
+#endif
subsys->dev.class = &nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem;
subsys->dev.groups = nvme_subsys_attrs_groups;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index b438371b8494..95407c0f2f4b 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -294,10 +294,9 @@ static void nvme_mpath_weight_work(struct work_struct *weight_work)
* For instance, with EWMA_SHIFT = 3, this assigns 7/8 (~87.5 %) weight to
* the existing/old ewma and 1/8 (~12.5%) weight to the new sample.
*/
-static inline u64 ewma_update(u64 old, u64 new)
+static inline u64 ewma_update(u64 old, u64 new, int ewma_shift)
{
- return (old * ((1 << NVME_DEFAULT_ADP_EWMA_SHIFT) - 1)
- + new) >> NVME_DEFAULT_ADP_EWMA_SHIFT;
+	/*
+	 * Build the weight of the old value in 64 bits: a plain
+	 * (1 << ewma_shift) is a signed int shift, which is undefined once
+	 * ewma_shift reaches 31.
+	 */
+	return (old * ((1ULL << ewma_shift) - 1) + new) >> ewma_shift;
}
static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
@@ -389,7 +388,8 @@ static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
if (unlikely(!stat->slat_ns))
WRITE_ONCE(stat->slat_ns, avg_lat_ns);
else {
- slat_ns = ewma_update(stat->slat_ns, avg_lat_ns);
+ slat_ns = ewma_update(stat->slat_ns, avg_lat_ns,
+ READ_ONCE(head->subsys->adp_ewma_shift));
WRITE_ONCE(stat->slat_ns, slat_ns);
}
@@ -1465,6 +1465,36 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
+/*
+ * sysfs read handler for the subsystem attribute adp_ewma_shift: emits the
+ * current shift value as a decimal integer followed by a newline.
+ */
+static ssize_t nvme_subsys_adp_ewma_shift_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_subsystem *subsys;
+	int cur_shift;
+
+	subsys = container_of(dev, struct nvme_subsystem, dev);
+
+	/* Marked load; the store handler updates this field with WRITE_ONCE(). */
+	cur_shift = READ_ONCE(subsys->adp_ewma_shift);
+
+	return sysfs_emit(buf, "%d\n", cur_shift);
+}
+
+/*
+ * sysfs write handler for the subsystem attribute adp_ewma_shift.
+ *
+ * Parses @buf as an integer (base auto-detected by kstrtoint()) and, if it
+ * lies in [0, 30], publishes it as the new EWMA shift.
+ *
+ * Returns @count on success, or -EINVAL when @buf is not a valid integer
+ * or the value is out of range.
+ */
+static ssize_t nvme_subsys_adp_ewma_shift_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int shift, err;
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+
+	err = kstrtoint(buf, 0, &shift);
+	if (err)
+		return -EINVAL;
+
+	/*
+	 * The value is used as a shift count when the latency EWMA is
+	 * updated. A count of 31 or more overflows a 32-bit int weight and
+	 * would leave a new sample with no meaningful influence anyway, so
+	 * reject it along with negative values.
+	 */
+	if (shift < 0 || shift > 30)
+		return -EINVAL;
+
+	WRITE_ONCE(subsys->adp_ewma_shift, shift);
+	return count;
+}
+
+SUBSYS_ATTR_RW(adp_ewma_shift, 0644, nvme_subsys_adp_ewma_shift_show,
+ nvme_subsys_adp_ewma_shift_store);
+
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5baf0232726f..9f5b233c747a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -450,6 +450,7 @@ struct nvme_subsystem {
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
+ int adp_ewma_shift; /* used for adaptive iopolicy */
#endif
};
@@ -1043,6 +1044,7 @@ extern struct device_attribute dev_attr_queue_depth;
extern struct device_attribute dev_attr_numa_nodes;
extern struct device_attribute dev_attr_delayed_removal_secs;
extern struct device_attribute subsys_attr_iopolicy;
+extern struct device_attribute subsys_attr_adp_ewma_shift;
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 1cbab90ed42e..cf9711961b00 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -917,6 +917,7 @@ static struct attribute *nvme_subsys_attrs[] = {
&subsys_attr_subsystype.attr,
#ifdef CONFIG_NVME_MULTIPATH
&subsys_attr_iopolicy.attr,
+ &subsys_attr_adp_ewma_shift.attr,
#endif
NULL,
};
--
2.51.0
next prev parent reply other threads:[~2025-10-27 9:30 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 9:29 [RFC PATCHv3 0/6] nvme-multipath: introduce adaptive I/O policy Nilay Shroff
2025-10-27 9:29 ` [RFC PATCHv3 1/6] block: expose blk_stat_{enable,disable}_accounting() to drivers Nilay Shroff
2025-10-29 9:39 ` Christoph Hellwig
2025-10-29 16:40 ` Nilay Shroff
2025-10-27 9:29 ` [RFC PATCHv3 2/6] nvme-multipath: add support for adaptive I/O policy Nilay Shroff
2025-10-27 11:34 ` Hannes Reinecke
2025-10-27 12:09 ` Nilay Shroff
2025-10-29 9:40 ` Christoph Hellwig
2025-10-29 14:21 ` Nilay Shroff
2025-10-27 9:29 ` Nilay Shroff [this message]
2025-10-27 11:54 ` [RFC PATCHv3 3/6] nvme: add sysfs attribute adp_ewma_shift Hannes Reinecke
2025-10-27 9:29 ` [RFC PATCHv3 4/6] nvme: add sysfs attribute adp_weight_timeout Nilay Shroff
2025-10-27 11:54 ` Hannes Reinecke
2025-10-27 9:29 ` [RFC PATCHv3 5/6] nvme: add generic debugfs support Nilay Shroff
2025-10-27 11:55 ` Hannes Reinecke
2025-10-27 12:02 ` Nilay Shroff
2025-10-27 9:29 ` [RFC PATCHv3 6/6] nvme-multipath: add debugfs attribute for adaptive I/O policy stat Nilay Shroff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251027092949.961287-4-nilay@linux.ibm.com \
--to=nilay@linux.ibm.com \
--cc=axboe@kernel.dk \
--cc=dwagner@suse.de \
--cc=gjoyce@ibm.com \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.