public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-nvme@lists.infradead.org
Cc: dwagner@suse.de, hare@suse.com, kbusch@kernel.org, hch@lst.de,
	gjoyce@ibm.com, wenxiong@linux.ibm.com,
	Nilay Shroff <nilay@linux.ibm.com>
Subject: [PATCH 4/9] libnvme: add support for retrieving per-path gendisk I/O statistics
Date: Sat, 21 Mar 2026 20:58:03 +0530	[thread overview]
Message-ID: <20260321152823.3197870-5-nilay@linux.ibm.com> (raw)
In-Reply-To: <20260321152823.3197870-1-nilay@linux.ibm.com>

Gendisk I/O statistics provide useful insight into disk activity,
including read/write/discard/flush operations, as well as information
about in-flight I/Os and I/O timing.

Parsing these statistics allows users to determine the number of I/Os
processed, time spent servicing I/O, number of sectors accessed, and
the count of in-flight requests.

Add support for retrieving per-path gendisk I/O statistics. Also add
support for computing deltas of these statistics between samples, such
as I/O ticks, number of sectors, and number of serviced I/Os.

These metrics can be used by tools such as nvme-top to display
real-time disk activity. Per-path metrics are particularly useful when
NVMe native multipath is enabled.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 libnvme/src/libnvme.ld     |  12 ++
 libnvme/src/nvme/private.h |  26 ++++
 libnvme/src/nvme/tree.c    | 255 +++++++++++++++++++++++++++++++++++++
 libnvme/src/nvme/tree.h    | 102 +++++++++++++++
 4 files changed, 395 insertions(+)

diff --git a/libnvme/src/libnvme.ld b/libnvme/src/libnvme.ld
index f842eb770..ceb9f9bcb 100644
--- a/libnvme/src/libnvme.ld
+++ b/libnvme/src/libnvme.ld
@@ -144,6 +144,18 @@ LIBNVME_3 {
 		nvme_path_get_queue_depth;
 		nvme_path_get_ana_state;
 		nvme_path_get_numa_nodes;
+		nvme_path_get_stat;
+		nvme_path_get_read_ios;
+		nvme_path_get_write_ios;
+		nvme_path_get_inflights;
+		nvme_path_get_stat_interval;
+		nvme_path_get_io_ticks;
+		nvme_path_get_read_ticks;
+		nvme_path_get_write_ticks;
+		nvme_path_get_read_sectors;
+		nvme_path_get_write_sectors;
+		nvme_path_reset_stat;
+		nvme_path_update_stat;
 		nvme_filter_paths;
 		nvme_read_config;
 		nvme_read_key;
diff --git a/libnvme/src/nvme/private.h b/libnvme/src/nvme/private.h
index 84852c8b0..8e327f1e2 100644
--- a/libnvme/src/nvme/private.h
+++ b/libnvme/src/nvme/private.h
@@ -20,6 +20,7 @@
 
 #include <nvme/fabrics.h>
 #include <nvme/mi.h>
+#include <nvme/tree.h>
 
 const char *nvme_subsys_sysfs_dir(void);
 const char *nvme_ctrl_sysfs_dir(void);
@@ -128,9 +129,34 @@ struct nvme_transport_handle {
 	struct nvme_log *log;
 };
 
+enum stat_group {
+	READ = 0,
+	WRITE,
+	DISCARD,
+	FLUSH,
+
+	NR_STAT_GROUPS
+};
+
+struct nvme_stat {
+	struct {
+		unsigned long ios;
+		unsigned long merges;
+		unsigned long long sectors;
+		unsigned int ticks;	/* in milliseconds */
+	} group[NR_STAT_GROUPS];
+
+	unsigned int inflights;
+	unsigned int io_ticks;		/* in milliseconds */
+	unsigned int tot_ticks;		/* in milliseconds */
+
+	double ts_ms;			/* timestamp when the stat is updated */
+};
+
 struct nvme_path { /*!generate-accessors*/
 	struct list_node entry;
 	struct list_node nentry;
+	struct nvme_stat stat[2];
 
 	struct nvme_ctrl *c;
 	struct nvme_ns *n;
diff --git a/libnvme/src/nvme/tree.c b/libnvme/src/nvme/tree.c
index 18aabe044..94f900307 100644
--- a/libnvme/src/nvme/tree.c
+++ b/libnvme/src/nvme/tree.c
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <time.h>
 
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -850,6 +851,260 @@ __public char *nvme_path_get_numa_nodes(nvme_path_t p)
 	return p->numa_nodes;
 }
 
+static nvme_stat_t nvme_path_get_stat(nvme_path_t p, int curr)
+{
+	if (curr < 0 || curr > 1)
+		return NULL;
+
+	return &p->stat[curr];
+}
+
+__public void nvme_path_reset_stat(nvme_path_t p)
+{
+	nvme_stat_t stat = &p->stat[0];
+
+	memset(stat, 0, 2 * sizeof(struct nvme_stat));
+}
+
+static int nvme_update_stat(const char *sysfs_stat_path, nvme_stat_t stat)
+{
+	int n;
+	struct timespec ts;
+	unsigned long rd_ios, rd_merges, wr_ios, wr_merges;
+	unsigned long dc_ios, dc_merges, fl_ios;
+	unsigned long long rd_sectors, wr_sectors, dc_sectors;
+	unsigned int rd_ticks, wr_ticks, dc_ticks, fl_ticks;
+	unsigned int io_ticks, tot_ticks, inflights;
+
+	memset(stat, 0, sizeof(struct nvme_stat));
+
+	n = sscanf(sysfs_stat_path,
+		"%lu %lu %llu %u %lu %lu %llu %u %u %u %u %lu %lu %llu %u %lu %u",
+		&rd_ios, &rd_merges, &rd_sectors, &rd_ticks,
+		&wr_ios, &wr_merges, &wr_sectors, &wr_ticks,
+		&inflights, &io_ticks, &tot_ticks,
+		&dc_ios, &dc_merges, &dc_sectors, &dc_ticks,
+		&fl_ios, &fl_ticks);
+
+	if (n < 17)
+		return -1;
+
+	/* update read stat */
+	stat->group[READ].ios = rd_ios;
+	stat->group[READ].merges = rd_merges;
+	stat->group[READ].sectors = rd_sectors;
+	stat->group[READ].ticks = rd_ticks;
+
+	/* update write stat */
+	stat->group[WRITE].ios = wr_ios;
+	stat->group[WRITE].merges = wr_merges;
+	stat->group[WRITE].sectors = wr_sectors;
+	stat->group[WRITE].ticks = wr_ticks;
+
+	/* update inflight counters and ticks */
+	stat->inflights = inflights;
+	stat->io_ticks = io_ticks;
+	stat->tot_ticks = tot_ticks;
+
+	/* update discard stat */
+	stat->group[DISCARD].ios = dc_ios;
+	stat->group[DISCARD].merges = dc_merges;
+	stat->group[DISCARD].sectors = dc_sectors;
+	stat->group[DISCARD].ticks = dc_ticks;
+
+	/* update flush stat */
+	stat->group[FLUSH].ios = fl_ios;
+	stat->group[FLUSH].ticks = fl_ticks;
+
+	/* scale as double: tv_sec * 1000 can overflow a 32-bit time_t */
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	stat->ts_ms = (double)ts.tv_sec * 1000.0 + (double)ts.tv_nsec / 1e6;
+	return 0;
+}
+
+__public int nvme_path_update_stat(nvme_path_t p, int curr)
+{
+	_cleanup_free_ char *sysfs_stat_path = NULL;
+	nvme_stat_t stat;
+
+	stat = nvme_path_get_stat(p, curr);
+	if (!stat)
+		return -1;
+
+	sysfs_stat_path = nvme_get_path_attr(p, "stat");
+	if (!sysfs_stat_path)
+		return -1;
+
+	return nvme_update_stat(sysfs_stat_path, stat);
+}
+
+static unsigned int nvme_stat_get_inflights(nvme_stat_t stat)
+{
+	return stat->inflights;	/* unsigned, same type as the stored field */
+}
+
+__public unsigned int nvme_path_get_inflights(nvme_path_t p, int this)
+{
+	nvme_stat_t curr;
+
+	curr = nvme_path_get_stat(p, this);
+	if (!curr)
+		return 0;
+
+	return nvme_stat_get_inflights(curr);
+}
+
+static unsigned int nvme_stat_get_io_ticks(nvme_stat_t curr, nvme_stat_t prev)
+{
+	unsigned int delta = 0;	/* stays 0 if the counter did not advance */
+
+	if (curr->io_ticks > prev->io_ticks)
+		delta = curr->io_ticks - prev->io_ticks;
+
+	return delta;
+}
+
+__public unsigned int nvme_path_get_io_ticks(nvme_path_t p, int this)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, this);
+	prev = nvme_path_get_stat(p, !this);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_io_ticks(curr, prev);
+}
+
+static unsigned int nvme_stat_get_ticks(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp)
+{
+	unsigned int delta = 0;
+
+	if (curr->group[grp].ticks > prev->group[grp].ticks)
+		delta = curr->group[grp].ticks - prev->group[grp].ticks;
+
+	return delta;
+}
+
+static unsigned int __nvme_path_get_ticks(nvme_path_t p,
+		enum stat_group grp, int this)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, this);
+	prev = nvme_path_get_stat(p, !this);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_ticks(curr, prev, grp);
+}
+
+__public unsigned int nvme_path_get_read_ticks(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ticks(p, READ, curr);
+}
+
+__public unsigned int nvme_path_get_write_ticks(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ticks(p, WRITE, curr);
+}
+
+static double nvme_stat_get_interval(nvme_stat_t curr, nvme_stat_t prev)
+{
+	double delta = 0.0;
+
+	if (curr->ts_ms > prev->ts_ms)
+		delta = curr->ts_ms - prev->ts_ms;
+
+	return delta;
+}
+
+__public double nvme_path_get_stat_interval(nvme_path_t p, int this)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, this);
+	prev = nvme_path_get_stat(p, !this);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_interval(curr, prev);
+}
+
+static unsigned long nvme_stat_get_ios(nvme_stat_t curr, nvme_stat_t prev,
+		enum stat_group grp)
+{
+	unsigned long ios = 0;
+
+	if (curr->group[grp].ios > prev->group[grp].ios)
+		ios = curr->group[grp].ios - prev->group[grp].ios;
+
+	return ios;
+}
+
+static unsigned long __nvme_path_get_ios(nvme_path_t p, enum stat_group grp,
+		int this)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, this);
+	prev = nvme_path_get_stat(p, !this);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_ios(curr, prev, grp);
+}
+
+__public unsigned long nvme_path_get_read_ios(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ios(p, READ, curr);
+}
+
+__public unsigned long nvme_path_get_write_ios(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ios(p, WRITE, curr);
+}
+
+static unsigned long long nvme_stat_get_sectors(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp)
+{
+	unsigned long long sec = 0;
+
+	if (curr->group[grp].sectors > prev->group[grp].sectors)
+		sec = curr->group[grp].sectors - prev->group[grp].sectors;
+
+	return sec;
+}
+
+static unsigned long long __nvme_path_get_sectors(nvme_path_t p,
+		enum stat_group grp, int this)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, this);
+	prev = nvme_path_get_stat(p, !this);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_sectors(curr, prev, grp);
+}
+
+__public unsigned long long nvme_path_get_read_sectors(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_sectors(p, READ, curr);
+}
+
+__public unsigned long long nvme_path_get_write_sectors(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_sectors(p, WRITE, curr);
+}
+
 void nvme_free_path(struct nvme_path *p)
 {
 	list_del_init(&p->entry);
diff --git a/libnvme/src/nvme/tree.h b/libnvme/src/nvme/tree.h
index 39d715508..3924e061a 100644
--- a/libnvme/src/nvme/tree.h
+++ b/libnvme/src/nvme/tree.h
@@ -23,6 +23,7 @@
 typedef struct nvme_ns *nvme_ns_t;
 typedef struct nvme_ns_head *nvme_ns_head_t;
 typedef struct nvme_path *nvme_path_t;
+typedef struct nvme_stat *nvme_stat_t;
 typedef struct nvme_ctrl *nvme_ctrl_t;
 typedef struct nvme_subsystem *nvme_subsystem_t;
 typedef struct nvme_host *nvme_host_t;
@@ -719,6 +720,107 @@ nvme_ctrl_t nvme_path_get_ctrl(nvme_path_t p);
  */
 nvme_ns_t nvme_path_get_ns(nvme_path_t p);
 
+/**
+ * nvme_path_reset_stat() - Resets namespace path nvme stat
+ * @p:	&nvme_path_t object
+ */
+void nvme_path_reset_stat(nvme_path_t p);
+
+/**
+ * nvme_path_update_stat() - Update stat of an nvme_path_t object
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the stat sample to refresh
+ *
+ * Return:	0 on success, -1 on error
+ */
+int nvme_path_update_stat(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_ios() - Calculate and return read IOs
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Num of read IOs processed between two stat samples
+ */
+unsigned long nvme_path_get_read_ios(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_ios() - Get write I/Os
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Num of write I/Os processed between two stat samples
+ */
+unsigned long nvme_path_get_write_ios(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_ticks() - Get read I/O ticks
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Time, in milliseconds, spent processing read I/O requests
+ *		between two stat samples
+ */
+unsigned int nvme_path_get_read_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_sectors() - Get read I/O sectors
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Number of sectors read from the device between two stat samples
+ */
+unsigned long long nvme_path_get_read_sectors(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_sectors() - Get write I/O sectors
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Num of sectors written to the device between two stat samples
+ */
+unsigned long long nvme_path_get_write_sectors(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_ticks() - Get write I/O ticks
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Time, in milliseconds, spent processing write I/O requests
+ *		between two stat samples
+ */
+unsigned int nvme_path_get_write_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_stat_interval() - Get interval between two stat samples
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Interval, in milliseconds between collection of two consecutive
+ *		stat samples
+ */
+double nvme_path_get_stat_interval(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_io_ticks() - Get I/O ticks
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current sample; the other is the previous
+ *
+ * Return:	Time consumed, in milliseconds, processing I/O requests between
+ *		two stat samples
+ */
+unsigned int nvme_path_get_io_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_inflights() - Inflight IOs for nvme_path_t object
+ *
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the stat sample to read
+ *
+ * Return:	Inflight number of IOs
+ */
+unsigned int nvme_path_get_inflights(nvme_path_t p, int curr);
+
 /**
  * nvme_ctrl_get_transport_handle() - Get associated transport handle
  * @c:	Controller instance
-- 
2.53.0



  parent reply	other threads:[~2026-03-21 15:29 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-21 15:27 [PATCH 0/9] libnvme: add support for retrieving additional NVMe stat Nilay Shroff
2026-03-21 15:28 ` [PATCH 1/9] libnvme: annotate nvme_path::ana_state with !accessors:none Nilay Shroff
2026-03-24  8:55   ` Daniel Wagner
2026-03-24 13:08     ` Nilay Shroff
2026-03-21 15:28 ` [PATCH 2/9] libnvme: annotate nvme_path::numa_nodes " Nilay Shroff
2026-03-21 15:28 ` [PATCH 3/9] libnvme: annotate nvme_subsystem::iopolicy " Nilay Shroff
2026-03-21 15:28 ` Nilay Shroff [this message]
2026-03-24  9:05   ` [PATCH 4/9] libnvme: add support for retrieving per-path gendisk I/O statistics Daniel Wagner
2026-03-24 13:02     ` Nilay Shroff
2026-04-01 15:42       ` Daniel Wagner
2026-04-03 15:36         ` Nilay Shroff
2026-03-21 15:28 ` [PATCH 5/9] libnvme: add support for retrieving namespace " Nilay Shroff
2026-03-24  9:06   ` Daniel Wagner
2026-03-24 13:12     ` Nilay Shroff
2026-03-21 15:28 ` [PATCH 6/9] libnvme: add support for per-path diagnostic counters Nilay Shroff
2026-03-24  9:18   ` Daniel Wagner
2026-03-24 13:54     ` Nilay Shroff
2026-04-01 15:54       ` Daniel Wagner
2026-04-03 15:47         ` Nilay Shroff
2026-03-21 15:28 ` [PATCH 7/9] libnvme: add support for namespace " Nilay Shroff
2026-03-21 15:28 ` [PATCH 8/9] libnvme: add support for nshead " Nilay Shroff
2026-03-21 15:28 ` [PATCH 9/9] libnvme: add support for ctrl " Nilay Shroff

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260321152823.3197870-5-nilay@linux.ibm.com \
    --to=nilay@linux.ibm.com \
    --cc=dwagner@suse.de \
    --cc=gjoyce@ibm.com \
    --cc=hare@suse.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=wenxiong@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox