All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-nvme@lists.infradead.org
Cc: dwagner@suse.de, hare@suse.com, kbusch@kernel.org, hch@lst.de,
	gjoyce@linux.ibm.com, wenxiong@linux.ibm.com
Subject: [PATCHv2 4/9] libnvme: add support for retrieving per-path gendisk I/O statistics
Date: Sat,  4 Apr 2026 15:44:54 +0530	[thread overview]
Message-ID: <20260404101504.44539-5-nilay@linux.ibm.com> (raw)
In-Reply-To: <20260404101504.44539-1-nilay@linux.ibm.com>

Gendisk I/O statistics provide useful insight into disk activity,
including read/write/discard/flush operations, as well as information
about in-flight I/Os and I/O timing.

Parsing these statistics allows users to determine the number of I/Os
processed, time spent servicing I/O, number of sectors accessed, and
the count of in-flight requests.

Add support for retrieving per-path gendisk I/O statistics. Also add
support for computing deltas of these statistics between samples, such
as I/O ticks, number of sectors, and number of serviced I/Os.

These metrics can be used by tools such as nvme-top to display
real-time disk activity. Per-path metrics are particularly useful when
NVMe native multipath is enabled.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 libnvme/src/libnvme.ld     |  12 ++
 libnvme/src/nvme/private.h |  29 ++++
 libnvme/src/nvme/tree.c    | 268 +++++++++++++++++++++++++++++++++++++
 libnvme/src/nvme/tree.h    |  93 +++++++++++++
 4 files changed, 402 insertions(+)

diff --git a/libnvme/src/libnvme.ld b/libnvme/src/libnvme.ld
index 1ff451d3d..53b5f90e5 100644
--- a/libnvme/src/libnvme.ld
+++ b/libnvme/src/libnvme.ld
@@ -144,6 +144,18 @@ LIBNVME_3 {
 		nvme_path_get_queue_depth;
 		nvme_path_get_ana_state;
 		nvme_path_get_numa_nodes;
+		nvme_path_get_stat;
+		nvme_path_get_read_ios;
+		nvme_path_get_write_ios;
+		nvme_path_get_inflights;
+		nvme_path_get_stat_interval;
+		nvme_path_get_io_ticks;
+		nvme_path_get_read_ticks;
+		nvme_path_get_write_ticks;
+		nvme_path_get_read_sectors;
+		nvme_path_get_write_sectors;
+		nvme_path_reset_stat;
+		nvme_path_update_stat;
 		nvme_random_uuid;
 		nvme_read_config;
 		nvme_read_hostid;
diff --git a/libnvme/src/nvme/private.h b/libnvme/src/nvme/private.h
index 0b8dec81e..2e8e792b9 100644
--- a/libnvme/src/nvme/private.h
+++ b/libnvme/src/nvme/private.h
@@ -20,6 +20,7 @@
 
 #include <nvme/fabrics.h>
 #include <nvme/mi.h>
+#include <nvme/tree.h>
 
 const char *nvme_subsys_sysfs_dir(void);
 const char *nvme_ctrl_sysfs_dir(void);
@@ -128,10 +129,38 @@ struct nvme_transport_handle {
 	struct nvme_log *log;
 };
 
+enum stat_group {
+	READ = 0,
+	WRITE,
+	DISCARD,
+	FLUSH,
+
+	NR_STAT_GROUPS
+};
+
+struct nvme_stat {
+	struct {
+		unsigned long ios;
+		unsigned long merges;
+		unsigned long long sectors;
+		unsigned int ticks;	/* in milliseconds */
+	} group[NR_STAT_GROUPS];
+
+	unsigned int inflights;
+	unsigned int io_ticks;		/* in milliseconds */
+	unsigned int tot_ticks;		/* in milliseconds */
+
+	double ts_ms;			/* timestamp when the stat is updated */
+};
+
 struct nvme_path { /*!generate-accessors*/
 	struct list_node entry;
 	struct list_node nentry;
 
+	struct nvme_stat stat[2];	/* gendisk I/O stat */
+	unsigned int curr_idx;		/* current index into the stat[] */
+	bool diffstat;
+
 	struct nvme_ctrl *c;
 	struct nvme_ns *n;
 
diff --git a/libnvme/src/nvme/tree.c b/libnvme/src/nvme/tree.c
index ac3779dca..b5f8c219d 100644
--- a/libnvme/src/nvme/tree.c
+++ b/libnvme/src/nvme/tree.c
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <time.h>
 
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -851,6 +852,273 @@ __public char *nvme_path_get_numa_nodes(nvme_path_t p)
 	return p->numa_nodes;
 }
 
+static nvme_stat_t nvme_path_get_stat(nvme_path_t p, unsigned int idx)
+{
+	if (idx > 1)
+		return NULL;
+
+	return &p->stat[idx];
+}
+
+/* Wipe every sample held in p->stat[]; curr_idx and diffstat are kept. */
+__public void nvme_path_reset_stat(nvme_path_t p)
+{
+	/* sizeof the array itself, so the size follows stat[]'s length */
+	memset(p->stat, 0, sizeof(p->stat));
+}
+
+/* Parse the "stat" text in @sysfs_stat_path into @stat; stamp it in ms. */
+static int nvme_update_stat(const char *sysfs_stat_path, nvme_stat_t stat)
+{
+	int n;
+	struct timespec ts;
+	unsigned long rd_ios, rd_merges, wr_ios, wr_merges;
+	unsigned long dc_ios, dc_merges, fl_ios;
+	unsigned long long rd_sectors, wr_sectors, dc_sectors;
+	unsigned int rd_ticks, wr_ticks, dc_ticks, fl_ticks;
+	unsigned int io_ticks, tot_ticks, inflights;
+
+	memset(stat, 0, sizeof(*stat));
+	n = sscanf(sysfs_stat_path,
+		"%lu %lu %llu %u %lu %lu %llu %u %u %u %u %lu %lu %llu %u %lu %u",
+		&rd_ios, &rd_merges, &rd_sectors, &rd_ticks,
+		&wr_ios, &wr_merges, &wr_sectors, &wr_ticks,
+		&inflights, &io_ticks, &tot_ticks,
+		&dc_ios, &dc_merges, &dc_sectors, &dc_ticks,
+		&fl_ios, &fl_ticks);
+	if (n < 17)
+		return -EINVAL;
+
+	/* update read stat */
+	stat->group[READ].ios = rd_ios;
+	stat->group[READ].merges = rd_merges;
+	stat->group[READ].sectors = rd_sectors;
+	stat->group[READ].ticks = rd_ticks;
+
+	/* update write stat */
+	stat->group[WRITE].ios = wr_ios;
+	stat->group[WRITE].merges = wr_merges;
+	stat->group[WRITE].sectors = wr_sectors;
+	stat->group[WRITE].ticks = wr_ticks;
+
+	/* update inflight counters and ticks */
+	stat->inflights = inflights;
+	stat->io_ticks = io_ticks;
+	stat->tot_ticks = tot_ticks;
+
+	/* update discard stat */
+	stat->group[DISCARD].ios = dc_ios;
+	stat->group[DISCARD].merges = dc_merges;
+	stat->group[DISCARD].sectors = dc_sectors;
+	stat->group[DISCARD].ticks = dc_ticks;
+
+	/* update flush stat (only ios and ticks are parsed for flush) */
+	stat->group[FLUSH].ios = fl_ios;
+	stat->group[FLUSH].ticks = fl_ticks;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	/* scale as double: tv_sec * 1000 can overflow a 32-bit time_t */
+	stat->ts_ms = (double)ts.tv_sec * 1000.0 + (double)ts.tv_nsec / 1e6;
+
+	return 0;
+}
+
+__public int nvme_path_update_stat(nvme_path_t p, bool diffstat)
+{
+	_cleanup_free_ char *sysfs_stat_path = NULL;
+	nvme_stat_t stat;
+
+	p->diffstat = diffstat;
+	p->curr_idx ^= 1;
+	stat = nvme_path_get_stat(p, p->curr_idx);
+	if (!stat)
+		return -EINVAL;
+
+	sysfs_stat_path = nvme_get_path_attr(p, "stat");
+	if (!sysfs_stat_path)
+		return -EINVAL;
+
+	return nvme_update_stat(sysfs_stat_path, stat);
+}
+
+static unsigned int nvme_stat_get_inflights(nvme_stat_t stat)
+{
+	return stat->inflights;	/* unsigned, as in the public getter */
+}
+
+__public unsigned int nvme_path_get_inflights(nvme_path_t p)
+{
+	nvme_stat_t curr;
+
+	curr = nvme_path_get_stat(p, p->curr_idx);
+	if (!curr)
+		return 0;
+
+	return nvme_stat_get_inflights(curr);
+}
+
+/* io_ticks of @curr, or its growth since @prev (never below 0) if @diffstat. */
+static unsigned int nvme_stat_get_io_ticks(nvme_stat_t curr, nvme_stat_t prev,
+		bool diffstat)
+{
+	if (!diffstat)
+		return curr->io_ticks;
+
+	/* a counter that did not advance reports 0, not a wrapped delta */
+	if (curr->io_ticks <= prev->io_ticks)
+		return 0;
+
+	return curr->io_ticks - prev->io_ticks;
+}
+
+__public unsigned int nvme_path_get_io_ticks(nvme_path_t p)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, p->curr_idx);
+	prev = nvme_path_get_stat(p, !p->curr_idx);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_io_ticks(curr, prev, p->diffstat);
+}
+
+static unsigned int nvme_stat_get_ticks(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp, bool diffstat)
+{
+	unsigned int delta = 0;
+
+	if (!diffstat)
+		return curr->group[grp].ticks;
+
+	if (curr->group[grp].ticks > prev->group[grp].ticks)
+		delta = curr->group[grp].ticks - prev->group[grp].ticks;
+
+	return delta;
+}
+
+static unsigned int __nvme_path_get_ticks(nvme_path_t p, enum stat_group grp)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, p->curr_idx);
+	prev = nvme_path_get_stat(p, !p->curr_idx);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_ticks(curr, prev, grp, p->diffstat);
+}
+
+__public unsigned int nvme_path_get_read_ticks(nvme_path_t p)
+{
+	return __nvme_path_get_ticks(p, READ);
+}
+
+__public unsigned int nvme_path_get_write_ticks(nvme_path_t p)
+{
+	return __nvme_path_get_ticks(p, WRITE);
+}
+
+static double nvme_stat_get_interval(nvme_stat_t curr, nvme_stat_t prev)
+{
+	double delta = 0.0;
+
+	if (prev->ts_ms && curr->ts_ms > prev->ts_ms)
+		delta = curr->ts_ms - prev->ts_ms;
+
+	return delta;
+}
+
+__public double nvme_path_get_stat_interval(nvme_path_t p)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, p->curr_idx);
+	prev = nvme_path_get_stat(p, !p->curr_idx);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_interval(curr, prev);
+}
+
+static unsigned long nvme_stat_get_ios(nvme_stat_t curr, nvme_stat_t prev,
+		enum stat_group grp, bool diffstat)
+{
+	unsigned long ios = 0;
+
+	if (!diffstat)
+		return curr->group[grp].ios;
+
+	if (curr->group[grp].ios > prev->group[grp].ios)
+		ios = curr->group[grp].ios - prev->group[grp].ios;
+
+	return ios;
+}
+
+static unsigned long __nvme_path_get_ios(nvme_path_t p, enum stat_group grp)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, p->curr_idx);
+	prev = nvme_path_get_stat(p, !p->curr_idx);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_ios(curr, prev, grp, p->diffstat);
+}
+
+__public unsigned long nvme_path_get_read_ios(nvme_path_t p)
+{
+	return __nvme_path_get_ios(p, READ);
+}
+
+__public unsigned long nvme_path_get_write_ios(nvme_path_t p)
+{
+	return __nvme_path_get_ios(p, WRITE);
+}
+
+static unsigned long long nvme_stat_get_sectors(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp, bool diffstat)
+{
+	unsigned long long sec = 0;
+
+	if (!diffstat)
+		return curr->group[grp].sectors;
+
+	if (curr->group[grp].sectors > prev->group[grp].sectors)
+		sec = curr->group[grp].sectors - prev->group[grp].sectors;
+
+	return sec;
+}
+
+static unsigned long long __nvme_path_get_sectors(nvme_path_t p,
+		enum stat_group grp)
+{
+	nvme_stat_t curr, prev;
+
+	curr = nvme_path_get_stat(p, p->curr_idx);
+	prev = nvme_path_get_stat(p, !p->curr_idx);
+
+	if (!curr || !prev)
+		return 0;
+
+	return nvme_stat_get_sectors(curr, prev, grp, p->diffstat);
+}
+
+__public unsigned long long nvme_path_get_read_sectors(nvme_path_t p)
+{
+	return __nvme_path_get_sectors(p, READ);
+}
+
+__public unsigned long long nvme_path_get_write_sectors(nvme_path_t p)
+{
+	return __nvme_path_get_sectors(p, WRITE);
+}
+
 void nvme_free_path(struct nvme_path *p)
 {
 	list_del_init(&p->entry);
diff --git a/libnvme/src/nvme/tree.h b/libnvme/src/nvme/tree.h
index 39d715508..8c42edd71 100644
--- a/libnvme/src/nvme/tree.h
+++ b/libnvme/src/nvme/tree.h
@@ -23,6 +23,7 @@
 typedef struct nvme_ns *nvme_ns_t;
 typedef struct nvme_ns_head *nvme_ns_head_t;
 typedef struct nvme_path *nvme_path_t;
+typedef struct nvme_stat *nvme_stat_t;
 typedef struct nvme_ctrl *nvme_ctrl_t;
 typedef struct nvme_subsystem *nvme_subsystem_t;
 typedef struct nvme_host *nvme_host_t;
@@ -719,6 +720,98 @@ nvme_ctrl_t nvme_path_get_ctrl(nvme_path_t p);
  */
 nvme_ns_t nvme_path_get_ns(nvme_path_t p);
 
+/**
+ * nvme_path_reset_stat() - Reset the stored I/O stat samples of a path
+ * @p:	&nvme_path_t object
+ */
+void nvme_path_reset_stat(nvme_path_t p);
+
+/**
+ * nvme_path_update_stat() - Update stat of an nvme_path_t object
+ * @p:		&nvme_path_t object
+ * @diffstat:	If set to true then getters return the diff stat otherwise
+ *		return the current absolute stat
+ *
+ * Return:	0 on success, a negative errno value (-EINVAL) on error
+ */
+int nvme_path_update_stat(nvme_path_t p, bool diffstat);
+
+/**
+ * nvme_path_get_read_ios() - Get read I/Os
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Num of read I/Os processed between two stat samples
+ */
+unsigned long nvme_path_get_read_ios(nvme_path_t p);
+
+/**
+ * nvme_path_get_write_ios() - Get write I/Os
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Num of write I/Os processed between two stat samples
+ */
+unsigned long nvme_path_get_write_ios(nvme_path_t p);
+
+/**
+ * nvme_path_get_read_ticks() - Get read I/O ticks
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Time, in milliseconds, spent processing read I/O requests
+ *		between two stat samples
+ */
+unsigned int nvme_path_get_read_ticks(nvme_path_t p);
+
+/**
+ * nvme_path_get_read_sectors() - Get read I/O sectors
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Number of sectors read from the device between two stat samples
+ */
+unsigned long long nvme_path_get_read_sectors(nvme_path_t p);
+
+/**
+ * nvme_path_get_write_sectors() - Get write I/O sectors
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Num of sectors written to the device between two stat samples
+ */
+unsigned long long nvme_path_get_write_sectors(nvme_path_t p);
+
+/**
+ * nvme_path_get_write_ticks() - Get write I/O ticks
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Time, in milliseconds, spent processing write I/O requests
+ *		between two stat samples
+ */
+unsigned int nvme_path_get_write_ticks(nvme_path_t p);
+
+/**
+ * nvme_path_get_stat_interval() - Get interval between two stat samples
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Interval, in milliseconds between collection of two consecutive
+ *		stat samples
+ */
+double nvme_path_get_stat_interval(nvme_path_t p);
+
+/**
+ * nvme_path_get_io_ticks() - Get I/O ticks
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Time consumed, in milliseconds, processing I/O requests between
+ *		two stat samples
+ */
+unsigned int nvme_path_get_io_ticks(nvme_path_t p);
+
+/**
+ * nvme_path_get_inflights() - Inflight IOs for nvme_path_t object
+ * @p:		&nvme_path_t object
+ *
+ * Return:	Inflight number of IOs
+ */
+unsigned int nvme_path_get_inflights(nvme_path_t p);
+
 /**
  * nvme_ctrl_get_transport_handle() - Get associated transport handle
  * @c:	Controller instance
-- 
2.53.0



  parent reply	other threads:[~2026-04-04 10:15 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-04 10:14 [PATCHv2 0/9] libnvme: add support for retrieving additional NVMe stat Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 1/9] libnvme: annotate nvme_path::ana_state with !accessors:none Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 2/9] libnvme: annotate nvme_path::numa_nodes " Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 3/9] libnvme: annotate nvme_subsystem::iopolicy " Nilay Shroff
2026-04-04 10:14 ` Nilay Shroff [this message]
2026-04-04 10:14 ` [PATCHv2 5/9] libnvme: add support for retrieving namespace gendisk I/O statistics Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 6/9] libnvme: add support for per-path diagnostic counters Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 7/9] libnvme: add support for namespace " Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 8/9] libnvme: add support for nshead " Nilay Shroff
2026-04-04 10:14 ` [PATCHv2 9/9] libnvme: add support for ctrl " Nilay Shroff
2026-04-09  9:10 ` [PATCHv2 0/9] libnvme: add support for retrieving additional NVMe stat Daniel Wagner
2026-04-10  7:33   ` Nilay Shroff

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260404101504.44539-5-nilay@linux.ibm.com \
    --to=nilay@linux.ibm.com \
    --cc=dwagner@suse.de \
    --cc=gjoyce@linux.ibm.com \
    --cc=hare@suse.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=wenxiong@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.