All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns
@ 2025-04-05 13:02 Nilay Shroff
  2025-04-05 13:02 ` [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link Nilay Shroff
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Nilay Shroff @ 2025-04-05 13:02 UTC (permalink / raw)
  To: linux-nvme; +Cc: dwagner, hare, kbusch, gjoyce

Hi,

Recently released Linux kernel v6.14 added support for easily discovering
multiple paths to a shared NVMe namespace. This multipath information is
exposed to userspace via a new sysfs group attribute named "multipath",
located under /sys/block/<ns-blkdev>/. More details on this functionality
can be found here [1].

This patch series leverages that new functionality to discover multiple paths
to a shared namespace and exposes that information in libnvme so that it can
later be used by nvme-cli.

There are three patches in this series:
The first patch adds support for discovering NVMe paths using the sysfs
"multipath" group attribute.
The second patch adds a new "queue_depth" attribute under the NVMe path object.
The third patch adds a new "numa_nodes" attribute under the NVMe path object.

[1]: https://lore.kernel.org/all/20250112124154.60690-1-nilay@linux.ibm.com/

Nilay Shroff (3):
  tree: add support for discovering nvme paths using sysfs multipath
    link
  tree: add queue-depth attribute for nvme path object
  tree: add attribute numa_nodes for NVMe path object

 src/libnvme.map    |   2 +
 src/nvme/filters.c |   6 ++
 src/nvme/filters.h |   9 +++
 src/nvme/private.h |  11 ++-
 src/nvme/tree.c    | 184 ++++++++++++++++++++++++++++++++-------------
 src/nvme/tree.h    |  25 ++++++
 6 files changed, 184 insertions(+), 53 deletions(-)

-- 
2.49.0



^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link
  2025-04-05 13:02 [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
@ 2025-04-05 13:02 ` Nilay Shroff
  2025-04-07  7:19   ` Daniel Wagner
  2025-04-05 13:02 ` [PATCH 2/3] tree: add queue-depth attribute for nvme path object Nilay Shroff
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-05 13:02 UTC (permalink / raw)
  To: linux-nvme; +Cc: dwagner, hare, kbusch, gjoyce

With kernel v6.14, when NVMe native multipath is enabled, it is possible
to discover all paths to a shared namespace using the sysfs multipath link.
Leverage this functionality to update the path links for each shared NVMe
namespace, simplifying path discovery and management. Please note that
this change is fully backward compatible.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 src/nvme/filters.c |   6 ++
 src/nvme/filters.h |   9 +++
 src/nvme/private.h |   9 ++-
 src/nvme/tree.c    | 162 +++++++++++++++++++++++++++++++--------------
 src/nvme/tree.h    |   9 +++
 5 files changed, 143 insertions(+), 52 deletions(-)

diff --git a/src/nvme/filters.c b/src/nvme/filters.c
index ceaba68f..4a8829db 100644
--- a/src/nvme/filters.c
+++ b/src/nvme/filters.c
@@ -105,3 +105,9 @@ int nvme_scan_ctrl_namespaces(nvme_ctrl_t c, struct dirent ***ns)
 	return scandir(nvme_ctrl_get_sysfs_dir(c), ns,
 		       nvme_namespace_filter, alphasort);
 }
+
+int nvme_scan_ns_head_paths(nvme_ns_head_t head, struct dirent ***paths)
+{
+	return scandir(nvme_ns_head_get_sysfs_dir(head), paths,
+		       nvme_paths_filter, alphasort);
+}
diff --git a/src/nvme/filters.h b/src/nvme/filters.h
index 4ceeffd5..9e9dbb95 100644
--- a/src/nvme/filters.h
+++ b/src/nvme/filters.h
@@ -94,4 +94,13 @@ int nvme_scan_ctrl_namespace_paths(nvme_ctrl_t c, struct dirent ***paths);
  */
 int nvme_scan_ctrl_namespaces(nvme_ctrl_t c, struct dirent ***ns);
 
+/**
+ * nvme_scan_ns_head_paths() - Scan for namespace paths
+ * @head: Namespace head node to scan
+ * @paths : Pointer to array of dirents
+ *
+ * Return: number of entries in @ents
+ */
+int nvme_scan_ns_head_paths(nvme_ns_head_t head, struct dirent ***paths);
+
 #endif /* _LIBNVME_FILTERS_H */
diff --git a/src/nvme/private.h b/src/nvme/private.h
index 33cdd555..f45c5823 100644
--- a/src/nvme/private.h
+++ b/src/nvme/private.h
@@ -36,12 +36,19 @@ struct nvme_path {
 	int grpid;
 };
 
+struct nvme_ns_head {
+	struct list_head paths;
+	struct nvme_ns *n;
+
+	char *sysfs_dir;
+};
+
 struct nvme_ns {
 	struct list_node entry;
-	struct list_head paths;
 
 	struct nvme_subsystem *s;
 	struct nvme_ctrl *c;
+	struct nvme_ns_head *head;
 
 	int fd;
 	__u32 nsid;
diff --git a/src/nvme/tree.c b/src/nvme/tree.c
index b0a4696f..bd7fb53e 100644
--- a/src/nvme/tree.c
+++ b/src/nvme/tree.c
@@ -564,12 +564,12 @@ nvme_ns_t nvme_subsystem_next_ns(nvme_subsystem_t s, nvme_ns_t n)
 
 nvme_path_t nvme_namespace_first_path(nvme_ns_t ns)
 {
-	return list_top(&ns->paths, struct nvme_path, nentry);
+	return list_top(&ns->head->paths, struct nvme_path, nentry);
 }
 
 nvme_path_t nvme_namespace_next_path(nvme_ns_t ns, nvme_path_t p)
 {
-	return p ? list_next(&ns->paths, p, nentry) : NULL;
+	return p ? list_next(&ns->head->paths, p, nentry) : NULL;
 }
 
 static void __nvme_free_ns(struct nvme_ns *n)
@@ -579,6 +579,8 @@ static void __nvme_free_ns(struct nvme_ns *n)
 	free(n->generic_name);
 	free(n->name);
 	free(n->sysfs_dir);
+	free(n->head->sysfs_dir);
+	free(n->head);
 	free(n);
 }
 
@@ -916,25 +918,6 @@ void nvme_free_path(struct nvme_path *p)
 	free(p);
 }
 
-static void nvme_subsystem_set_path_ns(nvme_subsystem_t s, nvme_path_t p)
-{
-	char n_name[32] = { };
-	int i, c, nsid, ret;
-	nvme_ns_t n;
-
-	ret = sscanf(nvme_path_get_name(p), "nvme%dc%dn%d", &i, &c, &nsid);
-	if (ret != 3)
-		return;
-
-	sprintf(n_name, "nvme%dn%d", i, nsid);
-	nvme_subsystem_for_each_ns(s, n) {
-		if (!strcmp(n_name, nvme_ns_get_name(n))) {
-			list_add_tail(&n->paths, &p->nentry);
-			p->n = n;
-		}
-	}
-}
-
 static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
 {
 	struct nvme_path *p;
@@ -973,7 +956,6 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
 	}
 
 	list_node_init(&p->nentry);
-	nvme_subsystem_set_path_ns(c->s, p);
 	list_node_init(&p->entry);
 	list_add_tail(&c->paths, &p->entry);
 	return 0;
@@ -2250,8 +2232,8 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
 		return NULL;
 
 	path = NULL;
-	nvme_ctrl_scan_namespaces(r, c);
 	nvme_ctrl_scan_paths(r, c);
+	nvme_ctrl_scan_namespaces(r, c);
 	return c;
 }
 
@@ -2323,6 +2305,11 @@ const char *nvme_ns_get_sysfs_dir(nvme_ns_t n)
 	return n->sysfs_dir;
 }
 
+const char *nvme_ns_head_get_sysfs_dir(nvme_ns_head_t head)
+{
+	return head->sysfs_dir;
+}
+
 const char *nvme_ns_get_name(nvme_ns_t n)
 {
 	return n->name;
@@ -2749,7 +2736,11 @@ static void nvme_ns_set_generic_name(struct nvme_ns *n, const char *name)
 
 static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
 {
+	int ret;
 	struct nvme_ns *n;
+	struct nvme_ns_head *head;
+	struct stat arg;
+	_cleanup_free_ char *path = NULL;
 
 	n = calloc(1, sizeof(*n));
 	if (!n) {
@@ -2757,6 +2748,32 @@ static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
 		return NULL;
 	}
 
+	head = calloc(1, sizeof(*head));
+	if (!head) {
+		errno = ENOMEM;
+		free(n);
+		return NULL;
+	}
+
+	head->n = n;
+	list_head_init(&head->paths);
+	ret = asprintf(&path, "%s/%s", sys_path, "multipath");
+	if (ret < 0) {
+		errno = ENOMEM;
+		goto free_ns_head;
+	}
+	/*
+	 * The sysfs-dir "multipath" is available only when nvme multipath
+	 * is configured and we're running kernel version >= 6.14.
+	 */
+	ret = stat(path, &arg);
+	if (ret == 0) {
+		head->sysfs_dir = path;
+		path = NULL;
+	} else
+		head->sysfs_dir = NULL;
+
+	n->head = head;
 	n->fd = -1;
 	n->name = strdup(name);
 
@@ -2765,15 +2782,17 @@ static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
 	if (nvme_ns_init(sys_path, n) != 0)
 		goto free_ns;
 
-	list_head_init(&n->paths);
 	list_node_init(&n->entry);
 
 	nvme_ns_release_fd(n); /* Do not leak fds */
+
 	return n;
 
 free_ns:
 	free(n->generic_name);
 	free(n->name);
+free_ns_head:
+	free(head);
 	free(n);
 	return NULL;
 }
@@ -2836,6 +2855,71 @@ nvme_ns_t nvme_scan_namespace(const char *name)
 	return __nvme_scan_namespace(nvme_ns_sysfs_dir(), name);
 }
 
+
+static void nvme_ns_head_scan_path(nvme_subsystem_t s, nvme_ns_t n, char *name)
+{
+	nvme_ctrl_t c;
+	nvme_path_t p;
+
+	nvme_subsystem_for_each_ctrl(s, c) {
+		nvme_ctrl_for_each_path(c, p) {
+			if (!strcmp(nvme_path_get_name(p), name)) {
+				list_add_tail(&n->head->paths, &p->nentry);
+				p->n = n;
+				return;
+			}
+		}
+	}
+}
+
+static void nvme_subsystem_set_ns_path(nvme_subsystem_t s, nvme_ns_t n)
+{
+	struct nvme_ns_head *head = n->head;
+
+	if (nvme_ns_head_get_sysfs_dir(head)) {
+		struct dirents paths = {};
+		int i;
+
+		/*
+		 * When multipath is configured on kernel version >= 6.14,
+		 * we use multipath sysfs link to get each path of a namespace.
+		 */
+		paths.num = nvme_scan_ns_head_paths(head, &paths.ents);
+
+		for (i = 0; i < paths.num; i++)
+			nvme_ns_head_scan_path(s, n, paths.ents[i]->d_name);
+	} else {
+		nvme_ctrl_t c;
+		nvme_path_t p;
+		int ns_ctrl, ns_nsid, ret;
+
+		/*
+		 * If multipath is not configured or we're running on kernel
+		 * version < 6.14, fallback to the old way.
+		 */
+		ret = sscanf(nvme_ns_get_name(n), "nvme%dn%d",
+				&ns_ctrl, &ns_nsid);
+		if (ret != 2)
+			return;
+
+		nvme_subsystem_for_each_ctrl(s, c) {
+			nvme_ctrl_for_each_path(c, p) {
+				int p_subsys, p_ctrl, p_nsid;
+
+				ret = sscanf(nvme_path_get_name(p),
+					     "nvme%dc%dn%d",
+					     &p_subsys, &p_ctrl, &p_nsid);
+				if (ret != 3)
+					continue;
+				if (ns_ctrl == p_subsys && ns_nsid == p_nsid) {
+					list_add_tail(&head->paths, &p->nentry);
+					p->n = n;
+				}
+			}
+		}
+	}
+}
+
 static int nvme_ctrl_scan_namespace(nvme_root_t r, struct nvme_ctrl *c,
 				    char *name)
 {
@@ -2861,33 +2945,9 @@ static int nvme_ctrl_scan_namespace(nvme_root_t r, struct nvme_ctrl *c,
 	n->s = c->s;
 	n->c = c;
 	list_add_tail(&c->namespaces, &n->entry);
-	return 0;
-}
-
-static void nvme_subsystem_set_ns_path(nvme_subsystem_t s, nvme_ns_t n)
-{
-	nvme_ctrl_t c;
-	nvme_path_t p;
-	int ns_ctrl, ns_nsid, ret;
-
-	ret = sscanf(nvme_ns_get_name(n), "nvme%dn%d", &ns_ctrl, &ns_nsid);
-	if (ret != 2)
-		return;
+	nvme_subsystem_set_ns_path(c->s, n);
 
-	nvme_subsystem_for_each_ctrl(s, c) {
-		nvme_ctrl_for_each_path(c, p) {
-			int p_subsys, p_ctrl, p_nsid;
-
-			ret = sscanf(nvme_path_get_name(p), "nvme%dc%dn%d",
-				     &p_subsys, &p_ctrl, &p_nsid);
-			if (ret != 3)
-				continue;
-			if (ns_ctrl == p_subsys && ns_nsid == p_nsid) {
-				list_add_tail(&n->paths, &p->nentry);
-				p->n = n;
-			}
-		}
-	}
+	return 0;
 }
 
 static int nvme_subsystem_scan_namespace(nvme_root_t r, nvme_subsystem_t s,
@@ -2917,7 +2977,7 @@ static int nvme_subsystem_scan_namespace(nvme_root_t r, nvme_subsystem_t s,
 			list_del_init(&p->nentry);
 			p->n = NULL;
 		}
-		list_head_init(&_n->paths);
+		list_head_init(&_n->head->paths);
 		__nvme_free_ns(_n);
 	}
 	n->s = s;
diff --git a/src/nvme/tree.h b/src/nvme/tree.h
index 25d4b31b..9f382e9c 100644
--- a/src/nvme/tree.h
+++ b/src/nvme/tree.h
@@ -27,6 +27,7 @@
  */
 
 typedef struct nvme_ns *nvme_ns_t;
+typedef struct nvme_ns_head *nvme_ns_head_t;
 typedef struct nvme_path *nvme_path_t;
 typedef struct nvme_ctrl *nvme_ctrl_t;
 typedef struct nvme_subsystem *nvme_subsystem_t;
@@ -1091,6 +1092,14 @@ void nvme_ctrl_set_dhchap_host_key(nvme_ctrl_t c, const char *key);
  */
 const char *nvme_ctrl_get_dhchap_key(nvme_ctrl_t c);
 
+/**
+ * nvme_ns_head_get_sysfs_dir() - sysfs dir of namespave head
+ * @head: namespace head instance
+ *
+ * Returns: sysfs directory name of @head
+ */
+const char *nvme_ns_head_get_sysfs_dir(nvme_ns_head_t head);
+
 /**
  * nvme_ctrl_set_dhchap_key() - Set controller key
  * @c:		Controller for which the key should be set
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] tree: add queue-depth attribute for nvme path object
  2025-04-05 13:02 [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
  2025-04-05 13:02 ` [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link Nilay Shroff
@ 2025-04-05 13:02 ` Nilay Shroff
  2025-04-07  7:40   ` Daniel Wagner
  2025-04-05 13:02 ` [PATCH 3/3] tree: add attribute numa_nodes for NVMe " Nilay Shroff
  2025-04-05 16:15 ` [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
  3 siblings, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-05 13:02 UTC (permalink / raw)
  To: linux-nvme; +Cc: dwagner, hare, kbusch, gjoyce

Add a new attribute named "queue_depth" under the NVMe path object. This
attribute is used by the iopolicy "queue-depth", which was introduced in
kernel v6.11. However, the corresponding sysfs attribute for queue depth
was only added in kernel v6.14.

The queue_depth value can be useful for observing which paths are selected
for I/O forwarding, based on the depth of each path. To support this,
export the attribute in libnvme.map so it can be accessed via nvme-cli.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 src/libnvme.map    |  1 +
 src/nvme/private.h |  1 +
 src/nvme/tree.c    | 12 +++++++++++-
 src/nvme/tree.h    |  8 ++++++++
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/libnvme.map b/src/libnvme.map
index 4314705f..e53fad6b 100644
--- a/src/libnvme.map
+++ b/src/libnvme.map
@@ -317,6 +317,7 @@ LIBNVME_1_0 {
 		nvme_path_get_ctrl;
 		nvme_path_get_name;
 		nvme_path_get_ns;
+		nvme_path_get_queue_depth;
 		nvme_path_get_sysfs_dir;
 		nvme_paths_filter;
 		nvme_read_config;
diff --git a/src/nvme/private.h b/src/nvme/private.h
index f45c5823..f94276e2 100644
--- a/src/nvme/private.h
+++ b/src/nvme/private.h
@@ -34,6 +34,7 @@ struct nvme_path {
 	char *sysfs_dir;
 	char *ana_state;
 	int grpid;
+	int queue_depth;
 };
 
 struct nvme_ns_head {
diff --git a/src/nvme/tree.c b/src/nvme/tree.c
index bd7fb53e..b7a38a07 100644
--- a/src/nvme/tree.c
+++ b/src/nvme/tree.c
@@ -903,6 +903,11 @@ const char *nvme_path_get_name(nvme_path_t p)
 	return p->name;
 }
 
+int nvme_path_get_queue_depth(nvme_path_t p)
+{
+	return p->queue_depth;
+}
+
 const char *nvme_path_get_ana_state(nvme_path_t p)
 {
 	return p->ana_state;
@@ -921,7 +926,7 @@ void nvme_free_path(struct nvme_path *p)
 static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
 {
 	struct nvme_path *p;
-	_cleanup_free_ char *path = NULL, *grpid = NULL;
+	_cleanup_free_ char *path = NULL, *grpid = NULL, *queue_depth = NULL;
 	int ret;
 
 	nvme_msg(r, LOG_DEBUG, "scan controller %s path %s\n",
@@ -955,6 +960,11 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
 		sscanf(grpid, "%d", &p->grpid);
 	}
 
+	queue_depth = nvme_get_path_attr(p, "queue_depth");
+	if (queue_depth) {
+		sscanf(queue_depth, "%d", &p->queue_depth);
+	}
+
 	list_node_init(&p->nentry);
 	list_node_init(&p->entry);
 	list_add_tail(&c->paths, &p->entry);
diff --git a/src/nvme/tree.h b/src/nvme/tree.h
index 9f382e9c..a9082f8e 100644
--- a/src/nvme/tree.h
+++ b/src/nvme/tree.h
@@ -867,6 +867,14 @@ const char *nvme_path_get_sysfs_dir(nvme_path_t p);
  */
 const char *nvme_path_get_ana_state(nvme_path_t p);
 
+/**
+ * nvme_path_get_queue_depth() - Queue depth of an nvme_path_t object
+ * @p: &nvme_path_t object
+ *
+ * Return: Queue depth of @p
+ */
+int nvme_path_get_queue_depth(nvme_path_t p);
+
 /**
  * nvme_path_get_ctrl() - Parent controller of an nvme_path_t object
  * @p:	&nvme_path_t object
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-05 13:02 [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
  2025-04-05 13:02 ` [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link Nilay Shroff
  2025-04-05 13:02 ` [PATCH 2/3] tree: add queue-depth attribute for nvme path object Nilay Shroff
@ 2025-04-05 13:02 ` Nilay Shroff
  2025-04-07  7:44   ` Daniel Wagner
  2025-04-05 16:15 ` [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
  3 siblings, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-05 13:02 UTC (permalink / raw)
  To: linux-nvme; +Cc: dwagner, hare, kbusch, gjoyce

Add a new attribute named "numa_nodes" under the NVMe path object. This
attribute is used by the iopolicy "numa". The numa_nodes value is stored
for each NVMe path and represents the NUMA node(s) associated with it.
When the iopolicy is set to "numa", I/O traffic originating from a given
NUMA node will be forwarded through the corresponding NVMe path.

The numa_nodes attribute is useful for observing which NVMe path the
kernel would choose for I/O forwarding based on NUMA affinity. To support
this, export the attribute in libnvme.map so it can be accessed via
nvme-cli.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
 src/libnvme.map    |  1 +
 src/nvme/private.h |  1 +
 src/nvme/tree.c    | 10 ++++++++++
 src/nvme/tree.h    |  8 ++++++++
 4 files changed, 20 insertions(+)

diff --git a/src/libnvme.map b/src/libnvme.map
index e53fad6b..ffadc647 100644
--- a/src/libnvme.map
+++ b/src/libnvme.map
@@ -317,6 +317,7 @@ LIBNVME_1_0 {
 		nvme_path_get_ctrl;
 		nvme_path_get_name;
 		nvme_path_get_ns;
+		nvme_path_get_numa_nodes;
 		nvme_path_get_queue_depth;
 		nvme_path_get_sysfs_dir;
 		nvme_paths_filter;
diff --git a/src/nvme/private.h b/src/nvme/private.h
index f94276e2..5bfa027b 100644
--- a/src/nvme/private.h
+++ b/src/nvme/private.h
@@ -33,6 +33,7 @@ struct nvme_path {
 	char *name;
 	char *sysfs_dir;
 	char *ana_state;
+	char *numa_nodes;
 	int grpid;
 	int queue_depth;
 };
diff --git a/src/nvme/tree.c b/src/nvme/tree.c
index b7a38a07..ddb6fd70 100644
--- a/src/nvme/tree.c
+++ b/src/nvme/tree.c
@@ -913,6 +913,11 @@ const char *nvme_path_get_ana_state(nvme_path_t p)
 	return p->ana_state;
 }
 
+const char *nvme_path_get_numa_nodes(nvme_path_t p)
+{
+	return p->numa_nodes;
+}
+
 void nvme_free_path(struct nvme_path *p)
 {
 	list_del_init(&p->entry);
@@ -920,6 +925,7 @@ void nvme_free_path(struct nvme_path *p)
 	free(p->name);
 	free(p->sysfs_dir);
 	free(p->ana_state);
+	free(p->numa_nodes);
 	free(p);
 }
 
@@ -955,6 +961,10 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
 	if (!p->ana_state)
 		p->ana_state = strdup("optimized");
 
+	p->numa_nodes = nvme_get_path_attr(p, "numa_nodes");
+	if (!p->numa_nodes)
+		p->numa_nodes = strdup("-1");
+
 	grpid = nvme_get_path_attr(p, "ana_grpid");
 	if (grpid) {
 		sscanf(grpid, "%d", &p->grpid);
diff --git a/src/nvme/tree.h b/src/nvme/tree.h
index a9082f8e..f6116c2b 100644
--- a/src/nvme/tree.h
+++ b/src/nvme/tree.h
@@ -867,6 +867,14 @@ const char *nvme_path_get_sysfs_dir(nvme_path_t p);
  */
 const char *nvme_path_get_ana_state(nvme_path_t p);
 
+/**
+ * nvme_path_get_numa_nodes() - NUMA nodes of an nvme_path_t object
+ * @p : &nvme_path_t object
+ *
+ * Return: NUMA nodes associated to @p
+ */
+const char *nvme_path_get_numa_nodes(nvme_path_t p);
+
 /**
  * nvme_path_get_queue_depth() - Queue depth of an nvme_path_t object
  * @p: &nvme_path_t object
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns
  2025-04-05 13:02 [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
                   ` (2 preceding siblings ...)
  2025-04-05 13:02 ` [PATCH 3/3] tree: add attribute numa_nodes for NVMe " Nilay Shroff
@ 2025-04-05 16:15 ` Nilay Shroff
  3 siblings, 0 replies; 20+ messages in thread
From: Nilay Shroff @ 2025-04-05 16:15 UTC (permalink / raw)
  To: linux-nvme; +Cc: dwagner, hare, kbusch, gjoyce



On 4/5/25 6:32 PM, Nilay Shroff wrote:
> Hi,
> 
> Recently released Linux kernel v6.14 added support for easily discovering
> multiple paths to a shared NVMe namespace. This multipath information is
> exposed to userspace via a new sysfs group attribute named "multipath",
> located under /sys/block/<ns-blkdev>/. More details on this functionality
> can be found here [1].
> 
Sorry there's a typo here. The kernel version should be upcoming 6.15 and 
not 6.14. I will correct it in the next patch. 

Thanks,
--Nilay



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link
  2025-04-05 13:02 ` [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link Nilay Shroff
@ 2025-04-07  7:19   ` Daniel Wagner
  2025-04-07  9:36     ` Nilay Shroff
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Wagner @ 2025-04-07  7:19 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

Hi Nilay,

On Sat, Apr 05, 2025 at 06:32:47PM +0530, Nilay Shroff wrote:
> With kernel v6.14, when NVMe native multipath is enabled, it is possible
> to discover all paths to a shared namespace using the sysfs multipath link.
> Leverage this functionality to update the path links for each shared NVMe
> namespace, simplifying path discovery and management.

Since this is not a trivial change, could you extend this section a bit
how it works? I understand from reading the patch, a new head namespace
object is introduced which links to the different paths. 

> Please note that
> this change is fully backward compatible.

Could you elaborate what that means? libnvme should still work on older
kernels.

Also could you add a tests for this? The is already a test for the
current sysfs tree parser, see test/sysfs.

Thanks,
Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/3] tree: add queue-depth attribute for nvme path object
  2025-04-05 13:02 ` [PATCH 2/3] tree: add queue-depth attribute for nvme path object Nilay Shroff
@ 2025-04-07  7:40   ` Daniel Wagner
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Wagner @ 2025-04-07  7:40 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

Hi Nilay,

On Sat, Apr 05, 2025 at 06:32:48PM +0530, Nilay Shroff wrote:
> Add a new attribute named "queue_depth" under the NVMe path object. This
> attribute is used by the iopolicy "queue-depth", which was introduced in
> kernel v6.11. However, the corresponding sysfs attribute for queue depth
> was only added in kernel v6.14.
> 
> The queue_depth value can be useful for observing which paths are selected
> for I/O forwarding, based on the depth of each path. To support this,
> export the attribute in libnvme.map so it can be accessed via
> nvme-cli.

I understand the idea here is to get live update of the queue depth?
This here will only read it once and report always the same value. This
will work for nvme-cli but users which run the library for longer.

Thanks,
Daniel


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-05 13:02 ` [PATCH 3/3] tree: add attribute numa_nodes for NVMe " Nilay Shroff
@ 2025-04-07  7:44   ` Daniel Wagner
  2025-04-07  9:59     ` Nilay Shroff
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Wagner @ 2025-04-07  7:44 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

On Sat, Apr 05, 2025 at 06:32:49PM +0530, Nilay Shroff wrote:
> Add a new attribute named "numa_nodes" under the NVMe path object. This
> attribute is used by the iopolicy "numa". The numa_nodes value is stored
> for each NVMe path and represents the NUMA node(s) associated with it.
> When the iopolicy is set to "numa", I/O traffic originating from a given
> NUMA node will be forwarded through the corresponding NVMe path.
> 
> The numa_nodes attribute is useful for observing which NVMe path the
> kernel would choose for I/O forwarding based on NUMA affinity. To support
> this, export the attribute in libnvme.map so it can be accessed via
> nvme-cli.

This one has the same limitation as the previous one. Given that libnvme
currently caches everything, we could just accept this limitation for
the time being. Any thoughts on this?


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link
  2025-04-07  7:19   ` Daniel Wagner
@ 2025-04-07  9:36     ` Nilay Shroff
  2025-04-07 11:01       ` Daniel Wagner
  0 siblings, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-07  9:36 UTC (permalink / raw)
  To: Daniel Wagner; +Cc: linux-nvme, hare, kbusch, gjoyce



On 4/7/25 12:49 PM, Daniel Wagner wrote:
> Hi Nilay,
> 
> On Sat, Apr 05, 2025 at 06:32:47PM +0530, Nilay Shroff wrote:
>> With kernel v6.14, when NVMe native multipath is enabled, it is possible
>> to discover all paths to a shared namespace using the sysfs multipath link.
>> Leverage this functionality to update the path links for each shared NVMe
>> namespace, simplifying path discovery and management.
> 
> Since this is not a trivial change, could you extend this section a bit
> how it works? I understand from reading the patch, a new head namespace
> object is introduced which links to the different paths. 
Sure, I will update the commit and add more details.
> 
>> Please note that
>> this change is fully backward compatible.
> 
> Could you elaborate what that means? libnvme should still work on older
> kernels.
Yeah I think as it's given that libnvme should work with older kernels, I 
probably don't need to spell that out explicitly. The intention is just
to mention that this change would not break running libnvme on older kernels.
But I think I could remove this sentence.

> 
> Also could you add a tests for this? The is already a test for the
> current sysfs tree parser, see test/sysfs.
> 
Yes I'd add a testcase for this change. How about extending the existing 
test/sysfs/tree-dump.c? Or do you want me to add a new test case for this
change?

Thanks,
--Nilay


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-07  7:44   ` Daniel Wagner
@ 2025-04-07  9:59     ` Nilay Shroff
  2025-04-07 11:10       ` Daniel Wagner
  0 siblings, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-07  9:59 UTC (permalink / raw)
  To: Daniel Wagner; +Cc: linux-nvme, hare, kbusch, gjoyce



On 4/7/25 1:14 PM, Daniel Wagner wrote:
> On Sat, Apr 05, 2025 at 06:32:49PM +0530, Nilay Shroff wrote:
>> Add a new attribute named "numa_nodes" under the NVMe path object. This
>> attribute is used by the iopolicy "numa". The numa_nodes value is stored
>> for each NVMe path and represents the NUMA node(s) associated with it.
>> When the iopolicy is set to "numa", I/O traffic originating from a given
>> NUMA node will be forwarded through the corresponding NVMe path.
>>
>> The numa_nodes attribute is useful for observing which NVMe path the
>> kernel would choose for I/O forwarding based on NUMA affinity. To support
>> this, export the attribute in libnvme.map so it can be accessed via
>> nvme-cli.
> 
> This one has the same limitation as the previous one. Given that libnvme
> currently caches everything, we could just accept this limitation for
> the time being. Any thoughts on this?

Yes agreed. So how about adding a new API, for instance, 
nvme_path_get_numa_nodes__no_cached which would not return 
the cached value but instead re-read the latest value from
sysfs attribute and return the latest value? We may similarly
extend other APIs where we don't want to retrieve cached 
value. 

Thanks,
--Nilay


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link
  2025-04-07  9:36     ` Nilay Shroff
@ 2025-04-07 11:01       ` Daniel Wagner
  2025-04-07 13:43         ` Nilay Shroff
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Wagner @ 2025-04-07 11:01 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

On Mon, Apr 07, 2025 at 03:06:33PM +0530, Nilay Shroff wrote:
> >> Please note that
> >> this change is fully backward compatible.
> > 
> > Could you elaborate what that means? libnvme should still work on older
> > kernels.
> Yeah I think as it's given that libnvme should work with older kernels, I 
> probably don't need to spell that out explicitly. The intention is just
> to mention that this change would not break running libnvme on older kernels.
> But I think I could remove this sentence.

For some reason I read 'this change is NOT fully backward compatible'. My
bad, sorry.

> > Also could you add a tests for this? The is already a test for the
> > current sysfs tree parser, see test/sysfs.
> > 
> Yes I'd add a testcase for this change. How about extending the existing 
> test/sysfs/tree-dump.c? Or do you want me to add a new test case for this
> change?

Ideally you just need to add new sysfs dump
(scripts/collect-sysfs.sh) and have the corresponding output placed in
the data directory. If not, yes please extend tree-dump.c.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-07  9:59     ` Nilay Shroff
@ 2025-04-07 11:10       ` Daniel Wagner
  2025-04-07 14:19         ` Nilay Shroff
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Wagner @ 2025-04-07 11:10 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

On Mon, Apr 07, 2025 at 03:29:53PM +0530, Nilay Shroff wrote:
> On 4/7/25 1:14 PM, Daniel Wagner wrote:
> > On Sat, Apr 05, 2025 at 06:32:49PM +0530, Nilay Shroff wrote:
> >> Add a new attribute named "numa_nodes" under the NVMe path object. This
> >> attribute is used by the iopolicy "numa". The numa_nodes value is stored
> >> for each NVMe path and represents the NUMA node(s) associated with it.
> >> When the iopolicy is set to "numa", I/O traffic originating from a given
> >> NUMA node will be forwarded through the corresponding NVMe path.
> >>
> >> The numa_nodes attribute is useful for observing which NVMe path the
> >> kernel would choose for I/O forwarding based on NUMA affinity. To support
> >> this, export the attribute in libnvme.map so it can be accessed via
> >> nvme-cli.
> > 
> > This one has the same limitation as the previous one. Given that libnvme
> > currently caches everything, we could just accept this limitation for
> > the time being. Any thoughts on this?
> 
> Yes agreed. So how about adding a new API, for instance, 
> nvme_path_get_numa_nodes__no_cached which would not return 
> the cached value but instead re-read the latest value from
> sysfs attribute and return the latest value? We may similarly
> extend other APIs where we don't want to retrieve cached 
> value. 

Adding _no_cached function is certainty an option for libnvme 1.x.

One of the API changes for the next major version of libnvme (aka 2.x)
is to add an handle to all functions. Currently, we only have it for the
fabrics API. If such handle is available, we could add no-cache flag
instead duplicating all functions.

Maybe adding an explicit flags argument for 1.x is an option or should
we just keep going with the cached only approach?


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link
  2025-04-07 11:01       ` Daniel Wagner
@ 2025-04-07 13:43         ` Nilay Shroff
  0 siblings, 0 replies; 20+ messages in thread
From: Nilay Shroff @ 2025-04-07 13:43 UTC (permalink / raw)
  To: Daniel Wagner; +Cc: linux-nvme, hare, kbusch, gjoyce



On 4/7/25 4:31 PM, Daniel Wagner wrote:
> On Mon, Apr 07, 2025 at 03:06:33PM +0530, Nilay Shroff wrote:
>>>> Please note that
>>>> this change is fully backward compatible.
>>>
>>> Could you elaborate what that means? libnvme should still work on older
>>> kernels.
>> Yeah I think as it's given that libnvme should work with older kernels, I 
>> probably don't need to spell that out explicitly. The intention is just
>> to mention that this change would not break running libnvme on older kernels.
>> But I think I could remove this sentence.
> 
> For some reason I read 'this change is NOT fully backward compatible'. My
> bad, sorry.
> 
>>> Also could you add a tests for this? The is already a test for the
>>> current sysfs tree parser, see test/sysfs.
>>>
>> Yes I'd add a testcase for this change. How about extending the existing 
>> test/sysfs/tree-dump.c? Or do you want me to add a new test case for this
>> change?
> 
> Ideally you just need to add new sysfs dump
> (scripts/collect-sysfs.sh) and have the corresponding output placed in
> the data directory. If not, yes please extend tree-dump.c.

I see that the current tree-dump.c test program neither dumps namespace nor
dumps namespace path. So I'd extend this program. I'd also replace the existing
sysfs dump with another dump which shall have two nvme subsystems. The one
of the subsystems shall represent a multi port disk with a shared namespace
reachable through multipath, and another subsystem shall represent a single-port
disk with a private namespace. So this way, we'd be able to cover both shared/
multipath namespace as well as private namespace use cases. 

Thanks,
--Nilay


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-07 11:10       ` Daniel Wagner
@ 2025-04-07 14:19         ` Nilay Shroff
  2025-04-07 15:19           ` Daniel Wagner
  0 siblings, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-07 14:19 UTC (permalink / raw)
  To: Daniel Wagner; +Cc: linux-nvme, hare, kbusch, gjoyce



On 4/7/25 4:40 PM, Daniel Wagner wrote:
> On Mon, Apr 07, 2025 at 03:29:53PM +0530, Nilay Shroff wrote:
>> On 4/7/25 1:14 PM, Daniel Wagner wrote:
>>> On Sat, Apr 05, 2025 at 06:32:49PM +0530, Nilay Shroff wrote:
>>>> Add a new attribute named "numa_nodes" under the NVMe path object. This
>>>> attribute is used by the iopolicy "numa". The numa_nodes value is stored
>>>> for each NVMe path and represents the NUMA node(s) associated with it.
>>>> When the iopolicy is set to "numa", I/O traffic originating from a given
>>>> NUMA node will be forwarded through the corresponding NVMe path.
>>>>
>>>> The numa_nodes attribute is useful for observing which NVMe path the
>>>> kernel would choose for I/O forwarding based on NUMA affinity. To support
>>>> this, export the attribute in libnvme.map so it can be accessed via
>>>> nvme-cli.
>>>
>>> This one has the same limitation as the previous one. Given that libnvme
>>> currently caches everything, we could just accept this limitation for
>>> the time being. Any thoughts on this?
>>
>> Yes agreed. So how about adding a new API, for instance, 
>> nvme_path_get_numa_nodes__no_cached which would not return 
>> the cached value but instead re-read the latest value from
>> sysfs attribute and return the latest value? We may similarly
>> extend other APIs where we don't want to retrieve cached 
>> value. 
> 
> Adding _no_cached function is certainty an option for libnvme 1.x.
> 
> One of the API changes for the next major version of libnvme (aka 2.x)
> is to add an handle to all functions. Currently, we only have it for the
> fabrics API. If such handle is available, we could add no-cache flag
> instead duplicating all functions.
> 
> Maybe adding an explicit flags argument for 1.x is an option or should
> we just keep going with the cached only approach?
Both approaches — either adding a no-cache flag or introducing dedicated 
__no_cached APIs — would work. However, in my opinion, we should aim to 
use a consistent method across both libnvme 1.x and 2.x versions.

As we discussed during LSFMM, if we plan to implement an "nvme top" command, 
we would need non-cached versions of these APIs even for nvme-cli. So, 
using the same mechanism for both versions makes sense. Otherwise, we’d 
also have to maintain different logic in nvme top depending on the libnvme
version, which adds unnecessary complexity.

Thanks,
--Nilay



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-07 14:19         ` Nilay Shroff
@ 2025-04-07 15:19           ` Daniel Wagner
  2025-04-08  5:58             ` Hannes Reinecke
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Wagner @ 2025-04-07 15:19 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

On Mon, Apr 07, 2025 at 07:49:41PM +0530, Nilay Shroff wrote:
> Both approaches — either adding a no-cache flag or introducing dedicated
> __no_cached APIs — would work. However, in my opinion, we should aim to 
> use a consistent method across both libnvme 1.x and 2.x versions.

There will be a lot of API changes for 2.x to address all the silly
mistakes. That's way it will be a major version change.

> As we discussed during LSFMM, if we plan to implement an "nvme top" command, 
> we would need non-cached versions of these APIs even for nvme-cli.

Yes, that is why I brought up this discussion. For such a command we
would need non cached versions. And I realize that we would need this
feature also for other commands. So it's not just the queue depth which
changes. Maybe we should just not try to add this feature to 1.x and
rather start the 2.x project.

> So, using the same mechanism for both versions makes sense. Otherwise,
> we’d also have to maintain different logic in nvme top depending on
> the libnvme version, which adds unnecessary complexity.

I don't see a big problem here. Either use the 1.x or 2.x.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-07 15:19           ` Daniel Wagner
@ 2025-04-08  5:58             ` Hannes Reinecke
  2025-04-08 11:42               ` Daniel Wagner
  0 siblings, 1 reply; 20+ messages in thread
From: Hannes Reinecke @ 2025-04-08  5:58 UTC (permalink / raw)
  To: Daniel Wagner, Nilay Shroff; +Cc: linux-nvme, hare, kbusch, gjoyce

On 4/7/25 17:19, Daniel Wagner wrote:
> On Mon, Apr 07, 2025 at 07:49:41PM +0530, Nilay Shroff wrote:
>> Both approaches — either adding a no-cache flag or introducing dedicated
>> __no_cached APIs — would work. However, in my opinion, we should aim to
>> use a consistent method across both libnvme 1.x and 2.x versions.
> 
> There will be a lot of API changes for 2.x to address all the silly
> mistakes. That's way it will be a major version change.
> 
>> As we discussed during LSFMM, if we plan to implement an "nvme top" command,
>> we would need non-cached versions of these APIs even for nvme-cli.
> 
> Yes, that is why I brought up this discussion. For such a command we
> would need non cached versions. And I realize that we would need this
> feature also for other commands. So it's not just the queue depth which
> changes. Maybe we should just not try to add this feature to 1.x and
> rather start the 2.x project.
> 
>> So, using the same mechanism for both versions makes sense. Otherwise,
>> we’d also have to maintain different logic in nvme top depending on
>> the libnvme version, which adds unnecessary complexity.
> 
> I don't see a big problem here. Either use the 1.x or 2.x.

Why don't we make this option as non-cached by default?
In the end, there is limited usability in having it cached.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-08  5:58             ` Hannes Reinecke
@ 2025-04-08 11:42               ` Daniel Wagner
  2025-04-08 11:48                 ` Hannes Reinecke
  2025-04-09  9:38                 ` Nilay Shroff
  0 siblings, 2 replies; 20+ messages in thread
From: Daniel Wagner @ 2025-04-08 11:42 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Nilay Shroff, linux-nvme, hare, kbusch, gjoyce

On Tue, Apr 08, 2025 at 07:58:40AM +0200, Hannes Reinecke wrote:
> On 4/7/25 17:19, Daniel Wagner wrote:
> > On Mon, Apr 07, 2025 at 07:49:41PM +0530, Nilay Shroff wrote:
> > > Both approaches — either adding a no-cache flag or introducing dedicated
> > > __no_cached APIs — would work. However, in my opinion, we should aim to
> > > use a consistent method across both libnvme 1.x and 2.x versions.
> > 
> > There will be a lot of API changes for 2.x to address all the silly
> > mistakes. That's way it will be a major version change.
> > 
> > > As we discussed during LSFMM, if we plan to implement an "nvme top" command,
> > > we would need non-cached versions of these APIs even for nvme-cli.
> > 
> > Yes, that is why I brought up this discussion. For such a command we
> > would need non cached versions. And I realize that we would need this
> > feature also for other commands. So it's not just the queue depth which
> > changes. Maybe we should just not try to add this feature to 1.x and
> > rather start the 2.x project.
> > 
> > > So, using the same mechanism for both versions makes sense. Otherwise,
> > > we’d also have to maintain different logic in nvme top depending on
> > > the libnvme version, which adds unnecessary complexity.
> > 
> > I don't see a big problem here. Either use the 1.x or 2.x.
> 
> Why don't we make this option as non-cached by default?
> In the end, there is limited usability in having it cached.

Sure, this is also fine. I suppose there will be some more conversion
from cached to non-cached necessary for the 'nvme top' feature.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-08 11:42               ` Daniel Wagner
@ 2025-04-08 11:48                 ` Hannes Reinecke
  2025-04-09  9:38                 ` Nilay Shroff
  1 sibling, 0 replies; 20+ messages in thread
From: Hannes Reinecke @ 2025-04-08 11:48 UTC (permalink / raw)
  To: Daniel Wagner; +Cc: Nilay Shroff, linux-nvme, hare, kbusch, gjoyce

On 4/8/25 13:42, Daniel Wagner wrote:
> On Tue, Apr 08, 2025 at 07:58:40AM +0200, Hannes Reinecke wrote:
>> On 4/7/25 17:19, Daniel Wagner wrote:
>>> On Mon, Apr 07, 2025 at 07:49:41PM +0530, Nilay Shroff wrote:
>>>> Both approaches — either adding a no-cache flag or introducing dedicated
>>>> __no_cached APIs — would work. However, in my opinion, we should aim to
>>>> use a consistent method across both libnvme 1.x and 2.x versions.
>>>
>>> There will be a lot of API changes for 2.x to address all the silly
>>> mistakes. That's way it will be a major version change.
>>>
>>>> As we discussed during LSFMM, if we plan to implement an "nvme top" command,
>>>> we would need non-cached versions of these APIs even for nvme-cli.
>>>
>>> Yes, that is why I brought up this discussion. For such a command we
>>> would need non cached versions. And I realize that we would need this
>>> feature also for other commands. So it's not just the queue depth which
>>> changes. Maybe we should just not try to add this feature to 1.x and
>>> rather start the 2.x project.
>>>
>>>> So, using the same mechanism for both versions makes sense. Otherwise,
>>>> we’d also have to maintain different logic in nvme top depending on
>>>> the libnvme version, which adds unnecessary complexity.
>>>
>>> I don't see a big problem here. Either use the 1.x or 2.x.
>>
>> Why don't we make this option as non-cached by default?
>> In the end, there is limited usability in having it cached.
> 
> Sure, this is also fine. I suppose there will be some more conversion
> from cached to non-cached necessary for the 'nvme top' feature.

And I dimly remember I did some of this work already during the python
binding work (where some attributes changed during the lifetime of the
tree). Not sure what happened to that, though.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-08 11:42               ` Daniel Wagner
  2025-04-08 11:48                 ` Hannes Reinecke
@ 2025-04-09  9:38                 ` Nilay Shroff
  2025-04-09 11:36                   ` Daniel Wagner
  1 sibling, 1 reply; 20+ messages in thread
From: Nilay Shroff @ 2025-04-09  9:38 UTC (permalink / raw)
  To: Daniel Wagner, Hannes Reinecke; +Cc: linux-nvme, hare, kbusch, gjoyce



On 4/8/25 5:12 PM, Daniel Wagner wrote:
> On Tue, Apr 08, 2025 at 07:58:40AM +0200, Hannes Reinecke wrote:
>> On 4/7/25 17:19, Daniel Wagner wrote:
>>> On Mon, Apr 07, 2025 at 07:49:41PM +0530, Nilay Shroff wrote:
>>>> Both approaches — either adding a no-cache flag or introducing dedicated
>>>> __no_cached APIs — would work. However, in my opinion, we should aim to
>>>> use a consistent method across both libnvme 1.x and 2.x versions.
>>>
>>> There will be a lot of API changes for 2.x to address all the silly
>>> mistakes. That's way it will be a major version change.
>>>
>>>> As we discussed during LSFMM, if we plan to implement an "nvme top" command,
>>>> we would need non-cached versions of these APIs even for nvme-cli.
>>>
>>> Yes, that is why I brought up this discussion. For such a command we
>>> would need non cached versions. And I realize that we would need this
>>> feature also for other commands. So it's not just the queue depth which
>>> changes. Maybe we should just not try to add this feature to 1.x and
>>> rather start the 2.x project.
>>>
>>>> So, using the same mechanism for both versions makes sense. Otherwise,
>>>> we’d also have to maintain different logic in nvme top depending on
>>>> the libnvme version, which adds unnecessary complexity.
>>>
>>> I don't see a big problem here. Either use the 1.x or 2.x.
>>
>> Why don't we make this option as non-cached by default?
>> In the end, there is limited usability in having it cached.
> 
> Sure, this is also fine. I suppose there will be some more conversion
> from cached to non-cached necessary for the 'nvme top' feature.

Makes sense. For the this patchset, I will spin a new series and send 
after incorporating comments. We shall address cached vs non-cached 
version of APIs in another series (probably when we implement nvme top). 

However, I like the idea of Hannes to only have non-cached APIs. May be, 
we should be able to convert existing cached APIs to non-cached APIs 
(or limit to only those APIs required for nvme top) without any side
effect. 

Thanks,
--Nilay




^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] tree: add attribute numa_nodes for NVMe path object
  2025-04-09  9:38                 ` Nilay Shroff
@ 2025-04-09 11:36                   ` Daniel Wagner
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Wagner @ 2025-04-09 11:36 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: Hannes Reinecke, linux-nvme, hare, kbusch, gjoyce

On Wed, Apr 09, 2025 at 03:08:40PM +0530, Nilay Shroff wrote:
> Makes sense. For the this patchset, I will spin a new series and send
> after incorporating comments. We shall address cached vs non-cached 
> version of APIs in another series (probably when we implement nvme
> top).

Okay.

> However, I like the idea of Hannes to only have non-cached APIs. May be, 
> we should be able to convert existing cached APIs to non-cached APIs 
> (or limit to only those APIs required for nvme top) without any side
> effect.

I expect some more changes due to the 'nvme top' command, but as you
suggest, let's first get this series right.


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2025-04-09 11:36 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-04-05 13:02 [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff
2025-04-05 13:02 ` [PATCH 1/3] tree: add support for discovering nvme paths using sysfs multipath link Nilay Shroff
2025-04-07  7:19   ` Daniel Wagner
2025-04-07  9:36     ` Nilay Shroff
2025-04-07 11:01       ` Daniel Wagner
2025-04-07 13:43         ` Nilay Shroff
2025-04-05 13:02 ` [PATCH 2/3] tree: add queue-depth attribute for nvme path object Nilay Shroff
2025-04-07  7:40   ` Daniel Wagner
2025-04-05 13:02 ` [PATCH 3/3] tree: add attribute numa_nodes for NVMe " Nilay Shroff
2025-04-07  7:44   ` Daniel Wagner
2025-04-07  9:59     ` Nilay Shroff
2025-04-07 11:10       ` Daniel Wagner
2025-04-07 14:19         ` Nilay Shroff
2025-04-07 15:19           ` Daniel Wagner
2025-04-08  5:58             ` Hannes Reinecke
2025-04-08 11:42               ` Daniel Wagner
2025-04-08 11:48                 ` Hannes Reinecke
2025-04-09  9:38                 ` Nilay Shroff
2025-04-09 11:36                   ` Daniel Wagner
2025-04-05 16:15 ` [PATCH 0/3] libnvme: add support for discovering multipath of a shared ns Nilay Shroff

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.