* [PATCH 1/1] nvme: always enable multipath
@ 2024-11-21 22:03 Bryan Gurney
2024-11-22 6:26 ` Nilay Shroff
2024-11-22 12:09 ` Christoph Hellwig
0 siblings, 2 replies; 11+ messages in thread
From: Bryan Gurney @ 2024-11-21 22:03 UTC (permalink / raw)
To: linux-kernel, linux-nvme, kbusch, hch, sagi, axboe, mpe, naveen,
maddy, kernel
Cc: jmeneghi, bmarzins, Bryan Gurney
Since device-mapper multipath will no longer be operating on NVMe
devices, there is no longer a need to set CONFIG_NVME_MULTIPATH=n.
Always enable NVMe multipath, remove CONFIG_NVME_MULTIPATH, and use
the code paths that would be used if CONFIG_NVME_MULTIPATH=y.
Reviewed-by: John Meneghini <jmeneghi@redhat.com>
Tested-by: Bryan Gurney <bgurney@redhat.com>
Signed-off-by: Bryan Gurney <bgurney@redhat.com>
---
arch/loongarch/configs/loongson3_defconfig | 1 -
arch/powerpc/configs/skiroot_defconfig | 1 -
drivers/nvme/host/Kconfig | 9 --
drivers/nvme/host/Makefile | 3 +-
drivers/nvme/host/core.c | 17 +---
drivers/nvme/host/ioctl.c | 3 +-
drivers/nvme/host/multipath.c | 10 +--
drivers/nvme/host/nvme.h | 97 +---------------------
drivers/nvme/host/sysfs.c | 6 --
9 files changed, 7 insertions(+), 140 deletions(-)
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 75b366407a60..91931927645a 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -422,7 +422,6 @@ CONFIG_BLK_DEV_RAM_SIZE=8192
CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_RBD=m
CONFIG_BLK_DEV_NVME=y
-CONFIG_NVME_MULTIPATH=y
CONFIG_NVME_RDMA=m
CONFIG_NVME_FC=m
CONFIG_NVME_TCP=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 9d44e6630908..10336c5796c7 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -76,7 +76,6 @@ CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_VIRTIO_BLK=m
CONFIG_BLK_DEV_NVME=m
-CONFIG_NVME_MULTIPATH=y
CONFIG_EEPROM_AT24=m
# CONFIG_CXL is not set
# CONFIG_OCXL is not set
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 486afe598184..50505bea1ca6 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -14,15 +14,6 @@ config BLK_DEV_NVME
To compile this driver as a module, choose M here: the
module will be called nvme.
-config NVME_MULTIPATH
- bool "NVMe multipath support"
- depends on NVME_CORE
- help
- This option enables support for multipath access to NVMe
- subsystems. If this option is enabled only a single
- /dev/nvmeXnY device will show up for each NVMe namespace,
- even if it is accessible through multiple controllers.
-
config NVME_VERBOSE_ERRORS
bool "NVMe verbose error reporting"
depends on NVME_CORE
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 6414ec968f99..4a3117ec0096 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -10,10 +10,9 @@ obj-$(CONFIG_NVME_FC) += nvme-fc.o
obj-$(CONFIG_NVME_TCP) += nvme-tcp.o
obj-$(CONFIG_NVME_APPLE) += nvme-apple.o
-nvme-core-y += core.o ioctl.o sysfs.o pr.o
+nvme-core-y += core.o ioctl.o sysfs.o pr.o multipath.o
nvme-core-$(CONFIG_NVME_VERBOSE_ERRORS) += constants.o
nvme-core-$(CONFIG_TRACING) += trace.o
-nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_BLK_DEV_ZONED) += zns.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bfd71511c85f..0a71a1a5af68 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3599,9 +3599,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
size_t size = sizeof(*head);
int ret = -ENOMEM;
-#ifdef CONFIG_NVME_MULTIPATH
size += num_possible_nodes() * sizeof(struct nvme_ns *);
-#endif
head = kzalloc(size, GFP_KERNEL);
if (!head)
@@ -3750,14 +3748,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
info->nsid);
goto out_put_ns_head;
}
-
- if (!multipath) {
- dev_warn(ctrl->device,
- "Found shared namespace %d, but multipathing not supported.\n",
- info->nsid);
- dev_warn_once(ctrl->device,
- "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
- }
}
list_add_tail_rcu(&ns->siblings, &head->list);
@@ -3855,11 +3845,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
sprintf(disk->disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
ctrl->instance, ns->head->instance);
disk->flags |= GENHD_FL_HIDDEN;
- } else if (multipath) {
- sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance,
- ns->head->instance);
} else {
- sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance,
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->subsys->instance,
ns->head->instance);
}
@@ -4457,13 +4444,11 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
queue_work(nvme_wq, &ctrl->fw_act_work);
}
break;
-#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
break;
-#endif
case NVME_AER_NOTICE_DISC_CHANGED:
ctrl->aen_result = result;
break;
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 64b5542fb3b7..60d14084dcf1 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -676,7 +676,7 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
return blk_rq_poll(req, iob, poll_flags);
return 0;
}
-#ifdef CONFIG_NVME_MULTIPATH
+
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp, struct nvme_ns_head *head, int srcu_idx,
bool open_for_write)
@@ -766,7 +766,6 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
srcu_read_unlock(&head->srcu, srcu_idx);
return ret;
}
-#endif /* CONFIG_NVME_MULTIPATH */
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a85d190942bd..da7cb04a3697 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -9,11 +9,6 @@
#include <trace/events/block.h>
#include "nvme.h"
-bool multipath = true;
-module_param(multipath, bool, 0444);
-MODULE_PARM_DESC(multipath,
- "turn on native support for multiple controllers per subsystem");
-
static const char *nvme_iopolicy_names[] = {
[NVME_IOPOLICY_NUMA] = "numa",
[NVME_IOPOLICY_RR] = "round-robin",
@@ -633,7 +628,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
* could change after a rescan.
*/
if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
- !nvme_is_unique_nsid(ctrl, head) || !multipath)
+ !nvme_is_unique_nsid(ctrl, head))
return 0;
blk_set_stacking_limits(&lim);
@@ -1039,8 +1034,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
int error = 0;
/* check if multipath is enabled and we have the capability */
- if (!multipath || !ctrl->subsys ||
- !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
+ if (!ctrl->subsys || !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
/* initialize this in the identify path to cover controller resets */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 611b02c8a8b3..c9d5dc42436d 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -186,9 +186,7 @@ struct nvme_request {
u8 retries;
u8 flags;
u16 status;
-#ifdef CONFIG_NVME_MULTIPATH
unsigned long start_time;
-#endif
struct nvme_ctrl *ctrl;
};
@@ -355,7 +353,6 @@ struct nvme_ctrl {
struct work_struct fw_act_work;
unsigned long events;
-#ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */
u8 anacap;
u8 anatt;
@@ -367,7 +364,6 @@ struct nvme_ctrl {
struct timer_list anatt_timer;
struct work_struct ana_work;
atomic_t nr_active;
-#endif
#ifdef CONFIG_NVME_HOST_AUTH
struct work_struct dhchap_auth_work;
@@ -439,9 +435,7 @@ struct nvme_subsystem {
u16 vendor_id;
u16 awupf; /* 0's based awupf value. */
struct ida ns_ida;
-#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
-#endif
};
/*
@@ -491,7 +485,6 @@ struct nvme_ns_head {
struct device cdev_device;
struct gendisk *disk;
-#ifdef CONFIG_NVME_MULTIPATH
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
@@ -500,12 +493,11 @@ struct nvme_ns_head {
unsigned long flags;
#define NVME_NSHEAD_DISK_LIVE 0
struct nvme_ns __rcu *current_path[];
-#endif
};
static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
{
- return IS_ENABLED(CONFIG_NVME_MULTIPATH) && head->disk;
+ return head->disk;
}
enum nvme_ns_features {
@@ -520,10 +512,8 @@ struct nvme_ns {
struct nvme_ctrl *ctrl;
struct request_queue *queue;
struct gendisk *disk;
-#ifdef CONFIG_NVME_MULTIPATH
enum nvme_ana_state ana_state;
u32 ana_grpid;
-#endif
struct list_head siblings;
struct kref kref;
struct nvme_ns_head *head;
@@ -937,7 +927,7 @@ extern const struct block_device_operations nvme_bdev_ops;
void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
-#ifdef CONFIG_NVME_MULTIPATH
+
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return ctrl->ana_log_buf != NULL;
@@ -972,7 +962,6 @@ static inline void nvme_trace_bio_complete(struct request *req)
trace_block_bio_complete(ns->head->disk->queue, req->bio);
}
-extern bool multipath;
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;
@@ -981,88 +970,6 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
return disk->fops == &nvme_ns_head_ops;
}
-#else
-#define multipath false
-static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
-{
- return false;
-}
-static inline void nvme_failover_req(struct request *req)
-{
-}
-static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
-{
-}
-static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
- struct nvme_ns_head *head)
-{
- return 0;
-}
-static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
-{
-}
-static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
-{
-}
-static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
-{
- return false;
-}
-static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
-{
-}
-static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
-{
-}
-static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
-{
-}
-static inline void nvme_trace_bio_complete(struct request *req)
-{
-}
-static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
-{
-}
-static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
- struct nvme_id_ctrl *id)
-{
- if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
- dev_warn(ctrl->device,
-"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
- return 0;
-}
-static inline void nvme_mpath_update(struct nvme_ctrl *ctrl)
-{
-}
-static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
-{
-}
-static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
-{
-}
-static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
-{
-}
-static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
-{
-}
-static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
-{
-}
-static inline void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
-{
-}
-static inline void nvme_mpath_start_request(struct request *rq)
-{
-}
-static inline void nvme_mpath_end_request(struct request *rq)
-{
-}
-static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
-{
- return false;
-}
-#endif /* CONFIG_NVME_MULTIPATH */
int nvme_ns_get_unique_id(struct nvme_ns *ns, u8 id[16],
enum blk_unique_id type);
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index b68a9e5f1ea3..e32f50f4d9cc 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -255,10 +255,8 @@ static struct attribute *nvme_ns_attrs[] = {
&dev_attr_nsid.attr,
&dev_attr_metadata_bytes.attr,
&dev_attr_nuse.attr,
-#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
-#endif
&dev_attr_io_passthru_err_log_enabled.attr,
NULL,
};
@@ -282,7 +280,6 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0;
}
-#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
/* per-path attr */
if (nvme_disk_is_ns_head(dev_to_disk(dev)))
@@ -290,7 +287,6 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
-#endif
return a->mode;
}
@@ -860,9 +856,7 @@ static struct attribute *nvme_subsys_attrs[] = {
&subsys_attr_firmware_rev.attr,
&subsys_attr_subsysnqn.attr,
&subsys_attr_subsystype.attr,
-#ifdef CONFIG_NVME_MULTIPATH
&subsys_attr_iopolicy.attr,
-#endif
NULL,
};
--
2.45.2
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-21 22:03 [PATCH 1/1] nvme: always enable multipath Bryan Gurney
@ 2024-11-22 6:26 ` Nilay Shroff
2024-11-22 14:10 ` John Meneghini
2024-11-22 12:09 ` Christoph Hellwig
1 sibling, 1 reply; 11+ messages in thread
From: Nilay Shroff @ 2024-11-22 6:26 UTC (permalink / raw)
To: Bryan Gurney, linux-kernel, linux-nvme, kbusch, hch, sagi, axboe,
mpe, naveen, maddy, kernel
Cc: jmeneghi, bmarzins
On 11/22/24 03:33, Bryan Gurney wrote:
> Since device-mapper multipath will no longer be operating on NVMe
> devices, there is no longer a need to set CONFIG_NVME_MULTIPATH=n.
>
> Always enable NVMe multipath, remove CONFIG_NVME_MULTIPATH, and use
> the code paths that would be used if CONFIG_NVME_MULTIPATH=y.
>
> Reviewed-by: John Meneghini <jmeneghi@redhat.com>
> Tested-by: Bryan Gurney <bgurney@redhat.com>
> Signed-off-by: Bryan Gurney <bgurney@redhat.com>
> ---
> arch/loongarch/configs/loongson3_defconfig | 1 -
> arch/powerpc/configs/skiroot_defconfig | 1 -
> drivers/nvme/host/Kconfig | 9 --
> drivers/nvme/host/Makefile | 3 +-
> drivers/nvme/host/core.c | 17 +---
> drivers/nvme/host/ioctl.c | 3 +-
> drivers/nvme/host/multipath.c | 10 +--
> drivers/nvme/host/nvme.h | 97 +---------------------
> drivers/nvme/host/sysfs.c | 6 --
> 9 files changed, 7 insertions(+), 140 deletions(-)
I applied the above changes to my kernel tree and ran the below blktests:
# ./check nvme/033 nvme/034 nvme/035 nvme/036 nvme/037 nvme/039
nvme/033 => nvme0n1 (tr=loop) (create and connect to an NVMeOF target with a passthru controller) [not run]
/dev/nvme0n1 is a NVMe multipath device
nvme/034 => nvme0n1 (tr=loop) (run data verification fio job on an NVMeOF passthru controller) [not run]
/dev/nvme0n1 is a NVMe multipath device
nvme/035 => nvme0n1 (tr=loop) (run mkfs and data verification fio job on an NVMeOF passthru controller) [not run]
/dev/nvme0n1 is a NVMe multipath device
nvme/036 => nvme0n1 (tr=loop) (test NVMe reset command on an NVMeOF target with a passthru controller) [not run]
/dev/nvme0n1 is a NVMe multipath device
nvme/037 => nvme0n1 (tr=loop) (test deletion of NVMeOF passthru controllers immediately after setup) [not run]
/dev/nvme0n1 is a NVMe multipath device
nvme/039 => nvme0n1 (test error logging) [not run]
/dev/nvme0n1 is a NVMe multipath device
As we can see here, the above tests were skipped because the test detects
that the device (/dev/nvme0n1) is a multipath device. However, in fact,
the test device is NOT a multipath. So I think we need to update the above
tests. We may submit another patch to blktest and update above tests once
your changes are merged upstream.
Otherwise, changes look good to me:
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 6:26 ` Nilay Shroff
@ 2024-11-22 14:10 ` John Meneghini
2024-11-24 13:09 ` Nilay Shroff
0 siblings, 1 reply; 11+ messages in thread
From: John Meneghini @ 2024-11-22 14:10 UTC (permalink / raw)
To: Nilay Shroff, Bryan Gurney, linux-kernel, linux-nvme, kbusch, hch,
sagi, axboe, mpe, naveen, maddy, kernel
Cc: bmarzins
On 11/22/24 01:26, Nilay Shroff wrote:
>
> On 11/22/24 03:33, Bryan Gurney wrote:
>
> I applied the above changes to my kernel tree and ran the below blktests:
>
> # ./check nvme/033 nvme/034 nvme/035 nvme/036 nvme/037 nvme/039
> nvme/033 => nvme0n1 (tr=loop) (create and connect to an NVMeOF target with a passthru controller) [not run]
> /dev/nvme0n1 is a NVMe multipath device
> nvme/034 => nvme0n1 (tr=loop) (run data verification fio job on an NVMeOF passthru controller) [not run]
> /dev/nvme0n1 is a NVMe multipath device
> nvme/035 => nvme0n1 (tr=loop) (run mkfs and data verification fio job on an NVMeOF passthru controller) [not run]
> /dev/nvme0n1 is a NVMe multipath device
> nvme/036 => nvme0n1 (tr=loop) (test NVMe reset command on an NVMeOF target with a passthru controller) [not run]
> /dev/nvme0n1 is a NVMe multipath device
> nvme/037 => nvme0n1 (tr=loop) (test deletion of NVMeOF passthru controllers immediately after setup) [not run]
> /dev/nvme0n1 is a NVMe multipath device
> nvme/039 => nvme0n1 (test error logging) [not run]
> /dev/nvme0n1 is a NVMe multipath device
>
> As we can see here, the above tests were skipped because the test detects
> that the device (/dev/nvme0n1) is a multipath device. However, in fact,
> the test device is NOT a multipath. So I think we need to update the above
> tests. We may submit another patch to blktest and update above tests once
> your changes are merged upstream.
Nilay, what kind of device is /dev/nvme0n1 ?
Can you show us the output of
nvme id-ctrl /dev/nvme0 -H
nvme id-ns /dev/nvme0n1 -H
Thanks,
/John
> Otherwise, changes look good to me:
> Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 14:10 ` John Meneghini
@ 2024-11-24 13:09 ` Nilay Shroff
0 siblings, 0 replies; 11+ messages in thread
From: Nilay Shroff @ 2024-11-24 13:09 UTC (permalink / raw)
To: John Meneghini, Bryan Gurney, linux-kernel, linux-nvme, kbusch,
hch, sagi, axboe, mpe, naveen, maddy, kernel
Cc: bmarzins
[-- Attachment #1: Type: text/plain, Size: 1880 bytes --]
On 11/22/24 19:40, John Meneghini wrote:
> On 11/22/24 01:26, Nilay Shroff wrote:
>>
>> On 11/22/24 03:33, Bryan Gurney wrote:
>>
>> I applied the above changes to my kernel tree and ran the below blktests:
>>
>> # ./check nvme/033 nvme/034 nvme/035 nvme/036 nvme/037 nvme/039
>> nvme/033 => nvme0n1 (tr=loop) (create and connect to an NVMeOF target with a passthru controller) [not run]
>> /dev/nvme0n1 is a NVMe multipath device
>> nvme/034 => nvme0n1 (tr=loop) (run data verification fio job on an NVMeOF passthru controller) [not run]
>> /dev/nvme0n1 is a NVMe multipath device
>> nvme/035 => nvme0n1 (tr=loop) (run mkfs and data verification fio job on an NVMeOF passthru controller) [not run]
>> /dev/nvme0n1 is a NVMe multipath device
>> nvme/036 => nvme0n1 (tr=loop) (test NVMe reset command on an NVMeOF target with a passthru controller) [not run]
>> /dev/nvme0n1 is a NVMe multipath device
>> nvme/037 => nvme0n1 (tr=loop) (test deletion of NVMeOF passthru controllers immediately after setup) [not run]
>> /dev/nvme0n1 is a NVMe multipath device
>> nvme/039 => nvme0n1 (test error logging) [not run]
>> /dev/nvme0n1 is a NVMe multipath device
>>
>> As we can see here, the above tests were skipped because the test detects
>> that the device (/dev/nvme0n1) is a multipath device. However, in fact,
>> the test device is NOT a multipath. So I think we need to update the above
>> tests. We may submit another patch to blktest and update above tests once
>> your changes are merged upstream.
>
> Nilay, what kind of device is /dev/nvme0n1 ?
>
It's an NVMe namespace attached to a controller.
> Can you show us the output of
>
> nvme id-ctrl /dev/nvme0 -H
> nvme id-ns /dev/nvme0n1 -H
>
Sure, attached the output of above commands for your reference.
Thanks,
--Nilay
[-- Attachment #2: nvme-id.txt --]
[-- Type: text/plain, Size: 13087 bytes --]
# nvme list -v
Subsystem Subsystem-NQN Controllers
---------------- ------------------------------------------------------------------------------------------------ ----------------
nvme-subsys0 nqn.1994-11.com.samsung:nvme:PM1735:2.5-inch:S6EUNA0R500358 nvme0
Device Cntlid SN MN FR TxPort Address Slot Subsystem Namespaces
---------------- ------ -------------------- ---------------------------------------- -------- ------ -------------- ------ ------------ ----------------
nvme0 65 S6EUNA0R500358 1.6TB NVMe Gen4 U.2 SSD REV.SN49 pcie 0018:01:00.0 U78D4.ND0.WZS0021-P1-C8 nvme-subsys0 nvme0n1
Device Generic NSID Usage Format Controllers
----------------- ----------------- ---------- -------------------------- ---------------- ----------------
/dev/nvme0n1 /dev/ng0n1 0x1 5.75 GB / 5.75 GB 4 KiB + 8 B nvme0
# nvme id-ctrl /dev/nvme0 -H
NVME Identify Controller:
vid : 0x144d
ssvid : 0x1014
sn : S6EUNA0R500358
mn : 1.6TB NVMe Gen4 U.2 SSD
fr : REV.SN49
rab : 8
ieee : 002538
cmic : 0x3
[3:3] : 0 ANA not supported
[2:2] : 0 PCI
[1:1] : 0x1 Multi Controller
[0:0] : 0x1 Multi Port
mdts : 0
cntlid : 0x41
ver : 0x10300
rtd3r : 0xe4e1c0
rtd3e : 0x989680
oaes : 0x300
[31:31] : 0 Discovery Log Change Notice Not Supported
[27:27] : 0 Zone Descriptor Changed Notices Not Supported
[16:16] : 0 Temperature Threshold Hysteresis Recovery Not Supported
[15:15] : 0 Normal NSS Shutdown Event Not Supported
[14:14] : 0 Endurance Group Event Aggregate Log Page Change Notice Not Supported
[13:13] : 0 LBA Status Information Notices Not Supported
[12:12] : 0 Predictable Latency Event Aggregate Log Change Notices Not Supported
[11:11] : 0 Asymmetric Namespace Access Change Notices Not Supported
[9:9] : 0x1 Firmware Activation Notices Supported
[8:8] : 0x1 Namespace Attribute Changed Event Supported
ctratt : 0
[19:19] : 0 Flexible Data Placement Not Supported
[17:17] : 0 HMB Restrict Non-Operational Power State Access Not Supported
[16:16] : 0 MDTS and Size Limits Exclude Metadata Not Supported
[15:15] : 0 Extended LBA Formats Not Supported
[14:14] : 0 Delete NVM Set Not Supported
[13:13] : 0 Delete Endurance Group Not Supported
[12:12] : 0 Variable Capacity Management Not Supported
[11:11] : 0 Fixed Capacity Management Not Supported
[10:10] : 0 Multi Domain Subsystem Not Supported
[9:9] : 0 UUID List Not Supported
[8:8] : 0 SQ Associations Not Supported
[7:7] : 0 Namespace Granularity Not Supported
[6:6] : 0 Traffic Based Keep Alive Not Supported
[5:5] : 0 Predictable Latency Mode Not Supported
[4:4] : 0 Endurance Groups Not Supported
[3:3] : 0 Read Recovery Levels Not Supported
[2:2] : 0 NVM Sets Not Supported
[1:1] : 0 Non-Operational Power State Permissive Not Supported
[0:0] : 0 128-bit Host Identifier Not Supported
rrls : 0
cntrltype : 0
[7:2] : 0 Reserved
[1:0] : 0 Controller type not reported
fguid : 00000000-0000-0000-0000-000000000000
crdt1 : 0
crdt2 : 0
crdt3 : 0
nvmsr : 1
[1:1] : 0 NVM subsystem Not part of an Enclosure
[0:0] : 0x1 NVM subsystem part of a Storage Device
vwci : 255
[7:7] : 0x1 VPD Write Cycles Remaining field is valid.
[6:0] : 0xfe VPD Write Cycles Remaining
mec : 0
[1:1] : 0 NVM subsystem Not contains a Management Endpoint on a PCIe port
[0:0] : 0 NVM subsystem Not contains a Management Endpoint on an SMBus/I2C port
oacs : 0x5f
[10:10] : 0 Lockdown Command and Feature Not Supported
[9:9] : 0 Get LBA Status Capability Not Supported
[8:8] : 0 Doorbell Buffer Config Not Supported
[7:7] : 0 Virtualization Management Not Supported
[6:6] : 0x1 NVMe-MI Send and Receive Supported
[5:5] : 0 Directives Not Supported
[4:4] : 0x1 Device Self-test Supported
[3:3] : 0x1 NS Management and Attachment Supported
[2:2] : 0x1 FW Commit and Download Supported
[1:1] : 0x1 Format NVM Supported
[0:0] : 0x1 Security Send and Receive Supported
acl : 127
aerl : 15
frmw : 0x16
[5:5] : 0 Multiple FW or Boot Update Detection Not Supported
[4:4] : 0x1 Firmware Activate Without Reset Supported
[3:1] : 0x3 Number of Firmware Slots
[0:0] : 0 Firmware Slot 1 Read/Write
lpa : 0x1e
[6:6] : 0 Telemetry Log Data Area 4 Not Supported
[5:5] : 0 LID 0x0, Scope of each command in LID 0x5, 0x12, 0x13 Not Supported
[4:4] : 0x1 Persistent Event log Supported
[3:3] : 0x1 Telemetry host/controller initiated log page Supported
[2:2] : 0x1 Extended data for Get Log Page Supported
[1:1] : 0x1 Command Effects Log Page Supported
[0:0] : 0 SMART/Health Log Page per NS Not Supported
elpe : 255
[7:0] : 255 (0's based) Error Log Page Entries (ELPE)
npss : 2
[7:0] : 2 (0's based) Number of Power States Support (NPSS)
avscc : 0x1
[0:0] : 0x1 Admin Vendor Specific Commands uses NVMe Format
apsta : 0
[0:0] : 0 Autonomous Power State Transitions Not Supported
wctemp : 345
[15:0] : 72 °C (345 K) Warning Composite Temperature Threshold (WCTEMP)
cctemp : 358
[15:0] : 85 °C (358 K) Critical Composite Temperature Threshold (CCTEMP)
mtfa : 130
hmpre : 0
hmmin : 0
tnvmcap : 1600321314816
[127:0] : 1600321314816
Total NVM Capacity (TNVMCAP)
unvmcap : 1594569523200
[127:0] : 1594569523200
Unallocated NVM Capacity (UNVMCAP)
rpmbs : 0
[31:24]: 0 Access Size
[23:16]: 0 Total Size
[5:3] : 0 Authentication Method
[2:0] : 0 Number of RPMB Units
edstt : 2
dsto : 1
fwug : 255
kas : 10
hctma : 0
[0:0] : 0 Host Controlled Thermal Management Not Supported
mntmt : 0
[15:0] : -273 °C (0 K) Minimum Thermal Management Temperature (MNTMT)
mxtmt : 0
[15:0] : -273 °C (0 K) Maximum Thermal Management Temperature (MXTMT)
sanicap : 0x3
[31:30] : 0 Additional media modification after sanitize operation completes successfully is not defined
[29:29] : 0 No-Deallocate After Sanitize bit in Sanitize command Supported
[3:3] : 0 Media Verification and Post-Verification Deallocation state Not Supported
[2:2] : 0 Overwrite Sanitize Operation Not Supported
[1:1] : 0x1 Block Erase Sanitize Operation Supported
[0:0] : 0x1 Crypto Erase Sanitize Operation Supported
hmminds : 0
hmmaxd : 0
nsetidmax : 0
endgidmax : 0
anatt : 0
anacap : 0
[7:7] : 0 Non-zero group ID Not Supported
[6:6] : 0 Group ID does change
[4:4] : 0 ANA Change state Not Supported
[3:3] : 0 ANA Persistent Loss state Not Supported
[2:2] : 0 ANA Inaccessible state Not Supported
[1:1] : 0 ANA Non-optimized state Not Supported
[0:0] : 0 ANA Optimized state Not Supported
anagrpmax : 0
nanagrpid : 0
pels : 80
domainid : 0
megcap : 0
tmpthha : 0
[2:0] : 0 Temperature Threshold Maximum Hysteresis
sqes : 0x66
[7:4] : 0x6 Max SQ Entry Size (64)
[3:0] : 0x6 Min SQ Entry Size (64)
cqes : 0x44
[7:4] : 0x4 Max CQ Entry Size (16)
[3:0] : 0x4 Min CQ Entry Size (16)
maxcmd : 0
nn : 64
oncs : 0x7f
[10:10] : 0 All Fast Copy Not Supported
[9:9] : 0 Copy Single Atomicity Not Supported
[8:8] : 0 Copy Not Supported
[7:7] : 0 Verify Not Supported
[6:6] : 0x1 Timestamp Supported
[5:5] : 0x1 Reservations Supported
[4:4] : 0x1 Save and Select Supported
[3:3] : 0x1 Write Zeroes Supported
[2:2] : 0x1 Data Set Management Supported
[1:1] : 0x1 Write Uncorrectable Supported
[0:0] : 0x1 Compare Supported
fuses : 0
[0:0] : 0 Fused Compare and Write Not Supported
fna : 0x4
[3:3] : 0 Format NVM Broadcast NSID (FFFFFFFFh) Supported
[2:2] : 0x1 Crypto Erase Supported as part of Secure Erase
[1:1] : 0 Crypto Erase Applies to Single Namespace(s)
[0:0] : 0 Format Applies to Single Namespace(s)
vwc : 0
[2:1] : 0 Support for the NSID field set to FFFFFFFFh is not indicated
[0:0] : 0 Volatile Write Cache Not Present
awun : 65535
awupf : 0
icsvscc : 1
[0:0] : 0x1 NVM Vendor Specific Commands uses NVMe Format
nwpc : 0
[2:2] : 0 Permanent Write Protect Not Supported
[1:1] : 0 Write Protect Until Power Supply Not Supported
[0:0] : 0 No Write Protect and Write Protect Namespace Not Supported
acwu : 0
ocfs : 0
[3:3] : 0 Controller Copy Format 3h Not Supported
[2:2] : 0 Controller Copy Format 2h Not Supported
[1:1] : 0 Controller Copy Format 1h Not Supported
[0:0] : 0 Controller Copy Format 0h Not Supported
sgls : 0xf0002
[21:21]: 0 Transport SGL Data Block Descriptor Not Supported
[20:20]: 0 Address Offsets Not Supported
[19:19]: 0x1 Metadata Pointer Containing SGL Descriptor is Supported
[18:18]: 0x1 SGL Length Larger than Buffer Supported
[17:17]: 0x1 Byte-Aligned Contig. MD Buffer Supported
[16:16]: 0x1 SGL Bit-Bucket Supported
[15:8] : 0 SGL Descriptor Threshold
[2:2] : 0 Keyed SGL Data Block descriptor Not Supported
[1:0] : 0x2 Scatter-Gather Lists Supported. Dword alignment required.
mnan : 0
maxdna : 0
maxcna : 0
oaqd : 0
subnqn : nqn.1994-11.com.samsung:nvme:PM1735:2.5-inch:S6EUNA0R500358
ioccsz : 0
iorcsz : 0
icdoff : 0
fcatt : 0
[0:0] : 0 Dynamic Controller Model
msdbd : 0
ofcs : 0
[0:0] : 0 Disconnect command Not Supported
ps 0 : mp:25.00W operational enlat:180 exlat:180 rrt:0 rrl:0
rwt:0 rwl:0 idle_power:7.00W active_power:15.00W
active_power_workload:80K 128KiB SW
ps 1 : mp:18.00W operational enlat:180 exlat:180 rrt:1 rrl:1
rwt:1 rwl:1 idle_power:7.00W active_power:15.00W
active_power_workload:80K 128KiB SW
ps 2 : mp:12.00W operational enlat:180 exlat:180 rrt:2 rrl:2
rwt:2 rwl:2 idle_power:7.00W active_power:12.00W
active_power_workload:80K 128KiB SW
# nvme id-ns /dev/nvme0n1 -H
NVME Identify Namespace 1:
nsze : 0x156d56
ncap : 0x156d56
nuse : 0x156d56
nsfeat : 0
[5:4] : 0 NPWG, NPWA, NPDG, NPDGL, NPDA, and NOWS are Not Supported
[3:3] : 0 NGUID and EUI64 fields if non-zero, Reused
[2:2] : 0 Deallocated or Unwritten Logical Block error Not Supported
[1:1] : 0 Namespace uses AWUN, AWUPF, and ACWU
[0:0] : 0 Thin Provisioning Not Supported
nlbaf : 4
flbas : 0x1
[6:5] : 0 Most significant 2 bits of Current LBA Format Selected
[4:4] : 0 Metadata Transferred in Separate Contiguous Buffer
[3:0] : 0x1 Least significant 4 bits of Current LBA Format Selected
mc : 0x3
[1:1] : 0x1 Metadata Pointer Supported
[0:0] : 0x1 Metadata as Part of Extended Data LBA Supported
dpc : 0x1f
[4:4] : 0x1 Protection Information Transferred as Last Bytes of Metadata Supported
[3:3] : 0x1 Protection Information Transferred as First Bytes of Metadata Supported
[2:2] : 0x1 Protection Information Type 3 Supported
[1:1] : 0x1 Protection Information Type 2 Supported
[0:0] : 0x1 Protection Information Type 1 Supported
dps : 0x1
[3:3] : 0 Protection Information is Transferred as Last Bytes of Metadata
[2:0] : 0x1 Protection Information Type 1 Enabled
nmic : 0
[0:0] : 0 Namespace Multipath Not Capable
rescap : 0xff
[7:7] : 0x1 Ignore Existing Key - Used as defined in revision 1.3 or later
[6:6] : 0x1 Exclusive Access - All Registrants Supported
[5:5] : 0x1 Write Exclusive - All Registrants Supported
[4:4] : 0x1 Exclusive Access - Registrants Only Supported
[3:3] : 0x1 Write Exclusive - Registrants Only Supported
[2:2] : 0x1 Exclusive Access Supported
[1:1] : 0x1 Write Exclusive Supported
[0:0] : 0x1 Persist Through Power Loss Supported
fpi : 0x80
[7:7] : 0x1 Format Progress Indicator Supported
[6:0] : 0 Format Progress Indicator (Remaining 0%)
dlfeat : 17
[4:4] : 0x1 Guard Field of Deallocated Logical Blocks is set to CRC of The Value Read
[3:3] : 0 Deallocate Bit in the Write Zeroes Command is Not Supported
[2:0] : 0x1 Bytes Read From a Deallocated Logical Block and its Metadata are 0x00
nawun : 0
nawupf : 0
nacwu : 0
nabsn : 0
nabo : 0
nabspf : 0
noiob : 0
nvmcap : 5751791616
mssrl : 0
mcl : 0
msrc : 0
nulbaf : 0
anagrpid: 0
nsattr : 0
nvmsetid: 0
endgid : 0
nguid : 36455530525003580025384100000001
eui64 : 0000000000000000
LBA Format 0 : Metadata Size: 0 bytes - Data Size: 4096 bytes - Relative Performance: 0 Best
LBA Format 1 : Metadata Size: 8 bytes - Data Size: 4096 bytes - Relative Performance: 0x2 Good (in use)
LBA Format 2 : Metadata Size: 0 bytes - Data Size: 512 bytes - Relative Performance: 0x1 Better
LBA Format 3 : Metadata Size: 8 bytes - Data Size: 512 bytes - Relative Performance: 0x3 Degraded
LBA Format 4 : Metadata Size: 64 bytes - Data Size: 4096 bytes - Relative Performance: 0x3 Degraded
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-21 22:03 [PATCH 1/1] nvme: always enable multipath Bryan Gurney
2024-11-22 6:26 ` Nilay Shroff
@ 2024-11-22 12:09 ` Christoph Hellwig
2024-11-22 15:39 ` Keith Busch
1 sibling, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2024-11-22 12:09 UTC (permalink / raw)
To: Bryan Gurney
Cc: linux-kernel, linux-nvme, kbusch, hch, sagi, axboe, mpe, naveen,
maddy, kernel, jmeneghi, bmarzins
On Thu, Nov 21, 2024 at 05:03:21PM -0500, Bryan Gurney wrote:
> Since device-mapper multipath will no longer be operating on NVMe
> devices, there is no longer a need to set CONFIG_NVME_MULTIPATH=n.
>
> Always enable NVMe multipath, remove CONFIG_NVME_MULTIPATH, and use
> the code paths that would be used if CONFIG_NVME_MULTIPATH=y.
As mentioned last round not having to build the not tiny multipath
code for embedded systems and other small builds that never require
multipathing still seems like a sensible idea.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 12:09 ` Christoph Hellwig
@ 2024-11-22 15:39 ` Keith Busch
2024-11-22 17:49 ` John Meneghini
2024-11-22 17:52 ` John Meneghini
0 siblings, 2 replies; 11+ messages in thread
From: Keith Busch @ 2024-11-22 15:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Bryan Gurney, linux-kernel, linux-nvme, sagi, axboe, mpe, naveen,
maddy, kernel, jmeneghi, bmarzins
On Fri, Nov 22, 2024 at 01:09:25PM +0100, Christoph Hellwig wrote:
> On Thu, Nov 21, 2024 at 05:03:21PM -0500, Bryan Gurney wrote:
> > Since device-mapper multipath will no longer be operating on NVMe
> > devices, there is no longer a need to set CONFIG_NVME_MULTIPATH=n.
> >
> > Always enable NVMe multipath, remove CONFIG_NVME_MULTIPATH, and use
> > the code paths that would be used if CONFIG_NVME_MULTIPATH=y.
>
> As mentioned last round not having to build the not tiny multipath
> code for embedded systems and other small builds that never require
> multipathing still seems like a sensible idea.
It's not just embedded either. I have plenty of single port datacenter
PCIe NVMe's that claim multi-controller capabilities. I think it's some
artifact of SRIOV that some versions of the firmware can bring.
Anyway, we only use the one physical function, so they're certainly not
multipath devices here. We disable the CONFIG option because while the
nvme multipath IO overhead is low, it's not zero.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 15:39 ` Keith Busch
@ 2024-11-22 17:49 ` John Meneghini
2024-11-22 18:15 ` Keith Busch
2024-11-22 17:52 ` John Meneghini
1 sibling, 1 reply; 11+ messages in thread
From: John Meneghini @ 2024-11-22 17:49 UTC (permalink / raw)
To: Keith Busch, Christoph Hellwig
Cc: Bryan Gurney, linux-kernel, linux-nvme, sagi, axboe, mpe, naveen,
maddy, kernel, bmarzins
On 11/22/24 10:39, Keith Busch wrote:
> On Fri, Nov 22, 2024 at 01:09:25PM +0100, Christoph Hellwig wrote:
>> On Thu, Nov 21, 2024 at 05:03:21PM -0500, Bryan Gurney wrote:
>>> Since device-mapper multipath will no longer be operating on NVMe
>>> devices, there is no longer a need to set CONFIG_NVME_MULTIPATH=n.
>>>
>>> Always enable NVMe multipath, remove CONFIG_NVME_MULTIPATH, and use
>>> the code paths that would be used if CONFIG_NVME_MULTIPATH=y.
>>
>> As mentioned last round not having to build the not tiny multipath
>> code for embedded systems and other small builds that never require
>> multipathing still seems like a sensible idea.
>
> It's not just embedded either. I have plenty of single port datacenter
> PCIe NVMe's that claim multi-controller capabilities. I think it's some
> artifact of SRIOV that some versions of the firmware can bring.
>
> Anyway, we only use the one physical function, so they're certainly not
> multipath devices here. We disable the CONFIG option because while the
> nvme multipath IO overhead is low, it's not zero.
So you're saying you want to keep CONFIG_NVME_MULTIPATH and simply remove the modparam nvme_core.multipath? (I know I said we
were going to do that but that's before Bryan and I started testing his changes with blktests. I think we can fix that.)
The problem with this solution is: when you build a kernel with CONFIG_NVME_MULTIPATH=n you get exactly the same thing as
CONFIG_NVME_MULTIPATH=y with nvme_core.multipath=n. You get a separate /dev/nvmeNN entry for every namespace/controller path,
minus the multipath.c code.
So, I don't see the point. If you really want to stop supporting user space multi-path solutions like DMMP with nvme we need to
stop creating multiple dev entries for multi-path controllers, no matter what.
Note that this multi-pathing stuff is a part of the confusion in UDEV, like I spoke about at ALPPS this year. One reason why the
/dev/disk/by-path symlinks are so broken is because the kernel has at least three different ways to configure multi-pathing
support for nvme devices.
We've been saying we're going to do this since since v5.18.
So how do we want to do this?
-
- if (!multipath) {
- dev_warn(ctrl->device,
- "Found shared namespace %d, but multipathing not supported.\n",
- info->nsid);
- dev_warn_once(ctrl->device,
- "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
- }
commit ce8d78616a6b637d1b763eb18e32045687a84305
Author: Christoph Hellwig <hch@lst.de>
Date: Tue Mar 15 13:27:07 2022 +0100
nvme: warn about shared namespaces without CONFIG_NVME_MULTIPATH
Start warning about exposing a namespace as multiple block devices,
and set a fixed deprecation release.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
(master) > git name-rev --tags --name-only ce8d78616a6b637d1b763eb18e32045687a84305
nvme-5.18-2022-03-17^0
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 17:49 ` John Meneghini
@ 2024-11-22 18:15 ` Keith Busch
2024-11-22 18:29 ` John Meneghini
0 siblings, 1 reply; 11+ messages in thread
From: Keith Busch @ 2024-11-22 18:15 UTC (permalink / raw)
To: John Meneghini
Cc: Christoph Hellwig, Bryan Gurney, linux-kernel, linux-nvme, sagi,
axboe, mpe, naveen, maddy, kernel, bmarzins
On Fri, Nov 22, 2024 at 12:49:32PM -0500, John Meneghini wrote:
> On 11/22/24 10:39, Keith Busch wrote:
> > On Fri, Nov 22, 2024 at 01:09:25PM +0100, Christoph Hellwig wrote:
> So you're saying you want to keep CONFIG_NVME_MULTIPATH and simply remove
> the modparam nvme_core.multipath? (I know I said we were going to do that
> but that's before Bryan and I started testing his changes with blktests. I
> think we can fix that.)
>
> The problem with this solution is: when you build a kernel with
> CONFIG_NVME_MULTIPATH=n you get exactly the same thing as
> CONFIG_NVME_MULTIPATH=y with nvme_core.multipath=n. You get a separate
> /dev/nvmeNN entry for every namespace/controller path, minus the multipath.c
> code.
>
> So, I don't see the point. If you really want to stop supporting user space
> multi-path solutions like DMMP with nvme we need to stop creating multiple
> dev entries for multi-path controllers, no matter what.
>
> Note that this multi-pathing stuff is a part of the confusion in UDEV, like
> I spoke about at ALPPS this year. One reason why the /dev/disk/by-path
> symlinks are so broken is because the kernel has at least three different
> ways to configure multi-pathing support for nvme devices.
>
> We've been saying we're going to do this since since v5.18.
>
> So how do we want to do this?
>
> -
> - if (!multipath) {
> - dev_warn(ctrl->device,
> - "Found shared namespace %d, but multipathing not supported.\n",
> - info->nsid);
> - dev_warn_once(ctrl->device,
> - "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
> - }
If you want to change the driver to prevent exposing subsequent
namepsace path block devices when multipath is not enabled, that is
probably fine.
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 18:15 ` Keith Busch
@ 2024-11-22 18:29 ` John Meneghini
0 siblings, 0 replies; 11+ messages in thread
From: John Meneghini @ 2024-11-22 18:29 UTC (permalink / raw)
To: Keith Busch
Cc: Christoph Hellwig, Bryan Gurney, linux-kernel, linux-nvme, sagi,
axboe, mpe, naveen, maddy, kernel, bmarzins
On 11/22/24 13:15, Keith Busch wrote:> If you want to change the driver to prevent exposing subsequent
> namepsace path block devices when multipath is not enabled, that is
> probably fine.
OK, we'll work on another patch to do this. The plan will be:
When CONFIG_NVME_MULTIPATH=y, you get multipath.c and full support for both nvme multipath (CMIC/NMIC = 1) and non-multipath
devices (CMIC/NMIC=0) devices with 1 /dev entry per namespace.
When CONFIG_NVME_MULTIPATH=n, you get support for non-multipath devices (CMIC/NMIC=0) devices, but multipath devices
(CMIC/NMIC=1) will only expose the first controller/namespace path.
/John
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 15:39 ` Keith Busch
2024-11-22 17:49 ` John Meneghini
@ 2024-11-22 17:52 ` John Meneghini
2024-11-22 18:16 ` Keith Busch
1 sibling, 1 reply; 11+ messages in thread
From: John Meneghini @ 2024-11-22 17:52 UTC (permalink / raw)
To: Keith Busch, Christoph Hellwig
Cc: Bryan Gurney, linux-kernel, linux-nvme, sagi, axboe, mpe, naveen,
maddy, kernel, bmarzins
On 11/22/24 10:39, Keith Busch wrote:
> Anyway, we only use the one physical function, so they're certainly not
> multipath devices here. We disable the CONFIG option because while the
> nvme multipath IO overhead is low, it's not zero.
Do we know what the performance impact is? Has anybody measured the difference?
/John
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/1] nvme: always enable multipath
2024-11-22 17:52 ` John Meneghini
@ 2024-11-22 18:16 ` Keith Busch
0 siblings, 0 replies; 11+ messages in thread
From: Keith Busch @ 2024-11-22 18:16 UTC (permalink / raw)
To: John Meneghini
Cc: Christoph Hellwig, Bryan Gurney, linux-kernel, linux-nvme, sagi,
axboe, mpe, naveen, maddy, kernel, bmarzins
On Fri, Nov 22, 2024 at 12:52:51PM -0500, John Meneghini wrote:
> On 11/22/24 10:39, Keith Busch wrote:
> > Anyway, we only use the one physical function, so they're certainly not
> > multipath devices here. We disable the CONFIG option because while the
> > nvme multipath IO overhead is low, it's not zero.
>
> Do we know what the performance impact is? Has anybody measured the difference?
It doesn't benefit single path devices, so any cost for it is too high.
It's clearly not free.
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2024-11-24 13:09 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-21 22:03 [PATCH 1/1] nvme: always enable multipath Bryan Gurney
2024-11-22 6:26 ` Nilay Shroff
2024-11-22 14:10 ` John Meneghini
2024-11-24 13:09 ` Nilay Shroff
2024-11-22 12:09 ` Christoph Hellwig
2024-11-22 15:39 ` Keith Busch
2024-11-22 17:49 ` John Meneghini
2024-11-22 18:15 ` Keith Busch
2024-11-22 18:29 ` John Meneghini
2024-11-22 17:52 ` John Meneghini
2024-11-22 18:16 ` Keith Busch
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox