* fix atomic limits check v2
@ 2025-06-25 6:39 Christoph Hellwig
2025-06-25 6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-25 6:39 UTC (permalink / raw)
To: Keith Busch, Sagi Grimberg; +Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry
Hi all,
this series tries to fix the atomics limit check to limit it to
the per-controller values and to the controller probing.
I think this should solve the root cause of the report from Yi Zhang,
but needs new verification.
Changes since v1:
- initialize subsys->awupf in nvme_init_subsystem to make sure that only
happens once per controller
Diffstat:
core.c | 83 +++++++++++++++++++++++++++++------------------------------------
nvme.h | 3 --
2 files changed, 39 insertions(+), 47 deletions(-)
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/2] nvme: refactor the atomic write unit detection
2025-06-25 6:39 fix atomic limits check v2 Christoph Hellwig
@ 2025-06-25 6:39 ` Christoph Hellwig
2025-06-25 6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
2 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-25 6:39 UTC (permalink / raw)
To: Keith Busch, Sagi Grimberg
Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry, Luis Chamberlain
Move all the code out of nvme_update_disk_info into the helper, and
rename the helper to have a somewhat less clumsy name.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
---
drivers/nvme/host/core.c | 72 +++++++++++++++++++++-------------------
1 file changed, 38 insertions(+), 34 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 724f5732786c..520fb5f1e214 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2015,21 +2015,51 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
}
-static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
- struct nvme_id_ns *id, struct queue_limits *lim,
- u32 bs, u32 atomic_bs)
+static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
+ struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
{
- unsigned int boundary = 0;
+ u32 atomic_bs, boundary = 0;
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
- if (le16_to_cpu(id->nabspf))
+ /*
+ * We do not support an offset for the atomic boundaries.
+ */
+ if (id->nabo)
+ return bs;
+
+ if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
+ /*
+ * Use the per-namespace atomic write unit when available.
+ */
+ atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+ if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+ } else {
+ /*
+ * Use the controller wide atomic write unit. This sucks
+ * because the limit is defined in terms of logical blocks while
+ * namespaces can have different formats, and because there is
+ * no clear language in the specification prohibiting different
+ * values for different controllers in the subsystem.
+ */
+ atomic_bs = (1 + ns->ctrl->awupf) * bs;
+ }
+
+ if (!ns->ctrl->subsys->atomic_bs) {
+ ns->ctrl->subsys->atomic_bs = atomic_bs;
+ } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
+ dev_err_ratelimited(ns->ctrl->device,
+ "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
+ ns->disk ? ns->disk->disk_name : "?",
+ ns->ctrl->subsys->atomic_bs,
+ atomic_bs);
}
+
lim->atomic_write_hw_max = atomic_bs;
lim->atomic_write_hw_boundary = boundary;
lim->atomic_write_hw_unit_min = bs;
lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
lim->features |= BLK_FEAT_ATOMIC_WRITES;
+ return atomic_bs;
}
static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
@@ -2067,34 +2097,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
valid = false;
}
- atomic_bs = phys_bs = bs;
- if (id->nabo == 0) {
- /*
- * Bit 1 indicates whether NAWUPF is defined for this namespace
- * and whether it should be used instead of AWUPF. If NAWUPF ==
- * 0 then AWUPF must be used instead.
- */
- if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
- atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
- else
- atomic_bs = (1 + ns->ctrl->awupf) * bs;
-
- /*
- * Set subsystem atomic bs.
- */
- if (ns->ctrl->subsys->atomic_bs) {
- if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
- dev_err_ratelimited(ns->ctrl->device,
- "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
- ns->disk ? ns->disk->disk_name : "?",
- ns->ctrl->subsys->atomic_bs,
- atomic_bs);
- }
- } else
- ns->ctrl->subsys->atomic_bs = atomic_bs;
-
- nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
- }
+ phys_bs = bs;
+ atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
--
2.47.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/2] nvme: fix atomic write size validation
2025-06-25 6:39 fix atomic limits check v2 Christoph Hellwig
2025-06-25 6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
@ 2025-06-25 6:39 ` Christoph Hellwig
2025-06-27 0:05 ` Uday Shankar
2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
2 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-25 6:39 UTC (permalink / raw)
To: Keith Busch, Sagi Grimberg
Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry, Luis Chamberlain
Don't mix the namespace and controller values, and validate the
per-controller limit when probing the controller. This avoids spurious
failures for controllers that have namespaces with different logical
block sizes, or that report the per-namespace values only for some
namespaces.
It also fixes a missing queue_limits_cancel_update in an error path by
removing that error path.
Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
---
drivers/nvme/host/core.c | 33 +++++++++++----------------------
drivers/nvme/host/nvme.h | 3 +--
2 files changed, 12 insertions(+), 24 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 520fb5f1e214..e533d791955d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
* no clear language in the specification prohibiting different
* values for different controllers in the subsystem.
*/
- atomic_bs = (1 + ns->ctrl->awupf) * bs;
- }
-
- if (!ns->ctrl->subsys->atomic_bs) {
- ns->ctrl->subsys->atomic_bs = atomic_bs;
- } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
- dev_err_ratelimited(ns->ctrl->device,
- "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
- ns->disk ? ns->disk->disk_name : "?",
- ns->ctrl->subsys->atomic_bs,
- atomic_bs);
+ atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
}
lim->atomic_write_hw_max = atomic_bs;
@@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (!nvme_update_disk_info(ns, id, &lim))
capacity = 0;
- /*
- * Validate the max atomic write size fits within the subsystem's
- * atomic write capabilities.
- */
- if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
- blk_mq_unfreeze_queue(ns->disk->queue, memflags);
- ret = -ENXIO;
- goto out;
- }
-
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->model, id->mn, sizeof(subsys->model));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
+ subsys->awupf = le16_to_cpu(id->awupf);
/* Versions prior to 1.4 don't necessarily report a valid type */
if (id->cntrltype == NVME_CTRL_DISC ||
@@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ret)
goto out_free;
}
+
+ if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
+ dev_err_ratelimited(ctrl->device,
+ "inconsistent AWUPF, controller not added (%u/%u).\n",
+ le16_to_cpu(id->awupf), ctrl->subsys->awupf);
+ ret = -EINVAL;
+ goto out_free;
+ }
+
memcpy(ctrl->subsys->firmware_rev, id->fr,
sizeof(ctrl->subsys->firmware_rev));
@@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
dev_pm_qos_hide_latency_tolerance(ctrl->device);
- ctrl->awupf = le16_to_cpu(id->awupf);
out_free:
kfree(id);
return ret;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a468cdc5b5cb..7df2ea21851f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -410,7 +410,6 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
- u16 awupf; /* 0's based value. */
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -443,11 +442,11 @@ struct nvme_subsystem {
u8 cmic;
enum nvme_subsys_type subtype;
u16 vendor_id;
+ u16 awupf; /* 0's based value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
#endif
- u32 atomic_bs;
};
/*
--
2.47.2
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: fix atomic limits check v2
2025-06-25 6:39 fix atomic limits check v2 Christoph Hellwig
2025-06-25 6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
2025-06-25 6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
@ 2025-06-25 17:04 ` alan.adamson
2025-06-26 5:22 ` Christoph Hellwig
2 siblings, 1 reply; 7+ messages in thread
From: alan.adamson @ 2025-06-25 17:04 UTC (permalink / raw)
To: Christoph Hellwig, Keith Busch, Sagi Grimberg
Cc: linux-nvme, Yi Zhang, John Garry
On 6/24/25 11:39 PM, Christoph Hellwig wrote:
> Hi all,
>
> this series tries to fix the atomics limit check to limit it to
> the per-controller values and to the controller probing.
>
> I think this should solve the root cause of the report from Yi Zhang,
> but needs new verification.
>
> Changes since v1:
> - initialize subsys->awupf in nvme_init_subsystem to make sure that only
> happens once per controller
>
> Diffstat:
> core.c | 83 +++++++++++++++++++++++++++++------------------------------------
> nvme.h | 3 --
> 2 files changed, 39 insertions(+), 47 deletions(-)
CTRL 0 - AWUN=31 AWUPF=15 nvme0n1
CTRL 1 - AWUN=31 AWUPF=31
CTRL 2 - AWUN=15 AWUPF=7
CTRL 3 - AWUN=15 AWUPF=15
NS - NAWUN=31 NAWUPF=15 nvme0n2
NS - NAWUN=127 NAWUPF=63 nvme0n3
[root@localhost ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS
sda 8:0 0 40G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 39G 0 part
└─ol-root 252:0 0 39G 0 lvm /
sr0 11:0 1 1024M 0 rom
nvme0n1 259:2 0 250G 0 disk
nvme0n2 259:3 0 250G 0 disk
nvme0n3 259:5 0 250G 0 disk
[root@localhost ~]# nvme id-ctrl /dev/nvme0n1 | grep awupf
awupf : 15
[root@localhost ~]# cat /sys/block/nvme0n1/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n2 | grep nawupf
nawupf : 15
[root@localhost ~]# cat /sys/block/nvme0n2/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n3 | grep nawupf
nawupf : 63
[root@localhost ~]# cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
32768
[root@localhost ~]# dmesg | grep nvme | grep AWUPF
[ 2.761599] nvme nvme2: inconsistent AWUPF, controller not added (7/15).
[ 2.765669] nvme nvme1: inconsistent AWUPF, controller not added (31/15).
[root@localhost ~]#
Why was CTRL1 (nvme1) not added? AWUPF of 31 works with an
atomic_write_max_bytes of 8192.
Alan
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: fix atomic limits check v2
2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
@ 2025-06-26 5:22 ` Christoph Hellwig
0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-26 5:22 UTC (permalink / raw)
To: alan.adamson
Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
Yi Zhang, John Garry
On Wed, Jun 25, 2025 at 10:04:55AM -0700, alan.adamson@oracle.com wrote:
> [root@localhost ~]# nvme id-ns /dev/nvme0n2 | grep nawupf
> nawupf : 15
> [root@localhost ~]# cat /sys/block/nvme0n2/queue/atomic_write_max_bytes
> 8192
> [root@localhost ~]# nvme id-ns /dev/nvme0n3 | grep nawupf
> nawupf : 63
> [root@localhost ~]# cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
> 32768
> [root@localhost ~]# dmesg | grep nvme | grep AWUPF
> [ 2.761599] nvme nvme2: inconsistent AWUPF, controller not added (7/15).
> [ 2.765669] nvme nvme1: inconsistent AWUPF, controller not added (31/15).
> [root@localhost ~]#
>
> Why was CTRL1 (nvme1) not added? AWUPF of 31 works with a
> atomic_write_max_bytes of 8192.
Because the device reports different AWUPF values for controllers in the
same subsystem (nvme1 reports 31, while the subsystem value is 15).
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] nvme: fix atomic write size validation
2025-06-25 6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
@ 2025-06-27 0:05 ` Uday Shankar
2025-06-27 0:18 ` Yi Zhang
0 siblings, 1 reply; 7+ messages in thread
From: Uday Shankar @ 2025-06-27 0:05 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, Alan Adamson,
John Garry, Luis Chamberlain
On Wed, Jun 25, 2025 at 08:39:56AM +0200, Christoph Hellwig wrote:
> Don't mix the namespace and controller values, and validate the
> per-controller limit when probing the controller. This avoid spurious
> failures for controllers with namespaces that have different namespaces
nit: having namespaces with different logical block sizes
> with different logical block sizes, or report the per-namespace values
> only for some namespaces.
>
> It also fixes a missing queue_limits_cancel_update in an error path by
> removing that error path.
>
> Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> Reported-by: Yi Zhang <yi.zhang@redhat.com>
I couldn't find the report on linux-nvme; if it is public, can you
include a "Closes:" link here?
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> Reviewed-by: John Garry <john.g.garry@oracle.com>
> Tested-by: Yi Zhang <yi.zhang@redhat.com>
I also saw a problem in a system running on 6.16-rc3 with several NVMe
subsystems, each containing one controller with AWUPF=0, each containing
two namespaces with NSABP=0. One namespace has a 512-byte LBA size while
the other has a 4096-byte LBA size, and some namespaces failed to add:
# dmesg | grep Inconsistent
[ 5.537494] nvme nvme4: nvme4n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[ 5.539038] nvme nvme6: nvme6n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[ 5.560079] nvme nvme7: nvme7n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[ 5.595093] nvme nvme3: nvme3n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[ 5.597627] nvme nvme8: nvme8n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[ 5.600007] nvme nvme0: nvme0n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[ 5.605748] nvme nvme5: nvme5n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[ 5.608961] nvme nvme11: nvme11n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[ 5.618011] nvme nvme12: nvme12n1: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[ 5.618251] nvme nvme10: nvme10n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
Note that despite the messages saying otherwise, only those log lines
containing "Subsystem=512 bytes, Controller/Namespace=4096 bytes"
actually failed to add a namespace - the others had both namespaces
added just fine. I guess it isn't deterministic as to which namespace
was added first.
Anyways, the problem was fixed by this patch set.
Tested-by: Uday Shankar <ushankar@purestorage.com>
> ---
> drivers/nvme/host/core.c | 33 +++++++++++----------------------
> drivers/nvme/host/nvme.h | 3 +--
> 2 files changed, 12 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 520fb5f1e214..e533d791955d 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
> * no clear language in the specification prohibiting different
> * values for different controllers in the subsystem.
> */
> - atomic_bs = (1 + ns->ctrl->awupf) * bs;
> - }
> -
> - if (!ns->ctrl->subsys->atomic_bs) {
> - ns->ctrl->subsys->atomic_bs = atomic_bs;
> - } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> - dev_err_ratelimited(ns->ctrl->device,
> - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
> - ns->disk ? ns->disk->disk_name : "?",
> - ns->ctrl->subsys->atomic_bs,
> - atomic_bs);
> + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
> }
>
> lim->atomic_write_hw_max = atomic_bs;
> @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> if (!nvme_update_disk_info(ns, id, &lim))
> capacity = 0;
>
> - /*
> - * Validate the max atomic write size fits within the subsystem's
> - * atomic write capabilities.
> - */
> - if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
> - blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> - ret = -ENXIO;
> - goto out;
> - }
> -
> nvme_config_discard(ns, &lim);
> if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> ns->head->ids.csi == NVME_CSI_ZNS)
> @@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
> memcpy(subsys->model, id->mn, sizeof(subsys->model));
> subsys->vendor_id = le16_to_cpu(id->vid);
> subsys->cmic = id->cmic;
> + subsys->awupf = le16_to_cpu(id->awupf);
>
> /* Versions prior to 1.4 don't necessarily report a valid type */
> if (id->cntrltype == NVME_CTRL_DISC ||
> @@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
> if (ret)
> goto out_free;
> }
> +
> + if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> + dev_err_ratelimited(ctrl->device,
> + "inconsistent AWUPF, controller not added (%u/%u).\n",
> + le16_to_cpu(id->awupf), ctrl->subsys->awupf);
> + ret = -EINVAL;
> + goto out_free;
> + }
> +
Could you explain (and perhaps add a comment here) why all controllers
in a subsystem must report the same awupf? Is it because namespaces may
inherit awupf from the controller, and may be reachable through multiple
controllers for multipath?
> memcpy(ctrl->subsys->firmware_rev, id->fr,
> sizeof(ctrl->subsys->firmware_rev));
>
> @@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
> dev_pm_qos_expose_latency_tolerance(ctrl->device);
> else if (!ctrl->apst_enabled && prev_apst_enabled)
> dev_pm_qos_hide_latency_tolerance(ctrl->device);
> - ctrl->awupf = le16_to_cpu(id->awupf);
> out_free:
> kfree(id);
> return ret;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index a468cdc5b5cb..7df2ea21851f 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -410,7 +410,6 @@ struct nvme_ctrl {
>
> enum nvme_ctrl_type cntrltype;
> enum nvme_dctype dctype;
> - u16 awupf; /* 0's based value. */
> };
>
> static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
> @@ -443,11 +442,11 @@ struct nvme_subsystem {
> u8 cmic;
> enum nvme_subsys_type subtype;
> u16 vendor_id;
> + u16 awupf; /* 0's based value. */
> struct ida ns_ida;
> #ifdef CONFIG_NVME_MULTIPATH
> enum nvme_iopolicy iopolicy;
> #endif
> - u32 atomic_bs;
> };
>
> /*
> --
> 2.47.2
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] nvme: fix atomic write size validation
2025-06-27 0:05 ` Uday Shankar
@ 2025-06-27 0:18 ` Yi Zhang
0 siblings, 0 replies; 7+ messages in thread
From: Yi Zhang @ 2025-06-27 0:18 UTC (permalink / raw)
To: Uday Shankar
Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
Alan Adamson, John Garry, Luis Chamberlain
On Fri, Jun 27, 2025 at 8:05 AM Uday Shankar <ushankar@purestorage.com> wrote:
>
> On Wed, Jun 25, 2025 at 08:39:56AM +0200, Christoph Hellwig wrote:
> > Don't mix the namespace and controller values, and validate the
> > per-controller limit when probing the controller. This avoid spurious
> > failures for controllers with namespaces that have different namespaces
>
> nit: having namespaces with different logical block sizes
>
> > with different logical block sizes, or report the per-namespace values
> > only for some namespaces.
> >
> > It also fixes a missing queue_limits_cancel_update in an error path by
> > removing that error path.
> >
> > Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> > Reported-by: Yi Zhang <yi.zhang@redhat.com>
>
> I couldn't find the report on linux-nvme; if it is public, can you
> include a "Closes:" link here?
Yes, it was reported here:
https://lore.kernel.org/linux-nvme/CAHj4cs93WutyoPLFMr0JidHhRAxHC1ZDcj-RvdnX=R7OaV5ejg@mail.gmail.com/
>
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > Tested-by: Yi Zhang <yi.zhang@redhat.com>
>
> I also saw a problem in a system running on 6.16-rc3 with several NVMe
> subsystems, each containing one controller with AWUPF=0, each containing
> two namespaces with NSABP=0. One namespace has a 512-byte LBA size while
> the other has a 4096-byte LBA size, and some namespaces failed to add:
>
> # dmesg | grep Inconsistent
> [ 5.537494] nvme nvme4: nvme4n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [ 5.539038] nvme nvme6: nvme6n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [ 5.560079] nvme nvme7: nvme7n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [ 5.595093] nvme nvme3: nvme3n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [ 5.597627] nvme nvme8: nvme8n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [ 5.600007] nvme nvme0: nvme0n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [ 5.605748] nvme nvme5: nvme5n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [ 5.608961] nvme nvme11: nvme11n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [ 5.618011] nvme nvme12: nvme12n1: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [ 5.618251] nvme nvme10: nvme10n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
>
> Note that despite the messages saying otherwise, only those log lines
> containing "Subsystem=512 bytes, Controller/Namespace=4096 bytes"
> actually failed to add a namespace - the others had both namespaces
> added just fine. I guess it isn't deterministic as to which namespace
> was added first.
>
> Anyways, the problem was fixed by this patch set.
>
> Tested-by: Uday Shankar <ushankar@purestorage.com>
>
> > ---
> > drivers/nvme/host/core.c | 33 +++++++++++----------------------
> > drivers/nvme/host/nvme.h | 3 +--
> > 2 files changed, 12 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 520fb5f1e214..e533d791955d 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
> > * no clear language in the specification prohibiting different
> > * values for different controllers in the subsystem.
> > */
> > - atomic_bs = (1 + ns->ctrl->awupf) * bs;
> > - }
> > -
> > - if (!ns->ctrl->subsys->atomic_bs) {
> > - ns->ctrl->subsys->atomic_bs = atomic_bs;
> > - } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> > - dev_err_ratelimited(ns->ctrl->device,
> > - "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
> > - ns->disk ? ns->disk->disk_name : "?",
> > - ns->ctrl->subsys->atomic_bs,
> > - atomic_bs);
> > + atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
> > }
> >
> > lim->atomic_write_hw_max = atomic_bs;
> > @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> > if (!nvme_update_disk_info(ns, id, &lim))
> > capacity = 0;
> >
> > - /*
> > - * Validate the max atomic write size fits within the subsystem's
> > - * atomic write capabilities.
> > - */
> > - if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
> > - blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> > - ret = -ENXIO;
> > - goto out;
> > - }
> > -
> > nvme_config_discard(ns, &lim);
> > if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> > ns->head->ids.csi == NVME_CSI_ZNS)
> > @@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
> > memcpy(subsys->model, id->mn, sizeof(subsys->model));
> > subsys->vendor_id = le16_to_cpu(id->vid);
> > subsys->cmic = id->cmic;
> > + subsys->awupf = le16_to_cpu(id->awupf);
> >
> > /* Versions prior to 1.4 don't necessarily report a valid type */
> > if (id->cntrltype == NVME_CTRL_DISC ||
> > @@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
> > if (ret)
> > goto out_free;
> > }
> > +
> > + if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> > + dev_err_ratelimited(ctrl->device,
> > + "inconsistent AWUPF, controller not added (%u/%u).\n",
> > + le16_to_cpu(id->awupf), ctrl->subsys->awupf);
> > + ret = -EINVAL;
> > + goto out_free;
> > + }
> > +
>
> Could you explain (and perhaps add a comment here) why all controllers
> in a subsystem must report the same awupf? Is it because namespaces may
> inherit awupf from the controller, and may be reachable through multiple
> controllers for multipath?
>
> > memcpy(ctrl->subsys->firmware_rev, id->fr,
> > sizeof(ctrl->subsys->firmware_rev));
> >
> > @@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
> > dev_pm_qos_expose_latency_tolerance(ctrl->device);
> > else if (!ctrl->apst_enabled && prev_apst_enabled)
> > dev_pm_qos_hide_latency_tolerance(ctrl->device);
> > - ctrl->awupf = le16_to_cpu(id->awupf);
> > out_free:
> > kfree(id);
> > return ret;
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index a468cdc5b5cb..7df2ea21851f 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -410,7 +410,6 @@ struct nvme_ctrl {
> >
> > enum nvme_ctrl_type cntrltype;
> > enum nvme_dctype dctype;
> > - u16 awupf; /* 0's based value. */
> > };
> >
> > static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
> > @@ -443,11 +442,11 @@ struct nvme_subsystem {
> > u8 cmic;
> > enum nvme_subsys_type subtype;
> > u16 vendor_id;
> > + u16 awupf; /* 0's based value. */
> > struct ida ns_ida;
> > #ifdef CONFIG_NVME_MULTIPATH
> > enum nvme_iopolicy iopolicy;
> > #endif
> > - u32 atomic_bs;
> > };
> >
> > /*
> > --
> > 2.47.2
> >
> >
>
--
Best Regards,
Yi Zhang
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2025-06-27 0:49 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-25 6:39 fix atomic limits check v2 Christoph Hellwig
2025-06-25 6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
2025-06-25 6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
2025-06-27 0:05 ` Uday Shankar
2025-06-27 0:18 ` Yi Zhang
2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
2025-06-26 5:22 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).