linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* fix atomic limits check v2
@ 2025-06-25  6:39 Christoph Hellwig
  2025-06-25  6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-25  6:39 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg; +Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry

Hi all,

this series tries to fix the atomic limits check by restricting it to
the per-controller values and performing it at controller probe time.

I think this should solve the root cause of the report from Yi Zhang,
but needs new verification.

Changes since v1:
 - initialize subsys->awupf in nvme_init_subsystem to make sure that only
   happens once per controller

Diffstat:
 core.c |   83 +++++++++++++++++++++++++++++------------------------------------
 nvme.h |    3 --
 2 files changed, 39 insertions(+), 47 deletions(-)


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/2] nvme: refactor the atomic write unit detection
  2025-06-25  6:39 fix atomic limits check v2 Christoph Hellwig
@ 2025-06-25  6:39 ` Christoph Hellwig
  2025-06-25  6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
  2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
  2 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-25  6:39 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry, Luis Chamberlain

Move all the code out of nvme_update_disk_info into the helper, and
rename the helper to have a somewhat less clumsy name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
---
 drivers/nvme/host/core.c | 72 +++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 724f5732786c..520fb5f1e214 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2015,21 +2015,51 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
 }
 
 
-static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
-			struct nvme_id_ns *id, struct queue_limits *lim,
-			u32 bs, u32 atomic_bs)
+static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
+		struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
 {
-	unsigned int boundary = 0;
+	u32 atomic_bs, boundary = 0;
 
-	if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
-		if (le16_to_cpu(id->nabspf))
+	/*
+	 * We do not support an offset for the atomic boundaries.
+	 */
+	if (id->nabo)
+		return bs;
+
+	if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
+		/*
+		 * Use the per-namespace atomic write unit when available.
+		 */
+		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+		if (id->nabspf)
 			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+	} else {
+		/*
+		 * Use the controller wide atomic write unit.  This sucks
+		 * because the limit is defined in terms of logical blocks while
+		 * namespaces can have different formats, and because there is
+		 * no clear language in the specification prohibiting different
+		 * values for different controllers in the subsystem.
+		 */
+		atomic_bs = (1 + ns->ctrl->awupf) * bs;
+	}
+
+	if (!ns->ctrl->subsys->atomic_bs) {
+		ns->ctrl->subsys->atomic_bs = atomic_bs;
+	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
+		dev_err_ratelimited(ns->ctrl->device,
+			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
+			ns->disk ? ns->disk->disk_name : "?",
+			ns->ctrl->subsys->atomic_bs,
+			atomic_bs);
 	}
+
 	lim->atomic_write_hw_max = atomic_bs;
 	lim->atomic_write_hw_boundary = boundary;
 	lim->atomic_write_hw_unit_min = bs;
 	lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
 	lim->features |= BLK_FEAT_ATOMIC_WRITES;
+	return atomic_bs;
 }
 
 static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
@@ -2067,34 +2097,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
 		valid = false;
 	}
 
-	atomic_bs = phys_bs = bs;
-	if (id->nabo == 0) {
-		/*
-		 * Bit 1 indicates whether NAWUPF is defined for this namespace
-		 * and whether it should be used instead of AWUPF. If NAWUPF ==
-		 * 0 then AWUPF must be used instead.
-		 */
-		if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
-			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
-		else
-			atomic_bs = (1 + ns->ctrl->awupf) * bs;
-
-		/*
-		 * Set subsystem atomic bs.
-		 */
-		if (ns->ctrl->subsys->atomic_bs) {
-			if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
-				dev_err_ratelimited(ns->ctrl->device,
-					"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
-					ns->disk ? ns->disk->disk_name : "?",
-					ns->ctrl->subsys->atomic_bs,
-					atomic_bs);
-			}
-		} else
-			ns->ctrl->subsys->atomic_bs = atomic_bs;
-
-		nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
-	}
+	phys_bs = bs;
+	atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
 
 	if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
 		/* NPWG = Namespace Preferred Write Granularity */
-- 
2.47.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/2] nvme: fix atomic write size validation
  2025-06-25  6:39 fix atomic limits check v2 Christoph Hellwig
  2025-06-25  6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
@ 2025-06-25  6:39 ` Christoph Hellwig
  2025-06-27  0:05   ` Uday Shankar
  2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
  2 siblings, 1 reply; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-25  6:39 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry, Luis Chamberlain

Don't mix the namespace and controller values, and validate the
per-controller limit when probing the controller.  This avoids spurious
failures for controllers that have namespaces with different logical
block sizes, or that report the per-namespace values only for some
namespaces.

It also fixes a missing queue_limits_cancel_update in an error path by
removing that error path.

Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
---
 drivers/nvme/host/core.c | 33 +++++++++++----------------------
 drivers/nvme/host/nvme.h |  3 +--
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 520fb5f1e214..e533d791955d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
 		 * no clear language in the specification prohibiting different
 		 * values for different controllers in the subsystem.
 		 */
-		atomic_bs = (1 + ns->ctrl->awupf) * bs;
-	}
-
-	if (!ns->ctrl->subsys->atomic_bs) {
-		ns->ctrl->subsys->atomic_bs = atomic_bs;
-	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
-		dev_err_ratelimited(ns->ctrl->device,
-			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
-			ns->disk ? ns->disk->disk_name : "?",
-			ns->ctrl->subsys->atomic_bs,
-			atomic_bs);
+		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
 	}
 
 	lim->atomic_write_hw_max = atomic_bs;
@@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (!nvme_update_disk_info(ns, id, &lim))
 		capacity = 0;
 
-	/*
-	 * Validate the max atomic write size fits within the subsystem's
-	 * atomic write capabilities.
-	 */
-	if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
-		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
-		ret = -ENXIO;
-		goto out;
-	}
-
 	nvme_config_discard(ns, &lim);
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	memcpy(subsys->model, id->mn, sizeof(subsys->model));
 	subsys->vendor_id = le16_to_cpu(id->vid);
 	subsys->cmic = id->cmic;
+	subsys->awupf = le16_to_cpu(id->awupf);
 
 	/* Versions prior to 1.4 don't necessarily report a valid type */
 	if (id->cntrltype == NVME_CTRL_DISC ||
@@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		if (ret)
 			goto out_free;
 	}
+
+	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
+		dev_err_ratelimited(ctrl->device,
+			"inconsistent AWUPF, controller not added (%u/%u).\n",
+			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
 	memcpy(ctrl->subsys->firmware_rev, id->fr,
 	       sizeof(ctrl->subsys->firmware_rev));
 
@@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
-	ctrl->awupf = le16_to_cpu(id->awupf);
 out_free:
 	kfree(id);
 	return ret;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a468cdc5b5cb..7df2ea21851f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -410,7 +410,6 @@ struct nvme_ctrl {
 
 	enum nvme_ctrl_type cntrltype;
 	enum nvme_dctype dctype;
-	u16 awupf; /* 0's based value. */
 };
 
 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -443,11 +442,11 @@ struct nvme_subsystem {
 	u8			cmic;
 	enum nvme_subsys_type	subtype;
 	u16			vendor_id;
+	u16			awupf; /* 0's based value. */
 	struct ida		ns_ida;
 #ifdef CONFIG_NVME_MULTIPATH
 	enum nvme_iopolicy	iopolicy;
 #endif
-	u32			atomic_bs;
 };
 
 /*
-- 
2.47.2



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: fix atomic limits check v2
  2025-06-25  6:39 fix atomic limits check v2 Christoph Hellwig
  2025-06-25  6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
  2025-06-25  6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
@ 2025-06-25 17:04 ` alan.adamson
  2025-06-26  5:22   ` Christoph Hellwig
  2 siblings, 1 reply; 7+ messages in thread
From: alan.adamson @ 2025-06-25 17:04 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, John Garry


On 6/24/25 11:39 PM, Christoph Hellwig wrote:
> Hi all,
>
> this series tries to fix the atomics limit check to limit it to
> the per-controller values and to the controller probing.
>
> I think this should solve the root cause of the report from Yi Zhang,
> but needs new verification.
>
> Changes since v1:
>   - initialize subsys->awupf in nvme_init_subsystem to make sure that only
>     happens once per controller
>
> Diffstat:
>   core.c |   83 +++++++++++++++++++++++++++++------------------------------------
>   nvme.h |    3 --
>   2 files changed, 39 insertions(+), 47 deletions(-)
CTRL 0 - AWUN=31 AWUPF=15    nvme0n1
CTRL 1 - AWUN=31 AWUPF=31
CTRL 2 - AWUN=15 AWUPF=7
CTRL 3 - AWUN=15 AWUPF=15
     NS - NAWUN=31 NAWUPF=15  nvme0n2
     NS - NAWUN=127 NAWUPF=63 nvme0n3

[root@localhost ~]# lsblk
NAME        MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
sda           8:0    0   40G  0 disk
├─sda1        8:1    0    1G  0 part /boot
└─sda2        8:2    0   39G  0 part
   └─ol-root 252:0    0   39G  0 lvm  /
sr0          11:0    1 1024M  0 rom
nvme0n1     259:2    0  250G  0 disk
nvme0n2     259:3    0  250G  0 disk
nvme0n3     259:5    0  250G  0 disk
[root@localhost ~]# nvme id-ctrl /dev/nvme0n1 | grep awupf
awupf     : 15
[root@localhost ~]# cat /sys/block/nvme0n1/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n2 | grep nawupf
nawupf  : 15
[root@localhost ~]# cat /sys/block/nvme0n2/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n3 | grep nawupf
nawupf  : 63
[root@localhost ~]# cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
32768
[root@localhost ~]# dmesg | grep nvme | grep AWUPF
[    2.761599] nvme nvme2: inconsistent AWUPF, controller not added (7/15).
[    2.765669] nvme nvme1: inconsistent AWUPF, controller not added (31/15).
[root@localhost ~]#

Why was CTRL1 (nvme1) not added? AWUPF of 31 works with an
atomic_write_max_bytes of 8192.


Alan



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: fix atomic limits check v2
  2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
@ 2025-06-26  5:22   ` Christoph Hellwig
  0 siblings, 0 replies; 7+ messages in thread
From: Christoph Hellwig @ 2025-06-26  5:22 UTC (permalink / raw)
  To: alan.adamson
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Yi Zhang, John Garry

On Wed, Jun 25, 2025 at 10:04:55AM -0700, alan.adamson@oracle.com wrote:
> [root@localhost ~]# nvme id-ns /dev/nvme0n2 | grep nawupf
> nawupf  : 15
> [root@localhost ~]# cat /sys/block/nvme0n2/queue/atomic_write_max_bytes
> 8192
> [root@localhost ~]# nvme id-ns /dev/nvme0n3 | grep nawupf
> nawupf  : 63
> [root@localhost ~]# cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
> 32768
> [root@localhost ~]# dmesg | grep nvme | grep AWUPF
> [    2.761599] nvme nvme2: inconsistent AWUPF, controller not added (7/15).
> [    2.765669] nvme nvme1: inconsistent AWUPF, controller not added (31/15).
> [root@localhost ~]#
>
> Why was CTRL1 (nvme1) not added? AWUPF of 31 works with a 
> atomic_write_max_bytes of 8192.

Because it reports different AWUPF for controllers in the same subsystem.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write size validation
  2025-06-25  6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
@ 2025-06-27  0:05   ` Uday Shankar
  2025-06-27  0:18     ` Yi Zhang
  0 siblings, 1 reply; 7+ messages in thread
From: Uday Shankar @ 2025-06-27  0:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, Alan Adamson,
	John Garry, Luis Chamberlain

On Wed, Jun 25, 2025 at 08:39:56AM +0200, Christoph Hellwig wrote:
> Don't mix the namespace and controller values, and validate the
> per-controller limit when probing the controller.  This avoid spurious
> failures for controllers with namespaces that have different namespaces

nit: having namespaces with different logical block sizes

> with different logical block sizes, or report the per-namespace values
> only for some namespaces.
> 
> It also fixes a missing queue_limits_cancel_update in an error path by
> removing that error path.
> 
> Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> Reported-by: Yi Zhang <yi.zhang@redhat.com>

I couldn't find the report on linux-nvme; if it is public, can you
include a "Closes:" link here?

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> Reviewed-by: John Garry <john.g.garry@oracle.com>
> Tested-by: Yi Zhang <yi.zhang@redhat.com>

I also saw a problem in a system running on 6.16-rc3 with several NVMe
subsystems, each containing one controller with AWUPF=0, each containing
two namespaces with NSABP=0. One namespace has a 512-byte LBA size while
the other has a 4096-byte LBA size, and some namespaces failed to add:

# dmesg | grep Inconsistent
[    5.537494] nvme nvme4: nvme4n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.539038] nvme nvme6: nvme6n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.560079] nvme nvme7: nvme7n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.595093] nvme nvme3: nvme3n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.597627] nvme nvme8: nvme8n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.600007] nvme nvme0: nvme0n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.605748] nvme nvme5: nvme5n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
[    5.608961] nvme nvme11: nvme11n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.618011] nvme nvme12: nvme12n1: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
[    5.618251] nvme nvme10: nvme10n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes

Note that despite the messages saying otherwise, only those log lines
containing "Subsystem=512 bytes, Controller/Namespace=4096 bytes"
actually failed to add a namespace - the others had both namespaces
added just fine. I guess it isn't deterministic as to which namespace
was added first.

Anyways, the problem was fixed by this patch set.

Tested-by: Uday Shankar <ushankar@purestorage.com>

> ---
>  drivers/nvme/host/core.c | 33 +++++++++++----------------------
>  drivers/nvme/host/nvme.h |  3 +--
>  2 files changed, 12 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 520fb5f1e214..e533d791955d 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>  		 * no clear language in the specification prohibiting different
>  		 * values for different controllers in the subsystem.
>  		 */
> -		atomic_bs = (1 + ns->ctrl->awupf) * bs;
> -	}
> -
> -	if (!ns->ctrl->subsys->atomic_bs) {
> -		ns->ctrl->subsys->atomic_bs = atomic_bs;
> -	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> -		dev_err_ratelimited(ns->ctrl->device,
> -			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
> -			ns->disk ? ns->disk->disk_name : "?",
> -			ns->ctrl->subsys->atomic_bs,
> -			atomic_bs);
> +		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
>  	}
>  
>  	lim->atomic_write_hw_max = atomic_bs;
> @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>  	if (!nvme_update_disk_info(ns, id, &lim))
>  		capacity = 0;
>  
> -	/*
> -	 * Validate the max atomic write size fits within the subsystem's
> -	 * atomic write capabilities.
> -	 */
> -	if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
> -		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> -		ret = -ENXIO;
> -		goto out;
> -	}
> -
>  	nvme_config_discard(ns, &lim);
>  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>  	    ns->head->ids.csi == NVME_CSI_ZNS)
> @@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
>  	memcpy(subsys->model, id->mn, sizeof(subsys->model));
>  	subsys->vendor_id = le16_to_cpu(id->vid);
>  	subsys->cmic = id->cmic;
> +	subsys->awupf = le16_to_cpu(id->awupf);
>  
>  	/* Versions prior to 1.4 don't necessarily report a valid type */
>  	if (id->cntrltype == NVME_CTRL_DISC ||
> @@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
>  		if (ret)
>  			goto out_free;
>  	}
> +
> +	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> +		dev_err_ratelimited(ctrl->device,
> +			"inconsistent AWUPF, controller not added (%u/%u).\n",
> +			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
> +		ret = -EINVAL;
> +		goto out_free;
> +	}
> +

Could you explain (and perhaps add a comment here) why all controllers
in a subsystem must report the same awupf? Is it because namespaces may
inherit awupf from the controller, and may be reachable through multiple
controllers for multipath?

>  	memcpy(ctrl->subsys->firmware_rev, id->fr,
>  	       sizeof(ctrl->subsys->firmware_rev));
>  
> @@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
>  		dev_pm_qos_expose_latency_tolerance(ctrl->device);
>  	else if (!ctrl->apst_enabled && prev_apst_enabled)
>  		dev_pm_qos_hide_latency_tolerance(ctrl->device);
> -	ctrl->awupf = le16_to_cpu(id->awupf);
>  out_free:
>  	kfree(id);
>  	return ret;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index a468cdc5b5cb..7df2ea21851f 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -410,7 +410,6 @@ struct nvme_ctrl {
>  
>  	enum nvme_ctrl_type cntrltype;
>  	enum nvme_dctype dctype;
> -	u16 awupf; /* 0's based value. */
>  };
>  
>  static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
> @@ -443,11 +442,11 @@ struct nvme_subsystem {
>  	u8			cmic;
>  	enum nvme_subsys_type	subtype;
>  	u16			vendor_id;
> +	u16			awupf; /* 0's based value. */
>  	struct ida		ns_ida;
>  #ifdef CONFIG_NVME_MULTIPATH
>  	enum nvme_iopolicy	iopolicy;
>  #endif
> -	u32			atomic_bs;
>  };
>  
>  /*
> -- 
> 2.47.2
> 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write size validation
  2025-06-27  0:05   ` Uday Shankar
@ 2025-06-27  0:18     ` Yi Zhang
  0 siblings, 0 replies; 7+ messages in thread
From: Yi Zhang @ 2025-06-27  0:18 UTC (permalink / raw)
  To: Uday Shankar
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Alan Adamson, John Garry, Luis Chamberlain

On Fri, Jun 27, 2025 at 8:05 AM Uday Shankar <ushankar@purestorage.com> wrote:
>
> On Wed, Jun 25, 2025 at 08:39:56AM +0200, Christoph Hellwig wrote:
> > Don't mix the namespace and controller values, and validate the
> > per-controller limit when probing the controller.  This avoid spurious
> > failures for controllers with namespaces that have different namespaces
>
> nit: having namespaces with different logical block sizes
>
> > with different logical block sizes, or report the per-namespace values
> > only for some namespaces.
> >
> > It also fixes a missing queue_limits_cancel_update in an error path by
> > removing that error path.
> >
> > Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> > Reported-by: Yi Zhang <yi.zhang@redhat.com>
>
> I couldn't find the report on linux-nvme; if it is public, can you
> include a "Closes:" link here?

Yes, it was reported here:
https://lore.kernel.org/linux-nvme/CAHj4cs93WutyoPLFMr0JidHhRAxHC1ZDcj-RvdnX=R7OaV5ejg@mail.gmail.com/

>
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> > Reviewed-by: John Garry <john.g.garry@oracle.com>
> > Tested-by: Yi Zhang <yi.zhang@redhat.com>
>
> I also saw a problem in a system running on 6.16-rc3 with several NVMe
> subsystems, each containing one controller with AWUPF=0, each containing
> two namespaces with NSABP=0. One namespace has a 512-byte LBA size while
> the other has a 4096-byte LBA size, and some namespaces failed to add:
>
> # dmesg | grep Inconsistent
> [    5.537494] nvme nvme4: nvme4n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [    5.539038] nvme nvme6: nvme6n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [    5.560079] nvme nvme7: nvme7n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [    5.595093] nvme nvme3: nvme3n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [    5.597627] nvme nvme8: nvme8n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [    5.600007] nvme nvme0: nvme0n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [    5.605748] nvme nvme5: nvme5n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=512 bytes, Controller/Namespace=4096 bytes
> [    5.608961] nvme nvme11: nvme11n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [    5.618011] nvme nvme12: nvme12n1: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
> [    5.618251] nvme nvme10: nvme10n2: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=4096 bytes, Controller/Namespace=512 bytes
>
> Note that despite the messages saying otherwise, only those log lines
> containing "Subsystem=512 bytes, Controller/Namespace=4096 bytes"
> actually failed to add a namespace - the others had both namespaces
> added just fine. I guess it isn't deterministic as to which namespace
> was added first.
>
> Anyways, the problem was fixed by this patch set.
>
> Tested-by: Uday Shankar <ushankar@purestorage.com>
>
> > ---
> >  drivers/nvme/host/core.c | 33 +++++++++++----------------------
> >  drivers/nvme/host/nvme.h |  3 +--
> >  2 files changed, 12 insertions(+), 24 deletions(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 520fb5f1e214..e533d791955d 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
> >                * no clear language in the specification prohibiting different
> >                * values for different controllers in the subsystem.
> >                */
> > -             atomic_bs = (1 + ns->ctrl->awupf) * bs;
> > -     }
> > -
> > -     if (!ns->ctrl->subsys->atomic_bs) {
> > -             ns->ctrl->subsys->atomic_bs = atomic_bs;
> > -     } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> > -             dev_err_ratelimited(ns->ctrl->device,
> > -                     "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
> > -                     ns->disk ? ns->disk->disk_name : "?",
> > -                     ns->ctrl->subsys->atomic_bs,
> > -                     atomic_bs);
> > +             atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
> >       }
> >
> >       lim->atomic_write_hw_max = atomic_bs;
> > @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
> >       if (!nvme_update_disk_info(ns, id, &lim))
> >               capacity = 0;
> >
> > -     /*
> > -      * Validate the max atomic write size fits within the subsystem's
> > -      * atomic write capabilities.
> > -      */
> > -     if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
> > -             blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> > -             ret = -ENXIO;
> > -             goto out;
> > -     }
> > -
> >       nvme_config_discard(ns, &lim);
> >       if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> >           ns->head->ids.csi == NVME_CSI_ZNS)
> > @@ -3219,6 +3199,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
> >       memcpy(subsys->model, id->mn, sizeof(subsys->model));
> >       subsys->vendor_id = le16_to_cpu(id->vid);
> >       subsys->cmic = id->cmic;
> > +     subsys->awupf = le16_to_cpu(id->awupf);
> >
> >       /* Versions prior to 1.4 don't necessarily report a valid type */
> >       if (id->cntrltype == NVME_CTRL_DISC ||
> > @@ -3556,6 +3537,15 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
> >               if (ret)
> >                       goto out_free;
> >       }
> > +
> > +     if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> > +             dev_err_ratelimited(ctrl->device,
> > +                     "inconsistent AWUPF, controller not added (%u/%u).\n",
> > +                     le16_to_cpu(id->awupf), ctrl->subsys->awupf);
> > +             ret = -EINVAL;
> > +             goto out_free;
> > +     }
> > +
>
> Could you explain (and perhaps add a comment here) why all controllers
> in a subsystem must report the same awupf? Is it because namespaces may
> inherit awupf from the controller, and may be reachable through multiple
> controllers for multipath?
>
> >       memcpy(ctrl->subsys->firmware_rev, id->fr,
> >              sizeof(ctrl->subsys->firmware_rev));
> >
> > @@ -3651,7 +3641,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
> >               dev_pm_qos_expose_latency_tolerance(ctrl->device);
> >       else if (!ctrl->apst_enabled && prev_apst_enabled)
> >               dev_pm_qos_hide_latency_tolerance(ctrl->device);
> > -     ctrl->awupf = le16_to_cpu(id->awupf);
> >  out_free:
> >       kfree(id);
> >       return ret;
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index a468cdc5b5cb..7df2ea21851f 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -410,7 +410,6 @@ struct nvme_ctrl {
> >
> >       enum nvme_ctrl_type cntrltype;
> >       enum nvme_dctype dctype;
> > -     u16 awupf; /* 0's based value. */
> >  };
> >
> >  static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
> > @@ -443,11 +442,11 @@ struct nvme_subsystem {
> >       u8                      cmic;
> >       enum nvme_subsys_type   subtype;
> >       u16                     vendor_id;
> > +     u16                     awupf; /* 0's based value. */
> >       struct ida              ns_ida;
> >  #ifdef CONFIG_NVME_MULTIPATH
> >       enum nvme_iopolicy      iopolicy;
> >  #endif
> > -     u32                     atomic_bs;
> >  };
> >
> >  /*
> > --
> > 2.47.2
> >
> >
>


-- 
Best Regards,
  Yi Zhang



^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-06-27  0:49 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-25  6:39 fix atomic limits check v2 Christoph Hellwig
2025-06-25  6:39 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
2025-06-25  6:39 ` [PATCH 2/2] nvme: fix atomic write size validation Christoph Hellwig
2025-06-27  0:05   ` Uday Shankar
2025-06-27  0:18     ` Yi Zhang
2025-06-25 17:04 ` fix atomic limits check v2 alan.adamson
2025-06-26  5:22   ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).