linux-nvme.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* fix atomic limits check
@ 2025-06-11  5:54 Christoph Hellwig
  2025-06-11  5:54 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
                   ` (3 more replies)
  0 siblings, 4 replies; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-11  5:54 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg; +Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry

Hi all,

this series tries to fix the atomics limit check to limit it to
the per-controller values and to the controller probing.

I think this should solve the root cause of the report from Yi Zhang,
but needs new verification.

Diffstat:
 core.c |   84 ++++++++++++++++++++++++++++++-----------------------------------
 nvme.h |    3 --
 2 files changed, 40 insertions(+), 47 deletions(-)


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/2] nvme: refactor the atomic write unit detection
  2025-06-11  5:54 fix atomic limits check Christoph Hellwig
@ 2025-06-11  5:54 ` Christoph Hellwig
  2025-06-13  7:52   ` John Garry
  2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-11  5:54 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg; +Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry

Move all the code out of nvme_update_disk_info into the helper, and
rename the helper to have a somewhat less clumsy name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 72 +++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 724f5732786c..520fb5f1e214 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2015,21 +2015,51 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
 }
 
 
-static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
-			struct nvme_id_ns *id, struct queue_limits *lim,
-			u32 bs, u32 atomic_bs)
+static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
+		struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
 {
-	unsigned int boundary = 0;
+	u32 atomic_bs, boundary = 0;
 
-	if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
-		if (le16_to_cpu(id->nabspf))
+	/*
+	 * We do not support an offset for the atomic boundaries.
+	 */
+	if (id->nabo)
+		return bs;
+
+	if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
+		/*
+		 * Use the per-namespace atomic write unit when available.
+		 */
+		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+		if (id->nabspf)
 			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+	} else {
+		/*
+		 * Use the controller wide atomic write unit.  This sucks
+		 * because the limit is defined in terms of logical blocks while
+		 * namespaces can have different formats, and because there is
+		 * no clear language in the specification prohibiting different
+		 * values for different controllers in the subsystem.
+		 */
+		atomic_bs = (1 + ns->ctrl->awupf) * bs;
+	}
+
+	if (!ns->ctrl->subsys->atomic_bs) {
+		ns->ctrl->subsys->atomic_bs = atomic_bs;
+	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
+		dev_err_ratelimited(ns->ctrl->device,
+			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
+			ns->disk ? ns->disk->disk_name : "?",
+			ns->ctrl->subsys->atomic_bs,
+			atomic_bs);
 	}
+
 	lim->atomic_write_hw_max = atomic_bs;
 	lim->atomic_write_hw_boundary = boundary;
 	lim->atomic_write_hw_unit_min = bs;
 	lim->atomic_write_hw_unit_max = rounddown_pow_of_two(atomic_bs);
 	lim->features |= BLK_FEAT_ATOMIC_WRITES;
+	return atomic_bs;
 }
 
 static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
@@ -2067,34 +2097,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
 		valid = false;
 	}
 
-	atomic_bs = phys_bs = bs;
-	if (id->nabo == 0) {
-		/*
-		 * Bit 1 indicates whether NAWUPF is defined for this namespace
-		 * and whether it should be used instead of AWUPF. If NAWUPF ==
-		 * 0 then AWUPF must be used instead.
-		 */
-		if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
-			atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
-		else
-			atomic_bs = (1 + ns->ctrl->awupf) * bs;
-
-		/*
-		 * Set subsystem atomic bs.
-		 */
-		if (ns->ctrl->subsys->atomic_bs) {
-			if (atomic_bs != ns->ctrl->subsys->atomic_bs) {
-				dev_err_ratelimited(ns->ctrl->device,
-					"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
-					ns->disk ? ns->disk->disk_name : "?",
-					ns->ctrl->subsys->atomic_bs,
-					atomic_bs);
-			}
-		} else
-			ns->ctrl->subsys->atomic_bs = atomic_bs;
-
-		nvme_update_atomic_write_disk_info(ns, id, lim, bs, atomic_bs);
-	}
+	phys_bs = bs;
+	atomic_bs = nvme_configure_atomic_write(ns, id, lim, bs);
 
 	if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
 		/* NPWG = Namespace Preferred Write Granularity */
-- 
2.47.2



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-11  5:54 fix atomic limits check Christoph Hellwig
  2025-06-11  5:54 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
@ 2025-06-11  5:54 ` Christoph Hellwig
  2025-06-13  3:04   ` Yi Zhang
                     ` (3 more replies)
  2025-06-13 21:22 ` fix atomic limits check alan.adamson
  2025-06-17  4:56 ` Christoph Hellwig
  3 siblings, 4 replies; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-11  5:54 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg; +Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry

Don't mix the namespace and controller values, and validate the
per-controller limit when probing the controller.  This avoid spurious
failures for controllers with namespaces that have different namespaces
with different logical block sizes, or report the per-namespace values
only for some namespaces.

It also fixes a missing queue_limits_cancel_update in an error path by
removing that error path.

Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 34 ++++++++++++----------------------
 drivers/nvme/host/nvme.h |  3 +--
 2 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 520fb5f1e214..3da5ac71a9b0 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
 		 * no clear language in the specification prohibiting different
 		 * values for different controllers in the subsystem.
 		 */
-		atomic_bs = (1 + ns->ctrl->awupf) * bs;
-	}
-
-	if (!ns->ctrl->subsys->atomic_bs) {
-		ns->ctrl->subsys->atomic_bs = atomic_bs;
-	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
-		dev_err_ratelimited(ns->ctrl->device,
-			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
-			ns->disk ? ns->disk->disk_name : "?",
-			ns->ctrl->subsys->atomic_bs,
-			atomic_bs);
+		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
 	}
 
 	lim->atomic_write_hw_max = atomic_bs;
@@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (!nvme_update_disk_info(ns, id, &lim))
 		capacity = 0;
 
-	/*
-	 * Validate the max atomic write size fits within the subsystem's
-	 * atomic write capabilities.
-	 */
-	if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
-		blk_mq_unfreeze_queue(ns->disk->queue, memflags);
-		ret = -ENXIO;
-		goto out;
-	}
-
 	nvme_config_discard(ns, &lim);
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    ns->head->ids.csi == NVME_CSI_ZNS)
@@ -3555,7 +3535,18 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ret = nvme_init_effects(ctrl, id);
 		if (ret)
 			goto out_free;
+
+		ctrl->subsys->awupf = le16_to_cpu(id->awupf);
 	}
+
+	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
+		dev_err_ratelimited(ctrl->device,
+			"inconsistent AWUPF, controller not added (%u/%u).\n",
+			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
 	memcpy(ctrl->subsys->firmware_rev, id->fr,
 	       sizeof(ctrl->subsys->firmware_rev));
 
@@ -3651,7 +3642,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
 		dev_pm_qos_hide_latency_tolerance(ctrl->device);
-	ctrl->awupf = le16_to_cpu(id->awupf);
 out_free:
 	kfree(id);
 	return ret;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a468cdc5b5cb..7df2ea21851f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -410,7 +410,6 @@ struct nvme_ctrl {
 
 	enum nvme_ctrl_type cntrltype;
 	enum nvme_dctype dctype;
-	u16 awupf; /* 0's based value. */
 };
 
 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
@@ -443,11 +442,11 @@ struct nvme_subsystem {
 	u8			cmic;
 	enum nvme_subsys_type	subtype;
 	u16			vendor_id;
+	u16			awupf; /* 0's based value. */
 	struct ida		ns_ida;
 #ifdef CONFIG_NVME_MULTIPATH
 	enum nvme_iopolicy	iopolicy;
 #endif
-	u32			atomic_bs;
 };
 
 /*
-- 
2.47.2



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
@ 2025-06-13  3:04   ` Yi Zhang
  2025-06-13  8:03   ` John Garry
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 26+ messages in thread
From: Yi Zhang @ 2025-06-13  3:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Alan Adamson, John Garry

On Wed, Jun 11, 2025 at 1:56 PM Christoph Hellwig <hch@lst.de> wrote:
>
> Don't mix the namespace and controller values, and validate the
> per-controller limit when probing the controller.  This avoid spurious
> failures for controllers with namespaces that have different namespaces
> with different logical block sizes, or report the per-namespace values
> only for some namespaces.
>
> It also fixes a missing queue_limits_cancel_update in an error path by
> removing that error path.
>
> Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> Reported-by: Yi Zhang <yi.zhang@redhat.com>

Thanks for the fix:
Tested-by: Yi Zhang <yi.zhang@redhat.com>


> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/nvme/host/core.c | 34 ++++++++++++----------------------
>  drivers/nvme/host/nvme.h |  3 +--
>  2 files changed, 13 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 520fb5f1e214..3da5ac71a9b0 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2041,17 +2041,7 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>                  * no clear language in the specification prohibiting different
>                  * values for different controllers in the subsystem.
>                  */
> -               atomic_bs = (1 + ns->ctrl->awupf) * bs;
> -       }
> -
> -       if (!ns->ctrl->subsys->atomic_bs) {
> -               ns->ctrl->subsys->atomic_bs = atomic_bs;
> -       } else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> -               dev_err_ratelimited(ns->ctrl->device,
> -                       "%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
> -                       ns->disk ? ns->disk->disk_name : "?",
> -                       ns->ctrl->subsys->atomic_bs,
> -                       atomic_bs);
> +               atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
>         }
>
>         lim->atomic_write_hw_max = atomic_bs;
> @@ -2386,16 +2376,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
>         if (!nvme_update_disk_info(ns, id, &lim))
>                 capacity = 0;
>
> -       /*
> -        * Validate the max atomic write size fits within the subsystem's
> -        * atomic write capabilities.
> -        */
> -       if (lim.atomic_write_hw_max > ns->ctrl->subsys->atomic_bs) {
> -               blk_mq_unfreeze_queue(ns->disk->queue, memflags);
> -               ret = -ENXIO;
> -               goto out;
> -       }
> -
>         nvme_config_discard(ns, &lim);
>         if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>             ns->head->ids.csi == NVME_CSI_ZNS)
> @@ -3555,7 +3535,18 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
>                 ret = nvme_init_effects(ctrl, id);
>                 if (ret)
>                         goto out_free;
> +
> +               ctrl->subsys->awupf = le16_to_cpu(id->awupf);
>         }
> +
> +       if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> +               dev_err_ratelimited(ctrl->device,
> +                       "inconsistent AWUPF, controller not added (%u/%u).\n",
> +                       le16_to_cpu(id->awupf), ctrl->subsys->awupf);
> +               ret = -EINVAL;
> +               goto out_free;
> +       }
> +
>         memcpy(ctrl->subsys->firmware_rev, id->fr,
>                sizeof(ctrl->subsys->firmware_rev));
>
> @@ -3651,7 +3642,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
>                 dev_pm_qos_expose_latency_tolerance(ctrl->device);
>         else if (!ctrl->apst_enabled && prev_apst_enabled)
>                 dev_pm_qos_hide_latency_tolerance(ctrl->device);
> -       ctrl->awupf = le16_to_cpu(id->awupf);
>  out_free:
>         kfree(id);
>         return ret;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index a468cdc5b5cb..7df2ea21851f 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -410,7 +410,6 @@ struct nvme_ctrl {
>
>         enum nvme_ctrl_type cntrltype;
>         enum nvme_dctype dctype;
> -       u16 awupf; /* 0's based value. */
>  };
>
>  static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
> @@ -443,11 +442,11 @@ struct nvme_subsystem {
>         u8                      cmic;
>         enum nvme_subsys_type   subtype;
>         u16                     vendor_id;
> +       u16                     awupf; /* 0's based value. */
>         struct ida              ns_ida;
>  #ifdef CONFIG_NVME_MULTIPATH
>         enum nvme_iopolicy      iopolicy;
>  #endif
> -       u32                     atomic_bs;
>  };
>
>  /*
> --
> 2.47.2
>
>


--
Best Regards,
  Yi Zhang



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] nvme: refactor the atomic write unit detection
  2025-06-11  5:54 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
@ 2025-06-13  7:52   ` John Garry
  0 siblings, 0 replies; 26+ messages in thread
From: John Garry @ 2025-06-13  7:52 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, Alan Adamson

On 11/06/2025 06:54, Christoph Hellwig wrote:
> Move all the code out of nvme_update_disk_info into the helper, and
> rename the helper to have a somewhat less clumsy name.

I suppose that the name nvme_update_atomic_write_disk_info just comes 
from extending the caller name, nvme_update_disk_info

> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Regardless of a couple of nits below:
Reviewed-by: John Garry <john.g.garry@oracle.com>

> ---
>   drivers/nvme/host/core.c | 72 +++++++++++++++++++++-------------------
>   1 file changed, 38 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 724f5732786c..520fb5f1e214 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2015,21 +2015,51 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
>   }
>   
>   
> -static void nvme_update_atomic_write_disk_info(struct nvme_ns *ns,
> -			struct nvme_id_ns *id, struct queue_limits *lim,
> -			u32 bs, u32 atomic_bs)
> +static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
> +		struct nvme_id_ns *id, struct queue_limits *lim, u32 bs)
>   {
> -	unsigned int boundary = 0;
> +	u32 atomic_bs, boundary = 0;
>   
> -	if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) {
> -		if (le16_to_cpu(id->nabspf))
> +	/*
> +	 * We do not support an offset for the atomic boundaries.
> +	 */
> +	if (id->nabo)
> +		return bs;
> +
> +	if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) {
> +		/*
> +		 * Use the per-namespace atomic write unit when available.
> +		 */
> +		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
> +		if (id->nabspf)
>   			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
> +	} else {
> +		/*
> +		 * Use the controller wide atomic write unit.  This sucks

nit: I found the wording a bit ambiguous, as it sounds like the value in 
the controller-wide atomic write unit itself is somehow bad (which I 
assume is not the intention). How about rewording slightly:

Having to use this value sucks, as this limit is defined in terms of 
logical...

> +		 * because the limit is defined in terms of logical blocks while
> +		 * namespaces can have different formats, and because there is
> +		 * no clear language in the specification prohibiting different
> +		 * values for different controllers in the subsystem.
> +		 */
> +		atomic_bs = (1 + ns->ctrl->awupf) * bs;
> +	}
> +
> +	if (!ns->ctrl->subsys->atomic_bs) {
> +		ns->ctrl->subsys->atomic_bs = atomic_bs;
> +	} else if (ns->ctrl->subsys->atomic_bs != atomic_bs) {
> +		dev_err_ratelimited(ns->ctrl->device,
> +			"%s: Inconsistent Atomic Write Size, Namespace will not be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",

nit on pre-existing code: /s/Atomic Write Size/Atomic Write Sizes/

And, again for pre-existing code, maybe stop using the camelcase




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
  2025-06-13  3:04   ` Yi Zhang
@ 2025-06-13  8:03   ` John Garry
  2025-06-16  5:31     ` Christoph Hellwig
  2025-06-16 10:19   ` John Garry
  2025-06-17  7:36   ` John Garry
  3 siblings, 1 reply; 26+ messages in thread
From: John Garry @ 2025-06-13  8:03 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, Alan Adamson

On 11/06/2025 06:54, Christoph Hellwig wrote:
> Don't mix the namespace and controller values, and validate the
> per-controller limit when probing the controller.  This avoid spurious
> failures for controllers with namespaces that have different namespaces
> with different logical block sizes, or report the per-namespace values
> only for some namespaces.
> 
> It also fixes a missing queue_limits_cancel_update in an error path by
> removing that error path.

I suppose that many comments in 1/2 aren't relevant any more from this 
change.

For the immediate moment, I just want to test this a bit.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-11  5:54 fix atomic limits check Christoph Hellwig
  2025-06-11  5:54 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
  2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
@ 2025-06-13 21:22 ` alan.adamson
  2025-06-16  5:36   ` Christoph Hellwig
  2025-06-17  4:56 ` Christoph Hellwig
  3 siblings, 1 reply; 26+ messages in thread
From: alan.adamson @ 2025-06-13 21:22 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, John Garry

On 6/10/25 10:54 PM, Christoph Hellwig wrote:

> Hi all,
>
> this series tries to fix the atomics limit check to limit it to
> the per-controller values and to the controller probing.
>
> I think this should solve the root cause of the report from Yi Zhang,
> but needs new verification.
>
> Diffstat:
>   core.c |   84 ++++++++++++++++++++++++++++++-----------------------------------
>   nvme.h |    3 --
>   2 files changed, 40 insertions(+), 47 deletions(-)

Some testing with my qemu-nvme atomic write setup.  My qemu includes ns 
atomic parameters that isn't upstream yet.

-device nvme-subsys,id=subsys0 \
-device 
nvme,serial=deadbeef,id=nvme0,subsys=subsys0,atomic.dn=off,atomic.awun=31,atomic.awupf=15 
\
-drive id=ns1,file=/dev/nullb1,if=none \
-device nvme-ns,drive=ns1,bus=nvme0,nsid=1,zoned=false,shared=false \
-device 
nvme,serial=deadbeef,id=nvme1,subsys=subsys0,atomic.dn=off,atomic.awun=63,atomic.awupf=31 
\
-drive id=ns2,file=/dev/nullb2,if=none \
-device nvme-ns,drive=ns2,bus=nvme1,nsid=2,zoned=false,shared=false \
-device 
nvme,serial=deadbeef,id=nvme2,subsys=subsys0,atomic.dn=off,atomic.awun=15,atomic.awupf=7 
\
-drive id=ns3,file=/dev/nullb3,if=none \
-device 
nvme-ns,drive=ns3,bus=nvme2,nsid=3,zoned=false,shared=false,atomic.nawun=63,atomic.nawupf=31,atomic.nsfeat=true 
\
-device 
nvme,serial=deadbeef,id=nvme3,subsys=subsys0,atomic.dn=off,atomic.awun=15,atomic.awupf=7 
\
-drive id=ns4,file=/dev/nullb4,if=none \
-device 
nvme-ns,drive=ns4,bus=nvme3,nsid=4,zoned=false,shared=false,atomic.nawun=31,atomic.nawupf=15,atomic.nsfeat=true 
\
-drive id=ns5,file=/dev/nullb5,if=none \
-device 
nvme-ns,drive=ns5,bus=nvme3,nsid=5,zoned=false,shared=false,atomic.nawun=127,atomic.nawupf=63,atomic.nsfeat=true 
\
--nographic

A single subsystem with 4 controllers, 1 of the controllers has 2 
namespaces.

[root@localhost ~]# lsblk
NAME        MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
sda           8:0    0   40G  0 disk
├─sda1        8:1    0    1G  0 part /boot
└─sda2        8:2    0   39G  0 part
   └─ol-root 252:0    0   39G  0 lvm  /
sr0          11:0    1 1024M  0 rom
nvme1n1     259:1    0  250G  0 disk
nvme1n2     259:5    0  250G  0 disk
nvme1n3     259:6    0  250G  0 disk
nvme1n4     259:7    0  250G  0 disk
nvme1n5     259:9    0  250G  0 disk


[root@localhost ~]# cat testxx.sh
set -x
nvme id-ctrl /dev/$1 | grep cmic
nvme id-ctrl /dev/$1 | grep awupf
nvme id-ns /dev/$1 | grep nawupf
cat /sys/block/$1/queue/atomic_write_max_bytes


[root@localhost ~]# sh testxx.sh nvme1n1
+ nvme id-ctrl /dev/nvme1n1
+ grep cmic
cmic      : 0x2
+ nvme id-ctrl /dev/nvme1n1
+ grep awupf
awupf     : 31
+ nvme id-ns /dev/nvme1n1
+ grep nawupf
nawupf  : 0
+ cat /sys/block/nvme1n1/queue/atomic_write_max_bytes
4096

[root@localhost ~]# sh testxx.sh nvme1n2
+ nvme id-ctrl /dev/nvme1n2
+ grep cmic
cmic      : 0x2
+ nvme id-ctrl /dev/nvme1n2
+ grep awupf
awupf     : 15
+ nvme id-ns /dev/nvme1n2
+ grep nawupf
nawupf  : 0
+ cat /sys/block/nvme1n2/queue/atomic_write_max_bytes
4096

[root@localhost ~]# sh testxx.sh nvme1n3
+ nvme id-ctrl /dev/nvme1n3
+ grep cmic
cmic      : 0x2
+ nvme id-ctrl /dev/nvme1n3
+ grep awupf
awupf     : 7
+ nvme id-ns /dev/nvme1n3
+ grep nawupf
nawupf  : 15
+ cat /sys/block/nvme1n3/queue/atomic_write_max_bytes
8192

[root@localhost ~]# sh testxx.sh nvme1n4
+ nvme id-ctrl /dev/nvme1n4
+ grep cmic
cmic      : 0x2
+ nvme id-ctrl /dev/nvme1n4
+ grep awupf
awupf     : 7
+ nvme id-ns /dev/nvme1n4
+ grep nawupf
nawupf  : 63
+ cat /sys/block/nvme1n4/queue/atomic_write_max_bytes
32768

[root@localhost ~]# sh testxx.sh nvme1n5
+ nvme id-ctrl /dev/nvme1n5
+ grep cmic
cmic      : 0x2
+ nvme id-ctrl /dev/nvme1n5
+ grep awupf
awupf     : 7
+ nvme id-ns /dev/nvme1n5
+ grep nawupf
nawupf  : 31
+ cat /sys/block/nvme1n5/queue/atomic_write_max_bytes
16384

Does this look right?

Before, we had a single atomic_write_max_bytes value per subsystem.  
That's not the case now.




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-13  8:03   ` John Garry
@ 2025-06-16  5:31     ` Christoph Hellwig
  2025-06-16  7:42       ` John Garry
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-16  5:31 UTC (permalink / raw)
  To: John Garry
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Yi Zhang, Alan Adamson

On Fri, Jun 13, 2025 at 09:03:07AM +0100, John Garry wrote:
> I suppose that many comments in 1/2 aren't relevant any more from this 
> change.

What do you mean with comments in 1/2?



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-13 21:22 ` fix atomic limits check alan.adamson
@ 2025-06-16  5:36   ` Christoph Hellwig
  2025-06-17 17:40     ` alan.adamson
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-16  5:36 UTC (permalink / raw)
  To: alan.adamson
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Yi Zhang, John Garry

On Fri, Jun 13, 2025 at 02:22:00PM -0700, alan.adamson@oracle.com wrote:
> Some testing with my qemu-nvme atomic write setup.  My qemu includes ns 
> atomic parameters that isn't upstream yet.

> A single subsystem with 4 controllers, 1 of the controllers has 2 
> namespaces.

.. and none of the namespaces are shared if I parse the command line
right?

> Does this look right?

I think it does, at least if I understand the setup correctly.

> Before, we had a single atomic_write_max_bytes value per subsystem.  
> That's not the case now.

Yes, with AWUPF the value should be different for namespaces with
different LBA sizes, and with NAWUPF it could be different per namespaces.

Maybe we want an extra sanity check to catch a subsystem reporting
differnet NAWUPF for the same namespace attached to multiple controllers,
even if that's against the spec.

And I wonder how we can come up with a testcase to validate all these
checks..



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-16  5:31     ` Christoph Hellwig
@ 2025-06-16  7:42       ` John Garry
  2025-06-16 11:36         ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: John Garry @ 2025-06-16  7:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, Alan Adamson

On 16/06/2025 06:31, Christoph Hellwig wrote:
> On Fri, Jun 13, 2025 at 09:03:07AM +0100, John Garry wrote:
>> I suppose that many comments in 1/2 aren't relevant any more from this
>> change.
> What do you mean with comments in 1/2?

I really mean my nitpicking, like:

On 13/06/2025 08:52, John Garry wrote:
 >> +        dev_err_ratelimited(ns->ctrl->device,
 >> +            "%s: Inconsistent Atomic Write Size, Namespace will not
 >> be added: Subsystem=%d bytes, Controller/Namespace=%d bytes\n",
 >
 > nit on pre-existing code: /s/Atomic Write Size/Atomic Write Sizes/

That dev_err_ratelimited message changes/disappears in this patch.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
  2025-06-13  3:04   ` Yi Zhang
  2025-06-13  8:03   ` John Garry
@ 2025-06-16 10:19   ` John Garry
  2025-06-16 11:37     ` Christoph Hellwig
  2025-06-17  7:36   ` John Garry
  3 siblings, 1 reply; 26+ messages in thread
From: John Garry @ 2025-06-16 10:19 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, Alan Adamson

On 11/06/2025 06:54, Christoph Hellwig wrote:
>   		ret = nvme_init_effects(ctrl, id);
>   		if (ret)
>   			goto out_free;
> +
> +		ctrl->subsys->awupf = le16_to_cpu(id->awupf);
>   	}
> +
> +	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
> +		dev_err_ratelimited(ctrl->device,
> +			"inconsistent AWUPF, controller not added (%u/%u).\n",
> +			le16_to_cpu(id->awupf), ctrl->subsys->awupf);

Could we just disable atomic writes instead of doing this?

Or are there bigger issues, like the value returned from 
nvme_update_disk_info() for setting the physical block size is just not 
valid?


> +		ret = -EINVAL;
> +		goto out_free;
> +	}
> +
>   	memcpy(ctrl->subsys->firmware_rev, id->fr,
>   	       sizeof(ctrl->subsys->firmware_rev));



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-16  7:42       ` John Garry
@ 2025-06-16 11:36         ` Christoph Hellwig
  0 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-16 11:36 UTC (permalink / raw)
  To: John Garry
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Yi Zhang, Alan Adamson

On Mon, Jun 16, 2025 at 08:42:41AM +0100, John Garry wrote:
> On 16/06/2025 06:31, Christoph Hellwig wrote:
>> On Fri, Jun 13, 2025 at 09:03:07AM +0100, John Garry wrote:
>>> I suppose that many comments in 1/2 aren't relevant any more from this
>>> change.
>> What do you mean with comments in 1/2?
>
> I really mean my nitpicking, like:

> That dev_err_ratelimited message changes/disappears in this patch.

Yeah, this gets removed here, and IIRC it was a mostly blind move of
the existing code.  So I'm not sure touching it is worth the effort.




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-16 10:19   ` John Garry
@ 2025-06-16 11:37     ` Christoph Hellwig
  2025-06-16 11:49       ` John Garry
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-16 11:37 UTC (permalink / raw)
  To: John Garry
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Yi Zhang, Alan Adamson

On Mon, Jun 16, 2025 at 11:19:44AM +0100, John Garry wrote:
> On 11/06/2025 06:54, Christoph Hellwig wrote:
>>   		ret = nvme_init_effects(ctrl, id);
>>   		if (ret)
>>   			goto out_free;
>> +
>> +		ctrl->subsys->awupf = le16_to_cpu(id->awupf);
>>   	}
>> +
>> +	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
>> +		dev_err_ratelimited(ctrl->device,
>> +			"inconsistent AWUPF, controller not added (%u/%u).\n",
>> +			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
>
> Could we just disable atomic writes instead of doing this?
>
> Or are there bigger issues, like the value returned from 
> nvme_update_disk_info() for setting the physical block size is just not 
> valid?

This brings up back to the old problem of how do we tell the file system
and/or application that the atomic write size it probed suddenly went
away entirely because a new controller disappeared.



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-16 11:37     ` Christoph Hellwig
@ 2025-06-16 11:49       ` John Garry
  0 siblings, 0 replies; 26+ messages in thread
From: John Garry @ 2025-06-16 11:49 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, Alan Adamson

On 16/06/2025 12:37, Christoph Hellwig wrote:
>>> +	if (le16_to_cpu(id->awupf) != ctrl->subsys->awupf) {
>>> +		dev_err_ratelimited(ctrl->device,
>>> +			"inconsistent AWUPF, controller not added (%u/%u).\n",
>>> +			le16_to_cpu(id->awupf), ctrl->subsys->awupf);
>> Could we just disable atomic writes instead of doing this?
>>
>> Or are there bigger issues, like the value returned from
>> nvme_update_disk_info() for setting the physical block size is just not
>> valid?
> This brings up back to the old problem of how do we tell the file system
> and/or application that the atomic write size it probed suddenly went
> away entirely because a new controller disappeared.

Yeah, xfs does store those values at mount time.

If spinning another version, it would be good to explicitly mention this 
for benefit of forgetful people like me.

cheers


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-11  5:54 fix atomic limits check Christoph Hellwig
                   ` (2 preceding siblings ...)
  2025-06-13 21:22 ` fix atomic limits check alan.adamson
@ 2025-06-17  4:56 ` Christoph Hellwig
  2025-06-17 16:38   ` Luis Chamberlain
  3 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-17  4:56 UTC (permalink / raw)
  To: Keith Busch, Sagi Grimberg; +Cc: linux-nvme, Yi Zhang, Alan Adamson, John Garry

Can I get reviews for this so that we can queue it up for -rc3?



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] nvme: fix atomic write boundary validation
  2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
                     ` (2 preceding siblings ...)
  2025-06-16 10:19   ` John Garry
@ 2025-06-17  7:36   ` John Garry
  3 siblings, 0 replies; 26+ messages in thread
From: John Garry @ 2025-06-17  7:36 UTC (permalink / raw)
  To: Christoph Hellwig, Keith Busch, Sagi Grimberg
  Cc: linux-nvme, Yi Zhang, Alan Adamson

On 11/06/2025 06:54, Christoph Hellwig wrote:



> Don't mix the namespace and controller values, and validate the
> per-controller limit when probing the controller.  This avoid spurious
> failures for controllers with namespaces that have different namespaces
> with different logical block sizes, or report the per-namespace values
> only for some namespaces.
> 
> It also fixes a missing queue_limits_cancel_update in an error path by
> removing that error path.
> 
> Fixes: 8695f060a029 ("nvme: all namespaces in a subsystem must adhere to a common atomic write size")
> Reported-by: Yi Zhang <yi.zhang@redhat.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Is boundary the right word in the subject?

Anyway, FWIW:

Reviewed-by: John Garry <john.g.garry@oracle.com>


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-17  4:56 ` Christoph Hellwig
@ 2025-06-17 16:38   ` Luis Chamberlain
  0 siblings, 0 replies; 26+ messages in thread
From: Luis Chamberlain @ 2025-06-17 16:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, Alan Adamson,
	John Garry

On Tue, Jun 17, 2025 at 06:56:18AM +0200, Christoph Hellwig wrote:
> Can I get reviews for this so that we can queue it up for -rc3?

This entire series looks good to me.

Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>

  Luis


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-16  5:36   ` Christoph Hellwig
@ 2025-06-17 17:40     ` alan.adamson
  2025-06-18  5:32       ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: alan.adamson @ 2025-06-17 17:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, John Garry


On 6/15/25 10:36 PM, Christoph Hellwig wrote:
> On Fri, Jun 13, 2025 at 02:22:00PM -0700, alan.adamson@oracle.com wrote:
>> Some testing with my qemu-nvme atomic write setup.  My qemu includes ns
>> atomic parameters that isn't upstream yet.
>> A single subsystem with 4 controllers, 1 of the controllers has 2
>> namespaces.
> .. and none of the namespaces are shared if I parse the command line
> right?

Correct, nothing is shared.

>
>> Does this look right?
> I think it does, at least if I understand the setup correctly.
>
>> Before, we had a single atomic_write_max_bytes value per subsystem.
>> That's not the case now.
> Yes, with AWUPF the value should be different for namespaces with
> different LBA sizes, and with NAWUPF it could be different per namespaces.
>
> Maybe we want an extra sanity check to catch a subsystem reporting
> differnet NAWUPF for the same namespace attached to multiple controllers,
> even if that's against the spec.
>
> And I wonder how we can come up with a testcase to validate all these
> checks..

I changed the config a bit:

CTRL 0 - AWUN=31 AWUPF=15
CTRL 1 - AWUN=31 AWUPF=31
CTRL 2 - AWUN=15 AWUPF=7
CTRL 3 - AWUN=15 AWUPF=15
     NS - NAWUN=31 NAWUPF=15
     NS - NAWUN=127 NAWUPF=63


and his doesn't look right:

[root@localhost ~]# sh testxx.sh nvme0n3
+ nvme id-ctrl /dev/nvme0n3
+ grep cmic
cmic      : 0x2
+ nvme id-ctrl /dev/nvme0n3
+ grep awupf
awupf     : 7
+ nvme id-ns /dev/nvme0n3
+ grep nawupf
nawupf  : 0
+ cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
8192
[root@localhost ~]# dmesg | grep -i awupf
[root@localhost ~]#

AWUPF of 7 isn't consistent with an atomic_write_max_bytes of 8192.

Alan





^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-17 17:40     ` alan.adamson
@ 2025-06-18  5:32       ` Christoph Hellwig
  2025-06-18 16:07         ` alan.adamson
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-18  5:32 UTC (permalink / raw)
  To: alan.adamson
  Cc: Christoph Hellwig, Keith Busch, Sagi Grimberg, linux-nvme,
	Yi Zhang, John Garry

On Tue, Jun 17, 2025 at 10:40:35AM -0700, alan.adamson@oracle.com wrote:
> I changed the config a bit:
>
> CTRL 0 - AWUN=31 AWUPF=15
> CTRL 1 - AWUN=31 AWUPF=31
> CTRL 2 - AWUN=15 AWUPF=7
> CTRL 3 - AWUN=15 AWUPF=15
>     NS - NAWUN=31 NAWUPF=15
>     NS - NAWUN=127 NAWUPF=63

I'm a bit confused what this config means.  The NS lines here are
multiple namespaces?

> [root@localhost ~]# sh testxx.sh nvme0n3

But this is all about the controller 0 above, which now has multiple
namespaces?

> + cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
> 8192
> [root@localhost ~]# dmesg | grep -i awupf
> [root@localhost ~]#
>
> AWUPF of 7 isn't consistent with an atomic_write_max_bytes of 8192.

It is consistent with an (N)AWUPF=15, and it is namespace number 3.  Are
you testing controller 3 above or is the namespace shared between
controller 0 and controller 3?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-18  5:32       ` Christoph Hellwig
@ 2025-06-18 16:07         ` alan.adamson
       [not found]           ` <20250618171750.GA29321@lst.de>
  0 siblings, 1 reply; 26+ messages in thread
From: alan.adamson @ 2025-06-18 16:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Keith Busch, Sagi Grimberg, linux-nvme, Yi Zhang, John Garry


On 6/17/25 10:32 PM, Christoph Hellwig wrote:
> On Tue, Jun 17, 2025 at 10:40:35AM -0700, alan.adamson@oracle.com wrote:
>> I changed the config a bit:
>>
>> CTRL 0 - AWUN=31 AWUPF=15
>> CTRL 1 - AWUN=31 AWUPF=31
>> CTRL 2 - AWUN=15 AWUPF=7
>> CTRL 3 - AWUN=15 AWUPF=15
>>      NS - NAWUN=31 NAWUPF=15
>>      NS - NAWUN=127 NAWUPF=63
> I'm a bit confused what this config means.  The NS lines here are
> multiple namespaces?

CTRL 0 - 2 - each has 1 namespace, none of which define namespace atomic 
parameters.

CTRL 3 - has 2 namespaces both define namespace atomic parameters.

>
>> [root@localhost ~]# sh testxx.sh nvme0n3
> But this is all about the controller 0 above, which now has multiple
> namespaces?
>
>> + cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
>> 8192
>> [root@localhost ~]# dmesg | grep -i awupf
>> [root@localhost ~]#
>>
>> AWUPF of 7 isn't consistent with an atomic_write_max_bytes of 8192.
> It is consistent with an (N)AWUPF=15, and it is namespace number 3.  Are
> you testing controller 3 above or is the namespace shared between
> controller 0 and controller 3?

CTRL 2 (nvme0n3) has AWUPF=7 which means it supports a max atomic write 
size of 4096b, but /sys/block/nvme0n3/queue/atomic_write_max_bytes is 8192b.

Alan



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
       [not found]           ` <20250618171750.GA29321@lst.de>
@ 2025-06-18 18:30             ` alan.adamson
  2025-06-23 13:33               ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: alan.adamson @ 2025-06-18 18:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Christoph Hellwig, kbusch@kernel.org, Sagi Grimberg,
	linux-nvme@lists.infradead.org, Yi Zhang, John Garry


On 6/18/25 10:17 AM, Christoph Hellwig wrote:
> On Wed, Jun 18, 2025 at 09:07:42AM -0700, alan.adamson@oracle.com wrote:
>>>> AWUPF of 7 isn't consistent with an atomic_write_max_bytes of 8192.
>>> It is consistent with an (N)AWUPF=15, and it is namespace number 3.  Are
>>> you testing controller 3 above or is the namespace shared between
>>> controller 0 and controller 3?
>> CTRL 2 (nvme0n3) has AWUPF=7 which means it supports a max atomic write
>> size of 4096b, but /sys/block/nvme0n3/queue/atomic_write_max_bytes is
>> 8192b.
> Can you double check this really is your CTLR2?  At least for me
> the async probing reorders nvme devices quite a lot with qemu.
>
CTRL 0 (nvme0) - AWUN=31 AWUPF=15       nvme1n2
CTRL 1 (nvme1) - AWUN=31 AWUPF=31       nvme1n1
CTRL 2 (nvme2) - AWUN=15 AWUPF=7         nvme1n3
CTRL 3 (nvme3) - AWUN=15 AWUPF=15
     NS - NAWUN=31 NAWUPF=15     nvme1n5
     NS - NAWUN=127 NAWUPF=63    nvme1n4


I rebooted and it probed differently.  Now CTRL2 is nvme1n3.  We know 
CTRL2 is the same as nvme1n3 because awun and awupf from nvme id-ctrl 
matches the qemu config (atomic.awun=15,atomic.awupf=7) for CTRL2 (nvme2).

# nvme id-ctrl /dev/nvme1n3 | grep awun
awun      : 15
# nvme id-ctrl /dev/nvme1n3 | grep awupf
awupf     : 7
# cat /sys/block/nvme1n3/queue/atomic_write_max_bytes
8192




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-18 18:30             ` alan.adamson
@ 2025-06-23 13:33               ` Christoph Hellwig
  2025-06-23 17:24                 ` alan.adamson
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-23 13:33 UTC (permalink / raw)
  To: alan.adamson
  Cc: Christoph Hellwig, kbusch@kernel.org, Sagi Grimberg,
	linux-nvme@lists.infradead.org, Yi Zhang, John Garry

On Wed, Jun 18, 2025 at 11:30:20AM -0700, alan.adamson@oracle.com wrote:
>> Can you double check this really is your CTLR2?  At least for me
>> the async probing reorders nvme devices quite a lot with qemu.
>>
> CTRL 0 (nvme0) - AWUN=31 AWUPF=15       nvme1n2
> CTRL 1 (nvme1) - AWUN=31 AWUPF=31       nvme1n1
> CTRL 2 (nvme2) - AWUN=15 AWUPF=7         nvme1n3
> CTRL 3 (nvme3) - AWUN=15 AWUPF=15

Do you mean AWUN or AWUPF?  Because AWUN is totally irrelevant and
not even parsed by Linux.

> I rebooted and it probed differently.  Now CTRL2 is nvme1n3.  We know 
> CTRL2 is the same as nvme1n3 because awun and awupf from nvme id-ctrl 
> matches the qemu config (atomic.awun=15,atomic.awupf=7) for CTRL2 (nvme2).
>
> # nvme id-ctrl /dev/nvme1n3 | grep awun
> awun      : 15
> # nvme id-ctrl /dev/nvme1n3 | grep awupf
> awupf     : 7
> # cat /sys/block/nvme1n3/queue/atomic_write_max_bytes
> 8192

The code in nvme_configure_atomic_write calculates the values pretty
much directly from (N)AWUPF, so I don't see how it would get things
wrong.  Can you run your patched qemu setup with this debug printk
patch?

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3da5ac71a9b0..245397b217f4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2033,6 +2033,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
 		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
 		if (id->nabspf)
 			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+		dev_info(ns->ctrl->device, "NAWUPF: %u, atomic_bs: %u\n",
+			le16_to_cpu(id->nabspf), atomic_bs);
 	} else {
 		/*
 		 * Use the controller wide atomic write unit.  This sucks
@@ -2042,6 +2044,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
 		 * values for different controllers in the subsystem.
 		 */
 		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+		dev_info(ns->ctrl->device, "AWUPF: %u, atomic_bs: %u\n",
+			ns->ctrl->subsys->awupf, atomic_bs);
 	}
 
 	lim->atomic_write_hw_max = atomic_bs;


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-23 13:33               ` Christoph Hellwig
@ 2025-06-23 17:24                 ` alan.adamson
  2025-06-24 13:10                   ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: alan.adamson @ 2025-06-23 17:24 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kbusch@kernel.org, Sagi Grimberg, linux-nvme@lists.infradead.org,
	Yi Zhang, John Garry


On 6/23/25 6:33 AM, Christoph Hellwig wrote:
> On Wed, Jun 18, 2025 at 11:30:20AM -0700, alan.adamson@oracle.com wrote:
>>> Can you double check this really is your CTLR2?  At least for me
>>> the async probing reorders nvme devices quite a lot with qemu.
>>>
>> CTRL 0 (nvme0) - AWUN=31 AWUPF=15       nvme1n2
>> CTRL 1 (nvme1) - AWUN=31 AWUPF=31       nvme1n1
>> CTRL 2 (nvme2) - AWUN=15 AWUPF=7         nvme1n3
>> CTRL 3 (nvme3) - AWUN=15 AWUPF=15
> Do you mean AWUN or AWUPF?  Because AWUN is totally irrelevant and
> not even parsed by Linux.

The above configuration is the HW/QEMU configuration. Both AWUN and 
AWUPF are setup.

>
>> I rebooted and it probed differently.  Now CTRL2 is nvme1n3.  We know
>> CTRL2 is the same as nvme1n3 because awun and awupf from nvme id-ctrl
>> matches the qemu config (atomic.awun=15,atomic.awupf=7) for CTRL2 (nvme2).
>>
>> # nvme id-ctrl /dev/nvme1n3 | grep awun
>> awun      : 15
>> # nvme id-ctrl /dev/nvme1n3 | grep awupf
>> awupf     : 7
>> # cat /sys/block/nvme1n3/queue/atomic_write_max_bytes
>> 8192
> The code in nvme_configure_atomic_write calculates the values pretty
> much directly from (N)AWUPF, so I don't see how it would get things
> wrong.  Can you run your patched qemu setup with this debug printk
> patch?
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 3da5ac71a9b0..245397b217f4 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2033,6 +2033,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>   		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
>   		if (id->nabspf)
>   			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
> +		dev_info(ns->ctrl->device, "NAWUPF: %u, atomic_bs: %u\n",
> +			le16_to_cpu(id->nabspf), atomic_bs);
>   	} else {
>   		/*
>   		 * Use the controller wide atomic write unit.  This sucks
> @@ -2042,6 +2044,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>   		 * values for different controllers in the subsystem.
>   		 */
>   		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
> +		dev_info(ns->ctrl->device, "AWUPF: %u, atomic_bs: %u\n",
> +			ns->ctrl->subsys->awupf, atomic_bs);
>   	}
>   
>   	lim->atomic_write_hw_max = atomic_bs;

Here you go:

CTRL 0 (nvme0) - AWUN=31 AWUPF=15       nvme0n1
CTRL 1 (nvme1) - AWUN=31 AWUPF=31       nvme0n3
CTRL 2 (nvme2) - AWUN=15 AWUPF=7        nvme0n2
CTRL 3 (nvme3) - AWUN=15 AWUPF=15
     NS - NAWUN=31 NAWUPF=15                 nvme0n4
     NS - NAWUN=127 NAWUPF=63               nvme0n5


[root@localhost ~]# nvme id-ctrl /dev/nvme0n1 | grep awupf
awupf     : 15
[root@localhost ~]# cat /sys/block/nvme0n1/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ctrl /dev/nvme0n2 | grep awupf
awupf     : 7
[root@localhost ~]# cat /sys/block/nvme0n2/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ctrl /dev/nvme0n3 | grep awupf
awupf     : 31
[root@localhost ~]# cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n4 | grep nawupf
nawupf  : 15
[root@localhost ~]# cat /sys/block/nvme0n4/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n5 | grep nawupf
nawupf  : 63
[root@localhost ~]# cat /sys/block/nvme0n5/queue/atomic_write_max_bytes
32768
[root@localhost ~]#

[root@localhost ~]# dmesg | grep nvme | grep atomic_bs
[    2.831882] nvme nvme0: AWUPF: 15, atomic_bs: 8192
[    2.840480] nvme nvme2: AWUPF: 15, atomic_bs: 8192
[    2.842418] nvme nvme1: AWUPF: 15, atomic_bs: 8192
[    2.861119] nvme nvme3: NAWUPF: 0, atomic_bs: 8192
[    2.862427] nvme nvme3: NAWUPF: 0, atomic_bs: 32768
[root@localhost ~]#




^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-23 17:24                 ` alan.adamson
@ 2025-06-24 13:10                   ` Christoph Hellwig
  2025-06-24 16:38                     ` alan.adamson
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-24 13:10 UTC (permalink / raw)
  To: alan.adamson
  Cc: Christoph Hellwig, kbusch@kernel.org, Sagi Grimberg,
	linux-nvme@lists.infradead.org, Yi Zhang, John Garry

Arrg, the patch did not actually print NAWUPF, and made me scramble
really hard how the output could be possible.  New one below:


diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3da5ac71a9b0..90473d4770f5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2033,6 +2033,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
 		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
 		if (id->nabspf)
 			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
+		dev_info(ns->ctrl->device, "nsid %u: NAWUPF: %u, atomic_bs: %u\n",
+			ns->head->ns_id, le16_to_cpu(id->nawupf), atomic_bs);
 	} else {
 		/*
 		 * Use the controller wide atomic write unit.  This sucks
@@ -2042,6 +2044,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
 		 * values for different controllers in the subsystem.
 		 */
 		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+		dev_info(ns->ctrl->device, "nsid: %u: AWUPF: %u, atomic_bs: %u\n",
+			ns->head->ns_id, ns->ctrl->subsys->awupf, atomic_bs);
 	}
 
 	lim->atomic_write_hw_max = atomic_bs;



^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-24 13:10                   ` Christoph Hellwig
@ 2025-06-24 16:38                     ` alan.adamson
  2025-06-25  6:39                       ` Christoph Hellwig
  0 siblings, 1 reply; 26+ messages in thread
From: alan.adamson @ 2025-06-24 16:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: kbusch@kernel.org, Sagi Grimberg, linux-nvme@lists.infradead.org,
	Yi Zhang, John Garry


On 6/24/25 6:10 AM, Christoph Hellwig wrote:
> Arrg, the patch did not actually print NAWUPF, and made me scramble
> really hard how the output could be possible.  New one below:
>
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 3da5ac71a9b0..90473d4770f5 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2033,6 +2033,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>   		atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
>   		if (id->nabspf)
>   			boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
> +		dev_info(ns->ctrl->device, "nsid %u: NAWUPF: %u, atomic_bs: %u\n",
> +			ns->head->ns_id, le16_to_cpu(id->nawupf), atomic_bs);
>   	} else {
>   		/*
>   		 * Use the controller wide atomic write unit.  This sucks
> @@ -2042,6 +2044,8 @@ static u32 nvme_configure_atomic_write(struct nvme_ns *ns,
>   		 * values for different controllers in the subsystem.
>   		 */
>   		atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
> +		dev_info(ns->ctrl->device, "nsid: %u: AWUPF: %u, atomic_bs: %u\n",
> +			ns->head->ns_id, ns->ctrl->subsys->awupf, atomic_bs);
>   	}
>   
>   	lim->atomic_write_hw_max = atomic_bs;
>
CTRL 0 (nvme0) - AWUN=31 AWUPF=15       nvme0n1
CTRL 1 (nvme1) - AWUN=31 AWUPF=31       nvme0n3
CTRL 2 (nvme2) - AWUN=15 AWUPF=7        nvme0n2
CTRL 3 (nvme3) - AWUN=15 AWUPF=15
     NS - NAWUN=31 NAWUPF=15             nvme0n4
     NS - NAWUN=127 NAWUPF=63            nvme0n5

[root@localhost ~]# nvme id-ctrl /dev/nvme0n1 | grep awupf
awupf     : 15
[root@localhost ~]# cat /sys/block/nvme0n1/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ctrl /dev/nvme0n2 | grep awupf
awupf     : 7
[root@localhost ~]# cat /sys/block/nvme0n2/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ctrl /dev/nvme0n3 | grep awupf
awupf     : 31
[root@localhost ~]# cat /sys/block/nvme0n3/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n4 | grep nawupf
nawupf  : 15
[root@localhost ~]# cat /sys/block/nvme0n4/queue/atomic_write_max_bytes
8192
[root@localhost ~]# nvme id-ns /dev/nvme0n5 | grep nawupf
nawupf  : 63
[root@localhost ~]# cat /sys/block/nvme0n5/queue/atomic_write_max_bytes
32768
[root@localhost ~]#

[root@localhost ~]# dmesg | grep nvme | grep atomic_bs
[    3.034383] nvme nvme0: nsid: 1: AWUPF: 15, atomic_bs: 8192
[    3.050431] nvme nvme2: nsid: 3: AWUPF: 15, atomic_bs: 8192
[    3.056716] nvme nvme1: nsid: 2: AWUPF: 15, atomic_bs: 8192
[    3.059079] nvme nvme3: nsid 4: NAWUPF: 15, atomic_bs: 8192
[    3.060867] nvme nvme3: nsid 5: NAWUPF: 63, atomic_bs: 32768
[root@localhost ~]#



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: fix atomic limits check
  2025-06-24 16:38                     ` alan.adamson
@ 2025-06-25  6:39                       ` Christoph Hellwig
  0 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2025-06-25  6:39 UTC (permalink / raw)
  To: alan.adamson
  Cc: Christoph Hellwig, kbusch@kernel.org, Sagi Grimberg,
	linux-nvme@lists.infradead.org, Yi Zhang, John Garry

Thanks.  We should only initialize awupf once per subsystem,
and the patch got this wrong.  I'm sending out a new version now.



^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2025-06-25  6:48 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-11  5:54 fix atomic limits check Christoph Hellwig
2025-06-11  5:54 ` [PATCH 1/2] nvme: refactor the atomic write unit detection Christoph Hellwig
2025-06-13  7:52   ` John Garry
2025-06-11  5:54 ` [PATCH 2/2] nvme: fix atomic write boundary validation Christoph Hellwig
2025-06-13  3:04   ` Yi Zhang
2025-06-13  8:03   ` John Garry
2025-06-16  5:31     ` Christoph Hellwig
2025-06-16  7:42       ` John Garry
2025-06-16 11:36         ` Christoph Hellwig
2025-06-16 10:19   ` John Garry
2025-06-16 11:37     ` Christoph Hellwig
2025-06-16 11:49       ` John Garry
2025-06-17  7:36   ` John Garry
2025-06-13 21:22 ` fix atomic limits check alan.adamson
2025-06-16  5:36   ` Christoph Hellwig
2025-06-17 17:40     ` alan.adamson
2025-06-18  5:32       ` Christoph Hellwig
2025-06-18 16:07         ` alan.adamson
     [not found]           ` <20250618171750.GA29321@lst.de>
2025-06-18 18:30             ` alan.adamson
2025-06-23 13:33               ` Christoph Hellwig
2025-06-23 17:24                 ` alan.adamson
2025-06-24 13:10                   ` Christoph Hellwig
2025-06-24 16:38                     ` alan.adamson
2025-06-25  6:39                       ` Christoph Hellwig
2025-06-17  4:56 ` Christoph Hellwig
2025-06-17 16:38   ` Luis Chamberlain

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).