From: "Dan Williams (nvidia)" <djbw@kernel.org>
To: Srirangan Madhavan <smadhavan@nvidia.com>,
Alison Schofield <alison.schofield@intel.com>,
Bjorn Helgaas <bhelgaas@google.com>,
Dan Williams <djbw@kernel.org>,
Dave Jiang <dave.jiang@intel.com>,
Davidlohr Bueso <dave@stgolabs.net>,
Ira Weiny <ira.weiny@intel.com>,
Jonathan Cameron <jic23@kernel.org>,
Vishal Verma <vishal.l.verma@intel.com>,
linux-cxl@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org
Cc: vsethi@nvidia.com, alwilliamson@nvidia.com,
Dan Williams <danwilliams@nvidia.com>,
Sai Yashwanth Reddy Kancherla <skancherla@nvidia.com>,
Vishal Aslot <vaslot@nvidia.com>,
Manish Honap <mhonap@nvidia.com>, Jiandi An <jan@nvidia.com>,
Richard Cheng <icheng@nvidia.com>,
linux-tegra@vger.kernel.org,
Srirangan Madhavan <smadhavan@nvidia.com>
Subject: Re: [PATCH v7 03/11] cxl: Cache endpoint decoder settings during PCI enumeration
Date: Tue, 23 Jun 2026 19:15:42 -0700 [thread overview]
Message-ID: <6a3b3dce9f3e0_3c9f1005e@djbw-dev.notmuch> (raw)
In-Reply-To: <20260623032453.3404772-4-smadhavan@nvidia.com>
Srirangan Madhavan wrote:
> Populate pci_dev->hdm from PCI capability initialization when a CXL.mem
> function already has memory decoding enabled. This gives driver-free reset
> paths an early HDM snapshot without enabling BAR decoding during
> enumeration.
>
> CXL core later reuses and refreshes the same cache. Move the register
> helpers into the built-in CONFIG_CXL_HDM set so the early cache path is
> available without cxl_core.
>
> Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
> ---
> drivers/cxl/core/Makefile | 3 +-
> drivers/cxl/core/hdm.c | 59 +++++++----
> drivers/cxl/core/reset.c | 202 ++++++++++++++++++++++++++++++++++++++
> drivers/pci/probe.c | 2 +
> include/cxl/cxl.h | 9 ++
> 5 files changed, 252 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index dc075cee0450..69cf2ea7ee74 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -1,6 +1,6 @@
> # SPDX-License-Identifier: GPL-2.0
> obj-$(CONFIG_CXL_BUS) += cxl_core.o
> -obj-$(CONFIG_CXL_HDM) += reset.o
> +obj-$(CONFIG_CXL_HDM) += regs.o reset.o
> obj-$(CONFIG_CXL_SUSPEND) += suspend.o
>
> ccflags-y += -I$(srctree)/drivers/cxl
> @@ -8,7 +8,6 @@ CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
>
> cxl_core-y := port.o
> cxl_core-y += pmem.o
> -cxl_core-y += regs.o
> cxl_core-y += memdev.o
> cxl_core-y += mbox.o
> cxl_core-y += pci.o
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 83cda63f76a5..0230ebfada42 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -91,11 +91,9 @@ static void clear_hdm_info(void *data)
> WRITE_ONCE(pdev->hdm, NULL);
> }
>
> -static int devm_cxl_pci_setup_hdm_info(struct cxl_hdm *cxlhdm)
> +static struct pci_dev *cxl_hdm_to_pci_dev(struct cxl_hdm *cxlhdm)
> {
> struct cxl_port *port = cxlhdm->port;
> - struct cxl_hdm_info *info;
> - struct pci_dev *pdev;
> struct device *uport;
>
> if (is_cxl_endpoint(port)) {
> @@ -107,9 +105,27 @@ static int devm_cxl_pci_setup_hdm_info(struct cxl_hdm *cxlhdm)
> }
>
> if (!dev_is_pci(uport))
> + return NULL;
> +
> + return to_pci_dev(uport);
> +}
> +
> +static int devm_cxl_pci_setup_hdm_info(struct cxl_hdm *cxlhdm)
> +{
> + struct cxl_hdm_info *info;
> + struct pci_dev *pdev;
> +
> + pdev = cxl_hdm_to_pci_dev(cxlhdm);
> + if (!pdev)
> + return 0;
> +
> + info = READ_ONCE(pdev->hdm);
> + if (info) {
> + if (info->decoder_count != cxlhdm->decoder_count)
> + return -ENXIO;
> return 0;
> + }
>
> - pdev = to_pci_dev(uport);
> info = devm_kzalloc(&pdev->dev,
> struct_size(info, settings, cxlhdm->decoder_count),
> GFP_KERNEL);
> @@ -125,23 +141,13 @@ static int devm_cxl_pci_setup_hdm_info(struct cxl_hdm *cxlhdm)
> static void cxl_hdm_info_set_decoder(struct cxl_hdm *cxlhdm,
> struct cxl_decoder *cxld)
> {
> - struct cxl_port *port = cxlhdm->port;
> struct cxl_hdm_info *info;
> struct pci_dev *pdev;
> - struct device *uport;
> -
> - if (is_cxl_endpoint(port)) {
> - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
> -
> - uport = cxlmd->dev.parent;
> - } else {
> - uport = port->uport_dev;
> - }
>
> - if (!dev_is_pci(uport))
> + pdev = cxl_hdm_to_pci_dev(cxlhdm);
> + if (!pdev)
> return;
>
> - pdev = to_pci_dev(uport);
> info = READ_ONCE(pdev->hdm);
> if (!info || cxld->id >= info->decoder_count)
> return;
> @@ -202,6 +208,7 @@ static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
> struct cxl_register_map *reg_map = &port->reg_map;
> struct device *dev = &port->dev;
> struct cxl_hdm *cxlhdm;
> + struct pci_dev *pdev;
> int rc;
>
> cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
> @@ -227,11 +234,21 @@ static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
> return ERR_PTR(-ENODEV);
> }
>
> - rc = cxl_map_component_regs(reg_map, &cxlhdm->regs,
> - BIT(CXL_CM_CAP_CAP_ID_HDM));
> - if (rc) {
> - dev_err(dev, "Failed to map HDM capability.\n");
> - return ERR_PTR(rc);
> + pdev = cxl_hdm_to_pci_dev(cxlhdm);
> + if (pdev) {
> + struct cxl_hdm_info *info = READ_ONCE(pdev->hdm);
> +
> + if (info && info->regs.hdm_decoder)
> + cxlhdm->regs = info->regs;
The driver probably should not reuse the core mapping. The driver's only
role is to update the cached settings if it changes them.
> + }
> +
> + if (!cxlhdm->regs.hdm_decoder) {
> + rc = cxl_map_component_regs(reg_map, &cxlhdm->regs,
> + BIT(CXL_CM_CAP_CAP_ID_HDM));
> + if (rc) {
> + dev_err(dev, "Failed to map HDM capability.\n");
> + return ERR_PTR(rc);
> + }
> }
>
> parse_hdm_decoder_caps(cxlhdm);
> diff --git a/drivers/cxl/core/reset.c b/drivers/cxl/core/reset.c
> index 14f024098e82..fc52d3abdb5b 100644
> --- a/drivers/cxl/core/reset.c
> +++ b/drivers/cxl/core/reset.c
> @@ -2,9 +2,16 @@
> /* Copyright(c) 2026 NVIDIA Corporation. All rights reserved. */
> #include <linux/delay.h>
> #include <linux/bug.h>
> +#include <linux/bitfield.h>
> #include <linux/errno.h>
> #include <linux/export.h>
> +#include <linux/io.h>
> +#include <linux/ioport.h>
> #include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/slab.h>
> +
> +#include <cxlpci.h>
>
> #include "cxl.h"
> #include "core.h"
> @@ -116,3 +123,198 @@ int cxl_commit(struct cxl_decoder_settings *settings, void __iomem *hdm)
> return 0;
> }
> EXPORT_SYMBOL_FOR_MODULES(cxl_commit, "cxl_core");
> +
> +#define CXL_HDM_DECODER_MAX_COUNT 32
> +
> +static void cxl_pci_hdm_clear(void *data)
> +{
> + struct pci_dev *pdev = data;
> +
> + WRITE_ONCE(pdev->hdm, NULL);
> +}
> +
> +static void cxl_pci_hdm_unmap(struct pci_dev *pdev,
> + struct cxl_component_regs *regs,
> + struct cxl_register_map *map)
> +{
> + struct cxl_reg_map *hdm_map = &map->component_map.hdm_decoder;
> +
> + if (!regs->hdm_decoder)
> + return;
> +
> + devm_iounmap(&pdev->dev, regs->hdm_decoder);
> + devm_release_mem_region(&pdev->dev, map->resource + hdm_map->offset,
> + hdm_map->size);
> +}
> +
> +static int cxl_pci_hdm_read_decoder(struct pci_dev *pdev,
> + struct cxl_decoder_settings *settings,
> + void __iomem *hdm, int id)
> +{
> + u64 target_or_skip, base, size;
> + u32 ctrl, lo, hi;
> + int rc;
> +
> + *settings = (struct cxl_decoder_settings) {
> + .id = id,
> + };
> +
> + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
> + if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))
> + return 0;
> +
> + lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
> + hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
> + base = ((u64)hi << 32) | lo;
> +
> + lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
> + hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
> + size = ((u64)hi << 32) | lo;
> +
> + if (!size || base == U64_MAX || size == U64_MAX ||
> + base > U64_MAX - (size - 1)) {
> + pci_err(pdev, "CXL HDM decoder %d has invalid range\n", id);
> + return -ENXIO;
> + }
Zero size is ok now (see series from Richard), and the U64_MAX checks in
the main driver are more about catching common MMIO read failure cases.
In any event I think the core of this function and the validation should
be shared with init_hdm_decoder() so these kinds of disconnects are
prevented.
> +
> + lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(id));
> + hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(id));
> + target_or_skip = ((u64)hi << 32) | lo;
> +
> + settings->hpa_range = (struct range) {
> + .start = base,
> + .end = base + size - 1,
> + };
> + settings->targets = target_or_skip;
> + settings->target_type = FIELD_GET(CXL_HDM_DECODER0_CTRL_HOSTONLY, ctrl) ?
> + CXL_DECODER_HOSTONLYMEM : CXL_DECODER_DEVMEM;
> + settings->flags = CXL_DECODER_F_ENABLE;
> + if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
> + settings->flags |= CXL_DECODER_F_LOCK;
> +
> + rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
> + &settings->interleave_ways);
> + if (rc)
> + return rc;
> +
> + return eig_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK,
> + ctrl),
> + &settings->interleave_granularity);
> +}
> +
> +static int cxl_pci_hdm_capable(struct pci_dev *pdev)
> +{
> + u16 cap;
> + int dvsec;
> + int rc;
> +
> + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
> + PCI_DVSEC_CXL_DEVICE);
> + if (!dvsec)
> + return -ENOTTY;
> +
> + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE))
> + return -ENOTTY;
> +
> + return 0;
> +}
> +
> +int pci_cxl_hdm_init(struct pci_dev *pdev)
Capability init does not fail, so this can be void.
If you want you can make an inner function to report the init failure
via an info message, but otherwise PCI core is not looking for failures.
> +{
> + struct cxl_decoder_settings *settings;
> + struct cxl_component_regs regs = { 0 };
> + struct cxl_register_map map = { 0 };
> + struct cxl_hdm_info *info;
> + bool allocated_info = false;
> + int decoder_count;
> + u16 command;
> + int rc;
> +
> + info = READ_ONCE(pdev->hdm);
> + if (info && info->regs.hdm_decoder)
> + return 0;
> +
> + rc = cxl_pci_hdm_capable(pdev);
> + if (rc)
> + return rc;
> +
> + rc = pci_read_config_word(pdev, PCI_COMMAND, &command);
> + if (rc)
> + return pcibios_err_to_errno(rc);
> +
> + if (!(command & PCI_COMMAND_MEMORY))
> + return -ENOTTY;
This needs some commentary about expectations and likely some formal
recommendation to, and agreement with firmware implementers.
The problem is that CXL memory may be, and usually often is, handed off
actively mapped to the OS. When that happens this needs to be able to
assume that PCI_COMMAND_MEMORY is also enabled for access to the HDM
decoders. It would be broken for firmware to say "here is a device
actively mapping CXL.mem and by the way you can not find out what those
settings are without enabling the device MMIO memory space."
So the expectation is that if a device arrives without
PCI_COMMAND_MEMORY then the OS is free to disable CXL reset when a
driver is not loaded. Conversely, if a device arrives actively providing
system resources, it needs to also arrange for active management.
That said, is it safe to assume that in your testing the memory space is
indeed arriving enabled?
> +
> + if (!info) {
> + info = devm_kzalloc(&pdev->dev,
Devres is not suitable for use outside of a driver.
> + struct_size(info, settings,
> + CXL_HDM_DECODER_MAX_COUNT),
> + GFP_KERNEL);
> + if (!info)
> + return -ENOMEM;
> + allocated_info = true;
> + }
> +
> + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
> + if (rc)
> + return rc;
> +
> + rc = cxl_setup_regs(&map);
> + if (rc)
> + return rc;
> +
> + if (!map.component_map.hdm_decoder.valid) {
> + rc = -ENODEV;
> + return rc;
> + }
> +
> + rc = cxl_map_component_regs(&map, ®s, BIT(CXL_CM_CAP_CAP_ID_HDM));
This uses devres internally so needs a flavor that does typical
allocation. One way to differentiate would be to set "map.host = NULL"
for this case.
> + if (rc)
> + return rc;
> +
> + decoder_count = cxl_hdm_decoder_count(readl(regs.hdm_decoder +
> + CXL_HDM_DECODER_CAP_OFFSET));
> + if (decoder_count < 0) {
> + rc = decoder_count;
> + goto out_unmap;
> + }
> +
> + if (decoder_count > CXL_HDM_DECODER_MAX_COUNT) {
> + rc = -ENXIO;
> + goto out_unmap;
> + }
> +
> + if (info->decoder_count && info->decoder_count != decoder_count) {
> + rc = -ENXIO;
> + goto out_unmap;
> + }
> +
> + info->decoder_count = decoder_count;
> + info->regs = regs;
> +
> + settings = info->settings;
> + for (int i = 0; i < info->decoder_count; i++) {
> + rc = cxl_pci_hdm_read_decoder(pdev, &settings[i],
> + regs.hdm_decoder, i);
> + if (rc)
> + goto out_unmap;
> + }
> +
> + WRITE_ONCE(pdev->hdm, info);
The HDM info is accessed either by the reset path or the driver and
should be protected by cxl_rwsem.dpa. So WRITE_ONCE() becomes
unnecessary if info update happens under that synchronization.
next prev parent reply other threads:[~2026-06-24 2:15 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-23 3:24 [PATCH v7 00/11] PCI/CXL: Add CXL reset support for Type 2 devices Srirangan Madhavan
2026-06-23 3:24 ` [PATCH v7 01/11] cxl: Split decoder programming into a reusable helper Srirangan Madhavan
2026-06-23 3:42 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 02/11] cxl: Cache decoder settings on PCI devices Srirangan Madhavan
2026-06-23 3:42 ` sashiko-bot
2026-06-23 23:13 ` Dan Williams (nvidia)
2026-06-23 3:24 ` [PATCH v7 03/11] cxl: Cache endpoint decoder settings during PCI enumeration Srirangan Madhavan
2026-06-23 3:45 ` sashiko-bot
2026-06-24 2:15 ` Dan Williams (nvidia) [this message]
2026-06-23 3:24 ` [PATCH v7 04/11] PCI: Export pci_dev_save_and_disable() and pci_dev_restore() Srirangan Madhavan
2026-06-23 3:34 ` sashiko-bot
2026-06-24 2:17 ` Dan Williams (nvidia)
2026-06-23 3:24 ` [PATCH v7 05/11] cxl: Add CXL Device Reset helper Srirangan Madhavan
2026-06-23 3:36 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 06/11] cxl: Validate HDM ranges before CXL reset Srirangan Madhavan
2026-06-23 3:33 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 07/11] PCI/cxl: Discover the CXL reset scope Srirangan Madhavan
2026-06-23 3:34 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 08/11] cxl: Coordinate sibling functions for CXL reset Srirangan Madhavan
2026-06-23 3:42 ` sashiko-bot
2026-06-23 23:00 ` Dan Williams (nvidia)
2026-06-23 3:24 ` [PATCH v7 09/11] cxl: Restore CXL HDM state after PCI reset Srirangan Madhavan
2026-06-23 3:39 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 10/11] PCI/cxl: Expose CXL Reset as a PCI reset method Srirangan Madhavan
2026-06-23 3:47 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 11/11] Documentation/ABI: Document CXL Reset " Srirangan Madhavan
2026-06-23 3:35 ` sashiko-bot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6a3b3dce9f3e0_3c9f1005e@djbw-dev.notmuch \
--to=djbw@kernel.org \
--cc=alison.schofield@intel.com \
--cc=alwilliamson@nvidia.com \
--cc=bhelgaas@google.com \
--cc=danwilliams@nvidia.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=icheng@nvidia.com \
--cc=ira.weiny@intel.com \
--cc=jan@nvidia.com \
--cc=jic23@kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-tegra@vger.kernel.org \
--cc=mhonap@nvidia.com \
--cc=skancherla@nvidia.com \
--cc=smadhavan@nvidia.com \
--cc=vaslot@nvidia.com \
--cc=vishal.l.verma@intel.com \
--cc=vsethi@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.