From: Srirangan Madhavan <smadhavan@nvidia.com>
To: linux-cxl@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org
Cc: vsethi@nvidia.com, alwilliamson@nvidia.com,
Dan Williams <danwilliams@nvidia.com>,
Sai Yashwanth Reddy Kancherla <skancherla@nvidia.com>,
Vishal Aslot <vaslot@nvidia.com>,
Manish Honap <mhonap@nvidia.com>, Jiandi An <jan@nvidia.com>,
Richard Cheng <icheng@nvidia.com>,
linux-tegra@vger.kernel.org,
Srirangan Madhavan <smadhavan@nvidia.com>
Subject: [PATCH v6 4/9] PCI/CXL: Add sibling function coordination for reset
Date: Thu, 28 May 2026 08:31:49 +0000 [thread overview]
Message-ID: <20260528083154.137979-5-smadhavan@nvidia.com> (raw)
In-Reply-To: <20260528083154.137979-1-smadhavan@nvidia.com>
Add helpers to collect the CXL sibling PCI functions affected by a CXL reset
and prepare them for reset: each sibling is locked, saved and disabled, and
passed to pci_dev_reset_iommu_prepare(). When reset coordination completes,
undo those steps in reverse order and drop the sibling references.
Use the Non-CXL Function Map DVSEC to exclude non-CXL functions, and
filter remaining siblings to functions that advertise CXL.cache or
CXL.mem capability.
Use pci_dev_trylock() for sibling locking and unwind on contention or
allocation failure, so that a competing reset path fails with -EAGAIN
(lock contention) or -ENOMEM (allocation failure) instead of blocking.
Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
---
drivers/cxl/core/pci.c | 207 ++++++++++++++++++++++++++++++++++
include/uapi/linux/pci_regs.h | 2 +
2 files changed, 209 insertions(+)
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 318744695f62..01effbb4e7cd 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/units.h>
+#include <linux/bitmap.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
+#include <linux/iommu.h>
#include <linux/memregion.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
@@ -15,6 +17,10 @@
#include "core.h"
#include "trace.h"
+#define CXL_RESET_MAX_FUNCTIONS 256 /* width, in bits, of the non-CXL function bitmap */
+#define CXL_RESET_FUNCTION_MAP_REGS (CXL_RESET_MAX_FUNCTIONS / 32) /* number of 32-bit map registers read */
+#define CXL_RESET_SIBLINGS_INIT 8 /* first allocation size of the siblings array */
+
/**
* DOC: cxl core pci
*
@@ -1096,3 +1102,204 @@ cxl_reset_flush_cpu_caches(struct cxl_reset_region_context *ctx)
return 0;
}
+
+struct cxl_reset_context {
+ struct pci_dev *target; /* function whose bus is walked to find siblings */
+ struct pci_dev **siblings; /* krealloc()-grown array of collected siblings */
+ int nr_siblings; /* entries in siblings[]; each holds a pci_dev_get() reference */
+ int sibling_capacity; /* allocated slots in siblings[] */
+ int nr_siblings_prepared; /* leading entries that were locked, saved and IOMMU-prepared */
+};
+
+struct cxl_reset_walk_ctx {
+ struct cxl_reset_context *ctx; /* context that matching siblings are appended to */
+ unsigned long *non_cxl_func_map; /* bitmap; a set bit excludes the corresponding function */
+ int rc; /* result of the most recent cxl_reset_add_sibling() call, 0 if none */
+};
+
+/*
+ * Fill @non_cxl_func_map from the Non-CXL Function Map DVSEC of @pdev.
+ *
+ * @pdev: function whose config space is searched for the Function Map DVSEC
+ * @non_cxl_func_map: output bitmap, CXL_RESET_MAX_FUNCTIONS bits wide
+ *
+ * The bitmap is left all-zero when @pdev has no Function Map DVSEC or when
+ * any of the map registers cannot be read, so callers then treat every
+ * sibling as a CXL function. A partially read map is never published.
+ */
+static void
+cxl_reset_read_non_cxl_func_map(struct pci_dev *pdev,
+				unsigned long *non_cxl_func_map)
+{
+	u32 map[CXL_RESET_FUNCTION_MAP_REGS] = {};
+	u16 dvsec;
+	int rc, i;
+
+	bitmap_zero(non_cxl_func_map, CXL_RESET_MAX_FUNCTIONS);
+
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_FUNCTION_MAP);
+	if (!dvsec)
+		return;
+
+	for (i = 0; i < CXL_RESET_FUNCTION_MAP_REGS; i++) {
+		rc = pci_read_config_dword(pdev,
+					   dvsec + PCI_DVSEC_CXL_FUNCTION_MAP_REG +
+					   i * sizeof(map[i]), &map[i]);
+		if (rc) {
+			/*
+			 * Config accessors return positive PCIBIOS_* codes;
+			 * log a conventional negative errno instead. The
+			 * output bitmap is still all-zero here, so nothing
+			 * needs to be cleared again before bailing out.
+			 */
+			pci_warn(pdev,
+				 "failed to read CXL Function Map; treating all siblings as CXL: %d\n",
+				 pcibios_err_to_errno(rc));
+			return;
+		}
+	}
+
+	bitmap_from_arr32(non_cxl_func_map, map, CXL_RESET_MAX_FUNCTIONS);
+}
+
+/*
+ * Decide whether @sibling is a candidate CXL sibling of @pdev.
+ *
+ * @pdev itself and functions on a different bus are never siblings. The
+ * remaining candidates are looked up in @non_cxl_func_map:
+ *  - with ARI enabled on the bus, the bit index is the sibling's devfn;
+ *  - otherwise the sibling must share @pdev's slot, and the bit index is
+ *    function * 32 + slot.
+ *
+ * Returns true when the selected bit is clear.
+ */
+static bool cxl_reset_is_cxl_sibling(struct pci_dev *pdev,
+				     struct pci_dev *sibling,
+				     unsigned long *non_cxl_func_map)
+{
+	unsigned int bit;
+
+	if (sibling == pdev)
+		return false;
+
+	if (sibling->bus != pdev->bus)
+		return false;
+
+	if (pci_ari_enabled(pdev->bus)) {
+		bit = sibling->devfn;
+	} else {
+		if (PCI_SLOT(sibling->devfn) != PCI_SLOT(pdev->devfn))
+			return false;
+
+		bit = PCI_FUNC(sibling->devfn) * 32 + PCI_SLOT(sibling->devfn);
+	}
+
+	return !test_bit(bit, non_cxl_func_map);
+}
+
+/*
+ * Report whether @pdev advertises the cache-capable or mem-capable bit in
+ * the capability word of its CXL Device DVSEC.
+ *
+ * Returns false when the DVSEC is absent or the capability word cannot be
+ * read.
+ */
+static bool cxl_reset_has_cache_or_mem(struct pci_dev *pdev)
+{
+	const u16 wanted = PCI_DVSEC_CXL_CACHE_CAPABLE |
+			   PCI_DVSEC_CXL_MEM_CAPABLE;
+	u16 pos, caps;
+
+	pos = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					PCI_DVSEC_CXL_DEVICE);
+	if (pos && !pci_read_config_word(pdev, pos + PCI_DVSEC_CXL_CAP, &caps))
+		return (caps & wanted) != 0;
+
+	return false;
+}
+
+/*
+ * Append @sibling to @ctx->siblings, taking a device reference on it.
+ *
+ * The array starts at CXL_RESET_SIBLINGS_INIT slots and doubles whenever it
+ * is full. krealloc_array() is used so the element-count multiplication is
+ * overflow-checked by the allocator rather than open-coded.
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be grown; on failure
+ * the existing array and its entries are left untouched and no reference is
+ * taken on @sibling.
+ */
+static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
+				 struct pci_dev *sibling)
+{
+	if (ctx->nr_siblings >= ctx->sibling_capacity) {
+		struct pci_dev **siblings;
+		int capacity;
+
+		capacity = ctx->sibling_capacity ? ctx->sibling_capacity * 2 :
+						   CXL_RESET_SIBLINGS_INIT;
+		siblings = krealloc_array(ctx->siblings, capacity,
+					  sizeof(*siblings), GFP_KERNEL);
+		if (!siblings)
+			return -ENOMEM;
+
+		ctx->siblings = siblings;
+		ctx->sibling_capacity = capacity;
+	}
+
+	pci_dev_get(sibling);
+	ctx->siblings[ctx->nr_siblings++] = sibling;
+	return 0;
+}
+
+/*
+ * Bus-walk callback: record @sibling in the reset context when it passes
+ * both the sibling filter and the cache/mem capability filter.
+ *
+ * @data is the struct cxl_reset_walk_ctx handed to pci_walk_bus(). Devices
+ * that are filtered out return 0 and leave the stored rc untouched;
+ * otherwise the result of cxl_reset_add_sibling() is stored in the walk
+ * context and returned.
+ */
+static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
+{
+	struct cxl_reset_walk_ctx *walk = data;
+	struct cxl_reset_context *reset = walk->ctx;
+
+	if (cxl_reset_is_cxl_sibling(reset->target, sibling,
+				     walk->non_cxl_func_map) &&
+	    cxl_reset_has_cache_or_mem(sibling)) {
+		walk->rc = cxl_reset_add_sibling(reset, sibling);
+		return walk->rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Populate @ctx->siblings with the CXL siblings of @ctx->target.
+ *
+ * Reads the target's non-CXL function map into an on-stack bitmap, then
+ * walks the target's bus with cxl_reset_collect_sibling().
+ *
+ * Returns the rc left in the walk context: 0, or the error from
+ * cxl_reset_add_sibling(). Siblings collected before an error stay in @ctx
+ * for the caller to release.
+ */
+static int cxl_reset_collect_siblings(struct cxl_reset_context *ctx)
+{
+	DECLARE_BITMAP(excluded, CXL_RESET_MAX_FUNCTIONS);
+	struct cxl_reset_walk_ctx walk = {};
+
+	walk.ctx = ctx;
+	walk.non_cxl_func_map = excluded;
+
+	cxl_reset_read_non_cxl_func_map(ctx->target, excluded);
+	pci_walk_bus(ctx->target->bus, cxl_reset_collect_sibling, &walk);
+
+	return walk.rc;
+}
+
+/*
+ * Undo sibling preparation and release everything held by @ctx.
+ *
+ * The first nr_siblings_prepared entries are unwound last-to-first: IOMMU
+ * done, restore, unlock. Then the reference on every collected sibling is
+ * dropped, the array is freed and the bookkeeping is reset to empty.
+ */
+static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
+{
+	int idx = ctx->nr_siblings_prepared;
+
+	while (idx-- > 0) {
+		struct pci_dev *fn = ctx->siblings[idx];
+
+		pci_dev_reset_iommu_done(fn);
+		pci_dev_restore(fn);
+		pci_dev_unlock(fn);
+	}
+
+	for (idx = 0; idx < ctx->nr_siblings; idx++)
+		pci_dev_put(ctx->siblings[idx]);
+
+	kfree(ctx->siblings);
+
+	ctx->siblings = NULL;
+	ctx->sibling_capacity = 0;
+	ctx->nr_siblings = 0;
+	ctx->nr_siblings_prepared = 0;
+}
+
+/*
+ * Lock one sibling, save and disable it, and prepare its IOMMU for reset.
+ *
+ * Returns 0 with the sibling left locked and prepared. On failure the
+ * sibling is handed back unlocked and restored: -EAGAIN if the lock could
+ * not be taken, otherwise the error from pci_dev_reset_iommu_prepare().
+ */
+static int cxl_reset_prepare_sibling(struct pci_dev *sibling)
+{
+	int rc;
+
+	if (!pci_dev_trylock(sibling))
+		return -EAGAIN;
+
+	pci_dev_save_and_disable(sibling);
+
+	rc = pci_dev_reset_iommu_prepare(sibling);
+	if (!rc)
+		return 0;
+
+	pci_err(sibling, "failed to block IOMMU for CXL reset: %d\n", rc);
+
+	/*
+	 * IOMMU prepare failed, so the caller will not count this sibling as
+	 * prepared and it must not see iommu_done(). Only undo
+	 * save_and_disable() and release the lock here.
+	 */
+	pci_dev_restore(sibling);
+	pci_dev_unlock(sibling);
+	return rc;
+}
+
+/*
+ * Collect the CXL siblings of @ctx->target and prepare each one for reset.
+ *
+ * The sibling bookkeeping in @ctx is reinitialised first; @ctx->target must
+ * already be set. On any failure every sibling prepared so far is unwound
+ * and all references are dropped via cxl_pci_functions_reset_done().
+ *
+ * Returns 0 on success, or the error from collection or from preparing a
+ * sibling.
+ */
+static int __maybe_unused
+cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
+{
+	int rc, i;
+
+	ctx->siblings = NULL;
+	ctx->nr_siblings = 0;
+	ctx->sibling_capacity = 0;
+	ctx->nr_siblings_prepared = 0;
+
+	rc = cxl_reset_collect_siblings(ctx);
+
+	for (i = 0; !rc && i < ctx->nr_siblings; i++) {
+		rc = cxl_reset_prepare_sibling(ctx->siblings[i]);
+		if (!rc)
+			ctx->nr_siblings_prepared++;
+	}
+
+	if (rc)
+		cxl_pci_functions_reset_done(ctx);
+
+	return rc;
+}
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 14f634ab9350..fa1fcd26af01 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1349,6 +1349,7 @@
/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
#define PCI_DVSEC_CXL_DEVICE 0
#define PCI_DVSEC_CXL_CAP 0xA
+#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0) /* CXL.cache capable */
#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2)
#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4)
#define PCI_DVSEC_CXL_CTRL 0xC
@@ -1366,6 +1367,7 @@
/* CXL r4.0, 8.1.4: Non-CXL Function Map DVSEC */
#define PCI_DVSEC_CXL_FUNCTION_MAP 2
+#define PCI_DVSEC_CXL_FUNCTION_MAP_REG 0x0C /* offset of the first Function Map register */
/* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */
#define PCI_DVSEC_CXL_PORT 3
--
2.43.0
next prev parent reply other threads:[~2026-05-28 8:32 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-28 8:31 [PATCH v6 0/9] cxl: Add cxl_reset sysfs attribute for memdevs Srirangan Madhavan
2026-05-28 8:31 ` [PATCH v6 1/9] cxl/hdm: Add helpers to restore and commit memdev decoders Srirangan Madhavan
2026-05-28 9:12 ` sashiko-bot
2026-05-28 11:06 ` Richard Cheng
2026-06-02 18:12 ` Dave Jiang
2026-06-02 18:31 ` Dave Jiang
2026-06-02 20:34 ` Cheatham, Benjamin
2026-06-03 22:35 ` Dan Williams (nvidia)
2026-05-28 8:31 ` [PATCH v6 2/9] PCI: Export pci_dev_save_and_disable() and pci_dev_restore() Srirangan Madhavan
2026-06-02 20:18 ` Dave Jiang
2026-06-03 22:36 ` Dan Williams (nvidia)
2026-05-28 8:31 ` [PATCH v6 3/9] cxl: Add reset-idle and cache flush helpers Srirangan Madhavan
2026-05-28 10:09 ` sashiko-bot
2026-06-02 20:34 ` Cheatham, Benjamin
2026-06-02 20:36 ` Dave Jiang
2026-06-04 2:49 ` Dan Williams (nvidia)
2026-05-28 8:31 ` Srirangan Madhavan [this message]
2026-05-28 10:41 ` [PATCH v6 4/9] PCI/CXL: Add sibling function coordination for reset sashiko-bot
2026-05-28 11:15 ` Richard Cheng
2026-06-02 22:10 ` Dave Jiang
2026-06-04 3:13 ` Dan Williams (nvidia)
2026-05-28 8:31 ` [PATCH v6 5/9] cxl/pci: Add CXL DVSEC reset helper Srirangan Madhavan
2026-05-28 11:05 ` sashiko-bot
2026-06-02 20:34 ` Cheatham, Benjamin
2026-05-28 8:31 ` [PATCH v6 6/9] cxl/pci: Track memdevs affected by CXL reset Srirangan Madhavan
2026-05-28 11:36 ` sashiko-bot
2026-06-02 20:34 ` Cheatham, Benjamin
2026-05-28 8:31 ` [PATCH v6 7/9] cxl/pci: Orchestrate CXL reset for affected memdevs Srirangan Madhavan
2026-05-28 12:25 ` sashiko-bot
2026-06-02 20:34 ` Cheatham, Benjamin
2026-06-04 3:25 ` Dan Williams (nvidia)
2026-05-28 8:31 ` [PATCH v6 8/9] cxl/memdev: Add cxl_reset sysfs attribute Srirangan Madhavan
2026-05-28 13:03 ` sashiko-bot
2026-06-02 21:35 ` Cheatham, Benjamin
2026-06-02 23:50 ` Dave Jiang
2026-05-28 8:31 ` [PATCH v6 9/9] Documentation/ABI: Document CXL memdev cxl_reset Srirangan Madhavan
2026-06-03 0:11 ` Dave Jiang
2026-06-02 20:34 ` [PATCH v6 0/9] cxl: Add cxl_reset sysfs attribute for memdevs Cheatham, Benjamin
2026-06-02 21:42 ` Dan Williams (nvidia)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260528083154.137979-5-smadhavan@nvidia.com \
--to=smadhavan@nvidia.com \
--cc=alwilliamson@nvidia.com \
--cc=danwilliams@nvidia.com \
--cc=icheng@nvidia.com \
--cc=jan@nvidia.com \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-tegra@vger.kernel.org \
--cc=mhonap@nvidia.com \
--cc=skancherla@nvidia.com \
--cc=vaslot@nvidia.com \
--cc=vsethi@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.