From: Srirangan Madhavan <smadhavan@nvidia.com>
To: Alison Schofield <alison.schofield@intel.com>,
Bjorn Helgaas <bhelgaas@google.com>,
Dan Williams <djbw@kernel.org>, Dave Jiang <dave.jiang@intel.com>,
Davidlohr Bueso <dave@stgolabs.net>,
Ira Weiny <ira.weiny@intel.com>,
Jonathan Cameron <jic23@kernel.org>,
Vishal Verma <vishal.l.verma@intel.com>,
linux-cxl@vger.kernel.org, linux-pci@vger.kernel.org,
linux-kernel@vger.kernel.org
Cc: vsethi@nvidia.com, alwilliamson@nvidia.com,
Dan Williams <danwilliams@nvidia.com>,
Sai Yashwanth Reddy Kancherla <skancherla@nvidia.com>,
Vishal Aslot <vaslot@nvidia.com>,
Manish Honap <mhonap@nvidia.com>, Jiandi An <jan@nvidia.com>,
Richard Cheng <icheng@nvidia.com>,
linux-tegra@vger.kernel.org,
Srirangan Madhavan <smadhavan@nvidia.com>
Subject: [PATCH v7 08/11] cxl: Coordinate sibling functions for CXL reset
Date: Tue, 23 Jun 2026 03:24:50 +0000 [thread overview]
Message-ID: <20260623032453.3404772-9-smadhavan@nvidia.com> (raw)
In-Reply-To: <20260623032453.3404772-1-smadhavan@nvidia.com>
CXL Device Reset affects all CXL.cache and CXL.mem functions in the reset
scope. Lock same-scope siblings with pci_dev_trylock(); if any sibling
cannot be locked, release the ones already taken and fail with -EAGAIN.
Once locked, save/disable each sibling, wait for its pending transactions
to drain, and hold its IOMMU reset block until the reset attempt has
finished and the sibling is restored.
Also include mem-capable siblings in HDM range validation and CPU cache
invalidation. Cache-only siblings are quiesced, but skipped for HDM range
handling.
Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
---
drivers/cxl/core/reset.c | 146 ++++++++++++++++++++++++++++++++++-----
1 file changed, 130 insertions(+), 16 deletions(-)
diff --git a/drivers/cxl/core/reset.c b/drivers/cxl/core/reset.c
index 1ae714a3595c..69bcfab89858 100644
--- a/drivers/cxl/core/reset.c
+++ b/drivers/cxl/core/reset.c
@@ -344,10 +344,17 @@ static const u32 cxl_reset_timeout_ms[] = {
#define CXL_RESET_FUNCTION_MAP_REGS (CXL_RESET_MAX_FUNCTIONS / 32)
#define CXL_RESET_SIBLINGS_INIT 8
+struct cxl_reset_sibling {
+ struct pci_dev *pdev;
+ bool has_mem;
+};
+
struct cxl_reset_context {
struct pci_dev *target;
- struct pci_dev **siblings;
+ struct cxl_reset_sibling *siblings;
int nr_siblings;
+ int nr_siblings_locked;
+ int nr_siblings_prepared;
int sibling_capacity;
};
@@ -380,7 +387,7 @@ static void cxl_reset_context_init(struct cxl_reset_context *ctx,
static void cxl_reset_context_destroy(struct cxl_reset_context *ctx)
{
for (int i = 0; i < ctx->nr_siblings; i++)
- pci_dev_put(ctx->siblings[i]);
+ pci_dev_put(ctx->siblings[i].pdev);
kfree(ctx->siblings);
}
@@ -426,35 +433,49 @@ static int cxl_reset_func_map_bit(struct pci_dev *sibling, bool ari)
return PCI_FUNC(sibling->devfn) * 32 + PCI_SLOT(sibling->devfn);
}
-static int cxl_reset_has_cache_or_mem(struct pci_dev *pdev)
+static int cxl_reset_read_cxl_cap(struct pci_dev *pdev, u16 *cap)
{
int dvsec, rc;
- u16 cap;
dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
PCI_DVSEC_CXL_DEVICE);
if (!dvsec)
- return 0;
+ return -ENODEV;
- rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
+ rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, cap);
if (rc) {
rc = pcibios_err_to_errno(rc);
- pci_warn(pdev,
- "failed to read CXL capability; cannot determine reset scope: %d\n",
- rc);
+ pci_warn(pdev, "failed to read CXL capability: %d\n", rc);
return rc;
}
+ return 0;
+}
+
+static int cxl_reset_has_cache_or_mem(struct pci_dev *pdev, bool *has_mem)
+{
+ u16 cap;
+ int rc;
+
+ *has_mem = false;
+
+ rc = cxl_reset_read_cxl_cap(pdev, &cap);
+ if (rc == -ENODEV)
+ return 0;
+ if (rc)
+ return rc;
+
+ *has_mem = cap & PCI_DVSEC_CXL_MEM_CAPABLE;
return !!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE |
PCI_DVSEC_CXL_MEM_CAPABLE));
}
static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
- struct pci_dev *sibling)
+ struct pci_dev *sibling, bool has_mem)
{
if (ctx->nr_siblings >= ctx->sibling_capacity) {
int capacity = ctx->sibling_capacity ?: CXL_RESET_SIBLINGS_INIT;
- struct pci_dev **siblings;
+ struct cxl_reset_sibling *siblings;
if (capacity > INT_MAX / 2)
return -ENOMEM;
@@ -470,7 +491,11 @@ static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
ctx->sibling_capacity = capacity;
}
- ctx->siblings[ctx->nr_siblings++] = pci_dev_get(sibling);
+ ctx->siblings[ctx->nr_siblings] = (struct cxl_reset_sibling) {
+ .pdev = pci_dev_get(sibling),
+ .has_mem = has_mem,
+ };
+ ctx->nr_siblings++;
return 0;
}
@@ -479,6 +504,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
struct cxl_reset_walk_context *wctx = data;
struct cxl_reset_context *ctx = wctx->ctx;
struct pci_dev *pdev = ctx->target;
+ bool has_mem;
int fn, rc;
if (sibling == pdev)
@@ -494,7 +520,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
if (test_bit(fn, wctx->non_cxl_func_map))
return 0;
- rc = cxl_reset_has_cache_or_mem(sibling);
+ rc = cxl_reset_has_cache_or_mem(sibling, &has_mem);
if (rc < 0) {
wctx->rc = rc;
return rc;
@@ -502,7 +528,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
if (!rc)
return 0;
- wctx->rc = cxl_reset_add_sibling(ctx, sibling);
+ wctx->rc = cxl_reset_add_sibling(ctx, sibling, has_mem);
return wctx->rc;
}
@@ -520,6 +546,69 @@ static int cxl_reset_collect_siblings(struct cxl_reset_context *ctx)
return wctx.rc;
}
+static void cxl_pci_functions_unlock(struct cxl_reset_context *ctx)
+{
+ while (ctx->nr_siblings_locked) {
+ struct pci_dev *sibling;
+
+ sibling = ctx->siblings[--ctx->nr_siblings_locked].pdev;
+ pci_dev_unlock(sibling);
+ }
+}
+
+static int cxl_pci_functions_lock(struct cxl_reset_context *ctx)
+{
+ for (int i = 0; i < ctx->nr_siblings; i++) {
+ struct pci_dev *sibling = ctx->siblings[i].pdev;
+
+ if (!pci_dev_trylock(sibling)) {
+ cxl_pci_functions_unlock(ctx);
+ return -EAGAIN;
+ }
+
+ ctx->nr_siblings_locked++;
+ }
+
+ return 0;
+}
+
+static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
+{
+ while (ctx->nr_siblings_prepared) {
+ struct pci_dev *sibling;
+
+ sibling = ctx->siblings[--ctx->nr_siblings_prepared].pdev;
+ pci_dev_reset_iommu_done(sibling);
+ pci_dev_restore(sibling);
+ }
+}
+
+static int cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
+{
+ for (int i = 0; i < ctx->nr_siblings_locked; i++) {
+ struct pci_dev *sibling = ctx->siblings[i].pdev;
+ int rc;
+
+ pci_dev_save_and_disable(sibling);
+ if (!pci_wait_for_pending_transaction(sibling))
+ pci_err(sibling,
+ "timed out waiting for pending transactions\n");
+
+ rc = pci_dev_reset_iommu_prepare(sibling);
+ if (rc) {
+ pci_err(sibling,
+ "failed to stop IOMMU for CXL reset: %d\n",
+ rc);
+ pci_dev_restore(sibling);
+ return rc;
+ }
+
+ ctx->nr_siblings_prepared++;
+ }
+
+ return 0;
+}
+
static void cxl_hdm_range_context_init(struct cxl_hdm_range_context *ctx)
{
INIT_LIST_HEAD(&ctx->ranges);
@@ -716,8 +805,9 @@ static int cxl_hdm_ranges_flush_cpu_caches(struct cxl_hdm_range_context *ctx,
}
static int cxl_hdm_ranges_prepare(struct cxl_hdm_range_context *ctx,
- struct pci_dev *pdev)
+ struct cxl_reset_context *reset_ctx)
{
+ struct pci_dev *pdev = reset_ctx->target;
int rc;
lockdep_assert_held_write(&cxl_rwsem.region);
@@ -726,6 +816,17 @@ static int cxl_hdm_ranges_prepare(struct cxl_hdm_range_context *ctx,
if (rc)
return rc;
+ for (int i = 0; i < reset_ctx->nr_siblings; i++) {
+ struct cxl_reset_sibling *sibling = &reset_ctx->siblings[i];
+
+ if (!sibling->has_mem)
+ continue;
+
+ rc = cxl_hdm_ranges_collect(ctx, sibling->pdev);
+ if (rc)
+ return rc;
+ }
+
rc = cxl_hdm_ranges_request(ctx);
if (rc)
return rc;
@@ -944,11 +1045,24 @@ int cxl_reset_function(struct pci_dev *pdev, bool probe)
if (rc)
goto out;
+ rc = cxl_pci_functions_lock(&ctx);
+ if (rc)
+ goto out_unlock;
+
+ rc = cxl_pci_functions_reset_prepare(&ctx);
+ if (rc)
+ goto out_functions_done;
+
scoped_guard(rwsem_write, &cxl_rwsem.region) {
- rc = cxl_hdm_ranges_prepare(&range_ctx, pdev);
+ rc = cxl_hdm_ranges_prepare(&range_ctx, &ctx);
if (!rc)
rc = cxl_reset_execute(pdev, dvsec);
}
+
+out_functions_done:
+ cxl_pci_functions_reset_done(&ctx);
+out_unlock:
+ cxl_pci_functions_unlock(&ctx);
out:
cxl_hdm_range_context_destroy(&range_ctx);
cxl_reset_context_destroy(&ctx);
--
2.43.0
next prev parent reply other threads:[~2026-06-23 3:25 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-23 3:24 [PATCH v7 00/11] PCI/CXL: Add CXL reset support for Type 2 devices Srirangan Madhavan
2026-06-23 3:24 ` [PATCH v7 01/11] cxl: Split decoder programming into a reusable helper Srirangan Madhavan
2026-06-23 3:42 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 02/11] cxl: Cache decoder settings on PCI devices Srirangan Madhavan
2026-06-23 3:42 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 03/11] cxl: Cache endpoint decoder settings during PCI enumeration Srirangan Madhavan
2026-06-23 3:45 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 04/11] PCI: Export pci_dev_save_and_disable() and pci_dev_restore() Srirangan Madhavan
2026-06-23 3:34 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 05/11] cxl: Add CXL Device Reset helper Srirangan Madhavan
2026-06-23 3:36 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 06/11] cxl: Validate HDM ranges before CXL reset Srirangan Madhavan
2026-06-23 3:33 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 07/11] PCI/cxl: Discover the CXL reset scope Srirangan Madhavan
2026-06-23 3:34 ` sashiko-bot
2026-06-23 3:24 ` Srirangan Madhavan [this message]
2026-06-23 3:42 ` [PATCH v7 08/11] cxl: Coordinate sibling functions for CXL reset sashiko-bot
2026-06-23 3:24 ` [PATCH v7 09/11] cxl: Restore CXL HDM state after PCI reset Srirangan Madhavan
2026-06-23 3:39 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 10/11] PCI/cxl: Expose CXL Reset as a PCI reset method Srirangan Madhavan
2026-06-23 3:47 ` sashiko-bot
2026-06-23 3:24 ` [PATCH v7 11/11] Documentation/ABI: Document CXL Reset " Srirangan Madhavan
2026-06-23 3:35 ` sashiko-bot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260623032453.3404772-9-smadhavan@nvidia.com \
--to=smadhavan@nvidia.com \
--cc=alison.schofield@intel.com \
--cc=alwilliamson@nvidia.com \
--cc=bhelgaas@google.com \
--cc=danwilliams@nvidia.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=djbw@kernel.org \
--cc=icheng@nvidia.com \
--cc=ira.weiny@intel.com \
--cc=jan@nvidia.com \
--cc=jic23@kernel.org \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=linux-tegra@vger.kernel.org \
--cc=mhonap@nvidia.com \
--cc=skancherla@nvidia.com \
--cc=vaslot@nvidia.com \
--cc=vishal.l.verma@intel.com \
--cc=vsethi@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox