Linux Tegra architecture development
 help / color / mirror / Atom feed
From: Srirangan Madhavan <smadhavan@nvidia.com>
To: Alison Schofield <alison.schofield@intel.com>,
	Bjorn Helgaas <bhelgaas@google.com>,
	Dan Williams <djbw@kernel.org>, Dave Jiang <dave.jiang@intel.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Ira Weiny <ira.weiny@intel.com>,
	Jonathan Cameron <jic23@kernel.org>,
	Vishal Verma <vishal.l.verma@intel.com>,
	linux-cxl@vger.kernel.org, linux-pci@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: vsethi@nvidia.com, alwilliamson@nvidia.com,
	Dan Williams <danwilliams@nvidia.com>,
	Sai Yashwanth Reddy Kancherla <skancherla@nvidia.com>,
	Vishal Aslot <vaslot@nvidia.com>,
	Manish Honap <mhonap@nvidia.com>, Jiandi An <jan@nvidia.com>,
	Richard Cheng <icheng@nvidia.com>,
	linux-tegra@vger.kernel.org,
	Srirangan Madhavan <smadhavan@nvidia.com>
Subject: [PATCH v7 08/11] cxl: Coordinate sibling functions for CXL reset
Date: Tue, 23 Jun 2026 03:24:50 +0000	[thread overview]
Message-ID: <20260623032453.3404772-9-smadhavan@nvidia.com> (raw)
In-Reply-To: <20260623032453.3404772-1-smadhavan@nvidia.com>

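CXL Device Reset affects all CXL.cache and CXL.mem functions in the reset
scope. Lock same-scope siblings with pci_dev_trylock(), backing off with
-EAGAIN if any sibling lock is contended, then save/disable them, drain
pending transactions, and keep their IOMMU reset blocks held until the
reset attempt finishes and the siblings are restored.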

Also include mem-capable siblings in HDM range validation and CPU cache
invalidation. Cache-only siblings are quiesced, but skipped for HDM range
handling.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
---
 drivers/cxl/core/reset.c | 146 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 130 insertions(+), 16 deletions(-)

diff --git a/drivers/cxl/core/reset.c b/drivers/cxl/core/reset.c
index 1ae714a3595c..69bcfab89858 100644
--- a/drivers/cxl/core/reset.c
+++ b/drivers/cxl/core/reset.c
@@ -344,10 +344,17 @@ static const u32 cxl_reset_timeout_ms[] = {
 #define CXL_RESET_FUNCTION_MAP_REGS (CXL_RESET_MAX_FUNCTIONS / 32)
 #define CXL_RESET_SIBLINGS_INIT 8
 
+struct cxl_reset_sibling {
+	struct pci_dev *pdev;
+	bool has_mem;
+};
+
 struct cxl_reset_context {
 	struct pci_dev *target;
-	struct pci_dev **siblings;
+	struct cxl_reset_sibling *siblings;
 	int nr_siblings;
+	int nr_siblings_locked;
+	int nr_siblings_prepared;
 	int sibling_capacity;
 };
 
@@ -380,7 +387,7 @@ static void cxl_reset_context_init(struct cxl_reset_context *ctx,
 static void cxl_reset_context_destroy(struct cxl_reset_context *ctx)
 {
 	for (int i = 0; i < ctx->nr_siblings; i++)
-		pci_dev_put(ctx->siblings[i]);
+		pci_dev_put(ctx->siblings[i].pdev);
 	kfree(ctx->siblings);
 }
 
@@ -426,35 +433,49 @@ static int cxl_reset_func_map_bit(struct pci_dev *sibling, bool ari)
 	return PCI_FUNC(sibling->devfn) * 32 + PCI_SLOT(sibling->devfn);
 }
 
-static int cxl_reset_has_cache_or_mem(struct pci_dev *pdev)
+static int cxl_reset_read_cxl_cap(struct pci_dev *pdev, u16 *cap)
 {
 	int dvsec, rc;
-	u16 cap;
 
 	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
 					  PCI_DVSEC_CXL_DEVICE);
 	if (!dvsec)
-		return 0;
+		return -ENODEV;
 
-	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, cap);
 	if (rc) {
 		rc = pcibios_err_to_errno(rc);
-		pci_warn(pdev,
-			 "failed to read CXL capability; cannot determine reset scope: %d\n",
-			 rc);
+		pci_warn(pdev, "failed to read CXL capability: %d\n", rc);
 		return rc;
 	}
 
+	return 0;
+}
+
+static int cxl_reset_has_cache_or_mem(struct pci_dev *pdev, bool *has_mem)
+{
+	u16 cap;
+	int rc;
+
+	*has_mem = false;
+
+	rc = cxl_reset_read_cxl_cap(pdev, &cap);
+	if (rc == -ENODEV)
+		return 0;
+	if (rc)
+		return rc;
+
+	*has_mem = cap & PCI_DVSEC_CXL_MEM_CAPABLE;
 	return !!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE |
 			 PCI_DVSEC_CXL_MEM_CAPABLE));
 }
 
 static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
-				 struct pci_dev *sibling)
+				 struct pci_dev *sibling, bool has_mem)
 {
 	if (ctx->nr_siblings >= ctx->sibling_capacity) {
 		int capacity = ctx->sibling_capacity ?: CXL_RESET_SIBLINGS_INIT;
-		struct pci_dev **siblings;
+		struct cxl_reset_sibling *siblings;
 
 		if (capacity > INT_MAX / 2)
 			return -ENOMEM;
@@ -470,7 +491,11 @@ static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
 		ctx->sibling_capacity = capacity;
 	}
 
-	ctx->siblings[ctx->nr_siblings++] = pci_dev_get(sibling);
+	ctx->siblings[ctx->nr_siblings] = (struct cxl_reset_sibling) {
+		.pdev = pci_dev_get(sibling),
+		.has_mem = has_mem,
+	};
+	ctx->nr_siblings++;
 	return 0;
 }
 
@@ -479,6 +504,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
 	struct cxl_reset_walk_context *wctx = data;
 	struct cxl_reset_context *ctx = wctx->ctx;
 	struct pci_dev *pdev = ctx->target;
+	bool has_mem;
 	int fn, rc;
 
 	if (sibling == pdev)
@@ -494,7 +520,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
 	if (test_bit(fn, wctx->non_cxl_func_map))
 		return 0;
 
-	rc = cxl_reset_has_cache_or_mem(sibling);
+	rc = cxl_reset_has_cache_or_mem(sibling, &has_mem);
 	if (rc < 0) {
 		wctx->rc = rc;
 		return rc;
@@ -502,7 +528,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
 	if (!rc)
 		return 0;
 
-	wctx->rc = cxl_reset_add_sibling(ctx, sibling);
+	wctx->rc = cxl_reset_add_sibling(ctx, sibling, has_mem);
 	return wctx->rc;
 }
 
@@ -520,6 +546,69 @@ static int cxl_reset_collect_siblings(struct cxl_reset_context *ctx)
 	return wctx.rc;
 }
 
+static void cxl_pci_functions_unlock(struct cxl_reset_context *ctx)
+{
+	while (ctx->nr_siblings_locked) {
+		struct pci_dev *sibling;
+
+		sibling = ctx->siblings[--ctx->nr_siblings_locked].pdev;
+		pci_dev_unlock(sibling);
+	}
+}
+
+static int cxl_pci_functions_lock(struct cxl_reset_context *ctx)
+{
+	for (int i = 0; i < ctx->nr_siblings; i++) {
+		struct pci_dev *sibling = ctx->siblings[i].pdev;
+
+		if (!pci_dev_trylock(sibling)) {
+			cxl_pci_functions_unlock(ctx);
+			return -EAGAIN;
+		}
+
+		ctx->nr_siblings_locked++;
+	}
+
+	return 0;
+}
+
+static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
+{
+	while (ctx->nr_siblings_prepared) {
+		struct pci_dev *sibling;
+
+		sibling = ctx->siblings[--ctx->nr_siblings_prepared].pdev;
+		pci_dev_reset_iommu_done(sibling);
+		pci_dev_restore(sibling);
+	}
+}
+
+static int cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
+{
+	for (int i = 0; i < ctx->nr_siblings_locked; i++) {
+		struct pci_dev *sibling = ctx->siblings[i].pdev;
+		int rc;
+
+		pci_dev_save_and_disable(sibling);
+		if (!pci_wait_for_pending_transaction(sibling))
+			pci_err(sibling,
+				"timed out waiting for pending transactions\n");
+
+		rc = pci_dev_reset_iommu_prepare(sibling);
+		if (rc) {
+			pci_err(sibling,
+				"failed to stop IOMMU for CXL reset: %d\n",
+				rc);
+			pci_dev_restore(sibling);
+			return rc;
+		}
+
+		ctx->nr_siblings_prepared++;
+	}
+
+	return 0;
+}
+
 static void cxl_hdm_range_context_init(struct cxl_hdm_range_context *ctx)
 {
 	INIT_LIST_HEAD(&ctx->ranges);
@@ -716,8 +805,9 @@ static int cxl_hdm_ranges_flush_cpu_caches(struct cxl_hdm_range_context *ctx,
 }
 
 static int cxl_hdm_ranges_prepare(struct cxl_hdm_range_context *ctx,
-				  struct pci_dev *pdev)
+				  struct cxl_reset_context *reset_ctx)
 {
+	struct pci_dev *pdev = reset_ctx->target;
 	int rc;
 
 	lockdep_assert_held_write(&cxl_rwsem.region);
@@ -726,6 +816,17 @@ static int cxl_hdm_ranges_prepare(struct cxl_hdm_range_context *ctx,
 	if (rc)
 		return rc;
 
+	for (int i = 0; i < reset_ctx->nr_siblings; i++) {
+		struct cxl_reset_sibling *sibling = &reset_ctx->siblings[i];
+
+		if (!sibling->has_mem)
+			continue;
+
+		rc = cxl_hdm_ranges_collect(ctx, sibling->pdev);
+		if (rc)
+			return rc;
+	}
+
 	rc = cxl_hdm_ranges_request(ctx);
 	if (rc)
 		return rc;
@@ -944,11 +1045,24 @@ int cxl_reset_function(struct pci_dev *pdev, bool probe)
 	if (rc)
 		goto out;
 
+	rc = cxl_pci_functions_lock(&ctx);
+	if (rc)
+		goto out_unlock;
+
+	rc = cxl_pci_functions_reset_prepare(&ctx);
+	if (rc)
+		goto out_functions_done;
+
 	scoped_guard(rwsem_write, &cxl_rwsem.region) {
-		rc = cxl_hdm_ranges_prepare(&range_ctx, pdev);
+		rc = cxl_hdm_ranges_prepare(&range_ctx, &ctx);
 		if (!rc)
 			rc = cxl_reset_execute(pdev, dvsec);
 	}
+
+out_functions_done:
+	cxl_pci_functions_reset_done(&ctx);
+out_unlock:
+	cxl_pci_functions_unlock(&ctx);
 out:
 	cxl_hdm_range_context_destroy(&range_ctx);
 	cxl_reset_context_destroy(&ctx);
-- 
2.43.0


  parent reply	other threads:[~2026-06-23  3:25 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-23  3:24 [PATCH v7 00/11] PCI/CXL: Add CXL reset support for Type 2 devices Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 01/11] cxl: Split decoder programming into a reusable helper Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 02/11] cxl: Cache decoder settings on PCI devices Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 03/11] cxl: Cache endpoint decoder settings during PCI enumeration Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 04/11] PCI: Export pci_dev_save_and_disable() and pci_dev_restore() Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 05/11] cxl: Add CXL Device Reset helper Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 06/11] cxl: Validate HDM ranges before CXL reset Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 07/11] PCI/cxl: Discover the CXL reset scope Srirangan Madhavan
2026-06-23  3:24 ` Srirangan Madhavan [this message]
2026-06-23  3:24 ` [PATCH v7 09/11] cxl: Restore CXL HDM state after PCI reset Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 10/11] PCI/cxl: Expose CXL Reset as a PCI reset method Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 11/11] Documentation/ABI: Document CXL Reset " Srirangan Madhavan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260623032453.3404772-9-smadhavan@nvidia.com \
    --to=smadhavan@nvidia.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=bhelgaas@google.com \
    --cc=danwilliams@nvidia.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=djbw@kernel.org \
    --cc=icheng@nvidia.com \
    --cc=ira.weiny@intel.com \
    --cc=jan@nvidia.com \
    --cc=jic23@kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-tegra@vger.kernel.org \
    --cc=mhonap@nvidia.com \
    --cc=skancherla@nvidia.com \
    --cc=vaslot@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.