All of lore.kernel.org
 help / color / mirror / Atom feed
From: Srirangan Madhavan <smadhavan@nvidia.com>
To: Alison Schofield <alison.schofield@intel.com>,
	Bjorn Helgaas <bhelgaas@google.com>,
	Dan Williams <djbw@kernel.org>, Dave Jiang <dave.jiang@intel.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Ira Weiny <ira.weiny@intel.com>,
	Jonathan Cameron <jic23@kernel.org>,
	Vishal Verma <vishal.l.verma@intel.com>,
	linux-cxl@vger.kernel.org, linux-pci@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: vsethi@nvidia.com, alwilliamson@nvidia.com,
	Dan Williams <danwilliams@nvidia.com>,
	Sai Yashwanth Reddy Kancherla <skancherla@nvidia.com>,
	Vishal Aslot <vaslot@nvidia.com>,
	Manish Honap <mhonap@nvidia.com>, Jiandi An <jan@nvidia.com>,
	Richard Cheng <icheng@nvidia.com>,
	linux-tegra@vger.kernel.org,
	Srirangan Madhavan <smadhavan@nvidia.com>
Subject: [PATCH v7 08/11] cxl: Coordinate sibling functions for CXL reset
Date: Tue, 23 Jun 2026 03:24:50 +0000	[thread overview]
Message-ID: <20260623032453.3404772-9-smadhavan@nvidia.com> (raw)
In-Reply-To: <20260623032453.3404772-1-smadhavan@nvidia.com>

CXL Device Reset affects all CXL.cache and CXL.mem functions in the reset
scope. Lock same-scope siblings with pci_dev_trylock(), save/disable them,
drain pending transactions, and hold IOMMU reset blocks until recovery.

Also include mem-capable siblings in HDM range validation and CPU cache
invalidation. Cache-only siblings are quiesced, but skipped for HDM range
handling.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
---
 drivers/cxl/core/reset.c | 146 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 130 insertions(+), 16 deletions(-)

diff --git a/drivers/cxl/core/reset.c b/drivers/cxl/core/reset.c
index 1ae714a3595c..69bcfab89858 100644
--- a/drivers/cxl/core/reset.c
+++ b/drivers/cxl/core/reset.c
@@ -344,10 +344,17 @@ static const u32 cxl_reset_timeout_ms[] = {
 #define CXL_RESET_FUNCTION_MAP_REGS (CXL_RESET_MAX_FUNCTIONS / 32)
 #define CXL_RESET_SIBLINGS_INIT 8
 
+struct cxl_reset_sibling {
+	struct pci_dev *pdev;
+	bool has_mem;
+};
+
 struct cxl_reset_context {
 	struct pci_dev *target;
-	struct pci_dev **siblings;
+	struct cxl_reset_sibling *siblings;
 	int nr_siblings;
+	int nr_siblings_locked;
+	int nr_siblings_prepared;
 	int sibling_capacity;
 };
 
@@ -380,7 +387,7 @@ static void cxl_reset_context_init(struct cxl_reset_context *ctx,
 static void cxl_reset_context_destroy(struct cxl_reset_context *ctx)
 {
 	for (int i = 0; i < ctx->nr_siblings; i++)
-		pci_dev_put(ctx->siblings[i]);
+		pci_dev_put(ctx->siblings[i].pdev);
 	kfree(ctx->siblings);
 }
 
@@ -426,35 +433,49 @@ static int cxl_reset_func_map_bit(struct pci_dev *sibling, bool ari)
 	return PCI_FUNC(sibling->devfn) * 32 + PCI_SLOT(sibling->devfn);
 }
 
-static int cxl_reset_has_cache_or_mem(struct pci_dev *pdev)
+static int cxl_reset_read_cxl_cap(struct pci_dev *pdev, u16 *cap)
 {
 	int dvsec, rc;
-	u16 cap;
 
 	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
 					  PCI_DVSEC_CXL_DEVICE);
 	if (!dvsec)
-		return 0;
+		return -ENODEV;
 
-	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, cap);
 	if (rc) {
 		rc = pcibios_err_to_errno(rc);
-		pci_warn(pdev,
-			 "failed to read CXL capability; cannot determine reset scope: %d\n",
-			 rc);
+		pci_warn(pdev, "failed to read CXL capability: %d\n", rc);
 		return rc;
 	}
 
+	return 0;
+}
+
+static int cxl_reset_has_cache_or_mem(struct pci_dev *pdev, bool *has_mem)
+{
+	u16 cap;
+	int rc;
+
+	*has_mem = false;
+
+	rc = cxl_reset_read_cxl_cap(pdev, &cap);
+	if (rc == -ENODEV)
+		return 0;
+	if (rc)
+		return rc;
+
+	*has_mem = cap & PCI_DVSEC_CXL_MEM_CAPABLE;
 	return !!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE |
 			 PCI_DVSEC_CXL_MEM_CAPABLE));
 }
 
 static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
-				 struct pci_dev *sibling)
+				 struct pci_dev *sibling, bool has_mem)
 {
 	if (ctx->nr_siblings >= ctx->sibling_capacity) {
 		int capacity = ctx->sibling_capacity ?: CXL_RESET_SIBLINGS_INIT;
-		struct pci_dev **siblings;
+		struct cxl_reset_sibling *siblings;
 
 		if (capacity > INT_MAX / 2)
 			return -ENOMEM;
@@ -470,7 +491,11 @@ static int cxl_reset_add_sibling(struct cxl_reset_context *ctx,
 		ctx->sibling_capacity = capacity;
 	}
 
-	ctx->siblings[ctx->nr_siblings++] = pci_dev_get(sibling);
+	ctx->siblings[ctx->nr_siblings] = (struct cxl_reset_sibling) {
+		.pdev = pci_dev_get(sibling),
+		.has_mem = has_mem,
+	};
+	ctx->nr_siblings++;
 	return 0;
 }
 
@@ -479,6 +504,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
 	struct cxl_reset_walk_context *wctx = data;
 	struct cxl_reset_context *ctx = wctx->ctx;
 	struct pci_dev *pdev = ctx->target;
+	bool has_mem;
 	int fn, rc;
 
 	if (sibling == pdev)
@@ -494,7 +520,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
 	if (test_bit(fn, wctx->non_cxl_func_map))
 		return 0;
 
-	rc = cxl_reset_has_cache_or_mem(sibling);
+	rc = cxl_reset_has_cache_or_mem(sibling, &has_mem);
 	if (rc < 0) {
 		wctx->rc = rc;
 		return rc;
@@ -502,7 +528,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *sibling, void *data)
 	if (!rc)
 		return 0;
 
-	wctx->rc = cxl_reset_add_sibling(ctx, sibling);
+	wctx->rc = cxl_reset_add_sibling(ctx, sibling, has_mem);
 	return wctx->rc;
 }
 
@@ -520,6 +546,69 @@ static int cxl_reset_collect_siblings(struct cxl_reset_context *ctx)
 	return wctx.rc;
 }
 
+static void cxl_pci_functions_unlock(struct cxl_reset_context *ctx)
+{
+	while (ctx->nr_siblings_locked) {
+		struct pci_dev *sibling;
+
+		sibling = ctx->siblings[--ctx->nr_siblings_locked].pdev;
+		pci_dev_unlock(sibling);
+	}
+}
+
+static int cxl_pci_functions_lock(struct cxl_reset_context *ctx)
+{
+	for (int i = 0; i < ctx->nr_siblings; i++) {
+		struct pci_dev *sibling = ctx->siblings[i].pdev;
+
+		if (!pci_dev_trylock(sibling)) {
+			cxl_pci_functions_unlock(ctx);
+			return -EAGAIN;
+		}
+
+		ctx->nr_siblings_locked++;
+	}
+
+	return 0;
+}
+
+static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
+{
+	while (ctx->nr_siblings_prepared) {
+		struct pci_dev *sibling;
+
+		sibling = ctx->siblings[--ctx->nr_siblings_prepared].pdev;
+		pci_dev_reset_iommu_done(sibling);
+		pci_dev_restore(sibling);
+	}
+}
+
+static int cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
+{
+	for (int i = 0; i < ctx->nr_siblings_locked; i++) {
+		struct pci_dev *sibling = ctx->siblings[i].pdev;
+		int rc;
+
+		pci_dev_save_and_disable(sibling);
+		if (!pci_wait_for_pending_transaction(sibling))
+			pci_err(sibling,
+				"timed out waiting for pending transactions\n");
+
+		rc = pci_dev_reset_iommu_prepare(sibling);
+		if (rc) {
+			pci_err(sibling,
+				"failed to stop IOMMU for CXL reset: %d\n",
+				rc);
+			pci_dev_restore(sibling);
+			return rc;
+		}
+
+		ctx->nr_siblings_prepared++;
+	}
+
+	return 0;
+}
+
 static void cxl_hdm_range_context_init(struct cxl_hdm_range_context *ctx)
 {
 	INIT_LIST_HEAD(&ctx->ranges);
@@ -716,8 +805,9 @@ static int cxl_hdm_ranges_flush_cpu_caches(struct cxl_hdm_range_context *ctx,
 }
 
 static int cxl_hdm_ranges_prepare(struct cxl_hdm_range_context *ctx,
-				  struct pci_dev *pdev)
+				  struct cxl_reset_context *reset_ctx)
 {
+	struct pci_dev *pdev = reset_ctx->target;
 	int rc;
 
 	lockdep_assert_held_write(&cxl_rwsem.region);
@@ -726,6 +816,17 @@ static int cxl_hdm_ranges_prepare(struct cxl_hdm_range_context *ctx,
 	if (rc)
 		return rc;
 
+	for (int i = 0; i < reset_ctx->nr_siblings; i++) {
+		struct cxl_reset_sibling *sibling = &reset_ctx->siblings[i];
+
+		if (!sibling->has_mem)
+			continue;
+
+		rc = cxl_hdm_ranges_collect(ctx, sibling->pdev);
+		if (rc)
+			return rc;
+	}
+
 	rc = cxl_hdm_ranges_request(ctx);
 	if (rc)
 		return rc;
@@ -944,11 +1045,24 @@ int cxl_reset_function(struct pci_dev *pdev, bool probe)
 	if (rc)
 		goto out;
 
+	rc = cxl_pci_functions_lock(&ctx);
+	if (rc)
+		goto out_unlock;
+
+	rc = cxl_pci_functions_reset_prepare(&ctx);
+	if (rc)
+		goto out_functions_done;
+
 	scoped_guard(rwsem_write, &cxl_rwsem.region) {
-		rc = cxl_hdm_ranges_prepare(&range_ctx, pdev);
+		rc = cxl_hdm_ranges_prepare(&range_ctx, &ctx);
 		if (!rc)
 			rc = cxl_reset_execute(pdev, dvsec);
 	}
+
+out_functions_done:
+	cxl_pci_functions_reset_done(&ctx);
+out_unlock:
+	cxl_pci_functions_unlock(&ctx);
 out:
 	cxl_hdm_range_context_destroy(&range_ctx);
 	cxl_reset_context_destroy(&ctx);
-- 
2.43.0


  parent reply	other threads:[~2026-06-23  3:25 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-23  3:24 [PATCH v7 00/11] PCI/CXL: Add CXL reset support for Type 2 devices Srirangan Madhavan
2026-06-23  3:24 ` [PATCH v7 01/11] cxl: Split decoder programming into a reusable helper Srirangan Madhavan
2026-06-23  3:42   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 02/11] cxl: Cache decoder settings on PCI devices Srirangan Madhavan
2026-06-23  3:42   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 03/11] cxl: Cache endpoint decoder settings during PCI enumeration Srirangan Madhavan
2026-06-23  3:45   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 04/11] PCI: Export pci_dev_save_and_disable() and pci_dev_restore() Srirangan Madhavan
2026-06-23  3:34   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 05/11] cxl: Add CXL Device Reset helper Srirangan Madhavan
2026-06-23  3:36   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 06/11] cxl: Validate HDM ranges before CXL reset Srirangan Madhavan
2026-06-23  3:33   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 07/11] PCI/cxl: Discover the CXL reset scope Srirangan Madhavan
2026-06-23  3:34   ` sashiko-bot
2026-06-23  3:24 ` Srirangan Madhavan [this message]
2026-06-23  3:42   ` [PATCH v7 08/11] cxl: Coordinate sibling functions for CXL reset sashiko-bot
2026-06-23  3:24 ` [PATCH v7 09/11] cxl: Restore CXL HDM state after PCI reset Srirangan Madhavan
2026-06-23  3:39   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 10/11] PCI/cxl: Expose CXL Reset as a PCI reset method Srirangan Madhavan
2026-06-23  3:47   ` sashiko-bot
2026-06-23  3:24 ` [PATCH v7 11/11] Documentation/ABI: Document CXL Reset " Srirangan Madhavan
2026-06-23  3:35   ` sashiko-bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260623032453.3404772-9-smadhavan@nvidia.com \
    --to=smadhavan@nvidia.com \
    --cc=alison.schofield@intel.com \
    --cc=alwilliamson@nvidia.com \
    --cc=bhelgaas@google.com \
    --cc=danwilliams@nvidia.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=djbw@kernel.org \
    --cc=icheng@nvidia.com \
    --cc=ira.weiny@intel.com \
    --cc=jan@nvidia.com \
    --cc=jic23@kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-tegra@vger.kernel.org \
    --cc=mhonap@nvidia.com \
    --cc=skancherla@nvidia.com \
    --cc=vaslot@nvidia.com \
    --cc=vishal.l.verma@intel.com \
    --cc=vsethi@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.