From: Lu Baolu <baolu.lu@linux.intel.com>
To: Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Robin Murphy <robin.murphy@arm.com>,
Kevin Tian <kevin.tian@intel.com>,
Jason Gunthorpe <jgg@nvidia.com>
Cc: Dmytro Maluka <dmaluka@chromium.org>,
Samiullah Khawaja <skhawaja@google.com>,
iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
Lu Baolu <baolu.lu@linux.intel.com>
Subject: [PATCH 2/8] iommu/vt-d: Add entry_sync support for PASID entry updates
Date: Mon, 9 Mar 2026 14:06:42 +0800 [thread overview]
Message-ID: <20260309060648.276762-3-baolu.lu@linux.intel.com> (raw)
In-Reply-To: <20260309060648.276762-1-baolu.lu@linux.intel.com>
Updating PASID table entries while the device hardware is possibly
performing DMA concurrently is complex. Traditionally, this required
a "clear-then-update" approach — clearing the Present bit, flushing
caches, updating the entry, and then restoring the Present bit. This
causes unnecessary latency or interruptions for transactions that might
not even be affected by the specific bits being changed.
Plumb this driver into the generic entry_sync library to modernize
this process. The library uses the concept of "Used bits" to determine
if a transition can be performed "hitlessly" (via a single atomic
128-bit swap) or if a disruptive 3-step update is truly required.
The implementation includes:
- intel_pasid_get_used(): Defines which bits the IOMMU hardware is
sensitive to based on the PGTT.
- intel_pasid_sync(): Handles the required clflushes, PASID cache
invalidations, and IOTLB/Dev-TLB flushes required between update
steps.
- 128-bit atomicity: Depends on IOMMU_ENTRY_SYNC128 to ensure that
512-bit PASID entries are updated in atomic 128-bit quanta,
preventing the hardware from ever seeing a "torn" entry.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/intel/Kconfig | 2 +
drivers/iommu/intel/pasid.c | 173 ++++++++++++++++++++++++++++++++++++
2 files changed, 175 insertions(+)
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 5471f814e073..7fa31b9d4ef4 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -26,6 +26,8 @@ config INTEL_IOMMU
select PCI_ATS
select PCI_PRI
select PCI_PASID
+ select IOMMU_ENTRY_SYNC
+ select IOMMU_ENTRY_SYNC128
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 9d30015b8940..5b9eb5c8f42d 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -21,12 +21,185 @@
#include "iommu.h"
#include "pasid.h"
#include "../iommu-pages.h"
+#include "../entry_sync.h"
/*
* Intel IOMMU system wide PASID name space:
*/
u32 intel_pasid_max_id = PASID_MAX;
+/*
+ * Plumb into the generic entry_sync library:
+ */
+static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid);
+static void pasid_flush_caches(struct intel_iommu *iommu, struct pasid_entry *pte,
+ u32 pasid, u16 did);
+static void intel_pasid_flush_present(struct intel_iommu *iommu, struct device *dev,
+ u32 pasid, u16 did, struct pasid_entry *pte);
+static void pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
+ u16 did, u32 pasid);
+static void devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid);
+
+/*
+ * Per-call context for one PASID entry update driven by the generic
+ * entry_sync library. Embeds the library's 128-bit writer state plus
+ * the Intel-specific context that intel_pasid_sync() needs between
+ * update steps.
+ */
+struct intel_pasid_writer {
+ struct entry_sync_writer128 writer; /* generic library state; recovered via container_of() */
+ struct intel_iommu *iommu; /* IOMMU that owns the PASID table */
+ struct device *dev; /* device whose PASID entry is being updated */
+ u32 pasid; /* PASID being updated */
+ struct pasid_entry orig_pte; /* snapshot of the entry before the update began */
+ bool was_present; /* Present state observed at the previous sync step */
+};
+
+/*
+ * Identify which bits of the 512-bit scalable-mode PASID entry the HW is
+ * using. The "Used" bits are those that, if changed, would cause the
+ * IOMMU to behave differently for an active transaction. Bits that are
+ * ignored under the entry's current translation type (PGTT) are left
+ * clear, allowing the entry_sync library to change them hitlessly.
+ */
+static void intel_pasid_get_used(const u128 *entry, u128 *used)
+{
+ struct pasid_entry *pe = (struct pasid_entry *)entry;
+ struct pasid_entry *ue = (struct pasid_entry *)used;
+ u16 pgtt;
+
+ /* Initialize used bits to 0. */
+ memset(ue, 0, sizeof(*ue));
+
+ /* Present bit always matters. */
+ ue->val[0] |= PASID_PTE_PRESENT;
+
+ /* Nothing more for non-present entries. */
+ if (!(pe->val[0] & PASID_PTE_PRESENT))
+ return;
+
+ /* Which fields the HW consumes depends on the translation type. */
+ pgtt = pasid_pte_get_pgtt(pe);
+ switch (pgtt) {
+ case PASID_ENTRY_PGTT_FL_ONLY:
+ /* AW, PGTT */
+ ue->val[0] |= GENMASK_ULL(4, 2) | GENMASK_ULL(8, 6);
+ /* PGSNP, PWSNP, DID */
+ ue->val[1] |= GENMASK_ULL(24, 23) | GENMASK_ULL(15, 0);
+ /* FSPTPTR, FSPM */
+ ue->val[2] |= GENMASK_ULL(63, 12) | GENMASK_ULL(3, 2);
+ break;
+ case PASID_ENTRY_PGTT_NESTED:
+ /* SSPTPTR, SSADE, PGTT, AW, FPD */
+ ue->val[0] |= GENMASK_ULL(63, 12) | GENMASK_ULL(9, 6) |
+ GENMASK_ULL(4, 1);
+ /* PGSNP, PWSNP, DID */
+ ue->val[1] |= GENMASK_ULL(24, 23) | GENMASK_ULL(15, 0);
+ /* FSPTPTR, EAFE, WPE, FSPM, SRE */
+ ue->val[2] |= GENMASK_ULL(63, 12) | BIT_ULL(7) |
+ GENMASK_ULL(4, 2) | BIT_ULL(0);
+ break;
+ case PASID_ENTRY_PGTT_SL_ONLY:
+ /* SSPTPTR, SSADE, PGTT, AW, FPD */
+ ue->val[0] |= GENMASK_ULL(63, 12) | GENMASK_ULL(9, 6) |
+ GENMASK_ULL(4, 1);
+ /* DID, PWSNP */
+ ue->val[1] |= GENMASK_ULL(15, 0) | BIT_ULL(23);
+ break;
+ case PASID_ENTRY_PGTT_PT:
+ /* AW, PGTT, FPD */
+ ue->val[0] |= GENMASK_ULL(4, 2) | GENMASK_ULL(8, 6) | BIT_ULL(1);
+ /* DID, PWSNP */
+ ue->val[1] |= GENMASK_ULL(15, 0) | BIT_ULL(23);
+ break;
+ default:
+ /* Unknown PGTT: only the Present bit is marked used. */
+ WARN_ON(true);
+ }
+}
+
+/*
+ * entry_sync ->sync() callback, invoked by the library between update
+ * steps so the hardware never operates on stale state. Flushes the CPU
+ * cache line when the IOMMU is not coherent, then issues the cache/TLB
+ * invalidations appropriate for the Present-bit transition observed
+ * since the previous step.
+ */
+static void intel_pasid_sync(struct entry_sync_writer128 *writer)
+{
+ struct intel_pasid_writer *p_writer = container_of(writer,
+ struct intel_pasid_writer, writer);
+ struct intel_iommu *iommu = p_writer->iommu;
+ struct device *dev = p_writer->dev;
+ bool was_present, is_present;
+ u32 pasid = p_writer->pasid;
+ struct pasid_entry *pte;
+ u16 old_did, old_pgtt;
+
+ pte = intel_pasid_get_entry(dev, pasid);
+ was_present = p_writer->was_present;
+ is_present = pasid_pte_is_present(pte);
+ /* DID/PGTT from the snapshot taken before the update sequence began. */
+ old_did = pasid_get_domain_id(&p_writer->orig_pte);
+ old_pgtt = pasid_pte_get_pgtt(&p_writer->orig_pte);
+
+ /* Update the last present state: */
+ p_writer->was_present = is_present;
+
+ /* Non-coherent HW reads the table from memory; push the CPU cache. */
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(pte, sizeof(*pte));
+
+ /* Sync for "P=0" to "P=1": */
+ if (!was_present) {
+ if (is_present)
+ pasid_flush_caches(iommu, pte, pasid,
+ pasid_get_domain_id(pte));
+
+ return;
+ }
+
+ /* Sync for "P=1" to "P=1": */
+ if (is_present) {
+ intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+ return;
+ }
+
+ /*
+ * Sync for "P=1" to "P=0": PASID cache first, then IOTLB (PASID
+ * granular for first-stage/pass-through, domain selective
+ * otherwise), then the device TLB.
+ */
+ pasid_cache_invalidation_with_pasid(iommu, old_did, pasid);
+
+ if (old_pgtt == PASID_ENTRY_PGTT_PT || old_pgtt == PASID_ENTRY_PGTT_FL_ONLY)
+ qi_flush_piotlb(iommu, old_did, pasid, 0, -1, 0);
+ else
+ iommu->flush.flush_iotlb(iommu, old_did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+ devtlb_invalidation_with_pasid(iommu, dev, pasid);
+}
+
+/* Callbacks that plumb this driver into the generic entry_sync library. */
+static const struct entry_sync_writer_ops128 writer_ops128 = {
+ .get_used = intel_pasid_get_used,
+ .sync = intel_pasid_sync,
+};
+
+/*
+ * Scratch quanta for entry_sync_write128() — presumably 3 update steps
+ * times 4 quanta per entry; TODO confirm against entry_sync.h.
+ */
+#define INTEL_PASID_SYNC_MEM_COUNT 12
+
+/*
+ * Install @target into the PASID table entry of @dev/@pasid through the
+ * entry_sync library. The library decides, via writer_ops128, whether
+ * the transition can be done hitlessly with a single atomic 128-bit
+ * swap or requires the disruptive clear-then-update sequence;
+ * intel_pasid_sync() supplies the flushes between steps.
+ *
+ * Returns 0 on success, -ENODEV if no PASID table entry exists.
+ * __maybe_unused: callers are introduced by later patches in this series.
+ *
+ * NOTE(review): orig_pte is copied non-atomically — presumably callers
+ * serialize updates to a given PASID entry; confirm the locking scheme.
+ */
+static int __maybe_unused intel_pasid_write(struct intel_iommu *iommu,
+ struct device *dev, u32 pasid,
+ u128 *target)
+{
+ struct pasid_entry *pte = intel_pasid_get_entry(dev, pasid);
+ struct intel_pasid_writer p_writer = {
+ .writer = {
+ .ops = &writer_ops128,
+ /* 512 bits total (4 * 128-bit chunks) */
+ .num_quantas = 4,
+ /* The 'P' bit is in the first 128-bit chunk */
+ .vbit_quanta = 0,
+ },
+ .iommu = iommu,
+ .dev = dev,
+ .pasid = pasid,
+ };
+ u128 memory[INTEL_PASID_SYNC_MEM_COUNT];
+
+ if (!pte)
+ return -ENODEV;
+
+ /* Snapshot the pre-update state for the sync callback. */
+ p_writer.orig_pte = *pte;
+ p_writer.was_present = pasid_pte_is_present(pte);
+
+ /*
+ * The library now does the heavy lifting:
+ * 1. Checks if it can do a 1-quanta hitless flip.
+ * 2. If not, it does a 3-step V=0 (disruptive) update.
+ */
+ entry_sync_write128(&p_writer.writer, (u128 *)pte, target, memory, sizeof(memory));
+
+ return 0;
+}
+
/*
* Per device pasid table management:
*/
--
2.43.0
next prev parent reply other threads:[~2026-03-09 6:09 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-09 6:06 [PATCH 0/8] iommu/vt-d: Hitless PASID updates via entry_sync Lu Baolu
2026-03-09 6:06 ` [PATCH 1/8] iommu: Lift and generalize the STE/CD update code from SMMUv3 Lu Baolu
2026-03-09 23:33 ` Samiullah Khawaja
2026-03-10 0:06 ` Samiullah Khawaja
2026-03-14 8:13 ` Baolu Lu
2026-03-16 9:51 ` Will Deacon
2026-03-18 3:10 ` Baolu Lu
2026-03-23 12:55 ` Jason Gunthorpe
2026-03-24 5:30 ` Baolu Lu
2026-03-16 16:35 ` Samiullah Khawaja
2026-03-18 3:23 ` Baolu Lu
2026-03-13 5:39 ` Nicolin Chen
2026-03-16 6:24 ` Baolu Lu
2026-03-23 12:59 ` Jason Gunthorpe
2026-03-24 5:49 ` Baolu Lu
2026-03-09 6:06 ` Lu Baolu [this message]
2026-03-09 13:41 ` [PATCH 2/8] iommu/vt-d: Add entry_sync support for PASID entry updates Jason Gunthorpe
2026-03-11 8:42 ` Baolu Lu
2026-03-11 12:23 ` Jason Gunthorpe
2026-03-12 7:51 ` Baolu Lu
2026-03-12 7:50 ` Baolu Lu
2026-03-12 11:44 ` Jason Gunthorpe
2026-03-15 8:11 ` Baolu Lu
2026-03-23 13:07 ` Jason Gunthorpe
2026-03-24 6:22 ` Baolu Lu
2026-03-24 12:53 ` Jason Gunthorpe
2026-03-09 6:06 ` [PATCH 3/8] iommu/vt-d: Require CMPXCHG16B for PASID support Lu Baolu
2026-03-09 13:42 ` Jason Gunthorpe
2026-03-12 7:59 ` Baolu Lu
2026-03-09 6:06 ` [PATCH 4/8] iommu/vt-d: Add trace events for PASID entry sync updates Lu Baolu
2026-03-09 6:06 ` [PATCH 5/8] iommu/vt-d: Use intel_pasid_write() for first-stage setup Lu Baolu
2026-03-09 6:06 ` [PATCH 6/8] iommu/vt-d: Use intel_pasid_write() for second-stage setup Lu Baolu
2026-03-09 6:06 ` [PATCH 7/8] iommu/vt-d: Use intel_pasid_write() for pass-through setup Lu Baolu
2026-03-09 6:06 ` [PATCH 8/8] iommu/vt-d: Use intel_pasid_write() for nested setup Lu Baolu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260309060648.276762-3-baolu.lu@linux.intel.com \
--to=baolu.lu@linux.intel.com \
--cc=dmaluka@chromium.org \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=robin.murphy@arm.com \
--cc=skhawaja@google.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox