Linux-RISC-V Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: fangyu.yu@linux.alibaba.com
To: tjeznach@rivosinc.com, joro@8bytes.org, will@kernel.org,
	robin.murphy@arm.com, pjw@kernel.org, palmer@dabbelt.com,
	aou@eecs.berkeley.edu, alex@ghiti.fr,
	andrew.jones@oss.qualcomm.com
Cc: guoren@kernel.org, iommu@lists.linux.dev,
	linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org,
	Fangyu Yu <fangyu.yu@linux.alibaba.com>
Subject: [PATCH 1/2] iommu/riscv: Add NAPOT range invalidation support for IOTINVAL
Date: Sun,  8 Feb 2026 22:42:12 +0800	[thread overview]
Message-ID: <20260208144213.94856-2-fangyu.yu@linux.alibaba.com> (raw)
In-Reply-To: <20260208144213.94856-1-fangyu.yu@linux.alibaba.com>

From: Fangyu Yu <fangyu.yu@linux.alibaba.com>

RISC-V IOMMU v1.0.1 defines an Address Range Invalidation extension
(capabilities.S) which allows encoding the invalidation size as a
NAPOT range in the ADDR operand when issuing IOTINVAL.VMA/GVMA with
the S bit set. This can significantly reduce the number of invalidation
commands, especially when superpages are used.

Add the missing capabilities.S definition, introduce the IOTINVAL.S bit
and a helper to program NAPOT-encoded ranges, and switch the IOTLB
invalidation path to use range invalidations when they are available. The
implementation splits the requested interval into the largest naturally
aligned NAPOT ranges (8KiB to 1GiB), invalidates any leftover 4KiB page
individually, and falls back to a whole-address-space invalidation for
requests larger than 1GiB.

Signed-off-by: Fangyu Yu <fangyu.yu@linux.alibaba.com>
---
 drivers/iommu/riscv/iommu-bits.h | 10 ++++
 drivers/iommu/riscv/iommu.c      | 86 ++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 98daf0e1a306..0d1f8813ae31 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -62,6 +62,7 @@
 #define RISCV_IOMMU_CAPABILITIES_PD8		BIT_ULL(38)
 #define RISCV_IOMMU_CAPABILITIES_PD17		BIT_ULL(39)
 #define RISCV_IOMMU_CAPABILITIES_PD20		BIT_ULL(40)
+#define RISCV_IOMMU_CAPABILITIES_S		BIT_ULL(43)
 
 /**
  * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
@@ -472,6 +473,7 @@ struct riscv_iommu_command {
 #define RISCV_IOMMU_CMD_IOTINVAL_PSCV		BIT_ULL(32)
 #define RISCV_IOMMU_CMD_IOTINVAL_GV		BIT_ULL(33)
 #define RISCV_IOMMU_CMD_IOTINVAL_GSCID		GENMASK_ULL(59, 44)
+#define RISCV_IOMMU_CMD_IOTINVAL_S		BIT_ULL(9)	/* in dword1: ADDR is a NAPOT range */
 /* dword1[61:10] is the 4K-aligned page address */
 #define RISCV_IOMMU_CMD_IOTINVAL_ADDR		GENMASK_ULL(61, 10)
 
@@ -715,6 +717,14 @@ static inline void riscv_iommu_cmd_inval_vma(struct riscv_iommu_command *cmd)
 	cmd->dword1 = 0;
 }
 
+/* Set ADDR to a NAPOT-encoded range (4KiB units) and flag it with S and AV. */
+static inline void riscv_iommu_cmd_inval_set_range(struct riscv_iommu_command *cmd,
+						   u64 addr)
+{
+	cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV;
+	cmd->dword1 = RISCV_IOMMU_CMD_IOTINVAL_S | FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, addr);
+}
+
 static inline void riscv_iommu_cmd_inval_set_addr(struct riscv_iommu_command *cmd,
 						  u64 addr)
 {
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index d9429097a2b5..ae48409a052a 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -913,7 +913,88 @@ static void riscv_iommu_bond_unlink(struct riscv_iommu_domain *domain,
 		riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
 	}
 }
+
+/*
+ * Encode a NAPOT range for IOTINVAL.{VMA,GVMA} when the S bit is set.
+ *
+ * Per the RISC-V IOMMU Address Range Invalidation extension, ADDR is NAPOT
+ * encoded in 4KiB units: if the lowest 0 bit of ADDR is at position X, the
+ * range size is 2^(X+1) * 4KiB (X=0 => 8KiB). So for size = 4KiB * 2^k
+ * (k >= 1) with @start aligned to @size, set the low (k-1) bits of the PFN;
+ * bit (k-1) is already 0 by alignment. Sizes below 8KiB have no encoding;
+ * the PFN is returned unchanged for them instead of shifting by (k-1) < 0.
+ */
+static unsigned long range_encode(unsigned long start, unsigned long size)
+{
+	int k = ilog2(size >> PAGE_SHIFT);
+	unsigned long low_ones = k > 0 ? (1UL << (k - 1)) - 1 : 0;
+
+	return (start >> PAGE_SHIFT) | low_ones;
+}
+
+/*
+ * riscv_iommu_iotlb_inval_range - invalidate an interval using NAPOT ranges
+ * @domain: domain whose PSCID selects the entries to invalidate
+ * @iommu:  IOMMU the IOTINVAL commands are sent to
+ * @start:  first byte of the interval
+ * @end:    last byte of the interval (inclusive)
+ *
+ * The page-aligned interval is split greedily into the largest naturally
+ * aligned power-of-two blocks of 8KiB..1GiB, each sent as one command with
+ * the S bit set; a leftover 4KiB page is sent as a plain address command.
+ * No riscv_iommu_cmd_sync() is issued here.
+ */
+static void riscv_iommu_iotlb_inval_range(struct riscv_iommu_domain *domain,
+					  struct riscv_iommu_device *iommu,
+					  unsigned long start, unsigned long end)
+{
+	struct riscv_iommu_command cmd;
+	unsigned long len = end - start + 1;
+	unsigned long page_start = start & PAGE_MASK;
+	unsigned long limit = PAGE_ALIGN(end + 1);
+	unsigned long cur, size;
+	int order;
+
+	riscv_iommu_cmd_inval_vma(&cmd);
+	riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
+
+	/*
+	 * Flush the whole address space (AV=0, PSCV=1) instead of splitting when:
+	 *  - len wrapped to 0, e.g. the request is [0, ULONG_MAX];
+	 *  - limit wrapped, i.e. the request ends in the topmost page;
+	 *  - the request exceeds 1GiB, which keeps the number of queued
+	 *    IOTINVAL commands bounded for large or poorly aligned ranges.
+	 */
+	if (!len || len > SZ_1G || limit <= page_start) {
+		riscv_iommu_cmd_send(iommu, &cmd);
+		return;
+	}
 
+	for (cur = page_start; cur < limit; cur += size) {
+		/* Largest block first: 1GiB (order 18) down to 8KiB (order 1). */
+		for (order = 18; order >= 1; order--) {
+			size = (1UL << order) * SZ_4K;
+			/* limit - cur cannot wrap, unlike cur + size. */
+			if (size <= limit - cur && IS_ALIGNED(cur, size))
+				break;
+		}
+
+		if (order >= 1) {
+			/* A NAPOT block fits at cur: one ranged command. */
+			riscv_iommu_cmd_inval_set_range(&cmd, range_encode(cur, size));
+		} else {
+			/*
+			 * No NAPOT block fits here: invalidate a single page.
+			 * NOTE(review): assumes riscv_iommu_cmd_inval_set_addr()
+			 * rewrites dword1, dropping an earlier S bit - confirm.
+			 */
+			size = PAGE_SIZE;
+			riscv_iommu_cmd_inval_set_addr(&cmd, cur);
+		}
+		riscv_iommu_cmd_send(iommu, &cmd);
+	}
+}
+
 /*
  * Send IOTLB.INVAL for whole address space for ranges larger than 2MB.
  * This limit will be replaced with range invalidations, if supported by
@@ -970,6 +1051,11 @@ static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
 		if (iommu == prev)
 			continue;
 
+		if (!!(iommu->caps & RISCV_IOMMU_CAPABILITIES_S)) {
+			riscv_iommu_iotlb_inval_range(domain, iommu, start, end);
+			continue;
+		}
+
 		riscv_iommu_cmd_inval_vma(&cmd);
 		riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
 		if (len && len < RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
-- 
2.50.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  reply	other threads:[~2026-02-08 14:43 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-08 14:42 [PATCH 0/2] iommu/riscv: support range and non-leaf IOTLB invalidation fangyu.yu
2026-02-08 14:42 ` fangyu.yu [this message]
2026-02-08 14:42 ` [PATCH 2/2] iommu/riscv: Add non-leaf invalidation support fangyu.yu
2026-02-10 13:02 ` [PATCH 0/2] iommu/riscv: support range and non-leaf IOTLB invalidation Jason Gunthorpe
2026-02-11 12:07   ` fangyu.yu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260208144213.94856-2-fangyu.yu@linux.alibaba.com \
    --to=fangyu.yu@linux.alibaba.com \
    --cc=alex@ghiti.fr \
    --cc=andrew.jones@oss.qualcomm.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=guoren@kernel.org \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=pjw@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=tjeznach@rivosinc.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox