All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zhenyu Ye <yezhenyu2@huawei.com>
To: <catalin.marinas@arm.com>, <will@kernel.org>, <maz@kernel.org>,
	<suzuki.poulose@arm.com>, <mark.rutland@arm.com>
Cc: <tangnianyao@huawei.com>, <xiexiangyou@huawei.com>,
	<linux-kernel@vger.kernel.org>, <yezhenyu2@huawei.com>,
	<arm@kernel.org>
Subject: [RFC PATCH v2] arm64: cpufeatures: add support for tlbi range instructions
Date: Mon, 11 Nov 2019 21:23:55 +0800	[thread overview]
Message-ID: <5DC960EB.9050503@huawei.com> (raw)

ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a
range of input addresses. This patch adds support for this feature.
This is the second version of the patch.

I traced the __flush_tlb_range() for a minute and get some statistical
data as below:

	PAGENUM		COUNT
	1		34944
	2		5683
	3		1343
	4		7857
	5		838
	9		339
	16		933
	19		427
	20		5821
	23		279
	41		338
	141		279
	512		428
	1668		120
	2038		100

Those data are based on kernel-5.4.0, where PAGENUM = end - start, COUNT
shows number of calls to the __flush_tlb_range() in a minute. There only
shows the data which COUNT >= 100. The kernel is started normally, and
transparent hugepage is opened. As we can see, though most user TLBI
ranges were 1 pages long, the num of long-range can not be ignored.

The new feature of TLB range can improve lots of performance compared to
the current implementation. As an example, flush 512 ranges needs only 1
instruction as opposed to 512 instructions using current implementation.

And for a new hardware feature, support is better than not.

Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
---
ChangeLog v1 -> v2:
- Change the main implementation of this feature.
- Add some comments.

---
 arch/arm64/include/asm/cpucaps.h  |  3 +-
 arch/arm64/include/asm/sysreg.h   |  4 ++
 arch/arm64/include/asm/tlbflush.h | 99 ++++++++++++++++++++++++++++++-
 arch/arm64/kernel/cpufeature.c    | 10 ++++
 4 files changed, 112 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ac1dbca3d0cd..5b5230060e5b 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,7 +54,8 @@
 #define ARM64_WORKAROUND_1463225		44
 #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM	45
 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM	46
+#define ARM64_HAS_TLBI_RANGE			47

-#define ARM64_NCAPS				47
+#define ARM64_NCAPS				48

 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6e919fafb43d..a6abbf2b067d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -539,6 +539,7 @@
 			 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1)

 /* id_aa64isar0 */
+#define ID_AA64ISAR0_TLB_SHIFT		56
 #define ID_AA64ISAR0_TS_SHIFT		52
 #define ID_AA64ISAR0_FHM_SHIFT		48
 #define ID_AA64ISAR0_DP_SHIFT		44
@@ -552,6 +553,9 @@
 #define ID_AA64ISAR0_SHA1_SHIFT		8
 #define ID_AA64ISAR0_AES_SHIFT		4

+#define ID_AA64ISAR0_TLB_NI		0x0
+#define ID_AA64ISAR0_TLB_RANGE		0x2
+
 /* id_aa64isar1 */
 #define ID_AA64ISAR1_SB_SHIFT		36
 #define ID_AA64ISAR1_FRINTTS_SHIFT	32
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc3949064725..f49bed7ecb68 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -59,6 +59,33 @@
 		__ta;						\
 	})

+/*
+ * This macro creates a properly formatted VA operand for the TLBI RANGE.
+ * The value bit assignments are:
+ *
+ * +----------+------+-------+-------+-------+----------------------+
+ * |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
+ * +-----------------+-------+-------+-------+----------------------+
+ * |63      48|47  46|45   44|43   39|38   37|36                   0|
+ *
+ * The address range is determined by below formula:
+ * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ */
+#define __TLBI_VADDR_RANGE(addr, asid, tg, scale, num, ttl)	\
+	({							\
+		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
+		__ta |= (unsigned long)(ttl) << 37;		\
+		__ta |= (unsigned long)(num) << 39;		\
+		__ta |= (unsigned long)(scale) << 44;		\
+		__ta |= (unsigned long)(tg) << 46;		\
+		__ta |= (unsigned long)(asid) << 48;		\
+		__ta;						\
+	})
+
+#define TLB_RANGE_MASK_SHIFT 5
+#define TLB_RANGE_MASK GENMASK_ULL(TLB_RANGE_MASK_SHIFT, 0)
+
 /*
  *	TLB Invalidation
  *	================
@@ -177,9 +204,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  */
 #define MAX_TLBI_OPS	PTRS_PER_PTE

-static inline void __flush_tlb_range(struct vm_area_struct *vma,
-				     unsigned long start, unsigned long end,
-				     unsigned long stride, bool last_level)
+static inline void __flush_tlb_range_old(struct vm_area_struct *vma,
+					 unsigned long start, unsigned long end,
+					 unsigned long stride, bool last_level)
 {
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
@@ -211,6 +238,72 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	dsb(ish);
 }

+static inline void __flush_tlb_range_new(struct vm_area_struct *vma,
+					 unsigned long start, unsigned long end,
+					 unsigned long stride, bool last_level)
+{
+	int num = 0;
+	int scale = 0;
+	int ttl = 0;
+	int tg = (PAGE_SHIFT - 12) / 2 + 1;
+	unsigned long asid = ASID(vma->vm_mm);
+	unsigned long addr = 0;
+	unsigned long offset = (end - start) >> PAGE_SHIFT;
+
+	if (offset > (1UL << 21)) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	dsb(ishst);
+
+	/*
+	 * The minimum size of TLB RANGE is 2 PAGE;
+	 * Use normal TLB instruction to handle odd PAGEs
+	 */
+	if (offset % 2 == 1) {
+		addr = __TLBI_VADDR(start, asid);
+		if (last_level) {
+			__tlbi(vale1is, addr);
+			__tlbi_user(vale1is, addr);
+		} else {
+			__tlbi(vae1is, addr);
+			__tlbi_user(vae1is, addr);
+		}
+		start += 1 << PAGE_SHIFT;
+		offset -= 1;
+	}
+
+	while (offset > 0) {
+		num = (offset & TLB_RANGE_MASK) - 1;
+		if (num >= 0) {
+			addr = __TLBI_VADDR_RANGE(start, asid, tg,
+						  scale, num, ttl);
+			if (last_level) {
+				__tlbi(rvale1is, addr);
+				__tlbi_user(rvale1is, addr);
+			} else {
+				__tlbi(rvae1is, addr);
+				__tlbi_user(rvae1is, addr);
+			}
+			start += (num + 1) << (5 * scale + 1) << PAGE_SHIFT;
+		}
+		scale++;
+		offset >>= TLB_RANGE_MASK_SHIFT;
+	}
+	dsb(ish);
+}
+
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end,
+				     unsigned long stride, bool last_level)
+{
+	if (cpus_have_const_cap(ARM64_HAS_TLBI_RANGE))
+		__flush_tlb_range_new(vma, start, end, stride, last_level);
+	else
+		__flush_tlb_range_old(vma, start, end, stride, last_level);
+}
+
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 80f459ad0190..bdefd8a34729 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1566,6 +1566,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 1,
 	},
 #endif
+	{
+		.desc = "TLB range maintenance instruction",
+		.capability = ARM64_HAS_TLBI_RANGE,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR0_EL1,
+		.field_pos = ID_AA64ISAR0_TLB_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = ID_AA64ISAR0_TLB_RANGE,
+	},
 	{},
 };

-- 
2.19.1



             reply	other threads:[~2019-11-11 13:24 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-11 13:23 Zhenyu Ye [this message]
2019-11-11 13:27 ` [RFC PATCH v2] arm64: cpufeatures: add support for tlbi range instructions Will Deacon
2019-11-11 13:47   ` Zhenyu Ye
2019-11-11 14:04     ` Marc Zyngier
2019-11-11 14:23       ` Zhenyu Ye
2019-11-19  1:13       ` Hanjun Guo
2019-11-19  1:13         ` Hanjun Guo
2019-11-19 10:03         ` Marc Zyngier
2019-11-19 10:03           ` Marc Zyngier
2019-11-20  1:29           ` Hanjun Guo
2019-11-20  1:29             ` Hanjun Guo
2020-04-10  1:43             ` Hanjun Guo
2020-04-10 11:54               ` Will Deacon
2020-04-10 12:02                 ` Will Deacon
2020-04-11  6:39                   ` Hanjun Guo
2020-04-14  7:08                     ` Will Deacon
2020-04-14  7:27                       ` Hanjun Guo
2020-04-14 11:45                       ` Zhenyu Ye
2020-04-11  6:24                 ` Hanjun Guo
2019-11-20  8:47           ` Will Deacon
2019-11-20  8:47             ` Will Deacon
2019-11-11 16:32 ` Olof Johansson
2019-11-12  2:19   ` Zhenyu Ye

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5DC960EB.9050503@huawei.com \
    --to=yezhenyu2@huawei.com \
    --cc=arm@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=maz@kernel.org \
    --cc=suzuki.poulose@arm.com \
    --cc=tangnianyao@huawei.com \
    --cc=will@kernel.org \
    --cc=xiexiangyou@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.