From: Zhenyu Ye <yezhenyu2@huawei.com>
To: <catalin.marinas@arm.com>, <will@kernel.org>, <maz@kernel.org>,
<suzuki.poulose@arm.com>, <mark.rutland@arm.com>
Cc: <tangnianyao@huawei.com>, <xiexiangyou@huawei.com>,
<linux-kernel@vger.kernel.org>, <yezhenyu2@huawei.com>,
<arm@kernel.org>
Subject: [RFC PATCH v2] arm64: cpufeatures: add support for tlbi range instructions
Date: Mon, 11 Nov 2019 21:23:55 +0800 [thread overview]
Message-ID: <5DC960EB.9050503@huawei.com> (raw)
ARMv8.4-TLBI provides TLBI invalidation instruction that apply to a
range of input addresses. This patch adds support for this feature.
This is the second version of the patch.
I traced the __flush_tlb_range() for a minute and get some statistical
data as below:
PAGENUM COUNT
1 34944
2 5683
3 1343
4 7857
5 838
9 339
16 933
19 427
20 5821
23 279
41 338
141 279
512 428
1668 120
2038 100
Those data are based on kernel-5.4.0, where PAGENUM = end - start, COUNT
shows number of calls to the __flush_tlb_range() in a minute. There only
shows the data which COUNT >= 100. The kernel is started normally, and
transparent hugepage is opened. As we can see, though most user TLBI
ranges were 1 pages long, the num of long-range can not be ignored.
The new feature of TLB range can improve lots of performance compared to
the current implementation. As an example, flush 512 ranges needs only 1
instruction as opposed to 512 instructions using current implementation.
And for a new hardware feature, support is better than not.
Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
---
ChangeLog v1 -> v2:
- Change the main implementation of this feature.
- Add some comments.
---
arch/arm64/include/asm/cpucaps.h | 3 +-
arch/arm64/include/asm/sysreg.h | 4 ++
arch/arm64/include/asm/tlbflush.h | 99 ++++++++++++++++++++++++++++++-
arch/arm64/kernel/cpufeature.c | 10 ++++
4 files changed, 112 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ac1dbca3d0cd..5b5230060e5b 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,7 +54,8 @@
#define ARM64_WORKAROUND_1463225 44
#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
+#define ARM64_HAS_TLBI_RANGE 47
-#define ARM64_NCAPS 47
+#define ARM64_NCAPS 48
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6e919fafb43d..a6abbf2b067d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -539,6 +539,7 @@
ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
/* id_aa64isar0 */
+#define ID_AA64ISAR0_TLB_SHIFT 56
#define ID_AA64ISAR0_TS_SHIFT 52
#define ID_AA64ISAR0_FHM_SHIFT 48
#define ID_AA64ISAR0_DP_SHIFT 44
@@ -552,6 +553,9 @@
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
+#define ID_AA64ISAR0_TLB_NI 0x0
+#define ID_AA64ISAR0_TLB_RANGE 0x2
+
/* id_aa64isar1 */
#define ID_AA64ISAR1_SB_SHIFT 36
#define ID_AA64ISAR1_FRINTTS_SHIFT 32
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc3949064725..f49bed7ecb68 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -59,6 +59,33 @@
__ta; \
})
+/*
+ * This macro creates a properly formatted VA operand for the TLBI RANGE.
+ * The value bit assignments are:
+ *
+ * +----------+------+-------+-------+-------+----------------------+
+ * | ASID | TG | SCALE | NUM | TTL | BADDR |
+ * +-----------------+-------+-------+-------+----------------------+
+ * |63 48|47 46|45 44|43 39|38 37|36 0|
+ *
+ * The address range is determined by below formula:
+ * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ */
+#define __TLBI_VADDR_RANGE(addr, asid, tg, scale, num, ttl) \
+ ({ \
+ unsigned long __ta = (addr) >> PAGE_SHIFT; \
+ __ta |= (unsigned long)(ttl) << 37; \
+ __ta |= (unsigned long)(num) << 39; \
+ __ta |= (unsigned long)(scale) << 44; \
+ __ta |= (unsigned long)(tg) << 46; \
+ __ta |= (unsigned long)(asid) << 48; \
+ __ta; \
+ })
+
+#define TLB_RANGE_MASK_SHIFT 5
+#define TLB_RANGE_MASK GENMASK_ULL(TLB_RANGE_MASK_SHIFT, 0)
+
/*
* TLB Invalidation
* ================
@@ -177,9 +204,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
*/
#define MAX_TLBI_OPS PTRS_PER_PTE
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- unsigned long stride, bool last_level)
+static inline void __flush_tlb_range_old(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level)
{
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
@@ -211,6 +238,72 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ish);
}
+static inline void __flush_tlb_range_new(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level)
+{
+ int num = 0;
+ int scale = 0;
+ int ttl = 0;
+ int tg = (PAGE_SHIFT - 12) / 2 + 1;
+ unsigned long asid = ASID(vma->vm_mm);
+ unsigned long addr = 0;
+ unsigned long offset = (end - start) >> PAGE_SHIFT;
+
+ if (offset > (1UL << 21)) {
+ flush_tlb_mm(vma->vm_mm);
+ return;
+ }
+
+ dsb(ishst);
+
+ /*
+ * The minimum size of TLB RANGE is 2 PAGE;
+ * Use normal TLB instruction to handle odd PAGEs
+ */
+ if (offset % 2 == 1) {
+ addr = __TLBI_VADDR(start, asid);
+ if (last_level) {
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+ } else {
+ __tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
+ }
+ start += 1 << PAGE_SHIFT;
+ offset -= 1;
+ }
+
+ while (offset > 0) {
+ num = (offset & TLB_RANGE_MASK) - 1;
+ if (num >= 0) {
+ addr = __TLBI_VADDR_RANGE(start, asid, tg,
+ scale, num, ttl);
+ if (last_level) {
+ __tlbi(rvale1is, addr);
+ __tlbi_user(rvale1is, addr);
+ } else {
+ __tlbi(rvae1is, addr);
+ __tlbi_user(rvae1is, addr);
+ }
+ start += (num + 1) << (5 * scale + 1) << PAGE_SHIFT;
+ }
+ scale++;
+ offset >>= TLB_RANGE_MASK_SHIFT;
+ }
+ dsb(ish);
+}
+
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level)
+{
+ if (cpus_have_const_cap(ARM64_HAS_TLBI_RANGE))
+ __flush_tlb_range_new(vma, start, end, stride, last_level);
+ else
+ __flush_tlb_range_old(vma, start, end, stride, last_level);
+}
+
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 80f459ad0190..bdefd8a34729 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1566,6 +1566,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.min_field_value = 1,
},
#endif
+ {
+ .desc = "TLB range maintenance instruction",
+ .capability = ARM64_HAS_TLBI_RANGE,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_TLB_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = ID_AA64ISAR0_TLB_RANGE,
+ },
{},
};
--
2.19.1
next reply other threads:[~2019-11-11 13:24 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-11 13:23 Zhenyu Ye [this message]
2019-11-11 13:27 ` [RFC PATCH v2] arm64: cpufeatures: add support for tlbi range instructions Will Deacon
2019-11-11 13:47 ` Zhenyu Ye
2019-11-11 14:04 ` Marc Zyngier
2019-11-11 14:23 ` Zhenyu Ye
2019-11-19 1:13 ` Hanjun Guo
2019-11-19 1:13 ` Hanjun Guo
2019-11-19 10:03 ` Marc Zyngier
2019-11-19 10:03 ` Marc Zyngier
2019-11-20 1:29 ` Hanjun Guo
2019-11-20 1:29 ` Hanjun Guo
2020-04-10 1:43 ` Hanjun Guo
2020-04-10 11:54 ` Will Deacon
2020-04-10 12:02 ` Will Deacon
2020-04-11 6:39 ` Hanjun Guo
2020-04-14 7:08 ` Will Deacon
2020-04-14 7:27 ` Hanjun Guo
2020-04-14 11:45 ` Zhenyu Ye
2020-04-11 6:24 ` Hanjun Guo
2019-11-20 8:47 ` Will Deacon
2019-11-20 8:47 ` Will Deacon
2019-11-11 16:32 ` Olof Johansson
2019-11-12 2:19 ` Zhenyu Ye
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5DC960EB.9050503@huawei.com \
--to=yezhenyu2@huawei.com \
--cc=arm@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=maz@kernel.org \
--cc=suzuki.poulose@arm.com \
--cc=tangnianyao@huawei.com \
--cc=will@kernel.org \
--cc=xiexiangyou@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.