public inbox for linux-riscv@lists.infradead.org
 help / color / mirror / Atom feed
From: Xu Lu <luxu.kernel@bytedance.com>
To: paul.walmsley@sifive.com, palmer@dabbelt.com,
	aou@eecs.berkeley.edu, ardb@kernel.org, anup@brainfault.org,
	atishp@atishpatra.org
Cc: dengliang.1214@bytedance.com, xieyongji@bytedance.com,
	lihangjing@bytedance.com, songmuchun@bytedance.com,
	punit.agrawal@bytedance.com, linux-kernel@vger.kernel.org,
	linux-riscv@lists.infradead.org,
	Xu Lu <luxu.kernel@bytedance.com>
Subject: [RFC PATCH V1 06/11] riscv: Distinguish pmd huge pte and napot huge pte
Date: Thu, 23 Nov 2023 14:57:03 +0800	[thread overview]
Message-ID: <20231123065708.91345-7-luxu.kernel@bytedance.com> (raw)
In-Reply-To: <20231123065708.91345-1-luxu.kernel@bytedance.com>

There exist two kinds of huge pte on RISC-V: pmd and napot pte. For pmd
kind huge pte, the huge page it represents is much larger than base
page. Thus pmd kind huge pte can be represented via a single pmd entry
and needs no special handling. For napot kind huge pte, it is actually
several normal pte entries encoded in Svnapot format. Thus napot kind
huge pte should be represented via pte_t struct and handled via pte
operations.

This commit distinguishes these two kinds of huge pte and handles them
with different operations.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 arch/riscv/include/asm/hugetlb.h | 71 +++++++++++++++++++++++++++++++-
 arch/riscv/mm/hugetlbpage.c      | 40 ++++++++++++------
 2 files changed, 97 insertions(+), 14 deletions(-)

diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index 4c5b0e929890..1cdd5a26e6d4 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -4,6 +4,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
@@ -12,6 +13,7 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 #define arch_clear_hugepage_flags arch_clear_hugepage_flags
 
 #ifdef CONFIG_RISCV_ISA_SVNAPOT
+
 #define __HAVE_ARCH_HUGE_PTE_CLEAR
 void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, unsigned long sz);
@@ -41,10 +43,77 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 #define __HAVE_ARCH_HUGE_PTEP_GET
 pte_t huge_ptep_get(pte_t *ptep);
 
+#define __HAVE_ARCH_HUGE_PTEP_GET_LOCKLESS
+static inline pte_t huge_ptep_get_lockless(pte_t *ptep)
+{
+	unsigned long pteval = READ_ONCE(ptep->ptes[0]);
+
+	return __pte(pteval);
+}
+
 pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
 #define arch_make_huge_pte arch_make_huge_pte
 
-#endif /*CONFIG_RISCV_ISA_SVNAPOT*/
+#else /* CONFIG_RISCV_ISA_SVNAPOT */
+
+#define __HAVE_ARCH_HUGE_PTEP_GET
+/* Without Svnapot, a huge pte is always a pmd entry: read it via pmd ops. */
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	pmd_t *pmdp = (pmd_t *)ptep;
+
+	return pmd_pte(pmdp_get(pmdp));
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_LOCKLESS
+static inline pte_t huge_ptep_get_lockless(pte_t *ptep)
+{
+	return huge_ptep_get(ptep);
+}
+
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		pte_t *ptep, pte_t pte)
+{
+	set_pmd_at(mm, addr, (pmd_t *)ptep, pte_pmd(pte));
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep,
+		pte_t pte, int dirty)
+{
+	return pmdp_set_access_flags(vma, addr, (pmd_t *)ptep, pte_pmd(pte), dirty);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	return pmd_pte(pmdp_get_and_clear(mm, addr, (pmd_t *)ptep));
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+		unsigned long addr, pte_t *ptep)
+{
+	pmdp_set_wrprotect(mm, addr, (pmd_t *)ptep);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *ptep)
+{
+	return pmd_pte(pmdp_clear_flush(vma, addr, (pmd_t *)ptep));
+}
+
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+		    pte_t *ptep, unsigned long sz)
+{
+	pmd_clear((pmd_t *)ptep);
+}
+
+#endif /* CONFIG_RISCV_ISA_SVNAPOT */
 
 #include <asm-generic/hugetlb.h>
 
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 67fd71c36853..4a2ad8657502 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -7,8 +7,13 @@ pte_t huge_ptep_get(pte_t *ptep)
 {
 	unsigned long pte_num;
 	int i;
-	pte_t orig_pte = ptep_get(ptep);
+	pmd_t *pmdp = (pmd_t *)ptep;
+	pte_t orig_pte = pmd_pte(pmdp_get(pmdp));
 
+	/*
+	 * Non napot pte indicates a middle page table entry and
+	 * should be treated as a pmd.
+	 */
 	if (!pte_present(orig_pte) || !pte_napot(orig_pte))
 		return orig_pte;
 
@@ -198,6 +203,10 @@ void set_huge_pte_at(struct mm_struct *mm,
 		hugepage_shift = PAGE_SHIFT;
 
 	pte_num = sz >> hugepage_shift;
+	if (pte_num == 1) {
+		set_pmd_at(mm, addr, (pmd_t *)ptep, pte_pmd(pte));
+		return;
+	}
 	for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift))
 		set_pte_at(mm, addr, ptep, pte);
 }
@@ -214,7 +221,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 	int i, pte_num;
 
 	if (!pte_napot(pte))
-		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+		return pmdp_set_access_flags(vma, addr, (pmd_t *)ptep,
+					     pte_pmd(pte), dirty);
 
 	order = napot_cont_order(pte);
 	pte_num = napot_pte_num(order);
@@ -237,11 +245,12 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 			      unsigned long addr,
 			      pte_t *ptep)
 {
-	pte_t orig_pte = ptep_get(ptep);
+	pmd_t *pmdp = (pmd_t *)ptep;
+	pte_t orig_pte = pmd_pte(pmdp_get(pmdp));
 	int pte_num;
 
 	if (!pte_napot(orig_pte))
-		return ptep_get_and_clear(mm, addr, ptep);
+		return pmd_pte(pmdp_get_and_clear(mm, addr, pmdp));
 
 	pte_num = napot_pte_num(napot_cont_order(orig_pte));
 
@@ -252,13 +261,14 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
 			     unsigned long addr,
 			     pte_t *ptep)
 {
-	pte_t pte = ptep_get(ptep);
+	pmd_t *pmdp = (pmd_t *)ptep;
+	pte_t pte = pmd_pte(pmdp_get(pmdp));
 	unsigned long order;
 	pte_t orig_pte;
 	int i, pte_num;
 
 	if (!pte_napot(pte)) {
-		ptep_set_wrprotect(mm, addr, ptep);
+		pmdp_set_wrprotect(mm, addr, pmdp);
 		return;
 	}
 
@@ -277,11 +287,12 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
 			    unsigned long addr,
 			    pte_t *ptep)
 {
-	pte_t pte = ptep_get(ptep);
+	pmd_t *pmdp = (pmd_t *)ptep;
+	pte_t pte = pmd_pte(pmdp_get(pmdp));
 	int pte_num;
 
 	if (!pte_napot(pte))
-		return ptep_clear_flush(vma, addr, ptep);
+		return pmd_pte(pmdp_clear_flush(vma, addr, pmdp));
 
 	pte_num = napot_pte_num(napot_cont_order(pte));
 
@@ -293,11 +304,12 @@ void huge_pte_clear(struct mm_struct *mm,
 		    pte_t *ptep,
 		    unsigned long sz)
 {
-	pte_t pte = ptep_get(ptep);
+	pmd_t *pmdp = (pmd_t *)ptep;
+	pte_t pte = pmd_pte(pmdp_get(pmdp));
 	int i, pte_num;
 
 	if (!pte_napot(pte)) {
-		pte_clear(mm, addr, ptep);
+		pmd_clear(pmdp);
 		return;
 	}
 
@@ -325,8 +337,10 @@ static __init int napot_hugetlbpages_init(void)
 	if (has_svnapot()) {
 		unsigned long order;
 
-		for_each_napot_order(order)
-			hugetlb_add_hstate(order);
+		for_each_napot_order(order) {
+			if (napot_cont_shift(order) > PAGE_SHIFT)
+				hugetlb_add_hstate(order);
+		}
 	}
 	return 0;
 }
@@ -357,7 +371,7 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
 		return true;
 	else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
 		return true;
-	else if (is_napot_size(size))
+	else if (is_napot_size(size) && size > PAGE_SIZE)
 		return true;
 	else
 		return false;
-- 
2.20.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2023-11-23  6:58 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-23  6:56 [RFC PATCH V1 00/11] riscv: Introduce 64K base page Xu Lu
2023-11-23  6:56 ` [RFC PATCH V1 01/11] mm: Fix misused APIs on huge pte Xu Lu
2023-12-31 16:39   ` Alexandre Ghiti
2024-01-04  7:59     ` [External] " Xu Lu
2023-11-23  6:56 ` [RFC PATCH V1 02/11] riscv: Introduce concept of hardware base page Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 03/11] riscv: Adapt pte struct to gap between hw page and sw page Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 04/11] riscv: Adapt pte operations " Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 05/11] riscv: Decouple pmd operations and pte operations Xu Lu
2023-11-23  6:57 ` Xu Lu [this message]
2023-11-23  6:57 ` [RFC PATCH V1 07/11] riscv: Adapt satp operations to gap between hw page and sw page Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 08/11] riscv: Apply Svnapot for base page mapping Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 09/11] riscv: Adjust fix_btmap slots number to match variable page size Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 10/11] riscv: kvm: Adapt kvm to gap between hw page and sw page Xu Lu
2023-11-23  6:57 ` [RFC PATCH V1 11/11] riscv: Introduce 64K page size Xu Lu
2023-11-23  9:29 ` [RFC PATCH V1 00/11] riscv: Introduce 64K base page Arnd Bergmann
2023-11-27  8:14   ` [External] " Xu Lu
2023-12-07  6:07 ` Xu Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231123065708.91345-7-luxu.kernel@bytedance.com \
    --to=luxu.kernel@bytedance.com \
    --cc=anup@brainfault.org \
    --cc=aou@eecs.berkeley.edu \
    --cc=ardb@kernel.org \
    --cc=atishp@atishpatra.org \
    --cc=dengliang.1214@bytedance.com \
    --cc=lihangjing@bytedance.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=punit.agrawal@bytedance.com \
    --cc=songmuchun@bytedance.com \
    --cc=xieyongji@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox