public inbox for linux-riscv@lists.infradead.org
 help / color / mirror / Atom feed
From: Xu Lu <luxu.kernel@bytedance.com>
To: paul.walmsley@sifive.com, palmer@dabbelt.com,
	aou@eecs.berkeley.edu, ardb@kernel.org, anup@brainfault.org,
	atishp@atishpatra.org
Cc: xieyongji@bytedance.com, lihangjing@bytedance.com,
	punit.agrawal@bytedance.com, linux-kernel@vger.kernel.org,
	linux-riscv@lists.infradead.org,
	Xu Lu <luxu.kernel@bytedance.com>
Subject: [RFC PATCH v2 03/21] riscv: mm: Reimplement page table entry structures
Date: Thu,  5 Dec 2024 18:37:11 +0800	[thread overview]
Message-ID: <20241205103729.14798-4-luxu.kernel@bytedance.com> (raw)
In-Reply-To: <20241205103729.14798-1-luxu.kernel@bytedance.com>

After decoupling the hardware base page and software base page, each
software page can consist of several hardware base pages. The pte
struct should be turned into an array of mapping entries to map the
software page. For example, in a 64K page size kernel, each software
page consists of 16 contiguous hardware pages. Thus the pte struct
should contain 16 mapping entries to map 16 hardware pages.

This commit reimplements pte structure.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 arch/riscv/include/asm/page.h       | 43 +++++++++++++++++++++++----
 arch/riscv/include/asm/pgtable-64.h | 41 +++++++++++++++++++++----
 arch/riscv/include/asm/pgtable.h    | 23 +++++++++++++--
 arch/riscv/mm/pgtable.c             | 46 +++++++++++++++++++++++++++++
 4 files changed, 141 insertions(+), 12 deletions(-)

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 7c581a3e057b..9bc908d94c7a 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -63,6 +63,36 @@ void clear_page(void *page);
  * Use struct definitions to apply C type checking
  */
 
+#ifdef CONFIG_RISCV_USE_SW_PAGE
+
+#define HW_PAGES_PER_PAGE	(1 << (PAGE_SHIFT - HW_PAGE_SHIFT))
+
+struct page_table_entry {
+	union {
+		unsigned long pgds[HW_PAGES_PER_PAGE];
+		unsigned long p4ds[HW_PAGES_PER_PAGE];
+		unsigned long puds[HW_PAGES_PER_PAGE];
+		unsigned long pmds[HW_PAGES_PER_PAGE];
+		unsigned long ptes[HW_PAGES_PER_PAGE];
+	};
+};
+
+/* Page Global Directory entry */
+typedef struct page_table_entry pgd_t;
+
+/* Page Table entry */
+typedef struct page_table_entry pte_t;
+
+#define pte_val(x)	((x).ptes[0])
+#define pgd_val(x)	((x).pgds[0])
+
+pte_t __pte(unsigned long pteval);
+pgd_t __pgd(unsigned long pgdval);
+#define __pte		__pte
+#define __pgd		__pgd
+
+#else /* CONFIG_RISCV_USE_SW_PAGE */
+
 /* Page Global Directory entry */
 typedef struct {
 	unsigned long pgd;
@@ -73,18 +103,21 @@ typedef struct {
 	unsigned long pte;
 } pte_t;
 
+#define pte_val(x)	((x).pte)
+#define pgd_val(x)	((x).pgd)
+
+#define __pte(x)	((pte_t) { (x) })
+#define __pgd(x)	((pgd_t) { (x) })
+
+#endif /* CONFIG_RISCV_USE_SW_PAGE */
+
 typedef struct {
 	unsigned long pgprot;
 } pgprot_t;
 
 typedef struct page *pgtable_t;
 
-#define pte_val(x)	((x).pte)
-#define pgd_val(x)	((x).pgd)
 #define pgprot_val(x)	((x).pgprot)
-
-#define __pte(x)	((pte_t) { (x) })
-#define __pgd(x)	((pgd_t) { (x) })
 #define __pgprot(x)	((pgprot_t) { (x) })
 
 #ifdef CONFIG_64BIT
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 963aa4be9eed..e736873d7768 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -41,6 +41,35 @@ extern bool pgtable_l5_enabled;
 #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK        (~(PMD_SIZE - 1))
 
+#ifdef CONFIG_RISCV_USE_SW_PAGE
+
+/* Page 4th Directory entry */
+typedef struct page_table_entry p4d_t;
+
+#define p4d_val(x)	((x).p4ds[0])
+p4d_t __p4d(unsigned long p4dval);
+#define __p4d		__p4d
+#define PTRS_PER_P4D	(PAGE_SIZE / sizeof(p4d_t))
+
+/* Page Upper Directory entry */
+typedef struct page_table_entry pud_t;
+
+#define pud_val(x)      ((x).puds[0])
+pud_t __pud(unsigned long pudval);
+#define __pud		__pud
+#define PTRS_PER_PUD    (PAGE_SIZE / sizeof(pud_t))
+
+/* Page Middle Directory entry */
+typedef struct page_table_entry pmd_t;
+
+#define pmd_val(x)      ((x).pmds[0])
+pmd_t __pmd(unsigned long pmdval);
+#define __pmd		__pmd
+
+#define PTRS_PER_PMD    (PAGE_SIZE / sizeof(pmd_t))
+
+#else /* CONFIG_RISCV_USE_SW_PAGE */
+
 /* Page 4th Directory entry */
 typedef struct {
 	unsigned long p4d;
@@ -69,6 +98,8 @@ typedef struct {
 
 #define PTRS_PER_PMD    (PAGE_SIZE / sizeof(pmd_t))
 
+#endif /* CONFIG_RISCV_USE_SW_PAGE */
+
 /*
  * rv64 PTE format:
  * | 63 | 62 61 | 60 54 | 53  10 | 9             8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
@@ -98,7 +129,7 @@ enum napot_cont_order {
 #define for_each_napot_order_rev(order)						\
 	for (order = NAPOT_ORDER_MAX - 1;					\
 	     order >= NAPOT_CONT_ORDER_BASE; order--)
-#define napot_cont_order(val)	(__builtin_ctzl((val.pte >> _PAGE_PFN_SHIFT) << 1))
+#define napot_cont_order(val)	(__builtin_ctzl((pte_val(val) >> _PAGE_PFN_SHIFT) << 1))
 
 #define napot_cont_shift(order)	((order) + PAGE_SHIFT)
 #define napot_cont_size(order)	BIT(napot_cont_shift(order))
@@ -279,7 +310,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 	if (pgtable_l4_enabled)
 		WRITE_ONCE(*p4dp, p4d);
 	else
-		set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+		set_pud((pud_t *)p4dp, __pud(p4d_val(p4d)));
 }
 
 static inline int p4d_none(p4d_t p4d)
@@ -327,7 +358,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
 	if (pgtable_l4_enabled)
 		return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d)));
 
-	return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+	return (pud_t *)pud_pgtable(__pud(p4d_val(p4d)));
 }
 #define p4d_page_vaddr(p4d)	((unsigned long)p4d_pgtable(p4d))
 
@@ -346,7 +377,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 	if (pgtable_l5_enabled)
 		WRITE_ONCE(*pgdp, pgd);
 	else
-		set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
+		set_p4d((p4d_t *)pgdp, __p4d(pgd_val(pgd)));
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -384,7 +415,7 @@ static inline p4d_t *pgd_pgtable(pgd_t pgd)
 	if (pgtable_l5_enabled)
 		return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd)));
 
-	return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
+	return (p4d_t *)p4d_pgtable(__p4d(pgd_val(pgd)));
 }
 #define pgd_page_vaddr(pgd)	((unsigned long)pgd_pgtable(pgd))
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9d3947ec3523..f9aed43809b3 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -574,6 +574,25 @@ static inline void __set_pte_at(struct mm_struct *mm, pte_t *ptep, pte_t pteval)
 
 #define PFN_PTE_SHIFT		_PAGE_PFN_SHIFT
 
+#ifdef CONFIG_RISCV_USE_SW_PAGE
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
+{
+	unsigned int i;
+
+	if (pte_present(pte) && !pte_napot(pte))
+		for (i = 0; i < HW_PAGES_PER_PAGE; i++)
+			pte.ptes[i] += nr << _PAGE_PFN_SHIFT;
+
+	return pte;
+}
+#else
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
+{
+	return __pte(pte_val(pte) + (nr << _PAGE_PFN_SHIFT));
+}
+#endif
+#define pte_advance_pfn		pte_advance_pfn
+
 static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
 		pte_t *ptep, pte_t pteval, unsigned int nr)
 {
@@ -584,7 +603,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
 		if (--nr == 0)
 			break;
 		ptep++;
-		pte_val(pteval) += 1 << _PAGE_PFN_SHIFT;
+		pteval = pte_advance_pfn(pteval, 1);
 	}
 }
 #define set_ptes set_ptes
@@ -882,7 +901,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 	  ((offset) << __SWP_OFFSET_SHIFT) })
 
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+#define __swp_entry_to_pte(x)	(__pte((x).val))
 
 static inline int pte_swp_exclusive(pte_t pte)
 {
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 4ae67324f992..0c6b2fc6be58 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -5,6 +5,52 @@
 #include <linux/kernel.h>
 #include <linux/pgtable.h>
 
+#ifdef CONFIG_RISCV_USE_SW_PAGE
+
+pte_t __pte(unsigned long pteval)
+{
+	pte_t pte;
+
+	return pte;
+}
+EXPORT_SYMBOL(__pte);
+
+pgd_t __pgd(unsigned long pgdval)
+{
+	pgd_t pgd;
+
+	return pgd;
+}
+EXPORT_SYMBOL(__pgd);
+
+#ifdef CONFIG_64BIT
+p4d_t __p4d(unsigned long p4dval)
+{
+	p4d_t p4d;
+
+	return p4d;
+}
+EXPORT_SYMBOL(__p4d);
+
+pud_t __pud(unsigned long pudval)
+{
+	pud_t pud;
+
+	return pud;
+}
+EXPORT_SYMBOL(__pud);
+
+pmd_t __pmd(unsigned long pmdval)
+{
+	pmd_t pmd;
+
+	return pmd;
+}
+EXPORT_SYMBOL(__pmd);
+#endif /* CONFIG_64BIT */
+
+#endif /* CONFIG_RISCV_USE_SW_PAGE */
+
 int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
 			  pte_t entry, int dirty)
-- 
2.20.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

  parent reply	other threads:[~2024-12-05 10:44 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-05 10:37 [RFC PATCH v2 00/21] riscv: Introduce 64K base page Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 01/21] riscv: mm: Distinguish hardware base page and software " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 02/21] riscv: mm: Configure satp with hw page pfn Xu Lu
2024-12-05 10:37 ` Xu Lu [this message]
2024-12-05 10:37 ` [RFC PATCH v2 04/21] riscv: mm: Reimplement page table entry constructor function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 05/21] riscv: mm: Reimplement conversion functions between page table entry Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 06/21] riscv: mm: Avoid pte constructor during pte conversion Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 07/21] riscv: mm: Reimplement page table entry get function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 08/21] riscv: mm: Reimplement page table entry atomic " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 09/21] riscv: mm: Replace READ_ONCE with atomic pte " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 10/21] riscv: mm: Reimplement PTE A/D bit check function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 11/21] riscv: mm: Reimplement mk_huge_pte function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 12/21] riscv: mm: Reimplement tlb flush function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 13/21] riscv: mm: Adjust PGDIR/P4D/PUD/PMD_SHIFT Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 14/21] riscv: mm: Only apply svnapot region bigger than software page Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 15/21] riscv: mm: Adjust FIX_BTMAPS_SLOTS for variable PAGE_SIZE Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 16/21] riscv: mm: Adjust FIX_FDT_SIZE for variable PMD_SIZE Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 17/21] riscv: mm: Apply Svnapot for base page mapping if possible Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 18/21] riscv: Kconfig: Introduce 64K page size Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 19/21] riscv: Kconfig: Adjust mmap rnd bits for 64K Page Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 20/21] riscv: mm: Adjust address space layout and init page table " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 21/21] riscv: mm: Update EXEC_PAGESIZE " Xu Lu
2024-12-06  2:00 ` [RFC PATCH v2 00/21] riscv: Introduce 64K base page Zi Yan
2024-12-06  2:41   ` [External] " Xu Lu
2024-12-06 10:13   ` David Hildenbrand
2024-12-06 13:42     ` [External] " Xu Lu
2024-12-06 18:48       ` Pedro Falcato
2024-12-07  8:03         ` Xu Lu
2024-12-07 22:02           ` Yu Zhao
2024-12-09  3:36             ` Xu Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241205103729.14798-4-luxu.kernel@bytedance.com \
    --to=luxu.kernel@bytedance.com \
    --cc=anup@brainfault.org \
    --cc=aou@eecs.berkeley.edu \
    --cc=ardb@kernel.org \
    --cc=atishp@atishpatra.org \
    --cc=lihangjing@bytedance.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=punit.agrawal@bytedance.com \
    --cc=xieyongji@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox