public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Xu Lu <luxu.kernel@bytedance.com>
To: paul.walmsley@sifive.com, palmer@dabbelt.com,
	aou@eecs.berkeley.edu, ardb@kernel.org, anup@brainfault.org,
	atishp@atishpatra.org
Cc: xieyongji@bytedance.com, lihangjing@bytedance.com,
	punit.agrawal@bytedance.com, linux-kernel@vger.kernel.org,
	linux-riscv@lists.infradead.org,
	Xu Lu <luxu.kernel@bytedance.com>
Subject: [RFC PATCH v2 08/21] riscv: mm: Reimplement page table entry atomic get function
Date: Thu,  5 Dec 2024 18:37:16 +0800	[thread overview]
Message-ID: <20241205103729.14798-9-luxu.kernel@bytedance.com> (raw)
In-Reply-To: <20241205103729.14798-1-luxu.kernel@bytedance.com>

This commit implements lockless functions to atomically fetch a pte's
value. For each pte structure, we atomically fetch the first mapping
entry, and then, in a loop, fetch the following entries and compare them
with the first mapping entry advanced by the appropriate step. If we find
any difference in their pfns or prots, then the pte structure has been
modified and needs to be reloaded.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 arch/riscv/include/asm/pgtable.h | 156 +++++++++++++++++++++++++++++++
 include/linux/pgtable.h          |  21 +++++
 2 files changed, 177 insertions(+)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ba4a083b7210..fe42afb4441e 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -220,6 +220,18 @@ static inline unsigned long satp_pfn(unsigned long satp)
 	return hwpfn_to_pfn(hwpfn);
 }
 
+/* Return only the bits of @pteval that lie below _PAGE_HWPFN_SHIFT. */
+static inline unsigned long __pte_pgprot(unsigned long pteval)
+{
+	/* the mask spans bits [_PAGE_HWPFN_SHIFT - 1 : 0] */
+	return pteval & GENMASK(_PAGE_HWPFN_SHIFT - 1, 0);
+}
+
+static inline pgprot_t pte_pgprot(pte_t pte)	/* pgprot_t form of __pte_pgprot() */
+{
+	return __pgprot(__pte_pgprot(pte_val(pte)));	/* mask pte_val(), then wrap */
+}
+
 static inline int __pgd_leaf(unsigned long pgdval)
 {
 	return __pgd_present(pgdval) && (pgdval & _PAGE_LEAF);
@@ -734,6 +746,150 @@ static inline pgd_t pgdp_get(pgd_t *pgdp)
 }
 #define pgdp_get	pgdp_get
 
+#ifdef CONFIG_RISCV_USE_SW_PAGE
+/* Lockless pte read; retried until all entries are consistent with entry 0. */
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+	const unsigned long ad_bits = _PAGE_DIRTY | _PAGE_ACCESSED;
+	unsigned long expect;
+	pte_t snap;
+	int idx;
+
+again:
+	expect = READ_ONCE(ptep->ptes[0]);
+	snap = *ptep;
+	for (idx = 0; idx < HW_PAGES_PER_PAGE; idx++) {
+		/* pfn and prot must match; accessed/dirty differences are ignored */
+		if (__page_val_to_pfn(expect) != __page_val_to_pfn(snap.ptes[idx]) ||
+		    (__pte_pgprot(expect) | ad_bits) !=
+		    (__pte_pgprot(snap.ptes[idx]) | ad_bits))
+			goto again;
+		if (__pte_present(expect) && !__pte_napot(expect))
+			expect += 1 << _PAGE_HWPFN_SHIFT;
+	}
+
+	return snap;
+}
+#define ptep_get_lockless	ptep_get_lockless
+
+/* Lockless pmd read; retried until all entries are consistent with entry 0. */
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+	unsigned long pmdval;
+	pmd_t pmd;
+	int i;
+
+retry:
+	pmdval = READ_ONCE(pmdp->pmds[0]);
+	pmd = *pmdp;
+	for (i = 0; i < HW_PAGES_PER_PAGE; i++) {
+		if (__page_val_to_pfn(pmdval) != __page_val_to_pfn(pmd.pmds[i]))
+			goto retry;
+		/* accessed/dirty differences are ignored when comparing prot bits */
+		if ((__pte_pgprot(pmdval) | _PAGE_DIRTY | _PAGE_ACCESSED) !=
+		    (__pte_pgprot(pmd.pmds[i]) | _PAGE_DIRTY | _PAGE_ACCESSED))
+			goto retry;
+		/* 1UL keeps the step in unsigned long, the type of pmdval */
+		if (__pmd_leaf(pmdval))
+			pmdval += (1UL << (PMD_SHIFT - PAGE_SHIFT)) << _PAGE_HWPFN_SHIFT;
+		else if (__pmd_present(pmdval))
+			pmdval += 1UL << _PAGE_HWPFN_SHIFT;
+	}
+
+	return pmd;
+}
+#define pmdp_get_lockless	pmdp_get_lockless
+
+static inline void pmdp_get_lockless_sync(void)
+{	/* empty: no work is performed by this variant */
+}
+
+/* Lockless pud read; retried until all entries are consistent with entry 0. */
+static inline pud_t pudp_get_lockless(pud_t *pudp)
+{
+	unsigned long pudval;
+	pud_t pud;
+	int i;
+
+retry:
+	pudval = READ_ONCE(pudp->puds[0]);
+	pud = *pudp;
+	for (i = 0; i < HW_PAGES_PER_PAGE; i++) {
+		if (__page_val_to_pfn(pudval) != __page_val_to_pfn(pud.puds[i]))
+			goto retry;
+		/* accessed/dirty differences are ignored when comparing prot bits */
+		if ((__pte_pgprot(pudval) | _PAGE_DIRTY | _PAGE_ACCESSED) !=
+		    (__pte_pgprot(pud.puds[i]) | _PAGE_DIRTY | _PAGE_ACCESSED))
+			goto retry;
+		/* 1UL keeps the step in unsigned long, the type of pudval */
+		if (__pud_leaf(pudval))
+			pudval += (1UL << (PUD_SHIFT - PAGE_SHIFT)) << _PAGE_HWPFN_SHIFT;
+		else if (__pud_present(pudval))
+			pudval += 1UL << _PAGE_HWPFN_SHIFT;
+	}
+
+	return pud;
+}
+#define pudp_get_lockless	pudp_get_lockless
+
+/* Lockless p4d read; retried until all entries are consistent with entry 0. */
+static inline p4d_t p4dp_get_lockless(p4d_t *p4dp)
+{
+	unsigned long p4dval;
+	p4d_t p4d;
+	int i;
+
+retry:
+	p4dval = READ_ONCE(p4dp->p4ds[0]);
+	p4d = *p4dp;
+	for (i = 0; i < HW_PAGES_PER_PAGE; i++) {
+		if (__page_val_to_pfn(p4dval) != __page_val_to_pfn(p4d.p4ds[i]))
+			goto retry;
+		/* accessed/dirty differences are ignored when comparing prot bits */
+		if ((__pte_pgprot(p4dval) | _PAGE_DIRTY | _PAGE_ACCESSED) !=
+		    (__pte_pgprot(p4d.p4ds[i]) | _PAGE_DIRTY | _PAGE_ACCESSED))
+			goto retry;
+		/* 1UL: an int-typed 1 could overflow for this combined shift */
+		if (__p4d_leaf(p4dval))
+			p4dval += (1UL << (P4D_SHIFT - PAGE_SHIFT)) << _PAGE_HWPFN_SHIFT;
+		else if (__p4d_present(p4dval))
+			p4dval += 1UL << _PAGE_HWPFN_SHIFT;
+	}
+
+	return p4d;
+}
+#define p4dp_get_lockless	p4dp_get_lockless
+
+/* Lockless pgd read; retried until all entries are consistent with entry 0. */
+static inline pgd_t pgdp_get_lockless(pgd_t *pgdp)
+{
+	unsigned long pgdval;
+	pgd_t pgd;
+	int i;
+
+retry:
+	pgdval = READ_ONCE(pgdp->pgds[0]);
+	pgd = *pgdp;
+	for (i = 0; i < HW_PAGES_PER_PAGE; i++) {
+		if (__page_val_to_pfn(pgdval) != __page_val_to_pfn(pgd.pgds[i]))
+			goto retry;
+		/* accessed/dirty differences are ignored when comparing prot bits */
+		if ((__pte_pgprot(pgdval) | _PAGE_DIRTY | _PAGE_ACCESSED) !=
+		    (__pte_pgprot(pgd.pgds[i]) | _PAGE_DIRTY | _PAGE_ACCESSED))
+			goto retry;
+		/* 1UL: an int-typed 1 could overflow for this combined shift */
+		if (__pgd_leaf(pgdval))
+			pgdval += (1UL << (PGDIR_SHIFT - PAGE_SHIFT)) << _PAGE_HWPFN_SHIFT;
+		else if (__pgd_present(pgdval))
+			pgdval += 1UL << _PAGE_HWPFN_SHIFT;
+	}
+
+	return pgd;
+}
+#define pgdp_get_lockless	pgdp_get_lockless
+
+#endif /* CONFIG_RISCV_USE_SW_PAGE */
+
 void flush_icache_pte(struct mm_struct *mm, pte_t pte);
 
 static inline void __set_pte_at(struct mm_struct *mm, pte_t *ptep, pte_t pteval)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e8b2ac6bd2ae..b629c48b980b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -598,6 +598,27 @@ static inline void pmdp_get_lockless_sync(void)
 }
 #endif
 
+#ifndef pudp_get_lockless	/* generic fallback, used unless pudp_get_lockless is already defined */
+static inline pud_t pudp_get_lockless(pud_t *pudp)
+{
+	return pudp_get(pudp);	/* simply forwards to pudp_get() */
+}
+#endif /* !pudp_get_lockless */
+
+#ifndef p4dp_get_lockless	/* generic fallback, used unless p4dp_get_lockless is already defined */
+static inline p4d_t p4dp_get_lockless(p4d_t *p4dp)
+{
+	return p4dp_get(p4dp);	/* simply forwards to p4dp_get() */
+}
+#endif /* !p4dp_get_lockless */
+
+#ifndef pgdp_get_lockless	/* generic fallback, used unless pgdp_get_lockless is already defined */
+static inline pgd_t pgdp_get_lockless(pgd_t *pgdp)
+{
+	return pgdp_get(pgdp);	/* simply forwards to pgdp_get() */
+}
+#endif /* !pgdp_get_lockless */
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-- 
2.20.1


  parent reply	other threads:[~2024-12-05 10:38 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-05 10:37 [RFC PATCH v2 00/21] riscv: Introduce 64K base page Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 01/21] riscv: mm: Distinguish hardware base page and software " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 02/21] riscv: mm: Configure satp with hw page pfn Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 03/21] riscv: mm: Reimplement page table entry structures Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 04/21] riscv: mm: Reimplement page table entry constructor function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 05/21] riscv: mm: Reimplement conversion functions between page table entry Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 06/21] riscv: mm: Avoid pte constructor during pte conversion Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 07/21] riscv: mm: Reimplement page table entry get function Xu Lu
2024-12-05 10:37 ` Xu Lu [this message]
2024-12-05 10:37 ` [RFC PATCH v2 09/21] riscv: mm: Replace READ_ONCE with atomic pte " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 10/21] riscv: mm: Reimplement PTE A/D bit check function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 11/21] riscv: mm: Reimplement mk_huge_pte function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 12/21] riscv: mm: Reimplement tlb flush function Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 13/21] riscv: mm: Adjust PGDIR/P4D/PUD/PMD_SHIFT Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 14/21] riscv: mm: Only apply svnapot region bigger than software page Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 15/21] riscv: mm: Adjust FIX_BTMAPS_SLOTS for variable PAGE_SIZE Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 16/21] riscv: mm: Adjust FIX_FDT_SIZE for variable PMD_SIZE Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 17/21] riscv: mm: Apply Svnapot for base page mapping if possible Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 18/21] riscv: Kconfig: Introduce 64K page size Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 19/21] riscv: Kconfig: Adjust mmap rnd bits for 64K Page Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 20/21] riscv: mm: Adjust address space layout and init page table " Xu Lu
2024-12-05 10:37 ` [RFC PATCH v2 21/21] riscv: mm: Update EXEC_PAGESIZE " Xu Lu
2024-12-06  2:00 ` [RFC PATCH v2 00/21] riscv: Introduce 64K base page Zi Yan
2024-12-06  2:41   ` [External] " Xu Lu
2024-12-06 10:13   ` David Hildenbrand
2024-12-06 13:42     ` [External] " Xu Lu
2024-12-06 18:48       ` Pedro Falcato
2024-12-07  8:03         ` Xu Lu
2024-12-07 22:02           ` Yu Zhao
2024-12-09  3:36             ` Xu Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241205103729.14798-9-luxu.kernel@bytedance.com \
    --to=luxu.kernel@bytedance.com \
    --cc=anup@brainfault.org \
    --cc=aou@eecs.berkeley.edu \
    --cc=ardb@kernel.org \
    --cc=atishp@atishpatra.org \
    --cc=lihangjing@bytedance.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=punit.agrawal@bytedance.com \
    --cc=xieyongji@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox