public inbox for linux-s390@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
@ 2026-04-27  5:20 Alexander Gordeev
  2026-04-27  5:47 ` Anshuman Khandual
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Alexander Gordeev @ 2026-04-27  5:20 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes
  Cc: Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel

Convert pgtable direct entry dereferences to the corresponding
pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
reads when no lock is taken.

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 mm/page_vma_mapped.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index b38a1d00c971..a4520bb10d2a 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -41,7 +41,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 	if (!pvmw->pte)
 		return false;
 
-	ptent = ptep_get(pvmw->pte);
+	ptent = ptep_get_lockless(pvmw->pte);
 
 	if (pte_none(ptent)) {
 		return false;
@@ -219,17 +219,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 restart:
 	do {
 		pgd = pgd_offset(mm, pvmw->address);
-		if (!pgd_present(*pgd)) {
+		if (!pgd_present(pgdp_get(pgd))) {
 			step_forward(pvmw, PGDIR_SIZE);
 			continue;
 		}
 		p4d = p4d_offset(pgd, pvmw->address);
-		if (!p4d_present(*p4d)) {
+		if (!p4d_present(p4dp_get(p4d))) {
 			step_forward(pvmw, P4D_SIZE);
 			continue;
 		}
 		pud = pud_offset(p4d, pvmw->address);
-		if (!pud_present(*pud)) {
+		if (!pud_present(pudp_get(pud))) {
 			step_forward(pvmw, PUD_SIZE);
 			continue;
 		}
@@ -244,7 +244,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
 		if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) {
 			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
-			pmde = *pvmw->pmd;
+			pmde = pmdp_get(pvmw->pmd);
 			if (!pmd_present(pmde)) {
 				softleaf_t entry;
 
@@ -317,7 +317,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 				goto restart;
 			}
 			pvmw->pte++;
-		} while (pte_none(ptep_get(pvmw->pte)));
+		} while (pte_none(ptep_get_lockless(pvmw->pte)));
 
 		if (!pvmw->ptl) {
 			spin_lock(ptl);
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  5:20 [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors Alexander Gordeev
@ 2026-04-27  5:47 ` Anshuman Khandual
  2026-04-27  8:40 ` Oscar Salvador
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: Anshuman Khandual @ 2026-04-27  5:47 UTC (permalink / raw)
  To: Alexander Gordeev, Andrew Morton, David Hildenbrand,
	Lorenzo Stoakes
  Cc: Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel



On 27/04/26 10:50 AM, Alexander Gordeev wrote:
> Convert pgtable direct entry dereferences to the corresponding
> pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
> reads when no lock is taken.
> 
> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> ---
>  mm/page_vma_mapped.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index b38a1d00c971..a4520bb10d2a 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -41,7 +41,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>  	if (!pvmw->pte)
>  		return false;
>  
> -	ptent = ptep_get(pvmw->pte);
> +	ptent = ptep_get_lockless(pvmw->pte);
>  
>  	if (pte_none(ptent)) {
>  		return false;
> @@ -219,17 +219,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>  restart:
>  	do {
>  		pgd = pgd_offset(mm, pvmw->address);
> -		if (!pgd_present(*pgd)) {
> +		if (!pgd_present(pgdp_get(pgd))) {
>  			step_forward(pvmw, PGDIR_SIZE);
>  			continue;
>  		}
>  		p4d = p4d_offset(pgd, pvmw->address);
> -		if (!p4d_present(*p4d)) {
> +		if (!p4d_present(p4dp_get(p4d))) {
>  			step_forward(pvmw, P4D_SIZE);
>  			continue;
>  		}
>  		pud = pud_offset(p4d, pvmw->address);
> -		if (!pud_present(*pud)) {
> +		if (!pud_present(pudp_get(pud))) {
>  			step_forward(pvmw, PUD_SIZE);
>  			continue;
>  		}
> @@ -244,7 +244,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>  
>  		if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) {
>  			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
> -			pmde = *pvmw->pmd;
> +			pmde = pmdp_get(pvmw->pmd);
>  			if (!pmd_present(pmde)) {
>  				softleaf_t entry;
>  
> @@ -317,7 +317,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>  				goto restart;
>  			}
>  			pvmw->pte++;
> -		} while (pte_none(ptep_get(pvmw->pte)));
> +		} while (pte_none(ptep_get_lockless(pvmw->pte)));
>  
>  		if (!pvmw->ptl) {
>  			spin_lock(ptl);

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  5:20 [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors Alexander Gordeev
  2026-04-27  5:47 ` Anshuman Khandual
@ 2026-04-27  8:40 ` Oscar Salvador
  2026-04-27  8:49   ` Anshuman Khandual
  2026-04-27  9:01   ` Alexander Gordeev
  2026-04-27  9:02 ` David Hildenbrand (Arm)
  2026-04-30 12:11 ` kernel test robot
  3 siblings, 2 replies; 9+ messages in thread
From: Oscar Salvador @ 2026-04-27  8:40 UTC (permalink / raw)
  To: Alexander Gordeev
  Cc: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel

On Mon, Apr 27, 2026 at 07:20:00AM +0200, Alexander Gordeev wrote:
> Convert pgtable direct entry dereferences to the corresponding
> pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
> reads when no lock is taken.
> 
> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> ---
>  mm/page_vma_mapped.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index b38a1d00c971..a4520bb10d2a 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
...
> @@ -317,7 +317,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>  				goto restart;
>  			}
>  			pvmw->pte++;
> -		} while (pte_none(ptep_get(pvmw->pte)));
> +		} while (pte_none(ptep_get_lockless(pvmw->pte)));

map_pte() might take the lock in PVMW_SYNC mode, or am I missing something?

 

-- 
Oscar Salvador
SUSE Labs

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  8:40 ` Oscar Salvador
@ 2026-04-27  8:49   ` Anshuman Khandual
  2026-04-27  9:01   ` Alexander Gordeev
  1 sibling, 0 replies; 9+ messages in thread
From: Anshuman Khandual @ 2026-04-27  8:49 UTC (permalink / raw)
  To: Oscar Salvador, Alexander Gordeev
  Cc: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel



On 27/04/26 2:10 PM, Oscar Salvador wrote:
> On Mon, Apr 27, 2026 at 07:20:00AM +0200, Alexander Gordeev wrote:
>> Convert pgtable direct entry dereferences to the corresponding
>> pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
>> reads when no lock is taken.
>>
>> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
>> ---
>>  mm/page_vma_mapped.c | 12 ++++++------
>>  1 file changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>> index b38a1d00c971..a4520bb10d2a 100644
>> --- a/mm/page_vma_mapped.c
>> +++ b/mm/page_vma_mapped.c
> ...
>> @@ -317,7 +317,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>>  				goto restart;
>>  			}
>>  			pvmw->pte++;
>> -		} while (pte_none(ptep_get(pvmw->pte)));
>> +		} while (pte_none(ptep_get_lockless(pvmw->pte)));
> 
> map_pte() might take the lock in PVMW_SYNC mode, or am I missing something?

Right - should leave ptep_get() unchanged.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  8:40 ` Oscar Salvador
  2026-04-27  8:49   ` Anshuman Khandual
@ 2026-04-27  9:01   ` Alexander Gordeev
  1 sibling, 0 replies; 9+ messages in thread
From: Alexander Gordeev @ 2026-04-27  9:01 UTC (permalink / raw)
  To: Oscar Salvador
  Cc: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel

On Mon, Apr 27, 2026 at 10:40:02AM +0200, Oscar Salvador wrote:
> On Mon, Apr 27, 2026 at 07:20:00AM +0200, Alexander Gordeev wrote:
> > Convert pgtable direct entry dereferences to the corresponding
> > pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
> > reads when no lock is taken.
> > 
> > Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> > ---
> >  mm/page_vma_mapped.c | 12 ++++++------
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> > index b38a1d00c971..a4520bb10d2a 100644
> > --- a/mm/page_vma_mapped.c
> > +++ b/mm/page_vma_mapped.c
> ...
> > @@ -317,7 +317,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
> >  				goto restart;
> >  			}
> >  			pvmw->pte++;
> > -		} while (pte_none(ptep_get(pvmw->pte)));
> > +		} while (pte_none(ptep_get_lockless(pvmw->pte)));
> 
> map_pte() might take the lock in PVMW_SYNC mode, or am I missing something?

If map_pte() takes the lock, then ptep_get_lockless() would not be a
problem. But if it does not take the lock, the existing ptep_get() is
a problem.
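
To illustrate what I mean (just a sketch, read_pte() is a made-up
helper and not something this patch adds):

static pte_t read_pte(struct page_vma_mapped_walk *pvmw, bool ptl_held)
{
	if (ptl_held)
		/* PTL serializes concurrent updates, e.g. the PVMW_SYNC case */
		return ptep_get(pvmw->pte);

	/* no PTL: avoid a torn read where a PTE is wider than one word */
	return ptep_get_lockless(pvmw->pte);
}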

> -- 
> Oscar Salvador
> SUSE Labs

Thanks!

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  5:20 [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors Alexander Gordeev
  2026-04-27  5:47 ` Anshuman Khandual
  2026-04-27  8:40 ` Oscar Salvador
@ 2026-04-27  9:02 ` David Hildenbrand (Arm)
  2026-04-27  9:24   ` David Hildenbrand (Arm)
  2026-04-29 13:47   ` Alexander Gordeev
  2026-04-30 12:11 ` kernel test robot
  3 siblings, 2 replies; 9+ messages in thread
From: David Hildenbrand (Arm) @ 2026-04-27  9:02 UTC (permalink / raw)
  To: Alexander Gordeev, Andrew Morton, Lorenzo Stoakes
  Cc: Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel

On 4/27/26 07:20, Alexander Gordeev wrote:
> Convert pgtable direct entry dereferences to the corresponding
> pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
> reads when no lock is taken.
> 
> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> ---
>  mm/page_vma_mapped.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index b38a1d00c971..a4520bb10d2a 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -41,7 +41,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>  	if (!pvmw->pte)
>  		return false;
>  
> -	ptent = ptep_get(pvmw->pte);
> +	ptent = ptep_get_lockless(pvmw->pte);
>  
>  	if (pte_none(ptent)) {
>  		return false;
> @@ -219,17 +219,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>  restart:
>  	do {
>  		pgd = pgd_offset(mm, pvmw->address);
> -		if (!pgd_present(*pgd)) {
> +		if (!pgd_present(pgdp_get(pgd))) {
>  			step_forward(pvmw, PGDIR_SIZE);
>  			continue;
>  		}
>  		p4d = p4d_offset(pgd, pvmw->address);
> -		if (!p4d_present(*p4d)) {
> +		if (!p4d_present(p4dp_get(p4d))) {
>  			step_forward(pvmw, P4D_SIZE);
>  			continue;
>  		}
>  		pud = pud_offset(p4d, pvmw->address);
> -		if (!pud_present(*pud)) {
> +		if (!pud_present(pudp_get(pud))) {
>  			step_forward(pvmw, PUD_SIZE);
>  			continue;

Wasn't there a problem with folded page tables, where we would no longer be able
to optimize out the folded page table accesses?

I thought we discussed ways to resolve that. Let me dig ...
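
(For context, IIRC the generic fallback is essentially just a READ_ONCE():

static inline pgd_t pgdp_get(pgd_t *pgdp)
{
	return READ_ONCE(*pgdp);
}

so with folded levels the load can no longer be optimized out, even
though pgd_present() is hard-coded to return 1.)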

-- 
Cheers,

David

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  9:02 ` David Hildenbrand (Arm)
@ 2026-04-27  9:24   ` David Hildenbrand (Arm)
  2026-04-29 13:47   ` Alexander Gordeev
  1 sibling, 0 replies; 9+ messages in thread
From: David Hildenbrand (Arm) @ 2026-04-27  9:24 UTC (permalink / raw)
  To: Alexander Gordeev, Andrew Morton, Lorenzo Stoakes
  Cc: Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-mm, linux-kernel, Ryan Roberts, Christophe Leroy (CS GROUP)

On 4/27/26 11:02, David Hildenbrand (Arm) wrote:
> On 4/27/26 07:20, Alexander Gordeev wrote:
>> Convert pgtable direct entry dereferences to the corresponding
>> pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
>> reads when no lock is taken.
>>
>> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
>> ---
>>  mm/page_vma_mapped.c | 12 ++++++------
>>  1 file changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>> index b38a1d00c971..a4520bb10d2a 100644
>> --- a/mm/page_vma_mapped.c
>> +++ b/mm/page_vma_mapped.c
>> @@ -41,7 +41,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>>  	if (!pvmw->pte)
>>  		return false;
>>  
>> -	ptent = ptep_get(pvmw->pte);
>> +	ptent = ptep_get_lockless(pvmw->pte);
>>  
>>  	if (pte_none(ptent)) {
>>  		return false;
>> @@ -219,17 +219,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
>>  restart:
>>  	do {
>>  		pgd = pgd_offset(mm, pvmw->address);
>> -		if (!pgd_present(*pgd)) {
>> +		if (!pgd_present(pgdp_get(pgd))) {
>>  			step_forward(pvmw, PGDIR_SIZE);
>>  			continue;
>>  		}
>>  		p4d = p4d_offset(pgd, pvmw->address);
>> -		if (!p4d_present(*p4d)) {
>> +		if (!p4d_present(p4dp_get(p4d))) {
>>  			step_forward(pvmw, P4D_SIZE);
>>  			continue;
>>  		}
>>  		pud = pud_offset(p4d, pvmw->address);
>> -		if (!pud_present(*pud)) {
>> +		if (!pud_present(pudp_get(pud))) {
>>  			step_forward(pvmw, PUD_SIZE);
>>  			continue;
> 
> Wasn't there a problem with folded page tables, where we would no longer be able
> to optimize out the folded page table accesses?
> 
> I thought we discussed ways to resolve that. Let me dig ...

Here is what I have after the discussion following
https://lore.kernel.org/all/0019d675-ce3d-4a5c-89ed-f126c45145c9@kernel.org/.

Completely untested of course:

From b4715d49feb588032c6bb44bd97cf06739b5c99f Mon Sep 17 00:00:00 2001
From: "David Hildenbrand (Arm)" <david@kernel.org>
Date: Mon, 27 Apr 2026 11:16:21 +0200
Subject: [PATCH] mm: optimize pmdp_get() and friends for folded pagetable
 levels

Using pmdp_get() and friends in common code on a kernel config with
folded page tables is suboptimal: pmdp_get() and friends default to a
READ_ONCE(), forcing the compiler to actually read that value even though
it will not be used afterwards.

This was recently reported by Christophe Leroy [1].

Once we realize that the output of pmdp_get() on these kernel configs is
entirely ignored, as pmd_present()==1 and pmd_leaf()==0 are just
hard-coded, we can just make it return some dummy value.

pmd_offset() is expected to be called with the pudp afterwards, simply
performing a typecast of the pudp pointer to a pmdp pointer.

Let's introduce a pmd_offset_lockless() that does exactly the same:
perform a typecast.

[1] https://lore.kernel.org/all/0019d675-ce3d-4a5c-89ed-f126c45145c9@kernel.org/

Signed-off-by: David Hildenbrand (Arm) <david@kernel.org>
---
 include/asm-generic/pgtable-nop4d.h |  8 ++++++++
 include/asm-generic/pgtable-nopmd.h | 19 +++++++++++++++++++
 include/asm-generic/pgtable-nopud.h |  8 ++++++++
 3 files changed, 35 insertions(+)

diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index 03b7dae47dd4..cfe87036d61c 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -32,6 +32,14 @@ static inline void pgd_clear(pgd_t *pgd)	{ }
  */
 #define set_pgd(pgdptr, pgdval)	set_p4d((p4d_t *)(pgdptr), (p4d_t) { pgdval })
 
+static inline pgd_t pgdp_get(pgd_t *pgdp)
+{
+	pgd_t dummy = { 0 };
+
+	return dummy;
+}
+#define pgdp_get pgdp_get
+
 static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
 	return (p4d_t *)pgd;
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 8ffd64e7a24c..e87a42407563 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -43,12 +43,31 @@ static inline void pud_clear(pud_t *pud)	{ }
  */
 #define set_pud(pudptr, pudval)			set_pmd((pmd_t *)(pudptr), (pmd_t) { pudval })
 
+static inline pud_t pudp_get(pud_t *pudp)
+{
+	pud_t dummy = { 0 };
+
+	/*
+	 * Given that pud_present()==1 and pud_leaf()==0, page table walking code
+	 * treats this like a page table and calls pmd_offset() /
+	 * pmd_offset_lockless() with pudp, ignoring the returned value.
+	 */
+	return dummy;
+}
+#define pudp_get pudp_get
+
 static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
 {
 	return (pmd_t *)pud;
 }
 #define pmd_offset pmd_offset
 
+static inline pmd_t * pmd_offset_lockless(pud_t *pudp, pud_t pudval, unsigned long address)
+{
+	return (pmd_t *)pudp;
+}
+#define pmd_offset_lockless pmd_offset_lockless
+
 #define pmd_val(x)				(pud_val((x).pud))
 #define __pmd(x)				((pmd_t) { __pud(x) } )
 
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index eb70c6d7ceff..2cacd9571b2f 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -39,6 +39,14 @@ static inline void p4d_clear(p4d_t *p4d)	{ }
  */
 #define set_p4d(p4dptr, p4dval)	set_pud((pud_t *)(p4dptr), (pud_t) { p4dval })
 
+static inline p4d_t p4dp_get(p4d_t *p4dp)
+{
+	p4d_t dummy = { 0 };
+
+	return dummy;
+}
+#define p4dp_get p4dp_get
+
 static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 {
 	return (pud_t *)p4d;
-- 
2.43.0


-- 
Cheers,

David

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  9:02 ` David Hildenbrand (Arm)
  2026-04-27  9:24   ` David Hildenbrand (Arm)
@ 2026-04-29 13:47   ` Alexander Gordeev
  1 sibling, 0 replies; 9+ messages in thread
From: Alexander Gordeev @ 2026-04-29 13:47 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: Andrew Morton, Lorenzo Stoakes, Gerald Schaefer, Heiko Carstens,
	Vasily Gorbik, linux-s390, linux-mm, linux-kernel

On Mon, Apr 27, 2026 at 11:02:50AM +0200, David Hildenbrand (Arm) wrote:
> On 4/27/26 07:20, Alexander Gordeev wrote:
> > Convert pgtable direct entry dereferences to the corresponding
> > pXdp_get() accessors. Use ptep_get_lockless() variant for PTE
> > reads when no lock is taken.
> > 
> > Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
> > ---
> >  mm/page_vma_mapped.c | 12 ++++++------
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> > index b38a1d00c971..a4520bb10d2a 100644
> > --- a/mm/page_vma_mapped.c
> > +++ b/mm/page_vma_mapped.c
> > @@ -41,7 +41,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> >  	if (!pvmw->pte)
> >  		return false;
> >  
> > -	ptent = ptep_get(pvmw->pte);
> > +	ptent = ptep_get_lockless(pvmw->pte);

(*)

> >  	if (pte_none(ptent)) {
> >  		return false;
> > @@ -219,17 +219,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
> >  restart:
> >  	do {
> >  		pgd = pgd_offset(mm, pvmw->address);
> > -		if (!pgd_present(*pgd)) {
> > +		if (!pgd_present(pgdp_get(pgd))) {
> >  			step_forward(pvmw, PGDIR_SIZE);
> >  			continue;
> >  		}
> >  		p4d = p4d_offset(pgd, pvmw->address);
> > -		if (!p4d_present(*p4d)) {
> > +		if (!p4d_present(p4dp_get(p4d))) {
> >  			step_forward(pvmw, P4D_SIZE);
> >  			continue;
> >  		}
> >  		pud = pud_offset(p4d, pvmw->address);
> > -		if (!pud_present(*pud)) {
> > +		if (!pud_present(pudp_get(pud))) {
> >  			step_forward(pvmw, PUD_SIZE);
> >  			continue;
> 
> Wasn't there a problem with folded page tables, where we would no longer be able
> to optimize out the folded page table accesses?

Ok, that is a different topic. I was actually trying to resolve the
lockless issue(s) in (*), and converting the direct dereferences just
seemed relevant to go along with it (similarly to the GUP patch).

But I would rather resend this patch with only the ptep_get() =>
ptep_get_lockless() fixes.
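
That is, roughly only the following would remain (hand-stripped from
the patch above, untested):

-	ptent = ptep_get(pvmw->pte);
+	ptent = ptep_get_lockless(pvmw->pte);
...
-		} while (pte_none(ptep_get(pvmw->pte)));
+		} while (pte_none(ptep_get_lockless(pvmw->pte)));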

Would it work?

> I thought we discussed ways to resolve that. Let me dig ...
> 
> 
> -- 
> Cheers,
> 
> David

Thanks!

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
  2026-04-27  5:20 [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors Alexander Gordeev
                   ` (2 preceding siblings ...)
  2026-04-27  9:02 ` David Hildenbrand (Arm)
@ 2026-04-30 12:11 ` kernel test robot
  3 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2026-04-30 12:11 UTC (permalink / raw)
  To: Alexander Gordeev, Andrew Morton, David Hildenbrand,
	Lorenzo Stoakes
  Cc: llvm, oe-kbuild-all, Linux Memory Management List,
	Gerald Schaefer, Heiko Carstens, Vasily Gorbik, linux-s390,
	linux-kernel

Hi Alexander,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Alexander-Gordeev/mm-page_vma_mapped_walk-add-missing-pgtable-entry-accessors/20260429-030409
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20260427052000.196402-1-agordeev%40linux.ibm.com
patch subject: [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors
config: arm-defconfig (https://download.01.org/0day-ci/archive/20260430/202604302041.gNPhRRxL-lkp@intel.com/config)
compiler: clang version 23.0.0git (https://github.com/llvm/llvm-project 5bac06718f502014fade905512f1d26d578a18f3)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260430/202604302041.gNPhRRxL-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202604302041.gNPhRRxL-lkp@intel.com/

All errors (new ones prefixed by >>):

>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: use of undeclared identifier 'pgdp'; did you mean 'pgd'?
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:36: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                            ^~~~
   mm/page_vma_mapped.c:186:9: note: 'pgd' declared here
     186 |         pgd_t *pgd;
         |                ^
>> mm/page_vma_mapped.c:222:20: error: passing 'const volatile pmdval_t *' (aka 'const volatile unsigned int *') to parameter of type 'pmdval_t *' (aka 'unsigned int *') discards qualifiers [-Werror,-Wincompatible-pointer-types-discards-qualifiers]
     222 |                 if (!pgd_present(pgdp_get(pgd))) {
         |                                  ^~~~~~~~~~~~~
   arch/arm/include/asm/pgtable.h:144:25: note: expanded from macro 'pgdp_get'
     144 | #define pgdp_get(pgpd)          READ_ONCE(*pgdp)
         |                                 ^~~~~~~~~~~~~~~~
   include/asm-generic/rwonce.h:47:28: note: expanded from macro 'READ_ONCE'
      47 | #define READ_ONCE(x)                                                    \
         |                                                                         ^
      48 | ({                                                                      \
         | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      49 |         compiletime_assert_rwonce_type(x);                              \
         |         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      50 |         __READ_ONCE(x);                                                 \
         |         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      51 | })
         | ~~
   include/asm-generic/pgtable-nop4d.h:23:37: note: passing argument to parameter 'pgd' here
      23 | static inline int pgd_present(pgd_t pgd)        { return 1; }
         |                                     ^
   8 errors generated.


vim +222 mm/page_vma_mapped.c

   155	
   156	/**
   157	 * page_vma_mapped_walk - check if @pvmw->pfn is mapped in @pvmw->vma at
   158	 * @pvmw->address
   159	 * @pvmw: pointer to struct page_vma_mapped_walk. page, vma, address and flags
   160	 * must be set. pmd, pte and ptl must be NULL.
   161	 *
   162	 * Returns true if the page is mapped in the vma. @pvmw->pmd and @pvmw->pte point
   163	 * to relevant page table entries. @pvmw->ptl is locked. @pvmw->address is
   164	 * adjusted if needed (for PTE-mapped THPs).
   165	 *
   166	 * If @pvmw->pmd is set but @pvmw->pte is not, you have found PMD-mapped page
   167	 * (usually THP). For PTE-mapped THP, you should run page_vma_mapped_walk() in
   168	 * a loop to find all PTEs that map the THP.
   169	 *
   170	 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
   171	 * regardless of which page table level the page is mapped at. @pvmw->pmd is
   172	 * NULL.
   173	 *
   174	 * Returns false if there are no more page table entries for the page in
   175	 * the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
   176	 *
   177	 * If you need to stop the walk before page_vma_mapped_walk() returned false,
   178	 * use page_vma_mapped_walk_done(). It will do the housekeeping.
   179	 */
   180	bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
   181	{
   182		struct vm_area_struct *vma = pvmw->vma;
   183		struct mm_struct *mm = vma->vm_mm;
   184		unsigned long end;
   185		spinlock_t *ptl;
   186		pgd_t *pgd;
   187		p4d_t *p4d;
   188		pud_t *pud;
   189		pmd_t pmde;
   190	
   191		/* The only possible pmd mapping has been handled on last iteration */
   192		if (pvmw->pmd && !pvmw->pte)
   193			return not_found(pvmw);
   194	
   195		if (unlikely(is_vm_hugetlb_page(vma))) {
   196			struct hstate *hstate = hstate_vma(vma);
   197			unsigned long size = huge_page_size(hstate);
   198			/* The only possible mapping was handled on last iteration */
   199			if (pvmw->pte)
   200				return not_found(pvmw);
   201			/*
   202			 * All callers that get here will already hold the
   203			 * i_mmap_rwsem.  Therefore, no additional locks need to be
   204			 * taken before calling hugetlb_walk().
   205			 */
   206			pvmw->pte = hugetlb_walk(vma, pvmw->address, size);
   207			if (!pvmw->pte)
   208				return false;
   209	
   210			pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte);
   211			if (!check_pte(pvmw, pages_per_huge_page(hstate)))
   212				return not_found(pvmw);
   213			return true;
   214		}
   215	
   216		end = vma_address_end(pvmw);
   217		if (pvmw->pte)
   218			goto next_pte;
   219	restart:
   220		do {
   221			pgd = pgd_offset(mm, pvmw->address);
 > 222			if (!pgd_present(pgdp_get(pgd))) {
   223				step_forward(pvmw, PGDIR_SIZE);
   224				continue;
   225			}
   226			p4d = p4d_offset(pgd, pvmw->address);
   227			if (!p4d_present(p4dp_get(p4d))) {
   228				step_forward(pvmw, P4D_SIZE);
   229				continue;
   230			}
   231			pud = pud_offset(p4d, pvmw->address);
   232			if (!pud_present(pudp_get(pud))) {
   233				step_forward(pvmw, PUD_SIZE);
   234				continue;
   235			}
   236	
   237			pvmw->pmd = pmd_offset(pud, pvmw->address);
   238			/*
   239			 * Make sure the pmd value isn't cached in a register by the
   240			 * compiler and used as a stale value after we've observed a
   241			 * subsequent update.
   242			 */
   243			pmde = pmdp_get_lockless(pvmw->pmd);
   244	
   245			if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) {
   246				pvmw->ptl = pmd_lock(mm, pvmw->pmd);
   247				pmde = pmdp_get(pvmw->pmd);
   248				if (!pmd_present(pmde)) {
   249					softleaf_t entry;
   250	
   251					if (!thp_migration_supported() ||
   252					    !(pvmw->flags & PVMW_MIGRATION))
   253						return not_found(pvmw);
   254					entry = softleaf_from_pmd(pmde);
   255	
   256					if (!softleaf_is_migration(entry) ||
   257					    !check_pmd(softleaf_to_pfn(entry), pvmw))
   258						return not_found(pvmw);
   259					return true;
   260				}
   261				if (likely(pmd_trans_huge(pmde))) {
   262					if (pvmw->flags & PVMW_MIGRATION)
   263						return not_found(pvmw);
   264					if (!check_pmd(pmd_pfn(pmde), pvmw))
   265						return not_found(pvmw);
   266					return true;
   267				}
   268				/* THP pmd was split under us: handle on pte level */
   269				spin_unlock(pvmw->ptl);
   270				pvmw->ptl = NULL;
   271			} else if (!pmd_present(pmde)) {
   272				const softleaf_t entry = softleaf_from_pmd(pmde);
   273	
   274				if (softleaf_is_device_private(entry)) {
   275					pvmw->ptl = pmd_lock(mm, pvmw->pmd);
   276					return true;
   277				}
   278	
   279				if ((pvmw->flags & PVMW_SYNC) &&
   280				    thp_vma_suitable_order(vma, pvmw->address,
   281							   PMD_ORDER) &&
   282				    (pvmw->nr_pages >= HPAGE_PMD_NR))
   283					sync_with_folio_pmd_zap(mm, pvmw->pmd);
   284	
   285				step_forward(pvmw, PMD_SIZE);
   286				continue;
   287			}
   288			if (!map_pte(pvmw, &pmde, &ptl)) {
   289				if (!pvmw->pte)
   290					goto restart;
   291				goto next_pte;
   292			}
   293	this_pte:
   294			if (check_pte(pvmw, 1))
   295				return true;
   296	next_pte:
   297			do {
   298				pvmw->address += PAGE_SIZE;
   299				if (pvmw->address >= end)
   300					return not_found(pvmw);
   301				/* Did we cross page table boundary? */
   302				if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
   303					if (pvmw->ptl) {
   304						spin_unlock(pvmw->ptl);
   305						pvmw->ptl = NULL;
   306					}
   307					pte_unmap(pvmw->pte);
   308					pvmw->pte = NULL;
   309					pvmw->flags |= PVMW_PGTABLE_CROSSED;
   310					goto restart;
   311				}
   312				pvmw->pte++;
   313			} while (pte_none(ptep_get_lockless(pvmw->pte)));
   314	
   315			if (!pvmw->ptl) {
   316				spin_lock(ptl);
   317				if (unlikely(!pmd_same(pmde, pmdp_get_lockless(pvmw->pmd)))) {
   318					pte_unmap_unlock(pvmw->pte, ptl);
   319					pvmw->pte = NULL;
   320					goto restart;
   321				}
   322				pvmw->ptl = ptl;
   323			}
   324			goto this_pte;
   325		} while (pvmw->address < end);
   326	
   327		return false;
   328	}
   329	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-04-30 12:12 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-27  5:20 [PATCH] mm/page_vma_mapped_walk: add missing pgtable entry accessors Alexander Gordeev
2026-04-27  5:47 ` Anshuman Khandual
2026-04-27  8:40 ` Oscar Salvador
2026-04-27  8:49   ` Anshuman Khandual
2026-04-27  9:01   ` Alexander Gordeev
2026-04-27  9:02 ` David Hildenbrand (Arm)
2026-04-27  9:24   ` David Hildenbrand (Arm)
2026-04-29 13:47   ` Alexander Gordeev
2026-04-30 12:11 ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox