All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chih-En Lin <shiyn.lin@gmail.com>
To: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Vishal Moola <vishal.moola@gmail.com>,
	linux-mm@kvack.org
Subject: Re: [PATCH 1/4] mm: Use frozen pages for page tables
Date: Wed, 19 Nov 2025 10:46:54 -0500	[thread overview]
Message-ID: <20251119154654.GA606021@gmail.com> (raw)
In-Reply-To: <20251113140448.1814860-2-willy@infradead.org>

On Thu, Nov 13, 2025 at 02:04:43PM +0000, Matthew Wilcox (Oracle) wrote:
> Page tables do not use the reference count.  That means we can avoid
> two atomic operations (one on alloc, one on free) by allocating frozen
> pages here.  This does not interfere with compaction as page tables are
> non-movable allocations.
> 
> pagetable_alloc() and pagetable_free() need to move out of line to make
> this work as alloc_frozen_page() and free_frozen_page() are not exported
> outside the mm for now.  We'll want them out of line anyway soon.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/mm.h   | 53 +++++---------------------------------------
>  mm/memory.c          | 34 ++++++++++++++++++++++++++++
>  mm/pgtable-generic.c |  3 ++-
>  3 files changed, 42 insertions(+), 48 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 5087deecdd9c..e168ee23091e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2995,58 +2995,17 @@ static inline void ptdesc_clear_kernel(struct ptdesc *ptdesc)
>   */
>  static inline bool ptdesc_test_kernel(const struct ptdesc *ptdesc)
>  {
> +#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
>  	return test_bit(PT_kernel, &ptdesc->pt_flags.f);
> +#else
> +	return false;
> +#endif
>  }
>  
> -/**
> - * pagetable_alloc - Allocate pagetables
> - * @gfp:    GFP flags
> - * @order:  desired pagetable order
> - *
> - * pagetable_alloc allocates memory for page tables as well as a page table
> - * descriptor to describe that memory.
> - *
> - * Return: The ptdesc describing the allocated page tables.
> - */
> -static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
> -{
> -	struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);
> -
> -	return page_ptdesc(page);
> -}
> +struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order);
>  #define pagetable_alloc(...)	alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
> -
> -static inline void __pagetable_free(struct ptdesc *pt)
> -{
> -	struct page *page = ptdesc_page(pt);
> -
> -	__free_pages(page, compound_order(page));
> -}
> -
> -#ifdef CONFIG_ASYNC_KERNEL_PGTABLE_FREE
> +void pagetable_free(struct ptdesc *pt);
>  void pagetable_free_kernel(struct ptdesc *pt);
> -#else
> -static inline void pagetable_free_kernel(struct ptdesc *pt)
> -{
> -	__pagetable_free(pt);
> -}
> -#endif
> -/**
> - * pagetable_free - Free pagetables
> - * @pt:	The page table descriptor
> - *
> - * pagetable_free frees the memory of all page tables described by a page
> - * table descriptor and the memory for the descriptor itself.
> - */
> -static inline void pagetable_free(struct ptdesc *pt)
> -{
> -	if (ptdesc_test_kernel(pt)) {
> -		ptdesc_clear_kernel(pt);
> -		pagetable_free_kernel(pt);
> -	} else {
> -		__pagetable_free(pt);
> -	}
> -}
>  
>  #if defined(CONFIG_SPLIT_PTE_PTLOCKS)
>  #if ALLOC_SPLIT_PTLOCKS
> diff --git a/mm/memory.c b/mm/memory.c
> index 1c66ee83a7ab..781cd7f607f7 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -7338,6 +7338,40 @@ long copy_folio_from_user(struct folio *dst_folio,
>  }
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
>  
> +/**
> + * pagetable_alloc - Allocate pagetables
> + * @gfp:    GFP flags
> + * @order:  desired pagetable order
> + *
> + * pagetable_alloc allocates memory for page tables as well as a page table
> + * descriptor to describe that memory.
> + *
> + * Return: The ptdesc describing the allocated page tables.
> + */
> +struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
> +{
> +	struct page *page = alloc_frozen_pages_noprof(gfp | __GFP_COMP, order);
> +
> +	return page_ptdesc(page);
> +}
> +
> +/**
> + * pagetable_free - Free pagetables
> + * @pt:	The page table descriptor
> + *
> + * pagetable_free frees the memory of all page tables described by a page
> + * table descriptor and the memory for the descriptor itself.
> + */
> +void pagetable_free(struct ptdesc *pt)
> +{
> +	struct page *page = ptdesc_page(pt);
> +
> +	if (ptdesc_test_kernel(pt))
> +		pagetable_free_kernel(pt);

Should we use test_and_clear_bit() here to prevent a double free?
Or is that unnecessary because the caller guarantees that no other
thread will free the same page tables?

> +	else
> +		free_frozen_pages(page, compound_order(page));
> +}
> +
>  #if defined(CONFIG_SPLIT_PTE_PTLOCKS) && ALLOC_SPLIT_PTLOCKS
>  
>  static struct kmem_cache *page_ptl_cachep;
> diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
> index d3aec7a9926a..597049e21ac1 100644
> --- a/mm/pgtable-generic.c
> +++ b/mm/pgtable-generic.c
> @@ -434,11 +434,12 @@ static void kernel_pgtable_work_func(struct work_struct *work)
>  
>  	iommu_sva_invalidate_kva_range(PAGE_OFFSET, TLB_FLUSH_ALL);
>  	list_for_each_entry_safe(pt, next, &page_list, pt_list)
> -		__pagetable_free(pt);
> +		pagetable_free(pt);
>  }
>  
>  void pagetable_free_kernel(struct ptdesc *pt)
>  {
> +	ptdesc_clear_kernel(pt);
>  	spin_lock(&kernel_pgtable_work.lock);
>  	list_add(&pt->pt_list, &kernel_pgtable_work.list);
>  	spin_unlock(&kernel_pgtable_work.lock);
> -- 
> 2.47.2
> 
>

Thanks,
Chih-En Lin


  parent reply	other threads:[~2025-11-19 15:47 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-13 14:04 [PATCH 0/4] Convert pgtable to use frozen pages Matthew Wilcox (Oracle)
2025-11-13 14:04 ` [PATCH 1/4] mm: Use frozen pages for page tables Matthew Wilcox (Oracle)
2025-11-13 18:24   ` Vishal Moola (Oracle)
2025-11-13 19:14     ` Vishal Moola (Oracle)
2025-11-14 13:45       ` Matthew Wilcox
2025-11-14 14:31       ` Will Deacon
2025-11-17 14:38   ` kernel test robot
2025-11-18  0:44     ` Vishal Moola (Oracle)
2025-11-19 15:46   ` Chih-En Lin [this message]
2025-11-20 13:55     ` David Hildenbrand (Red Hat)
2025-11-13 14:04 ` [PATCH 2/4] mm: Account pagetable memory when allocated Matthew Wilcox (Oracle)
2025-11-13 19:39   ` Vishal Moola (Oracle)
2025-11-13 14:04 ` [PATCH 3/4] mm: Mark " Matthew Wilcox (Oracle)
2025-11-18 17:00   ` David Hildenbrand (Red Hat)
2025-11-13 14:04 ` [PATCH 4/4] pgtable: Remove uses of page->lru Matthew Wilcox (Oracle)
2025-11-20 13:56   ` David Hildenbrand (Red Hat)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251119154654.GA606021@gmail.com \
    --to=shiyn.lin@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=vishal.moola@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.