All of lore.kernel.org
 help / color / mirror / Atom feed
From: Muchun Song <muchun.song@linux.dev>
To: Usama Arif <usama.arif@bytedance.com>
Cc: linux-kernel@vger.kernel.org, fam.zheng@bytedance.com,
	liangma@liangbit.com, simon.evans@bytedance.com,
	punit.agrawal@bytedance.com, linux-mm@kvack.org,
	mike.kravetz@oracle.com, rppt@kernel.org
Subject: Re: [v2 1/6] mm: hugetlb: Skip prep of tail pages when HVO is enabled
Date: Tue, 1 Aug 2023 10:04:59 +0800	[thread overview]
Message-ID: <b6aaa304-e632-9f8f-ae60-63ae209ad152@linux.dev> (raw)
In-Reply-To: <20230730151606.2871391-2-usama.arif@bytedance.com>



On 2023/7/30 23:16, Usama Arif wrote:
> When vmemmap is optimizable, it will free all the duplicated tail
> pages in hugetlb_vmemmap_optimize while preparing the new hugepage.
> Hence, there is no need to prepare them.
>
> For 1G x86 hugepages, it avoids preparing
> 262144 - 64 = 262080 struct pages per hugepage.
>
> The indirection of using __prep_compound_gigantic_folio is also removed,
> as it just creates extra functions to indicate demote which can be done
> with the argument.
>
> Signed-off-by: Usama Arif <usama.arif@bytedance.com>
> ---
>   mm/hugetlb.c         | 32 ++++++++++++++------------------
>   mm/hugetlb_vmemmap.c |  2 +-
>   mm/hugetlb_vmemmap.h | 15 +++++++++++----
>   3 files changed, 26 insertions(+), 23 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 64a3239b6407..541c07b6d60f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1942,14 +1942,23 @@ static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int ni
>   	spin_unlock_irq(&hugetlb_lock);
>   }
>   
> -static bool __prep_compound_gigantic_folio(struct folio *folio,
> -					unsigned int order, bool demote)
> +static bool prep_compound_gigantic_folio(struct folio *folio, struct hstate *h, bool demote)
>   {
>   	int i, j;
> +	int order = huge_page_order(h);
>   	int nr_pages = 1 << order;
>   	struct page *p;
>   
>   	__folio_clear_reserved(folio);
> +
> +	/*
> +	 * No need to prep pages that will be freed later by hugetlb_vmemmap_optimize.
> +	 * Hence, reduce nr_pages to the pages that will be kept.
> +	 */
> +	if (IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP) &&
> +			vmemmap_should_optimize(h, &folio->page))
> +		nr_pages = HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page);

We need to initialize the refcount of the tail pages to zero (see the big
comment below in this function). Consider a situation where someone (maybe
GUP) gets a ref on one of the tail pages while the vmemmap is being
optimized: what prevents this from happening?

Thanks.

> +
>   	for (i = 0; i < nr_pages; i++) {
>   		p = folio_page(folio, i);
>   
> @@ -2019,18 +2028,6 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
>   	return false;
>   }
>   
> -static bool prep_compound_gigantic_folio(struct folio *folio,
> -							unsigned int order)
> -{
> -	return __prep_compound_gigantic_folio(folio, order, false);
> -}
> -
> -static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
> -							unsigned int order)
> -{
> -	return __prep_compound_gigantic_folio(folio, order, true);
> -}
> -
>   /*
>    * PageHuge() only returns true for hugetlbfs pages, but not for normal or
>    * transparent huge pages.  See the PageTransHuge() documentation for more
> @@ -2185,7 +2182,7 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
>   	if (!folio)
>   		return NULL;
>   	if (hstate_is_gigantic(h)) {
> -		if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
> +		if (!prep_compound_gigantic_folio(folio, h, false)) {
>   			/*
>   			 * Rare failure to convert pages to compound page.
>   			 * Free pages and try again - ONCE!
> @@ -3201,7 +3198,7 @@ static void __init gather_bootmem_prealloc(void)
>   
>   		VM_BUG_ON(!hstate_is_gigantic(h));
>   		WARN_ON(folio_ref_count(folio) != 1);
> -		if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
> +		if (prep_compound_gigantic_folio(folio, h, false)) {
>   			WARN_ON(folio_test_reserved(folio));
>   			prep_new_hugetlb_folio(h, folio, folio_nid(folio));
>   			free_huge_page(page); /* add to the hugepage allocator */
> @@ -3624,8 +3621,7 @@ static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
>   		subpage = folio_page(folio, i);
>   		inner_folio = page_folio(subpage);
>   		if (hstate_is_gigantic(target_hstate))
> -			prep_compound_gigantic_folio_for_demote(inner_folio,
> -							target_hstate->order);
> +			prep_compound_gigantic_folio(inner_folio, target_hstate, true);
>   		else
>   			prep_compound_page(subpage, target_hstate->order);
>   		folio_change_private(inner_folio, NULL);
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index c2007ef5e9b0..b721e87de2b3 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -486,7 +486,7 @@ int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head)
>   }
>   
>   /* Return true iff a HugeTLB whose vmemmap should and can be optimized. */
> -static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
> +bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
>   {
>   	if (!READ_ONCE(vmemmap_optimize_enabled))
>   		return false;
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 25bd0e002431..3e7978a9af73 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -10,16 +10,17 @@
>   #define _LINUX_HUGETLB_VMEMMAP_H
>   #include <linux/hugetlb.h>
>   
> -#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> -int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> -void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
> -
>   /*
>    * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
>    * Documentation/vm/vmemmap_dedup.rst.
>    */
>   #define HUGETLB_VMEMMAP_RESERVE_SIZE	PAGE_SIZE
>   
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> +int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head);
> +void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head);
> +bool vmemmap_should_optimize(const struct hstate *h, const struct page *head);
> +
>   static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
>   {
>   	return pages_per_huge_page(h) * sizeof(struct page);
> @@ -51,6 +52,12 @@ static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate
>   {
>   	return 0;
>   }
> +
> +static inline bool vmemmap_should_optimize(const struct hstate *h, const struct page *head)
> +{
> +	return false;
> +}
> +
>   #endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
>   
>   static inline bool hugetlb_vmemmap_optimizable(const struct hstate *h)



  parent reply	other threads:[~2023-08-01  2:05 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-30 15:16 [v2 0/6] mm/memblock: Skip prep and initialization of struct pages freed later by HVO Usama Arif
2023-07-30 15:16 ` [v2 1/6] mm: hugetlb: Skip prep of tail pages when HVO is enabled Usama Arif
2023-07-31 23:18   ` Mike Kravetz
2023-08-02 10:05     ` [External] " Usama Arif
2023-08-01  2:04   ` Muchun Song [this message]
2023-08-02 10:06     ` Usama Arif
2023-07-30 15:16 ` [v2 2/6] mm: hugetlb_vmemmap: Use nid of the head page to reallocate it Usama Arif
2023-07-30 15:16 ` [v2 3/6] memblock: pass memblock_type to memblock_setclr_flag Usama Arif
2023-07-30 15:16 ` [v2 4/6] memblock: introduce MEMBLOCK_RSRV_NOINIT flag Usama Arif
2023-07-30 15:16 ` [v2 5/6] mm: move allocation of gigantic hstates to the start of mm_core_init Usama Arif
2023-07-30 16:49   ` kernel test robot
2023-08-01  3:07   ` Muchun Song
2023-07-30 15:16 ` [v2 6/6] mm: hugetlb: Skip initialization of struct pages freed later by HVO Usama Arif
2023-07-30 19:33   ` kernel test robot
2023-07-31  0:11   ` kernel test robot
2023-07-31  6:46   ` kernel test robot
2023-07-30 22:28 ` [v2 0/6] mm/memblock: Skip prep and " Usama Arif

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b6aaa304-e632-9f8f-ae60-63ae209ad152@linux.dev \
    --to=muchun.song@linux.dev \
    --cc=fam.zheng@bytedance.com \
    --cc=liangma@liangbit.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=punit.agrawal@bytedance.com \
    --cc=rppt@kernel.org \
    --cc=simon.evans@bytedance.com \
    --cc=usama.arif@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.