Re: [PATCH v4 5/5] hugetlb: add hugetlb demote page support

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Oscar Salvador <osalvador@suse.de>
To: Mike Kravetz <mike.kravetz@oracle.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	David Hildenbrand <david@redhat.com>,
	Michal Hocko <mhocko@suse.com>, Zi Yan <ziy@nvidia.com>,
	Muchun Song <songmuchun@bytedance.com>,
	Naoya Horiguchi <naoya.horiguchi@linux.dev>,
	David Rientjes <rientjes@google.com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>,
	Nghia Le <nghialm78@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH v4 5/5] hugetlb: add hugetlb demote page support
Date: Mon, 18 Oct 2021 10:06:34 +0200	[thread overview]
Message-ID: <20211018080634.GC11960@linux> (raw)
In-Reply-To: <6ca29b8e-527c-d6ec-900e-e6a43e4f8b73@oracle.com>

On Fri, Oct 08, 2021 at 01:57:48PM -0700, Mike Kravetz wrote:
> From 25e4dac59f4d203f3a7e86d3591d70c1e956d11c Mon Sep 17 00:00:00 2001
> From: Mike Kravetz <mike.kravetz@oracle.com>
> Date: Fri, 8 Oct 2021 13:21:21 -0700
> Subject: [PATCH v4 5/5] hugetlb: add hugetlb demote page support
> 
> Demote page functionality will split a huge page into a number of huge
> pages of a smaller size.  For example, on x86 a 1GB huge page can be
> demoted into 512 2MB huge pages.  Demotion is done 'in place' by simply
> splitting the huge page.
> 
> Added '*_for_demote' wrappers for remove_hugetlb_page,
> destroy_compound_hugetlb_page and prep_compound_gigantic_page for use
> by demote code.
> 
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

Reviewed-by: Oscar Salvador <osalvador@suse.de>

> ---
>  mm/hugetlb.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 92 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 794e0c4c1b3c..e1883510309a 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1270,7 +1270,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
>  		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
>  		nr_nodes--)
>  
> -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
> +/* also used to demote non-gigantic huge pages */
>  static void __destroy_compound_gigantic_page(struct page *page,
>  					unsigned int order, bool demote)
>  {
> @@ -1293,6 +1293,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
>  	__ClearPageHead(page);
>  }
>  
> +static void destroy_compound_hugetlb_page_for_demote(struct page *page,
> +					unsigned int order)
> +{
> +	__destroy_compound_gigantic_page(page, order, true);
> +}
> +
> +#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
>  static void destroy_compound_gigantic_page(struct page *page,
>  					unsigned int order)
>  {
> @@ -1438,6 +1445,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
>  	__remove_hugetlb_page(h, page, adjust_surplus, false);
>  }
>  
> +static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
> +							bool adjust_surplus)
> +{
> +	__remove_hugetlb_page(h, page, adjust_surplus, true);
> +}
> +
>  static void add_hugetlb_page(struct hstate *h, struct page *page,
>  			     bool adjust_surplus)
>  {
> @@ -1779,6 +1792,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
>  	return __prep_compound_gigantic_page(page, order, false);
>  }
>  
> +static bool prep_compound_gigantic_page_for_demote(struct page *page,
> +							unsigned int order)
> +{
> +	return __prep_compound_gigantic_page(page, order, true);
> +}
> +
>  /*
>   * PageHuge() only returns true for hugetlbfs pages, but not for normal or
>   * transparent huge pages.  See the PageTransHuge() documentation for more
> @@ -3304,9 +3323,72 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
>  	return 0;
>  }
>  
> +static int demote_free_huge_page(struct hstate *h, struct page *page)
> +{
> +	int i, nid = page_to_nid(page);
> +	struct hstate *target_hstate;
> +	int rc = 0;
> +
> +	target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
> +
> +	remove_hugetlb_page_for_demote(h, page, false);
> +	spin_unlock_irq(&hugetlb_lock);
> +
> +	rc = alloc_huge_page_vmemmap(h, page);
> +	if (rc) {
> +		/* Allocation of vmemmap failed, we cannot demote page */
> +		spin_lock_irq(&hugetlb_lock);
> +		set_page_refcounted(page);
> +		add_hugetlb_page(h, page, false);
> +		return rc;
> +	}
> +
> +	/*
> +	 * Use destroy_compound_hugetlb_page_for_demote for all huge page
> +	 * sizes as it will not ref count pages.
> +	 */
> +	destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
> +
> +	/*
> +	 * Taking target hstate mutex synchronizes with set_max_huge_pages.
> +	 * Without the mutex, pages added to target hstate could be marked
> +	 * as surplus.
> +	 *
> +	 * Note that we already hold h->resize_lock.  To prevent deadlock,
> +	 * use the convention of always taking larger size hstate mutex first.
> +	 */
> +	mutex_lock(&target_hstate->resize_lock);
> +	for (i = 0; i < pages_per_huge_page(h);
> +				i += pages_per_huge_page(target_hstate)) {
> +		if (hstate_is_gigantic(target_hstate))
> +			prep_compound_gigantic_page_for_demote(page + i,
> +							target_hstate->order);
> +		else
> +			prep_compound_page(page + i, target_hstate->order);
> +		set_page_private(page + i, 0);
> +		set_page_refcounted(page + i);
> +		prep_new_huge_page(target_hstate, page + i, nid);
> +		put_page(page + i);
> +	}
> +	mutex_unlock(&target_hstate->resize_lock);
> +
> +	spin_lock_irq(&hugetlb_lock);
> +
> +	/*
> +	 * Not absolutely necessary, but for consistency update max_huge_pages
> +	 * based on pool changes for the demoted page.
> +	 */
> +	h->max_huge_pages--;
> +	target_hstate->max_huge_pages += pages_per_huge_page(h);
> +
> +	return rc;
> +}
> +
>  static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
>  	__must_hold(&hugetlb_lock)
>  {
> +	int nr_nodes, node;
> +	struct page *page;
>  	int rc = 0;
>  
>  	lockdep_assert_held(&hugetlb_lock);
> @@ -3317,9 +3399,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
>  		return -EINVAL;		/* internal error */
>  	}
>  
> -	/*
> -	 * TODO - demote fucntionality will be added in subsequent patch
> -	 */
> +	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
> +		if (!list_empty(&h->hugepage_freelists[node])) {
> +			page = list_entry(h->hugepage_freelists[node].next,
> +					struct page, lru);
> +			rc = demote_free_huge_page(h, page);
> +			break;
> +		}
> +	}
> +
>  	return rc;
>  }
>  
> @@ -3554,10 +3642,6 @@ static ssize_t demote_store(struct kobject *kobj,
>  		/*
>  		 * Check for available pages to demote each time thorough the
>  		 * loop as demote_pool_huge_page will drop hugetlb_lock.
> -		 *
> -		 * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
> -		 * but will when full demote functionality is added in a later
> -		 * patch.
>  		 */
>  		if (nid != NUMA_NO_NODE)
>  			nr_available = h->free_huge_pages_node[nid];
> -- 
> 2.31.1
> 
> 

-- 
Oscar Salvador
SUSE Labs

     prev parent reply	other threads:[~2021-10-18  8:06 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-07 18:19 [PATCH v4 0/5] hugetlb: add demote/split page functionality Mike Kravetz
2021-10-07 18:19 ` [PATCH v4 1/5] hugetlb: add demote hugetlb page sysfs interfaces Mike Kravetz
2021-10-08  7:51   ` Oscar Salvador
2021-10-08 20:24     ` Mike Kravetz
2021-10-18  7:35       ` Oscar Salvador
2021-10-22 18:58         ` Mike Kravetz
2021-10-25  7:24           ` Oscar Salvador
2021-10-07 18:19 ` [PATCH v4 2/5] mm/cma: add cma_pages_valid to determine if pages are in CMA Mike Kravetz
2021-10-08  7:53   ` Oscar Salvador
2021-10-08  7:55     ` Oscar Salvador
2021-10-07 18:19 ` [PATCH v4 3/5] hugetlb: be sure to free demoted CMA pages to CMA Mike Kravetz
2021-10-07 18:19 ` [PATCH v4 4/5] hugetlb: add demote bool to gigantic page routines Mike Kravetz
2021-10-18  7:58   ` Oscar Salvador
2021-10-22 19:05     ` Mike Kravetz
2021-10-25  7:23       ` Oscar Salvador
2021-10-07 18:19 ` [PATCH v4 5/5] hugetlb: add hugetlb demote page support Mike Kravetz
2021-10-08 20:57   ` Mike Kravetz
2021-10-18  8:06     ` Oscar Salvador [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211018080634.GC11960@linux \
    --to=osalvador@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=david@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=mike.kravetz@oracle.com \
    --cc=naoya.horiguchi@linux.dev \
    --cc=nghialm78@gmail.com \
    --cc=rientjes@google.com \
    --cc=songmuchun@bytedance.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.