linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Alexander H Duyck <alexander.duyck@gmail.com>
To: Yunsheng Lin <linyunsheng@huawei.com>,
	davem@davemloft.net, kuba@kernel.org,  pabeni@redhat.com
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org
Subject: Re: [RFC v11 08/14] mm: page_frag: some minor refactoring before adding new API
Date: Sun, 21 Jul 2024 16:40:28 -0700	[thread overview]
Message-ID: <dbf876b000158aed8380d6ac3a3f6e8dd40ace7b.camel@gmail.com> (raw)
In-Reply-To: <20240719093338.55117-9-linyunsheng@huawei.com>

On Fri, 2024-07-19 at 17:33 +0800, Yunsheng Lin wrote:
> Refactor common codes from __page_frag_alloc_va_align()
> to __page_frag_cache_refill(), so that the new API can
> make use of them.
> 
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/linux/page_frag_cache.h |  2 +-
>  mm/page_frag_cache.c            | 93 +++++++++++++++++----------------
>  2 files changed, 49 insertions(+), 46 deletions(-)
> 
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index 12a16f8e8ad0..5aa45de7a9a5 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -50,7 +50,7 @@ static inline void *encoded_page_address(unsigned long encoded_va)
>  
>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
>  {
> -	nc->encoded_va = 0;
> +	memset(nc, 0, sizeof(*nc));
>  }
>  

I do not like requiring the entire structure to be reset as a part of
init. If encoded_va is 0 then we have reset the page and the flags.
There shouldn't be anything else we need to reset as remaining and bias
will be reset when we reallocate.

>  static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> index 7928e5d50711..d9c9cad17af7 100644
> --- a/mm/page_frag_cache.c
> +++ b/mm/page_frag_cache.c
> @@ -19,6 +19,28 @@
>  #include <linux/page_frag_cache.h>
>  #include "internal.h"
>  
> +static struct page *__page_frag_cache_recharge(struct page_frag_cache *nc)
> +{
> +	unsigned long encoded_va = nc->encoded_va;
> +	struct page *page;
> +
> +	page = virt_to_page((void *)encoded_va);
> +	if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> +		return NULL;
> +
> +	if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
> +		VM_BUG_ON(compound_order(page) !=
> +			  encoded_page_order(encoded_va));
> +		free_unref_page(page, encoded_page_order(encoded_va));
> +		return NULL;
> +	}
> +
> +	/* OK, page count is 0, we can safely set it */
> +	set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
> +
> +	return page;
> +}
> +
>  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  					     gfp_t gfp_mask)
>  {
> @@ -26,6 +48,14 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  	struct page *page = NULL;
>  	gfp_t gfp = gfp_mask;
>  
> +	if (likely(nc->encoded_va)) {
> +		page = __page_frag_cache_recharge(nc);
> +		if (page) {
> +			order = encoded_page_order(nc->encoded_va);
> +			goto out;
> +		}
> +	}
> +

This code has no business here. This is refill, you just dropped
recharge in here which will make a complete mess of the ordering and be
confusing to say the least.

The expectation was that if we are calling this function it is going to
overwrite the virtual address to NULL on failure so we discard the old
page if there is one present. This changes that behaviour. What you
effectively did is made __page_frag_cache_refill into the recharge
function.

>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>  	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
>  		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
> @@ -35,7 +65,7 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  	if (unlikely(!page)) {
>  		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
>  		if (unlikely(!page)) {
> -			nc->encoded_va = 0;
> +			memset(nc, 0, sizeof(*nc));
>  			return NULL;
>  		}
>  

The memset will take a few more instructions than the existing code
did. I would prefer to keep this as is if at all possible.

> @@ -45,6 +75,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>  	nc->encoded_va = encode_aligned_va(page_address(page), order,
>  					   page_is_pfmemalloc(page));
>  
> +	/* Even if we own the page, we do not use atomic_set().
> +	 * This would break get_page_unless_zero() users.
> +	 */
> +	page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
> +
> +out:
> +	/* reset page count bias and remaining to start of new frag */
> +	nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> +	nc->remaining = PAGE_SIZE << order;
> +
>  	return page;
>  }
>  

Why bother returning a page at all? It doesn't seem like you don't use
it anymore. It looks like the use cases you have for it in patch 11/12
all appear to be broken from what I can tell as you are adding page as
a variable when we don't need to be passing internal details to the
callers of the function when just a simple error return code would do.

> @@ -55,7 +95,7 @@ void page_frag_cache_drain(struct page_frag_cache *nc)
>  
>  	__page_frag_cache_drain(virt_to_head_page((void *)nc->encoded_va),
>  				nc->pagecnt_bias);
> -	nc->encoded_va = 0;
> +	memset(nc, 0, sizeof(*nc));
>  }
>  EXPORT_SYMBOL(page_frag_cache_drain);
>  
> @@ -72,31 +112,9 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
>  				 unsigned int fragsz, gfp_t gfp_mask,
>  				 unsigned int align_mask)
>  {
> -	unsigned long encoded_va = nc->encoded_va;
> -	unsigned int size, remaining;
> -	struct page *page;
> -
> -	if (unlikely(!encoded_va)) {
> -refill:
> -		page = __page_frag_cache_refill(nc, gfp_mask);
> -		if (!page)
> -			return NULL;
> -
> -		encoded_va = nc->encoded_va;
> -		size = page_frag_cache_page_size(encoded_va);
> +	unsigned int size = page_frag_cache_page_size(nc->encoded_va);
> +	unsigned int remaining = nc->remaining & align_mask;
>  
> -		/* Even if we own the page, we do not use atomic_set().
> -		 * This would break get_page_unless_zero() users.
> -		 */
> -		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
> -
> -		/* reset page count bias and remaining to start of new frag */
> -		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> -		nc->remaining = size;
> -	}
> -
> -	size = page_frag_cache_page_size(encoded_va);
> -	remaining = nc->remaining & align_mask;
>  	if (unlikely(remaining < fragsz)) {

I am not a fan of adding a dependency on remaining being set *before*
encoded_va. The fact is it relies on the size to set it. In addition
this is creating a big blob of code for the conditional paths to have
to jump over.

I think it is much better to first validate encoded_va, and then
validate remaining. Otherwise just checking remaining seems problematic
and like a recipe for NULL pointer accesses.

>  		if (unlikely(fragsz > PAGE_SIZE)) {
>  			/*
> @@ -111,32 +129,17 @@ void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
>  			return NULL;
>  		}
>  
> -		page = virt_to_page((void *)encoded_va);
> -
> -		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
> -			goto refill;
> -
> -		if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
> -			VM_BUG_ON(compound_order(page) !=
> -				  encoded_page_order(encoded_va));
> -			free_unref_page(page, encoded_page_order(encoded_va));
> -			goto refill;
> -		}
> -
> -		/* OK, page count is 0, we can safely set it */
> -		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
> -
> -		/* reset page count bias and remaining to start of new frag */
> -		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
> -		nc->remaining = size;
> +		if (unlikely(!__page_frag_cache_refill(nc, gfp_mask)))
> +			return NULL;
>  
> +		size = page_frag_cache_page_size(nc->encoded_va);

So this is adding yet another setting/reading of size to the recharge
path now. Previously the recharge path could just reuse the existing
size.

>  		remaining = size;
>  	}
>  
>  	nc->pagecnt_bias--;
>  	nc->remaining = remaining - fragsz;
>  
> -	return encoded_page_address(encoded_va) + (size - remaining);
> +	return encoded_page_address(nc->encoded_va) + (size - remaining);
>  }
>  EXPORT_SYMBOL(__page_frag_alloc_va_align);
>  



  reply	other threads:[~2024-07-21 23:40 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20240719093338.55117-1-linyunsheng@huawei.com>
2024-07-19  9:33 ` [RFC v11 01/14] mm: page_frag: add a test module for page_frag Yunsheng Lin
2024-07-21 17:34   ` Alexander Duyck
2024-07-23 13:19     ` Yunsheng Lin
2024-07-19  9:33 ` [RFC v11 02/14] mm: move the page fragment allocator from page_alloc into its own file Yunsheng Lin
2024-07-21 17:58   ` Alexander Duyck
2024-07-27 15:04     ` Yunsheng Lin
2024-07-19  9:33 ` [RFC v11 03/14] mm: page_frag: use initial zero offset for page_frag_alloc_align() Yunsheng Lin
2024-07-21 18:34   ` Alexander Duyck
2024-07-19  9:33 ` [RFC v11 04/14] mm: page_frag: add '_va' suffix to page_frag API Yunsheng Lin
     [not found]   ` <CAKgT0UcqELiXntRA_uD8eJGjt-OCLO64ax=YFXrCHNnaj9kD8g@mail.gmail.com>
2024-07-25 12:21     ` Yunsheng Lin
2024-07-19  9:33 ` [RFC v11 05/14] mm: page_frag: avoid caller accessing 'page_frag_cache' directly Yunsheng Lin
2024-07-21 23:01   ` Alexander H Duyck
2024-07-19  9:33 ` [RFC v11 07/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc' Yunsheng Lin
2024-07-21 22:59   ` Alexander H Duyck
2024-07-19  9:33 ` [RFC v11 08/14] mm: page_frag: some minor refactoring before adding new API Yunsheng Lin
2024-07-21 23:40   ` Alexander H Duyck [this message]
2024-07-22 12:55     ` Yunsheng Lin
2024-07-22 15:32       ` Alexander Duyck
2024-07-23 13:19         ` Yunsheng Lin
2024-07-30 13:20           ` Yunsheng Lin
2024-07-30 15:12             ` Alexander H Duyck
2024-07-31 12:35               ` Yunsheng Lin
2024-07-31 17:02                 ` Alexander H Duyck
2024-08-01 12:53                   ` Yunsheng Lin
2024-07-19  9:33 ` [RFC v11 09/14] mm: page_frag: use __alloc_pages() to replace alloc_pages_node() Yunsheng Lin
2024-07-21 21:41   ` Alexander H Duyck
2024-07-24 12:54     ` Yunsheng Lin
2024-07-24 15:03       ` Alexander Duyck
2024-07-25 12:19         ` Yunsheng Lin
2024-08-14 18:34           ` Alexander H Duyck
2024-07-19  9:33 ` [RFC v11 11/14] mm: page_frag: introduce prepare/probe/commit API Yunsheng Lin
2024-07-19  9:33 ` [RFC v11 13/14] mm: page_frag: update documentation for page_frag Yunsheng Lin
     [not found] ` <CAKgT0UcGvrS7=r0OCGZipzBv8RuwYtRwb2QDXqiF4qW5CNws4g@mail.gmail.com>
     [not found]   ` <b2001dba-a2d2-4b49-bc9f-59e175e7bba1@huawei.com>
2024-07-22 15:21     ` [RFC v11 00/14] Replace page_frag with page_frag_cache for sk_page_frag() Alexander Duyck
2024-07-23 13:17       ` Yunsheng Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dbf876b000158aed8380d6ac3a3f6e8dd40ace7b.camel@gmail.com \
    --to=alexander.duyck@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linyunsheng@huawei.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).