Re: [RFC v2 01/14] fs: Allow fine-grained control of folio sizes

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Darrick J. Wong" <djwong@kernel.org>
To: "Pankaj Raghav (Samsung)" <kernel@pankajraghav.com>
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	mcgrof@kernel.org, gost.dev@samsung.com,
	akpm@linux-foundation.org, kbusch@kernel.org,
	chandan.babu@oracle.com, p.raghav@samsung.com,
	linux-kernel@vger.kernel.org, hare@suse.de, willy@infradead.org,
	linux-mm@kvack.org, david@fromorbit.com
Subject: Re: [RFC v2 01/14] fs: Allow fine-grained control of folio sizes
Date: Tue, 13 Feb 2024 08:34:31 -0800	[thread overview]
Message-ID: <20240213163431.GS6184@frogsfrogsfrogs> (raw)
In-Reply-To: <20240213093713.1753368-2-kernel@pankajraghav.com>

On Tue, Feb 13, 2024 at 10:37:00AM +0100, Pankaj Raghav (Samsung) wrote:
> From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
> 
> Some filesystems want to be able to limit the maximum size of folios,
> and some want to be able to ensure that folios are at least a certain
> size.  Add mapping_set_folio_orders() to allow this level of control.
> The max folio order parameter is ignored and it is always set to
> MAX_PAGECACHE_ORDER.

Why?  If MAX_PAGECACHE_ORDER is 8 and I instead pass in max==3, I'm
going to be surprised by my constraint being ignored.  Maybe I said that
because I'm not prepared to handle an order-7 folio; or some customer
will have some weird desire to twist this knob to make their workflow
faster.

--D

> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> ---
>  include/linux/pagemap.h | 92 ++++++++++++++++++++++++++++++++---------
>  1 file changed, 73 insertions(+), 19 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 2df35e65557d..5618f762187b 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -202,13 +202,18 @@ enum mapping_flags {
>  	AS_EXITING	= 4, 	/* final truncate in progress */
>  	/* writeback related tags are not used */
>  	AS_NO_WRITEBACK_TAGS = 5,
> -	AS_LARGE_FOLIO_SUPPORT = 6,
> -	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
> -	AS_STABLE_WRITES,	/* must wait for writeback before modifying
> +	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
> +	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
>  				   folio contents */
> -	AS_UNMOVABLE,		/* The mapping cannot be moved, ever */
> +	AS_FOLIO_ORDER_MIN = 8,
> +	AS_FOLIO_ORDER_MAX = 13, /* Bit 8-17 are used for FOLIO_ORDER */
> +	AS_UNMOVABLE = 18,		/* The mapping cannot be moved, ever */
>  };
>  
> +#define AS_FOLIO_ORDER_MIN_MASK 0x00001f00
> +#define AS_FOLIO_ORDER_MAX_MASK 0x0003e000
> +#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
> +
>  /**
>   * mapping_set_error - record a writeback error in the address_space
>   * @mapping: the mapping in which an error should be set
> @@ -344,6 +349,53 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
>  	m->gfp_mask = mask;
>  }
>  
> +/*
> + * There are some parts of the kernel which assume that PMD entries
> + * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
> + * limit the maximum allocation order to PMD size.  I'm not aware of any
> + * assumptions about maximum order if THP are disabled, but 8 seems like
> + * a good order (that's 1MB if you're using 4kB pages)
> + */
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
> +#else
> +#define MAX_PAGECACHE_ORDER	8
> +#endif
> +
> +/*
> + * mapping_set_folio_orders() - Set the range of folio sizes supported.
> + * @mapping: The file.
> + * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
> + * @max: Maximum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
> + *
> + * The filesystem should call this function in its inode constructor to
> + * indicate which sizes of folio the VFS can use to cache the contents
> + * of the file.  This should only be used if the filesystem needs special
> + * handling of folio sizes (ie there is something the core cannot know).
> + * Do not tune it based on, eg, i_size.
> + *
> + * Context: This should not be called while the inode is active as it
> + * is non-atomic.
> + */
> +static inline void mapping_set_folio_orders(struct address_space *mapping,
> +					    unsigned int min, unsigned int max)
> +{
> +	if (min == 1)
> +		min = 2;
> +	if (max < min)
> +		max = min;
> +	if (max > MAX_PAGECACHE_ORDER)
> +		max = MAX_PAGECACHE_ORDER;
> +
> +	/*
> +	 * XXX: max is ignored as only minimum folio order is supported
> +	 * currently.
> +	 */
> +	mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
> +			 (min << AS_FOLIO_ORDER_MIN) |
> +			 (MAX_PAGECACHE_ORDER << AS_FOLIO_ORDER_MAX);
> +}
> +
>  /**
>   * mapping_set_large_folios() - Indicate the file supports large folios.
>   * @mapping: The file.
> @@ -357,7 +409,22 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
>   */
>  static inline void mapping_set_large_folios(struct address_space *mapping)
>  {
> -	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
> +	mapping_set_folio_orders(mapping, 0, MAX_PAGECACHE_ORDER);
> +}
> +
> +static inline unsigned int mapping_max_folio_order(struct address_space *mapping)
> +{
> +	return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
> +}
> +
> +static inline unsigned int mapping_min_folio_order(struct address_space *mapping)
> +{
> +	return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
> +}
> +
> +static inline unsigned int mapping_min_folio_nrpages(struct address_space *mapping)
> +{
> +	return 1U << mapping_min_folio_order(mapping);
>  }
>  
>  /*
> @@ -367,7 +434,7 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
>  static inline bool mapping_large_folio_support(struct address_space *mapping)
>  {
>  	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
> -		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
> +	       (mapping_max_folio_order(mapping) > 0);
>  }
>  
>  static inline int filemap_nr_thps(struct address_space *mapping)
> @@ -528,19 +595,6 @@ static inline void *detach_page_private(struct page *page)
>  	return folio_detach_private(page_folio(page));
>  }
>  
> -/*
> - * There are some parts of the kernel which assume that PMD entries
> - * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
> - * limit the maximum allocation order to PMD size.  I'm not aware of any
> - * assumptions about maximum order if THP are disabled, but 8 seems like
> - * a good order (that's 1MB if you're using 4kB pages)
> - */
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> -#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
> -#else
> -#define MAX_PAGECACHE_ORDER	8
> -#endif
> -
>  #ifdef CONFIG_NUMA
>  struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
>  #else
> -- 
> 2.43.0
> 
>

next prev parent reply	other threads:[~2024-02-13 16:34 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-13  9:36 [RFC v2 00/14] enable bs > ps in XFS Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 01/14] fs: Allow fine-grained control of folio sizes Pankaj Raghav (Samsung)
2024-02-13 12:03   ` Hannes Reinecke
2024-02-13 16:34   ` Darrick J. Wong [this message]
2024-02-13 21:05     ` Pankaj Raghav (Samsung)
2024-02-13 21:29       ` Darrick J. Wong
2024-02-14 19:00         ` Matthew Wilcox
2024-02-15 10:34           ` Pankaj Raghav (Samsung)
2024-02-14 18:49   ` Matthew Wilcox
2024-02-15 10:21     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 02/14] filemap: align the index to mapping_min_order in the page cache Pankaj Raghav (Samsung)
2024-02-13 12:20   ` Hannes Reinecke
2024-02-13 21:13     ` Pankaj Raghav (Samsung)
2024-02-13 22:00   ` Dave Chinner
2024-02-13  9:37 ` [RFC v2 03/14] filemap: use mapping_min_order while allocating folios Pankaj Raghav (Samsung)
2024-02-13 14:58   ` Hannes Reinecke
2024-02-13 16:38   ` Darrick J. Wong
2024-02-13 22:05   ` Dave Chinner
2024-02-14 10:13     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 04/14] readahead: set file_ra_state->ra_pages to be at least mapping_min_order Pankaj Raghav (Samsung)
2024-02-13 14:59   ` Hannes Reinecke
2024-02-13 16:46   ` Darrick J. Wong
2024-02-13 22:09   ` Dave Chinner
2024-02-14 13:32     ` Pankaj Raghav (Samsung)
2024-02-14 13:53       ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 05/14] readahead: align index to mapping_min_order in ondemand_ra and force_ra Pankaj Raghav (Samsung)
2024-02-13 15:00   ` Hannes Reinecke
2024-02-13 16:46   ` Darrick J. Wong
2024-02-13 22:29   ` Dave Chinner
2024-02-14 15:10     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 06/14] readahead: rework loop in page_cache_ra_unbounded() Pankaj Raghav (Samsung)
2024-02-13 16:47   ` Darrick J. Wong
2024-02-13  9:37 ` [RFC v2 07/14] readahead: allocate folios with mapping_min_order in ra_(unbounded|order) Pankaj Raghav (Samsung)
2024-02-13 15:01   ` Hannes Reinecke
2024-02-13 16:47   ` Darrick J. Wong
2024-02-13  9:37 ` [RFC v2 08/14] mm: do not split a folio if it has minimum folio order requirement Pankaj Raghav (Samsung)
2024-02-13 15:02   ` Hannes Reinecke
2024-02-13  9:37 ` [RFC v2 09/14] mm: Support order-1 folios in the page cache Pankaj Raghav (Samsung)
2024-02-13 15:03   ` Hannes Reinecke
2024-02-13  9:37 ` [RFC v2 10/14] iomap: fix iomap_dio_zero() for fs bs > system page size Pankaj Raghav (Samsung)
2024-02-13 15:06   ` Hannes Reinecke
2024-02-13 16:30   ` Darrick J. Wong
2024-02-13 21:27     ` Pankaj Raghav (Samsung)
2024-02-13 21:30       ` Darrick J. Wong
2024-02-14 15:13         ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 11/14] xfs: expose block size in stat Pankaj Raghav (Samsung)
2024-02-13 16:27   ` Darrick J. Wong
2024-02-13 21:32     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 12/14] xfs: make the calculation generic in xfs_sb_validate_fsb_count() Pankaj Raghav (Samsung)
2024-02-13 16:26   ` Darrick J. Wong
2024-02-13 21:48     ` Pankaj Raghav (Samsung)
2024-02-13 22:44       ` Dave Chinner
2024-02-14 15:51         ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 13/14] xfs: add an experimental CONFIG_XFS_LBS option Pankaj Raghav (Samsung)
2024-02-13 16:39   ` Darrick J. Wong
2024-02-13 21:19   ` Dave Chinner
2024-02-13 21:54     ` Pankaj Raghav (Samsung)
2024-02-13 22:45       ` Dave Chinner
2024-02-13  9:37 ` [RFC v2 14/14] xfs: enable block size larger than page size support Pankaj Raghav (Samsung)
2024-02-13 16:20   ` Darrick J. Wong
2024-02-14 16:40     ` Pankaj Raghav (Samsung)
2024-02-13 21:34   ` Dave Chinner
2024-02-14 16:35     ` Pankaj Raghav (Samsung)
2024-02-15 22:17       ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240213163431.GS6184@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=chandan.babu@oracle.com \
    --cc=david@fromorbit.com \
    --cc=gost.dev@samsung.com \
    --cc=hare@suse.de \
    --cc=kbusch@kernel.org \
    --cc=kernel@pankajraghav.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mcgrof@kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.