All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: "Pankaj Raghav (Samsung)" <kernel@pankajraghav.com>
Cc: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	mcgrof@kernel.org, gost.dev@samsung.com,
	akpm@linux-foundation.org, kbusch@kernel.org,
	chandan.babu@oracle.com, p.raghav@samsung.com,
	linux-kernel@vger.kernel.org, hare@suse.de, willy@infradead.org,
	linux-mm@kvack.org, david@fromorbit.com
Subject: Re: [RFC v2 05/14] readahead: align index to mapping_min_order in ondemand_ra and force_ra
Date: Tue, 13 Feb 2024 08:46:52 -0800	[thread overview]
Message-ID: <20240213164652.GW6184@frogsfrogsfrogs> (raw)
In-Reply-To: <20240213093713.1753368-6-kernel@pankajraghav.com>

On Tue, Feb 13, 2024 at 10:37:04AM +0100, Pankaj Raghav (Samsung) wrote:
> From: Luis Chamberlain <mcgrof@kernel.org>
> 
> Align the ra->start and ra->size to mapping_min_order in
> ondemand_readahead(), and align the index to mapping_min_order in
> force_page_cache_ra(). This will ensure that the folios allocated for
> readahead that are added to the page cache are aligned to
> mapping_min_order.
> 
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>

Acked-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  mm/readahead.c | 48 ++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 40 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/readahead.c b/mm/readahead.c
> index 4fa7d0e65706..5e1ec7705c78 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -315,6 +315,7 @@ void force_page_cache_ra(struct readahead_control *ractl,
>  	struct file_ra_state *ra = ractl->ra;
>  	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
>  	unsigned long max_pages, index;
> +	unsigned int min_nrpages = mapping_min_folio_nrpages(mapping);
>  
>  	if (unlikely(!mapping->a_ops->read_folio && !mapping->a_ops->readahead))
>  		return;
> @@ -324,6 +325,13 @@ void force_page_cache_ra(struct readahead_control *ractl,
>  	 * be up to the optimal hardware IO size
>  	 */
>  	index = readahead_index(ractl);
> +	if (!IS_ALIGNED(index, min_nrpages)) {
> +		unsigned long old_index = index;
> +
> +		index = round_down(index, min_nrpages);
> +		nr_to_read += (old_index - index);
> +	}
> +
>  	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
>  	nr_to_read = min_t(unsigned long, nr_to_read, max_pages);
>  	while (nr_to_read) {
> @@ -332,6 +340,7 @@ void force_page_cache_ra(struct readahead_control *ractl,
>  		if (this_chunk > nr_to_read)
>  			this_chunk = nr_to_read;
>  		ractl->_index = index;
> +		VM_BUG_ON(!IS_ALIGNED(index, min_nrpages));
>  		do_page_cache_ra(ractl, this_chunk, 0);
>  
>  		index += this_chunk;
> @@ -344,11 +353,20 @@ void force_page_cache_ra(struct readahead_control *ractl,
>   * for small size, x 4 for medium, and x 2 for large
>   * for 128k (32 page) max ra
>   * 1-2 page = 16k, 3-4 page 32k, 5-8 page = 64k, > 8 page = 128k initial
> + *
> + * For higher order address space requirements we ensure no initial reads
> + * are ever less than the min number of pages required.
> + *
> + * We *always* cap the result at the max io size allowed by the device.
>   */
> -static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
> +static unsigned long get_init_ra_size(unsigned long size,
> +				      unsigned int min_nrpages,
> +				      unsigned long max)
>  {
>  	unsigned long newsize = roundup_pow_of_two(size);
>  
> +	newsize = max_t(unsigned long, newsize, min_nrpages);
> +
>  	if (newsize <= max / 32)
>  		newsize = newsize * 4;
>  	else if (newsize <= max / 4)
> @@ -356,6 +374,8 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
>  	else
>  		newsize = max;
>  
> +	VM_BUG_ON(newsize & (min_nrpages - 1));
> +
>  	return newsize;
>  }
>  
> @@ -364,14 +384,16 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
>   *  return it as the new window size.
>   */
>  static unsigned long get_next_ra_size(struct file_ra_state *ra,
> +				      unsigned int min_nrpages,
>  				      unsigned long max)
>  {
> -	unsigned long cur = ra->size;
> +	unsigned long cur = max(ra->size, min_nrpages);
>  
>  	if (cur < max / 16)
>  		return 4 * cur;
>  	if (cur <= max / 2)
>  		return 2 * cur;
> +
>  	return max;
>  }
>  
> @@ -561,7 +583,11 @@ static void ondemand_readahead(struct readahead_control *ractl,
>  	unsigned long add_pages;
>  	pgoff_t index = readahead_index(ractl);
>  	pgoff_t expected, prev_index;
> -	unsigned int order = folio ? folio_order(folio) : 0;
> +	unsigned int min_order = mapping_min_folio_order(ractl->mapping);
> +	unsigned int min_nrpages = mapping_min_folio_nrpages(ractl->mapping);
> +	unsigned int order = folio ? folio_order(folio) : min_order;
> +
> +	VM_BUG_ON(!IS_ALIGNED(ractl->_index, min_nrpages));
>  
>  	/*
>  	 * If the request exceeds the readahead window, allow the read to
> @@ -583,8 +609,8 @@ static void ondemand_readahead(struct readahead_control *ractl,
>  	expected = round_down(ra->start + ra->size - ra->async_size,
>  			1UL << order);
>  	if (index == expected || index == (ra->start + ra->size)) {
> -		ra->start += ra->size;
> -		ra->size = get_next_ra_size(ra, max_pages);
> +		ra->start += round_down(ra->size, min_nrpages);
> +		ra->size = get_next_ra_size(ra, min_nrpages, max_pages);
>  		ra->async_size = ra->size;
>  		goto readit;
>  	}
> @@ -603,13 +629,18 @@ static void ondemand_readahead(struct readahead_control *ractl,
>  				max_pages);
>  		rcu_read_unlock();
>  
> +		start = round_down(start, min_nrpages);
> +
> +		VM_BUG_ON(folio->index & (folio_nr_pages(folio) - 1));
> +
>  		if (!start || start - index > max_pages)
>  			return;
>  
>  		ra->start = start;
>  		ra->size = start - index;	/* old async_size */
> +
>  		ra->size += req_size;
> -		ra->size = get_next_ra_size(ra, max_pages);
> +		ra->size = get_next_ra_size(ra, min_nrpages, max_pages);
>  		ra->async_size = ra->size;
>  		goto readit;
>  	}
> @@ -646,7 +677,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
>  
>  initial_readahead:
>  	ra->start = index;
> -	ra->size = get_init_ra_size(req_size, max_pages);
> +	ra->size = get_init_ra_size(req_size, min_nrpages, max_pages);
>  	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
>  
>  readit:
> @@ -657,7 +688,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
>  	 * Take care of maximum IO pages as above.
>  	 */
>  	if (index == ra->start && ra->size == ra->async_size) {
> -		add_pages = get_next_ra_size(ra, max_pages);
> +		add_pages = get_next_ra_size(ra, min_nrpages, max_pages);
>  		if (ra->size + add_pages <= max_pages) {
>  			ra->async_size = add_pages;
>  			ra->size += add_pages;
> @@ -668,6 +699,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
>  	}
>  
>  	ractl->_index = ra->start;
> +	VM_BUG_ON(!IS_ALIGNED(ractl->_index, min_nrpages));
>  	page_cache_ra_order(ractl, ra, order);
>  }
>  
> -- 
> 2.43.0
> 
> 

  parent reply	other threads:[~2024-02-13 16:46 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-13  9:36 [RFC v2 00/14] enable bs > ps in XFS Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 01/14] fs: Allow fine-grained control of folio sizes Pankaj Raghav (Samsung)
2024-02-13 12:03   ` Hannes Reinecke
2024-02-13 16:34   ` Darrick J. Wong
2024-02-13 21:05     ` Pankaj Raghav (Samsung)
2024-02-13 21:29       ` Darrick J. Wong
2024-02-14 19:00         ` Matthew Wilcox
2024-02-15 10:34           ` Pankaj Raghav (Samsung)
2024-02-14 18:49   ` Matthew Wilcox
2024-02-15 10:21     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 02/14] filemap: align the index to mapping_min_order in the page cache Pankaj Raghav (Samsung)
2024-02-13 12:20   ` Hannes Reinecke
2024-02-13 21:13     ` Pankaj Raghav (Samsung)
2024-02-13 22:00   ` Dave Chinner
2024-02-13  9:37 ` [RFC v2 03/14] filemap: use mapping_min_order while allocating folios Pankaj Raghav (Samsung)
2024-02-13 14:58   ` Hannes Reinecke
2024-02-13 16:38   ` Darrick J. Wong
2024-02-13 22:05   ` Dave Chinner
2024-02-14 10:13     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 04/14] readahead: set file_ra_state->ra_pages to be at least mapping_min_order Pankaj Raghav (Samsung)
2024-02-13 14:59   ` Hannes Reinecke
2024-02-13 16:46   ` Darrick J. Wong
2024-02-13 22:09   ` Dave Chinner
2024-02-14 13:32     ` Pankaj Raghav (Samsung)
2024-02-14 13:53       ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 05/14] readahead: align index to mapping_min_order in ondemand_ra and force_ra Pankaj Raghav (Samsung)
2024-02-13 15:00   ` Hannes Reinecke
2024-02-13 16:46   ` Darrick J. Wong [this message]
2024-02-13 22:29   ` Dave Chinner
2024-02-14 15:10     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 06/14] readahead: rework loop in page_cache_ra_unbounded() Pankaj Raghav (Samsung)
2024-02-13 16:47   ` Darrick J. Wong
2024-02-13  9:37 ` [RFC v2 07/14] readahead: allocate folios with mapping_min_order in ra_(unbounded|order) Pankaj Raghav (Samsung)
2024-02-13 15:01   ` Hannes Reinecke
2024-02-13 16:47   ` Darrick J. Wong
2024-02-13  9:37 ` [RFC v2 08/14] mm: do not split a folio if it has minimum folio order requirement Pankaj Raghav (Samsung)
2024-02-13 15:02   ` Hannes Reinecke
2024-02-13  9:37 ` [RFC v2 09/14] mm: Support order-1 folios in the page cache Pankaj Raghav (Samsung)
2024-02-13 15:03   ` Hannes Reinecke
2024-02-13  9:37 ` [RFC v2 10/14] iomap: fix iomap_dio_zero() for fs bs > system page size Pankaj Raghav (Samsung)
2024-02-13 15:06   ` Hannes Reinecke
2024-02-13 16:30   ` Darrick J. Wong
2024-02-13 21:27     ` Pankaj Raghav (Samsung)
2024-02-13 21:30       ` Darrick J. Wong
2024-02-14 15:13         ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 11/14] xfs: expose block size in stat Pankaj Raghav (Samsung)
2024-02-13 16:27   ` Darrick J. Wong
2024-02-13 21:32     ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 12/14] xfs: make the calculation generic in xfs_sb_validate_fsb_count() Pankaj Raghav (Samsung)
2024-02-13 16:26   ` Darrick J. Wong
2024-02-13 21:48     ` Pankaj Raghav (Samsung)
2024-02-13 22:44       ` Dave Chinner
2024-02-14 15:51         ` Pankaj Raghav (Samsung)
2024-02-13  9:37 ` [RFC v2 13/14] xfs: add an experimental CONFIG_XFS_LBS option Pankaj Raghav (Samsung)
2024-02-13 16:39   ` Darrick J. Wong
2024-02-13 21:19   ` Dave Chinner
2024-02-13 21:54     ` Pankaj Raghav (Samsung)
2024-02-13 22:45       ` Dave Chinner
2024-02-13  9:37 ` [RFC v2 14/14] xfs: enable block size larger than page size support Pankaj Raghav (Samsung)
2024-02-13 16:20   ` Darrick J. Wong
2024-02-14 16:40     ` Pankaj Raghav (Samsung)
2024-02-13 21:34   ` Dave Chinner
2024-02-14 16:35     ` Pankaj Raghav (Samsung)
2024-02-15 22:17       ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240213164652.GW6184@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=chandan.babu@oracle.com \
    --cc=david@fromorbit.com \
    --cc=gost.dev@samsung.com \
    --cc=hare@suse.de \
    --cc=kbusch@kernel.org \
    --cc=kernel@pankajraghav.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mcgrof@kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.