Linux NFS development
 help / color / mirror / Atom feed
From: trondmy@kernel.org
To: linux-nfs@vger.kernel.org
Subject: [PATCH v10 10/26] NFS: Adjust the amount of readahead performed by NFS readdir
Date: Sun, 13 Mar 2022 13:05:41 -0400	[thread overview]
Message-ID: <20220313170557.5940-11-trondmy@kernel.org> (raw)
In-Reply-To: <20220313170557.5940-10-trondmy@kernel.org>

From: Trond Myklebust <trond.myklebust@hammerspace.com>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

This patch therefore tries to tone down the amount of readahead we
perform, and adjust it to try to match the amount of data being
requested by user space.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 53 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/nfs_fs.h |  1 +
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 60f7feee0a16..520dc3ec4aef 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,6 +69,8 @@ const struct address_space_operations nfs_dir_aops = {
 	.freepage = nfs_readdir_clear_array,
 };
 
+#define NFS_INIT_DTSIZE PAGE_SIZE
+
 static struct nfs_open_dir_context *
 alloc_nfs_open_dir_context(struct inode *dir)
 {
@@ -78,6 +80,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
 	if (ctx != NULL) {
 		ctx->attr_gencount = nfsi->attr_gencount;
+		ctx->dtsize = NFS_INIT_DTSIZE;
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -154,6 +157,7 @@ struct nfs_readdir_descriptor {
 	struct page	*page;
 	struct dir_context *ctx;
 	pgoff_t		page_index;
+	pgoff_t		page_index_max;
 	u64		dir_cookie;
 	u64		last_cookie;
 	u64		dup_cookie;
@@ -166,12 +170,36 @@ struct nfs_readdir_descriptor {
 	unsigned long	gencount;
 	unsigned long	attr_gencount;
 	unsigned int	cache_entry_index;
+	unsigned int	buffer_fills;
+	unsigned int	dtsize;
 	signed char duped;
 	bool plus;
 	bool eob;
 	bool eof;
 };
 
+static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
+{
+	struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
+	unsigned int maxsize = server->dtsize;
+
+	if (sz > maxsize)
+		sz = maxsize;
+	if (sz < NFS_MIN_FILE_IO_SIZE)
+		sz = NFS_MIN_FILE_IO_SIZE;
+	desc->dtsize = sz;
+}
+
+static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
+{
+	nfs_set_dtsize(desc, desc->dtsize >> 1);
+}
+
+static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
+{
+	nfs_set_dtsize(desc, desc->dtsize << 1);
+}
+
 static void nfs_readdir_array_init(struct nfs_cache_array *array)
 {
 	memset(array, 0, sizeof(struct nfs_cache_array));
@@ -784,6 +812,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 				break;
 			arrays++;
 			*arrays = page = new;
+			desc->page_index_max++;
 		} else {
 			new = nfs_readdir_page_get_next(mapping,
 							page->index + 1,
@@ -793,6 +822,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 			if (page != *arrays)
 				nfs_readdir_page_unlock_and_put(page);
 			page = new;
+			desc->page_index_max = new->index;
 		}
 		status = nfs_readdir_add_to_array(entry, page);
 	} while (!status && !entry->eof);
@@ -858,7 +888,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 	struct nfs_entry *entry;
 	size_t array_size;
 	struct inode *inode = file_inode(desc->file);
-	size_t dtsize = NFS_SERVER(inode)->dtsize;
+	unsigned int dtsize = desc->dtsize;
 	int status = -ENOMEM;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -894,6 +924,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 
 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
 						 arrays, narrays);
+		desc->buffer_fills++;
 	} while (!status && nfs_readdir_page_needs_filling(page) &&
 		page_mapping(page));
 
@@ -941,6 +972,10 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 	if (!desc->page)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
+		/* Grow the dtsize if we had to go back for more pages */
+		if (desc->page_index == desc->page_index_max)
+			nfs_grow_dtsize(desc);
+		desc->page_index_max = desc->page_index;
 		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
 					       &desc->page, 1);
 		if (res < 0) {
@@ -1075,6 +1110,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	desc->cache_entry_index = 0;
 	desc->last_cookie = desc->dir_cookie;
 	desc->duped = 0;
+	desc->page_index_max = 0;
 
 	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
 
@@ -1084,10 +1120,22 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	}
 	desc->page = NULL;
 
+	/*
+	 * Grow the dtsize if we have to go back for more pages,
+	 * or shrink it if we're reading too many.
+	 */
+	if (!desc->eof) {
+		if (!desc->eob)
+			nfs_grow_dtsize(desc);
+		else if (desc->buffer_fills == 1 &&
+			 i < (desc->page_index_max >> 1))
+			nfs_shrink_dtsize(desc);
+	}
 
 	for (i = 0; i < sz && arrays[i]; i++)
 		nfs_readdir_page_array_free(arrays[i]);
 out:
+	desc->page_index_max = -1;
 	kfree(arrays);
 	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
 	return status;
@@ -1126,6 +1174,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->file = file;
 	desc->ctx = ctx;
 	desc->plus = nfs_use_readdirplus(inode, ctx);
+	desc->page_index_max = -1;
 
 	spin_lock(&file->f_lock);
 	desc->dir_cookie = dir_ctx->dir_cookie;
@@ -1136,6 +1185,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->last_cookie = dir_ctx->last_cookie;
 	desc->attr_gencount = dir_ctx->attr_gencount;
 	desc->eof = dir_ctx->eof;
+	nfs_set_dtsize(desc, dir_ctx->dtsize);
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
 
@@ -1187,6 +1237,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->attr_gencount = desc->attr_gencount;
 	dir_ctx->page_index = desc->page_index;
 	dir_ctx->eof = desc->eof;
+	dir_ctx->dtsize = desc->dtsize;
 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
 	spin_unlock(&file->f_lock);
 out_free:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1c533f2c1f36..691a27936849 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -107,6 +107,7 @@ struct nfs_open_dir_context {
 	__u64 dup_cookie;
 	__u64 last_cookie;
 	pgoff_t page_index;
+	unsigned int dtsize;
 	signed char duped;
 	bool eof;
 };
-- 
2.35.1


  reply	other threads:[~2022-03-13 17:12 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-13 17:05 [PATCH v10 00/26] Readdir improvements trondmy
2022-03-13 17:05 ` [PATCH v10 01/26] NFS: Return valid errors from nfs2/3_decode_dirent() trondmy
2022-03-13 17:05   ` [PATCH v10 02/26] NFS: constify nfs_server_capable() and nfs_have_writebacks() trondmy
2022-03-13 17:05     ` [PATCH v10 03/26] NFS: Trace lookup revalidation failure trondmy
2022-03-13 17:05       ` [PATCH v10 04/26] NFS: Initialise the readdir verifier as best we can in nfs_opendir() trondmy
2022-03-13 17:05         ` [PATCH v10 05/26] NFS: Use kzalloc() to avoid initialising the nfs_open_dir_context trondmy
2022-03-13 17:05           ` [PATCH v10 06/26] NFS: Calculate page offsets algorithmically trondmy
2022-03-13 17:05             ` [PATCH v10 07/26] NFS: Store the change attribute in the directory page cache trondmy
2022-03-13 17:05               ` [PATCH v10 08/26] NFS: Don't re-read the entire page cache to find the next cookie trondmy
2022-03-13 17:05                 ` [PATCH v10 09/26] NFS: Don't advance the page pointer unless the page is full trondmy
2022-03-13 17:05                   ` trondmy [this message]
2022-03-13 17:05                     ` [PATCH v10 11/26] NFS: If the cookie verifier changes, we must invalidate the page cache trondmy
2022-03-13 17:05                       ` [PATCH v10 12/26] NFS: Simplify nfs_readdir_xdr_to_array() trondmy
2022-03-13 17:05                         ` [PATCH v10 13/26] NFS: Reduce use of uncached readdir trondmy
2022-03-13 17:05                           ` [PATCH v10 14/26] NFS: Improve heuristic for readdirplus trondmy
2022-03-13 17:05                             ` [PATCH v10 15/26] NFS: Don't ask for readdirplus unless it can help nfs_getattr() trondmy
2022-03-13 17:05                               ` [PATCH v10 16/26] NFSv4: Ask for a full XDR buffer of readdir goodness trondmy
2022-03-13 17:05                                 ` [PATCH v10 17/26] NFS: Readdirplus can't help lookup for case insensitive filesystems trondmy
2022-03-13 17:05                                   ` [PATCH v10 18/26] NFS: Don't request readdirplus when revalidation was forced trondmy
2022-03-13 17:05                                     ` [PATCH v10 19/26] NFS: Add basic readdir tracing trondmy
2022-03-13 17:05                                       ` [PATCH v10 20/26] NFS: Trace effects of readdirplus on the dcache trondmy
2022-03-13 17:05                                         ` [PATCH v10 21/26] NFS: Trace effects of the readdirplus heuristic trondmy
2022-03-13 17:05                                           ` [PATCH v10 22/26] NFS: Clean up page array initialisation/free trondmy
2022-03-13 17:05                                             ` [PATCH v10 23/26] NFS: Convert readdir page cache to use a cookie based index trondmy
2022-03-13 17:05                                               ` [PATCH v10 24/26] NFS: Fix up forced readdirplus trondmy
2022-03-13 17:05                                                 ` [PATCH v10 25/26] NFS: Optimise away the previous cookie field trondmy
2022-03-13 17:05                                                   ` [PATCH v10 26/26] NFS: Cache all entries in the readdirplus reply trondmy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220313170557.5940-11-trondmy@kernel.org \
    --to=trondmy@kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.