From: Trond Myklebust <trondmy@hammerspace.com>
To: "linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Subject: Re: [PATCH v4 4/5] NFS: Improve heuristic for readdirplus
Date: Fri, 18 Feb 2022 11:37:53 +0000 [thread overview]
Message-ID: <d669a90c90e91bfc0e573a1741f9a209a4bc38de.camel@hammerspace.com> (raw)
In-Reply-To: <20220217223323.696173-5-trondmy@kernel.org>
On Thu, 2022-02-17 at 17:33 -0500, trondmy@kernel.org wrote:
> From: Trond Myklebust <trond.myklebust@hammerspace.com>
>
> The heuristic for readdirplus is designed to try to detect 'ls -l'
> and
> similar patterns. It does so by looking for cache hit/miss patterns
> in
> both the attribute cache and in the dcache of the files in a given
> directory, and then sets a flag for the readdirplus code to
> interpret.
>
> The problem with this approach is that a single attribute or dcache
> miss
> can cause the NFS code to force a refresh of the attributes for the
> entire set of files contained in the directory.
>
> To be able to make a more nuanced decision, let's sample the number
> of
> hits and misses in the set of open directory descriptors. That allows
> us
> to set thresholds at which we start preferring READDIRPLUS over
> regular
> READDIR, or at which we start to force a re-read of the remaining
> readdir cache using READDIRPLUS.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
> fs/nfs/dir.c | 77 +++++++++++++++++++++++++---------------
> --
> fs/nfs/inode.c | 4 +--
> fs/nfs/internal.h | 4 +--
> fs/nfs/nfstrace.h | 1 -
> include/linux/nfs_fs.h | 5 +--
> 5 files changed, 53 insertions(+), 38 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 43a559b34f4a..cd57df004789 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -87,8 +87,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
> nfs_set_cache_invalid(dir,
> NFS_INO_INVALID_DATA |
>
> NFS_INO_REVAL_FORCED);
> - list_add(&ctx->list, &nfsi->open_files);
> - clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
> + list_add_tail_rcu(&ctx->list, &nfsi->open_files);
> spin_unlock(&dir->i_lock);
> return ctx;
> }
> @@ -98,9 +97,9 @@ alloc_nfs_open_dir_context(struct inode *dir)
> static void put_nfs_open_dir_context(struct inode *dir, struct
> nfs_open_dir_context *ctx)
> {
> spin_lock(&dir->i_lock);
> - list_del(&ctx->list);
> + list_del_rcu(&ctx->list);
> spin_unlock(&dir->i_lock);
> - kfree(ctx);
> + kfree_rcu(ctx, rcu_head);
> }
>
> /*
> @@ -567,7 +566,6 @@ static int nfs_readdir_xdr_filler(struct
> nfs_readdir_descriptor *desc,
> /* We requested READDIRPLUS, but the server doesn't
> grok it */
> if (error == -ENOTSUPP && desc->plus) {
> NFS_SERVER(inode)->caps &=
> ~NFS_CAP_READDIRPLUS;
> - clear_bit(NFS_INO_ADVISE_RDPLUS,
> &NFS_I(inode)->flags);
> desc->plus = arg.plus = false;
> goto again;
> }
> @@ -617,51 +615,57 @@ int nfs_same_file(struct dentry *dentry, struct
> nfs_entry *entry)
> return 1;
> }
>
> -static
> -bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
> +#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
> +
> +static bool nfs_use_readdirplus(struct inode *dir, struct
> dir_context *ctx,
> + unsigned int cache_hits,
> + unsigned int cache_misses)
> {
> if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
> return false;
> - if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)-
> >flags))
> - return true;
> - if (ctx->pos == 0)
> + if (ctx->pos == 0 ||
> + cache_hits + cache_misses >
> NFS_READDIR_CACHE_USAGE_THRESHOLD)
> return true;
> return false;
> }
>
> /*
> - * This function is called by the lookup and getattr code to request
> the
> + * This function is called by the getattr code to request the
> * use of readdirplus to accelerate any future lookups in the same
> * directory.
> */
> -void nfs_advise_use_readdirplus(struct inode *dir)
> +void nfs_readdir_record_entry_cache_hit(struct inode *dir)
> {
> struct nfs_inode *nfsi = NFS_I(dir);
> + struct nfs_open_dir_context *ctx;
>
> - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> - !list_empty(&nfsi->open_files))
> - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
> + list_for_each_entry_rcu (ctx, &nfsi->open_files,
> list)
> + atomic_inc(&ctx->cache_hits);
Missing rcu_read_lock()/rcu_read_unlock() protection. Fixed in the
version committed to the testing branch.
> + }
> }
>
> /*
> * This function is mainly for use by nfs_getattr().
> *
> * If this is an 'ls -l', we want to force use of readdirplus.
> - * Do this by checking if there is an active file descriptor
> - * and calling nfs_advise_use_readdirplus, then forcing a
> - * cache flush.
> */
> -void nfs_force_use_readdirplus(struct inode *dir)
> +void nfs_readdir_record_entry_cache_miss(struct inode *dir)
> {
> struct nfs_inode *nfsi = NFS_I(dir);
> + struct nfs_open_dir_context *ctx;
>
> - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> - !list_empty(&nfsi->open_files)) {
> - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> - set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
> + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
> + list_for_each_entry_rcu (ctx, &nfsi->open_files,
> list)
> + atomic_inc(&ctx->cache_misses);
Ditto.
> }
> }
>
> +static void nfs_readdir_record_dcache_miss(struct inode *dir)
> +{
> + nfs_readdir_record_entry_cache_miss(dir);
> +}
> +
> static
> void nfs_prime_dcache(struct dentry *parent, struct nfs_entry
> *entry,
> unsigned long dir_verifier)
> @@ -1101,6 +1105,18 @@ static int uncached_readdir(struct
> nfs_readdir_descriptor *desc)
> return status;
> }
>
> +#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
> +
> +static void nfs_readdir_handle_cache_misses(struct inode *inode,
> + struct
> nfs_readdir_descriptor *desc,
> + pgoff_t page_index,
> + unsigned int
> cache_misses)
> +{
> + if (desc->ctx->pos != 0 &&
> + cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD)
> + invalidate_mapping_pages(inode->i_mapping, page_index
> + 1, -1);
> +}
> +
> /* The file offset position represents the dirent entry number. A
> last cookie cache takes care of the common case of reading the
> whole directory.
> @@ -1112,6 +1128,7 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
> struct nfs_inode *nfsi = NFS_I(inode);
> struct nfs_open_dir_context *dir_ctx = file->private_data;
> struct nfs_readdir_descriptor *desc;
> + unsigned int cache_hits, cache_misses;
> pgoff_t page_index;
> int res;
>
> @@ -1137,7 +1154,6 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
> goto out;
> desc->file = file;
> desc->ctx = ctx;
> - desc->plus = nfs_use_readdirplus(inode, ctx);
> desc->page_index_max = -1;
>
> spin_lock(&file->f_lock);
> @@ -1150,6 +1166,8 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
> desc->page_fill_misses = dir_ctx->page_fill_misses;
> nfs_set_dtsize(desc, dir_ctx->dtsize);
> memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
> + cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
> + cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
> spin_unlock(&file->f_lock);
>
> if (desc->eof) {
> @@ -1157,9 +1175,8 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
> goto out_free;
> }
>
> - if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags)
> &&
> - list_is_singular(&nfsi->open_files))
> - invalidate_mapping_pages(inode->i_mapping, page_index
> + 1, -1);
> + desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits,
> cache_misses);
> + nfs_readdir_handle_cache_misses(inode, desc, page_index,
> cache_misses);
>
> do {
> res = readdir_search_pagecache(desc);
> @@ -1178,7 +1195,6 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
> break;
> }
> if (res == -ETOOSMALL && desc->plus) {
> - clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi-
> >flags);
> nfs_zap_caches(inode);
> desc->page_index = 0;
> desc->plus = false;
> @@ -1602,7 +1618,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir,
> struct dentry *dentry,
> nfs_set_verifier(dentry, dir_verifier);
>
> /* set a readdirplus hint that we had a cache miss */
> - nfs_force_use_readdirplus(dir);
> + nfs_readdir_record_dcache_miss(dir);
> ret = 1;
> out:
> nfs_free_fattr(fattr);
> @@ -1659,7 +1675,6 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
> nfs_mark_dir_for_revalidate(dir);
> goto out_bad;
> }
> - nfs_advise_use_readdirplus(dir);
> goto out_valid;
> }
>
> @@ -1866,7 +1881,7 @@ struct dentry *nfs_lookup(struct inode *dir,
> struct dentry * dentry, unsigned in
> goto out;
>
> /* Notify readdir to use READDIRPLUS */
> - nfs_force_use_readdirplus(dir);
> + nfs_readdir_record_dcache_miss(dir);
>
> no_entry:
> res = d_splice_alias(inode, dentry);
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index f9fc506ebb29..1bef81f5373a 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -789,7 +789,7 @@ static void
> nfs_readdirplus_parent_cache_miss(struct dentry *dentry)
> if (!nfs_server_capable(d_inode(dentry),
> NFS_CAP_READDIRPLUS))
> return;
> parent = dget_parent(dentry);
> - nfs_force_use_readdirplus(d_inode(parent));
> + nfs_readdir_record_entry_cache_miss(d_inode(parent));
> dput(parent);
> }
>
> @@ -800,7 +800,7 @@ static void
> nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
> if (!nfs_server_capable(d_inode(dentry),
> NFS_CAP_READDIRPLUS))
> return;
> parent = dget_parent(dentry);
> - nfs_advise_use_readdirplus(d_inode(parent));
> + nfs_readdir_record_entry_cache_hit(d_inode(parent));
> dput(parent);
> }
>
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 2de7c56a1fbe..46dc97b65661 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -366,8 +366,8 @@ extern struct nfs_client *nfs_init_client(struct
> nfs_client *clp,
> const struct nfs_client_initdata *);
>
> /* dir.c */
> -extern void nfs_advise_use_readdirplus(struct inode *dir);
> -extern void nfs_force_use_readdirplus(struct inode *dir);
> +extern void nfs_readdir_record_entry_cache_hit(struct inode *dir);
> +extern void nfs_readdir_record_entry_cache_miss(struct inode *dir);
> extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
> struct shrink_control
> *sc);
> extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
> diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
> index 45a310b586ce..3672f6703ee7 100644
> --- a/fs/nfs/nfstrace.h
> +++ b/fs/nfs/nfstrace.h
> @@ -36,7 +36,6 @@
>
> #define nfs_show_nfsi_flags(v) \
> __print_flags(v, "|", \
> - { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS"
> }, \
> { BIT(NFS_INO_STALE), "STALE" }, \
> { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" },
> \
> { BIT(NFS_INO_INVALIDATING), "INVALIDATING"
> }, \
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index 9e5fc29723c2..e21bd9452d27 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -101,6 +101,8 @@ struct nfs_open_context {
>
> struct nfs_open_dir_context {
> struct list_head list;
> + atomic_t cache_hits;
> + atomic_t cache_misses;
> unsigned long attr_gencount;
> __be32 verf[NFS_DIR_VERIFIER_SIZE];
> __u64 dir_cookie;
> @@ -110,6 +112,7 @@ struct nfs_open_dir_context {
> unsigned int dtsize;
> signed char duped;
> bool eof;
> + struct rcu_head rcu_head;
> };
>
> /*
> @@ -274,13 +277,11 @@ struct nfs4_copy_state {
> /*
> * Bit offsets in flags field
> */
> -#define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus
> */
> #define NFS_INO_STALE (1) /* possible stale
> inode */
> #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the
> LRU list */
> #define NFS_INO_INVALIDATING (3) /* inode is being
> invalidated */
> #define NFS_INO_PRESERVE_UNLINKED (4) /* preserve file if
> removed while open */
> #define NFS_INO_FSCACHE (5) /* inode can
> be cached by FS-Cache */
> -#define NFS_INO_FORCE_READDIR (7) /* force readdirplus
> */
> #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit
> required */
> #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit
> inflight */
> #define NFS_INO_LAYOUTSTATS (11) /* layoutstats
> inflight */
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@hammerspace.com
prev parent reply other threads:[~2022-02-18 11:39 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-17 22:33 [PATCH v4 0/5] Readdir improvements trondmy
2022-02-17 22:33 ` [PATCH v4 1/5] NFS: Adjust the amount of readahead performed by NFS readdir trondmy
2022-02-17 22:33 ` [PATCH v4 2/5] NFS: Simplify nfs_readdir_xdr_to_array() trondmy
2022-02-17 22:33 ` [PATCH v4 3/5] NFS: Improve algorithm for falling back to uncached readdir trondmy
2022-02-17 22:33 ` [PATCH v4 4/5] NFS: Improve heuristic for readdirplus trondmy
2022-02-17 22:33 ` [PATCH v4 5/5] NFS: Don't ask for readdirplus if files are being written to trondmy
2022-02-18 11:40 ` Trond Myklebust
2022-02-18 11:37 ` Trond Myklebust [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d669a90c90e91bfc0e573a1741f9a209a4bc38de.camel@hammerspace.com \
--to=trondmy@hammerspace.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox