From: Jeff Layton <jlayton@kernel.org>
To: NeilBrown <neilb@suse.de>
Cc: chuck.lever@oracle.com, linux-nfs@vger.kernel.org
Subject: Re: [PATCH v2 1/3] nfsd: rework refcounting in filecache
Date: Mon, 31 Oct 2022 05:54:05 -0400 [thread overview]
Message-ID: <35e8c613d19f02e76916dbc9a2b27aa2f78cd215.camel@kernel.org> (raw)
In-Reply-To: <166699528220.13915.13614496483708690068@noble.neil.brown.name>
On Sat, 2022-10-29 at 09:14 +1100, NeilBrown wrote:
> On Fri, 28 Oct 2022, Jeff Layton wrote:
> > On Fri, 2022-10-28 at 09:51 +1100, NeilBrown wrote:
> > > On Fri, 28 Oct 2022, Jeff Layton wrote:
> > > > The filecache refcounting is a bit non-standard for something searchable
> > > > by RCU, in that we maintain a sentinel reference while it's hashed. This
> > > > in turn requires that we have to do things differently in the "put"
> > > > depending on whether its hashed, which we believe to have led to races.
> > > >
> > > > There are other problems in here too. nfsd_file_close_inode_sync can end
> > > > up freeing an nfsd_file while there are still outstanding references to
> > > > it, and the handling
> > >
> > > -EINTR ??? (you got interrupted and didn't finish the sentence?)
> > >
> >
> > Yes, I meant to go back and flesh that out, and forgot before posting.
> >
> > > >
> > > > Rework the code so that the refcount is what drives the lifecycle. When
> > > > the refcount goes to zero, then unhash and rcu free the object.
> > > >
> > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > ---
> > > > fs/nfsd/filecache.c | 291 +++++++++++++++++++++-----------------------
> > > > fs/nfsd/trace.h | 5 +-
> > > > 2 files changed, 144 insertions(+), 152 deletions(-)
> > > >
> > > > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> > > > index 98c6b5f51bc8..e63534f4b9f8 100644
> > > > --- a/fs/nfsd/filecache.c
> > > > +++ b/fs/nfsd/filecache.c
> > > > @@ -1,6 +1,12 @@
> > > > // SPDX-License-Identifier: GPL-2.0
> > > > /*
> > > > * The NFSD open file cache.
> > > > + *
> > > > + * Each nfsd_file is created in response to client activity -- either regular
> > > > + * file I/O for v2/v3, or opening a file for v4. Files opened via v4 are
> > > > + * cleaned up as soon as their refcount goes to 0. Entries for v2/v3 are
> > > > + * flagged with NFSD_FILE_GC. On their last put, they are added to the LRU for
> > > > + * eventual disposal if they aren't used again within a short time period.
> > > > */
> > > >
> > > > #include <linux/hash.h>
> > > > @@ -302,31 +308,43 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> > > > if (key->gc)
> > > > __set_bit(NFSD_FILE_GC, &nf->nf_flags);
> > > > nf->nf_inode = key->inode;
> > > > - /* nf_ref is pre-incremented for hash table */
> > > > - refcount_set(&nf->nf_ref, 2);
> > > > + refcount_set(&nf->nf_ref, 1);
> > > > nf->nf_may = key->need;
> > > > nf->nf_mark = NULL;
> > > > }
> > > > return nf;
> > > > }
> > > >
> > > > -static bool
> > > > +static void
> > > > +nfsd_file_flush(struct nfsd_file *nf)
> > > > +{
> > > > + struct file *file = nf->nf_file;
> > > > +
> > > > + if (!file || !(file->f_mode & FMODE_WRITE))
> > > > + return;
> > > > + this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
> > > > + if (vfs_fsync(file, 1) != 0)
> > > > + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> > > > +}
> > > > +
> > > > +static void
> > > > nfsd_file_free(struct nfsd_file *nf)
> > > > {
> > > > s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
> > > > - bool flush = false;
> > > > +
> > > > + trace_nfsd_file_free(nf);
> > > >
> > > > this_cpu_inc(nfsd_file_releases);
> > > > this_cpu_add(nfsd_file_total_age, age);
> > > >
> > > > - trace_nfsd_file_put_final(nf);
> > > > + nfsd_file_flush(nf);
> > > > +
> > > > if (nf->nf_mark)
> > > > nfsd_file_mark_put(nf->nf_mark);
> > > > if (nf->nf_file) {
> > > > get_file(nf->nf_file);
> > > > filp_close(nf->nf_file, NULL);
> > > > fput(nf->nf_file);
> > > > - flush = true;
> > > > }
> > > >
> > > > /*
> > > > @@ -334,10 +352,9 @@ nfsd_file_free(struct nfsd_file *nf)
> > > > * WARN and leak it to preserve system stability.
> > > > */
> > > > if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
> > > > - return flush;
> > > > + return;
> > > >
> > > > call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
> > > > - return flush;
> > > > }
> > > >
> > > > static bool
> > > > @@ -363,29 +380,23 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
> > > > return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
> > > > }
> > > >
> > > > -static void
> > > > -nfsd_file_flush(struct nfsd_file *nf)
> > > > -{
> > > > - struct file *file = nf->nf_file;
> > > > -
> > > > - if (!file || !(file->f_mode & FMODE_WRITE))
> > > > - return;
> > > > - this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
> > > > - if (vfs_fsync(file, 1) != 0)
> > > > - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> > > > -}
> > > > -
> > > > -static void nfsd_file_lru_add(struct nfsd_file *nf)
> > > > +static bool nfsd_file_lru_add(struct nfsd_file *nf)
> > > > {
> > > > set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
> > > > - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
> > > > + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) {
> > > > trace_nfsd_file_lru_add(nf);
> > > > + return true;
> > > > + }
> > > > + return false;
> > > > }
> > > >
> > > > -static void nfsd_file_lru_remove(struct nfsd_file *nf)
> > > > +static bool nfsd_file_lru_remove(struct nfsd_file *nf)
> > > > {
> > > > - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
> > > > + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) {
> > > > trace_nfsd_file_lru_del(nf);
> > > > + return true;
> > > > + }
> > > > + return false;
> > > > }
> > > >
> > > > static void
> > > > @@ -409,94 +420,89 @@ nfsd_file_unhash(struct nfsd_file *nf)
> > > > return false;
> > > > }
> > > >
> > > > -static void
> > > > -nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
> > > > +struct nfsd_file *
> > > > +nfsd_file_get(struct nfsd_file *nf)
> > > > {
> > > > - trace_nfsd_file_unhash_and_dispose(nf);
> > > > + if (likely(refcount_inc_not_zero(&nf->nf_ref)))
> > > > + return nf;
> > > > + return NULL;
> > > > +}
> > > > +
> > > > +/**
> > > > + * nfsd_file_unhash_and_queue - unhash a file and queue it to the dispose list
> > > > + * @nf: nfsd_file to be unhashed and queued
> > > > + * @dispose: list to which it should be queued
> > > > + *
> > > > + * Attempt to unhash a nfsd_file and queue it to the given list. Each file
> > > > + * will have a reference held on behalf of the list. That reference may come
> > > > + * from the LRU, or we may need to take one. If we can't get a reference,
> > > > + * ignore it altogether.
> > > > + */
> > > > +static bool
> > > > +nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose)
> > > > +{
> > > > + trace_nfsd_file_unhash_and_queue(nf);
> > > > if (nfsd_file_unhash(nf)) {
> > > > - /* caller must call nfsd_file_dispose_list() later */
> > > > - nfsd_file_lru_remove(nf);
> > > > + /*
> > > > + * If we remove it from the LRU, then just use that
> > > > + * reference for the dispose list. Otherwise, we need
> > > > + * to take a reference. If that fails, just ignore
> > > > + * the file altogether.
> > > > + */
> > > > + if (!nfsd_file_lru_remove(nf) && !nfsd_file_get(nf))
> > > > + return false;
> > > > list_add(&nf->nf_lru, dispose);
> > > > + return true;
> > > > }
> > > > + return false;
> > > > }
> > > >
> > > > -static void
> > > > -nfsd_file_put_noref(struct nfsd_file *nf)
> > > > +static bool
> > > > +__nfsd_file_put(struct nfsd_file *nf)
> > >
> > > The return value of this function is never tested.
> > > Maybe it should return void.
> > >
> > > Further, I don't think this is a useful abstraction.
> > > I would rather move the refcount_dec_and_test to the caller, and move
> > > the lru_remove and unash into nfsd_file_free.
> > >
> >
> > Ok, sounds reasonable.
> >
> > > > {
> > > > - trace_nfsd_file_put(nf);
> > > > -
> > > > if (refcount_dec_and_test(&nf->nf_ref)) {
> > > > - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
> > > > - nfsd_file_lru_remove(nf);
> > > > + nfsd_file_unhash(nf);
> > > > nfsd_file_free(nf);
> > > > + return true;
> > > > }
> > > > + return false;
> > > > }
> > > >
> > > > -static void
> > > > -nfsd_file_unhash_and_put(struct nfsd_file *nf)
> > > > -{
> > > > - if (nfsd_file_unhash(nf))
> > > > - nfsd_file_put_noref(nf);
> > > > -}
> > > > -
> > > > +/**
> > > > + * nfsd_file_put - put the reference to a nfsd_file
> > > > + * @nf: nfsd_file of which to put the reference
> > > > + *
> > > > + * Put a reference to a nfsd_file. In the v4 case, we just put the
> > > > + * reference immediately. In the v2/3 case, if the reference would be
> > > > + * the last one, the put it on the LRU instead to be cleaned up later.
> > > > + */
> > > > void
> > > > nfsd_file_put(struct nfsd_file *nf)
> > > > {
> > > > - might_sleep();
> > > > -
> > > > - if (test_bit(NFSD_FILE_GC, &nf->nf_flags))
> > > > - nfsd_file_lru_add(nf);
> > > > - else if (refcount_read(&nf->nf_ref) == 2)
> > > > - nfsd_file_unhash_and_put(nf);
> > > > -
> > > > - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
> > > > - nfsd_file_flush(nf);
> > > > - nfsd_file_put_noref(nf);
> > > > - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
> > > > - nfsd_file_put_noref(nf);
> > > > - nfsd_file_schedule_laundrette();
> > > > - } else
> > > > - nfsd_file_put_noref(nf);
> > > > -}
> > > > -
> > > > -struct nfsd_file *
> > > > -nfsd_file_get(struct nfsd_file *nf)
> > > > -{
> > > > - if (likely(refcount_inc_not_zero(&nf->nf_ref)))
> > > > - return nf;
> > > > - return NULL;
> > > > -}
> > > > -
> > > > -static void
> > > > -nfsd_file_dispose_list(struct list_head *dispose)
> > > > -{
> > > > - struct nfsd_file *nf;
> > > > + trace_nfsd_file_put(nf);
> > > >
> > > > - while(!list_empty(dispose)) {
> > > > - nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
> > > > - list_del_init(&nf->nf_lru);
> > > > - nfsd_file_flush(nf);
> > > > - nfsd_file_put_noref(nf);
> > > > + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
> > >
> > > I would prefer this included a test on NFSD_FILE_HASHED as well so that
> > > if the file isn't hashed, we don't consider it for the lru.
> > > This would me we can simple called nfsd_file_put() for things on the
> > > dispose list, rather then needing __nfsd_file_put()
> > >
> >
> > I had an incorrectly reversed test for that in the previous version in
> > nfsd_file_lru_add and you mentioned that it was racy. Why would that not
> > be the case here?
>
> It accept there is an apparent hypocrisy there :-)
> This proposed test isn't racy because of the intent.
> The intent isn't to ensure unhashed files never go onto the lru.
> The intent is to ensure that if I unhash a file and then call put(),
> then the file won't be put on the LRU.
>
> Any code that calls nfsd_file_unhash() will either hold a reference, or
> has just dropped the last reference. In either case it can be certain
> that no other thread will drop the last reference, so no other thread
> can cause the file to be added to the lru.
>
> So in actual fact it is not racy - I was wrong before.
>
> >
> > > > + /*
> > > > + * If this is the last reference (nf_ref == 1), then transfer
> > > > + * it to the LRU. If the add to the LRU fails, just put it as
> > > > + * usual.
> > > > + */
> > > > + if (refcount_dec_not_one(&nf->nf_ref) || nfsd_file_lru_add(nf))
> > > > + return;
> > > > }
> > > > + __nfsd_file_put(nf);
> > >
> > > As suggested above, this would become
> > > if (refcount_dec_and_test(&nf->nf_ref))
> > > nfsd_file_free(nf);
> > >
> >
> > Ok.
> >
> > > > }
> > > >
> > > > static void
> > > > -nfsd_file_dispose_list_sync(struct list_head *dispose)
> > > > +nfsd_file_dispose_list(struct list_head *dispose)
> > > > {
> > > > - bool flush = false;
> > > > struct nfsd_file *nf;
> > > >
> > > > while(!list_empty(dispose)) {
> > > > nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
> > > > list_del_init(&nf->nf_lru);
> > > > - nfsd_file_flush(nf);
> > > > - if (!refcount_dec_and_test(&nf->nf_ref))
> > > > - continue;
> > > > - if (nfsd_file_free(nf))
> > > > - flush = true;
> > > > + nfsd_file_free(nf);
> > > > }
> > > > - if (flush)
> > > > - flush_delayed_fput();
> > > > }
> > > >
> > > > static void
> > > > @@ -566,21 +572,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
> > > > struct list_head *head = arg;
> > > > struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
> > > >
> > > > - /*
> > > > - * Do a lockless refcount check. The hashtable holds one reference, so
> > > > - * we look to see if anything else has a reference, or if any have
> > > > - * been put since the shrinker last ran. Those don't get unhashed and
> > > > - * released.
> > > > - *
> > > > - * Note that in the put path, we set the flag and then decrement the
> > > > - * counter. Here we check the counter and then test and clear the flag.
> > > > - * That order is deliberate to ensure that we can do this locklessly.
> > > > - */
> > > > - if (refcount_read(&nf->nf_ref) > 1) {
> > > > - list_lru_isolate(lru, &nf->nf_lru);
> > > > - trace_nfsd_file_gc_in_use(nf);
> > > > - return LRU_REMOVED;
> > > > - }
> > > > + /* We should only be dealing with v2/3 entries here */
> > > > + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
> > > >
> > > > /*
> > > > * Don't throw out files that are still undergoing I/O or
> > > > @@ -591,40 +584,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
> > > > return LRU_SKIP;
> > > > }
> > > >
> > > > + /* If it was recently added to the list, skip it */
> > > > if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
> > > > trace_nfsd_file_gc_referenced(nf);
> > > > return LRU_ROTATE;
> > > > }
> > > >
> > > > - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
> > > > - trace_nfsd_file_gc_hashed(nf);
> > > > - return LRU_SKIP;
> > > > + /*
> > > > + * Put the reference held on behalf of the LRU. If it wasn't the last
> > > > + * one, then just remove it from the LRU and ignore it.
> > > > + */
> > > > + if (!refcount_dec_and_test(&nf->nf_ref)) {
> > > > + trace_nfsd_file_gc_in_use(nf);
> > > > + list_lru_isolate(lru, &nf->nf_lru);
> > > > + return LRU_REMOVED;
> > > > }
> > > >
> > > > + /* Refcount went to zero. Unhash it and queue it to the dispose list */
> > > > + nfsd_file_unhash(nf);
> > > > list_lru_isolate_move(lru, &nf->nf_lru, head);
> > > > this_cpu_inc(nfsd_file_evictions);
> > > > trace_nfsd_file_gc_disposed(nf);
> > > > return LRU_REMOVED;
> > > > }
> > > >
> > > > -/*
> > > > - * Unhash items on @dispose immediately, then queue them on the
> > > > - * disposal workqueue to finish releasing them in the background.
> > > > - *
> > > > - * cel: Note that between the time list_lru_shrink_walk runs and
> > > > - * now, these items are in the hash table but marked unhashed.
> > > > - * Why release these outside of lru_cb ? There's no lock ordering
> > > > - * problem since lru_cb currently takes no lock.
> > > > - */
> > > > -static void nfsd_file_gc_dispose_list(struct list_head *dispose)
> > > > -{
> > > > - struct nfsd_file *nf;
> > > > -
> > > > - list_for_each_entry(nf, dispose, nf_lru)
> > > > - nfsd_file_hash_remove(nf);
> > > > - nfsd_file_dispose_list_delayed(dispose);
> > > > -}
> > > > -
> > > > static void
> > > > nfsd_file_gc(void)
> > > > {
> > > > @@ -634,7 +617,7 @@ nfsd_file_gc(void)
> > > > ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
> > > > &dispose, list_lru_count(&nfsd_file_lru));
> > > > trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
> > > > - nfsd_file_gc_dispose_list(&dispose);
> > > > + nfsd_file_dispose_list_delayed(&dispose);
> > > > }
> > > >
> > > > static void
> > > > @@ -659,7 +642,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
> > > > ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
> > > > nfsd_file_lru_cb, &dispose);
> > > > trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
> > > > - nfsd_file_gc_dispose_list(&dispose);
> > > > + nfsd_file_dispose_list_delayed(&dispose);
> > > > return ret;
> > > > }
> > > >
> > > > @@ -670,8 +653,11 @@ static struct shrinker nfsd_file_shrinker = {
> > > > };
> > > >
> > > > /*
> > > > - * Find all cache items across all net namespaces that match @inode and
> > > > - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
> > > > + * Find all cache items across all net namespaces that match @inode, unhash
> > > > + * them, take references and then put them on @dispose if that was successful.
> > > > + *
> > > > + * The nfsd_file objects on the list will be unhashed, and each will have a
> > > > + * reference taken.
> > > > */
> > > > static unsigned int
> > > > __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
> > > > @@ -689,52 +675,58 @@ __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
> > > > nfsd_file_rhash_params);
> > > > if (!nf)
> > > > break;
> > > > - nfsd_file_unhash_and_dispose(nf, dispose);
> > > > - count++;
> > > > +
> > > > + if (nfsd_file_unhash_and_queue(nf, dispose))
> > > > + count++;
> > > > } while (1);
> > > > rcu_read_unlock();
> > > > return count;
> > > > }
> > > >
> > > > /**
> > > > - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
> > > > + * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
> > > > * @inode: inode of the file to attempt to remove
> > > > *
> > > > - * Unhash and put, then flush and fput all cache items associated with @inode.
> > > > + * Unhash and put all cache item associated with @inode.
> > > > */
> > > > -void
> > > > -nfsd_file_close_inode_sync(struct inode *inode)
> > > > +static unsigned int
> > > > +nfsd_file_close_inode(struct inode *inode)
> > > > {
> > > > - LIST_HEAD(dispose);
> > > > + struct nfsd_file *nf;
> > > > unsigned int count;
> > > > + LIST_HEAD(dispose);
> > > >
> > > > count = __nfsd_file_close_inode(inode, &dispose);
> > > > - trace_nfsd_file_close_inode_sync(inode, count);
> > > > - nfsd_file_dispose_list_sync(&dispose);
> > > > + trace_nfsd_file_close_inode(inode, count);
> > > > + if (count) {
> > > > + while(!list_empty(&dispose)) {
> > > > + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru);
> > > > + list_del_init(&nf->nf_lru);
> > > > + trace_nfsd_file_closing(nf);
> > > > + __nfsd_file_put(nf);
> > >
> > > If nfsd_file_put() didn't add unhashed files to the lru, this can be
> > > nfsd_file_put().
> > >
> > > > + }
> > > > + }
> > > > + return count;
> > > > }
> > > >
> > > > /**
> > > > - * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
> > > > + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
> > > > * @inode: inode of the file to attempt to remove
> > > > *
> > > > - * Unhash and put all cache item associated with @inode.
> > > > + * Unhash and put, then flush and fput all cache items associated with @inode.
> > > > */
> > > > -static void
> > > > -nfsd_file_close_inode(struct inode *inode)
> > > > +void
> > > > +nfsd_file_close_inode_sync(struct inode *inode)
> > > > {
> > > > - LIST_HEAD(dispose);
> > > > - unsigned int count;
> > > > -
> > > > - count = __nfsd_file_close_inode(inode, &dispose);
> > > > - trace_nfsd_file_close_inode(inode, count);
> > > > - nfsd_file_dispose_list_delayed(&dispose);
> > > > + if (nfsd_file_close_inode(inode))
> > > > + flush_delayed_fput();
> > > > }
> > > >
> > > > /**
> > > > * nfsd_file_delayed_close - close unused nfsd_files
> > > > * @work: dummy
> > > > *
> > > > - * Walk the LRU list and close any entries that have not been used since
> > > > + * Walk the LRU list and destroy any entries that have not been used since
> > > > * the last scan.
> > > > */
> > > > static void
> > > > @@ -892,7 +884,7 @@ __nfsd_file_cache_purge(struct net *net)
> > > > while (!IS_ERR_OR_NULL(nf)) {
> > > > if (net && nf->nf_net != net)
> > > > continue;
> > > > - nfsd_file_unhash_and_dispose(nf, &dispose);
> > > > + nfsd_file_unhash_and_queue(nf, &dispose);
> > > > nf = rhashtable_walk_next(&iter);
> > > > }
> > > >
> > > > @@ -1093,11 +1085,10 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > > goto out;
> > > > }
> > > > open_retry = false;
> > > > - nfsd_file_put_noref(nf);
> > > > + __nfsd_file_put(nf);
> > >
> > > This nf is not hashed, and I think it has no other reference. So we
> > > could use nfsd_file_free() - but nfsd_file_put() would be just as good
> > > and safer.
> > >
> > > > goto retry;
> > > > }
> > > >
> > > > - nfsd_file_lru_remove(nf);
> > >
> > > Hmmm... why not remove from the lru. I guess this justifies patch 2/3,
> > > but it might be cleaner to make this
> > >
> > > if (nfsd_file_lru_remove(nf))
> > > nffsd_file_put(nf);
> > > ??
> > >
> >
> > Removing from the LRU means putting a reference now. The last "put" of a
> > nfsd_file can be rather expensive (you might need to flush data, and
> > issue a close()).
>
> True, but irrelevant. nfsd_file_do_acquire() already holds a reference.
> If it succeeds at removing from the LRU, it now holds 2 references. If
> it puts one, then it won't be that last "put", and so will be cheap.
>
> I don't object to the way you have don't it - if ! lru_remove then get -
> but it isn't necessary. You can just to the get - then if lru_remove,
> do a put.
>
> >
> > In this particular codepath, that's not so much a danger, but avoiding
> > excess "put" calls is still a good thing to do. That's the main reason
> > I've tried to "transfer" references to and from the LRU where possible.
> >
> > > > this_cpu_inc(nfsd_file_cache_hits);
> > > >
> > > > status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
> > > > @@ -1107,7 +1098,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > > this_cpu_inc(nfsd_file_acquisitions);
> > > > *pnf = nf;
> > > > } else {
> > > > - nfsd_file_put(nf);
> > > > + __nfsd_file_put(nf);
> > >
> > > I don't see the justification for this change.
> > > If status == nfserr_jukebox, then it is OK.
> > > If status is whatever we might get from break_lease(), then it seems
> > > wrong.
> > > If we modify nfsd_file_put() as I suggest, it will handle both cases.
> > >
> > >
> >
> > The justification is that when we're dealing with an error from an open,
> > we don't want to put the nfsd_file onto the LRU. So, a direct call to
> > __nfsd_file_put is what's needed here.
>
> Maybe... I guess my concern arises from the fact that I'm unclear on how
> break_lease() might fail. If it is a transitory failure then dropping
> from the lru doesn't seem appropriate. Maybe I should refresh my
> understanding of break_lease() failure modes.
>
nfsd_open_break_lease calls break_lease with O_NONBLOCK, so it can fail
with -EWOULDBLOCK, which should be a transient error. So we might end up
tearing down a file when it isn't necessary due to an outstanding
lease.
That's probably worth fixing, but I'd rather not do that in the context
of this set. With this set, I want to focus on fixing up the issues with
refcounting that are causing crashes in the field.
> >
> > I'll plan to open-code those like you suggest in the next iteration.
> >
> > > > nf = NULL;
> > > > }
> > > >
> > > > @@ -1134,7 +1125,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
> > > > * then unhash.
> > > > */
> > > > if (status != nfs_ok || key.inode->i_nlink == 0)
> > > > - nfsd_file_unhash_and_put(nf);
> > > > + nfsd_file_unhash(nf);
> > > > clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
> > > > smp_mb__after_atomic();
> > > > wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
> > > > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > > > index b09ab4f92d43..a44ded06af87 100644
> > > > --- a/fs/nfsd/trace.h
> > > > +++ b/fs/nfsd/trace.h
> > > > @@ -903,10 +903,11 @@ DEFINE_EVENT(nfsd_file_class, name, \
> > > > TP_PROTO(struct nfsd_file *nf), \
> > > > TP_ARGS(nf))
> > > >
> > > > -DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final);
> > > > +DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
> > > > DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
> > > > DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
> > > > -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose);
> > > > +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing);
> > > > +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue);
> > > >
> > > > TRACE_EVENT(nfsd_file_alloc,
> > > > TP_PROTO(
> > > > --
> > > > 2.37.3
> > > >
> > > >
> > >
> > > Thanks,
> > > NeilBrown
> >
> > --
> > Jeff Layton <jlayton@kernel.org>
> >
>
> Thanks a lot,
> I quite like your latest version.
>
Thanks! I think we're getting much closer.
--
Jeff Layton <jlayton@kernel.org>
next prev parent reply other threads:[~2022-10-31 9:54 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-27 21:52 [PATCH v2 0/3] nfsd: clean up refcounting in filecache Jeff Layton
2022-10-27 21:52 ` [PATCH v2 1/3] nfsd: rework " Jeff Layton
2022-10-27 22:51 ` NeilBrown
2022-10-28 9:57 ` Jeff Layton
2022-10-28 22:14 ` NeilBrown
2022-10-30 21:29 ` NeilBrown
2022-10-31 9:58 ` Jeff Layton
2022-10-31 9:54 ` Jeff Layton [this message]
2022-10-27 21:52 ` [PATCH v2 2/3] nfsd: only keep unused entries on the LRU Jeff Layton
2022-10-27 22:20 ` NeilBrown
2022-10-27 22:47 ` Jeff Layton
2022-10-27 22:55 ` NeilBrown
2022-10-27 23:52 ` Chuck Lever III
2022-10-27 21:52 ` [PATCH v2 3/3] nfsd: start non-blocking writeback after adding nfsd_file to " Jeff Layton
2022-10-27 22:25 ` NeilBrown
2022-10-28 13:01 ` Jeff Layton
2022-10-28 13:16 ` Chuck Lever III
2022-10-28 15:05 ` Jeff Layton
2022-10-28 15:29 ` Chuck Lever III
2022-10-28 15:51 ` Jeff Layton
2022-10-28 17:21 ` Chuck Lever III
2022-10-28 17:43 ` Jeff Layton
2022-10-28 18:53 ` Chuck Lever III
2022-10-28 20:04 ` Jeff Layton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=35e8c613d19f02e76916dbc9a2b27aa2f78cd215.camel@kernel.org \
--to=jlayton@kernel.org \
--cc=chuck.lever@oracle.com \
--cc=linux-nfs@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox