From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org,
Trond Myklebust <trond.myklebust@hammerspace.com>,
"J. Bruce Fields" <bfields@redhat.com>,
Khazhy Kumykov <khazhy@google.com>
Subject: [PATCH 5.4 02/29] nfsd: Containerise filecache laundrette
Date: Fri, 25 Mar 2022 16:04:42 +0100 [thread overview]
Message-ID: <20220325150418.657862336@linuxfoundation.org> (raw)
In-Reply-To: <20220325150418.585286754@linuxfoundation.org>
From: Trond Myklebust <trondmy@gmail.com>
commit 9542e6a643fc69d528dfb3303f145719c61d3050 upstream.
Ensure that if the filecache laundrette gets stuck, it only affects
the knfsd instances of one container.
The notifier callbacks can be called from various contexts so avoid
using synchonous filesystem operations that might deadlock.
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Cc: Khazhy Kumykov <khazhy@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
fs/nfsd/filecache.c | 238 +++++++++++++++++++++++++++++++++++++++++++---------
fs/nfsd/filecache.h | 2
fs/nfsd/nfssvc.c | 9 +
3 files changed, 207 insertions(+), 42 deletions(-)
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -44,6 +44,17 @@ struct nfsd_fcache_bucket {
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+struct nfsd_fcache_disposal {
+ struct list_head list;
+ struct work_struct work;
+ struct net *net;
+ spinlock_t lock;
+ struct list_head freeme;
+ struct rcu_head rcu;
+};
+
+struct workqueue_struct *nfsd_filecache_wq __read_mostly;
+
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
@@ -52,32 +63,21 @@ static long nfsd_file_lru_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
+static DEFINE_SPINLOCK(laundrette_lock);
+static LIST_HEAD(laundrettes);
-enum nfsd_file_laundrette_ctl {
- NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
- NFSD_FILE_LAUNDRETTE_MAY_FLUSH
-};
+static void nfsd_file_gc(void);
static void
-nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+nfsd_file_schedule_laundrette(void)
{
long count = atomic_long_read(&nfsd_filecache_count);
if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
return;
- /* Be more aggressive about scanning if over the threshold */
- if (count > NFSD_FILE_LRU_THRESHOLD)
- mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
- else
- schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
-
- if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
- return;
-
- /* ...and don't delay flushing if we're out of control */
- if (count >= NFSD_FILE_LRU_LIMIT)
- flush_delayed_work(&nfsd_filecache_laundrette);
+ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+ NFSD_LAUNDRETTE_DELAY);
}
static void
@@ -316,7 +316,9 @@ nfsd_file_put(struct nfsd_file *nf)
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
- nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+ nfsd_file_schedule_laundrette();
+ if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+ nfsd_file_gc();
}
struct nfsd_file *
@@ -357,6 +359,58 @@ nfsd_file_dispose_list_sync(struct list_
flush_delayed_fput();
}
+static void
+nfsd_file_list_remove_disposal(struct list_head *dst,
+ struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&l->lock);
+ list_splice_init(&l->freeme, dst);
+ spin_unlock(&l->lock);
+}
+
+static void
+nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(l, &laundrettes, list) {
+ if (l->net == net) {
+ spin_lock(&l->lock);
+ list_splice_tail_init(files, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void
+nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+ struct net *net)
+{
+ struct nfsd_file *nf, *tmp;
+
+ list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+ if (nf->nf_net == net)
+ list_move_tail(&nf->nf_lru, dst);
+ }
+}
+
+static void
+nfsd_file_dispose_list_delayed(struct list_head *dispose)
+{
+ LIST_HEAD(list);
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+ nfsd_file_list_add_disposal(&list, nf->nf_net);
+ }
+}
+
/*
* Note this can deadlock with nfsd_file_cache_purge.
*/
@@ -403,17 +457,40 @@ out_skip:
return LRU_SKIP;
}
-static void
-nfsd_file_lru_dispose(struct list_head *head)
+static unsigned long
+nfsd_file_lru_walk_list(struct shrink_control *sc)
{
+ LIST_HEAD(head);
struct nfsd_file *nf;
+ unsigned long ret;
- list_for_each_entry(nf, head, nf_lru) {
+ if (sc)
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &head);
+ else
+ ret = list_lru_walk(&nfsd_file_lru,
+ nfsd_file_lru_cb,
+ &head, LONG_MAX);
+ list_for_each_entry(nf, &head, nf_lru) {
spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
nfsd_file_do_unhash(nf);
spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
}
- nfsd_file_dispose_list(head);
+ nfsd_file_dispose_list_delayed(&head);
+ return ret;
+}
+
+static void
+nfsd_file_gc(void)
+{
+ nfsd_file_lru_walk_list(NULL);
+}
+
+static void
+nfsd_file_gc_worker(struct work_struct *work)
+{
+ nfsd_file_gc();
+ nfsd_file_schedule_laundrette();
}
static unsigned long
@@ -425,12 +502,7 @@ nfsd_file_lru_count(struct shrinker *s,
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
- LIST_HEAD(head);
- unsigned long ret;
-
- ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
- nfsd_file_lru_dispose(&head);
- return ret;
+ return nfsd_file_lru_walk_list(sc);
}
static struct shrinker nfsd_file_shrinker = {
@@ -492,7 +564,7 @@ nfsd_file_close_inode(struct inode *inod
__nfsd_file_close_inode(inode, hashval, &dispose);
trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
- nfsd_file_dispose_list(&dispose);
+ nfsd_file_dispose_list_delayed(&dispose);
}
/**
@@ -508,16 +580,11 @@ static void
nfsd_file_delayed_close(struct work_struct *work)
{
LIST_HEAD(head);
+ struct nfsd_fcache_disposal *l = container_of(work,
+ struct nfsd_fcache_disposal, work);
- list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
-
- if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
- nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
-
- if (!list_empty(&head)) {
- nfsd_file_lru_dispose(&head);
- flush_delayed_fput();
- }
+ nfsd_file_list_remove_disposal(&head, l);
+ nfsd_file_dispose_list(&head);
}
static int
@@ -578,6 +645,10 @@ nfsd_file_cache_init(void)
if (nfsd_file_hashtbl)
return 0;
+ nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+ if (!nfsd_filecache_wq)
+ goto out;
+
nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
if (!nfsd_file_hashtbl) {
@@ -631,7 +702,7 @@ nfsd_file_cache_init(void)
spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
}
- INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+ INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
return ret;
out_notifier:
@@ -647,6 +718,8 @@ out_err:
nfsd_file_mark_slab = NULL;
kfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
goto out;
}
@@ -685,6 +758,88 @@ nfsd_file_cache_purge(struct net *net)
}
}
+static struct nfsd_fcache_disposal *
+nfsd_alloc_fcache_disposal(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = kmalloc(sizeof(*l), GFP_KERNEL);
+ if (!l)
+ return NULL;
+ INIT_WORK(&l->work, nfsd_file_delayed_close);
+ l->net = net;
+ spin_lock_init(&l->lock);
+ INIT_LIST_HEAD(&l->freeme);
+ return l;
+}
+
+static void
+nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ rcu_assign_pointer(l->net, NULL);
+ cancel_work_sync(&l->work);
+ nfsd_file_dispose_list(&l->freeme);
+ kfree_rcu(l, rcu);
+}
+
+static void
+nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&laundrette_lock);
+ list_add_tail_rcu(&l->list, &laundrettes);
+ spin_unlock(&laundrette_lock);
+}
+
+static void
+nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&laundrette_lock);
+ list_del_rcu(&l->list);
+ spin_unlock(&laundrette_lock);
+}
+
+static int
+nfsd_alloc_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = nfsd_alloc_fcache_disposal(net);
+ if (!l)
+ return -ENOMEM;
+ nfsd_add_fcache_disposal(l);
+ return 0;
+}
+
+static void
+nfsd_free_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(l, &laundrettes, list) {
+ if (l->net != net)
+ continue;
+ nfsd_del_fcache_disposal(l);
+ rcu_read_unlock();
+ nfsd_free_fcache_disposal(l);
+ return;
+ }
+ rcu_read_unlock();
+}
+
+int
+nfsd_file_cache_start_net(struct net *net)
+{
+ return nfsd_alloc_fcache_disposal_net(net);
+}
+
+void
+nfsd_file_cache_shutdown_net(struct net *net)
+{
+ nfsd_file_cache_purge(net);
+ nfsd_free_fcache_disposal_net(net);
+}
+
void
nfsd_file_cache_shutdown(void)
{
@@ -711,6 +866,8 @@ nfsd_file_cache_shutdown(void)
nfsd_file_mark_slab = NULL;
kfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
}
static bool
@@ -880,7 +1037,8 @@ open_file:
nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
nfsd_file_hashtbl[hashval].nfb_count);
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
- atomic_long_inc(&nfsd_filecache_count);
+ if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
+ nfsd_file_gc();
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark)
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -51,6 +51,8 @@ struct nfsd_file {
int nfsd_file_cache_init(void);
void nfsd_file_cache_purge(struct net *);
void nfsd_file_cache_shutdown(void);
+int nfsd_file_cache_start_net(struct net *net);
+void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -394,13 +394,18 @@ static int nfsd_startup_net(int nrservs,
nn->lockd_up = 1;
}
- ret = nfs4_state_start_net(net);
+ ret = nfsd_file_cache_start_net(net);
if (ret)
goto out_lockd;
+ ret = nfs4_state_start_net(net);
+ if (ret)
+ goto out_filecache;
nn->nfsd_net_up = true;
return 0;
+out_filecache:
+ nfsd_file_cache_shutdown_net(net);
out_lockd:
if (nn->lockd_up) {
lockd_down(net);
@@ -415,7 +420,7 @@ static void nfsd_shutdown_net(struct net
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfsd_file_cache_purge(net);
+ nfsd_file_cache_shutdown_net(net);
nfs4_state_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
next prev parent reply other threads:[~2022-03-25 15:13 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-25 15:04 [PATCH 5.4 00/29] 5.4.188-rc1 review Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 01/29] nfsd: cleanup nfsd_file_lru_dispose() Greg Kroah-Hartman
2022-03-25 15:04 ` Greg Kroah-Hartman [this message]
2022-03-25 15:04 ` [PATCH 5.4 03/29] nfc: st21nfca: Fix potential buffer overflows in EVT_TRANSACTION Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 04/29] net: ipv6: fix skb_over_panic in __ip6_append_data Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 05/29] esp: Fix possible buffer overflow in ESP transformation Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 06/29] tpm: Fix error handling in async work Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 07/29] staging: fbtft: fb_st7789v: reset display before initialization Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 08/29] thermal: int340x: fix memory leak in int3400_notify() Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 09/29] llc: fix netdevice reference leaks in llc_ui_bind() Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 10/29] swiotlb: fix info leak with DMA_FROM_DEVICE Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 11/29] swiotlb: rework "fix info leak with DMA_FROM_DEVICE" Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 12/29] ALSA: pcm: Add stream lock during PCM reset ioctl operations Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 13/29] ALSA: usb-audio: Add mute TLV for playback volumes on RODE NT-USB Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 14/29] ALSA: cmipci: Restore aux vol on suspend/resume Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 15/29] ALSA: pci: fix reading of swapped values from pcmreg in AC97 codec Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 16/29] drivers: net: xgene: Fix regression in CRC stripping Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 17/29] ASoC: sti: Fix deadlock via snd_pcm_stop_xrun() call Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 18/29] ALSA: oss: Fix PCM OSS buffer allocation overflow Greg Kroah-Hartman
2022-03-25 15:04 ` [PATCH 5.4 19/29] ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671 Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 20/29] ALSA: hda/realtek: Add quirk for ASUS GA402 Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 21/29] netfilter: nf_tables: initialize registers in nft_do_chain() Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 22/29] ACPI / x86: Work around broken XSDT on Advantech DAC-BJ01 board Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 23/29] ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3 Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 24/29] ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 25/29] crypto: qat - disable registration of algorithms Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 26/29] rcu: Dont deboost before reporting expedited quiescent state Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 27/29] mac80211: fix potential double free on mesh join Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 28/29] tpm: use try_get_ops() in tpm-space.c Greg Kroah-Hartman
2022-03-25 15:05 ` [PATCH 5.4 29/29] nds32: fix access_ok() checks in get/put_user Greg Kroah-Hartman
2022-03-25 21:51 ` [PATCH 5.4 00/29] 5.4.188-rc1 review Florian Fainelli
2022-03-25 23:25 ` Shuah Khan
2022-03-26 13:19 ` Naresh Kamboju
2022-03-26 14:02 ` Sudip Mukherjee
2022-03-27 0:51 ` Guenter Roeck
2022-03-28 1:00 ` Samuel Zou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220325150418.657862336@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=bfields@redhat.com \
--cc=khazhy@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=trond.myklebust@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox