From: Waiman Long <longman@redhat.com>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
Jonathan Corbet <corbet@lwn.net>
Cc: linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
linux-fsdevel@vger.kernel.org,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Andrew Morton <akpm@linux-foundation.org>,
Ingo Molnar <mingo@kernel.org>,
Miklos Szeredi <mszeredi@redhat.com>,
Matthew Wilcox <willy@infradead.org>,
Larry Woodman <lwoodman@redhat.com>,
James Bottomley <James.Bottomley@HansenPartnership.com>,
Waiman Long <longman@redhat.com>
Subject: [PATCH v3 4/5] fs/dcache: Protect negative dentry pruning from racing with umount
Date: Fri, 28 Jul 2017 14:34:39 -0400 [thread overview]
Message-ID: <1501266880-26288-5-git-send-email-longman@redhat.com> (raw)
In-Reply-To: <1501266880-26288-1-git-send-email-longman@redhat.com>
Negative dentry pruning is done on the specific super_block recorded
in the ndblk.prune_sb variable. If that super_block is being unmounted
concurrently, its contents may no longer be valid while the pruning
is still in progress.
To protect against such a race condition, a new lock is added to
the ndblk structure to synchronize negative dentry pruning with the
umount operation. This is a regular spinlock (rather than a raw spinlock
like nfree_lock) as the pruning operation can be quite time consuming.
Signed-off-by: Waiman Long <longman@redhat.com>
---
fs/dcache.c | 42 +++++++++++++++++++++++++++++++++++++++---
1 file changed, 39 insertions(+), 3 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 3482972..360185e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,11 +141,13 @@ struct dentry_stat_t dentry_stat = {
static long neg_dentry_nfree_init __read_mostly; /* Free pool initial value */
static struct {
raw_spinlock_t nfree_lock;
+ spinlock_t prune_lock; /* Lock for protecting pruning */
long nfree; /* Negative dentry free pool */
struct super_block *prune_sb; /* Super_block for pruning */
int neg_count, prune_count; /* Pruning counts */
} ndblk ____cacheline_aligned_in_smp;
+static void clear_prune_sb_for_umount(struct super_block *sb);
static void prune_negative_dentry(struct work_struct *work);
static DECLARE_DELAYED_WORK(prune_neg_dentry_work, prune_negative_dentry);
@@ -1355,6 +1357,7 @@ void shrink_dcache_sb(struct super_block *sb)
{
long freed;
+ clear_prune_sb_for_umount(sb);
do {
LIST_HEAD(dispose);
@@ -1385,7 +1388,8 @@ static enum lru_status dentry_negative_lru_isolate(struct list_head *item,
* list.
*/
if ((ndblk.neg_count >= NEG_PRUNING_SIZE) ||
- (ndblk.prune_count >= NEG_PRUNING_SIZE)) {
+ (ndblk.prune_count >= NEG_PRUNING_SIZE) ||
+ !READ_ONCE(ndblk.prune_sb)) {
ndblk.prune_count = 0;
return LRU_STOP;
}
@@ -1441,15 +1445,24 @@ static void prune_negative_dentry(struct work_struct *work)
{
int freed;
long nfree;
- struct super_block *sb = READ_ONCE(ndblk.prune_sb);
+ struct super_block *sb;
LIST_HEAD(dispose);
- if (!sb)
+ /*
+ * The prune_lock is used to protect negative dentry pruning from
+ * racing with concurrent umount operation.
+ */
+ spin_lock(&ndblk.prune_lock);
+ sb = READ_ONCE(ndblk.prune_sb);
+ if (!sb) {
+ spin_unlock(&ndblk.prune_lock);
return;
+ }
ndblk.neg_count = ndblk.prune_count = 0;
freed = list_lru_walk(&sb->s_dentry_lru, dentry_negative_lru_isolate,
&dispose, NEG_DENTRY_BATCH);
+ spin_unlock(&ndblk.prune_lock);
if (freed)
shrink_dentry_list(&dispose);
@@ -1472,6 +1485,27 @@ static void prune_negative_dentry(struct work_struct *work)
WRITE_ONCE(ndblk.prune_sb, NULL);
}
+/*
+ * This is called before an umount to clear ndblk.prune_sb if it
+ * matches the given super_block.
+ */
+static void clear_prune_sb_for_umount(struct super_block *sb)
+{
+ if (likely(READ_ONCE(ndblk.prune_sb) != sb))
+ return;
+ WRITE_ONCE(ndblk.prune_sb, NULL);
+ /*
+ * Need to wait until an ongoing pruning operation, if present,
+ * is completed.
+ *
+ * Clearing ndblk.prune_sb will hasten the completion of pruning.
+ * In the unlikely event that ndblk.prune_sb is set to another
+ * super_block, the waiting will last the complete pruning operation
+ * which shouldn't be that long either.
+ */
+ spin_unlock_wait(&ndblk.prune_lock);
+}
+
/**
* enum d_walk_ret - action to talke during tree walk
* @D_WALK_CONTINUE: contrinue walk
@@ -1794,6 +1828,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
+ clear_prune_sb_for_umount(sb);
dentry = sb->s_root;
sb->s_root = NULL;
do_one_tree(dentry);
@@ -3896,6 +3931,7 @@ static void __init neg_dentry_init(void)
unsigned long cnt;
raw_spin_lock_init(&ndblk.nfree_lock);
+ spin_lock_init(&ndblk.prune_lock);
/* 20% in global pool & 80% in percpu free */
ndblk.nfree = neg_dentry_nfree_init
--
1.8.3.1
next prev parent reply other threads:[~2017-07-28 18:35 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-28 18:34 [PATCH v3 0/5] fs/dcache: Limit # of negative dentries Waiman Long
2017-07-28 18:34 ` [PATCH v3 1/5] fs/dcache: Limit numbers " Waiman Long
2017-07-28 18:34 ` [PATCH v3 2/5] fs/dcache: Report negative dentry number in dentry-state Waiman Long
2017-07-28 18:34 ` [PATCH v3 3/5] fs/dcache: Enable automatic pruning of negative dentries Waiman Long
2017-07-28 18:34 ` Waiman Long [this message]
2017-07-28 18:34 ` [PATCH v3 5/5] fs/dcache: Track count of negative dentries forcibly killed Waiman Long
2017-08-15 17:15 ` [PATCH v3 0/5] fs/dcache: Limit # of negative dentries Waiman Long
2017-08-16 10:33 ` Wangkai (Kevin,C)
2017-08-16 13:29 ` Waiman Long
2017-08-17 4:00 ` Wangkai (Kevin,C)
2017-08-17 13:04 ` Waiman Long
2017-08-18 9:59 ` Wangkai (Kevin,C)
2017-08-18 14:10 ` Waiman Long
2017-08-21 3:23 ` Wangkai (Kevin,C)
2017-08-21 13:34 ` Waiman Long
2017-08-22 2:59 ` Wangkai (Kevin,C)
2017-08-28 17:58 ` Waiman Long
2017-08-28 18:59 ` Waiman Long
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1501266880-26288-5-git-send-email-longman@redhat.com \
--to=longman@redhat.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=akpm@linux-foundation.org \
--cc=corbet@lwn.net \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lwoodman@redhat.com \
--cc=mingo@kernel.org \
--cc=mszeredi@redhat.com \
--cc=paulmck@linux.vnet.ibm.com \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).