linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: Alexander Viro <viro@zeniv.linux.org.uk>,
	Jonathan Corbet <corbet@lwn.net>
Cc: linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-fsdevel@vger.kernel.org,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ingo Molnar <mingo@kernel.org>,
	Miklos Szeredi <mszeredi@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Larry Woodman <lwoodman@redhat.com>,
	Waiman Long <longman@redhat.com>
Subject: [PATCH v2 4/4] fs/dcache: Protect negative dentry pruning from racing with umount
Date: Fri, 21 Jul 2017 09:43:10 -0400	[thread overview]
Message-ID: <1500644590-6599-5-git-send-email-longman@redhat.com> (raw)
In-Reply-To: <1500644590-6599-1-git-send-email-longman@redhat.com>

Negative dentry pruning is done on the specific super_block recorded
in the ndblk.prune_sb variable. If that super_block is being unmounted
concurrently, its contents may no longer be valid.

To protect against such a race condition, a new lock is added to
the ndblk structure to synchronize negative dentry pruning with the
umount operation. This is a regular spinlock rather than a raw one,
as the pruning operation can be quite time consuming.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 fs/dcache.c | 42 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index c2ea876..a3159f3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -139,11 +139,13 @@ struct dentry_stat_t dentry_stat = {
 static long neg_dentry_nfree_init __read_mostly; /* Free pool initial value */
 static struct {
 	raw_spinlock_t nfree_lock;
+	spinlock_t prune_lock;		/* Lock for protecting pruning */
 	long nfree;			/* Negative dentry free pool */
 	struct super_block *prune_sb;	/* Super_block for pruning */
 	int neg_count, prune_count;	/* Pruning counts */
 } ndblk ____cacheline_aligned_in_smp;
 
+static void clear_prune_sb_for_umount(struct super_block *sb);
 static void prune_negative_dentry(struct work_struct *work);
 static DECLARE_DELAYED_WORK(prune_neg_dentry_work, prune_negative_dentry);
 
@@ -1323,6 +1325,7 @@ void shrink_dcache_sb(struct super_block *sb)
 {
 	long freed;
 
+	clear_prune_sb_for_umount(sb);
 	do {
 		LIST_HEAD(dispose);
 
@@ -1353,7 +1356,8 @@ static enum lru_status dentry_negative_lru_isolate(struct list_head *item,
 	 * list.
 	 */
 	if ((ndblk.neg_count >= NEG_DENTRY_BATCH) ||
-	    (ndblk.prune_count >= NEG_DENTRY_BATCH)) {
+	    (ndblk.prune_count >= NEG_DENTRY_BATCH) ||
+	    !READ_ONCE(ndblk.prune_sb)) {
 		ndblk.prune_count = 0;
 		return LRU_STOP;
 	}
@@ -1408,15 +1412,24 @@ static enum lru_status dentry_negative_lru_isolate(struct list_head *item,
 static void prune_negative_dentry(struct work_struct *work)
 {
 	int freed;
-	struct super_block *sb = READ_ONCE(ndblk.prune_sb);
+	struct super_block *sb;
 	LIST_HEAD(dispose);
 
-	if (!sb)
+	/*
+	 * The prune_lock is used to protect negative dentry pruning from
+	 * racing with concurrent umount operation.
+	 */
+	spin_lock(&ndblk.prune_lock);
+	sb = READ_ONCE(ndblk.prune_sb);
+	if (!sb) {
+		spin_unlock(&ndblk.prune_lock);
 		return;
+	}
 
 	ndblk.neg_count = ndblk.prune_count = 0;
 	freed = list_lru_walk(&sb->s_dentry_lru, dentry_negative_lru_isolate,
 			      &dispose, NEG_DENTRY_BATCH);
+	spin_unlock(&ndblk.prune_lock);
 
 	if (freed)
 		shrink_dentry_list(&dispose);
@@ -1433,6 +1446,27 @@ static void prune_negative_dentry(struct work_struct *work)
 		WRITE_ONCE(ndblk.prune_sb, NULL);
 }
 
+/*
+ * This is called before an umount to clear ndblk.prune_sb if it
+ * matches the given super_block.
+ */
+static void clear_prune_sb_for_umount(struct super_block *sb)
+{
+	if (likely(READ_ONCE(ndblk.prune_sb) != sb))
+		return;
+	WRITE_ONCE(ndblk.prune_sb, NULL);
+	/*
+	 * Need to wait until an ongoing pruning operation, if present,
+	 * is completed.
+	 *
+	 * Clearing ndblk.prune_sb will hasten the completion of pruning.
+	 * In the unlikely event that ndblk.prune_sb is set to another
+	 * super_block, the wait will last for the entire pruning operation,
+	 * which shouldn't be that long either.
+	 */
+	spin_unlock_wait(&ndblk.prune_lock);
+}
+
 /**
  * enum d_walk_ret - action to talke during tree walk
  * @D_WALK_CONTINUE:	contrinue walk
@@ -1755,6 +1789,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
 
 	WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
 
+	clear_prune_sb_for_umount(sb);
 	dentry = sb->s_root;
 	sb->s_root = NULL;
 	do_one_tree(dentry);
@@ -3857,6 +3892,7 @@ static void __init neg_dentry_init(void)
 	unsigned long cnt;
 
 	raw_spin_lock_init(&ndblk.nfree_lock);
+	spin_lock_init(&ndblk.prune_lock);
 
 	/* 20% in global pool & 80% in percpu free */
 	ndblk.nfree = neg_dentry_nfree_init
-- 
1.8.3.1

      parent reply	other threads:[~2017-07-21 13:43 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-21 13:43 [PATCH v2 0/4] fs/dcache: Limit # of negative dentries Waiman Long
2017-07-21 13:43 ` [PATCH v2 1/4] fs/dcache: Limit numbers " Waiman Long
2017-07-21 13:43 ` [PATCH v2 2/4] fs/dcache: Report negative dentry number in dentry-state Waiman Long
2017-07-21 13:43 ` [PATCH v2 3/4] fs/dcache: Enable automatic pruning of negative dentries Waiman Long
2017-07-21 19:30   ` James Bottomley
2017-07-21 20:17     ` Waiman Long
2017-07-21 23:07       ` James Bottomley
2017-07-24 15:54         ` Waiman Long
2017-07-21 13:43 ` Waiman Long [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1500644590-6599-5-git-send-email-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lwoodman@redhat.com \
    --cc=mingo@kernel.org \
    --cc=mszeredi@redhat.com \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).