linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Oleg Nesterov <oleg@redhat.com>
To: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: __sb_start_write() && force_trylock hack
Date: Fri, 21 Aug 2015 20:30:50 +0200	[thread overview]
Message-ID: <20150821183050.GA19587@redhat.com> (raw)
In-Reply-To: <20150819232619.GR714@dastard>

On 08/20, Dave Chinner wrote:
>
> On Wed, Aug 19, 2015 at 05:00:26PM +0200, Oleg Nesterov wrote:
> >
> > Yes, we hold SB_FREEZE_WRITE lock, so recursive SB_FREEZE_FS is safe.
> >
> > But, this means that the comment in __sb_start_write() is still correct,
> > "XFS for example gets freeze protection on internal level twice" is true,
> > and we can not remove this force_trylock hack.
>
> You've hit a very rare corner case of a rare corner case.

Yes, I see, thanks.

Just fyi, I ran the tests again with the stupid debugging patch below
and I do not see anything new in dmesg.

So perhaps xfs_create() which does the "recursive" xfs_trans_alloc()
is the only source of double-lock in fs/xfs/.

Oleg.
---

diff --git a/fs/super.c b/fs/super.c
index a38ad91..32b1846 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1153,13 +1153,57 @@ out:
 	return ERR_PTR(error);
 }
 
+void __sb_writers_acquired(struct super_block *sb, int level)
+{
+	struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1;
+	struct ltrace *lt = current->ltrace + level-1;
+
+	WARN_ON(percpu_rwsem_is_held(sem));
+
+	percpu_rwsem_acquire(sem, 1, _RET_IP_);
+
+	if (!lt->lock) {
+		lt->lock = sem;
+		save_stack_trace(&lt->trace);
+	}
+}
+EXPORT_SYMBOL(__sb_writers_acquired);
+
+void __sb_writers_release(struct super_block *sb, int level)
+{
+	struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1;
+	struct ltrace *lt = current->ltrace + level-1;
+
+	WARN_ON(!lt->lock);
+
+	percpu_rwsem_release(sem, 1, _RET_IP_);
+
+	WARN_ON(percpu_rwsem_is_held(sem));
+	if (lt->lock == sem) {
+		lt->lock = NULL;
+		lt->trace.nr_entries = 0;
+	}
+}
+EXPORT_SYMBOL(__sb_writers_release);
+
+
 /*
  * This is an internal function, please use sb_end_{write,pagefault,intwrite}
  * instead.
  */
 void __sb_end_write(struct super_block *sb, int level)
 {
+	struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1;
+	struct ltrace *lt = current->ltrace + level-1;
+
+	WARN_ON(!lt->lock);
+
 	percpu_up_read(sb->s_writers.rw_sem + level-1);
+
+	if (lt->lock == sem && !percpu_rwsem_is_held(sem)) {
+		lt->lock = NULL;
+		lt->trace.nr_entries = 0;
+	}
 }
 EXPORT_SYMBOL(__sb_end_write);
 
@@ -1169,10 +1213,22 @@ EXPORT_SYMBOL(__sb_end_write);
  */
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
+	struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1;
+	struct ltrace *lt = current->ltrace + level-1;
 	bool force_trylock = false;
 	int ret = 1;
 
+	WARN_ON(lt->lock == sem && !percpu_rwsem_is_held(sem));
+
 #ifdef CONFIG_LOCKDEP
+	if (wait && lt->lock == sem) {
+		pr_crit("XXXXX %s:%d lev=%d\n", current->comm, current->pid, level);
+		dump_stack();
+		debug_show_held_locks(current);
+		pr_crit("Prev Trace:\n");
+		print_stack_trace(&lt->trace, 0);
+	}
+
 	/*
 	 * We want lockdep to tell us about possible deadlocks with freezing
 	 * but it's it bit tricky to properly instrument it. Getting a freeze
@@ -1198,6 +1254,10 @@ int __sb_start_write(struct super_block *sb, int level, bool wait)
 		ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
 
 	WARN_ON(force_trylock & !ret);
+	if (ret && !lt->lock) {
+		lt->lock = sem;
+		save_stack_trace(&lt->trace);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(__sb_start_write);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 08d4fe4..33bf46a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -614,7 +614,7 @@ xfs_log_mount(
 	int		min_logfsbs;
 
 	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
-		xfs_notice(mp, "Mounting V%d Filesystem",
+		if (0) xfs_notice(mp, "Mounting V%d Filesystem",
 			   XFS_SB_VERSION_NUM(&mp->m_sb));
 	} else {
 		xfs_notice(mp,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 480ebba..ed241dd 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4607,7 +4607,7 @@ xlog_recover_finish(
 						     : "internal");
 		log->l_flags &= ~XLOG_RECOVERY_NEEDED;
 	} else {
-		xfs_info(log->l_mp, "Ending clean mount");
+		if (0) xfs_info(log->l_mp, "Ending clean mount");
 	}
 	return 0;
 }
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 1fb16562..f680f3c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1573,7 +1573,7 @@ xfs_fs_put_super(
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
-	xfs_notice(mp, "Unmounting Filesystem");
+	if (0) xfs_notice(mp, "Unmounting Filesystem");
 	xfs_filestream_unmount(mp);
 	xfs_unmountfs(mp);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ce356f6..8514e65 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1385,10 +1385,8 @@ extern struct timespec current_fs_time(struct super_block *sb);
 void


      reply	other threads:[~2015-08-21 18:33 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-18 14:49 __sb_start_write() && force_trylock hack Oleg Nesterov
2015-08-18 15:18 ` Oleg Nesterov
2015-08-19  3:25   ` Dave Chinner
2015-08-19  3:11 ` Dave Chinner
2015-08-19 15:00   ` Oleg Nesterov
2015-08-19 23:26     ` Dave Chinner
2015-08-21 18:30       ` Oleg Nesterov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150821183050.GA19587@redhat.com \
    --to=oleg@redhat.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).