From mboxrd@z Thu Jan 1 00:00:00 1970 From: Oleg Nesterov Subject: Re: __sb_start_write() && force_trylock hack Date: Fri, 21 Aug 2015 20:30:50 +0200 Message-ID: <20150821183050.GA19587@redhat.com> References: <20150818144900.GA26478@redhat.com> <20150819031158.GO714@dastard> <20150819150026.GA19317@redhat.com> <20150819232619.GR714@dastard> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: Jan Kara , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org To: Dave Chinner Return-path: Received: from mx1.redhat.com ([209.132.183.28]:45903 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751397AbbHUSdQ (ORCPT ); Fri, 21 Aug 2015 14:33:16 -0400 Content-Disposition: inline In-Reply-To: <20150819232619.GR714@dastard> Sender: linux-fsdevel-owner@vger.kernel.org List-ID: On 08/20, Dave Chinner wrote: > > On Wed, Aug 19, 2015 at 05:00:26PM +0200, Oleg Nesterov wrote: > > > > Yes, we hold SB_FREEZE_WRITE lock, so recursive SB_FREEZE_FS is safe. > > > > But, this means that the comment in __sb_start_write() is still correct, > > "XFS for example gets freeze protection on internal level twice" is true, > > and we can not remove this force_trylock hack. > > You've hit a very rare corner case of a rare corner case. Yes, I see, thanks. Just fyi, I ran the tests again with the stupid debugging patch below and I do not see anything new in dmesg. So perhaps xfs_create() which does the "recursive" xfs_trans_alloc() is the only source of double-lock in fs/xfs/. Oleg. --- diff --git a/fs/super.c b/fs/super.c index a38ad91..32b1846 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1153,13 +1153,57 @@ out: return ERR_PTR(error); } +void __sb_writers_acquired(struct super_block *sb, int level) +{ + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; + + WARN_ON(percpu_rwsem_is_held(sem)); + + percpu_rwsem_acquire(sem, 1, _RET_IP_); + + if (!lt->lock) { + lt->lock = sem; + save_stack_trace(<->trace); + } +} +EXPORT_SYMBOL(__sb_writers_acquired); + +void __sb_writers_release(struct super_block *sb, int level) +{ + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; + + WARN_ON(!lt->lock); + + percpu_rwsem_release(sem, 1, _RET_IP_); + + WARN_ON(percpu_rwsem_is_held(sem)); + if (lt->lock == sem) { + lt->lock = NULL; + lt->trace.nr_entries = 0; + } +} +EXPORT_SYMBOL(__sb_writers_release); + + /* * This is an internal function, please use sb_end_{write,pagefault,intwrite} * instead. */ void __sb_end_write(struct super_block *sb, int level) { + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; + + WARN_ON(!lt->lock); + percpu_up_read(sb->s_writers.rw_sem + level-1); + + if (lt->lock == sem && !percpu_rwsem_is_held(sem)) { + lt->lock = NULL; + lt->trace.nr_entries = 0; + } } EXPORT_SYMBOL(__sb_end_write); @@ -1169,10 +1213,22 @@ EXPORT_SYMBOL(__sb_end_write); */ int __sb_start_write(struct super_block *sb, int level, bool wait) { + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; bool force_trylock = false; int ret = 1; + WARN_ON(lt->lock == sem && !percpu_rwsem_is_held(sem)); + #ifdef CONFIG_LOCKDEP + if (wait && lt->lock == sem) { + pr_crit("XXXXX %s:%d lev=%d\n", current->comm, current->pid, level); + dump_stack(); + debug_show_held_locks(current); + pr_crit("Prev Trace:\n"); + print_stack_trace(<->trace, 0); + } + /* * We want lockdep to tell us about possible deadlocks with freezing * but it's it bit tricky to properly instrument it. Getting a freeze @@ -1198,6 +1254,10 @@ int __sb_start_write(struct super_block *sb, int level, bool wait) ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); WARN_ON(force_trylock & !ret); + if (ret && !lt->lock) { + lt->lock = sem; + save_stack_trace(<->trace); + } return ret; } EXPORT_SYMBOL(__sb_start_write); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 08d4fe4..33bf46a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -614,7 +614,7 @@ xfs_log_mount( int min_logfsbs; if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - xfs_notice(mp, "Mounting V%d Filesystem", + if (0) xfs_notice(mp, "Mounting V%d Filesystem", XFS_SB_VERSION_NUM(&mp->m_sb)); } else { xfs_notice(mp, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 480ebba..ed241dd 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4607,7 +4607,7 @@ xlog_recover_finish( : "internal"); log->l_flags &= ~XLOG_RECOVERY_NEEDED; } else { - xfs_info(log->l_mp, "Ending clean mount"); + if (0) xfs_info(log->l_mp, "Ending clean mount"); } return 0; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1fb16562..f680f3c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1573,7 +1573,7 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); - xfs_notice(mp, "Unmounting Filesystem"); + if (0) xfs_notice(mp, "Unmounting Filesystem"); xfs_filestream_unmount(mp); xfs_unmountfs(mp); diff --git a/include/linux/fs.h b/include/linux/fs.h index ce356f6..8514e65 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,10 +1385,8 @@ extern struct timespec current_fs_time(struct super_block *sb); void