From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: with ECARTIS (v1.0.0; list xfs); Tue, 25 Mar 2008 19:26:03 -0700 (PDT) Received: from larry.melbourne.sgi.com (larry.melbourne.sgi.com [134.14.52.130]) by oss.sgi.com (8.12.11.20060308/8.12.11/SuSE Linux 0.7) with SMTP id m2Q2PiCW027868 for ; Tue, 25 Mar 2008 19:25:46 -0700 Message-ID: <47E9B5E3.7050007@sgi.com> Date: Wed, 26 Mar 2008 13:33:07 +1100 From: Lachlan McIlroy Reply-To: lachlan@sgi.com MIME-Version: 1.0 Subject: Re: [PATCH] kill mrlock_t References: <20080320093940.GA28966@lst.de> In-Reply-To: <20080320093940.GA28966@lst.de> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Sender: xfs-bounce@oss.sgi.com Errors-to: xfs-bounce@oss.sgi.com List-Id: xfs To: Christoph Hellwig Cc: xfs@oss.sgi.com I like the idea but the i_lock_state thing needs work. See comments below. Christoph Hellwig wrote: > XFS inodes are locked via the xfs_ilock family of functions which > internally use a rw_semaphore wrapper into an abstraction called > mrlock_t. The mrlock_t should be purely internal to xfs_ilock functions > but leaks through to the callers via various lock state asserts. > > This patch: > > - adds a new xfs_isilocked abstraction to make the lock state asserts > fit into the xfs_ilock API family > - opencodes the mrlock wrappers in the xfs_ilock family of functions > - makes the state tracking debug-only and merged into a single state > word > - removes superfluous flags to the xfs_ilock family of functions > > This kills 8 bytes per inode for non-debug builds, which would e.g. > be the space for ACL caching on 32bit systems. 
> > > Signed-off-by: Christoph Hellwig > > */ > void > -xfs_ilock(xfs_inode_t *ip, > - uint lock_flags) > +xfs_ilock( > + xfs_inode_t *ip, > + uint lock_flags) > { > /* > * You can't set both SHARED and EXCL for the same lock, > @@ -608,16 +608,19 @@ xfs_ilock(xfs_inode_t *ip, > ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); > > if (lock_flags & XFS_IOLOCK_EXCL) { > - mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); > + down_write_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); > } else if (lock_flags & XFS_IOLOCK_SHARED) { > - mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); > + down_read_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); > } > if (lock_flags & XFS_ILOCK_EXCL) { > - mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); > + down_write_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); > } else if (lock_flags & XFS_ILOCK_SHARED) { > - mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); > + down_read_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); > } > xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address); > +#ifdef DEBUG > + ip->i_lock_state |= (lock_flags & (XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); > +#endif This seems racy - the "|=" is a non-atomic read-modify-write on a word shared by both locks, so if we only acquire one of the ilock/iolock exclusive and another thread concurrently acquires the other lock exclusive, the two updates to i_lock_state can race and one thread's flag can be lost. 
> } > > /* > @@ -634,11 +637,12 @@ xfs_ilock(xfs_inode_t *ip, > * > */ > int > -xfs_ilock_nowait(xfs_inode_t *ip, > - uint lock_flags) > +xfs_ilock_nowait( > + xfs_inode_t *ip, > + uint lock_flags) > { > - int iolocked; > - int ilocked; > + int iolocked; > + int ilocked; > > /* > * You can't set both SHARED and EXCL for the same lock, > @@ -653,35 +657,36 @@ xfs_ilock_nowait(xfs_inode_t *ip, > > iolocked = 0; > if (lock_flags & XFS_IOLOCK_EXCL) { > - iolocked = mrtryupdate(&ip->i_iolock); > - if (!iolocked) { > + iolocked = down_write_trylock(&ip->i_iolock); > + if (!iolocked) > return 0; > - } > } else if (lock_flags & XFS_IOLOCK_SHARED) { > - iolocked = mrtryaccess(&ip->i_iolock); > - if (!iolocked) { > + iolocked = down_read_trylock(&ip->i_iolock); > + if (!iolocked) > return 0; > - } > } > + > if (lock_flags & XFS_ILOCK_EXCL) { > - ilocked = mrtryupdate(&ip->i_lock); > - if (!ilocked) { > - if (iolocked) { > - mrunlock(&ip->i_iolock); > - } > - return 0; > - } > + ilocked = down_write_trylock(&ip->i_lock); > + if (!ilocked) > + goto out_ilock_fail; > } else if (lock_flags & XFS_ILOCK_SHARED) { > - ilocked = mrtryaccess(&ip->i_lock); > - if (!ilocked) { > - if (iolocked) { > - mrunlock(&ip->i_iolock); > - } > - return 0; > - } > + ilocked = down_read_trylock(&ip->i_lock); > + if (!ilocked) > + goto out_ilock_fail; > } > xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address); > +#ifdef DEBUG > + ip->i_lock_state |= (lock_flags & (XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); > +#endif Same deal, setting i_lock_state looks racy. 
> return 1; > + > + out_ilock_fail: > + if (lock_flags & XFS_IOLOCK_EXCL) > + up_write(&ip->i_iolock); > + else if (lock_flags & XFS_IOLOCK_SHARED) > + up_read(&ip->i_iolock); > + return 0; > } > > /* > @@ -697,8 +702,9 @@ xfs_ilock_nowait(xfs_inode_t *ip, > * > */ > void > -xfs_iunlock(xfs_inode_t *ip, > - uint lock_flags) > +xfs_iunlock( > + xfs_inode_t *ip, > + uint lock_flags) > { > /* > * You can't set both SHARED and EXCL for the same lock, > @@ -711,35 +717,33 @@ xfs_iunlock(xfs_inode_t *ip, > (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); > ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_IUNLOCK_NONOTIFY | > XFS_LOCK_DEP_MASK)) == 0); > + ASSERT(ip->i_lock_state & lock_flags); This assertion will fail whenever lock_flags contains only *_SHARED flags, since i_lock_state tracks only the EXCL bits - the mask will come out zero even though the lock is legitimately held shared. > ASSERT(lock_flags != 0); > > - if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) { > - ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) || > - (ismrlocked(&ip->i_iolock, MR_ACCESS))); > - ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) || > - (ismrlocked(&ip->i_iolock, MR_UPDATE))); > - mrunlock(&ip->i_iolock); > - } > - > - if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) { > - ASSERT(!(lock_flags & XFS_ILOCK_SHARED) || > - (ismrlocked(&ip->i_lock, MR_ACCESS))); > - ASSERT(!(lock_flags & XFS_ILOCK_EXCL) || > - (ismrlocked(&ip->i_lock, MR_UPDATE))); > - mrunlock(&ip->i_lock); > + if (lock_flags & XFS_IOLOCK_EXCL) > + up_write(&ip->i_iolock); > + else if (lock_flags & XFS_IOLOCK_SHARED) > + up_read(&ip->i_iolock); > + > + if (lock_flags & XFS_ILOCK_EXCL) > + up_write(&ip->i_lock); > + else if (lock_flags & (XFS_ILOCK_SHARED)) > + up_read(&ip->i_lock); > > + if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) && > + !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) { > /* > * Let the AIL know that this item has been unlocked in case > * it is in the AIL and anyone is waiting on it. Don't do > * this if the caller has asked us not to. 
> */ > - if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) && > - ip->i_itemp != NULL) { > - xfs_trans_unlocked_item(ip->i_mount, > - (xfs_log_item_t*)(ip->i_itemp)); > - } > + xfs_trans_unlocked_item(ip->i_mount, > + (xfs_log_item_t *)ip->i_itemp); > } > xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); > +#ifdef DEBUG > + ip->i_lock_state &= ~(lock_flags & (XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); > +#endif This seems racy - another thread could have acquired the ilock/iolock and set the flags before we get here and remove the flags. Unsetting the flags before we release the lock would still be racy if both iolock and ilock are released at the same time by different threads. > } > > /* > @@ -747,21 +751,42 @@ xfs_iunlock(xfs_inode_t *ip, > * if it is being demoted. > */ > void > -xfs_ilock_demote(xfs_inode_t *ip, > - uint lock_flags) > +xfs_ilock_demote( > + xfs_inode_t *ip, > + uint lock_flags) > { > ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); > ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); > + ASSERT(ip->i_lock_state & lock_flags); > > - if (lock_flags & XFS_ILOCK_EXCL) { > - ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); > - mrdemote(&ip->i_lock); > - } > - if (lock_flags & XFS_IOLOCK_EXCL) { > - ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE)); > - mrdemote(&ip->i_iolock); > - } > + if (lock_flags & XFS_ILOCK_EXCL) > + downgrade_write(&ip->i_lock); > + if (lock_flags & XFS_IOLOCK_EXCL) > + downgrade_write(&ip->i_iolock); > + > +#ifdef DEBUG > + ip->i_lock_state &= ~lock_flags; > +#endif > +} > + > +#ifdef DEBUG > +/* > + * Debug-only routine, without additional rw_semaphore APIs, we can > + * now only answer requests regarding whether we hold the lock for write > + * (reader state is outside our visibility, we only track writer state). > + * > + * Note: means !xfs_isilocked would give false positives, so don't do that. 
> + */ > +int > +xfs_isilocked( > + xfs_inode_t *ip, > + uint lock_flags) > +{ > + if (lock_flags & (XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)) > + return (ip->i_lock_state & lock_flags); > + return 1; > } > +#endif > Splitting i_lock_state into two separate fields - one for each lock - and unsetting the fields prior to releasing/demoting the lock might be enough to solve all these races.