From: Dave Chinner <david@fromorbit.com>
To: linux-xfs@vger.kernel.org
Subject: [PATCH 1/4] xfs: xfs_iflock is no longer a completion
Date: Tue, 23 Jun 2020 19:50:12 +1000 [thread overview]
Message-ID: <20200623095015.1934171-2-david@fromorbit.com> (raw)
In-Reply-To: <20200623095015.1934171-1-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
With the recent rework of the inode cluster flushing, we no longer
ever wait on the the inode flush "lock". It was never a lock in the
first place, just a completion to allow callers to wait for inode IO
to complete. We now never wait for flush completion as all inode
flushing is non-blocking. Hence we can get rid of all the iflock
infrastructure and instead just set and check a state flag.
Rename the XFS_IFLOCK flag to XFS_IFLUSHING, convert all the
xfs_iflock_nowait() test-and-set operations on that flag, and
replace all the xfs_ifunlock() calls to clear operations.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_icache.c | 19 ++++++------
fs/xfs/xfs_inode.c | 67 +++++++++++++++--------------------------
fs/xfs/xfs_inode.h | 33 +-------------------
fs/xfs/xfs_inode_item.c | 6 ++--
fs/xfs/xfs_inode_item.h | 4 +--
5 files changed, 39 insertions(+), 90 deletions(-)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index a973f180c6cd..0d73559f2d58 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -54,7 +54,6 @@ xfs_inode_alloc(
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
- ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
/* initialise the xfs inode */
@@ -125,7 +124,7 @@ void
xfs_inode_free(
struct xfs_inode *ip)
{
- ASSERT(!xfs_isiflocked(ip));
+ ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));
/*
* Because we use RCU freeing we need to ensure the inode always
@@ -999,7 +998,7 @@ xfs_reclaim_inode_grab(
* Do unlocked checks to see if the inode already is being flushed or in
* reclaim to avoid lock traffic.
*/
- if (__xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
+ if (__xfs_iflags_test(ip, XFS_IFLUSHING | XFS_IRECLAIM))
return 1;
/*
@@ -1045,23 +1044,23 @@ xfs_reclaim_inode(
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
goto out;
- if (!xfs_iflock_nowait(ip))
+ if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING))
goto out_iunlock;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
/* xfs_iflush_abort() drops the flush lock */
xfs_iflush_abort(ip);
+ ASSERT(!xfs_iflags_test(ip, XFS_IFLUSHING));
goto reclaim;
}
if (xfs_ipincount(ip))
- goto out_ifunlock;
+ goto out_clear_flush;
if (!xfs_inode_clean(ip))
- goto out_ifunlock;
+ goto out_clear_flush;
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
reclaim:
- ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always appears
@@ -1111,8 +1110,8 @@ xfs_reclaim_inode(
__xfs_inode_free(ip);
return true;
-out_ifunlock:
- xfs_ifunlock(ip);
+out_clear_flush:
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
out_iunlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bae84b3eeb9a..ba86d27b5226 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -598,22 +598,6 @@ xfs_lock_two_inodes(
}
}
-void
-__xfs_iflock(
- struct xfs_inode *ip)
-{
- wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
- DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
-
- do {
- prepare_to_wait_exclusive(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
- if (xfs_isiflocked(ip))
- io_schedule();
- } while (!xfs_iflock_nowait(ip));
-
- finish_wait(wq, &wait.wq_entry);
-}
-
STATIC uint
_xfs_dic2xflags(
uint16_t di_flags,
@@ -2547,11 +2531,8 @@ xfs_ifree_mark_inode_stale(
* valid, the wrong inode or stale.
*/
spin_lock(&ip->i_flags_lock);
- if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
- spin_unlock(&ip->i_flags_lock);
- rcu_read_unlock();
- return;
- }
+ if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
+ goto out_iflags_unlock;
/*
* Don't try to lock/unlock the current inode, but we _cannot_ skip the
@@ -2568,16 +2549,14 @@ xfs_ifree_mark_inode_stale(
}
}
ip->i_flags |= XFS_ISTALE;
- spin_unlock(&ip->i_flags_lock);
- rcu_read_unlock();
/*
- * If we can't get the flush lock, the inode is already attached. All
+ * If the inode is flushing, it is already attached to the buffer. All
* we needed to do here is mark the inode stale so buffer IO completion
* will remove it from the AIL.
*/
iip = ip->i_itemp;
- if (!xfs_iflock_nowait(ip)) {
+ if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
ASSERT(!list_empty(&iip->ili_item.li_bio_list));
ASSERT(iip->ili_last_fields);
goto out_iunlock;
@@ -2589,10 +2568,12 @@ xfs_ifree_mark_inode_stale(
* commit as the flock synchronises removal of the inode from the
* cluster buffer against inode reclaim.
*/
- if (!iip || list_empty(&iip->ili_item.li_bio_list)) {
- xfs_ifunlock(ip);
+ if (!iip || list_empty(&iip->ili_item.li_bio_list))
goto out_iunlock;
- }
+
+ __xfs_iflags_set(ip, XFS_IFLUSHING);
+ spin_unlock(&ip->i_flags_lock);
+ rcu_read_unlock();
/* we have a dirty inode in memory that has not yet been flushed. */
spin_lock(&iip->ili_lock);
@@ -2602,9 +2583,16 @@ xfs_ifree_mark_inode_stale(
spin_unlock(&iip->ili_lock);
ASSERT(iip->ili_last_fields);
+ if (ip != free_ip)
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return;
+
out_iunlock:
if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_iflags_unlock:
+ spin_unlock(&ip->i_flags_lock);
+ rcu_read_unlock();
}
/*
@@ -3459,7 +3447,7 @@ xfs_iflush(
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(xfs_isiflocked(ip));
+ ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
ASSERT(iip->ili_item.li_buf == bp);
@@ -3629,7 +3617,7 @@ xfs_iflush_cluster(
/*
* Quick and dirty check to avoid locks if possible.
*/
- if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK))
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
continue;
if (xfs_ipincount(ip))
continue;
@@ -3643,7 +3631,7 @@ xfs_iflush_cluster(
*/
spin_lock(&ip->i_flags_lock);
ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
- if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) {
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
spin_unlock(&ip->i_flags_lock);
continue;
}
@@ -3651,23 +3639,16 @@ xfs_iflush_cluster(
/*
* ILOCK will pin the inode against reclaim and prevent
* concurrent transactions modifying the inode while we are
- * flushing the inode.
+ * flushing the inode. If we get the lock, set the flushing
+ * state before we drop the i_flags_lock.
*/
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
spin_unlock(&ip->i_flags_lock);
continue;
}
+ __xfs_iflags_set(ip, XFS_IFLUSHING);
spin_unlock(&ip->i_flags_lock);
- /*
- * Skip inodes that are already flush locked as they have
- * already been written to the buffer.
- */
- if (!xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- continue;
- }
-
/*
* Abort flushing this inode if we are shut down because the
* inode may not currently be in the AIL. This can occur when
@@ -3686,7 +3667,7 @@ xfs_iflush_cluster(
/* don't block waiting on a log force to unpin dirty inodes */
if (xfs_ipincount(ip)) {
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
continue;
}
@@ -3694,7 +3675,7 @@ xfs_iflush_cluster(
if (!xfs_inode_clean(ip))
error = xfs_iflush(ip, bp);
else
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (error)
break;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7a8adb76c17f..991ef00370d5 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -211,8 +211,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
#define XFS_INEW (1 << __XFS_INEW_BIT)
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
-#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
-#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
+#define XFS_IFLUSHING (1 << 7) /* inode is being flushed */
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
#define XFS_IEOFBLOCKS (1 << 9) /* has the preallocblocks tag set */
@@ -233,36 +232,6 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)
-/*
- * Synchronize processes attempting to flush the in-core inode back to disk.
- */
-
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
- return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
-extern void __xfs_iflock(struct xfs_inode *ip);
-
-static inline int xfs_iflock_nowait(struct xfs_inode *ip)
-{
- return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
-}
-
-static inline void xfs_iflock(struct xfs_inode *ip)
-{
- if (!xfs_iflock_nowait(ip))
- __xfs_iflock(ip);
-}
-
-static inline void xfs_ifunlock(struct xfs_inode *ip)
-{
- ASSERT(xfs_isiflocked(ip));
- xfs_iflags_clear(ip, XFS_IFLOCK);
- smp_mb();
- wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
-}
-
/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 3840117f8a5e..0494b907c63d 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -492,7 +492,7 @@ xfs_inode_item_push(
return XFS_ITEM_PINNED;
/* If the inode is already flush locked, we're already flushing. */
- if (xfs_isiflocked(ip))
+ if (xfs_iflags_test(ip, XFS_IFLUSHING))
return XFS_ITEM_FLUSHING;
if (!xfs_buf_trylock(bp))
@@ -702,7 +702,7 @@ xfs_iflush_finish(
iip->ili_last_fields = 0;
iip->ili_flush_lsn = 0;
spin_unlock(&iip->ili_lock);
- xfs_ifunlock(iip->ili_inode);
+ xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
if (drop_buffer)
xfs_buf_rele(bp);
}
@@ -789,7 +789,7 @@ xfs_iflush_abort(
list_del_init(&iip->ili_item.li_bio_list);
spin_unlock(&iip->ili_lock);
}
- xfs_ifunlock(ip);
+ xfs_iflags_clear(ip, XFS_IFLUSHING);
if (bp)
xfs_buf_rele(bp);
}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 048b5e7dee90..23a7b4928727 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -25,8 +25,8 @@ struct xfs_inode_log_item {
*
* We need atomic changes between inode dirtying, inode flushing and
* inode completion, but these all hold different combinations of
- * ILOCK and iflock and hence we need some other method of serialising
- * updates to the flush state.
+ * ILOCK and IFLUSHING and hence we need some other method of
+ * serialising updates to the flush state.
*/
spinlock_t ili_lock; /* flush state lock */
unsigned int ili_last_fields; /* fields when flushed */
--
2.26.2.761.g0e0b3e54be
next prev parent reply other threads:[~2020-06-23 9:50 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-23 9:50 [PATCH 0/4] [RFC] xfs: in memory inode unlink log items Dave Chinner
2020-06-23 9:50 ` Dave Chinner [this message]
2020-06-24 15:36 ` [PATCH 1/4] xfs: xfs_iflock is no longer a completion Brian Foster
2020-07-01 5:48 ` Dave Chinner
2020-06-23 9:50 ` [PATCH 2/4] xfs: add log item precommit operation Dave Chinner
2020-06-30 18:06 ` Darrick J. Wong
2020-07-01 14:30 ` Brian Foster
2020-07-01 22:02 ` Dave Chinner
2020-06-23 9:50 ` [PATCH 3/4] xfs: track unlinked inodes in core inode Dave Chinner
2020-07-01 8:59 ` Gao Xiang
2020-07-01 22:06 ` Dave Chinner
2020-07-01 14:31 ` Brian Foster
2020-07-01 22:18 ` Dave Chinner
2020-07-02 12:24 ` Brian Foster
2020-07-07 14:39 ` Gao Xiang
2020-06-23 9:50 ` [PATCH 4/4] xfs: introduce inode unlink log item Dave Chinner
2020-06-30 18:19 ` Darrick J. Wong
2020-06-30 22:31 ` Gao Xiang
2020-07-01 6:26 ` Dave Chinner
2020-07-01 14:32 ` Brian Foster
2020-07-01 22:24 ` Dave Chinner
2020-07-02 12:25 ` Brian Foster
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200623095015.1934171-2-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).