From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues
Date: Mon, 29 Nov 2010 12:38:27 +1100 [thread overview]
Message-ID: <1290994712-21376-10-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1290994712-21376-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The log grant ticket wait queues are currently protected by the log
grant lock. However, the queues are functionally independent from
each other, and operations on them only require serialisation
against other queue operations now that all of the other log
variables they use are atomic values.
Hence, we can make them independent of the grant lock by introducing
new locks just to protect the list operations. Because the lists
are independent, we can use a lock per list and ensure that reserve
and write head queuing do not contend.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 116 +++++++++++++++++++++++++++++++++++++------------
fs/xfs/xfs_log_priv.h | 16 +++++--
2 files changed, 100 insertions(+), 32 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6298310..8365496 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -727,12 +727,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (tail_lsn != 1)
atomic64_set(&log->l_tail_lsn, tail_lsn);
- spin_lock(&log->l_grant_lock);
- if (!list_empty(&log->l_writeq)) {
+ if (!list_empty_careful(&log->l_writeq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
+ spin_lock(&log->l_grant_write_lock);
free_bytes = xlog_space_left(log->l_logsize,
atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_grant_write_head));
@@ -745,13 +745,15 @@ xfs_log_move_tail(xfs_mount_t *mp,
free_bytes -= tic->t_unit_res;
wake_up(&tic->t_wait);
}
+ spin_unlock(&log->l_grant_write_lock);
}
- if (!list_empty(&log->l_reserveq)) {
+ if (!list_empty_careful(&log->l_reserveq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
+ spin_lock(&log->l_grant_reserve_lock);
free_bytes = xlog_space_left(log->l_logsize,
atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_grant_reserve_head));
@@ -766,9 +768,9 @@ xfs_log_move_tail(xfs_mount_t *mp,
free_bytes -= need_bytes;
wake_up(&tic->t_wait);
}
+ spin_unlock(&log->l_grant_reserve_lock);
}
- spin_unlock(&log->l_grant_lock);
-} /* xfs_log_move_tail */
+}
/*
* Determine if we have a transaction that has gone to disk
@@ -1056,6 +1058,8 @@ xlog_alloc_log(xfs_mount_t *mp,
atomic64_set(&log->l_grant_write_head, xlog_assign_lsn(1, 0));
INIT_LIST_HEAD(&log->l_reserveq);
INIT_LIST_HEAD(&log->l_writeq);
+ spin_lock_init(&log->l_grant_reserve_lock);
+ spin_lock_init(&log->l_grant_write_lock);
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -2525,6 +2529,18 @@ restart:
*
* Once a ticket gets put onto the reserveq, it will only return after
* the needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off the reserveq under the
+ * l_grant_reserve_lock, we only need to take that lock if we are going
+ * to add the ticket to the queue and sleep. We can avoid taking the lock if the
+ * ticket was never added to the reserveq because the t_queue list head will be
+ * empty and we hold the only reference to it so it can safely be checked
+ * unlocked.
*/
STATIC int
xlog_grant_log_space(
@@ -2540,8 +2556,6 @@ xlog_grant_log_space(
panic("grant Recovery problem");
#endif
- /* Is there space or do we need to sleep? */
- spin_lock(&log->l_grant_lock);
trace_xfs_log_grant_enter(log, tic);
ASSERT(list_empty(&tic->t_queue));
@@ -2561,9 +2575,19 @@ redo:
* are not already on the queue, we need to wait.
*/
if (free_bytes < need_bytes ||
- (!list_empty(&log->l_reserveq) && list_empty(&tic->t_queue))) {
- if (list_empty(&tic->t_queue))
+ (list_empty(&tic->t_queue) &&
+ !list_empty_careful(&log->l_reserveq))) {
+
+ spin_lock(&log->l_grant_reserve_lock);
+ if (list_empty(&tic->t_queue)) {
+ /* recheck the queue now we are locked */
+ if (list_empty(&log->l_reserveq) &&
+ free_bytes >= need_bytes) {
+ spin_unlock(&log->l_grant_reserve_lock);
+ goto redo;
+ }
list_add_tail(&tic->t_queue, &log->l_reserveq);
+ }
xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
@@ -2572,20 +2596,29 @@ redo:
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
+ /* co-ordinate with xfs_log_force_shutdown */
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ spin_unlock(&log->l_grant_reserve_lock);
+ goto error_return;
+ }
add_wait_queue_exclusive(&tic->t_wait, &wait);
__set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&log->l_grant_lock);
+ spin_unlock(&log->l_grant_reserve_lock);
schedule();
remove_wait_queue(&tic->t_wait, &wait);
- spin_lock(&log->l_grant_lock);
trace_xfs_log_grant_wake(log, tic);
goto redo;
}
- list_del_init(&tic->t_queue);
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_reserve_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
+ }
/* we've got enough space */
+ spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, need_bytes);
trace_xfs_log_grant_exit(log, tic);
@@ -2595,7 +2628,9 @@ redo:
return 0;
error_return:
+ spin_lock(&log->l_grant_reserve_lock);
list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
trace_xfs_log_grant_error(log, tic);
/*
@@ -2605,13 +2640,15 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
}
/*
* Replenish the byte reservation required by moving the grant write head.
+ *
+ * Similar to xlog_grant_log_space, the function is structured to have a lock
+ * free fast path.
*/
STATIC int
xlog_regrant_write_log_space(
@@ -2633,7 +2670,6 @@ xlog_regrant_write_log_space(
panic("regrant Recovery problem");
#endif
- spin_lock(&log->l_grant_lock);
trace_xfs_log_regrant_write_enter(log, tic);
ASSERT(list_empty(&tic->t_queue));
@@ -2650,15 +2686,26 @@ redo:
* are not already on the queue, we need to wait.
*/
if (free_bytes < need_bytes ||
- (!list_empty(&log->l_writeq) && list_empty(&tic->t_queue))) {
+ (list_empty(&tic->t_queue) &&
+ !list_empty_careful(&log->l_writeq))) {
+
+ spin_lock(&log->l_grant_write_lock);
if (list_empty(&tic->t_queue)) {
+ struct xlog_ticket *ntic;
+ int woke_all = 1;
+
+ /* recheck the queue now we are locked */
+ if (list_empty(&log->l_writeq) &&
+ free_bytes >= need_bytes) {
+ spin_unlock(&log->l_grant_write_lock);
+ goto redo;
+ }
+
/*
* give existing waiters a chance at logspace before
* us. If we woke all the waiters, then immediately
* retry the space, otherwise sleep first.
*/
- struct xlog_ticket *ntic;
- int woke_all = 1;
list_for_each_entry(ntic, &log->l_writeq, t_queue) {
ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -2670,8 +2717,10 @@ redo:
wake_up(&ntic->t_wait);
}
list_add_tail(&tic->t_queue, &log->l_writeq);
- if (woke_all)
+ if (woke_all) {
+ spin_unlock(&log->l_grant_write_lock);
goto redo;
+ }
}
xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
@@ -2681,20 +2730,29 @@ redo:
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_regrant_write_sleep(log, tic);
+ /* co-ordinate with xfs_log_force_shutdown */
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ spin_unlock(&log->l_grant_write_lock);
+ goto error_return;
+ }
add_wait_queue_exclusive(&tic->t_wait, &wait);
__set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&log->l_grant_lock);
+ spin_unlock(&log->l_grant_write_lock);
schedule();
remove_wait_queue(&tic->t_wait, &wait);
- spin_lock(&log->l_grant_lock);
trace_xfs_log_regrant_write_wake(log, tic);
goto redo;
}
- list_del_init(&tic->t_queue);
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_write_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
+ }
/* we've got enough space */
+ spin_lock(&log->l_grant_lock);
xlog_grant_add_space_write(log, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
@@ -2705,7 +2763,9 @@ redo:
error_return:
+ spin_lock(&log->l_grant_write_lock);
list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
trace_xfs_log_regrant_write_error(log, tic);
/*
@@ -2715,7 +2775,6 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
}
@@ -3676,12 +3735,10 @@ xfs_log_force_umount(
xlog_cil_force(log);
/*
- * We must hold both the GRANT lock and the LOG lock,
- * before we mark the filesystem SHUTDOWN and wake
- * everybody up to tell the bad news.
+ * mark the filesystem and the log as in a shutdown state and wake
+ * everybody up to tell them the bad news.
*/
spin_lock(&log->l_icloglock);
- spin_lock(&log->l_grant_lock);
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
XFS_BUF_DONE(mp->m_sb_bp);
@@ -3706,14 +3763,17 @@ xfs_log_force_umount(
* means we have to wake up everybody queued up on reserveq as well as
* writeq. In addition, we make sure in xlog_{re}grant_log_space that
* we don't enqueue anything once the SHUTDOWN flag is set, and this
- * action is protected by the GRANTLOCK.
+ * action is protected by the grant locks.
*/
+ spin_lock(&log->l_grant_reserve_lock);
list_for_each_entry(tic, &log->l_reserveq, t_queue)
wake_up(&tic->t_wait);
+ spin_unlock(&log->l_grant_reserve_lock);
+ spin_lock(&log->l_grant_write_lock);
list_for_each_entry(tic, &log->l_writeq, t_queue)
wake_up(&tic->t_wait);
- spin_unlock(&log->l_grant_lock);
+ spin_unlock(&log->l_grant_write_lock);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 971dc8a..621002c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -514,10 +514,6 @@ typedef struct log {
/* The following block of fields are changed while holding grant_lock */
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
- struct list_head l_reserveq;
- struct list_head l_writeq;
- atomic64_t l_grant_reserve_head;
- atomic64_t l_grant_write_head;
/*
* l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
@@ -530,6 +526,18 @@ typedef struct log {
/* lsn of 1st LR with unflushed * buffers */
atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
+ /*
+ * ticket grant locks, queues and accounting have their own cachelines
+ * as these are quite hot and can be operated on concurrently.
+ */
+ spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
+ struct list_head l_reserveq;
+ atomic64_t l_grant_reserve_head;
+
+ spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
+ struct list_head l_writeq;
+ atomic64_t l_grant_write_head;
+
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS];
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-11-29 1:37 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
2010-11-29 1:38 ` [PATCH 01/14] xfs: convert log grant ticket queues to list heads Dave Chinner
2010-11-30 22:59 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 02/14] xfs: clean up log space grant functions Dave Chinner
2010-12-01 12:30 ` Christoph Hellwig
2010-12-02 1:48 ` Dave Chinner
2010-12-02 11:40 ` Christoph Hellwig
2010-12-03 6:45 ` Dave Chinner
2010-11-29 1:38 ` [PATCH 03/14] xfs: convert log grant heads to LSN notation Dave Chinner
2010-12-01 12:42 ` Christoph Hellwig
2010-12-02 1:49 ` Dave Chinner
2010-12-01 13:05 ` Christoph Hellwig
2010-12-02 2:01 ` Dave Chinner
2010-12-02 11:47 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 04/14] xfs: use wait queues directly for log grant queues Dave Chinner
2010-12-01 12:34 ` Christoph Hellwig
2010-12-02 2:02 ` Dave Chinner
2010-11-29 1:38 ` [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock Dave Chinner
2010-12-01 12:45 ` Christoph Hellwig
2010-12-02 2:04 ` Dave Chinner
2010-11-29 1:38 ` [PATCH 06/14] xfs: convert l_last_sync_lsn to an atomic variable Dave Chinner
2010-12-01 12:54 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 07/14] xfs: convert l_tail_lsn " Dave Chinner
2010-12-01 12:56 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 08/14] xfs: convert log grant heads to atomic variables Dave Chinner
2010-12-01 12:59 ` Christoph Hellwig
2010-12-02 2:04 ` Dave Chinner
2010-11-29 1:38 ` Dave Chinner [this message]
2010-12-01 13:12 ` [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues Christoph Hellwig
2010-12-02 2:10 ` Dave Chinner
2010-12-02 11:48 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm Dave Chinner
2010-12-01 13:15 ` Christoph Hellwig
2010-12-02 2:11 ` Dave Chinner
2010-11-29 1:38 ` [PATCH 11/14] xfs: remove log grant lock Dave Chinner
2010-12-01 13:15 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 12/14] xfs: kill useless spinlock_destroy macro Dave Chinner
2010-12-01 13:19 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 13/14] xfs: replace use of sv_t with waitqueues in the log Dave Chinner
2010-12-01 13:20 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 14/14] xfs: remove sv wrappers Dave Chinner
2010-12-01 13:20 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1290994712-21376-10-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox