* [PATCH 01/14] xfs: convert log grant ticket queues to list heads
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-11-30 22:59 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 02/14] xfs: clean up log space grant functions Dave Chinner
` (12 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The grant write and reserve queues use a roll-your-own doubly linked
list, so convert it to a standard list_head structure and convert
all the list traversals to use list_for_each_entry(). We can also
get rid of the XLOG_TIC_IN_Q flag as we can use the list_empty()
check to tell if the ticket is in a list or not.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_trace.h | 12 ++--
fs/xfs/xfs_log.c | 123 ++++++++++++++----------------------------
fs/xfs/xfs_log_priv.h | 11 ++---
3 files changed, 51 insertions(+), 95 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index acef2e9..f400668 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -766,8 +766,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__field(int, curr_res)
__field(int, unit_res)
__field(unsigned int, flags)
- __field(void *, reserve_headq)
- __field(void *, write_headq)
+ __field(void *, reserveq)
+ __field(void *, writeq)
__field(int, grant_reserve_cycle)
__field(int, grant_reserve_bytes)
__field(int, grant_write_cycle)
@@ -784,8 +784,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_res = tic->t_curr_res;
__entry->unit_res = tic->t_unit_res;
__entry->flags = tic->t_flags;
- __entry->reserve_headq = log->l_reserve_headq;
- __entry->write_headq = log->l_write_headq;
+ __entry->reserveq = log->l_reserveq.next;
+ __entry->writeq = log->l_writeq.next;
__entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
__entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
__entry->grant_write_cycle = log->l_grant_write_cycle;
@@ -807,8 +807,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->curr_res,
__entry->unit_res,
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
- __entry->reserve_headq,
- __entry->write_headq,
+ __entry->reserveq,
+ __entry->writeq,
__entry->grant_reserve_cycle,
__entry->grant_reserve_bytes,
__entry->grant_write_cycle,
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index cee4ab9..1b82735 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -95,38 +95,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
STATIC int xlog_iclogs_empty(xlog_t *log);
-
-static void
-xlog_ins_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
- if (*qp) {
- tic->t_next = (*qp);
- tic->t_prev = (*qp)->t_prev;
- (*qp)->t_prev->t_next = tic;
- (*qp)->t_prev = tic;
- } else {
- tic->t_prev = tic->t_next = tic;
- *qp = tic;
- }
-
- tic->t_flags |= XLOG_TIC_IN_Q;
-}
-
-static void
-xlog_del_ticketq(struct xlog_ticket **qp, struct xlog_ticket *tic)
-{
- if (tic == tic->t_next) {
- *qp = NULL;
- } else {
- *qp = tic->t_next;
- tic->t_next->t_prev = tic->t_prev;
- tic->t_prev->t_next = tic->t_next;
- }
-
- tic->t_next = tic->t_prev = NULL;
- tic->t_flags &= ~XLOG_TIC_IN_Q;
-}
-
static void
xlog_grant_sub_space(struct log *log, int bytes)
{
@@ -724,7 +692,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
log->l_tail_lsn = tail_lsn;
}
- if ((tic = log->l_write_headq)) {
+ if (!list_empty(&log->l_writeq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
@@ -732,7 +700,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
cycle = log->l_grant_write_cycle;
bytes = log->l_grant_write_bytes;
free_bytes = xlog_space_left(log, cycle, bytes);
- do {
+ list_for_each_entry(tic, &log->l_writeq, t_queue) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
if (free_bytes < tic->t_unit_res && tail_lsn != 1)
@@ -740,10 +708,10 @@ xfs_log_move_tail(xfs_mount_t *mp,
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_write_headq);
+ }
}
- if ((tic = log->l_reserve_headq)) {
+
+ if (!list_empty(&log->l_reserveq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
@@ -751,7 +719,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
cycle = log->l_grant_reserve_cycle;
bytes = log->l_grant_reserve_bytes;
free_bytes = xlog_space_left(log, cycle, bytes);
- do {
+ list_for_each_entry(tic, &log->l_reserveq, t_queue) {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
need_bytes = tic->t_unit_res*tic->t_cnt;
else
@@ -761,8 +729,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
tail_lsn = 0;
free_bytes -= need_bytes;
sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_reserve_headq);
+ }
}
spin_unlock(&log->l_grant_lock);
} /* xfs_log_move_tail */
@@ -1053,6 +1020,8 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
log->l_grant_reserve_cycle = 1;
log->l_grant_write_cycle = 1;
+ INIT_LIST_HEAD(&log->l_reserveq);
+ INIT_LIST_HEAD(&log->l_writeq);
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -2550,8 +2519,8 @@ xlog_grant_log_space(xlog_t *log,
trace_xfs_log_grant_enter(log, tic);
/* something is already sleeping; insert new transaction at end */
- if (log->l_reserve_headq) {
- xlog_ins_ticketq(&log->l_reserve_headq, tic);
+ if (!list_empty(&log->l_reserveq)) {
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
trace_xfs_log_grant_sleep1(log, tic);
@@ -2583,8 +2552,8 @@ redo:
free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
log->l_grant_reserve_bytes);
if (free_bytes < need_bytes) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_reserve_headq, tic);
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
trace_xfs_log_grant_sleep2(log, tic);
@@ -2602,8 +2571,9 @@ redo:
trace_xfs_log_grant_wake2(log, tic);
goto redo;
- } else if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
+ }
+
+ list_del_init(&tic->t_queue);
/* we've got enough space */
xlog_grant_add_space(log, need_bytes);
@@ -2626,9 +2596,7 @@ redo:
return 0;
error_return:
- if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
-
+ list_del_init(&tic->t_queue);
trace_xfs_log_grant_error(log, tic);
/*
@@ -2653,7 +2621,6 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_ticket_t *tic)
{
int free_bytes, need_bytes;
- xlog_ticket_t *ntic;
#ifdef DEBUG
xfs_lsn_t tail_lsn;
#endif
@@ -2683,22 +2650,23 @@ xlog_regrant_write_log_space(xlog_t *log,
* this transaction.
*/
need_bytes = tic->t_unit_res;
- if ((ntic = log->l_write_headq)) {
+ if (!list_empty(&log->l_writeq)) {
+ struct xlog_ticket *ntic;
free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
log->l_grant_write_bytes);
- do {
+ list_for_each_entry(ntic, &log->l_writeq, t_queue) {
ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
sv_signal(&ntic->t_wait);
- ntic = ntic->t_next;
- } while (ntic != log->l_write_headq);
+ }
- if (ntic != log->l_write_headq) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_write_headq, tic);
+ if (ntic != list_first_entry(&log->l_writeq,
+ struct xlog_ticket, t_queue)) {
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_writeq);
trace_xfs_log_regrant_write_sleep1(log, tic);
@@ -2727,8 +2695,8 @@ redo:
free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
log->l_grant_write_bytes);
if (free_bytes < need_bytes) {
- if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
- xlog_ins_ticketq(&log->l_write_headq, tic);
+ if (list_empty(&tic->t_queue))
+ list_add_tail(&tic->t_queue, &log->l_writeq);
spin_unlock(&log->l_grant_lock);
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
@@ -2745,8 +2713,9 @@ redo:
trace_xfs_log_regrant_write_wake2(log, tic);
goto redo;
- } else if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_write_headq, tic);
+ }
+
+ list_del_init(&tic->t_queue);
/* we've got enough space */
xlog_grant_add_space_write(log, need_bytes);
@@ -2766,9 +2735,7 @@ redo:
error_return:
- if (tic->t_flags & XLOG_TIC_IN_Q)
- xlog_del_ticketq(&log->l_reserve_headq, tic);
-
+ list_del_init(&tic->t_queue);
trace_xfs_log_regrant_write_error(log, tic);
/*
@@ -3435,6 +3402,7 @@ xlog_ticket_alloc(
}
atomic_set(&tic->t_ref, 1);
+ INIT_LIST_HEAD(&tic->t_queue);
tic->t_unit_res = unit_bytes;
tic->t_curr_res = unit_bytes;
tic->t_cnt = cnt;
@@ -3742,26 +3710,17 @@ xfs_log_force_umount(
spin_unlock(&log->l_icloglock);
/*
- * We don't want anybody waiting for log reservations
- * after this. That means we have to wake up everybody
- * queued up on reserve_headq as well as write_headq.
- * In addition, we make sure in xlog_{re}grant_log_space
- * that we don't enqueue anything once the SHUTDOWN flag
- * is set, and this action is protected by the GRANTLOCK.
+ * We don't want anybody waiting for log reservations after this. That
+ * means we have to wake up everybody queued up on reserveq as well as
+ * writeq. In addition, we make sure in xlog_{re}grant_log_space that
+ * we don't enqueue anything once the SHUTDOWN flag is set, and this
+ * action is protected by the GRANTLOCK.
*/
- if ((tic = log->l_reserve_headq)) {
- do {
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_reserve_headq);
- }
+ list_for_each_entry(tic, &log->l_reserveq, t_queue)
+ sv_signal(&tic->t_wait);
- if ((tic = log->l_write_headq)) {
- do {
- sv_signal(&tic->t_wait);
- tic = tic->t_next;
- } while (tic != log->l_write_headq);
- }
+ list_for_each_entry(tic, &log->l_writeq, t_queue)
+ sv_signal(&tic->t_wait);
spin_unlock(&log->l_grant_lock);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index edcdfe0..d45fe2d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -133,12 +133,10 @@ static inline uint xlog_get_client_id(__be32 i)
*/
#define XLOG_TIC_INITED 0x1 /* has been initialized */
#define XLOG_TIC_PERM_RESERV 0x2 /* permanent reservation */
-#define XLOG_TIC_IN_Q 0x4
#define XLOG_TIC_FLAGS \
{ XLOG_TIC_INITED, "XLOG_TIC_INITED" }, \
- { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }, \
- { XLOG_TIC_IN_Q, "XLOG_TIC_IN_Q" }
+ { XLOG_TIC_PERM_RESERV, "XLOG_TIC_PERM_RESERV" }
#endif /* __KERNEL__ */
@@ -245,8 +243,7 @@ typedef struct xlog_res {
typedef struct xlog_ticket {
sv_t t_wait; /* ticket wait queue : 20 */
- struct xlog_ticket *t_next; /* :4|8 */
- struct xlog_ticket *t_prev; /* :4|8 */
+ struct list_head t_queue; /* reserve/write queue */
xlog_tid_t t_tid; /* transaction identifier : 4 */
atomic_t t_ref; /* ticket reference count : 4 */
int t_curr_res; /* current reservation in bytes : 4 */
@@ -520,8 +517,8 @@ typedef struct log {
/* The following block of fields are changed while holding grant_lock */
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
- xlog_ticket_t *l_reserve_headq;
- xlog_ticket_t *l_write_headq;
+ struct list_head l_reserveq;
+ struct list_head l_writeq;
int l_grant_reserve_cycle;
int l_grant_reserve_bytes;
int l_grant_write_cycle;
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 01/14] xfs: convert log grant ticket queues to list heads
2010-11-29 1:38 ` [PATCH 01/14] xfs: convert log grant ticket queues to list heads Dave Chinner
@ 2010-11-30 22:59 ` Christoph Hellwig
0 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2010-11-30 22:59 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Mon, Nov 29, 2010 at 12:38:19PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The grant write and reserve queues use a roll-your-own double linked
> list, so convert it to a standard list_head structure and convert
> all the list traversals to use list_for_each_entry(). We can also
> get rid of the XLOG_TIC_IN_Q flag as we can use the list_empty()
> check to tell if the ticket is in a list or not.
Looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
A minor comment below:
> - __field(void *, reserve_headq)
> - __field(void *, write_headq)
> + __field(void *, reserveq)
> + __field(void *, writeq)
Not sure why these were added to the traces originally, but imho
it's pretty pointless. If we care at all we could log a boolean
flag indicating whether the queues are empty or not.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 02/14] xfs: clean up log space grant functions
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
2010-11-29 1:38 ` [PATCH 01/14] xfs: convert log grant ticket queues to list heads Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:30 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 03/14] xfs: convert log grant heads to LSN notation Dave Chinner
` (11 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
xlog_grant_log_space and xlog_regrant_write_log_space both have very
similar structure. Both have a "wait on non-empty queue" section at
the start, followed by a "wait for space" loop of which the contents
are almost identical to the initial non-empty queue section.
In both cases, the non-empty queue wait can be folded into the wait
for space loop, simply by entering the loop if the queue is not
empty and the current ticket is not on the queue. If we trigger the
non-empty queue case, we always add ourselves to the queue, and
hence the second and subsequent loops are always driven by the "wait
for space" test.
IOWs, both wait conditions can be folded into the one loop, removing
a bunch of duplicated code and making it simpler to modify in
future patches.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_trace.h | 12 +--
fs/xfs/xfs_log.c | 171 +++++++++++++++---------------------------
2 files changed, 64 insertions(+), 119 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index f400668..0884f93 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -831,17 +831,13 @@ DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
+DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep);
+DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1b82735..399e559 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -68,14 +68,14 @@ STATIC void xlog_state_switch_iclogs(xlog_t *log,
STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
/* local functions to manipulate grant head */
-STATIC int xlog_grant_log_space(xlog_t *log,
- xlog_ticket_t *xtic);
+STATIC int xlog_grant_log_space(struct log *log,
+ struct xlog_ticket *xtic);
STATIC void xlog_grant_push_ail(xfs_mount_t *mp,
int need_bytes);
STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
xlog_ticket_t *ticket);
-STATIC int xlog_regrant_write_log_space(xlog_t *log,
- xlog_ticket_t *ticket);
+STATIC int xlog_regrant_write_log_space(struct log *log,
+ struct xlog_ticket *xtic);
STATIC void xlog_ungrant_log_space(xlog_t *log,
xlog_ticket_t *ticket);
@@ -2498,52 +2498,27 @@ restart:
* the needed reservation is satisfied.
*/
STATIC int
-xlog_grant_log_space(xlog_t *log,
- xlog_ticket_t *tic)
+xlog_grant_log_space(
+ struct log *log,
+ struct xlog_ticket *tic)
{
- int free_bytes;
- int need_bytes;
+ int free_bytes;
+ int need_bytes;
#ifdef DEBUG
- xfs_lsn_t tail_lsn;
-#endif
+ xfs_lsn_t tail_lsn;
-
-#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("grant Recovery problem");
#endif
/* Is there space or do we need to sleep? */
spin_lock(&log->l_grant_lock);
-
trace_xfs_log_grant_enter(log, tic);
+ ASSERT(list_empty(&tic->t_queue));
- /* something is already sleeping; insert new transaction at end */
- if (!list_empty(&log->l_reserveq)) {
- list_add_tail(&tic->t_queue, &log->l_reserveq);
-
- trace_xfs_log_grant_sleep1(log, tic);
-
- /*
- * Gotta check this before going to sleep, while we're
- * holding the grant lock.
- */
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
- /*
- * If we got an error, and the filesystem is shutting down,
- * we'll catch it down below. So just continue...
- */
- trace_xfs_log_grant_wake1(log, tic);
- spin_lock(&log->l_grant_lock);
- }
+ need_bytes = tic->t_unit_res;
if (tic->t_flags & XFS_LOG_PERM_RESERV)
- need_bytes = tic->t_unit_res*tic->t_ocnt;
- else
- need_bytes = tic->t_unit_res;
+ need_bytes *= tic->t_ocnt;
redo:
if (XLOG_FORCED_SHUTDOWN(log))
@@ -2551,25 +2526,25 @@ redo:
free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
log->l_grant_reserve_bytes);
- if (free_bytes < need_bytes) {
+ /*
+ * If there is not enough space or there is queued waiter and we
+ * are not already on the queue, we need to wait.
+ */
+ if (free_bytes < need_bytes ||
+ (!list_empty(&log->l_reserveq) && list_empty(&tic->t_queue))) {
if (list_empty(&tic->t_queue))
list_add_tail(&tic->t_queue, &log->l_reserveq);
- trace_xfs_log_grant_sleep2(log, tic);
-
spin_unlock(&log->l_grant_lock);
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_grant_sleep(log, tic);
sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
spin_lock(&log->l_grant_lock);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- trace_xfs_log_grant_wake2(log, tic);
-
+ trace_xfs_log_grant_wake(log, tic);
goto redo;
}
@@ -2608,21 +2583,21 @@ redo:
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
-} /* xlog_grant_log_space */
+}
/*
* Replenish the byte reservation required by moving the grant write head.
- *
- *
*/
STATIC int
-xlog_regrant_write_log_space(xlog_t *log,
- xlog_ticket_t *tic)
+xlog_regrant_write_log_space(
+ struct log *log,
+ struct xlog_ticket *tic)
{
- int free_bytes, need_bytes;
+ int free_bytes;
+ int need_bytes;
#ifdef DEBUG
- xfs_lsn_t tail_lsn;
+ xfs_lsn_t tail_lsn;
#endif
tic->t_curr_res = tic->t_unit_res;
@@ -2637,81 +2612,55 @@ xlog_regrant_write_log_space(xlog_t *log,
#endif
spin_lock(&log->l_grant_lock);
-
trace_xfs_log_regrant_write_enter(log, tic);
+ ASSERT(list_empty(&tic->t_queue));
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- /* If there are other waiters on the queue then give them a
- * chance at logspace before us. Wake up the first waiters,
- * if we do not wake up all the waiters then go to sleep waiting
- * for more free space, otherwise try to get some space for
- * this transaction.
- */
need_bytes = tic->t_unit_res;
- if (!list_empty(&log->l_writeq)) {
- struct xlog_ticket *ntic;
- free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
- log->l_grant_write_bytes);
- list_for_each_entry(ntic, &log->l_writeq, t_queue) {
- ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
-
- if (free_bytes < ntic->t_unit_res)
- break;
- free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_wait);
- }
-
- if (ntic != list_first_entry(&log->l_writeq,
- struct xlog_ticket, t_queue)) {
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_writeq);
-
- trace_xfs_log_regrant_write_sleep1(log, tic);
-
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
-
- XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT,
- &log->l_grant_lock, s);
-
- /* If we're shutting down, this tic is already
- * off the queue */
- spin_lock(&log->l_grant_lock);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- trace_xfs_log_regrant_write_wake1(log, tic);
- }
- }
-
redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
log->l_grant_write_bytes);
- if (free_bytes < need_bytes) {
- if (list_empty(&tic->t_queue))
+ /*
+ * If there is not enough space or there is queued waiter and we
+ * are not already on the queue, we need to wait.
+ */
+ if (free_bytes < need_bytes ||
+ (!list_empty(&log->l_writeq) && list_empty(&tic->t_queue))) {
+ if (list_empty(&tic->t_queue)) {
+ /*
+ * give existing waiters a chance at logspace before
+ * us. If we woke all the waiters, then immediately
+ * retry the space, otherwise sleep first.
+ */
+ struct xlog_ticket *ntic;
+ int woke_all = 1;
+ list_for_each_entry(ntic, &log->l_writeq, t_queue) {
+ ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
+
+ if (free_bytes < ntic->t_unit_res) {
+ woke_all = 0;
+ break;
+ }
+ free_bytes -= ntic->t_unit_res;
+ sv_signal(&ntic->t_wait);
+ }
list_add_tail(&tic->t_queue, &log->l_writeq);
+ if (woke_all)
+ goto redo;
+ }
+
spin_unlock(&log->l_grant_lock);
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_regrant_write_sleep2(log, tic);
-
+ trace_xfs_log_regrant_write_sleep(log, tic);
sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
- /* If we're shutting down, this tic is already off the queue */
spin_lock(&log->l_grant_lock);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- trace_xfs_log_regrant_write_wake2(log, tic);
+ trace_xfs_log_regrant_write_wake(log, tic);
goto redo;
}
@@ -2747,7 +2696,7 @@ redo:
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
-} /* xlog_regrant_write_log_space */
+}
/* The first cnt-1 times through here we don't need to
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 02/14] xfs: clean up log space grant functions
2010-11-29 1:38 ` [PATCH 02/14] xfs: clean up log space grant functions Dave Chinner
@ 2010-12-01 12:30 ` Christoph Hellwig
2010-12-02 1:48 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 12:30 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Mon, Nov 29, 2010 at 12:38:20PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> xlog_grant_log_space and xlog_regrant_log_write_space both have very
> similar structure. Both have a "wait on non-empty queue" section at
> the start, followed by a "wait for space" loop of which the contents
> are almost identical to the initial non-empty queue section.
>
> In both cases, the non-empty queue wait can be folded into the wait
> for space loop, simply by entering the loop if the queue is not
> empty and the current ticket is not on the queue. If we trigger the
> non-empty queue case, we always add ourselves to the queue, and
> hence the second and subsequent loops are always driven by the "wait
> for space" test.
>
> IOWs, both wait conditions can be folded into the one loop, removing
> a bunch of duplicated code and making it simpler to modify in
> future patches.
I don't really like this patch. The new conditions are overly
complicated because of the desire to only go through the loop once
for the queue not empty case. In addition there's some behaviour
changes:
- in xlog_grant_log_space we previously didn't call xlog_grant_push_ail
for the queue not empty case, and now we do.
- in xlog_regrant_write_log_space the old version of the queue not
empty case would loop over all waiting tickets, and if we could
wake up all of them we'll skip the first wait, and given enough
free space also the second wait, while the new code always adds it
to the writeq, although it will still skip the actual wait later.
My recommendation would be to skip this patch for now and revisit the
area later. For example the superfluous xlog_grant_push_ail actually
is rather harmless these days with the xfsaild threshold, so not
skipping it for the first case probably is fine in the end. Then again
the whole add to the queue just in case if it's non-empty doesn't make
much sense to me to start with. As soon as xfs_log_move_tail makes
space in the log it wakes up all tickets waiting for it anyway, so
adding us to the queue just in case seems rather inefficient, and not
overly helpful.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 02/14] xfs: clean up log space grant functions
2010-12-01 12:30 ` Christoph Hellwig
@ 2010-12-02 1:48 ` Dave Chinner
2010-12-02 11:40 ` Christoph Hellwig
0 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 1:48 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 07:30:32AM -0500, Christoph Hellwig wrote:
> On Mon, Nov 29, 2010 at 12:38:20PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > xlog_grant_log_space and xlog_regrant_log_write_space both have very
> > similar structure. Both have a "wait on non-empty queue" section at
> > the start, followed by a "wait for space" loop of which the contents
> > are almost identical to the initial non-empty queue section.
> >
> > In both cases, the non-empty queue wait can be folded into the wait
> > for space loop, simply by entering the loop if the queue is not
> > empty and the current ticket is not on the queue. If we trigger the
> > non-empty queue case, we always add ourselves to the queue, and
> > hence the second and subsequent loops are always driven by the "wait
> > for space" test.
> >
> > IOWs, both wait conditions can be folded into the one loop, removing
> > a bunch of duplicated code and making it simpler to modify in
> > future patches.
>
> I don't really like this patch. The new conditions are overly
> complicated because of the desire to only go through the loop once
> for the queue not empty case. In addition there's some behaviour
> changes:
>
> - in xlog_grant_log_space we previously didn't call xlog_grant_push_ail
> for the queue not empty case, and now we do.
As you point out, there's no actual harm in doing that.
> - in xlog_regrant_write_log_space the old version of the queue not
> empty case would loop over all waiting tickets, and if we could
> wake up all of them we'll skip the first wait, and given enough
> free space also the second wait, while the new code always adds it
> to the writeq, although it will still skip the actualy wait later.
I didn't think there's any harm in that, either, because we're
walking the entire queue anyway and it does not dirty any global
shared cachelines we aren't already dirtying by taking the queue
lock.
> My recommendation would be to skip this patch for now and revisit the
> area later.
Ugh. That means the following 10 patches need to be reworked.
> For example the superflous xlog_grant_push_ail actually
> is rather harmless these days with the xfsaild threshold, so not
> skipping it for the first case probably is fine in the end. Then again
> the whole add to the queue just in case if it's non-empty doesn't make
> much sense to me to start with. As soon as xfs_log_move_tail makes
> space in the log it wakes up all tickets waiting for it anyway, so
> adding us to the queue just in case seems rather inefficient, and not
> overl helpful.
Ok, so your main objection is the needless addition to the queue?
That can be avoided easily enough via a local variable. Would that
be sufficient to alleviate your concerns?
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 02/14] xfs: clean up log space grant functions
2010-12-02 1:48 ` Dave Chinner
@ 2010-12-02 11:40 ` Christoph Hellwig
2010-12-03 6:45 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-02 11:40 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, xfs
On Thu, Dec 02, 2010 at 12:48:33PM +1100, Dave Chinner wrote:
> Ok, so your main objection is the needless addition to the queue?
> That can be avoided easily enough via a local variable. Would that
> be sufficient to alleviate your concerns?
The main objection is that the new code might be a bit shorter,
but is a lot less readable. If you really want it go with it and
an updated changelog mentioning the behaviour change, but I don't
really feel too good about it.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 02/14] xfs: clean up log space grant functions
2010-12-02 11:40 ` Christoph Hellwig
@ 2010-12-03 6:45 ` Dave Chinner
0 siblings, 0 replies; 42+ messages in thread
From: Dave Chinner @ 2010-12-03 6:45 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Thu, Dec 02, 2010 at 06:40:50AM -0500, Christoph Hellwig wrote:
> On Thu, Dec 02, 2010 at 12:48:33PM +1100, Dave Chinner wrote:
> > Ok, so your main objection is the needless addition to the queue?
> > That can be avoided easily enough via a local variable. Would that
> > be sufficient to alleviate your concerns?
>
> The main objection is that the new code might be a bit shorted,
> but is a lot less readable. If you really want it go with it and
> an updated changelog mentioning the behaviour change, but I don't
> really feel too good about it.
OK, I'll drop it.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 03/14] xfs: convert log grant heads to LSN notation
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
2010-11-29 1:38 ` [PATCH 01/14] xfs: convert log grant ticket queues to list heads Dave Chinner
2010-11-29 1:38 ` [PATCH 02/14] xfs: clean up log space grant functions Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:42 ` Christoph Hellwig
2010-12-01 13:05 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 04/14] xfs: use wait queues directly for log grant queues Dave Chinner
` (10 subsequent siblings)
13 siblings, 2 replies; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Before we can start to remove the grant lock from grant head
accounting operations, we need to be able to do atomic operations on
the grant heads themselves. Currently they are made up of two 32 bit
integers - the cycle number and the byte offset into the cycle - and
as such cannot be updated atomically.
To solve this problem, encode the grant heads into a single 64 bit
variable as though they were an LSN, where the cycle number is held
in the upper 32 bits and the byte offset in the lower 32 bits. By
using this encoding, we can use the existing lsn manipulation macros
to encode and extract the relevant details for calculations. Once
we have the grant heads encoded into a 64 bit variable it will
be trivial to convert them to atomic variables for lockless
manipulation later on.
Further, rework xlog_space_left() so that it is passed the new
combined value into xlog_space_left rather than a split cycle/bytes
pair to simplify the caller code by hiding the encoding inside the
xlog_space_left(). Also just pass the tail lsn in preparation for
atomic variable conversion.
Also, factor out the common write head vs tail lsn debug code into
its own function to hide the encoding and remove some ifdef DEBUG
mess.
Finally, redo the space calculations to work on a pointer to a grant
head so that the same calculation code is not duplicated for each
grant head. This will further simplify the conversion to atomic
grant head manipulation.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_trace.h | 20 ++---
fs/xfs/xfs_log.c | 225 ++++++++++++++++++++++++------------------
fs/xfs/xfs_log_priv.h | 6 +-
fs/xfs/xfs_log_recover.c | 8 +-
4 files changed, 144 insertions(+), 115 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 0884f93..05a4bb9 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -768,10 +768,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__field(unsigned int, flags)
__field(void *, reserveq)
__field(void *, writeq)
- __field(int, grant_reserve_cycle)
- __field(int, grant_reserve_bytes)
- __field(int, grant_write_cycle)
- __field(int, grant_write_bytes)
+ __field(xfs_lsn_t, grant_reserve_lsn)
+ __field(xfs_lsn_t, grant_write_lsn)
__field(int, curr_cycle)
__field(int, curr_block)
__field(xfs_lsn_t, tail_lsn)
@@ -786,10 +784,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->flags = tic->t_flags;
__entry->reserveq = log->l_reserveq.next;
__entry->writeq = log->l_writeq.next;
- __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
- __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
- __entry->grant_write_cycle = log->l_grant_write_cycle;
- __entry->grant_write_bytes = log->l_grant_write_bytes;
+ __entry->grant_reserve_lsn = log->l_grant_reserve_lsn;
+ __entry->grant_write_lsn = log->l_grant_write_lsn;
__entry->curr_cycle = log->l_curr_cycle;
__entry->curr_block = log->l_curr_block;
__entry->tail_lsn = log->l_tail_lsn;
@@ -809,10 +805,10 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
__entry->reserveq,
__entry->writeq,
- __entry->grant_reserve_cycle,
- __entry->grant_reserve_bytes,
- __entry->grant_write_cycle,
- __entry->grant_write_bytes,
+ CYCLE_LSN(__entry->grant_reserve_lsn),
+ BLOCK_LSN(__entry->grant_reserve_lsn),
+ CYCLE_LSN(__entry->grant_write_lsn),
+ BLOCK_LSN(__entry->grant_write_lsn),
__entry->curr_cycle,
__entry->curr_block,
CYCLE_LSN(__entry->tail_lsn),
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 399e559..69a9563 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -47,7 +47,8 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
xfs_buftarg_t *log_target,
xfs_daddr_t blk_offset,
int num_bblks);
-STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
+STATIC int xlog_space_left(int logsize, xfs_lsn_t tail_lsn,
+ xfs_lsn_t head);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
STATIC void xlog_dealloc_log(xlog_t *log);
@@ -81,7 +82,8 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
#if defined(DEBUG)
STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
-STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
+STATIC void xlog_verify_grant_head(struct log *log, int equals);
+STATIC void xlog_verify_grant_tail(struct log *log);
STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
int count, boolean_t syncing);
STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
@@ -89,51 +91,82 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_head(a,b)
+#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c,d)
#define xlog_verify_tail_lsn(a,b,c)
#endif
STATIC int xlog_iclogs_empty(xlog_t *log);
+/*
+ * Grant space calculations use 64 bit atomic variables to store the current reserve
+ * and write grant markers. However, these are really two 32 bit numbers which
+ * need to be cracked out of the 64 bit variable, modified, recombined and then
+ * written back into the 64 bit atomic variable. And it has to be done
+ * atomically (i.e. without locks).
+ *
+ * The upper 32 bits is the log cycle, just like a xfs_lsn_t. The lower 32 bits
+ * is the byte offset into the log for the marker. Unlike the xfs_lsn_t, this
+ * is held in bytes rather than basic blocks, even though it uses the
+ * BLOCK_LSN() macro to extract it.
+ */
static void
-xlog_grant_sub_space(struct log *log, int bytes)
+__xlog_grant_sub_space(
+ xfs_lsn_t *head,
+ int bytes,
+ int logsize)
{
- log->l_grant_write_bytes -= bytes;
- if (log->l_grant_write_bytes < 0) {
- log->l_grant_write_bytes += log->l_logsize;
- log->l_grant_write_cycle--;
- }
+ int cycle, space;
- log->l_grant_reserve_bytes -= bytes;
- if ((log)->l_grant_reserve_bytes < 0) {
- log->l_grant_reserve_bytes += log->l_logsize;
- log->l_grant_reserve_cycle--;
+ cycle = CYCLE_LSN(*head);
+ space = BLOCK_LSN(*head);
+ space -= bytes;
+ if (space < 0) {
+ cycle--;
+ space += logsize;
}
-
+ *head = xlog_assign_lsn(cycle, space);
}
static void
-xlog_grant_add_space_write(struct log *log, int bytes)
+__xlog_grant_add_space(
+ xfs_lsn_t *head,
+ int bytes,
+ int logsize)
{
- int tmp = log->l_logsize - log->l_grant_write_bytes;
+ int cycle, space, tmp;
+
+ cycle = CYCLE_LSN(*head);
+ space = BLOCK_LSN(*head);
+ tmp = logsize - space;
if (tmp > bytes)
- log->l_grant_write_bytes += bytes;
+ space += bytes;
else {
- log->l_grant_write_cycle++;
- log->l_grant_write_bytes = bytes - tmp;
+ cycle++;
+ space = bytes - tmp;
}
+ *head = xlog_assign_lsn(cycle, space);
+}
+
+static inline void
+xlog_grant_sub_space(struct log *log, int bytes)
+{
+ __xlog_grant_sub_space(&log->l_grant_write_lsn, bytes, log->l_logsize);
+ __xlog_grant_sub_space(&log->l_grant_reserve_lsn, bytes,
+ log->l_logsize);
+}
+
+static inline void
+xlog_grant_add_space_write(struct log *log, int bytes)
+{
+ __xlog_grant_add_space(&log->l_grant_write_lsn, bytes, log->l_logsize);
}
static void
xlog_grant_add_space_reserve(struct log *log, int bytes)
{
- int tmp = log->l_logsize - log->l_grant_reserve_bytes;
- if (tmp > bytes)
- log->l_grant_reserve_bytes += bytes;
- else {
- log->l_grant_reserve_cycle++;
- log->l_grant_reserve_bytes = bytes - tmp;
- }
+ __xlog_grant_add_space(&log->l_grant_reserve_lsn, bytes,
+ log->l_logsize);
}
static inline void
@@ -671,7 +704,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
{
xlog_ticket_t *tic;
xlog_t *log = mp->m_log;
- int need_bytes, free_bytes, cycle, bytes;
+ int need_bytes, free_bytes;
if (XLOG_FORCED_SHUTDOWN(log))
return;
@@ -697,9 +730,8 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- cycle = log->l_grant_write_cycle;
- bytes = log->l_grant_write_bytes;
- free_bytes = xlog_space_left(log, cycle, bytes);
+ free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ log->l_grant_write_lsn);
list_for_each_entry(tic, &log->l_writeq, t_queue) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -716,9 +748,8 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- cycle = log->l_grant_reserve_cycle;
- bytes = log->l_grant_reserve_bytes;
- free_bytes = xlog_space_left(log, cycle, bytes);
+ free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ log->l_grant_reserve_lsn);
list_for_each_entry(tic, &log->l_reserveq, t_queue) {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
need_bytes = tic->t_unit_res*tic->t_cnt;
@@ -831,37 +862,40 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
* result is that we return the size of the log as the amount of space left.
*/
STATIC int
-xlog_space_left(xlog_t *log, int cycle, int bytes)
+xlog_space_left(
+ int logsize,
+ xfs_lsn_t tail_lsn,
+ xfs_lsn_t head)
{
- int free_bytes;
- int tail_bytes;
- int tail_cycle;
-
- tail_bytes = BBTOB(BLOCK_LSN(log->l_tail_lsn));
- tail_cycle = CYCLE_LSN(log->l_tail_lsn);
- if ((tail_cycle == cycle) && (bytes >= tail_bytes)) {
- free_bytes = log->l_logsize - (bytes - tail_bytes);
- } else if ((tail_cycle + 1) < cycle) {
+ int free_bytes;
+ int tail_bytes = BBTOB(BLOCK_LSN(tail_lsn));
+ int tail_cycle = CYCLE_LSN(tail_lsn);
+ int head_cycle = CYCLE_LSN(head);
+ int head_bytes = BLOCK_LSN(head);
+
+ if ((tail_cycle == head_cycle) && (head_bytes >= tail_bytes)) {
+ free_bytes = logsize - (head_bytes - tail_bytes);
+ } else if ((tail_cycle + 1) < head_cycle) {
return 0;
- } else if (tail_cycle < cycle) {
- ASSERT(tail_cycle == (cycle - 1));
- free_bytes = tail_bytes - bytes;
+ } else if (tail_cycle < head_cycle) {
+ ASSERT(tail_cycle == (head_cycle - 1));
+ free_bytes = tail_bytes - head_bytes;
} else {
/*
* The reservation head is behind the tail.
* In this case we just want to return the size of the
* log as the amount of space left.
*/
- xfs_fs_cmn_err(CE_ALERT, log->l_mp,
+ cmn_err(CE_ALERT,
"xlog_space_left: head behind tail\n"
" tail_cycle = %d, tail_bytes = %d\n"
" GH cycle = %d, GH bytes = %d",
- tail_cycle, tail_bytes, cycle, bytes);
+ tail_cycle, tail_bytes, head_cycle, head_bytes);
ASSERT(0);
- free_bytes = log->l_logsize;
+ free_bytes = logsize;
}
return free_bytes;
-} /* xlog_space_left */
+}
/*
@@ -1018,8 +1052,8 @@ xlog_alloc_log(xfs_mount_t *mp,
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
log->l_last_sync_lsn = log->l_tail_lsn;
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
- log->l_grant_reserve_cycle = 1;
- log->l_grant_write_cycle = 1;
+ log->l_grant_reserve_lsn = xlog_assign_lsn(1, 0);
+ log->l_grant_write_lsn = xlog_assign_lsn(1, 0);
INIT_LIST_HEAD(&log->l_reserveq);
INIT_LIST_HEAD(&log->l_writeq);
@@ -1207,9 +1241,8 @@ xlog_grant_push_ail(xfs_mount_t *mp,
ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
spin_lock(&log->l_grant_lock);
- free_bytes = xlog_space_left(log,
- log->l_grant_reserve_cycle,
- log->l_grant_reserve_bytes);
+ free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ log->l_grant_reserve_lsn);
tail_lsn = log->l_tail_lsn;
free_blocks = BTOBBT(free_bytes);
@@ -2504,9 +2537,8 @@ xlog_grant_log_space(
{
int free_bytes;
int need_bytes;
-#ifdef DEBUG
- xfs_lsn_t tail_lsn;
+#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("grant Recovery problem");
#endif
@@ -2524,8 +2556,8 @@ redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- free_bytes = xlog_space_left(log, log->l_grant_reserve_cycle,
- log->l_grant_reserve_bytes);
+ free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ log->l_grant_reserve_lsn);
/*
* If there is not enough space or there is queued waiter and we
* are not already on the queue, we need to wait.
@@ -2552,20 +2584,9 @@ redo:
/* we've got enough space */
xlog_grant_add_space(log, need_bytes);
-#ifdef DEBUG
- tail_lsn = log->l_tail_lsn;
- /*
- * Check to make sure the grant write head didn't just over lap the
- * tail. If the cycles are the same, we can't be overlapping.
- * Otherwise, make sure that the cycles differ by exactly one and
- * check the byte count.
- */
- if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
- ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
- ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
- }
-#endif
+
trace_xfs_log_grant_exit(log, tic);
+ xlog_verify_grant_tail(log);
xlog_verify_grant_head(log, 1);
spin_unlock(&log->l_grant_lock);
return 0;
@@ -2596,9 +2617,6 @@ xlog_regrant_write_log_space(
{
int free_bytes;
int need_bytes;
-#ifdef DEBUG
- xfs_lsn_t tail_lsn;
-#endif
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2620,8 +2638,8 @@ redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
- log->l_grant_write_bytes);
+ free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ log->l_grant_write_lsn);
/*
* If there is not enough space or there is queued waiter and we
* are not already on the queue, we need to wait.
@@ -2668,16 +2686,9 @@ redo:
/* we've got enough space */
xlog_grant_add_space_write(log, need_bytes);
-#ifdef DEBUG
- tail_lsn = log->l_tail_lsn;
- if (CYCLE_LSN(tail_lsn) != log->l_grant_write_cycle) {
- ASSERT(log->l_grant_write_cycle-1 == CYCLE_LSN(tail_lsn));
- ASSERT(log->l_grant_write_bytes <= BBTOB(BLOCK_LSN(tail_lsn)));
- }
-#endif
trace_xfs_log_regrant_write_exit(log, tic);
-
+ xlog_verify_grant_tail(log);
xlog_verify_grant_head(log, 1);
spin_unlock(&log->l_grant_lock);
return 0;
@@ -3401,18 +3412,42 @@ xlog_verify_dest_ptr(
}
STATIC void
-xlog_verify_grant_head(xlog_t *log, int equals)
+xlog_verify_grant_head(
+ struct log *log,
+ int equals)
{
- if (log->l_grant_reserve_cycle == log->l_grant_write_cycle) {
- if (equals)
- ASSERT(log->l_grant_reserve_bytes >= log->l_grant_write_bytes);
- else
- ASSERT(log->l_grant_reserve_bytes > log->l_grant_write_bytes);
- } else {
- ASSERT(log->l_grant_reserve_cycle-1 == log->l_grant_write_cycle);
- ASSERT(log->l_grant_write_bytes >= log->l_grant_reserve_bytes);
- }
-} /* xlog_verify_grant_head */
+ xfs_lsn_t reserve = log->l_grant_reserve_lsn;
+ xfs_lsn_t write = log->l_grant_write_lsn;
+
+ if (CYCLE_LSN(reserve) == CYCLE_LSN(write)) {
+ if (equals)
+ ASSERT(BLOCK_LSN(reserve) >= BLOCK_LSN(write));
+ else
+ ASSERT(BLOCK_LSN(reserve) > BLOCK_LSN(write));
+ } else {
+ ASSERT(CYCLE_LSN(reserve) - 1 == CYCLE_LSN(write));
+ ASSERT(BLOCK_LSN(write) >= BLOCK_LSN(reserve));
+ }
+}
+
+STATIC void
+xlog_verify_grant_tail(
+ struct log *log)
+{
+ xfs_lsn_t tail_lsn = log->l_tail_lsn;
+ xfs_lsn_t write_lsn = log->l_grant_write_lsn;
+
+ /*
+ * Check to make sure the grant write head didn't just over lap the
+ * tail. If the cycles are the same, we can't be overlapping.
+ * Otherwise, make sure that the cycles differ by exactly one and
+ * check the byte count.
+ */
+ if (CYCLE_LSN(tail_lsn) != CYCLE_LSN(write_lsn)) {
+ ASSERT(CYCLE_LSN(write_lsn) - 1 == CYCLE_LSN(tail_lsn));
+ ASSERT(BLOCK_LSN(write_lsn) <= BBTOB(BLOCK_LSN(tail_lsn)));
+ }
+}
/* check if it will fit */
STATIC void
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index d45fe2d..4ebaf07 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -519,10 +519,8 @@ typedef struct log {
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
struct list_head l_reserveq;
struct list_head l_writeq;
- int l_grant_reserve_cycle;
- int l_grant_reserve_bytes;
- int l_grant_write_cycle;
- int l_grant_write_bytes;
+ xfs_lsn_t l_grant_reserve_lsn;
+ xfs_lsn_t l_grant_write_lsn;
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index baad94a..d8c62f0 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,10 +927,10 @@ xlog_find_tail(
log->l_curr_cycle++;
log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
- log->l_grant_reserve_cycle = log->l_curr_cycle;
- log->l_grant_reserve_bytes = BBTOB(log->l_curr_block);
- log->l_grant_write_cycle = log->l_curr_cycle;
- log->l_grant_write_bytes = BBTOB(log->l_curr_block);
+ log->l_grant_reserve_lsn = xlog_assign_lsn(log->l_curr_cycle,
+ BBTOB(log->l_curr_block));
+ log->l_grant_write_lsn = xlog_assign_lsn(log->l_curr_cycle,
+ BBTOB(log->l_curr_block));
/*
* Look for unmount record. If we find it, then we know there
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 03/14] xfs: convert log grant heads to LSN notation
2010-11-29 1:38 ` [PATCH 03/14] xfs: convert log grant heads to LSN notation Dave Chinner
@ 2010-12-01 12:42 ` Christoph Hellwig
2010-12-02 1:49 ` Dave Chinner
2010-12-01 13:05 ` Christoph Hellwig
1 sibling, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 12:42 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
The patch looks good to me, but I really hate overloading the lsn types
and helpers. Just add duplicated of CYCLE_LSN/BLOCK_LSN and
xlog_assign_lsn for a new type as use them.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 03/14] xfs: convert log grant heads to LSN notation
2010-11-29 1:38 ` [PATCH 03/14] xfs: convert log grant heads to LSN notation Dave Chinner
2010-12-01 12:42 ` Christoph Hellwig
@ 2010-12-01 13:05 ` Christoph Hellwig
2010-12-02 2:01 ` Dave Chinner
1 sibling, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 13:05 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
> -STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
> +STATIC int xlog_space_left(int logsize, xfs_lsn_t tail_lsn,
> + xfs_lsn_t head);
Looking further through the series I have to say I really hate
passing in the logsize instead of the log structure. Passing the
log pointer from higher level functions just makes a lot more sense.
Also in this case passing the tail_lsn explicitly doesn't make any sense
- it becomes atomic later and thus there is no locking requirement for
it.
> -xlog_grant_sub_space(struct log *log, int bytes)
> +__xlog_grant_sub_space(
> + xfs_lsn_t *head,
> + int bytes,
> + int logsize)
Same thing here - the calling convention would be a lot more regular
by still passing the log as first argument.
After the factoring I also think we could cut down on the amount of
wrappers in this area. Just rename __xlog_grant_sub_space and
__xlog_grant_add_space to not have the __ prefix, and kill the wrappers
around it - they only have one or two callers, and just having two
helpers and passing the heads we're interested in is a lot simpler and
cleaner.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 03/14] xfs: convert log grant heads to LSN notation
2010-12-01 13:05 ` Christoph Hellwig
@ 2010-12-02 2:01 ` Dave Chinner
2010-12-02 11:47 ` Christoph Hellwig
0 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 2:01 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 08:05:04AM -0500, Christoph Hellwig wrote:
> > -STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
> > +STATIC int xlog_space_left(int logsize, xfs_lsn_t tail_lsn,
> > + xfs_lsn_t head);
>
> Looking further through the series I have to say I really hate
> passing in the logsize instead of the log structure. Passing the
> log pointer from higher level functions just makes a lot more sense.
>
> Also in this case passing the tail_lsn explicitly doesn't make any sense
> - it becomes atomic later and thus there is no locking requirement for
> it.
What I wanted to make clear is that the calculation works on fixed
values and doesn't sample values internally itself. I guess that's
not important for the log size, but for stuff like the tail lsn
it avoids needing to sample inside xlog_space_left() before we
crack it. i.e. something like this is wrong:
cycle = CYCLE_LSN(atomic64_read(&log->l_tail_lsn));
block = BLOCK_LSN(atomic64_read(&log->l_tail_lsn));
and this is correct:
tail_lsn = atomic64_read(&log->l_tail_lsn);
cycle = CYCLE_LSN(tail_lsn);
block = BLOCK_LSN(tail_lsn);
So it makes sense to me to have the value of the tail lsn and
other variables that should only be sampled once passed into the
function. That avoids misunderstandings further down the track
because it is obvious that the calculation works on constant values.
Perhaps I should add "const" to the parameter declarations to help
make my intentions clear...
>
> > -xlog_grant_sub_space(struct log *log, int bytes)
> > +__xlog_grant_sub_space(
> > + xfs_lsn_t *head,
> > + int bytes,
> > + int logsize)
>
> Same thing here - the calling convention would be a lot more regular
> by still passing the log as first argument.
Ok, it's only for the logsize again, but that's not the important
part of the calculation...
> After the factoring I also think we could cut down on the amount of
> wrappers in this area. Just rename __xlog_grant_sub_space and
> __xlog_grant_add_space to not have the __ prefix, and kill the wrappers
> around it - they only have one or two callers, and just having two
> helpers and passing the heads we're interested in is a lot simpler and
> cleaner.
I thought about that - my first version even did this. I thought it
was easier to understand the changes if I didn't change the calling
conventions for modifying the grant heads. As such, I'd prefer to
make this change to the wrappers in a separate patch.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 03/14] xfs: convert log grant heads to LSN notation
2010-12-02 2:01 ` Dave Chinner
@ 2010-12-02 11:47 ` Christoph Hellwig
0 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-02 11:47 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, xfs
On Thu, Dec 02, 2010 at 01:01:32PM +1100, Dave Chinner wrote:
> On Wed, Dec 01, 2010 at 08:05:04AM -0500, Christoph Hellwig wrote:
> > > -STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
> > > +STATIC int xlog_space_left(int logsize, xfs_lsn_t tail_lsn,
> > > + xfs_lsn_t head);
> >
> > Looking further through the series I have to say I really hate
> > passing in the logsize instead of the log structure. Passing the
> > log pointer from higher level functions just makes a lot more sense.
> >
> > Also in this case passing the tail_lsn explicitly doesn't make any sense
> > - it becomes atomic later and thus there is no locking requirement for
> > it.
>
> What I wanted to make clear is that the calculation works on fixed
> values and doesn't sample values internally itself. I guess that's
> not important for the log size, but for stuff like the tail lsn
> it avoids needing to sample inside xlog_space_left() before we
> crack it. i.e. something like this is wrong:
>
> cycle = CYCLE_LSN(atomic64_read(&log->l_tail_lsn));
> block = BLOCK_LSN(atomic64_read(&log->l_tail_lsn));
>
> and this is correct:
>
> tail_lsn = atomic64_read(&log->l_tail_lsn);
> cycle = CYCLE_LSN(tail_lsn);
> block = BLOCK_LSN(tail_lsn);
>
> So it makes sense to me to have the value of of the tail lsn and
> other variables that should only be sampled once passed into the
> function. That avoids misunderstandings further down the track
> because it is obvious that the calculation works on constant values.
> Perhaps I should add "const" to the parameter declarations to help
> make my intentions clear...
I don't think obsfucating the code is a good idea to reach this goal.
What might be better is a helper like:
static inline void xlog_crack_lsn(atomic64_t *lsn, int *cycle, int *block)
{
xfs_lsn_t tail_lsn = atomic64_read(lsn);
*cycle = CYCLE_LSN(tail_lsn);
*block = BLOCK_LSN(tail_lsn);
}
and a long comment explaining how it needs to be used.
> I thought about that - my first version even did this. I thought it
> was easier to understand the changes if I didn't change the calling
> conventions for modifying the grant heads. As such, I'd prefer to
> make this change to the wrappers in a separate patch.
Heh, when looking at the patch I actually found this part pretty hard
to read already. So moving the factoring of the helpers out into a
separate patch might indeed be a good idea, and that patch can also
remove the wrappers.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 04/14] xfs: use wait queues directly for log grant queues
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (2 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 03/14] xfs: convert log grant heads to LSN notation Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:34 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock Dave Chinner
` (9 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The log grant queues are one of the few places left using sv_t
constructs for waiting. Convert them to use wait queues directly
while we are cleaning up the code to move one step closer to remove
the sv_t type from the XFS codebase.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 32 +++++++++++++++++++++-----------
fs/xfs/xfs_log_priv.h | 2 +-
2 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 69a9563..93b5b2d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -739,7 +739,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_wait);
+ wake_up(&tic->t_wait);
}
}
@@ -759,7 +759,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_wait);
+ wake_up(&tic->t_wait);
}
}
spin_unlock(&log->l_grant_lock);
@@ -2537,6 +2537,7 @@ xlog_grant_log_space(
{
int free_bytes;
int need_bytes;
+ DECLARE_WAITQUEUE(wait, current);
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
@@ -2573,7 +2574,12 @@ redo:
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+
+ add_wait_queue_exclusive(&tic->t_wait, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&log->l_grant_lock);
+ schedule();
+ remove_wait_queue(&tic->t_wait, &wait);
spin_lock(&log->l_grant_lock);
trace_xfs_log_grant_wake(log, tic);
@@ -2617,6 +2623,7 @@ xlog_regrant_write_log_space(
{
int free_bytes;
int need_bytes;
+ DECLARE_WAITQUEUE(wait, current);
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2662,7 +2669,7 @@ redo:
break;
}
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_wait);
+ wake_up(&ntic->t_wait);
}
list_add_tail(&tic->t_queue, &log->l_writeq);
if (woke_all)
@@ -2675,7 +2682,12 @@ redo:
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_regrant_write_sleep(log, tic);
- sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
+
+ add_wait_queue_exclusive(&tic->t_wait, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&log->l_grant_lock);
+ schedule();
+ remove_wait_queue(&tic->t_wait, &wait);
spin_lock(&log->l_grant_lock);
trace_xfs_log_regrant_write_wake(log, tic);
@@ -3237,10 +3249,8 @@ xfs_log_ticket_put(
xlog_ticket_t *ticket)
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
- if (atomic_dec_and_test(&ticket->t_ref)) {
- sv_destroy(&ticket->t_wait);
+ if (atomic_dec_and_test(&ticket->t_ref))
kmem_zone_free(xfs_log_ticket_zone, ticket);
- }
}
xlog_ticket_t *
@@ -3373,7 +3383,7 @@ xlog_ticket_alloc(
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
+ init_waitqueue_head(&tic->t_wait);
xlog_tic_reset_res(tic);
@@ -3701,10 +3711,10 @@ xfs_log_force_umount(
* action is protected by the GRANTLOCK.
*/
list_for_each_entry(tic, &log->l_reserveq, t_queue)
- sv_signal(&tic->t_wait);
+ wake_up(&tic->t_wait);
list_for_each_entry(tic, &log->l_writeq, t_queue)
- sv_signal(&tic->t_wait);
+ wake_up(&tic->t_wait);
spin_unlock(&log->l_grant_lock);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4ebaf07..6fcee10 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -242,7 +242,7 @@ typedef struct xlog_res {
} xlog_res_t;
typedef struct xlog_ticket {
- sv_t t_wait; /* ticket wait queue : 20 */
+ wait_queue_head_t t_wait; /* ticket wait queue */
struct list_head t_queue; /* reserve/write queue */
xlog_tid_t t_tid; /* transaction identifier : 4 */
atomic_t t_ref; /* ticket reference count : 4 */
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 04/14] xfs: use wait queues directly for log grant queues
2010-11-29 1:38 ` [PATCH 04/14] xfs: use wait queues directly for log grant queues Dave Chinner
@ 2010-12-01 12:34 ` Christoph Hellwig
2010-12-02 2:02 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 12:34 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Mon, Nov 29, 2010 at 12:38:22PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The log grant queues are one of the few places left using sv_t
> constructs for waiting. Convert them to use wait queues directly
> while we are cleaning up the code to move one step closer to remove
> the sv_t type from the XFS codebase.
Doing this one separately from the other sv_t removal seems a bit
pointless.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 04/14] xfs: use wait queues directly for log grant queues
2010-12-01 12:34 ` Christoph Hellwig
@ 2010-12-02 2:02 ` Dave Chinner
0 siblings, 0 replies; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 2:02 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 07:34:54AM -0500, Christoph Hellwig wrote:
> On Mon, Nov 29, 2010 at 12:38:22PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > The log grant queues are one of the few places left using sv_t
> > constructs for waiting. Convert them to use wait queues directly
> > while we are cleaning up the code to move one step closer to remove
> > the sv_t type from the XFS codebase.
>
> Doing this one separately from the other sv_t removal seems a bit
> pointless.
Ok, as you suggest later I'll just fold the other sv_t patches
into this one.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (3 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 04/14] xfs: use wait queues directly for log grant queues Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:45 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 06/14] xfs: convert l_last_sync_lsn to an atomic variable Dave Chinner
` (8 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The xlog_grant_push_ail() currently takes the grant lock internally to sample
the tail lsn, last sync lsn and the reserve grant head. Most of the callers
already hold the grant lock but have to drop it before calling
xlog_grant_push_ail(). This is a leftover from when the AIL tail pushing was
done in line and hence xlog_grant_push_ail had to drop the grant lock. AIL push
is now done in another thread and hence we can safely hold the grant lock over
the entire xlog_grant_push_ail call.
Push the grant lock outside of xlog_grant_push_ail() and pass the tail lsn and
last sync lsn values as parameters to make the internals completely oblivious
to the source of the values and independent of whatever locking or
synchronisation they require. This will reduce traffic on the grant lock by
itself, but this is only one step in preparing for the complete removal of the
grant lock.
While there, clean up the indenting of xlog_grant_push_ail() to match the
rest of the XFS code.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 123 ++++++++++++++++++++++++++++--------------------------
1 files changed, 64 insertions(+), 59 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 93b5b2d..a35ef8f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -71,7 +71,9 @@ STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
/* local functions to manipulate grant head */
STATIC int xlog_grant_log_space(struct log *log,
struct xlog_ticket *xtic);
-STATIC void xlog_grant_push_ail(xfs_mount_t *mp,
+STATIC void xlog_grant_push_ail(struct log *log,
+ xfs_lsn_t tail_lsn,
+ xfs_lsn_t last_sync_lsn,
int need_bytes);
STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
xlog_ticket_t *ticket);
@@ -356,7 +358,11 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
+ spin_lock(&log->l_grant_lock);
+ xlog_grant_push_ail(log, log->l_tail_lsn,
+ log->l_last_sync_lsn,
+ internal_ticket->t_unit_res);
+ spin_unlock(&log->l_grant_lock);
retval = xlog_regrant_write_log_space(log, internal_ticket);
} else {
/* may sleep if need to allocate more tickets */
@@ -370,9 +376,12 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- xlog_grant_push_ail(mp,
+ spin_lock(&log->l_grant_lock);
+ xlog_grant_push_ail(log, log->l_tail_lsn,
+ log->l_last_sync_lsn,
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
+ spin_unlock(&log->l_grant_lock);
retval = xlog_grant_log_space(log, internal_ticket);
}
@@ -1226,60 +1235,58 @@ xlog_commit_record(
* water mark. In this manner, we would be creating a low water mark.
*/
STATIC void
-xlog_grant_push_ail(xfs_mount_t *mp,
- int need_bytes)
-{
- xlog_t *log = mp->m_log; /* pointer to the log */
- xfs_lsn_t tail_lsn; /* lsn of the log tail */
- xfs_lsn_t threshold_lsn = 0; /* lsn we'd like to be at */
- int free_blocks; /* free blocks left to write to */
- int free_bytes; /* free bytes left to write to */
- int threshold_block; /* block in lsn we'd like to be at */
- int threshold_cycle; /* lsn cycle we'd like to be at */
- int free_threshold;
-
- ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
-
- spin_lock(&log->l_grant_lock);
- free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
- log->l_grant_reserve_lsn);
- tail_lsn = log->l_tail_lsn;
- free_blocks = BTOBBT(free_bytes);
-
- /*
- * Set the threshold for the minimum number of free blocks in the
- * log to the maximum of what the caller needs, one quarter of the
- * log, and 256 blocks.
- */
- free_threshold = BTOBB(need_bytes);
- free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
- free_threshold = MAX(free_threshold, 256);
- if (free_blocks < free_threshold) {
- threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
- threshold_cycle = CYCLE_LSN(tail_lsn);
- if (threshold_block >= log->l_logBBsize) {
- threshold_block -= log->l_logBBsize;
- threshold_cycle += 1;
- }
- threshold_lsn = xlog_assign_lsn(threshold_cycle, threshold_block);
-
- /* Don't pass in an lsn greater than the lsn of the last
- * log record known to be on disk.
+xlog_grant_push_ail(
+ struct log *log,
+ xfs_lsn_t tail_lsn,
+ xfs_lsn_t last_sync_lsn,
+ int need_bytes)
+{
+ xfs_lsn_t threshold_lsn = 0;
+ int free_blocks;
+ int free_bytes;
+ int threshold_block;
+ int threshold_cycle;
+ int free_threshold;
+
+ ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
+
+ free_bytes = xlog_space_left(log->l_logsize, tail_lsn,
+ log->l_grant_reserve_lsn);
+ free_blocks = BTOBBT(free_bytes);
+
+ /*
+ * Set the threshold for the minimum number of free blocks in the
+ * log to the maximum of what the caller needs, one quarter of the
+ * log, and 256 blocks.
*/
- if (XFS_LSN_CMP(threshold_lsn, log->l_last_sync_lsn) > 0)
- threshold_lsn = log->l_last_sync_lsn;
- }
- spin_unlock(&log->l_grant_lock);
-
- /*
- * Get the transaction layer to kick the dirty buffers out to
- * disk asynchronously. No point in trying to do this if
- * the filesystem is shutting down.
- */
- if (threshold_lsn &&
- !XLOG_FORCED_SHUTDOWN(log))
- xfs_trans_ail_push(log->l_ailp, threshold_lsn);
-} /* xlog_grant_push_ail */
+ free_threshold = BTOBB(need_bytes);
+ free_threshold = MAX(free_threshold, (log->l_logBBsize >> 2));
+ free_threshold = MAX(free_threshold, 256);
+ if (free_blocks < free_threshold) {
+ threshold_block = BLOCK_LSN(tail_lsn) + free_threshold;
+ threshold_cycle = CYCLE_LSN(tail_lsn);
+ if (threshold_block >= log->l_logBBsize) {
+ threshold_block -= log->l_logBBsize;
+ threshold_cycle += 1;
+ }
+ threshold_lsn = xlog_assign_lsn(threshold_cycle,
+ threshold_block);
+ /*
+ * Don't pass in an lsn greater than the lsn of the last
+ * log record known to be on disk.
+ */
+ if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
+ threshold_lsn = last_sync_lsn;
+ }
+
+ /*
+ * Get the transaction layer to kick the dirty buffers out to
+ * disk asynchronously. No point in trying to do this if
+ * the filesystem is shutting down.
+ */
+ if (threshold_lsn && !XLOG_FORCED_SHUTDOWN(log))
+ xfs_trans_ail_push(log->l_ailp, threshold_lsn);
+}
/*
* The bdstrat callback function for log bufs. This gives us a central
@@ -2568,9 +2575,8 @@ redo:
if (list_empty(&tic->t_queue))
list_add_tail(&tic->t_queue, &log->l_reserveq);
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
+ xlog_grant_push_ail(log, log->l_tail_lsn,
+ log->l_last_sync_lsn, need_bytes);
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
@@ -2676,9 +2682,8 @@ redo:
goto redo;
}
- spin_unlock(&log->l_grant_lock);
- xlog_grant_push_ail(log->l_mp, need_bytes);
- spin_lock(&log->l_grant_lock);
+ xlog_grant_push_ail(log, log->l_tail_lsn,
+ log->l_last_sync_lsn, need_bytes);
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_regrant_write_sleep(log, tic);
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock
2010-11-29 1:38 ` [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock Dave Chinner
@ 2010-12-01 12:45 ` Christoph Hellwig
2010-12-02 2:04 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 12:45 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Once l_tail_lsn and l_last_sync_lsn are converted to atomic64_ts later
in the series passing them in separately becomes pointless again. Maybe
scale the patch back a bit and require the caller to hold l_grant_lock
for now, until the atomic64_t conversion happens.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock
2010-12-01 12:45 ` Christoph Hellwig
@ 2010-12-02 2:04 ` Dave Chinner
0 siblings, 0 replies; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 2:04 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 07:45:40AM -0500, Christoph Hellwig wrote:
> Once l_tail_lsn and l_last_sync_lsn are converted to atomic64_ts later
> in the series passing them in separately becomes pointless again. Maybe
> scale the patch back a bit and require the caller to hold l_grant_lock
> for now, until the atomic64_t conversion happens.
Same reasoning as for the change to xlog_space_left - the values
should only be sampled once for correctness, which is why I pass
them into the function. More "const" annotations, I guess.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 06/14] xfs: convert l_last_sync_lsn to an atomic variable
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (4 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 05/14] xfs: make AIL tail pushing independent of the grant lock Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:54 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 07/14] xfs: convert l_tail_lsn " Dave Chinner
` (7 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
log->l_last_sync_lsn is updated in only one critical spot - log
buffer Io completion - and is protected by the grant lock here. This
requires the grant lock to be taken for every log buffer IO
completion. Converting the l_last_sync_lsn variable to an atomic64_t
means that we do not need to take the grant lock in log buffer IO
completion to update it.
This also removes the need for explicitly holding a spinlock to read
the l_last_sync_lsn on 32 bit platforms.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 57 ++++++++++++++++++++-------------------------
fs/xfs/xfs_log_priv.h | 9 ++++++-
fs/xfs/xfs_log_recover.c | 6 ++--
3 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a35ef8f..90a605cc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -360,7 +360,7 @@ xfs_log_reserve(
spin_lock(&log->l_grant_lock);
xlog_grant_push_ail(log, log->l_tail_lsn,
- log->l_last_sync_lsn,
+ atomic64_read(&log->l_last_sync_lsn),
internal_ticket->t_unit_res);
spin_unlock(&log->l_grant_lock);
retval = xlog_regrant_write_log_space(log, internal_ticket);
@@ -378,7 +378,7 @@ xfs_log_reserve(
spin_lock(&log->l_grant_lock);
xlog_grant_push_ail(log, log->l_tail_lsn,
- log->l_last_sync_lsn,
+ atomic64_read(&log->l_last_sync_lsn),
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
spin_unlock(&log->l_grant_lock);
@@ -718,12 +718,8 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (XLOG_FORCED_SHUTDOWN(log))
return;
- if (tail_lsn == 0) {
- /* needed since sync_lsn is 64 bits */
- spin_lock(&log->l_icloglock);
- tail_lsn = log->l_last_sync_lsn;
- spin_unlock(&log->l_icloglock);
- }
+ if (tail_lsn == 0)
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
spin_lock(&log->l_grant_lock);
@@ -845,11 +841,9 @@ xlog_assign_tail_lsn(xfs_mount_t *mp)
tail_lsn = xfs_trans_ail_tail(mp->m_ail);
spin_lock(&log->l_grant_lock);
- if (tail_lsn != 0) {
- log->l_tail_lsn = tail_lsn;
- } else {
- tail_lsn = log->l_tail_lsn = log->l_last_sync_lsn;
- }
+ if (!tail_lsn)
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+ log->l_tail_lsn = tail_lsn;
spin_unlock(&log->l_grant_lock);
return tail_lsn;
@@ -1057,10 +1051,9 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_flags |= XLOG_ACTIVE_RECOVERY;
log->l_prev_block = -1;
- log->l_tail_lsn = xlog_assign_lsn(1, 0);
- /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
- log->l_last_sync_lsn = log->l_tail_lsn;
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
+ log->l_tail_lsn = xlog_assign_lsn(1, 0);
+ atomic64_set(&log->l_last_sync_lsn, log->l_tail_lsn);
log->l_grant_reserve_lsn = xlog_assign_lsn(1, 0);
log->l_grant_write_lsn = xlog_assign_lsn(1, 0);
INIT_LIST_HEAD(&log->l_reserveq);
@@ -2241,7 +2234,7 @@ xlog_state_do_callback(
lowest_lsn = xlog_get_lowest_lsn(log);
if (lowest_lsn &&
XFS_LSN_CMP(lowest_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
+ be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
iclog = iclog->ic_next;
continue; /* Leave this iclog for
* another thread */
@@ -2249,23 +2242,21 @@ xlog_state_do_callback(
iclog->ic_state = XLOG_STATE_CALLBACK;
- spin_unlock(&log->l_icloglock);
- /* l_last_sync_lsn field protected by
- * l_grant_lock. Don't worry about iclog's lsn.
- * No one else can be here except us.
+ /*
+ * update the last_sync_lsn before we drop the
+ * icloglock to ensure we are the only one that
+ * can update it.
*/
- spin_lock(&log->l_grant_lock);
- ASSERT(XFS_LSN_CMP(log->l_last_sync_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- log->l_last_sync_lsn =
- be64_to_cpu(iclog->ic_header.h_lsn);
- spin_unlock(&log->l_grant_lock);
+ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
+ be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
+ atomic64_set(&log->l_last_sync_lsn,
+ be64_to_cpu(iclog->ic_header.h_lsn));
- } else {
- spin_unlock(&log->l_icloglock);
+ } else
ioerrors++;
- }
+
+ spin_unlock(&log->l_icloglock);
/*
* Keep processing entries in the callback list until
@@ -2576,7 +2567,8 @@ redo:
list_add_tail(&tic->t_queue, &log->l_reserveq);
xlog_grant_push_ail(log, log->l_tail_lsn,
- log->l_last_sync_lsn, need_bytes);
+ atomic64_read(&log->l_last_sync_lsn),
+ need_bytes);
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
@@ -2683,7 +2675,8 @@ redo:
}
xlog_grant_push_ail(log, log->l_tail_lsn,
- log->l_last_sync_lsn, need_bytes);
+ atomic64_read(&log->l_last_sync_lsn),
+ need_bytes);
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_regrant_write_sleep(log, tic);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 6fcee10..97db8a8 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -508,7 +508,6 @@ typedef struct log {
spinlock_t l_icloglock; /* grab to change iclog state */
xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
* buffers */
- xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */
int l_curr_cycle; /* Cycle number of log writes */
int l_prev_cycle; /* Cycle number before last
* block increment */
@@ -522,6 +521,14 @@ typedef struct log {
xfs_lsn_t l_grant_reserve_lsn;
xfs_lsn_t l_grant_write_lsn;
+ /*
+ * l_last_sync_lsn is an atomic so it can be set and read without
+ * needing to hold specific locks. To avoid operations contending with
+ * other hot objects, place it on a separate cacheline.
+ */
+ /* lsn of last LR on disk */
+ atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
+
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS];
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d8c62f0..2ce7b48 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -926,7 +926,7 @@ xlog_find_tail(
if (found == 2)
log->l_curr_cycle++;
log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
- log->l_last_sync_lsn = be64_to_cpu(rhead->h_lsn);
+ atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
log->l_grant_reserve_lsn = xlog_assign_lsn(log->l_curr_cycle,
BBTOB(log->l_curr_block));
log->l_grant_write_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -978,9 +978,9 @@ xlog_find_tail(
log->l_tail_lsn =
xlog_assign_lsn(log->l_curr_cycle,
after_umount_blk);
- log->l_last_sync_lsn =
+ atomic64_set(&log->l_last_sync_lsn,
xlog_assign_lsn(log->l_curr_cycle,
- after_umount_blk);
+ after_umount_blk));
*tail_blk = after_umount_blk;
/*
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 07/14] xfs: convert l_tail_lsn to an atomic variable.
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (5 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 06/14] xfs: convert l_last_sync_lsn to an atomic variable Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:56 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 08/14] xfs: convert log grant heads to atomic variables Dave Chinner
` (6 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
log->l_tail_lsn is currently protected by the log grant lock. The
lock is only needed for serialising readers against writers, so we
don't really need the lock if we make the l_tail_lsn variable an
atomic. Converting the l_tail_lsn variable to an atomic64_t means we
can start to peel back the grant lock from various operations.
This also removes the need for explicitly holding a spinlock to read
the l_tail_lsn on 32 bit platforms.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_trace.h | 2 +-
fs/xfs/xfs_log.c | 62 ++++++++++++++++++++---------------------
fs/xfs/xfs_log_priv.h | 11 ++++---
fs/xfs/xfs_log_recover.c | 8 +++---
4 files changed, 41 insertions(+), 42 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 05a4bb9..d2cdc85 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -788,7 +788,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->grant_write_lsn = log->l_grant_write_lsn;
__entry->curr_cycle = log->l_curr_cycle;
__entry->curr_block = log->l_curr_block;
- __entry->tail_lsn = log->l_tail_lsn;
+ __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
),
TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
"t_unit_res %u t_flags %s reserve_headq 0x%p "
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 90a605cc..647f724 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -359,7 +359,7 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
spin_lock(&log->l_grant_lock);
- xlog_grant_push_ail(log, log->l_tail_lsn,
+ xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
internal_ticket->t_unit_res);
spin_unlock(&log->l_grant_lock);
@@ -377,7 +377,7 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
spin_lock(&log->l_grant_lock);
- xlog_grant_push_ail(log, log->l_tail_lsn,
+ xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
@@ -721,22 +721,19 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (tail_lsn == 0)
tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- spin_lock(&log->l_grant_lock);
-
- /* Also an invalid lsn. 1 implies that we aren't passing in a valid
- * tail_lsn.
- */
- if (tail_lsn != 1) {
- log->l_tail_lsn = tail_lsn;
- }
+ /* tail_lsn == 1 implies that we aren't passing in a valid value. */
+ if (tail_lsn != 1)
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
+ spin_lock(&log->l_grant_lock);
if (!list_empty(&log->l_writeq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
- log->l_grant_write_lsn);
+ free_bytes = xlog_space_left(log->l_logsize,
+ atomic64_read(&log->l_tail_lsn),
+ log->l_grant_write_lsn);
list_for_each_entry(tic, &log->l_writeq, t_queue) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -753,8 +750,9 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
- free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
- log->l_grant_reserve_lsn);
+ free_bytes = xlog_space_left(log->l_logsize,
+ atomic64_read(&log->l_tail_lsn),
+ log->l_grant_reserve_lsn);
list_for_each_entry(tic, &log->l_reserveq, t_queue) {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
need_bytes = tic->t_unit_res*tic->t_cnt;
@@ -834,21 +832,19 @@ xfs_log_need_covered(xfs_mount_t *mp)
* We may be holding the log iclog lock upon entering this routine.
*/
xfs_lsn_t
-xlog_assign_tail_lsn(xfs_mount_t *mp)
+xlog_assign_tail_lsn(
+ struct xfs_mount *mp)
{
- xfs_lsn_t tail_lsn;
- xlog_t *log = mp->m_log;
+ xfs_lsn_t tail_lsn;
+ struct log *log = mp->m_log;
tail_lsn = xfs_trans_ail_tail(mp->m_ail);
- spin_lock(&log->l_grant_lock);
if (!tail_lsn)
tail_lsn = atomic64_read(&log->l_last_sync_lsn);
- log->l_tail_lsn = tail_lsn;
- spin_unlock(&log->l_grant_lock);
+ atomic64_set(&log->l_tail_lsn, tail_lsn);
return tail_lsn;
-} /* xlog_assign_tail_lsn */
-
+}
/*
* Return the space in the log between the tail and the head. The head
@@ -1052,8 +1048,8 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_prev_block = -1;
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
- log->l_tail_lsn = xlog_assign_lsn(1, 0);
- atomic64_set(&log->l_last_sync_lsn, log->l_tail_lsn);
+ atomic64_set(&log->l_tail_lsn, xlog_assign_lsn(1, 0));
+ atomic64_set(&log->l_last_sync_lsn, atomic64_read(&log->l_tail_lsn));
log->l_grant_reserve_lsn = xlog_assign_lsn(1, 0);
log->l_grant_write_lsn = xlog_assign_lsn(1, 0);
INIT_LIST_HEAD(&log->l_reserveq);
@@ -2555,7 +2551,8 @@ redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ free_bytes = xlog_space_left(log->l_logsize,
+ atomic64_read(&log->l_tail_lsn),
log->l_grant_reserve_lsn);
/*
* If there is not enough space or there is queued waiter and we
@@ -2566,7 +2563,7 @@ redo:
if (list_empty(&tic->t_queue))
list_add_tail(&tic->t_queue, &log->l_reserveq);
- xlog_grant_push_ail(log, log->l_tail_lsn,
+ xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
need_bytes);
@@ -2643,7 +2640,8 @@ redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
- free_bytes = xlog_space_left(log->l_logsize, log->l_tail_lsn,
+ free_bytes = xlog_space_left(log->l_logsize,
+ atomic64_read(&log->l_tail_lsn),
log->l_grant_write_lsn);
/*
* If there is not enough space or there is queued waiter and we
@@ -2674,7 +2672,7 @@ redo:
goto redo;
}
- xlog_grant_push_ail(log, log->l_tail_lsn,
+ xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
need_bytes);
@@ -2838,11 +2836,11 @@ xlog_state_release_iclog(
if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
/* update tail before writing to iclog */
- xlog_assign_tail_lsn(log->l_mp);
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
sync++;
iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
- xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
/* cycle incremented when incrementing curr_block */
}
spin_unlock(&log->l_icloglock);
@@ -3442,7 +3440,7 @@ STATIC void
xlog_verify_grant_tail(
struct log *log)
{
- xfs_lsn_t tail_lsn = log->l_tail_lsn;
+ xfs_lsn_t tail_lsn = atomic64_read(&log->l_tail_lsn);
xfs_lsn_t write_lsn = log->l_grant_write_lsn;
/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 97db8a8..667d8cb 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -506,8 +506,6 @@ typedef struct log {
* log entries" */
xlog_in_core_t *l_iclog; /* head log queue */
spinlock_t l_icloglock; /* grab to change iclog state */
- xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
- * buffers */
int l_curr_cycle; /* Cycle number of log writes */
int l_prev_cycle; /* Cycle number before last
* block increment */
@@ -522,12 +520,15 @@ typedef struct log {
xfs_lsn_t l_grant_write_lsn;
/*
- * l_last_sync_lsn is an atomic so it can be set and read without
- * needing to hold specific locks. To avoid operations contending with
- * other hot objects, place it on a separate cacheline.
+ * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
+ * read without needing to hold specific locks. To avoid operations
+ * contending with other hot objects, place each of them on a separate
+ * cacheline.
*/
/* lsn of last LR on disk */
atomic64_t l_last_sync_lsn ____cacheline_aligned_in_smp;
+ /* lsn of 1st LR with unflushed * buffers */
+ atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 2ce7b48..c6285fd 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -925,7 +925,7 @@ xlog_find_tail(
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
if (found == 2)
log->l_curr_cycle++;
- log->l_tail_lsn = be64_to_cpu(rhead->h_tail_lsn);
+ atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
log->l_grant_reserve_lsn = xlog_assign_lsn(log->l_curr_cycle,
BBTOB(log->l_curr_block));
@@ -960,7 +960,7 @@ xlog_find_tail(
}
after_umount_blk = (i + hblks + (int)
BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
- tail_lsn = log->l_tail_lsn;
+ tail_lsn = atomic64_read(&log->l_tail_lsn);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = (i + hblks) % log->l_logBBsize;
@@ -975,9 +975,9 @@ xlog_find_tail(
* log records will point recovery to after the
* current unmount record.
*/
- log->l_tail_lsn =
+ atomic64_set(&log->l_tail_lsn,
xlog_assign_lsn(log->l_curr_cycle,
- after_umount_blk);
+ after_umount_blk));
atomic64_set(&log->l_last_sync_lsn,
xlog_assign_lsn(log->l_curr_cycle,
after_umount_blk));
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 08/14] xfs: convert log grant heads to atomic variables
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (6 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 07/14] xfs: convert l_tail_lsn " Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 12:59 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues Dave Chinner
` (5 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Convert the log grant heads to atomic64_t types in preparation for
converting the accounting algorithms to atomic operations. This patch
just converts the variables; the algorithmic changes are in a
separate patch for clarity.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_trace.h | 18 +++++++------
fs/xfs/xfs_log.c | 54 +++++++++++++++++++++--------------------
fs/xfs/xfs_log_priv.h | 4 +-
fs/xfs/xfs_log_recover.c | 8 +++---
4 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index d2cdc85..68c3bdd 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -768,8 +768,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__field(unsigned int, flags)
__field(void *, reserveq)
__field(void *, writeq)
- __field(xfs_lsn_t, grant_reserve_lsn)
- __field(xfs_lsn_t, grant_write_lsn)
+ __field(xfs_lsn_t, grant_reserve_head)
+ __field(xfs_lsn_t, grant_write_head)
__field(int, curr_cycle)
__field(int, curr_block)
__field(xfs_lsn_t, tail_lsn)
@@ -784,8 +784,10 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__entry->flags = tic->t_flags;
__entry->reserveq = log->l_reserveq.next;
__entry->writeq = log->l_writeq.next;
- __entry->grant_reserve_lsn = log->l_grant_reserve_lsn;
- __entry->grant_write_lsn = log->l_grant_write_lsn;
+ __entry->grant_reserve_head =
+ atomic64_read(&log->l_grant_reserve_head);
+ __entry->grant_write_head =
+ atomic64_read(&log->l_grant_write_head);
__entry->curr_cycle = log->l_curr_cycle;
__entry->curr_block = log->l_curr_block;
__entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
@@ -805,10 +807,10 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
__print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
__entry->reserveq,
__entry->writeq,
- CYCLE_LSN(__entry->grant_reserve_lsn),
- BLOCK_LSN(__entry->grant_reserve_lsn),
- CYCLE_LSN(__entry->grant_write_lsn),
- BLOCK_LSN(__entry->grant_write_lsn),
+ CYCLE_LSN(__entry->grant_reserve_head),
+ BLOCK_LSN(__entry->grant_reserve_head),
+ CYCLE_LSN(__entry->grant_write_head),
+ BLOCK_LSN(__entry->grant_write_head),
__entry->curr_cycle,
__entry->curr_block,
CYCLE_LSN(__entry->tail_lsn),
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 647f724..6298310 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -114,32 +114,34 @@ STATIC int xlog_iclogs_empty(xlog_t *log);
*/
static void
__xlog_grant_sub_space(
- xfs_lsn_t *head,
+ atomic64_t *head,
int bytes,
int logsize)
{
+ xfs_lsn_t head_lsn = atomic64_read(head);
int cycle, space;
- cycle = CYCLE_LSN(*head);
- space = BLOCK_LSN(*head);
+ cycle = CYCLE_LSN(head_lsn);
+ space = BLOCK_LSN(head_lsn);
space -= bytes;
if (space < 0) {
cycle--;
space += logsize;
}
- *head = xlog_assign_lsn(cycle, space);
+ atomic64_set(head, xlog_assign_lsn(cycle, space));
}
static void
__xlog_grant_add_space(
- xfs_lsn_t *head,
+ atomic64_t *head,
int bytes,
int logsize)
{
+ xfs_lsn_t head_lsn = atomic64_read(head);
int cycle, space, tmp;
- cycle = CYCLE_LSN(*head);
- space = BLOCK_LSN(*head);
+ cycle = CYCLE_LSN(head_lsn);
+ space = BLOCK_LSN(head_lsn);
tmp = logsize - space;
if (tmp > bytes)
space += bytes;
@@ -147,27 +149,27 @@ __xlog_grant_add_space(
cycle++;
space = bytes - tmp;
}
- *head = xlog_assign_lsn(cycle, space);
+ atomic64_set(head, xlog_assign_lsn(cycle, space));
}
static inline void
xlog_grant_sub_space(struct log *log, int bytes)
{
- __xlog_grant_sub_space(&log->l_grant_write_lsn, bytes, log->l_logsize);
- __xlog_grant_sub_space(&log->l_grant_reserve_lsn, bytes,
+ __xlog_grant_sub_space(&log->l_grant_write_head, bytes, log->l_logsize);
+ __xlog_grant_sub_space(&log->l_grant_reserve_head, bytes,
log->l_logsize);
}
static inline void
xlog_grant_add_space_write(struct log *log, int bytes)
{
- __xlog_grant_add_space(&log->l_grant_write_lsn, bytes, log->l_logsize);
+ __xlog_grant_add_space(&log->l_grant_write_head, bytes, log->l_logsize);
}
static void
xlog_grant_add_space_reserve(struct log *log, int bytes)
{
- __xlog_grant_add_space(&log->l_grant_reserve_lsn, bytes,
+ __xlog_grant_add_space(&log->l_grant_reserve_head, bytes,
log->l_logsize);
}
@@ -732,8 +734,8 @@ xfs_log_move_tail(xfs_mount_t *mp,
panic("Recovery problem");
#endif
free_bytes = xlog_space_left(log->l_logsize,
- atomic64_read(&log->l_tail_lsn),
- log->l_grant_write_lsn);
+ atomic64_read(&log->l_tail_lsn),
+ atomic64_read(&log->l_grant_write_head));
list_for_each_entry(tic, &log->l_writeq, t_queue) {
ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -751,8 +753,8 @@ xfs_log_move_tail(xfs_mount_t *mp,
panic("Recovery problem");
#endif
free_bytes = xlog_space_left(log->l_logsize,
- atomic64_read(&log->l_tail_lsn),
- log->l_grant_reserve_lsn);
+ atomic64_read(&log->l_tail_lsn),
+ atomic64_read(&log->l_grant_reserve_head));
list_for_each_entry(tic, &log->l_reserveq, t_queue) {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
need_bytes = tic->t_unit_res*tic->t_cnt;
@@ -1050,8 +1052,8 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
atomic64_set(&log->l_tail_lsn, xlog_assign_lsn(1, 0));
atomic64_set(&log->l_last_sync_lsn, atomic64_read(&log->l_tail_lsn));
- log->l_grant_reserve_lsn = xlog_assign_lsn(1, 0);
- log->l_grant_write_lsn = xlog_assign_lsn(1, 0);
+ atomic64_set(&log->l_grant_reserve_head, xlog_assign_lsn(1, 0));
+ atomic64_set(&log->l_grant_write_head, xlog_assign_lsn(1, 0));
INIT_LIST_HEAD(&log->l_reserveq);
INIT_LIST_HEAD(&log->l_writeq);
@@ -1240,7 +1242,7 @@ xlog_grant_push_ail(
ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
free_bytes = xlog_space_left(log->l_logsize, tail_lsn,
- log->l_grant_reserve_lsn);
+ atomic64_read(&log->l_grant_reserve_head));
free_blocks = BTOBBT(free_bytes);
/*
@@ -2552,8 +2554,8 @@ redo:
goto error_return;
free_bytes = xlog_space_left(log->l_logsize,
- atomic64_read(&log->l_tail_lsn),
- log->l_grant_reserve_lsn);
+ atomic64_read(&log->l_tail_lsn),
+ atomic64_read(&log->l_grant_reserve_head));
/*
* If there is not enough space or there is queued waiter and we
* are not already on the queue, we need to wait.
@@ -2641,8 +2643,8 @@ redo:
goto error_return;
free_bytes = xlog_space_left(log->l_logsize,
- atomic64_read(&log->l_tail_lsn),
- log->l_grant_write_lsn);
+ atomic64_read(&log->l_tail_lsn),
+ atomic64_read(&log->l_grant_write_head));
/*
* If there is not enough space or there is queued waiter and we
* are not already on the queue, we need to wait.
@@ -3422,8 +3424,8 @@ xlog_verify_grant_head(
struct log *log,
int equals)
{
- xfs_lsn_t reserve = log->l_grant_reserve_lsn;
- xfs_lsn_t write = log->l_grant_write_lsn;
+ xfs_lsn_t reserve = atomic64_read(&log->l_grant_reserve_head);
+ xfs_lsn_t write = atomic64_read(&log->l_grant_write_head);
if (CYCLE_LSN(reserve) == CYCLE_LSN(write)) {
if (equals)
@@ -3441,7 +3443,7 @@ xlog_verify_grant_tail(
struct log *log)
{
xfs_lsn_t tail_lsn = atomic64_read(&log->l_tail_lsn);
- xfs_lsn_t write_lsn = log->l_grant_write_lsn;
+ xfs_lsn_t write_lsn = atomic64_read(&log->l_grant_write_head);
/*
* Check to make sure the grant write head didn't just over lap the
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 667d8cb..971dc8a 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -516,8 +516,8 @@ typedef struct log {
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
struct list_head l_reserveq;
struct list_head l_writeq;
- xfs_lsn_t l_grant_reserve_lsn;
- xfs_lsn_t l_grant_write_lsn;
+ atomic64_t l_grant_reserve_head;
+ atomic64_t l_grant_write_head;
/*
* l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c6285fd..6e7dfbb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -927,10 +927,10 @@ xlog_find_tail(
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
- log->l_grant_reserve_lsn = xlog_assign_lsn(log->l_curr_cycle,
- BBTOB(log->l_curr_block));
- log->l_grant_write_lsn = xlog_assign_lsn(log->l_curr_cycle,
- BBTOB(log->l_curr_block));
+ atomic64_set(&log->l_grant_reserve_head,
+ xlog_assign_lsn(log->l_curr_cycle, BBTOB(log->l_curr_block)));
+ atomic64_set(&log->l_grant_write_head,
+ xlog_assign_lsn(log->l_curr_cycle, BBTOB(log->l_curr_block)));
/*
* Look for unmount record. If we find it, then we know there
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 08/14] xfs: convert log grant heads to atomic variables
2010-11-29 1:38 ` [PATCH 08/14] xfs: convert log grant heads to atomic variables Dave Chinner
@ 2010-12-01 12:59 ` Christoph Hellwig
2010-12-02 2:04 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 12:59 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Mon, Nov 29, 2010 at 12:38:26PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> Convert the log grant heads to atomic64_t types in preparation for
> converting the accounting algorithms to atomic operations. This patch
> just converts the variables; the algorithmic changes are in a
> separate patch for clarity.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> fs/xfs/linux-2.6/xfs_trace.h | 18 +++++++------
> fs/xfs/xfs_log.c | 54 +++++++++++++++++++++--------------------
> fs/xfs/xfs_log_priv.h | 4 +-
> fs/xfs/xfs_log_recover.c | 8 +++---
> 4 files changed, 44 insertions(+), 40 deletions(-)
>
> diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
> index d2cdc85..68c3bdd 100644
> --- a/fs/xfs/linux-2.6/xfs_trace.h
> +++ b/fs/xfs/linux-2.6/xfs_trace.h
> @@ -768,8 +768,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
> __field(unsigned int, flags)
> __field(void *, reserveq)
> __field(void *, writeq)
> - __field(xfs_lsn_t, grant_reserve_lsn)
> - __field(xfs_lsn_t, grant_write_lsn)
> + __field(xfs_lsn_t, grant_reserve_head)
> + __field(xfs_lsn_t, grant_write_head)
What about just naming them _head from the beginning? That would
be a tad cleaner. Same for the actual variables in the log, even
if they change the type here.
Otherwise looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH 08/14] xfs: convert log grant heads to atomic variables
2010-12-01 12:59 ` Christoph Hellwig
@ 2010-12-02 2:04 ` Dave Chinner
0 siblings, 0 replies; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 2:04 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 07:59:20AM -0500, Christoph Hellwig wrote:
> On Mon, Nov 29, 2010 at 12:38:26PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > Convert the log grant heads to atomic64_t types in preparation for
> > converting the accounting algorithms to atomic operations. This patch
> > just converts the variables; the algorithmic changes are in a
> > separate patch for clarity.
> >
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> > fs/xfs/linux-2.6/xfs_trace.h | 18 +++++++------
> > fs/xfs/xfs_log.c | 54 +++++++++++++++++++++--------------------
> > fs/xfs/xfs_log_priv.h | 4 +-
> > fs/xfs/xfs_log_recover.c | 8 +++---
> > 4 files changed, 44 insertions(+), 40 deletions(-)
> >
> > diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
> > index d2cdc85..68c3bdd 100644
> > --- a/fs/xfs/linux-2.6/xfs_trace.h
> > +++ b/fs/xfs/linux-2.6/xfs_trace.h
> > @@ -768,8 +768,8 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
> > __field(unsigned int, flags)
> > __field(void *, reserveq)
> > __field(void *, writeq)
> > - __field(xfs_lsn_t, grant_reserve_lsn)
> > - __field(xfs_lsn_t, grant_write_lsn)
> > + __field(xfs_lsn_t, grant_reserve_head)
> > + __field(xfs_lsn_t, grant_write_head)
>
> What about just naming them _head from the beginning? That would
> be a tad cleaner. Same for the actual variables in the log, even
> if they change the type here.
Will do.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (7 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 08/14] xfs: convert log grant heads to atomic variables Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 13:12 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm Dave Chinner
` (4 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The log grant ticket wait queues are currently protected by the log
grant lock. However, the queues are functionally independent from
each other, and operations on them only require serialisation
against other queue operations now that all of the other log
variables they use are atomic values.
Hence, we can make them independent of the grant lock by introducing
new locks just to protect the list operations. Because the lists
are independent, we can use a lock per list and ensure that reserve
and write head queuing do not contend.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 116 +++++++++++++++++++++++++++++++++++++------------
fs/xfs/xfs_log_priv.h | 16 +++++--
2 files changed, 100 insertions(+), 32 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6298310..8365496 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -727,12 +727,12 @@ xfs_log_move_tail(xfs_mount_t *mp,
if (tail_lsn != 1)
atomic64_set(&log->l_tail_lsn, tail_lsn);
- spin_lock(&log->l_grant_lock);
- if (!list_empty(&log->l_writeq)) {
+ if (!list_empty_careful(&log->l_writeq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
+ spin_lock(&log->l_grant_write_lock);
free_bytes = xlog_space_left(log->l_logsize,
atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_grant_write_head));
@@ -745,13 +745,15 @@ xfs_log_move_tail(xfs_mount_t *mp,
free_bytes -= tic->t_unit_res;
wake_up(&tic->t_wait);
}
+ spin_unlock(&log->l_grant_write_lock);
}
- if (!list_empty(&log->l_reserveq)) {
+ if (!list_empty_careful(&log->l_reserveq)) {
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
panic("Recovery problem");
#endif
+ spin_lock(&log->l_grant_reserve_lock);
free_bytes = xlog_space_left(log->l_logsize,
atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_grant_reserve_head));
@@ -766,9 +768,9 @@ xfs_log_move_tail(xfs_mount_t *mp,
free_bytes -= need_bytes;
wake_up(&tic->t_wait);
}
+ spin_unlock(&log->l_grant_reserve_lock);
}
- spin_unlock(&log->l_grant_lock);
-} /* xfs_log_move_tail */
+}
/*
* Determine if we have a transaction that has gone to disk
@@ -1056,6 +1058,8 @@ xlog_alloc_log(xfs_mount_t *mp,
atomic64_set(&log->l_grant_write_head, xlog_assign_lsn(1, 0));
INIT_LIST_HEAD(&log->l_reserveq);
INIT_LIST_HEAD(&log->l_writeq);
+ spin_lock_init(&log->l_grant_reserve_lock);
+ spin_lock_init(&log->l_grant_write_lock);
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -2525,6 +2529,18 @@ restart:
*
* Once a ticket gets put onto the reserveq, it will only return after
* the needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off the reserveq under the
+ * l_grant_reserve_lock, we only need to take that lock if we are going
+ * to add the ticket to the queue and sleep. We can avoid taking the lock if the
+ * ticket was never added to the reserveq because the t_queue list head will be
+ * empty and we hold the only reference to it so it can safely be checked
+ * unlocked.
*/
STATIC int
xlog_grant_log_space(
@@ -2540,8 +2556,6 @@ xlog_grant_log_space(
panic("grant Recovery problem");
#endif
- /* Is there space or do we need to sleep? */
- spin_lock(&log->l_grant_lock);
trace_xfs_log_grant_enter(log, tic);
ASSERT(list_empty(&tic->t_queue));
@@ -2561,9 +2575,19 @@ redo:
* are not already on the queue, we need to wait.
*/
if (free_bytes < need_bytes ||
- (!list_empty(&log->l_reserveq) && list_empty(&tic->t_queue))) {
- if (list_empty(&tic->t_queue))
+ (list_empty(&tic->t_queue) &&
+ !list_empty_careful(&log->l_reserveq))) {
+
+ spin_lock(&log->l_grant_reserve_lock);
+ if (list_empty(&tic->t_queue)) {
+ /* recheck the queue now we are locked */
+ if (list_empty(&log->l_reserveq) &&
+ free_bytes >= need_bytes) {
+ spin_unlock(&log->l_grant_reserve_lock);
+ goto redo;
+ }
list_add_tail(&tic->t_queue, &log->l_reserveq);
+ }
xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
@@ -2572,20 +2596,29 @@ redo:
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_grant_sleep(log, tic);
+ /* co-ordinate with xfs_log_force_shutdown */
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ spin_unlock(&log->l_grant_reserve_lock);
+ goto error_return;
+ }
add_wait_queue_exclusive(&tic->t_wait, &wait);
__set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&log->l_grant_lock);
+ spin_unlock(&log->l_grant_reserve_lock);
schedule();
remove_wait_queue(&tic->t_wait, &wait);
- spin_lock(&log->l_grant_lock);
trace_xfs_log_grant_wake(log, tic);
goto redo;
}
- list_del_init(&tic->t_queue);
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_reserve_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
+ }
/* we've got enough space */
+ spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, need_bytes);
trace_xfs_log_grant_exit(log, tic);
@@ -2595,7 +2628,9 @@ redo:
return 0;
error_return:
+ spin_lock(&log->l_grant_reserve_lock);
list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_reserve_lock);
trace_xfs_log_grant_error(log, tic);
/*
@@ -2605,13 +2640,15 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
}
/*
* Replenish the byte reservation required by moving the grant write head.
+ *
+ * Similar to xlog_grant_log_space, the function is structured to have a lock
+ * free fast path.
*/
STATIC int
xlog_regrant_write_log_space(
@@ -2633,7 +2670,6 @@ xlog_regrant_write_log_space(
panic("regrant Recovery problem");
#endif
- spin_lock(&log->l_grant_lock);
trace_xfs_log_regrant_write_enter(log, tic);
ASSERT(list_empty(&tic->t_queue));
@@ -2650,15 +2686,26 @@ redo:
* are not already on the queue, we need to wait.
*/
if (free_bytes < need_bytes ||
- (!list_empty(&log->l_writeq) && list_empty(&tic->t_queue))) {
+ (list_empty(&tic->t_queue) &&
+ !list_empty_careful(&log->l_writeq))) {
+
+ spin_lock(&log->l_grant_write_lock);
if (list_empty(&tic->t_queue)) {
+ struct xlog_ticket *ntic;
+ int woke_all = 1;
+
+ /* recheck the queue now we are locked */
+ if (list_empty(&log->l_writeq) &&
+ free_bytes >= need_bytes) {
+ spin_unlock(&log->l_grant_write_lock);
+ goto redo;
+ }
+
/*
* give existing waiters a chance at logspace before
* us. If we woke all the waiters, then immediately
* retry the space, otherwise sleep first.
*/
- struct xlog_ticket *ntic;
- int woke_all = 1;
list_for_each_entry(ntic, &log->l_writeq, t_queue) {
ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
@@ -2670,8 +2717,10 @@ redo:
wake_up(&ntic->t_wait);
}
list_add_tail(&tic->t_queue, &log->l_writeq);
- if (woke_all)
+ if (woke_all) {
+ spin_unlock(&log->l_grant_write_lock);
goto redo;
+ }
}
xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
@@ -2681,20 +2730,29 @@ redo:
XFS_STATS_INC(xs_sleep_logspace);
trace_xfs_log_regrant_write_sleep(log, tic);
+ /* co-ordinate with xfs_log_force_shutdown */
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ spin_unlock(&log->l_grant_write_lock);
+ goto error_return;
+ }
add_wait_queue_exclusive(&tic->t_wait, &wait);
__set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&log->l_grant_lock);
+ spin_unlock(&log->l_grant_write_lock);
schedule();
remove_wait_queue(&tic->t_wait, &wait);
- spin_lock(&log->l_grant_lock);
trace_xfs_log_regrant_write_wake(log, tic);
goto redo;
}
- list_del_init(&tic->t_queue);
+ if (!list_empty(&tic->t_queue)) {
+ spin_lock(&log->l_grant_write_lock);
+ list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
+ }
/* we've got enough space */
+ spin_lock(&log->l_grant_lock);
xlog_grant_add_space_write(log, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
@@ -2705,7 +2763,9 @@ redo:
error_return:
+ spin_lock(&log->l_grant_write_lock);
list_del_init(&tic->t_queue);
+ spin_unlock(&log->l_grant_write_lock);
trace_xfs_log_regrant_write_error(log, tic);
/*
@@ -2715,7 +2775,6 @@ redo:
*/
tic->t_curr_res = 0;
tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- spin_unlock(&log->l_grant_lock);
return XFS_ERROR(EIO);
}
@@ -3676,12 +3735,10 @@ xfs_log_force_umount(
xlog_cil_force(log);
/*
- * We must hold both the GRANT lock and the LOG lock,
- * before we mark the filesystem SHUTDOWN and wake
- * everybody up to tell the bad news.
+ * mark the filesystem and the as in a shutdown state and wake
+ * everybody up to tell them the bad news.
*/
spin_lock(&log->l_icloglock);
- spin_lock(&log->l_grant_lock);
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
XFS_BUF_DONE(mp->m_sb_bp);
@@ -3706,14 +3763,17 @@ xfs_log_force_umount(
* means we have to wake up everybody queued up on reserveq as well as
* writeq. In addition, we make sure in xlog_{re}grant_log_space that
* we don't enqueue anything once the SHUTDOWN flag is set, and this
- * action is protected by the GRANTLOCK.
+ * action is protected by the grant locks.
*/
+ spin_lock(&log->l_grant_reserve_lock);
list_for_each_entry(tic, &log->l_reserveq, t_queue)
wake_up(&tic->t_wait);
+ spin_unlock(&log->l_grant_reserve_lock);
+ spin_lock(&log->l_grant_write_lock);
list_for_each_entry(tic, &log->l_writeq, t_queue)
wake_up(&tic->t_wait);
- spin_unlock(&log->l_grant_lock);
+ spin_unlock(&log->l_grant_write_lock);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
ASSERT(!logerror);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 971dc8a..621002c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -514,10 +514,6 @@ typedef struct log {
/* The following block of fields are changed while holding grant_lock */
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
- struct list_head l_reserveq;
- struct list_head l_writeq;
- atomic64_t l_grant_reserve_head;
- atomic64_t l_grant_write_head;
/*
* l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
@@ -530,6 +526,18 @@ typedef struct log {
/* lsn of 1st LR with unflushed * buffers */
atomic64_t l_tail_lsn ____cacheline_aligned_in_smp;
+ /*
+ * ticket grant locks, queues and accounting have their own cachlines
+ * as these are quite hot and can be operated on concurrently.
+ */
+ spinlock_t l_grant_reserve_lock ____cacheline_aligned_in_smp;
+ struct list_head l_reserveq;
+ atomic64_t l_grant_reserve_head;
+
+ spinlock_t l_grant_write_lock ____cacheline_aligned_in_smp;
+ struct list_head l_writeq;
+ atomic64_t l_grant_write_head;
+
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
char *l_iclog_bak[XLOG_MAX_ICLOGS];
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues
2010-11-29 1:38 ` [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues Dave Chinner
@ 2010-12-01 13:12 ` Christoph Hellwig
2010-12-02 2:10 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 13:12 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
> + /* co-ordinate with xfs_log_force_shutdown */
> + if (XLOG_FORCED_SHUTDOWN(log)) {
> + spin_unlock(&log->l_grant_reserve_lock);
> + goto error_return;
> + }
Where is this coming from? Otherwise the patch looks good to me.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues
2010-12-01 13:12 ` Christoph Hellwig
@ 2010-12-02 2:10 ` Dave Chinner
2010-12-02 11:48 ` Christoph Hellwig
0 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 2:10 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 08:12:08AM -0500, Christoph Hellwig wrote:
> > + /* co-ordinate with xfs_log_force_shutdown */
> > + if (XLOG_FORCED_SHUTDOWN(log)) {
> > + spin_unlock(&log->l_grant_reserve_lock);
> > + goto error_return;
> > + }
>
> Where is this coming from? Otherwise the patch looks good to me.
It handles the race condition between xfs_log_force_shutdown() clearing
all the tickets off the queue and a racing log reserve that had
already checked the shutdown flag and was spinning waiting for the
reserve lock to add the ticket to the queue. The race condition is
documented in xfs_log_force_shutdown()...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues
2010-12-02 2:10 ` Dave Chinner
@ 2010-12-02 11:48 ` Christoph Hellwig
0 siblings, 0 replies; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-02 11:48 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, xfs
On Thu, Dec 02, 2010 at 01:10:31PM +1100, Dave Chinner wrote:
> On Wed, Dec 01, 2010 at 08:12:08AM -0500, Christoph Hellwig wrote:
> > > + /* co-ordinate with xfs_log_force_shutdown */
> > > + if (XLOG_FORCED_SHUTDOWN(log)) {
> > > + spin_unlock(&log->l_grant_reserve_lock);
> > > + goto error_return;
> > > + }
> >
> > Where is this coming from? Otherwise the patch looks good to me.
>
> To handles the race condition between xfs_log_force_shutdown() clearing
> all the tickets off the queue and a racing log reserve that had
> already checked the shutdown flag and was spinning waiting for the
> reserve lock to add the ticket to the queue. The race condition is
> documented in xfs_log_force_shutdown()...
Ok. Please add something like that to the patch description.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (8 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 09/14] xfs: introduce new locks for the log grant ticket wait queues Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 13:15 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 11/14] xfs: remove log grant lock Dave Chinner
` (3 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The only thing that the grant lock remains to protect is the grant head
manipulations when adding or removing space from the log. These calculations
are already based on atomic variables, so we can already update them safely
without locks. However, the grant head manipulations require atomic multi-step
calculations to be executed, which the algorithms currently don't allow.
To make these multi-step calculations atomic, convert the algorithms to
compare-and-exchange loops on the atomic variables. That is, we sample the old
value, perform the calculation and use atomic64_cmpxchg() to attempt to update
the head with the new value. If the head has not changed since we sampled it,
it will succeed and we are done. Otherwise, we rerun the calculation again from
a new sample of the head.
This allows us to remove the grant lock from around all the grant head space
manipulations, and that effectively removes the grant lock from the log
completely.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 133 ++++++++++++++++++++++++++++++------------------------
1 files changed, 74 insertions(+), 59 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8365496..50cf6af 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -111,6 +111,13 @@ STATIC int xlog_iclogs_empty(xlog_t *log);
* is the byte offset into the log for the marker. Unlike the xfs_lsn_t, this
* is held in bytes rather than basic blocks, even though it uses the
* BLOCK_LSN() macro to extract it.
+ *
+ * We use a lock-free compare and exchange algorithm to atomically update
+ * the grant head. That is, we sample the current head, crack it, perform the
+ * calculation, recombine it into a new value, and then conditionally set the
+ * value back into the atomic variable only if it hasn't changed since we first
+ * sampled it. This provides atomic updates of the head, even though we do
+ * non-atomic, multi-step calculation to arrive at the new value.
*/
static void
__xlog_grant_sub_space(
@@ -119,16 +126,23 @@ __xlog_grant_sub_space(
int logsize)
{
xfs_lsn_t head_lsn = atomic64_read(head);
- int cycle, space;
+ xfs_lsn_t old, new;
- cycle = CYCLE_LSN(head_lsn);
- space = BLOCK_LSN(head_lsn);
- space -= bytes;
- if (space < 0) {
- cycle--;
- space += logsize;
- }
- atomic64_set(head, xlog_assign_lsn(cycle, space));
+ do {
+ int cycle, space;
+
+ cycle = CYCLE_LSN(head_lsn);
+ space = BLOCK_LSN(head_lsn);
+ space -= bytes;
+ if (space < 0) {
+ cycle--;
+ space += logsize;
+ }
+
+ old = head_lsn;
+ new = xlog_assign_lsn(cycle, space);
+ head_lsn = atomic64_cmpxchg(head, old, new);
+ } while (head_lsn != old);
}
static void
@@ -138,18 +152,25 @@ __xlog_grant_add_space(
int logsize)
{
xfs_lsn_t head_lsn = atomic64_read(head);
- int cycle, space, tmp;
+ xfs_lsn_t old, new;
- cycle = CYCLE_LSN(head_lsn);
- space = BLOCK_LSN(head_lsn);
- tmp = logsize - space;
- if (tmp > bytes)
- space += bytes;
- else {
- cycle++;
- space = bytes - tmp;
- }
- atomic64_set(head, xlog_assign_lsn(cycle, space));
+ do {
+ int cycle, space, tmp;
+
+ cycle = CYCLE_LSN(head_lsn);
+ space = BLOCK_LSN(head_lsn);
+ tmp = logsize - space;
+ if (tmp > bytes)
+ space += bytes;
+ else {
+ cycle++;
+ space = bytes - tmp;
+ }
+
+ old = head_lsn;
+ new = xlog_assign_lsn(cycle, space);
+ head_lsn = atomic64_cmpxchg(head, old, new);
+ } while (head_lsn != old);
}
static inline void
@@ -360,11 +381,9 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- spin_lock(&log->l_grant_lock);
xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
internal_ticket->t_unit_res);
- spin_unlock(&log->l_grant_lock);
retval = xlog_regrant_write_log_space(log, internal_ticket);
} else {
/* may sleep if need to allocate more tickets */
@@ -378,12 +397,10 @@ xfs_log_reserve(
trace_xfs_log_reserve(log, internal_ticket);
- spin_lock(&log->l_grant_lock);
xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
atomic64_read(&log->l_last_sync_lsn),
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
- spin_unlock(&log->l_grant_lock);
retval = xlog_grant_log_space(log, internal_ticket);
}
@@ -1376,9 +1393,7 @@ xlog_sync(xlog_t *log,
roundoff < BBTOB(1)));
/* move grant heads by roundoff in sync */
- spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, roundoff);
- spin_unlock(&log->l_grant_lock);
/* put cycle number in every block */
xlog_pack_data(log, iclog, roundoff);
@@ -2618,13 +2633,11 @@ redo:
}
/* we've got enough space */
- spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, need_bytes);
trace_xfs_log_grant_exit(log, tic);
xlog_verify_grant_tail(log);
xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
return 0;
error_return:
@@ -2752,13 +2765,11 @@ redo:
}
/* we've got enough space */
- spin_lock(&log->l_grant_lock);
xlog_grant_add_space_write(log, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
xlog_verify_grant_tail(log);
xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
return 0;
@@ -2779,23 +2790,23 @@ redo:
}
-/* The first cnt-1 times through here we don't need to
- * move the grant write head because the permanent
- * reservation has reserved cnt times the unit amount.
- * Release part of current permanent unit reservation and
- * reset current reservation to be one units worth. Also
- * move grant reservation head forward.
+/*
+ * The first cnt-1 times through here we don't need to move the grant write
+ * head because the permanent reservation has reserved cnt times the unit
+ * amount. Release part of current permanent unit reservation and reset
+ * current reservation to be one units worth. Also move grant reservation head
+ * forward.
*/
STATIC void
-xlog_regrant_reserve_log_space(xlog_t *log,
- xlog_ticket_t *ticket)
+xlog_regrant_reserve_log_space(
+ struct log *log,
+ struct xlog_ticket *ticket)
{
trace_xfs_log_regrant_reserve_enter(log, ticket);
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- spin_lock(&log->l_grant_lock);
xlog_grant_sub_space(log, ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
@@ -2805,20 +2816,17 @@ xlog_regrant_reserve_log_space(xlog_t *log,
xlog_verify_grant_head(log, 1);
/* just return if we still have some of the pre-reserved space */
- if (ticket->t_cnt > 0) {
- spin_unlock(&log->l_grant_lock);
+ if (ticket->t_cnt > 0)
return;
- }
xlog_grant_add_space_reserve(log, ticket->t_unit_res);
trace_xfs_log_regrant_reserve_exit(log, ticket);
xlog_verify_grant_head(log, 0);
- spin_unlock(&log->l_grant_lock);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
-} /* xlog_regrant_reserve_log_space */
+}
/*
@@ -2836,34 +2844,33 @@ xlog_regrant_reserve_log_space(xlog_t *log,
* in the current reservation field.
*/
STATIC void
-xlog_ungrant_log_space(xlog_t *log,
- xlog_ticket_t *ticket)
+xlog_ungrant_log_space(
+ struct log *log,
+ struct xlog_ticket *ticket)
{
- if (ticket->t_cnt > 0)
- ticket->t_cnt--;
+ int space;
- spin_lock(&log->l_grant_lock);
trace_xfs_log_ungrant_enter(log, ticket);
+ if (ticket->t_cnt > 0)
+ ticket->t_cnt--;
- xlog_grant_sub_space(log, ticket->t_curr_res);
-
- trace_xfs_log_ungrant_sub(log, ticket);
-
- /* If this is a permanent reservation ticket, we may be able to free
+ /*
+ * If this is a permanent reservation ticket, we may be able to free
* up more space based on the remaining count.
*/
+ space = ticket->t_curr_res;
if (ticket->t_cnt > 0) {
ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
- xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
+ space += ticket->t_unit_res * ticket->t_cnt;
}
+ trace_xfs_log_ungrant_sub(log, ticket);
+ xlog_grant_sub_space(log, space);
trace_xfs_log_ungrant_exit(log, ticket);
-
xlog_verify_grant_head(log, 1);
- spin_unlock(&log->l_grant_lock);
- xfs_log_move_tail(log->l_mp, 1);
-} /* xlog_ungrant_log_space */
+ xfs_log_move_tail(log->l_mp, 1);
+}
/*
* Flush iclog to disk if this is the last reference to the given iclog and
@@ -3478,11 +3485,18 @@ xlog_verify_dest_ptr(
xlog_panic("xlog_verify_dest_ptr: invalid ptr");
}
+/*
+ * XXX: because we cannot atomically sample both the reserve and write heads,
+ * we cannot verify them reliably as they may be sampled in the middle of
+ * racing modifications. Hence just taking snapshots of the heads can give an
+ * incorrect view of the state of log. Hence just disable this check for now.
+ */
STATIC void
xlog_verify_grant_head(
struct log *log,
int equals)
{
+#if 0
xfs_lsn_t reserve = atomic64_read(&log->l_grant_reserve_head);
xfs_lsn_t write = atomic64_read(&log->l_grant_write_head);
@@ -3495,6 +3509,7 @@ xlog_verify_grant_head(
ASSERT(CYCLE_LSN(reserve) - 1 == CYCLE_LSN(write));
ASSERT(BLOCK_LSN(write) >= BLOCK_LSN(reserve));
}
+#endif
}
STATIC void
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm
2010-11-29 1:38 ` [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm Dave Chinner
@ 2010-12-01 13:15 ` Christoph Hellwig
2010-12-02 2:11 ` Dave Chinner
0 siblings, 1 reply; 42+ messages in thread
From: Christoph Hellwig @ 2010-12-01 13:15 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Mon, Nov 29, 2010 at 12:38:28PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The only thing that the grant lock remains to protect is the grant head
> manipulations when adding or removing space from the log. These calculations
> are already based on atomic variables, so we can already update them safely
> without locks. However, the grant head manipulations require atomic multi-step
> calculations to be executed, which the algorithms currently don't allow.
>
> To make these multi-step calculations atomic, convert the algorithms to
> compare-and-exchange loops on the atomic variables. That is, we sample the old
> value, perform the calculation and use atomic64_cmpxchg() to attempt to update
> the head with the new value. If the head has not changed since we sampled it,
> it will succeed and we are done. Otherwise, we rerun the calculation again from
> a new sample of the head.
>
> This allows us to remove the grant lock from around all the grant head space
> manipulations, and that effectively removes the grant lock from the log
> completely.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> fs/xfs/xfs_log.c | 133 ++++++++++++++++++++++++++++++------------------------
> 1 files changed, 74 insertions(+), 59 deletions(-)
>
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index 8365496..50cf6af 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -111,6 +111,13 @@ STATIC int xlog_iclogs_empty(xlog_t *log);
> * is the byte offset into the log for the marker. Unlike the xfs_lsn_t, this
> * is held in bytes rather than basic blocks, even though it uses the
> * BLOCK_LSN() macro to extract it.
> + *
> + * We use a lock-free compare and exchange algorithm to atomically update
> + * the grant head. That is, we sample the current head, crack it, perform the
> + * calculation, recombine it into a new value, and then conditionally set the
> + * value back into the atomic variable only if it hasn't changed since we first
> + * sampled it. This provides atomic updates of the head, even though we do
> + * non-atomic, multi-step calculation to arrive at the new value.
> */
> static void
> __xlog_grant_sub_space(
> @@ -119,16 +126,23 @@ __xlog_grant_sub_space(
> int logsize)
> {
> xfs_lsn_t head_lsn = atomic64_read(head);
> - int cycle, space;
> + xfs_lsn_t old, new;
>
> - cycle = CYCLE_LSN(head_lsn);
> - space = BLOCK_LSN(head_lsn);
> - space -= bytes;
> - if (space < 0) {
> - cycle--;
> - space += logsize;
> - }
> - atomic64_set(head, xlog_assign_lsn(cycle, space));
> + do {
> + int cycle, space;
> +
> + cycle = CYCLE_LSN(head_lsn);
> + space = BLOCK_LSN(head_lsn);
> + space -= bytes;
> + if (space < 0) {
> + cycle--;
> + space += logsize;
> + }
> +
> + old = head_lsn;
> + new = xlog_assign_lsn(cycle, space);
> + head_lsn = atomic64_cmpxchg(head, old, new);
> + } while (head_lsn != old);
> }
>
> static void
> @@ -138,18 +152,25 @@ __xlog_grant_add_space(
> int logsize)
> {
> xfs_lsn_t head_lsn = atomic64_read(head);
> - int cycle, space, tmp;
> + xfs_lsn_t old, new;
>
> - cycle = CYCLE_LSN(head_lsn);
> - space = BLOCK_LSN(head_lsn);
> - tmp = logsize - space;
> - if (tmp > bytes)
> - space += bytes;
> - else {
> - cycle++;
> - space = bytes - tmp;
> - }
> - atomic64_set(head, xlog_assign_lsn(cycle, space));
> + do {
> + int cycle, space, tmp;
> +
> + cycle = CYCLE_LSN(head_lsn);
> + space = BLOCK_LSN(head_lsn);
> + tmp = logsize - space;
> + if (tmp > bytes)
> + space += bytes;
> + else {
> + cycle++;
> + space = bytes - tmp;
> + }
> +
> + old = head_lsn;
> + new = xlog_assign_lsn(cycle, space);
> + head_lsn = atomic64_cmpxchg(head, old, new);
> + } while (head_lsn != old);
> }
>
> static inline void
> @@ -360,11 +381,9 @@ xfs_log_reserve(
>
> trace_xfs_log_reserve(log, internal_ticket);
>
> - spin_lock(&log->l_grant_lock);
> xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
> atomic64_read(&log->l_last_sync_lsn),
> internal_ticket->t_unit_res);
> - spin_unlock(&log->l_grant_lock);
> retval = xlog_regrant_write_log_space(log, internal_ticket);
> } else {
> /* may sleep if need to allocate more tickets */
> @@ -378,12 +397,10 @@ xfs_log_reserve(
>
> trace_xfs_log_reserve(log, internal_ticket);
>
> - spin_lock(&log->l_grant_lock);
> xlog_grant_push_ail(log, atomic64_read(&log->l_tail_lsn),
> atomic64_read(&log->l_last_sync_lsn),
> (internal_ticket->t_unit_res *
> internal_ticket->t_cnt));
> - spin_unlock(&log->l_grant_lock);
> retval = xlog_grant_log_space(log, internal_ticket);
> }
>
> @@ -1376,9 +1393,7 @@ xlog_sync(xlog_t *log,
> roundoff < BBTOB(1)));
>
> /* move grant heads by roundoff in sync */
> - spin_lock(&log->l_grant_lock);
> xlog_grant_add_space(log, roundoff);
> - spin_unlock(&log->l_grant_lock);
>
> /* put cycle number in every block */
> xlog_pack_data(log, iclog, roundoff);
> @@ -2618,13 +2633,11 @@ redo:
> }
>
> /* we've got enough space */
> - spin_lock(&log->l_grant_lock);
> xlog_grant_add_space(log, need_bytes);
>
> trace_xfs_log_grant_exit(log, tic);
> xlog_verify_grant_tail(log);
> xlog_verify_grant_head(log, 1);
> - spin_unlock(&log->l_grant_lock);
> return 0;
>
> error_return:
> @@ -2752,13 +2765,11 @@ redo:
> }
>
> /* we've got enough space */
> - spin_lock(&log->l_grant_lock);
> xlog_grant_add_space_write(log, need_bytes);
>
> trace_xfs_log_regrant_write_exit(log, tic);
> xlog_verify_grant_tail(log);
> xlog_verify_grant_head(log, 1);
> - spin_unlock(&log->l_grant_lock);
> return 0;
>
>
> @@ -2779,23 +2790,23 @@ redo:
> }
>
>
> -/* The first cnt-1 times through here we don't need to
> - * move the grant write head because the permanent
> - * reservation has reserved cnt times the unit amount.
> - * Release part of current permanent unit reservation and
> - * reset current reservation to be one units worth. Also
> - * move grant reservation head forward.
> +/*
> + * The first cnt-1 times through here we don't need to move the grant write
> + * head because the permanent reservation has reserved cnt times the unit
> + * amount. Release part of current permanent unit reservation and reset
> + * current reservation to be one units worth. Also move grant reservation head
> + * forward.
> */
> STATIC void
> -xlog_regrant_reserve_log_space(xlog_t *log,
> - xlog_ticket_t *ticket)
> +xlog_regrant_reserve_log_space(
> + struct log *log,
> + struct xlog_ticket *ticket)
> {
> trace_xfs_log_regrant_reserve_enter(log, ticket);
>
> if (ticket->t_cnt > 0)
> ticket->t_cnt--;
>
> - spin_lock(&log->l_grant_lock);
> xlog_grant_sub_space(log, ticket->t_curr_res);
> ticket->t_curr_res = ticket->t_unit_res;
> xlog_tic_reset_res(ticket);
> @@ -2805,20 +2816,17 @@ xlog_regrant_reserve_log_space(xlog_t *log,
> xlog_verify_grant_head(log, 1);
>
> /* just return if we still have some of the pre-reserved space */
> - if (ticket->t_cnt > 0) {
> - spin_unlock(&log->l_grant_lock);
> + if (ticket->t_cnt > 0)
> return;
> - }
>
> xlog_grant_add_space_reserve(log, ticket->t_unit_res);
>
> trace_xfs_log_regrant_reserve_exit(log, ticket);
>
> xlog_verify_grant_head(log, 0);
> - spin_unlock(&log->l_grant_lock);
> ticket->t_curr_res = ticket->t_unit_res;
> xlog_tic_reset_res(ticket);
> -} /* xlog_regrant_reserve_log_space */
> +}
>
>
> /*
> @@ -2836,34 +2844,33 @@ xlog_regrant_reserve_log_space(xlog_t *log,
> * in the current reservation field.
> */
> STATIC void
> -xlog_ungrant_log_space(xlog_t *log,
> - xlog_ticket_t *ticket)
> +xlog_ungrant_log_space(
> + struct log *log,
> + struct xlog_ticket *ticket)
> {
> - if (ticket->t_cnt > 0)
> - ticket->t_cnt--;
> + int space;
>
> - spin_lock(&log->l_grant_lock);
> trace_xfs_log_ungrant_enter(log, ticket);
> + if (ticket->t_cnt > 0)
> + ticket->t_cnt--;
>
> - xlog_grant_sub_space(log, ticket->t_curr_res);
> -
> - trace_xfs_log_ungrant_sub(log, ticket);
> -
> - /* If this is a permanent reservation ticket, we may be able to free
> + /*
> + * If this is a permanent reservation ticket, we may be able to free
> * up more space based on the remaining count.
> */
> + space = ticket->t_curr_res;
> if (ticket->t_cnt > 0) {
> ASSERT(ticket->t_flags & XLOG_TIC_PERM_RESERV);
> - xlog_grant_sub_space(log, ticket->t_unit_res*ticket->t_cnt);
> + space += ticket->t_unit_res * ticket->t_cnt;
> }
> + trace_xfs_log_ungrant_sub(log, ticket);
> + xlog_grant_sub_space(log, space);
>
> trace_xfs_log_ungrant_exit(log, ticket);
> -
> xlog_verify_grant_head(log, 1);
> - spin_unlock(&log->l_grant_lock);
> - xfs_log_move_tail(log->l_mp, 1);
> -} /* xlog_ungrant_log_space */
>
> + xfs_log_move_tail(log->l_mp, 1);
> +}
>
> /*
> * Flush iclog to disk if this is the last reference to the given iclog and
> @@ -3478,11 +3485,18 @@ xlog_verify_dest_ptr(
> xlog_panic("xlog_verify_dest_ptr: invalid ptr");
> }
>
> +/*
> + * XXX: because we cannot atomically sample both the reserve and write heads,
> + * we cannot verify them reliably as they may be sampled in the middle of
> + * racing modifications. Hence just taking snapshots of the heads can give an
> > + * incorrect view of the state of the log. Hence just disable this check for now.
> + */
> STATIC void
> xlog_verify_grant_head(
I can't see any way to keep this check with the atomic reserve/write
heads, so we might as well remove it entirely.
Otherwise the patch looks good,
Reviewed-by: Christoph Hellwig <hch@lst.de>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm
2010-12-01 13:15 ` Christoph Hellwig
@ 2010-12-02 2:11 ` Dave Chinner
0 siblings, 0 replies; 42+ messages in thread
From: Dave Chinner @ 2010-12-02 2:11 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: xfs
On Wed, Dec 01, 2010 at 08:15:13AM -0500, Christoph Hellwig wrote:
> On Mon, Nov 29, 2010 at 12:38:28PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > The only thing that the grant lock remains to protect is the grant head
> > manipulations when adding or removing space from the log. These calculations
> > are already based on atomic variables, so we can already update them safely
> > without locks. However, the grant head manipulations require atomic multi-step
> > calculations to be executed, which the algorithms currently don't allow.
> >
> > To make these multi-step calculations atomic, convert the algorithms to
> > compare-and-exchange loops on the atomic variables. That is, we sample the old
> > value, perform the calculation and use atomic64_cmpxchg() to attempt to update
> > the head with the new value. If the head has not changed since we sampled it,
> > it will succeed and we are done. Otherwise, we rerun the calculation again from
> > a new sample of the head.
> >
> > This allows us to remove the grant lock from around all the grant head space
> > manipulations, and that effectively removes the grant lock from the log
> > completely.
> >
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
....
> > @@ -3478,11 +3485,18 @@ xlog_verify_dest_ptr(
> > xlog_panic("xlog_verify_dest_ptr: invalid ptr");
> > }
> >
> > +/*
> > + * XXX: because we cannot atomically sample both the reserve and write heads,
> > + * we cannot verify them reliably as they may be sampled in the middle of
> > + * racing modifications. Hence just taking snapshots of the heads can give an
> > + * incorrect view of the state of log. Hence just disable this check for now.
> > + */
> > STATIC void
> > xlog_verify_grant_head(
>
> I can't see any way to keep this check with the atomic reserve/write
> heads, so we might as well remove it entirely.
Ok, will do.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH 11/14] xfs: remove log grant lock
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (9 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 10/14] xfs: convert grant head manipulations to lockless algorithm Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 13:15 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 12/14] xfs: kill useless spinlock_destroy macro Dave Chinner
` (2 subsequent siblings)
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The log grant lock no longer protects anything, so remove the remaining
initialisation and teardown code that still exists.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 3 ---
fs/xfs/xfs_log_priv.h | 3 ---
2 files changed, 0 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 50cf6af..d54d8f8 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1118,7 +1118,6 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
- spin_lock_init(&log->l_grant_lock);
sv_init(&log->l_flush_wait, 0, "flush_wait");
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
@@ -1199,7 +1198,6 @@ out_free_iclog:
kmem_free(iclog);
}
spinlock_destroy(&log->l_icloglock);
- spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
@@ -1516,7 +1514,6 @@ xlog_dealloc_log(xlog_t *log)
iclog = next_iclog;
}
spinlock_destroy(&log->l_icloglock);
- spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 621002c..cd62400 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -512,9 +512,6 @@ typedef struct log {
int l_curr_block; /* current logical log block */
int l_prev_block; /* previous logical log block */
- /* The following block of fields are changed while holding grant_lock */
- spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
-
/*
* l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
* read without needing to hold specific locks. To avoid operations
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 12/14] xfs: kill useless spinlock_destroy macro
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (10 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 11/14] xfs: remove log grant lock Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 13:19 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 13/14] xfs: replace use of sv_t with waitqueues in the log Dave Chinner
2010-11-29 1:38 ` [PATCH 14/14] xfs: remove sv wrappers Dave Chinner
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
It is only used in 2 places in the log code, and is an empty macro,
so get rid of it.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/xfs_linux.h | 2 --
fs/xfs/xfs_log.c | 2 --
2 files changed, 0 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 9fa4f2a..559bf25 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -114,8 +114,6 @@
#define current_restore_flags_nested(sp, f) \
(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-#define spinlock_destroy(lock)
-
#define NBBY 8 /* number of bits per byte */
/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d54d8f8..5c455cb 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1197,7 +1197,6 @@ out_free_iclog:
}
kmem_free(iclog);
}
- spinlock_destroy(&log->l_icloglock);
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
@@ -1513,7 +1512,6 @@ xlog_dealloc_log(xlog_t *log)
kmem_free(iclog);
iclog = next_iclog;
}
- spinlock_destroy(&log->l_icloglock);
xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL;
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 13/14] xfs: replace use of sv_t with waitqueues in the log
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (11 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 12/14] xfs: kill useless spinlock_destroy macro Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 13:20 ` Christoph Hellwig
2010-11-29 1:38 ` [PATCH 14/14] xfs: remove sv wrappers Dave Chinner
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
Only the log still uses the old Irix wait queue interface. The log
grant lock scaling series killed one of the users, so convert
the remaining users of sv_t to plain wait queues so the sv_t wrapper can
be removed.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_log.c | 51 +++++++++++++++++-------------------------------
fs/xfs/xfs_log_cil.c | 8 +++---
fs/xfs/xfs_log_priv.h | 23 ++++++++++++++++++---
3 files changed, 41 insertions(+), 41 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 5c455cb..abdcbd0 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -613,8 +613,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_force_wait, PMEM,
- &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait,
+ &log->l_icloglock);
} else {
spin_unlock(&log->l_icloglock);
}
@@ -654,8 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_force_wait, PMEM,
- &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait,
+ &log->l_icloglock);
} else {
spin_unlock(&log->l_icloglock);
}
@@ -1118,7 +1118,7 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
- sv_init(&log->l_flush_wait, 0, "flush_wait");
+ init_waitqueue_head(&log->l_flush_wait);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1174,8 +1174,8 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
+ init_waitqueue_head(&iclog->ic_force_wait);
+ init_waitqueue_head(&iclog->ic_write_wait);
iclogp = &iclog->ic_next;
}
@@ -1190,11 +1190,8 @@ xlog_alloc_log(xfs_mount_t *mp,
out_free_iclog:
for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
prev_iclog = iclog->ic_next;
- if (iclog->ic_bp) {
- sv_destroy(&iclog->ic_force_wait);
- sv_destroy(&iclog->ic_write_wait);
+ if (iclog->ic_bp)
xfs_buf_free(iclog->ic_bp);
- }
kmem_free(iclog);
}
xfs_buf_free(log->l_xbuf);
@@ -1505,8 +1502,6 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_force_wait);
- sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
kmem_free(iclog);
@@ -2309,7 +2304,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_force_wait);
+ wake_up_all(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2356,7 +2351,7 @@ xlog_state_do_callback(
spin_unlock(&log->l_icloglock);
if (wake)
- sv_broadcast(&log->l_flush_wait);
+ wake_up_all(&log->l_flush_wait);
}
@@ -2407,7 +2402,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_write_wait);
+ wake_up_all(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2456,7 +2451,7 @@ restart:
XFS_STATS_INC(xs_log_noiclogs);
/* Wait for log writes to have flushed */
- sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
+ xlog_wait(&log->l_flush_wait, &log->l_icloglock);
goto restart;
}
@@ -2559,7 +2554,6 @@ xlog_grant_log_space(
{
int free_bytes;
int need_bytes;
- DECLARE_WAITQUEUE(wait, current);
#ifdef DEBUG
if (log->l_flags & XLOG_ACTIVE_RECOVERY)
@@ -2611,12 +2605,8 @@ redo:
spin_unlock(&log->l_grant_reserve_lock);
goto error_return;
}
- add_wait_queue_exclusive(&tic->t_wait, &wait);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&log->l_grant_reserve_lock);
- schedule();
- remove_wait_queue(&tic->t_wait, &wait);
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
trace_xfs_log_grant_wake(log, tic);
goto redo;
}
@@ -2665,7 +2655,6 @@ xlog_regrant_write_log_space(
{
int free_bytes;
int need_bytes;
- DECLARE_WAITQUEUE(wait, current);
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2743,12 +2732,8 @@ redo:
spin_unlock(&log->l_grant_write_lock);
goto error_return;
}
- add_wait_queue_exclusive(&tic->t_wait, &wait);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(&log->l_grant_write_lock);
- schedule();
- remove_wait_queue(&tic->t_wait, &wait);
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
trace_xfs_log_regrant_write_wake(log, tic);
goto redo;
}
@@ -3086,7 +3071,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3204,8 +3189,8 @@ try_again:
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_write_wait,
- PSWP, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_prev->ic_write_wait,
+ &log->l_icloglock);
if (log_flushed)
*log_flushed = 1;
already_slept = 1;
@@ -3233,7 +3218,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f36f1a2..9dc8125 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -61,7 +61,7 @@ xlog_cil_init(
INIT_LIST_HEAD(&cil->xc_committing);
spin_lock_init(&cil->xc_cil_lock);
init_rwsem(&cil->xc_ctx_lock);
- sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
+ init_waitqueue_head(&cil->xc_commit_wait);
INIT_LIST_HEAD(&ctx->committing);
INIT_LIST_HEAD(&ctx->busy_extents);
@@ -563,7 +563,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
goto restart;
}
}
@@ -587,7 +587,7 @@ restart:
*/
spin_lock(&cil->xc_cil_lock);
ctx->commit_lsn = commit_lsn;
- sv_broadcast(&cil->xc_commit_wait);
+ wake_up_all(&cil->xc_commit_wait);
spin_unlock(&cil->xc_cil_lock);
/* release the hounds! */
@@ -752,7 +752,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
goto restart;
}
if (ctx->sequence != sequence)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index cd62400..230d5ce 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -350,8 +350,8 @@ typedef union xlog_in_core2 {
* and move everything else out to subsequent cachelines.
*/
typedef struct xlog_in_core {
- sv_t ic_force_wait;
- sv_t ic_write_wait;
+ wait_queue_head_t ic_force_wait;
+ wait_queue_head_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp;
@@ -418,7 +418,7 @@ struct xfs_cil {
struct xfs_cil_ctx *xc_ctx;
struct rw_semaphore xc_ctx_lock;
struct list_head xc_committing;
- sv_t xc_commit_wait;
+ wait_queue_head_t xc_commit_wait;
xfs_lsn_t xc_current_sequence;
};
@@ -500,7 +500,7 @@ typedef struct log {
int l_logBBsize; /* size of log in BB chunks */
/* The following block of fields are changed while holding icloglock */
- sv_t l_flush_wait ____cacheline_aligned_in_smp;
+ wait_queue_head_t l_flush_wait ____cacheline_aligned_in_smp;
/* waiting for iclog flush */
int l_covered_state;/* state of "covering disk
* log entries" */
@@ -593,6 +593,21 @@ xlog_cil_force(struct log *log)
*/
#define XLOG_UNMOUNT_REC_TYPE (-1U)
+/*
+ * Wrapper function for waiting on a wait queue serialised against wakeups
+ * by a spinlock. This matches the semantics of all the wait queues used in the
+ * log code.
+ */
+static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(wq, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(lock);
+ schedule();
+ remove_wait_queue(wq, &wait);
+}
#endif /* __KERNEL__ */
#endif /* __XFS_LOG_PRIV_H__ */
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH 14/14] xfs: remove sv wrappers
2010-11-29 1:38 [PATCH 0/14] xfs: grant lock scaling and removal V2 Dave Chinner
` (12 preceding siblings ...)
2010-11-29 1:38 ` [PATCH 13/14] xfs: replace use of sv_t with waitqueues in the log Dave Chinner
@ 2010-11-29 1:38 ` Dave Chinner
2010-12-01 13:20 ` Christoph Hellwig
13 siblings, 1 reply; 42+ messages in thread
From: Dave Chinner @ 2010-11-29 1:38 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The sv_t type is no longer used in XFS. Remove the header file defining the
wrapper and the fragments that still reference it.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/linux-2.6/sv.h | 59 ------------------------------------------
fs/xfs/linux-2.6/xfs_linux.h | 1 -
fs/xfs/quota/xfs_dquot.c | 1 -
3 files changed, 0 insertions(+), 61 deletions(-)
delete mode 100644 fs/xfs/linux-2.6/sv.h
diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h
deleted file mode 100644
index 4dfc7c3..0000000
--- a/fs/xfs/linux-2.6/sv.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SV_H__
-#define __XFS_SUPPORT_SV_H__
-
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-
-/*
- * Synchronisation variables.
- *
- * (Parameters "pri", "svf" and "rts" are not implemented)
- */
-
-typedef struct sv_s {
- wait_queue_head_t waiters;
-} sv_t;
-
-static inline void _sv_wait(sv_t *sv, spinlock_t *lock)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue_exclusive(&sv->waiters, &wait);
- __set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock(lock);
-
- schedule();
-
- remove_wait_queue(&sv->waiters, &wait);
-}
-
-#define sv_init(sv,flag,name) \
- init_waitqueue_head(&(sv)->waiters)
-#define sv_destroy(sv) \
- /*NOTHING*/
-#define sv_wait(sv, pri, lock, s) \
- _sv_wait(sv, lock)
-#define sv_signal(sv) \
- wake_up(&(sv)->waiters)
-#define sv_broadcast(sv) \
- wake_up_all(&(sv)->waiters)
-
-#endif /* __XFS_SUPPORT_SV_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 559bf25..e7cfa27 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -37,7 +37,6 @@
#include <kmem.h>
#include <mrlock.h>
-#include <sv.h>
#include <time.h>
#include <support/debug.h>
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index faf8e1a..d22aa31 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -149,7 +149,6 @@ xfs_qm_dqdestroy(
ASSERT(list_empty(&dqp->q_freelist));
mutex_destroy(&dqp->q_qlock);
- sv_destroy(&dqp->q_pinwait);
kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
atomic_dec(&xfs_Gqm->qm_totaldquots);
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 42+ messages in thread