* [PATCH 1/6] Clean up stale references to semaphores
2008-06-27 8:44 [PATCH 0/6] Remove most users of semaphores from XFS V2 Dave Chinner
@ 2008-06-27 8:44 ` Dave Chinner
2008-06-27 8:44 ` [PATCH 2/6] Replace the XFS buf iodone semaphore with a completion Dave Chinner
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-06-27 8:44 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, dwalker, Dave Chinner
A lot of code has been converted away from semaphores,
but there are still comments that reference semaphore
behaviour. The log code is the worst offender. Update
the comments to reflect what the code really does now.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/linux-2.6/xfs_vnode.c | 6 ++--
fs/xfs/xfs_log.c | 67 ++++++++++++++++++++----------------------
fs/xfs/xfs_log_priv.h | 12 ++++----
3 files changed, 41 insertions(+), 44 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe0..1881c79 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
/*
- * Dedicated vnode inactive/reclaim sync semaphores.
+ * Dedicated vnode inactive/reclaim sync wait queues.
* Prime number of hash buckets since address is used as the key.
*/
#define NVSYNC 37
@@ -84,8 +84,8 @@ vn_ioerror(
/*
* Revalidate the Linux inode from the XFS inode.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
+ * Note: i_size _not_ updated; we must hold the i_mutex when doing
+ * that - callers responsibility.
*/
int
vn_revalidate(
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2497de8..760d543 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -357,11 +357,11 @@ xfs_log_done(xfs_mount_t *mp,
* Asynchronous forces are implemented by setting the WANT_SYNC
* bit in the appropriate in-core log and then returning.
*
- * Synchronous forces are implemented with a semaphore. All callers
- * to force a given lsn to disk will wait on a semaphore attached to the
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
* specific in-core log. When given in-core log finally completes its
* write to disk, that thread will wake up all threads waiting on the
- * semaphore.
+ * sv.
*/
int
_xfs_log_force(
@@ -707,7 +707,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -748,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -838,7 +838,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
@@ -859,7 +859,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
@@ -1285,8 +1285,8 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");
+ sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
+ sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
iclogp = &iclog->ic_next;
}
@@ -1565,8 +1565,8 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_forcesema);
- sv_destroy(&iclog->ic_writesema);
+ sv_destroy(&iclog->ic_force_wait);
+ sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
#ifdef XFS_LOG_TRACE
if (iclog->ic_trace != NULL) {
@@ -1976,7 +1976,7 @@ xlog_write(xfs_mount_t * mp,
/* Clean iclogs starting from the head. This ordering must be
* maintained, so an iclog doesn't become ACTIVE beyond one that
* is SYNCING. This is also required to maintain the notion that we use
- * a counting semaphore to hold off would be writers to the log when every
+ * a ordered wait queue to hold off would be writers to the log when every
* iclog is trying to sync to disk.
*
* State Change: DIRTY -> ACTIVE
@@ -2240,7 +2240,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_forcesema);
+ sv_broadcast(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2302,8 +2302,7 @@ xlog_state_do_callback(
* the second completion goes through.
*
* Callbacks could take time, so they are done outside the scope of the
- * global state machine log lock. Assume that the calls to cvsema won't
- * take a long time. At least we know it won't sleep.
+ * global state machine log lock.
*/
STATIC void
xlog_state_done_syncing(
@@ -2339,7 +2338,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_writesema);
+ sv_broadcast(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2347,11 +2346,9 @@ xlog_state_done_syncing(
/*
* If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
- * sleep. The flush semaphore is set to the number of in-core buffers and
- * decremented around disk syncing. Therefore, if all buffers are syncing,
- * this semaphore will cause new writes to sleep until a sync completes.
- * Otherwise, this code just does p() followed by v(). This approximates
- * a sleep/wakeup except we can't race.
+ * sleep. We wait on the flush queue on the head iclog as that should be
+ * the first iclog to complete flushing. Hence if all iclogs are syncing,
+ * we will wait here and all new writes will sleep until a sync completes.
*
* The in-core logs are used in a circular fashion. They are not used
* out-of-order even when an iclog past the head is free.
@@ -2501,7 +2498,7 @@ xlog_grant_log_space(xlog_t *log,
goto error_return;
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/*
* If we got an error, and the filesystem is shutting down,
* we'll catch it down below. So just continue...
@@ -2527,7 +2524,7 @@ redo:
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
@@ -2626,7 +2623,7 @@ xlog_regrant_write_log_space(xlog_t *log,
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_sema);
+ sv_signal(&ntic->t_wait);
ntic = ntic->t_next;
} while (ntic != log->l_write_headq);
@@ -2637,7 +2634,7 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT,
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT,
&log->l_grant_lock, s);
/* If we're shutting down, this tic is already
@@ -2666,7 +2663,7 @@ redo:
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
xlog_ins_ticketq(&log->l_write_headq, tic);
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/* If we're shutting down, this tic is already off the queue */
if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2909,7 +2906,7 @@ xlog_state_switch_iclogs(xlog_t *log,
* 2. the current iclog is drity, and the previous iclog is in the
* active or dirty state.
*
- * We may sleep (call psema) if:
+ * We may sleep if:
*
* 1. the current iclog is not in the active nor dirty state.
* 2. the current iclog dirty, and the previous iclog is not in the
@@ -3006,7 +3003,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3089,7 +3086,7 @@ try_again:
XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
+ sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
&log->l_icloglock, s);
*log_flushed = 1;
already_slept = 1;
@@ -3109,7 +3106,7 @@ try_again:
!(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
/*
- * Don't wait on the forcesema if we know that we've
+ * Don't wait on completion if we know that we've
* gotten a log write error.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3117,7 +3114,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3173,7 +3170,7 @@ STATIC void
xlog_ticket_put(xlog_t *log,
xlog_ticket_t *ticket)
{
- sv_destroy(&ticket->t_sema);
+ sv_destroy(&ticket->t_wait);
kmem_zone_free(xfs_log_ticket_zone, ticket);
} /* xlog_ticket_put */
@@ -3263,7 +3260,7 @@ xlog_ticket_get(xlog_t *log,
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
+ sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
xlog_tic_reset_res(tic);
@@ -3550,14 +3547,14 @@ xfs_log_force_umount(
*/
if ((tic = log->l_reserve_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
if ((tic = log->l_write_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 6245913..7dcf11e 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -241,7 +241,7 @@ typedef struct xlog_res {
} xlog_res_t;
typedef struct xlog_ticket {
- sv_t t_sema; /* sleep on this semaphore : 20 */
+ sv_t t_wait; /* ticket wait queue : 20 */
struct xlog_ticket *t_next; /* :4|8 */
struct xlog_ticket *t_prev; /* :4|8 */
xlog_tid_t t_tid; /* transaction identifier : 4 */
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {
* xlog_rec_header_t into the reserved space.
* - ic_data follows, so a write to disk can start at the beginning of
* the iclog.
- * - ic_forcesema is used to implement synchronous forcing of the iclog to disk.
+ * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
* - ic_next is the pointer to the next iclog in the ring.
* - ic_bp is a pointer to the buffer used to write this incore log to disk.
* - ic_log is a pointer back to the global log structure.
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {
* and move everything else out to subsequent cachelines.
*/
typedef struct xlog_iclog_fields {
- sv_t ic_forcesema;
- sv_t ic_writesema;
+ sv_t ic_force_wait;
+ sv_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp;
@@ -377,8 +377,8 @@ typedef struct xlog_in_core {
/*
* Defines to save our code from this glop.
*/
-#define ic_forcesema hic_fields.ic_forcesema
-#define ic_writesema hic_fields.ic_writesema
+#define ic_force_wait hic_fields.ic_force_wait
+#define ic_write_wait hic_fields.ic_write_wait
#define ic_next hic_fields.ic_next
#define ic_prev hic_fields.ic_prev
#define ic_bp hic_fields.ic_bp
--
1.5.5.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/6] Replace the XFS buf iodone semaphore with a completion
2008-06-27 8:44 [PATCH 0/6] Remove most users of semaphores from XFS V2 Dave Chinner
2008-06-27 8:44 ` [PATCH 1/6] Clean up stale references to semaphores Dave Chinner
@ 2008-06-27 8:44 ` Dave Chinner
2008-06-27 8:44 ` [PATCH 3/6] Extend completions to provide XFS object flush requirements Dave Chinner
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-06-27 8:44 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, dwalker, Dave Chinner
The xfs_buf_t b_iodonesema is really just a semaphore
that wants to be a completion. Change it to a completion
and remove the last user of the sema_t from XFS.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/linux-2.6/xfs_buf.c | 6 +++---
fs/xfs/linux-2.6/xfs_buf.h | 4 ++--
fs/xfs/xfs_buf_item.c | 2 +-
fs/xfs/xfs_rw.c | 2 +-
4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9cc8f02..9438c90 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -253,7 +253,7 @@ _xfs_buf_initialize(
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
- init_MUTEX_LOCKED(&bp->b_iodonesema);
+ init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_list);
INIT_LIST_HEAD(&bp->b_hash_list);
init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
@@ -1037,7 +1037,7 @@ xfs_buf_ioend(
xfs_buf_iodone_work(&bp->b_iodone_work);
}
} else {
- up(&bp->b_iodonesema);
+ complete(&bp->b_iowait);
}
}
@@ -1275,7 +1275,7 @@ xfs_buf_iowait(
XB_TRACE(bp, "iowait", 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
- down(&bp->b_iodonesema);
+ wait_for_completion(&bp->b_iowait);
XB_TRACE(bp, "iowaited", (long)bp->b_error);
return bp->b_error;
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 29d1d4a..fe01099 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -157,7 +157,7 @@ typedef struct xfs_buf {
xfs_buf_iodone_t b_iodone; /* I/O completion function */
xfs_buf_relse_t b_relse; /* releasing function */
xfs_buf_bdstrat_t b_strat; /* pre-write function */
- struct semaphore b_iodonesema; /* Semaphore for I/O waiters */
+ struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
void *b_fspriv3;
@@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
-#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema);
+#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
#define XFS_BUF_TARGET(bp) ((bp)->b_target)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index d86ca2c..215c36d 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks(
anyway. */
XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse);
XFS_BUF_DONE(bp);
- XFS_BUF_V_IODONESEMA(bp);
+ XFS_BUF_FINISH_IOWAIT(bp);
}
return;
}
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index b0f31c0..3a82576 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -314,7 +314,7 @@ xfs_bioerror_relse(
* ASYNC buffers.
*/
XFS_BUF_ERROR(bp, EIO);
- XFS_BUF_V_IODONESEMA(bp);
+ XFS_BUF_FINISH_IOWAIT(bp);
} else {
xfs_buf_relse(bp);
}
--
1.5.5.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/6] Extend completions to provide XFS object flush requirements
2008-06-27 8:44 [PATCH 0/6] Remove most users of semaphores from XFS V2 Dave Chinner
2008-06-27 8:44 ` [PATCH 1/6] Clean up stale references to semaphores Dave Chinner
2008-06-27 8:44 ` [PATCH 2/6] Replace the XFS buf iodone semaphore with a completion Dave Chinner
@ 2008-06-27 8:44 ` Dave Chinner
2008-06-27 8:44 ` [PATCH 4/6] Replace inode flush semaphore with a completion Dave Chinner
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-06-27 8:44 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, dwalker, Dave Chinner
XFS object flushing doesn't quite match existing completion semantics. It
mixed exclusive access with completion. That is, we need to mark an object as
being flushed before flushing it to disk, and then block any other attempt to
flush it until the completion occurs. We do this but adding an extra count
to the completion before we start using them. However, we still need
to determine if there is a completion in progress, and allow no-blocking
attempts fo completions to decrement the count.
To do this we introduce:
int try_wait_for_completion(struct completion *x)
returns a failure status if done == 0, otherwise decrements done
to zero and returns a "started" status. This is provided
to allow counted completions to begin safely while holding
object locks in inverted order.
int completion_done(struct completion *x)
returns 1 if there is no waiter, 0 if there is a waiter
(i.e. a completion in progress).
This replaces the use of semaphores for providing this exclusion
and completion mechanism.
Version 2:
o remove "flush" based API and just add the minimum necessary
extensions to allow counting completions to do what is needed
by XFS.
o docbook format comments
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
include/linux/completion.h | 45 ++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 45 insertions(+), 0 deletions(-)
diff --git a/include/linux/completion.h b/include/linux/completion.h
index d2961b6..57faa60 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -55,4 +55,49 @@ extern void complete_all(struct completion *);
#define INIT_COMPLETION(x) ((x).done = 0)
+
+/**
+ * try_wait_for_completion - try to decrement a completion without blocking
+ * @x: completion structure
+ *
+ * Returns: 0 if a decrement cannot be done without blocking
+ * 1 if a decrement succeeded.
+ *
+ * If a completion is being used as a counting completion,
+ * attempt to decrement the counter without blocking. This
+ * enables us to avoid waiting if the resource the completion
+ * is protecting is not available.
+ */
+static inline bool try_wait_for_completion(struct completion *x)
+{
+ int ret = 1;
+
+ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+ spin_unlock_irq(&x->wait.lock);
+ return ret;
+}
+
+/**
+ * completion_done - Test to see if a completion has any waiters
+ * @x: completion structure
+ *
+ * Returns: 0 if there are waiters (wait_for_completion() in progress)
+ * 1 if there are no waiters.
+ *
+ */
+static inline bool completion_done(struct completion *x)
+{
+ int ret = 1;
+
+ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+ spin_unlock_irq(&x->wait.lock);
+ return ret;
+}
+
#endif
--
1.5.5.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 4/6] Replace inode flush semaphore with a completion
2008-06-27 8:44 [PATCH 0/6] Remove most users of semaphores from XFS V2 Dave Chinner
` (2 preceding siblings ...)
2008-06-27 8:44 ` [PATCH 3/6] Extend completions to provide XFS object flush requirements Dave Chinner
@ 2008-06-27 8:44 ` Dave Chinner
2008-06-27 8:44 ` [PATCH 5/6] Replace dquot " Dave Chinner
2008-06-27 8:44 ` [PATCH 6/6] Remove the sema_t from XFS Dave Chinner
5 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-06-27 8:44 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, dwalker, Dave Chinner
Use the new completion flush code to implement the inode
flush lock. Removes one of the final users of semaphores
in the XFS code base.
Version 2:
o make flock functions static inlines
o use new completion interfaces
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/xfs_iget.c | 32 ++++++++------------------------
fs/xfs/xfs_inode.c | 11 +++++------
fs/xfs/xfs_inode.h | 25 +++++++++++++++++++++----
fs/xfs/xfs_inode_item.c | 11 +++++------
4 files changed, 39 insertions(+), 40 deletions(-)
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index b07604b..eb4db18 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -216,7 +216,14 @@ finish_inode:
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
init_waitqueue_head(&ip->i_ipin_wait);
atomic_set(&ip->i_pincount, 0);
- initnsema(&ip->i_flock, 1, "xfsfino");
+
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&ip->i_flush);
+ complete(&ip->i_flush);
if (lock_flags)
xfs_ilock(ip, lock_flags);
@@ -775,26 +782,3 @@ xfs_isilocked(
}
#endif
-/*
- * The following three routines simply manage the i_flock
- * semaphore embedded in the inode. This semaphore synchronizes
- * processes attempting to flush the in-core inode back to disk.
- */
-void
-xfs_iflock(xfs_inode_t *ip)
-{
- psema(&(ip->i_flock), PINOD|PLTWAIT);
-}
-
-int
-xfs_iflock_nowait(xfs_inode_t *ip)
-{
- return (cpsema(&(ip->i_flock)));
-}
-
-void
-xfs_ifunlock(xfs_inode_t *ip)
-{
- ASSERT(issemalocked(&(ip->i_flock)));
- vsema(&(ip->i_flock));
-}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bedc661..1047bdc 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2630,7 +2630,6 @@ xfs_idestroy(
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
mrfree(&ip->i_lock);
mrfree(&ip->i_iolock);
- freesema(&ip->i_flock);
#ifdef XFS_INODE_TRACE
ktrace_free(ip->i_trace);
@@ -3048,10 +3047,10 @@ cluster_corrupt_out:
/*
* xfs_iflush() will write a modified inode's changes out to the
* inode's on disk home. The caller must have the inode lock held
- * in at least shared mode and the inode flush semaphore must be
- * held as well. The inode lock will still be held upon return from
+ * in at least shared mode and the inode flush completion must be
+ * active as well. The inode lock will still be held upon return from
* the call and the caller is free to unlock it.
- * The inode flush lock will be unlocked when the inode reaches the disk.
+ * The inode flush will be completed when the inode reaches the disk.
* The flags indicate how the inode's buffer should be written out.
*/
int
@@ -3070,7 +3069,7 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3233,7 +3232,7 @@ xfs_iflush_int(
#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 17a04b6..086282d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -223,7 +223,7 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
- sema_t i_flock; /* inode flush lock */
+ struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
@@ -473,11 +473,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
void xfs_ilock_demote(xfs_inode_t *, uint);
int xfs_isilocked(xfs_inode_t *, uint);
-void xfs_iflock(xfs_inode_t *);
-int xfs_iflock_nowait(xfs_inode_t *);
uint xfs_ilock_map_shared(xfs_inode_t *);
void xfs_iunlock_map_shared(xfs_inode_t *, uint);
-void xfs_ifunlock(xfs_inode_t *);
void xfs_ireclaim(xfs_inode_t *);
int xfs_finish_reclaim(xfs_inode_t *, int, int);
int xfs_finish_reclaim_all(struct xfs_mount *, int);
@@ -570,6 +567,26 @@ extern struct kmem_zone *xfs_ifork_zone;
extern struct kmem_zone *xfs_inode_zone;
extern struct kmem_zone *xfs_ili_zone;
+/*
+ * Manage the i_flush queue embedded in the inode. This completion
+ * queue synchronizes processes attempting to flush the in-core
+ * inode back to disk.
+ */
+static inline void xfs_iflock(xfs_inode_t *ip)
+{
+ wait_for_completion(&ip->i_flush);
+}
+
+static inline int xfs_iflock_nowait(xfs_inode_t *ip)
+{
+ return try_wait_for_completion(&ip->i_flush);
+}
+
+static inline void xfs_ifunlock(xfs_inode_t *ip)
+{
+ complete(&ip->i_flush);
+}
+
#endif /* __KERNEL__ */
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 0eee08a..97c7452 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -779,11 +779,10 @@ xfs_inode_item_pushbuf(
ASSERT(iip->ili_push_owner == current_pid());
/*
- * If flushlock isn't locked anymore, chances are that the
- * inode flush completed and the inode was taken off the AIL.
- * So, just get out.
+ * If a flush is not in progress anymore, chances are that the
+ * inode was taken off the AIL. So, just get out.
*/
- if (!issemalocked(&(ip->i_flock)) ||
+ if (completion_done(&ip->i_flush) ||
((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
iip->ili_pushbuf_flag = 0;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -805,7 +804,7 @@ xfs_inode_item_pushbuf(
* If not, we can flush it async.
*/
dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
- issemalocked(&(ip->i_flock)));
+ !completion_done(&ip->i_flush));
iip->ili_pushbuf_flag = 0;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_buftrace("INODE ITEM PUSH", bp);
@@ -858,7 +857,7 @@ xfs_inode_item_push(
ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
- ASSERT(issemalocked(&(ip->i_flock)));
+ ASSERT(!completion_done(&ip->i_flush));
/*
* Since we were able to lock the inode's flush lock and
* we found it on the AIL, the inode must be dirty. This
--
1.5.5.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 5/6] Replace dquot flush semaphore with a completion
2008-06-27 8:44 [PATCH 0/6] Remove most users of semaphores from XFS V2 Dave Chinner
` (3 preceding siblings ...)
2008-06-27 8:44 ` [PATCH 4/6] Replace inode flush semaphore with a completion Dave Chinner
@ 2008-06-27 8:44 ` Dave Chinner
2008-06-27 8:44 ` [PATCH 6/6] Remove the sema_t from XFS Dave Chinner
5 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-06-27 8:44 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, dwalker, Dave Chinner
Use the new completion flush code to implement the dquot
flush lock. Removes one of the final users of semaphores
in the XFS code base.
Version 2:
o make xfs_qm_dqflock_nowait() a static inline and rename to
match xfs_dqflock/xfs_dqfunlock operations.
o use new completion interface.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/quota/xfs_dquot.c | 34 ++++++++++++----------------------
fs/xfs/quota/xfs_dquot.h | 29 ++++++++++++++++++-----------
fs/xfs/quota/xfs_dquot_item.c | 8 ++++----
fs/xfs/quota/xfs_qm.c | 8 ++++----
4 files changed, 38 insertions(+), 41 deletions(-)
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index fc9f3fb..4a2194d 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,9 +101,16 @@ xfs_qm_dqinit(
if (brandnewdquot) {
dqp->dq_flnext = dqp->dq_flprev = dqp;
mutex_init(&dqp->q_qlock);
- initnsema(&dqp->q_flock, 1, "fdq");
sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&dqp->q_flush);
+ complete(&dqp->q_flush);
+
#ifdef XFS_DQUOT_TRACE
dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP);
xfs_dqtrace_entry(dqp, "DQINIT");
@@ -150,7 +157,6 @@ xfs_qm_dqdestroy(
ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
mutex_destroy(&dqp->q_qlock);
- freesema(&dqp->q_flock);
sv_destroy(&dqp->q_pinwait);
#ifdef XFS_DQUOT_TRACE
@@ -1211,7 +1217,7 @@ xfs_qm_dqflush(
int error;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+ ASSERT(!completion_done(&dqp->q_flush));
xfs_dqtrace_entry(dqp, "DQFLUSH");
/*
@@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done(
xfs_dqfunlock(dqp);
}
-
-int
-xfs_qm_dqflock_nowait(
- xfs_dquot_t *dqp)
-{
- int locked;
-
- locked = cpsema(&((dqp)->q_flock));
-
- /* XXX ifdef these out */
- if (locked)
- (dqp)->dq_flags |= XFS_DQ_FLOCKED;
- return (locked);
-}
-
-
int
xfs_qm_dqlock_nowait(
xfs_dquot_t *dqp)
{
- return (mutex_trylock(&((dqp)->q_qlock)));
+ return mutex_trylock(&dqp->q_qlock);
}
void
xfs_dqlock(
xfs_dquot_t *dqp)
{
- mutex_lock(&(dqp->q_qlock));
+ mutex_lock(&dqp->q_qlock);
}
void
@@ -1468,7 +1458,7 @@ xfs_qm_dqpurge(
* if we're turning off quotas. Basically, we need this flush
* lock, and are willing to block on it.
*/
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
/*
* Block on the flush lock after nudging dquot buffer,
* if it is incore.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index f7393bb..8958d0f 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -82,7 +82,7 @@ typedef struct xfs_dquot {
xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
mutex_t q_qlock; /* quota lock */
- sema_t q_flock; /* flush lock */
+ struct completion q_flush; /* flush completion queue */
uint q_pincount; /* pin count for this dquot */
sv_t q_pinwait; /* sync var for pinning */
#ifdef XFS_DQUOT_TRACE
@@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
/*
- * The following three routines simply manage the q_flock
- * semaphore embedded in the dquot. This semaphore synchronizes
- * processes attempting to flush the in-core dquot back to disk.
+ * Manage the q_flush completion queue embedded in the dquot. This completion
+ * queue synchronizes processes attempting to flush the in-core dquot back to
+ * disk.
*/
-#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\
- (dqp)->dq_flags |= XFS_DQ_FLOCKED; }
-#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \
- vsema(&((dqp)->q_flock)); \
- (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); }
+static inline void xfs_dqflock(xfs_dquot_t *dqp)
+{
+ wait_for_completion(&dqp->q_flush);
+}
+
+static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
+{
+ return try_wait_for_completion(&dqp->q_flush);
+}
+
+static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+{
+ complete(&dqp->q_flush);
+}
-#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock)))
#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -167,7 +175,6 @@ extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
extern int xfs_qm_dqpurge(xfs_dquot_t *);
extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
xfs_disk_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 08d2fc8..f028644 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push(
dqp = logitem->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
+ ASSERT(!completion_done(&dqp->q_flush));
/*
* Since we were able to lock the dquot's flush lock and
@@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf(
* inode flush completed and the inode was taken off the AIL.
* So, just get out.
*/
- if (!issemalocked(&(dqp->q_flock)) ||
+ if (completion_done(&dqp->q_flush) ||
((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
@@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf(
if (bp != NULL) {
if (XFS_BUF_ISDELAYWRITE(bp)) {
dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
- issemalocked(&(dqp->q_flock)));
+ !completion_done(&dqp->q_flush));
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
@@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock(
return (XFS_ITEM_LOCKED);
retval = XFS_ITEM_SUCCESS;
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
/*
* The dquot is already being flushed. It may have been
* flushed delayed write, however, and we don't want to
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 26370a3..234b6d1 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -484,7 +484,7 @@ again:
xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
/*
* If we can't grab the flush lock then check
* to see if the dquot has been flushed delayed
@@ -1062,7 +1062,7 @@ xfs_qm_sync(
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
if (nowait) {
xfs_dqunlock(dqp);
continue;
@@ -2079,7 +2079,7 @@ xfs_qm_shake_freelist(
* Try to grab the flush lock. If this dquot is in the process of
* getting flushed to disk, we don't want to reclaim it.
*/
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
xfs_dqunlock(dqp);
dqp = dqp->dq_flnext;
continue;
@@ -2257,7 +2257,7 @@ xfs_qm_dqreclaim_one(void)
* Try to grab the flush lock. If this dquot is in the process of
* getting flushed to disk, we don't want to reclaim it.
*/
- if (! xfs_qm_dqflock_nowait(dqp)) {
+ if (!xfs_dqflock_nowait(dqp)) {
xfs_dqunlock(dqp);
continue;
}
--
1.5.5.4
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 6/6] Remove the sema_t from XFS.
2008-06-27 8:44 [PATCH 0/6] Remove most users of semaphores from XFS V2 Dave Chinner
` (4 preceding siblings ...)
2008-06-27 8:44 ` [PATCH 5/6] Replace dquot " Dave Chinner
@ 2008-06-27 8:44 ` Dave Chinner
5 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-06-27 8:44 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, dwalker, Dave Chinner
Now that all users of the sema_t are gone from XFS we
can finally kill it.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/linux-2.6/sema.h | 52 ------------------------------------------
fs/xfs/linux-2.6/xfs_linux.h | 2 +-
2 files changed, 1 insertions(+), 53 deletions(-)
delete mode 100644 fs/xfs/linux-2.6/sema.h
diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h
deleted file mode 100644
index 3abe7e9..0000000
--- a/fs/xfs/linux-2.6/sema.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_SEMA_H__
-#define __XFS_SUPPORT_SEMA_H__
-
-#include <linux/time.h>
-#include <linux/wait.h>
-#include <linux/semaphore.h>
-#include <asm/atomic.h>
-
-/*
- * sema_t structure just maps to struct semaphore in Linux kernel.
- */
-
-typedef struct semaphore sema_t;
-
-#define initnsema(sp, val, name) sema_init(sp, val)
-#define psema(sp, b) down(sp)
-#define vsema(sp) up(sp)
-#define freesema(sema) do { } while (0)
-
-static inline int issemalocked(sema_t *sp)
-{
- return down_trylock(sp) || (up(sp), 0);
-}
-
-/*
- * Map cpsema (try to get the sema) to down_trylock. We need to switch
- * the return values since cpsema returns 1 (acquired) 0 (failed) and
- * down_trylock returns the reverse 0 (acquired) 1 (failed).
- */
-static inline int cpsema(sema_t *sp)
-{
- return down_trylock(sp) ? 0 : 1;
-}
-
-#endif /* __XFS_SUPPORT_SEMA_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4d45d93..4fb838f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -45,13 +45,13 @@
#include <mrlock.h>
#include <sv.h>
#include <mutex.h>
-#include <sema.h>
#include <time.h>
#include <support/ktrace.h>
#include <support/debug.h>
#include <support/uuid.h>
+#include <linux/semaphore.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
--
1.5.5.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 1/6] Clean up stale references to semaphores
2008-07-11 1:13 [RESEND, PATCH 0/6] Remove most users of semaphores " Dave Chinner
@ 2008-07-11 1:13 ` Dave Chinner
0 siblings, 0 replies; 8+ messages in thread
From: Dave Chinner @ 2008-07-11 1:13 UTC (permalink / raw)
To: xfs; +Cc: linux-kernel, matthew, Dave Chinner
A lot of code has been converted away from semaphores,
but there are still comments that reference semaphore
behaviour. The log code is the worst offender. Update
the comments to reflect what the code really does now.
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
fs/xfs/linux-2.6/xfs_vnode.c | 6 ++--
fs/xfs/xfs_log.c | 67 ++++++++++++++++++++----------------------
fs/xfs/xfs_log_priv.h | 12 ++++----
3 files changed, 41 insertions(+), 44 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe0..1881c79 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -33,7 +33,7 @@
/*
- * Dedicated vnode inactive/reclaim sync semaphores.
+ * Dedicated vnode inactive/reclaim sync wait queues.
* Prime number of hash buckets since address is used as the key.
*/
#define NVSYNC 37
@@ -84,8 +84,8 @@ vn_ioerror(
/*
* Revalidate the Linux inode from the XFS inode.
- * Note: i_size _not_ updated; we must hold the inode
- * semaphore when doing that - callers responsibility.
+ * Note: i_size _not_ updated; we must hold the i_mutex when doing
+ * that - callers responsibility.
*/
int
vn_revalidate(
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2497de8..760d543 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -357,11 +357,11 @@ xfs_log_done(xfs_mount_t *mp,
* Asynchronous forces are implemented by setting the WANT_SYNC
* bit in the appropriate in-core log and then returning.
*
- * Synchronous forces are implemented with a semaphore. All callers
- * to force a given lsn to disk will wait on a semaphore attached to the
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
* specific in-core log. When given in-core log finally completes its
* write to disk, that thread will wake up all threads waiting on the
- * semaphore.
+ * sv.
*/
int
_xfs_log_force(
@@ -707,7 +707,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -748,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -838,7 +838,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
@@ -859,7 +859,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
@@ -1285,8 +1285,8 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");
+ sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
+ sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
iclogp = &iclog->ic_next;
}
@@ -1565,8 +1565,8 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_forcesema);
- sv_destroy(&iclog->ic_writesema);
+ sv_destroy(&iclog->ic_force_wait);
+ sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
#ifdef XFS_LOG_TRACE
if (iclog->ic_trace != NULL) {
@@ -1976,7 +1976,7 @@ xlog_write(xfs_mount_t * mp,
/* Clean iclogs starting from the head. This ordering must be
* maintained, so an iclog doesn't become ACTIVE beyond one that
* is SYNCING. This is also required to maintain the notion that we use
- * a counting semaphore to hold off would be writers to the log when every
+ * a ordered wait queue to hold off would be writers to the log when every
* iclog is trying to sync to disk.
*
* State Change: DIRTY -> ACTIVE
@@ -2240,7 +2240,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_forcesema);
+ sv_broadcast(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2302,8 +2302,7 @@ xlog_state_do_callback(
* the second completion goes through.
*
* Callbacks could take time, so they are done outside the scope of the
- * global state machine log lock. Assume that the calls to cvsema won't
- * take a long time. At least we know it won't sleep.
+ * global state machine log lock.
*/
STATIC void
xlog_state_done_syncing(
@@ -2339,7 +2338,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_writesema);
+ sv_broadcast(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2347,11 +2346,9 @@ xlog_state_done_syncing(
/*
* If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
- * sleep. The flush semaphore is set to the number of in-core buffers and
- * decremented around disk syncing. Therefore, if all buffers are syncing,
- * this semaphore will cause new writes to sleep until a sync completes.
- * Otherwise, this code just does p() followed by v(). This approximates
- * a sleep/wakeup except we can't race.
+ * sleep. We wait on the flush queue on the head iclog as that should be
+ * the first iclog to complete flushing. Hence if all iclogs are syncing,
+ * we will wait here and all new writes will sleep until a sync completes.
*
* The in-core logs are used in a circular fashion. They are not used
* out-of-order even when an iclog past the head is free.
@@ -2501,7 +2498,7 @@ xlog_grant_log_space(xlog_t *log,
goto error_return;
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/*
* If we got an error, and the filesystem is shutting down,
* we'll catch it down below. So just continue...
@@ -2527,7 +2524,7 @@ redo:
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
@@ -2626,7 +2623,7 @@ xlog_regrant_write_log_space(xlog_t *log,
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_sema);
+ sv_signal(&ntic->t_wait);
ntic = ntic->t_next;
} while (ntic != log->l_write_headq);
@@ -2637,7 +2634,7 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT,
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT,
&log->l_grant_lock, s);
/* If we're shutting down, this tic is already
@@ -2666,7 +2663,7 @@ redo:
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
xlog_ins_ticketq(&log->l_write_headq, tic);
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/* If we're shutting down, this tic is already off the queue */
if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2909,7 +2906,7 @@ xlog_state_switch_iclogs(xlog_t *log,
* 2. the current iclog is drity, and the previous iclog is in the
* active or dirty state.
*
- * We may sleep (call psema) if:
+ * We may sleep if:
*
* 1. the current iclog is not in the active nor dirty state.
* 2. the current iclog dirty, and the previous iclog is not in the
@@ -3006,7 +3003,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3089,7 +3086,7 @@ try_again:
XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
+ sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
&log->l_icloglock, s);
*log_flushed = 1;
already_slept = 1;
@@ -3109,7 +3106,7 @@ try_again:
!(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
/*
- * Don't wait on the forcesema if we know that we've
+ * Don't wait on completion if we know that we've
* gotten a log write error.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3117,7 +3114,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3173,7 +3170,7 @@ STATIC void
xlog_ticket_put(xlog_t *log,
xlog_ticket_t *ticket)
{
- sv_destroy(&ticket->t_sema);
+ sv_destroy(&ticket->t_wait);
kmem_zone_free(xfs_log_ticket_zone, ticket);
} /* xlog_ticket_put */
@@ -3263,7 +3260,7 @@ xlog_ticket_get(xlog_t *log,
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
+ sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
xlog_tic_reset_res(tic);
@@ -3550,14 +3547,14 @@ xfs_log_force_umount(
*/
if ((tic = log->l_reserve_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
if ((tic = log->l_write_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 6245913..7dcf11e 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -241,7 +241,7 @@ typedef struct xlog_res {
} xlog_res_t;
typedef struct xlog_ticket {
- sv_t t_sema; /* sleep on this semaphore : 20 */
+ sv_t t_wait; /* ticket wait queue : 20 */
struct xlog_ticket *t_next; /* :4|8 */
struct xlog_ticket *t_prev; /* :4|8 */
xlog_tid_t t_tid; /* transaction identifier : 4 */
@@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header {
* xlog_rec_header_t into the reserved space.
* - ic_data follows, so a write to disk can start at the beginning of
* the iclog.
- * - ic_forcesema is used to implement synchronous forcing of the iclog to disk.
+ * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
* - ic_next is the pointer to the next iclog in the ring.
* - ic_bp is a pointer to the buffer used to write this incore log to disk.
* - ic_log is a pointer back to the global log structure.
@@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header {
* and move everything else out to subsequent cachelines.
*/
typedef struct xlog_iclog_fields {
- sv_t ic_forcesema;
- sv_t ic_writesema;
+ sv_t ic_force_wait;
+ sv_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
struct xfs_buf *ic_bp;
@@ -377,8 +377,8 @@ typedef struct xlog_in_core {
/*
* Defines to save our code from this glop.
*/
-#define ic_forcesema hic_fields.ic_forcesema
-#define ic_writesema hic_fields.ic_writesema
+#define ic_force_wait hic_fields.ic_force_wait
+#define ic_write_wait hic_fields.ic_write_wait
#define ic_next hic_fields.ic_next
#define ic_prev hic_fields.ic_prev
#define ic_bp hic_fields.ic_bp
--
1.5.6
^ permalink raw reply related [flat|nested] 8+ messages in thread