All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 6/6] xfs: convert xfsbufd to use a workqueue
Date: Thu, 25 Aug 2011 17:17:06 +1000	[thread overview]
Message-ID: <1314256626-11136-7-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1314256626-11136-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

There is no reason we need a thread per filesystem to do the
flushing of the delayed write buffer queue. This can be easily
handled by a global concurrency managed workqueue.

Convert the delayed write buffer handling to use workqueues and
workqueue flushes to implement buffer writeback by embedding a
delayed work structure into the struct xfs_buftarg and using that to
control flushing.  This greatly simplifies the process of flushing
and also removes a bunch of duplicated code between buftarg flushing
and delwri buffer writeback.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_buf.c       |  165 ++++++++++++++++++++----------------------------
 fs/xfs/xfs_buf.h       |    5 +-
 fs/xfs/xfs_dquot.c     |    1 -
 fs/xfs/xfs_trans_ail.c |    2 +-
 4 files changed, 72 insertions(+), 101 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 410de9f..b1b8c0c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -42,9 +42,9 @@
 #include "xfs_trace.h"
 
 static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
+STATIC void xfs_buf_delwri_queue(xfs_buf_t *bp, int unlock);
 
+static struct workqueue_struct *xfs_buf_wq;
 static struct workqueue_struct *xfslogd_workqueue;
 struct workqueue_struct *xfsdatad_workqueue;
 struct workqueue_struct *xfsconvertd_workqueue;
@@ -1407,8 +1407,9 @@ xfs_buf_delwri_queue(
 	}
 
 	if (list_empty(dwq)) {
-		/* start xfsbufd as it is about to have something to do */
-		wake_up_process(bp->b_target->bt_task);
+		/* queue a delayed flush as we are about to queue a buffer */
+		queue_delayed_work(xfs_buf_wq, &bp->b_target->bt_delwrite_work,
+			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
 	}
 
 	bp->b_flags |= _XBF_DELWRI_Q;
@@ -1486,13 +1487,13 @@ STATIC int
 xfs_buf_delwri_split(
 	xfs_buftarg_t	*target,
 	struct list_head *list,
-	unsigned long	age)
+	unsigned long	age,
+	int		force)
 {
 	xfs_buf_t	*bp, *n;
 	struct list_head *dwq = &target->bt_delwrite_queue;
 	spinlock_t	*dwlk = &target->bt_delwrite_lock;
 	int		skipped = 0;
-	int		force;
 
 	force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
 	INIT_LIST_HEAD(list);
@@ -1543,90 +1544,36 @@ xfs_buf_cmp(
 	return 0;
 }
 
-STATIC int
-xfsbufd(
-	void		*data)
-{
-	xfs_buftarg_t   *target = (xfs_buftarg_t *)data;
-
-	current->flags |= PF_MEMALLOC;
-
-	set_freezable();
-
-	do {
-		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
-		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		struct list_head tmp;
-		struct blk_plug plug;
-
-		if (unlikely(freezing(current))) {
-			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-			refrigerator();
-		} else {
-			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-		}
-
-		/* sleep for a long time if there is nothing to do. */
-		if (list_empty(&target->bt_delwrite_queue))
-			tout = MAX_SCHEDULE_TIMEOUT;
-		schedule_timeout_interruptible(tout);
-
-		xfs_buf_delwri_split(target, &tmp, age);
-		list_sort(NULL, &tmp, xfs_buf_cmp);
-
-		blk_start_plug(&plug);
-		while (!list_empty(&tmp)) {
-			struct xfs_buf *bp;
-			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
-			list_del_init(&bp->b_list);
-			xfs_bdstrat_cb(bp);
-		}
-		blk_finish_plug(&plug);
-	} while (!kthread_should_stop());
-
-	return 0;
-}
-
-/*
- *	Handling of buffer targets (buftargs).
- */
-
 /*
- *	Go through all incore buffers, and release buffers if they belong to
- *	the given device. This is used in filesystem error handling to
- *	preserve the consistency of its metadata.
+ * If we are doing a forced flush, then we need to wait for the IO that we
+ * issue to complete.
  */
-int
-xfs_flush_buftarg(
-	xfs_buftarg_t	*target,
-	int		wait)
+static void
+xfs_buf_delwri_work(
+	struct work_struct *work)
 {
-	xfs_buf_t	*bp;
-	int		pincount = 0;
+	struct xfs_buftarg *btp = container_of(to_delayed_work(work),
+					struct xfs_buftarg, bt_delwrite_work);
+	struct xfs_buf	*bp;
+	struct blk_plug	plug;
 	LIST_HEAD(tmp_list);
 	LIST_HEAD(wait_list);
-	struct blk_plug plug;
-
-	xfs_buf_runall_queues(xfsconvertd_workqueue);
-	xfs_buf_runall_queues(xfsdatad_workqueue);
-	xfs_buf_runall_queues(xfslogd_workqueue);
+	long		age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
+	int		force = 0;
 
-	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
-	pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
+	if (test_and_clear_bit(XBT_FORCE_FLUSH, &btp->bt_flags)) {
+		force = 1;
+		age = 0;
+	}
 
-	/*
-	 * Dropped the delayed write list lock, now walk the temporary list.
-	 * All I/O is issued async and then if we need to wait for completion
-	 * we do that after issuing all the IO.
-	 */
+	xfs_buf_delwri_split(btp, &tmp_list, age, force);
 	list_sort(NULL, &tmp_list, xfs_buf_cmp);
 
 	blk_start_plug(&plug);
 	while (!list_empty(&tmp_list)) {
 		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
-		ASSERT(target == bp->b_target);
 		list_del_init(&bp->b_list);
-		if (wait) {
+		if (force) {
 			bp->b_flags &= ~XBF_ASYNC;
 			list_add(&bp->b_list, &wait_list);
 		}
@@ -1634,7 +1581,7 @@ xfs_flush_buftarg(
 	}
 	blk_finish_plug(&plug);
 
-	if (wait) {
+	if (force) {
 		/* Wait for IO to complete. */
 		while (!list_empty(&wait_list)) {
 			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
@@ -1645,7 +1592,39 @@ xfs_flush_buftarg(
 		}
 	}
 
-	return pincount;
+	if (list_empty(&btp->bt_delwrite_queue))
+		return;
+
+	queue_delayed_work(xfs_buf_wq, &btp->bt_delwrite_work,
+			xfs_buf_timer_centisecs * msecs_to_jiffies(10));
+}
+
+/*
+ *	Handling of buffer targets (buftargs).
+ */
+
+/*
+ * Flush all the queued buffer work, then flush any remaining dirty buffers
+ * and wait for them to complete. If there are buffers remaining on the delwri
+ * queue, then they were pinned so couldn't be flushed. Return a value of 1 to
+ * indicate that there were pinned buffers and the caller needs to retry the
+ * flush.
+ */
+int
+xfs_flush_buftarg(
+	xfs_buftarg_t	*target,
+	int		wait)
+{
+	xfs_buf_runall_queues(xfsconvertd_workqueue);
+	xfs_buf_runall_queues(xfsdatad_workqueue);
+	xfs_buf_runall_queues(xfslogd_workqueue);
+
+	set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+	flush_delayed_work_sync(&target->bt_delwrite_work);
+
+	if (!list_empty(&target->bt_delwrite_queue))
+		return 1;
+	return 0;
 }
 
 /*
@@ -1740,7 +1719,6 @@ xfs_free_buftarg(
 	if (mp->m_flags & XFS_MOUNT_BARRIER)
 		xfs_blkdev_issue_flush(btp);
 
-	kthread_stop(btp->bt_task);
 	kmem_free(btp);
 }
 
@@ -1788,20 +1766,6 @@ xfs_setsize_buftarg(
 	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
 }
 
-STATIC int
-xfs_alloc_delwrite_queue(
-	xfs_buftarg_t		*btp,
-	const char		*fsname)
-{
-	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
-	spin_lock_init(&btp->bt_delwrite_lock);
-	btp->bt_flags = 0;
-	btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
-	if (IS_ERR(btp->bt_task))
-		return PTR_ERR(btp->bt_task);
-	return 0;
-}
-
 xfs_buftarg_t *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
@@ -1824,8 +1788,11 @@ xfs_alloc_buftarg(
 	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
-	if (xfs_alloc_delwrite_queue(btp, fsname))
-		goto error;
+
+	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
+	spin_lock_init(&btp->bt_delwrite_lock);
+	INIT_DELAYED_WORK(&btp->bt_delwrite_work, xfs_buf_delwri_work);
+
 	btp->bt_shrinker.shrink = xfs_buftarg_shrink;
 	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
 	register_shrinker(&btp->bt_shrinker);
@@ -1860,8 +1827,13 @@ xfs_buf_init(void)
 	if (!xfsconvertd_workqueue)
 		goto out_destroy_xfsdatad_workqueue;
 
+	xfs_buf_wq = alloc_workqueue("xfsbufd", WQ_MEM_RECLAIM, 8);
+	if (!xfs_buf_wq)
+		goto out_destroy_xfsconvertd_wq;
 	return 0;
 
+ out_destroy_xfsconvertd_wq:
+	destroy_workqueue(xfsconvertd_workqueue);
  out_destroy_xfsdatad_workqueue:
 	destroy_workqueue(xfsdatad_workqueue);
  out_destroy_xfslogd_workqueue:
@@ -1875,6 +1847,7 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
+	destroy_workqueue(xfs_buf_wq);
 	destroy_workqueue(xfsconvertd_workqueue);
 	destroy_workqueue(xfsdatad_workqueue);
 	destroy_workqueue(xfslogd_workqueue);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 620972b..c1aabfd 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
 	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
 
 typedef enum {
-	XBT_FORCE_SLEEP = 0,
-	XBT_FORCE_FLUSH = 1,
+	XBT_FORCE_FLUSH = 0,
 } xfs_buftarg_flags_t;
 
 typedef struct xfs_buftarg {
@@ -104,7 +103,7 @@ typedef struct xfs_buftarg {
 	size_t			bt_smask;
 
 	/* per device delwri queue */
-	struct task_struct	*bt_task;
+	struct delayed_work	bt_delwrite_work;
 	struct list_head	bt_delwrite_queue;
 	spinlock_t		bt_delwrite_lock;
 	unsigned long		bt_flags;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index db62959..1fb9d93 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1446,7 +1446,6 @@ xfs_qm_dqflock_pushbuf_wait(
 		if (xfs_buf_ispinned(bp))
 			xfs_log_force(mp, 0);
 		xfs_buf_delwri_promote(bp);
-		wake_up_process(bp->b_target->bt_task);
 	}
 	xfs_buf_relse(bp);
 out_lock:
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index dd966e0..919a31e 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -495,7 +495,7 @@ xfs_ail_worker(
 
 	if (push_xfsbufd) {
 		/* we've got delayed write buffers to flush */
-		wake_up_process(mp->m_ddev_targp->bt_task);
+		flush_delayed_work(&mp->m_ddev_targp->bt_delwrite_work);
 	}
 
 	/* assume we have more work to do in a short while */
-- 
1.7.5.4

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2011-08-25  7:17 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-08-25  7:17 [PATCH 0/6] xfs: patch queue for Linux 3.2 Dave Chinner
2011-08-25  7:17 ` [PATCH 1/6] xfs: don't serialise direct IO reads on page cache checks Dave Chinner
2011-08-25  7:17 ` [PATCH 2/6] xfs: don't serialise adjacent concurrent direct IO appending writes Dave Chinner
2011-08-25 21:08   ` Alex Elder
2011-08-26  2:19     ` Dave Chinner
2011-08-25  7:17 ` [PATCH 3/6] xfs: Don't allocate new buffers on every call to _xfs_buf_find Dave Chinner
2011-08-25 20:56   ` Alex Elder
2011-08-25 23:57     ` Dave Chinner
2011-08-25  7:17 ` [PATCH 4/6] xfs: reduce the number of log forces from tail pushing Dave Chinner
2011-08-25 20:57   ` Alex Elder
2011-08-25 23:47     ` Dave Chinner
2011-08-25  7:17 ` [PATCH 5/6] xfs: re-arrange all the xfsbufd delwri queue code Dave Chinner
2011-08-25 20:57   ` Alex Elder
2011-08-25  7:17 ` Dave Chinner [this message]
2011-08-25 20:57   ` [PATCH 6/6] xfs: convert xfsbufd to use a workqueue Alex Elder
2011-08-25 23:46     ` Dave Chinner
2011-08-26  0:18       ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1314256626-11136-7-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.