public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 17/18] xfs: add a lru to the XFS buffer cache
Date: Tue, 14 Sep 2010 20:56:16 +1000	[thread overview]
Message-ID: <1284461777-1496-18-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1284461777-1496-1-git-send-email-david@fromorbit.com>

From: Dave Chinner <dchinner@redhat.com>

Introduce a per-buftarg LRU for memory reclaim to operate on. This
is the last piece we need to put in place so that we can fully
control the buffer lifecycle. This allows XFS to be responsibile for
maintaining the working set of buffers under memory pressure instead
of relying on the VM reclaim not to take pages we need out from
underneath us.

The implementation is currently a bit naive - it does not rotate
buffers on the LRU when they are accessed multiple times. Solving
this problem is for a later patch series that re-introduces the
buffer type specific reclaim reference counts to prioritise reclaim
more effectively.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/linux-2.6/xfs_buf.c |   91 ++++++++++++++++++++++++++++++++++---------
 fs/xfs/linux-2.6/xfs_buf.h |    5 ++
 2 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 3b54fee..12b37c6 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -182,6 +182,7 @@ _xfs_buf_initialize(
 	memset(bp, 0, sizeof(xfs_buf_t));
 	atomic_set(&bp->b_hold, 1);
 	init_completion(&bp->b_iowait);
+	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
 	RB_CLEAR_NODE(&bp->b_rbnode);
 	init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
@@ -257,6 +258,8 @@ xfs_buf_free(
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
+	ASSERT(list_empty(&bp->b_lru));
+
 	if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
 		uint		i;
 
@@ -471,6 +474,13 @@ _xfs_buf_find(
 		/* the buffer keeps the perag reference until it is freed */
 		new_bp->b_pag = pag;
 		spin_unlock(&pag->pag_buf_lock);
+
+		/* add to LRU */
+		spin_lock(&btp->bt_lru_lock);
+		list_add_tail(&new_bp->b_lru, &btp->bt_lru);
+		btp->bt_lru_nr++;
+		atomic_inc(&new_bp->b_hold);
+		spin_unlock(&btp->bt_lru_lock);
 	} else {
 		XFS_STATS_INC(xb_miss_locked);
 		spin_unlock(&pag->pag_buf_lock);
@@ -834,12 +844,14 @@ xfs_buf_rele(
 
 	if (!pag) {
 		ASSERT(!bp->b_relse);
+		ASSERT(list_empty(&bp->b_lru));
 		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold))
 			xfs_buf_free(bp);
 		return;
 	}
 
+	ASSERT(!list_empty(&bp->b_lru));
 	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 	if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
@@ -848,6 +860,14 @@ xfs_buf_rele(
 			spin_unlock(&pag->pag_buf_lock);
 			bp->b_relse(bp);
 		} else {
+			struct xfs_buftarg *btp = bp->b_target;
+
+			/* remove from LRU */
+			spin_lock(&btp->bt_lru_lock);
+			list_del_init(&bp->b_lru);
+			btp->bt_lru_nr--;
+			spin_unlock(&btp->bt_lru_lock);
+
 			ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
 			rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
 			spin_unlock(&pag->pag_buf_lock);
@@ -1446,27 +1466,29 @@ xfs_buf_iomove(
  */
 
 /*
- *	Wait for any bufs with callbacks that have been submitted but
- *	have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
  */
 void
 xfs_wait_buftarg(
 	struct xfs_buftarg	*btp)
 {
-	struct xfs_perag	*pag;
-	uint			i;
-
-	for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
-		pag = xfs_perag_get(btp->bt_mount, i);
-		spin_lock(&pag->pag_buf_lock);
-		while (rb_first(&pag->pag_buf_tree)) {
-			spin_unlock(&pag->pag_buf_lock);
+	struct xfs_buf		*bp;
+restart:
+	spin_lock(&btp->bt_lru_lock);
+	while (!list_empty(&btp->bt_lru)) {
+		bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+		if (atomic_read(&bp->b_hold) > 1) {
+			spin_unlock(&btp->bt_lru_lock);
 			delay(100);
-			spin_lock(&pag->pag_buf_lock);
+			goto restart;
 		}
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
+		spin_unlock(&btp->bt_lru_lock);
+		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
 	}
+	spin_unlock(&btp->bt_lru_lock);
 }
 
 int
@@ -1477,15 +1499,44 @@ xfs_buftarg_shrink(
 {
 	struct xfs_buftarg	*btp = container_of(shrink,
 					struct xfs_buftarg, bt_shrinker);
-	if (nr_to_scan) {
-		if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
-			return -1;
-		if (list_empty(&btp->bt_delwrite_queue))
-			return -1;
+	struct xfs_buf		*bp, *n;
+
+	if (!nr_to_scan)
+		return btp->bt_lru_nr;
+
+	spin_lock(&btp->bt_lru_lock);
+	if (test_and_set_bit(XBT_SHRINKER_ACTIVE, &btp->bt_flags)) {
+		/* LRU walk already in progress */
+		spin_unlock(&btp->bt_lru_lock);
+		return -1;
+	}
+
+	list_for_each_entry_safe(bp, n, &btp->bt_lru, b_lru) {
+		if (nr_to_scan-- <= 0)
+			break;
+		/*
+		 * If the lru holds the only reference count on the buffer,
+		 * release it. Otherwise there is another user of the buffer
+		 * and it will be getting repositioned real soon.
+		 */
+		if (atomic_read(&bp->b_hold) > 1)
+			continue;
+		spin_unlock(&btp->bt_lru_lock);
+		xfs_buf_rele(bp);
+		spin_lock(&btp->bt_lru_lock);
+	}
+	clear_bit(XBT_SHRINKER_ACTIVE, &btp->bt_flags);
+	spin_unlock(&btp->bt_lru_lock);
+
+	/* kick the xfsbufd to write and release dirty buffers */
+	if (!test_bit(XBT_FORCE_SLEEP, &btp->bt_flags) &&
+	    !test_bit(XBT_FORCE_FLUSH, &btp->bt_flags) &&
+	    !list_empty(&btp->bt_delwrite_queue)) {
 		set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
 		wake_up_process(btp->bt_task);
 	}
-	return list_empty(&btp->bt_delwrite_queue) ? -1 : 1;
+
+	return btp->bt_lru_nr;
 }
 
 void
@@ -1619,6 +1670,8 @@ xfs_alloc_buftarg(
 	btp->bt_mount = mp;
 	btp->bt_dev =  bdev->bd_dev;
 	btp->bt_bdev = bdev;
+	INIT_LIST_HEAD(&btp->bt_lru);
+	spin_lock_init(&btp->bt_lru_lock);
 	if (xfs_setsize_buftarg_early(btp, bdev))
 		goto error;
 	if (xfs_mapping_buftarg(btp, bdev))
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 6e9310b..36f71aa 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -119,6 +119,7 @@ typedef unsigned int xfs_buf_flags_t;
 typedef enum {
 	XBT_FORCE_SLEEP = 0,
 	XBT_FORCE_FLUSH = 1,
+	XBT_SHRINKER_ACTIVE = 2,
 } xfs_buftarg_flags_t;
 
 typedef struct xfs_bufhash {
@@ -143,6 +144,9 @@ typedef struct xfs_buftarg {
 
 	/* LRU control structures */
 	struct shrinker		bt_shrinker;
+	struct list_head	bt_lru;
+	spinlock_t		bt_lru_lock;
+	unsigned int		bt_lru_nr;
 } xfs_buftarg_t;
 
 /*
@@ -178,6 +182,7 @@ typedef struct xfs_buf {
 	xfs_buf_flags_t		b_flags;	/* status flags */
 	struct semaphore	b_sema;		/* semaphore for lockables */
 
+	struct list_head	b_lru;		/* lru list */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;		/* contains rbtree root */
-- 
1.7.1

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2010-09-14 10:56 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-14 10:55 [PATCH 0/18] xfs: metadata and buffer cache scalability improvements Dave Chinner
2010-09-14 10:56 ` [PATCH 01/18] xfs: single thread inode cache shrinking Dave Chinner
2010-09-14 18:48   ` Alex Elder
2010-09-14 22:48     ` Dave Chinner
2010-09-14 10:56 ` [PATCH 02/18] xfs: reduce the number of CIL lock round trips during commit Dave Chinner
2010-09-14 14:48   ` Christoph Hellwig
2010-09-14 17:21   ` Alex Elder
2010-09-14 10:56 ` [PATCH 03/18] xfs: remove debug assert for per-ag reference counting Dave Chinner
2010-09-14 14:48   ` Christoph Hellwig
2010-09-14 17:22   ` Alex Elder
2010-09-14 10:56 ` [PATCH 04/18] xfs: lockless per-ag lookups Dave Chinner
2010-09-14 12:35   ` Dave Chinner
2010-09-14 14:50   ` Christoph Hellwig
2010-09-14 17:28   ` Alex Elder
2010-09-14 10:56 ` [PATCH 05/18] xfs: convert inode cache lookups to use RCU locking Dave Chinner
2010-09-14 16:27   ` Christoph Hellwig
2010-09-14 23:17     ` Dave Chinner
2010-09-14 21:23   ` Alex Elder
2010-09-14 23:42     ` Dave Chinner
2010-09-14 10:56 ` [PATCH 06/18] xfs: convert pag_ici_lock to a spin lock Dave Chinner
2010-09-14 21:26   ` Alex Elder
2010-09-14 10:56 ` [PATCH 07/18] xfs: don't use vfs writeback for pure metadata modifications Dave Chinner
2010-09-14 14:54   ` Christoph Hellwig
2010-09-15  0:14     ` Dave Chinner
2010-09-15  0:17       ` Christoph Hellwig
2010-09-14 22:12   ` Alex Elder
2010-09-15  0:28     ` Dave Chinner
2010-11-08 10:47   ` Christoph Hellwig
2010-09-14 10:56 ` [PATCH 08/18] xfs: rename xfs_buf_get_nodaddr to be more appropriate Dave Chinner
2010-09-14 14:56   ` Christoph Hellwig
2010-09-14 22:14   ` Alex Elder
2010-09-14 10:56 ` [PATCH 09/18] xfs: introduced uncached buffer read primitve Dave Chinner
2010-09-14 14:56   ` Christoph Hellwig
2010-09-14 22:16   ` Alex Elder
2010-09-14 10:56 ` [PATCH 10/18] xfs: store xfs_mount in the buftarg instead of in the xfs_buf Dave Chinner
2010-09-14 14:57   ` Christoph Hellwig
2010-09-14 22:21   ` Alex Elder
2010-09-14 10:56 ` [PATCH 11/18] xfs: kill XBF_FS_MANAGED buffers Dave Chinner
2010-09-14 14:59   ` Christoph Hellwig
2010-09-14 22:26   ` Alex Elder
2010-09-14 10:56 ` [PATCH 12/18] xfs: use unhashed buffers for size checks Dave Chinner
2010-09-14 15:00   ` Christoph Hellwig
2010-09-14 22:29   ` Alex Elder
2010-09-14 10:56 ` [PATCH 13/18] xfs: remove buftarg hash for external devices Dave Chinner
2010-09-14 22:29   ` Alex Elder
2010-09-14 10:56 ` [PATCH 14/18] xfs: convert buffer cache hash to rbtree Dave Chinner
2010-09-14 16:29   ` Christoph Hellwig
2010-09-15 17:46   ` Alex Elder
2010-09-14 10:56 ` [PATCH 15/18] xfs; pack xfs_buf structure more tightly Dave Chinner
2010-09-14 16:30   ` Christoph Hellwig
2010-09-15 18:01   ` Alex Elder
2010-09-14 10:56 ` [PATCH 16/18] xfs: convert xfsbud shrinker to a per-buftarg shrinker Dave Chinner
2010-09-14 16:32   ` Christoph Hellwig
2010-09-15 20:19   ` Alex Elder
2010-09-16  0:28     ` Dave Chinner
2010-09-14 10:56 ` Dave Chinner [this message]
2010-09-14 23:16   ` [PATCH 17/18] xfs: add a lru to the XFS buffer cache Christoph Hellwig
2010-09-15  0:05     ` Dave Chinner
2010-09-15 21:28   ` Alex Elder
2010-09-14 10:56 ` [PATCH 18/18] xfs: stop using the page cache to back the " Dave Chinner
2010-09-14 23:20   ` Christoph Hellwig
2010-09-15  0:06     ` Dave Chinner
2010-09-14 14:25 ` [PATCH 0/18] xfs: metadata and buffer cache scalability improvements Christoph Hellwig
2010-09-17 13:21 ` Alex Elder
2010-09-21  2:02   ` Dave Chinner
2010-09-21 16:23     ` Alex Elder
2010-09-21 22:34       ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1284461777-1496-18-git-send-email-david@fromorbit.com \
    --to=david@fromorbit.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox