From: Dave Chinner <david@fromorbit.com>
To: xfs@oss.sgi.com
Subject: [PATCH 12/34] xfs: add a lru to the XFS buffer cache
Date: Tue, 21 Dec 2010 18:29:08 +1100 [thread overview]
Message-ID: <1292916570-25015-13-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1292916570-25015-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Introduce a per-buftarg LRU for memory reclaim to operate on. This
is the last piece we need to put in place so that we can fully
control the buffer lifecycle. This allows XFS to be responsibile for
maintaining the working set of buffers under memory pressure instead
of relying on the VM reclaim not to take pages we need out from
underneath us.
The implementation introduces a b_lru_ref counter into the buffer.
This is currently set to 1 whenever the buffer is referenced and so is used to
determine if the buffer should be added to the LRU or not when freed.
Effectively it allows lazy LRU initialisation of the buffer so we do not need
to touch the LRU list and locks in xfs_buf_find().
Instead, when the buffer is being released and we drop the last
reference to it, we check the b_lru_ref count and if it is none zero
we re-add the buffer reference and add the inode to the LRU. The
b_lru_ref counter is decremented by the shrinker, and whenever the
shrinker comes across a buffer with a zero b_lru_ref counter, if
released the LRU reference on the buffer. In the absence of a lookup
race, this will result in the buffer being freed.
This counting mechanism is used instead of a reference flag so that
it is simple to re-introduce buffer-type specific reclaim reference
counts to prioritise reclaim more effectively. We still have all
those hooks in the XFS code, so this will provide the infrastructure
to re-implement that functionality.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/linux-2.6/xfs_buf.c | 166 ++++++++++++++++++++++++++++++++++++++------
fs/xfs/linux-2.6/xfs_buf.h | 8 ++-
2 files changed, 151 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0a00d7a..92f1f2a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -163,8 +163,79 @@ test_page_region(
}
/*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
*/
+STATIC void
+xfs_buf_lru_add(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (list_empty(&bp->b_lru)) {
+ atomic_inc(&bp->b_hold);
+ list_add_tail(&bp->b_lru, &btp->bt_lru);
+ btp->bt_lru_nr++;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are not
+ * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
+ * to optimise the shrinker removing the buffer from the LRU and calling
+ * xfs_buf_free(). i.e. it removes an unneccessary round trip on the
+ * bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ if (list_empty(&bp->b_lru))
+ return;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+ struct xfs_buf *bp)
+{
+ bp->b_flags |= XBF_STALE;
+ atomic_set(&(bp)->b_lru_ref, 0);
+ if (!list_empty(&bp->b_lru)) {
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ atomic_dec(&bp->b_hold);
+ }
+ spin_unlock(&btp->bt_lru_lock);
+ }
+ ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
STATIC void
_xfs_buf_initialize(
@@ -181,7 +252,9 @@ _xfs_buf_initialize(
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
+ atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
+ INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
RB_CLEAR_NODE(&bp->b_rbnode);
sema_init(&bp->b_sema, 0); /* held, no waiters */
@@ -257,6 +330,8 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
+ ASSERT(list_empty(&bp->b_lru));
+
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -822,6 +897,7 @@ xfs_buf_rele(
if (!pag) {
ASSERT(!bp->b_relse);
+ ASSERT(list_empty(&bp->b_lru));
ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
@@ -829,13 +905,19 @@ xfs_buf_rele(
}
ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
ASSERT(atomic_read(&bp->b_hold) > 0);
if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
spin_unlock(&pag->pag_buf_lock);
bp->b_relse(bp);
+ } else if (!(bp->b_flags & XBF_STALE) &&
+ atomic_read(&bp->b_lru_ref)) {
+ xfs_buf_lru_add(bp);
+ spin_unlock(&pag->pag_buf_lock);
} else {
+ xfs_buf_lru_del(bp);
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
spin_unlock(&pag->pag_buf_lock);
@@ -1432,27 +1514,35 @@ xfs_buf_iomove(
*/
/*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
*/
void
xfs_wait_buftarg(
struct xfs_buftarg *btp)
{
- struct xfs_perag *pag;
- uint i;
+ struct xfs_buf *bp;
- for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
- pag = xfs_perag_get(btp->bt_mount, i);
- spin_lock(&pag->pag_buf_lock);
- while (rb_first(&pag->pag_buf_tree)) {
- spin_unlock(&pag->pag_buf_lock);
+restart:
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+ if (atomic_read(&bp->b_hold) > 1) {
+ spin_unlock(&btp->bt_lru_lock);
delay(100);
- spin_lock(&pag->pag_buf_lock);
+ goto restart;
}
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
+ /*
+ * clear the LRU reference count so the bufer doesn't get
+ * ignored in xfs_buf_rele().
+ */
+ atomic_set(&bp->b_lru_ref, 0);
+ spin_unlock(&btp->bt_lru_lock);
+ xfs_buf_rele(bp);
+ spin_lock(&btp->bt_lru_lock);
}
+ spin_unlock(&btp->bt_lru_lock);
}
int
@@ -1463,15 +1553,45 @@ xfs_buftarg_shrink(
{
struct xfs_buftarg *btp = container_of(shrink,
struct xfs_buftarg, bt_shrinker);
- if (nr_to_scan) {
- if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
- return -1;
- if (list_empty(&btp->bt_delwrite_queue))
- return -1;
- set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
- wake_up_process(btp->bt_task);
+ struct xfs_buf *bp;
+ LIST_HEAD(dispose);
+
+ if (!nr_to_scan)
+ return btp->bt_lru_nr;
+
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ if (nr_to_scan-- <= 0)
+ break;
+
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+
+ /*
+ * Decrement the b_lru_ref count unless the value is already
+ * zero. If the value is already zero, we need to reclaim the
+ * buffer, otherwise it gets another trip through the LRU.
+ */
+ if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+ list_move_tail(&bp->b_lru, &btp->bt_lru);
+ continue;
+ }
+
+ /*
+ * remove the buffer from the LRU now to avoid needing another
+ * lock round trip inside xfs_buf_rele().
+ */
+ list_move(&bp->b_lru, &dispose);
+ btp->bt_lru_nr--;
}
- return list_empty(&btp->bt_delwrite_queue) ? -1 : 1;
+ spin_unlock(&btp->bt_lru_lock);
+
+ while (!list_empty(&dispose)) {
+ bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+ list_del_init(&bp->b_lru);
+ xfs_buf_rele(bp);
+ }
+
+ return btp->bt_lru_nr;
}
void
@@ -1606,6 +1726,8 @@ xfs_alloc_buftarg(
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
+ INIT_LIST_HEAD(&btp->bt_lru);
+ spin_lock_init(&btp->bt_lru_lock);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_mapping_buftarg(btp, bdev))
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9344103..4601eab 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -134,6 +134,9 @@ typedef struct xfs_buftarg {
/* LRU control structures */
struct shrinker bt_shrinker;
+ struct list_head bt_lru;
+ spinlock_t bt_lru_lock;
+ unsigned int bt_lru_nr;
} xfs_buftarg_t;
/*
@@ -166,9 +169,11 @@ typedef struct xfs_buf {
xfs_off_t b_file_offset; /* offset in file */
size_t b_buffer_length;/* size of buffer in bytes */
atomic_t b_hold; /* reference count */
+ atomic_t b_lru_ref; /* lru reclaim ref count */
xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
+ struct list_head b_lru; /* lru list */
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */
@@ -266,7 +271,8 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
-#define XFS_BUF_STALE(bp) ((bp)->b_flags |= XBF_STALE)
+void xfs_buf_stale(struct xfs_buf *bp);
+#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
#define XFS_BUF_SUPER_STALE(bp) do { \
--
1.7.2.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
next prev parent reply other threads:[~2010-12-21 7:28 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-21 7:28 [PATCH 00/34] xfs: scalability patchset for 2.6.38 Dave Chinner
2010-12-21 7:28 ` [PATCH 01/34] xfs: provide a inode iolock lockdep class Dave Chinner
2010-12-21 15:15 ` Christoph Hellwig
2010-12-21 7:28 ` [PATCH 02/34] xfs: use KM_NOFS for allocations during attribute list operations Dave Chinner
2010-12-21 15:16 ` Christoph Hellwig
2010-12-21 7:28 ` [PATCH 03/34] lib: percpu counter add unless less than functionality Dave Chinner
2010-12-22 2:20 ` Alex Elder
2010-12-22 3:46 ` Dave Chinner
2010-12-21 7:29 ` [PATCH 04/34] xfs: use generic per-cpu counter infrastructure Dave Chinner
2010-12-21 7:29 ` [PATCH 05/34] xfs: demultiplex xfs_icsb_modify_counters() Dave Chinner
2010-12-21 7:29 ` [PATCH 06/34] xfs: dynamic speculative EOF preallocation Dave Chinner
2010-12-21 15:15 ` Christoph Hellwig
2010-12-21 21:42 ` Dave Chinner
2010-12-21 23:44 ` Dave Chinner
2010-12-22 2:29 ` Alex Elder
2010-12-29 12:56 ` Christoph Hellwig
2010-12-27 14:57 ` Alex Elder
2010-12-27 15:00 ` Alex Elder
2011-01-06 18:16 ` Christoph Hellwig
2010-12-21 7:29 ` [PATCH 07/34] xfs: don't truncate prealloc from frequently accessed inodes Dave Chinner
2010-12-21 16:45 ` Christoph Hellwig
2010-12-21 7:29 ` [PATCH 08/34] xfs: rcu free inodes Dave Chinner
2010-12-21 7:29 ` [PATCH 09/34] xfs: convert inode cache lookups to use RCU locking Dave Chinner
2010-12-21 7:29 ` [PATCH 10/34] xfs: convert pag_ici_lock to a spin lock Dave Chinner
2010-12-21 7:29 ` [PATCH 11/34] xfs: convert xfsbud shrinker to a per-buftarg shrinker Dave Chinner
2010-12-21 7:29 ` Dave Chinner [this message]
2010-12-21 7:29 ` [PATCH 13/34] xfs: connect up buffer reclaim priority hooks Dave Chinner
2010-12-21 7:29 ` [PATCH 14/34] xfs: fix EFI transaction cancellation Dave Chinner
2010-12-21 7:29 ` [PATCH 15/34] xfs: Pull EFI/EFD handling out from under the AIL lock Dave Chinner
2010-12-21 7:29 ` [PATCH 16/34] xfs: clean up xfs_ail_delete() Dave Chinner
2010-12-21 7:29 ` [PATCH 17/34] xfs: bulk AIL insertion during transaction commit Dave Chinner
2010-12-21 7:29 ` [PATCH 18/34] xfs: reduce the number of AIL push wakeups Dave Chinner
2010-12-21 7:29 ` [PATCH 19/34] xfs: consume iodone callback items on buffers as they are processed Dave Chinner
2010-12-21 7:29 ` [PATCH 20/34] xfs: remove all the inodes on a buffer from the AIL in bulk Dave Chinner
2010-12-22 2:20 ` Alex Elder
2010-12-22 3:49 ` Dave Chinner
2010-12-21 7:29 ` [PATCH 22/34] xfs: use AIL bulk delete function to implement single delete Dave Chinner
2010-12-21 7:29 ` [PATCH 23/34] xfs: convert log grant ticket queues to list heads Dave Chinner
2010-12-21 7:29 ` [PATCH 24/34] xfs: fact out common grant head/log tail verification code Dave Chinner
2010-12-21 7:29 ` [PATCH 25/34] xfs: rework log grant space calculations Dave Chinner
2010-12-21 7:29 ` [PATCH 26/34] xfs: combine grant heads into a single 64 bit integer Dave Chinner
2010-12-21 7:29 ` [PATCH 27/34] xfs: use wait queues directly for the log wait queues Dave Chinner
2010-12-21 7:29 ` [PATCH 28/34] xfs: make AIL tail pushing independent of the grant lock Dave Chinner
2010-12-21 7:29 ` [PATCH 29/34] xfs: convert l_last_sync_lsn to an atomic variable Dave Chinner
2010-12-21 7:29 ` [PATCH 30/34] xfs: convert l_tail_lsn " Dave Chinner
2010-12-29 12:52 ` Christoph Hellwig
2010-12-29 15:49 ` Alex Elder
2010-12-21 7:29 ` [PATCH 31/34] xfs: convert log grant heads to atomic variables Dave Chinner
2010-12-21 7:29 ` [PATCH 32/34] xfs: introduce new locks for the log grant ticket wait queues Dave Chinner
2010-12-21 7:29 ` [PATCH 33/34] xfs: convert grant head manipulations to lockless algorithm Dave Chinner
2010-12-21 7:29 ` [PATCH 34/34] xfs: kill useless spinlock_destroy macro Dave Chinner
2010-12-23 1:15 ` [PATCH 00/34] xfs: scalability patchset for 2.6.38 Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1292916570-25015-13-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox