From: Dave Chinner <david@fromorbit.com>
To: glommer@parallels.com
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-mm@kvack.org, xfs@oss.sgi.com
Subject: [PATCH 14/19] xfs: use generic AG walk for background inode reclaim
Date: Wed, 28 Nov 2012 10:14:41 +1100 [thread overview]
Message-ID: <1354058086-27937-15-git-send-email-david@fromorbit.com> (raw)
In-Reply-To: <1354058086-27937-1-git-send-email-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
The per-ag inode cache radix trees are not walked by the shrinkers
any more, so there is no need for a special walker that contained
heurisitcs to prevent multiple shrinker instances from colliding
with each other. Hence we can just remote that and convert the code
to use the generic walker.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_ag.h | 2 -
fs/xfs/xfs_icache.c | 217 +++++++++++-----------------------------------
fs/xfs/xfs_icache.h | 4 +-
fs/xfs/xfs_mount.c | 1 -
fs/xfs/xfs_qm_syscalls.c | 2 +-
5 files changed, 55 insertions(+), 171 deletions(-)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index f2aeedb..40a7df9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -218,8 +218,6 @@ typedef struct xfs_perag {
spinlock_t pag_ici_lock; /* incore inode cache lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
- struct mutex pag_ici_reclaim_lock; /* serialisation point */
- unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
/* buffer cache index */
spinlock_t pag_buf_lock; /* lock for pag_buf_tree */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 82b053f..5cfc2eb 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -468,7 +468,8 @@ out_error_or_again:
STATIC int
xfs_inode_ag_walk_grab(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ int flags)
{
struct inode *inode = VFS_I(ip);
@@ -517,6 +518,7 @@ STATIC int
xfs_inode_ag_walk(
struct xfs_mount *mp,
struct xfs_perag *pag,
+ int (*grab)(struct xfs_inode *ip, int flags),
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags,
void *args),
@@ -530,6 +532,9 @@ xfs_inode_ag_walk(
int done;
int nr_found;
+ if (!grab)
+ grab = xfs_inode_ag_walk_grab;
+
restart:
done = 0;
skipped = 0;
@@ -564,7 +569,7 @@ restart:
for (i = 0; i < nr_found; i++) {
struct xfs_inode *ip = batch[i];
- if (done || xfs_inode_ag_walk_grab(ip))
+ if (done || grab(ip, flags))
batch[i] = NULL;
/*
@@ -593,7 +598,8 @@ restart:
if (!batch[i])
continue;
error = execute(batch[i], pag, flags, args);
- IRELE(batch[i]);
+ if (grab == xfs_inode_ag_walk_grab)
+ IRELE(batch[i]);
if (error == EAGAIN) {
skipped++;
continue;
@@ -617,35 +623,10 @@ restart:
return last_error;
}
-/*
- * Background scanning to trim post-EOF preallocated space. This is queued
- * based on the 'background_prealloc_discard_period' tunable (5m by default).
- */
-STATIC void
-xfs_queue_eofblocks(
- struct xfs_mount *mp)
-{
- rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
- queue_delayed_work(mp->m_eofblocks_workqueue,
- &mp->m_eofblocks_work,
- msecs_to_jiffies(xfs_eofb_secs * 1000));
- rcu_read_unlock();
-}
-
-void
-xfs_eofblocks_worker(
- struct work_struct *work)
-{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_eofblocks_work);
- xfs_icache_free_eofblocks(mp, NULL);
- xfs_queue_eofblocks(mp);
-}
-
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
+ int (*grab)(struct xfs_inode *ip, int flags),
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags,
void *args),
@@ -660,7 +641,8 @@ xfs_inode_ag_iterator(
ag = 0;
while ((pag = xfs_perag_get(mp, ag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
+ error = xfs_inode_ag_walk(mp, pag, grab, execute,
+ flags, args, -1);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -674,6 +656,7 @@ xfs_inode_ag_iterator(
int
xfs_inode_ag_iterator_tag(
struct xfs_mount *mp,
+ int (*grab)(struct xfs_inode *ip, int flags),
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags,
void *args),
@@ -689,7 +672,8 @@ xfs_inode_ag_iterator_tag(
ag = 0;
while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
+ error = xfs_inode_ag_walk(mp, pag, grab, execute,
+ flags, args, tag);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -904,7 +888,8 @@ STATIC int
xfs_reclaim_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
- int sync_mode)
+ int sync_mode,
+ void *args)
{
struct xfs_buf *bp = NULL;
int error;
@@ -1032,140 +1017,14 @@ out:
return 0;
}
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- */
-STATIC int
-xfs_reclaim_inodes_ag(
- struct xfs_mount *mp,
- int flags,
- long *nr_to_scan)
-{
- struct xfs_perag *pag;
- int error = 0;
- int last_error = 0;
- xfs_agnumber_t ag;
- int trylock = flags & SYNC_TRYLOCK;
- int skipped;
-
-restart:
- ag = 0;
- skipped = 0;
- while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
- unsigned long first_index = 0;
- int done = 0;
- int nr_found = 0;
-
- ag = pag->pag_agno + 1;
-
- if (trylock) {
- if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
- skipped++;
- xfs_perag_put(pag);
- continue;
- }
- first_index = pag->pag_ici_reclaim_cursor;
- } else
- mutex_lock(&pag->pag_ici_reclaim_lock);
-
- do {
- struct xfs_inode *batch[XFS_LOOKUP_BATCH];
- int i;
-
- rcu_read_lock();
- nr_found = radix_tree_gang_lookup_tag(
- &pag->pag_ici_root,
- (void **)batch, first_index,
- XFS_LOOKUP_BATCH,
- XFS_ICI_RECLAIM_TAG);
- if (!nr_found) {
- done = 1;
- rcu_read_unlock();
- break;
- }
-
- /*
- * Grab the inodes before we drop the lock. if we found
- * nothing, nr == 0 and the loop will be skipped.
- */
- for (i = 0; i < nr_found; i++) {
- struct xfs_inode *ip = batch[i];
-
- if (done || xfs_reclaim_inode_grab(ip, flags))
- batch[i] = NULL;
-
- /*
- * Update the index for the next lookup. Catch
- * overflows into the next AG range which can
- * occur if we have inodes in the last block of
- * the AG and we are currently pointing to the
- * last inode.
- *
- * Because we may see inodes that are from the
- * wrong AG due to RCU freeing and
- * reallocation, only update the index if it
- * lies in this AG. It was a race that lead us
- * to see this inode, so another lookup from
- * the same index will not find it again.
- */
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
- pag->pag_agno)
- continue;
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- done = 1;
- }
-
- /* unlock now we've grabbed the inodes. */
- rcu_read_unlock();
-
- for (i = 0; i < nr_found; i++) {
- if (!batch[i])
- continue;
- error = xfs_reclaim_inode(batch[i], pag, flags);
- if (error && last_error != EFSCORRUPTED)
- last_error = error;
- }
-
- *nr_to_scan -= XFS_LOOKUP_BATCH;
-
- cond_resched();
-
- } while (nr_found && !done && *nr_to_scan > 0);
-
- if (trylock && !done)
- pag->pag_ici_reclaim_cursor = first_index;
- else
- pag->pag_ici_reclaim_cursor = 0;
- mutex_unlock(&pag->pag_ici_reclaim_lock);
- xfs_perag_put(pag);
- }
-
- /*
- * if we skipped any AG, and we still have scan count remaining, do
- * another pass this time using blocking reclaim semantics (i.e
- * waiting on the reclaim locks and ignoring the reclaim cursors). This
- * ensure that when we get more reclaimers than AGs we block rather
- * than spin trying to execute reclaim.
- */
- if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
- trylock = 0;
- goto restart;
- }
- return XFS_ERROR(last_error);
-}
-
int
xfs_reclaim_inodes(
- xfs_mount_t *mp,
- int mode)
+ struct xfs_mount *mp,
+ int flags)
{
- long nr_to_scan = LONG_MAX;
-
- return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
+ return xfs_inode_ag_iterator_tag(mp, xfs_reclaim_inode_grab,
+ xfs_reclaim_inode, flags,
+ NULL, XFS_ICI_RECLAIM_TAG);
}
static int
@@ -1229,13 +1088,39 @@ xfs_reclaim_inodes_nr(
pag = xfs_perag_get(mp,
XFS_INO_TO_AGNO(mp, XFS_I(inode)->i_ino));
- xfs_reclaim_inode(XFS_I(inode), pag, SYNC_WAIT);
+ xfs_reclaim_inode(XFS_I(inode), pag, SYNC_WAIT, NULL);
xfs_perag_put(pag);
}
return freed;
}
+/*
+ * Background scanning to trim post-EOF preallocated space. This is queued
+ * based on the 'background_prealloc_discard_period' tunable (5m by default).
+ */
+STATIC void
+xfs_queue_eofblocks(
+ struct xfs_mount *mp)
+{
+ rcu_read_lock();
+ if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
+ queue_delayed_work(mp->m_eofblocks_workqueue,
+ &mp->m_eofblocks_work,
+ msecs_to_jiffies(xfs_eofb_secs * 1000));
+ rcu_read_unlock();
+}
+
+void
+xfs_eofblocks_worker(
+ struct work_struct *work)
+{
+ struct xfs_mount *mp = container_of(to_delayed_work(work),
+ struct xfs_mount, m_eofblocks_work);
+ xfs_icache_free_eofblocks(mp, NULL);
+ xfs_queue_eofblocks(mp);
+}
+
STATIC int
xfs_inode_match_id(
struct xfs_inode *ip,
@@ -1310,8 +1195,8 @@ xfs_icache_free_eofblocks(
if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
flags = SYNC_WAIT;
- return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
- eofb, XFS_ICI_EOFBLOCKS_TAG);
+ return xfs_inode_ag_iterator_tag(mp, NULL, xfs_inode_free_eofblocks,
+ flags, eofb, XFS_ICI_EOFBLOCKS_TAG);
}
void
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 4214518..a3380bf 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -29,7 +29,7 @@ int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
void xfs_reclaim_worker(struct work_struct *work);
-int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int flags);
long xfs_reclaim_inodes_nr(struct xfs_mount *mp, long nr_to_scan,
nodemask_t *nodes_to_scan);
@@ -42,10 +42,12 @@ void xfs_eofblocks_worker(struct work_struct *);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
+ int (*grab)(struct xfs_inode *ip, int flags),
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
int flags, void *args),
int flags, void *args);
int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
+ int (*grab)(struct xfs_inode *ip, int flags),
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,
int flags, void *args),
int flags, void *args, int tag);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da50846..6985a32 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -456,7 +456,6 @@ xfs_initialize_perag(
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
- mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
spin_lock_init(&pag->pag_buf_lock);
pag->pag_buf_tree = RB_ROOT;
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 5f53e75..85294a6 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -883,5 +883,5 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
+ xfs_inode_ag_iterator(mp, NULL, xfs_dqrele_inode, flags, NULL);
}
--
1.7.10
next prev parent reply other threads:[~2012-11-27 23:14 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-11-27 23:14 [RFC, PATCH 00/19] Numa aware LRU lists and shrinkers Dave Chinner
2012-11-27 23:14 ` [PATCH 01/19] dcache: convert dentry_stat.nr_unused to per-cpu counters Dave Chinner
2012-11-27 23:14 ` [PATCH 02/19] dentry: move to per-sb LRU locks Dave Chinner
2012-11-27 23:14 ` [PATCH 03/19] dcache: remove dentries from LRU before putting on dispose list Dave Chinner
2012-11-27 23:14 ` [PATCH 04/19] mm: new shrinker API Dave Chinner
2012-11-27 23:14 ` [PATCH 05/19] shrinker: convert superblock shrinkers to new API Dave Chinner
2012-12-20 11:06 ` Glauber Costa
2012-12-21 1:46 ` Dave Chinner
2012-12-21 10:17 ` Glauber Costa
2012-11-27 23:14 ` [PATCH 06/19] list: add a new LRU list type Dave Chinner
2012-11-28 16:10 ` Christoph Hellwig
2012-11-27 23:14 ` [PATCH 07/19] inode: convert inode lru list to generic lru list code Dave Chinner
2012-11-27 23:14 ` [PATCH 08/19] dcache: convert to use new lru list infrastructure Dave Chinner
2012-11-27 23:14 ` [PATCH 09/19] list_lru: per-node " Dave Chinner
2012-12-20 11:21 ` Glauber Costa
2012-12-21 1:54 ` Dave Chinner
2013-01-16 19:21 ` Glauber Costa
2013-01-16 22:55 ` Dave Chinner
2013-01-17 0:35 ` Glauber Costa
2013-01-17 4:22 ` Dave Chinner
2013-01-17 18:21 ` Glauber Costa
2013-01-18 0:10 ` Dave Chinner
2013-01-18 0:14 ` Glauber Costa
2013-01-18 8:11 ` Dave Chinner
2013-01-18 19:10 ` Glauber Costa
2013-01-19 0:10 ` Dave Chinner
2013-01-19 0:13 ` Glauber Costa
2013-01-18 0:51 ` Glauber Costa
2013-01-18 8:08 ` Dave Chinner
2013-01-18 19:01 ` Glauber Costa
2012-11-27 23:14 ` [PATCH 10/19] shrinker: add node awareness Dave Chinner
2012-11-27 23:14 ` [PATCH 11/19] fs: convert inode and dentry shrinking to be node aware Dave Chinner
2012-11-27 23:14 ` [PATCH 12/19] xfs: convert buftarg LRU to generic code Dave Chinner
2012-11-27 23:14 ` [PATCH 13/19] xfs: Node aware direct inode reclaim Dave Chinner
2012-11-27 23:14 ` Dave Chinner [this message]
2012-11-27 23:14 ` [PATCH 15/19] xfs: convert dquot cache lru to list_lru Dave Chinner
2012-11-28 16:17 ` Christoph Hellwig
2012-11-27 23:14 ` [PATCH 16/19] fs: convert fs shrinkers to new scan/count API Dave Chinner
2012-11-27 23:14 ` [PATCH 17/19] drivers: convert shrinkers to new count/scan API Dave Chinner
2012-11-28 1:13 ` Chris Wilson
2012-11-28 3:17 ` Dave Chinner
2012-11-28 8:21 ` Glauber Costa
2012-11-28 21:28 ` Dave Chinner
2012-11-29 10:29 ` Glauber Costa
2012-11-29 22:02 ` Dave Chinner
2013-06-07 13:37 ` Konrad Rzeszutek Wilk
2012-11-27 23:14 ` [PATCH 18/19] shrinker: convert remaining shrinkers to " Dave Chinner
2012-11-27 23:14 ` [PATCH 19/19] shrinker: Kill old ->shrink API Dave Chinner
2012-11-29 19:02 ` [RFC, PATCH 00/19] Numa aware LRU lists and shrinkers Andi Kleen
2012-11-29 22:09 ` Dave Chinner
2012-12-20 11:45 ` Glauber Costa
2012-12-21 2:50 ` Dave Chinner
2012-12-21 10:41 ` Glauber Costa
2013-01-21 16:08 ` Glauber Costa
2013-01-21 23:21 ` Dave Chinner
2013-01-23 14:36 ` Glauber Costa
2013-01-23 23:46 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1354058086-27937-15-git-send-email-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=glommer@parallels.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=xfs@oss.sgi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).