* [PATCH v3 1/8] xfs: add EOFBLOCKS inode tagging/untagging
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 2/8] xfs: support a tag-based inode_ag_iterator Brian Foster
` (7 subsequent siblings)
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
Add the XFS_ICI_EOFBLOCKS_TAG inode tag to identify inodes with
speculatively preallocated blocks beyond EOF. An inode is tagged
when speculative preallocation occurs and untagged either via
truncate down or when post-EOF blocks are freed via release or
reclaim.
The tag management is intentionally not aggressive to prefer
simplicity over the complexity of handling all the corner cases
under which post-EOF blocks could be freed (e.g., forward
truncation, fallocate, write error conditions). This means
that a tagged inode may or may not have post-EOF blocks after a
period of time. The tag is eventually cleared when the inode is
released or reclaimed.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_ag.h | 1 +
fs/xfs/xfs_iomap.c | 7 +++++
fs/xfs/xfs_iops.c | 3 ++
fs/xfs/xfs_sync.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_sync.h | 3 ++
fs/xfs/xfs_trace.h | 5 ++++
fs/xfs/xfs_vnodeops.c | 2 +
7 files changed, 83 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 44d65c1..22bd4db 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -233,6 +233,7 @@ typedef struct xfs_perag {
#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
+#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6..2968ee8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -459,6 +459,13 @@ retry:
if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_alert_fsblock_zero(ip, &imap[0]);
+ /*
+ * Tag the inode as speculatively preallocated so we can reclaim this
+ * space on demand, if necessary.
+ */
+ if (prealloc)
+ xfs_inode_set_eofblocks_tag(ip);
+
*ret_imap = imap[0];
return 0;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 4e00cf0..dcd1d5f 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -854,6 +854,9 @@ xfs_setattr_size(
* and do not wait the usual (long) time for writeout.
*/
xfs_iflags_set(ip, XFS_ITRUNCATED);
+
+ /* A truncate down always removes post-EOF blocks. */
+ xfs_inode_clear_eofblocks_tag(ip);
}
if (mask & ATTR_CTIME) {
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 9654817..00c6224 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -971,3 +971,65 @@ xfs_reclaim_inodes_count(
return reclaimable;
}
+void /* Set XFS_ICI_EOFBLOCKS_TAG on ip's entry in its AG's pag_ici_root. */
+xfs_inode_set_eofblocks_tag(
+ xfs_inode_t *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+ int tagged; /* did pag_ici_root already carry the tag before this call? */
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ spin_lock(&pag->pag_ici_lock);
+ trace_xfs_inode_set_eofblocks_tag(ip);
+
+ tagged = radix_tree_tagged(&pag->pag_ici_root,
+ XFS_ICI_EOFBLOCKS_TAG);
+ radix_tree_tag_set(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ if (!tagged) {
+ /* first tagged inode in this AG: propagate the eofblocks tag up into the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock); /* taken while pag_ici_lock is held */
+ radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+
+ trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
+
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag); /* drop the reference taken by xfs_perag_get() above */
+}
+
+void /* Clear XFS_ICI_EOFBLOCKS_TAG on ip's entry in its AG's pag_ici_root. */
+xfs_inode_clear_eofblocks_tag(
+ xfs_inode_t *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ spin_lock(&pag->pag_ici_lock);
+ trace_xfs_inode_clear_eofblocks_tag(ip);
+
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
+ /* no tagged inodes remain in this AG: clear the eofblocks tag from the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock); /* taken while pag_ici_lock is held */
+ radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
+
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag); /* drop the reference taken by xfs_perag_get() above */
+}
+
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e..4486491 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -43,6 +43,9 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
+void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
+void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
+
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7d36ccf..6f46e03 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -130,6 +130,8 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks);
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
@@ -585,6 +587,9 @@ DEFINE_INODE_EVENT(xfs_update_time);
DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
+DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
+
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 2a5c6373..d883881 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -237,6 +237,8 @@ xfs_free_eofblocks(
} else {
error = xfs_trans_commit(tp,
XFS_TRANS_RELEASE_LOG_RES);
+ if (!error)
+ xfs_inode_clear_eofblocks_tag(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 2/8] xfs: support a tag-based inode_ag_iterator
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
2012-09-14 12:19 ` [PATCH v3 1/8] xfs: add EOFBLOCKS inode tagging/untagging Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 3/8] xfs: create helper to check whether to free eofblocks on inode Brian Foster
` (6 subsequent siblings)
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
Genericize xfs_inode_ag_walk() to support an optional radix tree tag
and args argument for the execute function. Create a new wrapper
called xfs_inode_ag_iterator_tag() that performs a tag-based walk
of perags and inodes.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_qm_syscalls.c | 5 ++-
fs/xfs/xfs_sync.c | 61 +++++++++++++++++++++++++++++++++++++++-------
fs/xfs/xfs_sync.h | 7 ++++-
3 files changed, 60 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 858a3b1..848bd8e 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -845,7 +845,8 @@ STATIC int
xfs_dqrele_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
- int flags)
+ int flags,
+ void *args)
{
/* skip quota inodes */
if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
@@ -881,5 +882,5 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
}
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 00c6224..0da93c9 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -101,8 +101,11 @@ xfs_inode_ag_walk(
struct xfs_mount *mp,
struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags),
- int flags)
+ struct xfs_perag *pag, int flags,
+ void *args),
+ int flags,
+ void *args,
+ int tag)
{
uint32_t first_index;
int last_error = 0;
@@ -121,9 +124,17 @@ restart:
int i;
rcu_read_lock();
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+
+ if (tag == -1)
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH);
+ else
+ nr_found = radix_tree_gang_lookup_tag(
+ &pag->pag_ici_root,
+ (void **) batch, first_index,
+ XFS_LOOKUP_BATCH, tag);
+
if (!nr_found) {
rcu_read_unlock();
break;
@@ -164,7 +175,7 @@ restart:
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
- error = execute(batch[i], pag, flags);
+ error = execute(batch[i], pag, flags, args);
IRELE(batch[i]);
if (error == EAGAIN) {
skipped++;
@@ -193,8 +204,10 @@ int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags),
- int flags)
+ struct xfs_perag *pag, int flags,
+ void *args),
+ int flags,
+ void *args)
{
struct xfs_perag *pag;
int error = 0;
@@ -204,7 +217,36 @@ xfs_inode_ag_iterator(
ag = 0;
while ((pag = xfs_perag_get(mp, ag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags);
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
+ xfs_perag_put(pag);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
+}
+
+int
+xfs_inode_ag_iterator_tag(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags,
+ void *args),
+ int flags,
+ void *args,
+ int tag)
+{
+ struct xfs_perag *pag;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+
+ ag = 0;
+ while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
+ ag = pag->pag_agno + 1;
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -219,7 +261,8 @@ STATIC int
xfs_sync_inode_data(
struct xfs_inode *ip,
struct xfs_perag *pag,
- int flags)
+ int flags,
+ void *args)
{
struct inode *inode = VFS_I(ip);
struct address_space *mapping = inode->i_mapping;
@@ -252,7 +295,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
- error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, NULL);
if (error)
return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 4486491..463ea0a 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -48,7 +48,10 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags);
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags, void *args),
+ int flags, void *args);
+int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags, void *args),
+ int flags, void *args, int tag);
#endif
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 3/8] xfs: create helper to check whether to free eofblocks on inode
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
2012-09-14 12:19 ` [PATCH v3 1/8] xfs: add EOFBLOCKS inode tagging/untagging Brian Foster
2012-09-14 12:19 ` [PATCH v3 2/8] xfs: support a tag-based inode_ag_iterator Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 4/8] xfs: export xfs_free_eofblocks() and return EAGAIN on trylock failure Brian Foster
` (5 subsequent siblings)
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
This check is used in multiple places to determine whether we
should check for (and potentially free) post-EOF blocks on an
inode. Add a helper to consolidate the check.
Note that when we remove an inode from the cache (xfs_inactive()),
we are required to trim post-EOF blocks even if the inode is marked
preallocated or append-only to maintain correct space accounting.
The 'force' parameter to xfs_can_free_eofblocks() specifies whether
we should ignore the prealloc/append-only status of the inode.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_vnodeops.c | 19 +++++++------------
fs/xfs/xfs_vnodeops.h | 40 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 46 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d883881..12f5087 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -435,11 +435,7 @@ xfs_release(
if (ip->i_d.di_nlink == 0)
return 0;
- if ((S_ISREG(ip->i_d.di_mode) &&
- (VFS_I(ip)->i_size > 0 ||
- (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ if (xfs_can_free_eofblocks(ip, false)) {
/*
* If we can't get the iolock just skip truncating the blocks
@@ -515,13 +511,12 @@ xfs_inactive(
goto out;
if (ip->i_d.di_nlink != 0) {
- if ((S_ISREG(ip->i_d.di_mode) &&
- (VFS_I(ip)->i_size > 0 ||
- (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS) &&
- (!(ip->i_d.di_flags &
- (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
- ip->i_delayed_blks != 0))) {
+ /*
+ * force is true because we are evicting an inode from the
+ * cache. Post-eof blocks must be freed, lest we end up with
+ * broken free space accounting.
+ */
+ if (xfs_can_free_eofblocks(ip, true)) {
error = xfs_free_eofblocks(mp, ip, false);
if (error)
return VN_INACTIVE_CACHE;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 447e146..d5701e3 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -1,6 +1,10 @@
#ifndef _XFS_VNODEOPS_H
#define _XFS_VNODEOPS_H 1
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+
struct attrlist_cursor_kern;
struct file;
struct iattr;
@@ -9,8 +13,42 @@ struct iovec;
struct kiocb;
struct pipe_inode_info;
struct uio;
-struct xfs_inode;
+/*
+ * Test whether it is appropriate to check an inode for and free post-EOF
+ * blocks. With 'force' set, regular files marked preallocated or append-only
+ * are also eligible, but only while they still have delalloc blocks.
+ */
+static inline bool
+xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
+{
+ /* prealloc/delalloc exists only on regular files */
+ if (!S_ISREG(ip->i_d.di_mode))
+ return false;
+
+ /*
+ * Zero sized files with no cached pages and no delalloc blocks will not
+ * have speculative prealloc/delalloc blocks to remove.
+ */
+ if (VFS_I(ip)->i_size == 0 &&
+ VN_CACHED(VFS_I(ip)) == 0 &&
+ ip->i_delayed_blks == 0)
+ return false;
+
+ /* If we haven't read in the extent list, then don't do it now. */
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+ return false;
+
+ /*
+ * Do not free real preallocated or append-only files unless the file
+ * has delalloc blocks and we are forced to remove them.
+ */
+ if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
+ if (!force || ip->i_delayed_blks == 0)
+ return false;
+
+ return true;
+}
int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 4/8] xfs: export xfs_free_eofblocks() and return EAGAIN on trylock failure
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
` (2 preceding siblings ...)
2012-09-14 12:19 ` [PATCH v3 3/8] xfs: create helper to check whether to free eofblocks on inode Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 5/8] xfs: create function to scan and clear EOFBLOCKS inodes Brian Foster
` (4 subsequent siblings)
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
Turn xfs_free_eofblocks() into a non-static function, return EAGAIN to
indicate trylock failure and make sure this error is not propagated in
xfs_release().
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_vnodeops.c | 6 +++---
fs/xfs/xfs_vnodeops.h | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 12f5087..a61e852 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -150,7 +150,7 @@ xfs_readlink(
* when the link count isn't zero and by xfs_dm_punch_hole() when
* punching a hole to EOF.
*/
-STATIC int
+int
xfs_free_eofblocks(
xfs_mount_t *mp,
xfs_inode_t *ip,
@@ -199,7 +199,7 @@ xfs_free_eofblocks(
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
- return 0;
+ return EAGAIN;
}
}
@@ -462,7 +462,7 @@ xfs_release(
return 0;
error = xfs_free_eofblocks(mp, ip, true);
- if (error)
+ if (error && error != EAGAIN)
return error;
/* delalloc blocks after truncation means it really is dirty */
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index d5701e3..1e03c4b 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -95,5 +95,6 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+int xfs_free_eofblocks(struct xfs_mount *, struct xfs_inode *, bool);
#endif /* _XFS_VNODEOPS_H */
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 5/8] xfs: create function to scan and clear EOFBLOCKS inodes
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
` (3 preceding siblings ...)
2012-09-14 12:19 ` [PATCH v3 4/8] xfs: export xfs_free_eofblocks() and return EAGAIN on trylock failure Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl Brian Foster
` (3 subsequent siblings)
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
xfs_inodes_free_eofblocks() implements scanning functionality for
EOFBLOCKS inodes. It uses the AG iterator to walk the tagged inodes
and free post-EOF blocks via the xfs_inode_free_eofblocks() execute
function. The scan can be invoked in best-effort mode or wait
(force) mode.
A best-effort scan (default) handles all inodes that do not have a
dirty cache and whose io lock we successfully acquire via trylock. In
wait mode, we continue to cycle through an AG until all inodes are
handled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_sync.c | 40 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_sync.h | 1 +
fs/xfs/xfs_trace.h | 1 +
3 files changed, 42 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 0da93c9..48cab9f 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -1014,6 +1014,46 @@ xfs_reclaim_inodes_count(
return reclaimable;
}
+STATIC int /* per-inode execute callback passed to xfs_inode_ag_iterator_tag() */
+xfs_inode_free_eofblocks(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag, /* not used by this callback */
+ int flags,
+ void *args) /* not used by this callback */
+{
+ int ret;
+ bool force = flags & SYNC_WAIT; /* wait (force) mode scan requested */
+
+ if (!xfs_can_free_eofblocks(ip, false)) {
+ /* not eligible (inode could be preallocated or append-only): drop the tag */
+ trace_xfs_inode_free_eofblocks_invalid(ip);
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
+ if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
+ PAGECACHE_TAG_DIRTY))
+ return 0; /* best effort: skip inodes whose mapping has dirty pages */
+
+ ret = xfs_free_eofblocks(ip->i_mount, ip, false); /* NOTE(review): 'false' looks like need_iolock, so no trylock/EAGAIN path is taken here -- confirm */
+
+ /* ignore EAGAIN on a best effort scan */
+ if (!force && (ret == EAGAIN))
+ ret = 0;
+
+ return ret;
+}
+
+int /* Run xfs_inode_free_eofblocks() over every inode tagged XFS_ICI_EOFBLOCKS_TAG. */
+xfs_inodes_free_eofblocks(
+ struct xfs_mount *mp,
+ int flags)
+{
+ ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); /* no other flag bits are accepted */
+ return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
+ NULL, XFS_ICI_EOFBLOCKS_TAG); /* NULL: no per-scan args */
+}
+
void
xfs_inode_set_eofblocks_tag(
xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 463ea0a..307654a 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -45,6 +45,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
+int xfs_inodes_free_eofblocks(struct xfs_mount *, int);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6f46e03..cb52346 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -589,6 +589,7 @@ DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
+DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
` (4 preceding siblings ...)
2012-09-14 12:19 ` [PATCH v3 5/8] xfs: create function to scan and clear EOFBLOCKS inodes Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 7/8] xfs: add enhanced filtering to EOFBLOCKS scan Brian Foster
` (2 subsequent siblings)
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
The XFS_IOC_FREE_EOFBLOCKS ioctl allows users to invoke an EOFBLOCKS
scan. The xfs_eofblocks structure is defined to support the command
parameters (scan mode).
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_fs.h | 14 ++++++++++++++
fs/xfs/xfs_ioctl.c | 12 ++++++++++++
2 files changed, 26 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c13fed8..32bb2e8 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -339,6 +339,19 @@ typedef struct xfs_error_injection {
/*
+ * Speculative preallocation trimming.
+ */
+struct xfs_eofblocks { /* argument structure for XFS_IOC_FREE_EOFBLOCKS */
+ __u32 eof_flags; /* XFS_EOF_FLAGS_* bits */
+ __s32 version; /* NOTE(review): not read by the ioctl handler in this patch -- confirm intended use */
+ unsigned char pad[12]; /* presumably reserved for future fields -- confirm */
+};
+
+/* eof_flags values */
+#define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
+
+
+/*
* The user-level Handle Request interface structure.
*/
typedef struct xfs_fsop_handlereq {
@@ -456,6 +469,7 @@ typedef struct xfs_handle {
/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
+#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c..216ca7a 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1602,6 +1602,18 @@ xfs_file_ioctl(
error = xfs_errortag_clearall(mp, 1);
return -error;
+ case XFS_IOC_FREE_EOFBLOCKS: {
+ struct xfs_eofblocks eofb;
+ int flags;
+
+ if (copy_from_user(&eofb, arg, sizeof(eofb)))
+ return -XFS_ERROR(EFAULT);
+
+ flags = (eofb.eof_flags & XFS_EOF_FLAGS_FORCE) ? SYNC_WAIT : SYNC_TRYLOCK;
+ error = xfs_inodes_free_eofblocks(mp, flags);
+ return -error;
+ }
+
default:
return -ENOTTY;
}
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 7/8] xfs: add enhanced filtering to EOFBLOCKS scan
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
` (5 preceding siblings ...)
2012-09-14 12:19 ` [PATCH v3 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-14 12:19 ` [PATCH v3 8/8] xfs: add background scanning to clear EOFBLOCKS inodes Brian Foster
2012-09-26 15:19 ` [PATCH v3 0/8] speculative preallocation inode tracking Ben Myers
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
Support EOFBLOCKS scan filtering by quota ID or minimum file size.
Add the appropriate fields/flags to the xfs_eofblocks structure and
pass it down to xfs_inode_free_eofblocks() where filtering
functionality is implemented.
A (user requested) quota ID based scan requires the associated
quota mode be enabled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_fs.h | 9 +++++++++
fs/xfs/xfs_ioctl.c | 10 +++++++++-
fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++----
fs/xfs/xfs_sync.h | 2 +-
4 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 32bb2e8..54c0f39 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -343,12 +343,21 @@ typedef struct xfs_error_injection {
*/
struct xfs_eofblocks {
__u32 eof_flags;
+ __u32 eof_id;
+ __u64 eof_min_file_size;
__s32 version;
unsigned char pad[12];
};
/* eof_flags values */
#define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
+#define XFS_EOF_FLAGS_USRID 0x02 /* filter by user id */
+#define XFS_EOF_FLAGS_GRPID 0x04 /* filter by group id */
+#define XFS_EOF_FLAGS_PROJID 0x08 /* filter by project id */
+#define XFS_EOF_FLAGS_MINFILESIZE 0x10 /* minimum file size */
+
+#define XFS_EOF_VALID_QUOTA (XFS_EOF_FLAGS_USRID|XFS_EOF_FLAGS_GRPID| \
+ XFS_EOF_FLAGS_PROJID)
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 216ca7a..a7bf847 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1609,8 +1609,16 @@ xfs_file_ioctl(
if (copy_from_user(&eofb, arg, sizeof(eofb)))
return -XFS_ERROR(EFAULT);
+ if (((eofb.eof_flags & XFS_EOF_FLAGS_USRID) &&
+ !XFS_IS_UQUOTA_ON(mp)) ||
+ ((eofb.eof_flags & XFS_EOF_FLAGS_GRPID) &&
+ !XFS_IS_GQUOTA_ON(mp)) ||
+ ((eofb.eof_flags & XFS_EOF_FLAGS_PROJID) &&
+ !XFS_IS_PQUOTA_ON(mp)))
+ return -XFS_ERROR(EINVAL);
+
flags = (eofb.eof_flags & XFS_EOF_FLAGS_FORCE) ? SYNC_WAIT : SYNC_TRYLOCK;
- error = xfs_inodes_free_eofblocks(mp, flags);
+ error = xfs_inodes_free_eofblocks(mp, flags, &eofb);
return -error;
}
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 48cab9f..13cd9da 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -1015,6 +1015,21 @@ xfs_reclaim_inodes_count(
}
STATIC int
+xfs_inode_match_quota_id(
+ struct xfs_inode *ip,
+ struct xfs_eofblocks *eofb)
+{
+ if (eofb->eof_flags & XFS_EOF_FLAGS_USRID) /* only the first ID flag set (user, group, project) is applied */
+ return ip->i_d.di_uid == eofb->eof_id;
+ else if (eofb->eof_flags & XFS_EOF_FLAGS_GRPID)
+ return ip->i_d.di_gid == eofb->eof_id;
+ else if (eofb->eof_flags & XFS_EOF_FLAGS_PROJID)
+ return xfs_get_projid(ip) == eofb->eof_id;
+
+ return 0; /* no quota ID flag set: report no match */
+}
+
+STATIC int
xfs_inode_free_eofblocks(
struct xfs_inode *ip,
struct xfs_perag *pag,
@@ -1022,6 +1037,7 @@ xfs_inode_free_eofblocks(
void *args)
{
int ret;
+ struct xfs_eofblocks *eofb = args;
bool force = flags & SYNC_WAIT;
if (!xfs_can_free_eofblocks(ip, false)) {
@@ -1031,8 +1047,13 @@ xfs_inode_free_eofblocks(
return 0;
}
- if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
- PAGECACHE_TAG_DIRTY))
+ if ((eofb &&
+ (((eofb->eof_flags & XFS_EOF_VALID_QUOTA) &&
+ !xfs_inode_match_quota_id(ip, eofb)) ||
+ ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
+ (XFS_ISIZE(ip) < eofb->eof_min_file_size)))) ||
+ (!force && mapping_tagged(VFS_I(ip)->i_mapping,
+ PAGECACHE_TAG_DIRTY)))
return 0;
ret = xfs_free_eofblocks(ip->i_mount, ip, false);
@@ -1047,11 +1068,12 @@ xfs_inode_free_eofblocks(
int
xfs_inodes_free_eofblocks(
struct xfs_mount *mp,
- int flags)
+ int flags,
+ struct xfs_eofblocks *eofb)
{
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
- NULL, XFS_ICI_EOFBLOCKS_TAG);
+ eofb, XFS_ICI_EOFBLOCKS_TAG);
}
void
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 307654a..167f06c 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -45,7 +45,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
-int xfs_inodes_free_eofblocks(struct xfs_mount *, int);
+int xfs_inodes_free_eofblocks(struct xfs_mount *, int, struct xfs_eofblocks *);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH v3 8/8] xfs: add background scanning to clear EOFBLOCKS inodes
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
` (6 preceding siblings ...)
2012-09-14 12:19 ` [PATCH v3 7/8] xfs: add enhanced filtering to EOFBLOCKS scan Brian Foster
@ 2012-09-14 12:19 ` Brian Foster
2012-09-26 15:19 ` [PATCH v3 0/8] speculative preallocation inode tracking Ben Myers
8 siblings, 0 replies; 13+ messages in thread
From: Brian Foster @ 2012-09-14 12:19 UTC (permalink / raw)
To: xfs
Create a delayed_work to enable background scanning and freeing
of EOFBLOCKS inodes. The scanner kicks in once speculative
preallocation occurs and stops requeueing itself when no EOFBLOCKS
inodes exist.
Scans are queued on the existing syncd workqueue and the interval
is based on the new eofb_timer tunable (defaults to 5m). The
background scanner performs unfiltered, best-effort scans (which
skip inodes under lock contention or with a dirty cache mapping).
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_globals.c | 1 +
fs/xfs/xfs_linux.h | 1 +
fs/xfs/xfs_mount.h | 2 ++
fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++++++
fs/xfs/xfs_sysctl.c | 9 +++++++++
fs/xfs/xfs_sysctl.h | 1 +
6 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 76e81cf..fda9a66 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -40,4 +40,5 @@ xfs_param_t xfs_params = {
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 30*100, 3600*100},
+ .eofb_timer = { 1*100, 300*100, 7200*100},
};
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 828662f..bbad99b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -118,6 +118,7 @@
#define xfs_rotorstep xfs_params.rotorstep.val
#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
+#define xfs_eofb_centisecs xfs_params.eofb_timer.val
#define current_cpu() (raw_smp_processor_id())
#define current_pid() (current->pid)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index deee09e..bf5ecfa 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -199,6 +199,8 @@ typedef struct xfs_mount {
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_sync_work; /* background sync work */
struct delayed_work m_reclaim_work; /* background inode reclaim */
+ struct delayed_work m_eofblocks_work; /* background eof blocks
+ trimming */
struct work_struct m_flush_work; /* background inode flush */
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 13cd9da..778cf6a 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -532,6 +532,31 @@ xfs_flush_worker(
xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
}
+/*
+ * Background scanning to trim post-EOF preallocated space. This is queued
+ * based on the 'eofb_centisecs' tunable (5m by default).
+ */
+STATIC void
+xfs_queue_eofblocks(
+ struct xfs_mount *mp)
+{
+ rcu_read_lock();
+ if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
+ queue_delayed_work(xfs_syncd_wq, &mp->m_eofblocks_work,
+ msecs_to_jiffies(xfs_eofb_centisecs * 10));
+ rcu_read_unlock();
+}
+
+STATIC void
+xfs_eofblocks_worker(
+struct work_struct *work)
+{
+ struct xfs_mount *mp = container_of(to_delayed_work(work),
+ struct xfs_mount, m_eofblocks_work);
+ xfs_inodes_free_eofblocks(mp, SYNC_TRYLOCK, NULL);
+ xfs_queue_eofblocks(mp);
+}
+
int
xfs_syncd_init(
struct xfs_mount *mp)
@@ -539,6 +564,7 @@ xfs_syncd_init(
INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+ INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
xfs_syncd_queue_sync(mp);
@@ -551,6 +577,7 @@ xfs_syncd_stop(
{
cancel_delayed_work_sync(&mp->m_sync_work);
cancel_delayed_work_sync(&mp->m_reclaim_work);
+ cancel_delayed_work_sync(&mp->m_eofblocks_work);
cancel_work_sync(&mp->m_flush_work);
}
@@ -1101,6 +1128,9 @@ xfs_inode_set_eofblocks_tag(
XFS_ICI_EOFBLOCKS_TAG);
spin_unlock(&ip->i_mount->m_perag_lock);
+ /* kick off background trimming */
+ xfs_queue_eofblocks(ip->i_mount);
+
trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
-1, _RET_IP_);
}
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index ee2d2ad..45d74fc 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -202,6 +202,15 @@ static ctl_table xfs_table[] = {
.extra1 = &xfs_params.fstrm_timer.min,
.extra2 = &xfs_params.fstrm_timer.max,
},
+ {
+ .procname = "eofb_centisecs",
+ .data = &xfs_params.eofb_timer.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &xfs_params.eofb_timer.min,
+ .extra2 = &xfs_params.eofb_timer.max,
+ },
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
{
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index b9937d4..bd8e157 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
+ xfs_sysctl_val_t eofb_timer; /* Interval between eofb scan wakeups */
} xfs_param_t;
/*
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/8] speculative preallocation inode tracking
2012-09-14 12:19 [PATCH v3 0/8] speculative preallocation inode tracking Brian Foster
` (7 preceding siblings ...)
2012-09-14 12:19 ` [PATCH v3 8/8] xfs: add background scanning to clear EOFBLOCKS inodes Brian Foster
@ 2012-09-26 15:19 ` Ben Myers
2012-09-26 15:39 ` Ben Myers
2012-09-26 15:45 ` Brian Foster
8 siblings, 2 replies; 13+ messages in thread
From: Ben Myers @ 2012-09-26 15:19 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
Hi Brian,
On Fri, Sep 14, 2012 at 08:19:47AM -0400, Brian Foster wrote:
> This is v3 of the speculative preallocation inode tracking patchset. This
> functionality tracks inodes with post-EOF speculative preallocation for the
> purpose of background and on-demand trimming.
>
> Background scanning occurs on a longish interval (5 minutes by default) and in
> a best-effort mode (i.e., inodes are skipped due to lock contention or dirty
> cache). The intent is to clear up post-EOF blocks on inodes that might have
> allocations hanging around due to open-write-close sequences (NFS).
>
> On demand scanning is provided via a new ioctl and supports various parameters
> such as scan mode, filtering by quota id and minimum file size. A pending use
> case for on demand scanning is for accurate quota accounting via the gluster
> scale out filesystem (i.e., to free up preallocated space when near a usage
> limit).
[33084.794491] XFS (sda2): Ending clean mount
[33170.400045] XFS: Assertion failed: !atomic_read(&VFS_I(ip)->i_count) || xfs_isilocked(ip, XFS_IOLOCK_EXCL), file: /root/xfs/fs/xfs/xfs_inode.c, line: 1128
[33170.41422[ 0.000000] Initializing cgroup subsys cpuset
[ 0.000000] Initializing cgroup subsys cpu
[ 0.000000] Linux version 3.6.0-rc1-1.2-desktop+ (root@nfs7) (gcc version 4.6.2 (SUSE Linux) ) #26 SMP PREEMPT Fri Sep 21 18:26:16 CDT 2012
[ 0.000000] e820: BIOS-provided physical RAM map:
[ 0.000000] BIOS-e820: [mem 0x0000000000000100-0x000000000009fbff] usable
crash> bt
PID: 1289 TASK: f38d71d0 CPU: 1 COMMAND: "kworker/1:2"
#0 [f17c9b88] crash_kexec at c0295045
#1 [f17c9be0] oops_end at c06ab2f2
#2 [f17c9bf8] die at c020539a
#3 [f17c9c10] do_trap at c06aadc1
#4 [f17c9c28] do_invalid_op at c0202eb1
#5 [f17c9cc4] error_code (via invalid_op) at c06aab7c
EAX: 0000008e EBX: ec3d9400 ECX: 0000071e EDX: 00000046 EBP: f17c9d18
DS: 007b ESI: ec3d9400 ES: 007b EDI: ef973d00 GS: 2e30
CS: 0060 EIP: f9d1dbb6 ERR: ffffffff EFLAGS: 00010292
#6 [f17c9cf8] assfail at f9d1dbb6 [xfs]
#7 [f17c9d1c] xfs_itruncate_extents at f9d6335f [xfs]
#8 [f17c9d98] xfs_free_eofblocks at f9d237d9 [xfs]
#9 [f17c9df8] xfs_inode_free_eofblocks at f9d221b4 [xfs]
#10 [f17c9e14] xfs_inode_ag_walk at f9d20ab9 [xfs]
#11 [f17c9ee4] xfs_inode_ag_iterator_tag at f9d20d6b [xfs]
#12 [f17c9f18] xfs_inodes_free_eofblocks at f9d21c95 [xfs]
#13 [f17c9f34] xfs_eofblocks_worker at f9d21cc3 [xfs]
#14 [f17c9f40] process_one_work at c0251ea5
#15 [f17c9f84] worker_thread at c0252504
#16 [f17c9fbc] kthread at c025672b
#17 [f17c9fe8] kernel_thread_helper at c06b06f4
It seems that test 133 was running at the time of the crash in two cases. This
is a neat patch set but we need to resolve this before pulling it in.
Regards,
Ben
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/8] speculative preallocation inode tracking
2012-09-26 15:19 ` [PATCH v3 0/8] speculative preallocation inode tracking Ben Myers
@ 2012-09-26 15:39 ` Ben Myers
2012-09-26 15:45 ` Brian Foster
1 sibling, 0 replies; 13+ messages in thread
From: Ben Myers @ 2012-09-26 15:39 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
Hey Brian,
On Wed, Sep 26, 2012 at 10:19:23AM -0500, Ben Myers wrote:
> On Fri, Sep 14, 2012 at 08:19:47AM -0400, Brian Foster wrote:
> > This is v3 of the speculative preallocation inode tracking patchset. This
> > functionality tracks inodes with post-EOF speculative preallocation for the
> > purpose of background and on-demand trimming.
> >
> > Background scanning occurs on a longish interval (5 minutes by default) and in
> > a best-effort mode (i.e., inodes are skipped due to lock contention or dirty
> > cache). The intent is to clear up post-EOF blocks on inodes that might have
> > allocations hanging around due to open-write-close sequences (NFS).
> >
> > On demand scanning is provided via a new ioctl and supports various parameters
> > such as scan mode, filtering by quota id and minimum file size. A pending use
> > case for on demand scanning is for accurate quota accounting via the gluster
> > scale out filesystem (i.e., to free up preallocated space when near a usage
> > limit).
Maybe it would help if I gave you some context! I was running your patch
set alongside the inode64-related patch set from Carlos on an i386 box
and hit this assert on two subsequent test runs... both happened to be in test
133 at the time of the crash.
Regards,
Ben
> [33084.794491] XFS (sda2): Ending clean mount
> [33170.400045] XFS: Assertion failed: !atomic_read(&VFS_I(ip)->i_count) || xfs_isilocked(ip, XFS_IOLOCK_EXCL), file: /root/xfs/fs/xfs/xfs_inode.c, line: 1128
> [33170.41422[ 0.000000] Initializing cgroup subsys cpuset
> [ 0.000000] Initializing cgroup subsys cpu
> [ 0.000000] Linux version 3.6.0-rc1-1.2-desktop+ (root@nfs7) (gcc version 4.6.2 (SUSE Linux) ) #26 SMP PREEMPT Fri Sep 21 18:26:16 CDT 2012
> [ 0.000000] e820: BIOS-provided physical RAM map:
> [ 0.000000] BIOS-e820: [mem 0x0000000000000100-0x000000000009fbff] usable
>
> crash> bt
> PID: 1289 TASK: f38d71d0 CPU: 1 COMMAND: "kworker/1:2"
> #0 [f17c9b88] crash_kexec at c0295045
> #1 [f17c9be0] oops_end at c06ab2f2
> #2 [f17c9bf8] die at c020539a
> #3 [f17c9c10] do_trap at c06aadc1
> #4 [f17c9c28] do_invalid_op at c0202eb1
> #5 [f17c9cc4] error_code (via invalid_op) at c06aab7c
> EAX: 0000008e EBX: ec3d9400 ECX: 0000071e EDX: 00000046 EBP: f17c9d18
> DS: 007b ESI: ec3d9400 ES: 007b EDI: ef973d00 GS: 2e30
> CS: 0060 EIP: f9d1dbb6 ERR: ffffffff EFLAGS: 00010292
> #6 [f17c9cf8] assfail at f9d1dbb6 [xfs]
> #7 [f17c9d1c] xfs_itruncate_extents at f9d6335f [xfs]
> #8 [f17c9d98] xfs_free_eofblocks at f9d237d9 [xfs]
> #9 [f17c9df8] xfs_inode_free_eofblocks at f9d221b4 [xfs]
> #10 [f17c9e14] xfs_inode_ag_walk at f9d20ab9 [xfs]
> #11 [f17c9ee4] xfs_inode_ag_iterator_tag at f9d20d6b [xfs]
> #12 [f17c9f18] xfs_inodes_free_eofblocks at f9d21c95 [xfs]
> #13 [f17c9f34] xfs_eofblocks_worker at f9d21cc3 [xfs]
> #14 [f17c9f40] process_one_work at c0251ea5
> #15 [f17c9f84] worker_thread at c0252504
> #16 [f17c9fbc] kthread at c025672b
> #17 [f17c9fe8] kernel_thread_helper at c06b06f4
>
> It seems that test 133 was running at the time of the crash in two cases. This
> is a neat patch set but we need to resolve this before pulling it in.
>
> Regards,
> Ben
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/8] speculative preallocation inode tracking
2012-09-26 15:19 ` [PATCH v3 0/8] speculative preallocation inode tracking Ben Myers
2012-09-26 15:39 ` Ben Myers
@ 2012-09-26 15:45 ` Brian Foster
2012-09-26 15:51 ` Ben Myers
1 sibling, 1 reply; 13+ messages in thread
From: Brian Foster @ 2012-09-26 15:45 UTC (permalink / raw)
To: Ben Myers; +Cc: xfs
On 09/26/2012 11:19 AM, Ben Myers wrote:
> Hi Brian,
>
> On Fri, Sep 14, 2012 at 08:19:47AM -0400, Brian Foster wrote:
>> This is v3 of the speculative preallocation inode tracking patchset. This
>> functionality tracks inodes with post-EOF speculative preallocation for the
>> purpose of background and on-demand trimming.
>>
>> Background scanning occurs on a longish interval (5 minutes by default) and in
>> a best-effort mode (i.e., inodes are skipped due to lock contention or dirty
>> cache). The intent is to clear up post-EOF blocks on inodes that might have
>> allocations hanging around due to open-write-close sequences (NFS).
>>
>> On demand scanning is provided via a new ioctl and supports various parameters
>> such as scan mode, filtering by quota id and minimum file size. A pending use
>> case for on demand scanning is for accurate quota accounting via the gluster
>> scale out filesystem (i.e., to free up preallocated space when near a usage
>> limit).
>
> [33084.794491] XFS (sda2): Ending clean mount
> [33170.400045] XFS: Assertion failed: !atomic_read(&VFS_I(ip)->i_count) || xfs_isilocked(ip, XFS_IOLOCK_EXCL), file: /root/xfs/fs/xfs/xfs_inode.c, line: 1128
> [33170.41422[ 0.000000] Initializing cgroup subsys cpuset
> [ 0.000000] Initializing cgroup subsys cpu
> [ 0.000000] Linux version 3.6.0-rc1-1.2-desktop+ (root@nfs7) (gcc version 4.6.2 (SUSE Linux) ) #26 SMP PREEMPT Fri Sep 21 18:26:16 CDT 2012
> [ 0.000000] e820: BIOS-provided physical RAM map:
> [ 0.000000] BIOS-e820: [mem 0x0000000000000100-0x000000000009fbff] usable
>
> crash> bt
> PID: 1289 TASK: f38d71d0 CPU: 1 COMMAND: "kworker/1:2"
> #0 [f17c9b88] crash_kexec at c0295045
> #1 [f17c9be0] oops_end at c06ab2f2
> #2 [f17c9bf8] die at c020539a
> #3 [f17c9c10] do_trap at c06aadc1
> #4 [f17c9c28] do_invalid_op at c0202eb1
> #5 [f17c9cc4] error_code (via invalid_op) at c06aab7c
> EAX: 0000008e EBX: ec3d9400 ECX: 0000071e EDX: 00000046 EBP: f17c9d18
> DS: 007b ESI: ec3d9400 ES: 007b EDI: ef973d00 GS: 2e30
> CS: 0060 EIP: f9d1dbb6 ERR: ffffffff EFLAGS: 00010292
> #6 [f17c9cf8] assfail at f9d1dbb6 [xfs]
> #7 [f17c9d1c] xfs_itruncate_extents at f9d6335f [xfs]
> #8 [f17c9d98] xfs_free_eofblocks at f9d237d9 [xfs]
> #9 [f17c9df8] xfs_inode_free_eofblocks at f9d221b4 [xfs]
> #10 [f17c9e14] xfs_inode_ag_walk at f9d20ab9 [xfs]
> #11 [f17c9ee4] xfs_inode_ag_iterator_tag at f9d20d6b [xfs]
> #12 [f17c9f18] xfs_inodes_free_eofblocks at f9d21c95 [xfs]
> #13 [f17c9f34] xfs_eofblocks_worker at f9d21cc3 [xfs]
> #14 [f17c9f40] process_one_work at c0251ea5
> #15 [f17c9f84] worker_thread at c0252504
> #16 [f17c9fbc] kthread at c025672b
> #17 [f17c9fe8] kernel_thread_helper at c06b06f4
>
> It seems that test 133 was running at the time of the crash in two cases. This
> is a neat patch set but we need to resolve this before pulling it in.
>
Indeed. It looks like I botched the need_iolock parameter to
xfs_free_eofblocks() when I migrated to rely on EAGAIN rather than a
blocking lock. Thanks for the report.
I'm surprised I didn't reproduce this. I will try and do so before I
submit an updated set so I can verify a fix. Was this a repeated 133
test or full xfstests run? Thanks again.
Brian
> Regards,
> Ben
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v3 0/8] speculative preallocation inode tracking
2012-09-26 15:45 ` Brian Foster
@ 2012-09-26 15:51 ` Ben Myers
0 siblings, 0 replies; 13+ messages in thread
From: Ben Myers @ 2012-09-26 15:51 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
Hey Brian,
On Wed, Sep 26, 2012 at 11:45:06AM -0400, Brian Foster wrote:
> On 09/26/2012 11:19 AM, Ben Myers wrote:
> > On Fri, Sep 14, 2012 at 08:19:47AM -0400, Brian Foster wrote:
> >> This is v3 of the speculative preallocation inode tracking patchset. This
> >> functionality tracks inodes with post-EOF speculative preallocation for the
> >> purpose of background and on-demand trimming.
> >>
> >> Background scanning occurs on a longish interval (5 minutes by default) and in
> >> a best-effort mode (i.e., inodes are skipped due to lock contention or dirty
> >> cache). The intent is to clear up post-EOF blocks on inodes that might have
> >> allocations hanging around due to open-write-close sequences (NFS).
> >>
> >> On demand scanning is provided via a new ioctl and supports various parameters
> >> such as scan mode, filtering by quota id and minimum file size. A pending use
> >> case for on demand scanning is for accurate quota accounting via the gluster
> >> scale out filesystem (i.e., to free up preallocated space when near a usage
> >> limit).
> >
> > [33084.794491] XFS (sda2): Ending clean mount
> > [33170.400045] XFS: Assertion failed: !atomic_read(&VFS_I(ip)->i_count) || xfs_isilocked(ip, XFS_IOLOCK_EXCL), file: /root/xfs/fs/xfs/xfs_inode.c, line: 1128
> > [33170.41422[ 0.000000] Initializing cgroup subsys cpuset
> > [ 0.000000] Initializing cgroup subsys cpu
> > [ 0.000000] Linux version 3.6.0-rc1-1.2-desktop+ (root@nfs7) (gcc version 4.6.2 (SUSE Linux) ) #26 SMP PREEMPT Fri Sep 21 18:26:16 CDT 2012
> > [ 0.000000] e820: BIOS-provided physical RAM map:
> > [ 0.000000] BIOS-e820: [mem 0x0000000000000100-0x000000000009fbff] usable
> >
> > crash> bt
> > PID: 1289 TASK: f38d71d0 CPU: 1 COMMAND: "kworker/1:2"
> > #0 [f17c9b88] crash_kexec at c0295045
> > #1 [f17c9be0] oops_end at c06ab2f2
> > #2 [f17c9bf8] die at c020539a
> > #3 [f17c9c10] do_trap at c06aadc1
> > #4 [f17c9c28] do_invalid_op at c0202eb1
> > #5 [f17c9cc4] error_code (via invalid_op) at c06aab7c
> > EAX: 0000008e EBX: ec3d9400 ECX: 0000071e EDX: 00000046 EBP: f17c9d18
> > DS: 007b ESI: ec3d9400 ES: 007b EDI: ef973d00 GS: 2e30
> > CS: 0060 EIP: f9d1dbb6 ERR: ffffffff EFLAGS: 00010292
> > #6 [f17c9cf8] assfail at f9d1dbb6 [xfs]
> > #7 [f17c9d1c] xfs_itruncate_extents at f9d6335f [xfs]
> > #8 [f17c9d98] xfs_free_eofblocks at f9d237d9 [xfs]
> > #9 [f17c9df8] xfs_inode_free_eofblocks at f9d221b4 [xfs]
> > #10 [f17c9e14] xfs_inode_ag_walk at f9d20ab9 [xfs]
> > #11 [f17c9ee4] xfs_inode_ag_iterator_tag at f9d20d6b [xfs]
> > #12 [f17c9f18] xfs_inodes_free_eofblocks at f9d21c95 [xfs]
> > #13 [f17c9f34] xfs_eofblocks_worker at f9d21cc3 [xfs]
> > #14 [f17c9f40] process_one_work at c0251ea5
> > #15 [f17c9f84] worker_thread at c0252504
> > #16 [f17c9fbc] kthread at c025672b
> > #17 [f17c9fe8] kernel_thread_helper at c06b06f4
> >
> > It seems that test 133 was running at the time of the crash in two cases. This
> > is a neat patch set but we need to resolve this before pulling it in.
> >
>
> Indeed. It looks like I botched the need_iolock parameter to
> xfs_free_eofblocks() when I migrated to rely on EAGAIN rather than a
> blocking lock. Thanks for the report.
>
> I'm surprised I didn't reproduce this. I will try and do so before I
> submit an updated set so I can verify a fix. Was this a repeated 133
> test or full xfstests run? Thanks again.
NP. This is what I was running:
while true
do
./check -g auto
done
-Ben
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 13+ messages in thread