* [PATCH v4 1/8] xfs: add EOFBLOCKS inode tagging/untagging
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 7:04 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 2/8] xfs: support a tag-based inode_ag_iterator Brian Foster
` (6 subsequent siblings)
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
Add the XFS_ICI_EOFBLOCKS_TAG inode tag to identify inodes with
speculatively preallocated blocks beyond EOF. An inode is tagged
when speculative preallocation occurs and untagged either via
truncate down or when post-EOF blocks are freed via release or
reclaim.
The tag management is intentionally not aggressive to prefer
simplicity over the complexity of handling all the corner cases
under which post-EOF blocks could be freed (i.e., forward
truncation, fallocate, write error conditions, etc.). This means
that a tagged inode may or may not have post-EOF blocks after a
period of time. The tag is eventually cleared when the inode is
released or reclaimed.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_ag.h | 1 +
fs/xfs/xfs_iomap.c | 7 +++++
fs/xfs/xfs_iops.c | 3 ++
fs/xfs/xfs_sync.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_sync.h | 3 ++
fs/xfs/xfs_trace.h | 5 ++++
fs/xfs/xfs_vnodeops.c | 2 +
7 files changed, 83 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 44d65c1..22bd4db 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -233,6 +233,7 @@ typedef struct xfs_perag {
#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
+#define XFS_ICI_EOFBLOCKS_TAG 1 /* inode has blocks beyond EOF */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
#define XFS_MIN_FREELIST_RAW(bl,cl,mp) \
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 973dff6..2968ee8 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -459,6 +459,13 @@ retry:
if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_alert_fsblock_zero(ip, &imap[0]);
+ /*
+ * Tag the inode as speculatively preallocated so we can reclaim this
+ * space on demand, if necessary.
+ */
+ if (prealloc)
+ xfs_inode_set_eofblocks_tag(ip);
+
*ret_imap = imap[0];
return 0;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 4e00cf0..dcd1d5f 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -854,6 +854,9 @@ xfs_setattr_size(
* and do not wait the usual (long) time for writeout.
*/
xfs_iflags_set(ip, XFS_ITRUNCATED);
+
+ /* A truncate down always removes post-EOF blocks. */
+ xfs_inode_clear_eofblocks_tag(ip);
}
if (mask & ATTR_CTIME) {
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 9654817..00c6224 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -971,3 +971,65 @@ xfs_reclaim_inodes_count(
return reclaimable;
}
+void
+xfs_inode_set_eofblocks_tag(
+ xfs_inode_t *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+ int tagged;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ spin_lock(&pag->pag_ici_lock);
+ trace_xfs_inode_set_eofblocks_tag(ip);
+
+ tagged = radix_tree_tagged(&pag->pag_ici_root,
+ XFS_ICI_EOFBLOCKS_TAG);
+ radix_tree_tag_set(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ if (!tagged) {
+ /* propagate the eofblocks tag up into the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+
+ trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
+
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
+}
+
+void
+xfs_inode_clear_eofblocks_tag(
+ xfs_inode_t *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ spin_lock(&pag->pag_ici_lock);
+ trace_xfs_inode_clear_eofblocks_tag(ip);
+
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
+ /* clear the eofblocks tag from the perag radix tree */
+ spin_lock(&ip->i_mount->m_perag_lock);
+ radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+ XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+ XFS_ICI_EOFBLOCKS_TAG);
+ spin_unlock(&ip->i_mount->m_perag_lock);
+ trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
+ -1, _RET_IP_);
+ }
+
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
+}
+
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e..4486491 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -43,6 +43,9 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
+void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
+void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
+
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7d36ccf..6f46e03 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -130,6 +130,8 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks);
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
@@ -585,6 +587,9 @@ DEFINE_INODE_EVENT(xfs_update_time);
DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
+DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
+
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 2a5c6373..d883881 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -237,6 +237,8 @@ xfs_free_eofblocks(
} else {
error = xfs_trans_commit(tp,
XFS_TRANS_RELEASE_LOG_RES);
+ if (!error)
+ xfs_inode_clear_eofblocks_tag(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 1/8] xfs: add EOFBLOCKS inode tagging/untagging
2012-09-27 17:45 ` [PATCH v4 1/8] xfs: add EOFBLOCKS inode tagging/untagging Brian Foster
@ 2012-09-28 7:04 ` Dave Chinner
2012-09-28 20:40 ` Brian Foster
0 siblings, 1 reply; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 7:04 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:45PM -0400, Brian Foster wrote:
> Add the XFS_ICI_EOFBLOCKS_TAG inode tag to identify inodes with
> speculatively preallocated blocks beyond EOF. An inode is tagged
> when speculative preallocation occurs and untagged either via
> truncate down or when post-EOF blocks are freed via release or
> reclaim.
>
> The tag management is intentionally not aggressive to prefer
> simplicity over the complexity of handling all the corner cases
> under which post-EOF blocks could be freed (i.e., forward
> truncation, fallocate, write error conditions, etc.). This means
> that a tagged inode may or may not have post-EOF blocks after a
> period of time. The tag is eventually cleared when the inode is
> released or reclaimed.
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
Apart from the fact this conflicts with my xfssyncd killing patchset
and xfs_sync.c no longer exists, it looks fine.
Can you rebase this on top of my patch series? Mostly it is simply
making your changes to xfs_icache.c rather than xfs_sync.c as that's
where all this inode cache radix tree walking code is now.....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 1/8] xfs: add EOFBLOCKS inode tagging/untagging
2012-09-28 7:04 ` Dave Chinner
@ 2012-09-28 20:40 ` Brian Foster
0 siblings, 0 replies; 22+ messages in thread
From: Brian Foster @ 2012-09-28 20:40 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 09/28/2012 03:04 AM, Dave Chinner wrote:
> On Thu, Sep 27, 2012 at 01:45:45PM -0400, Brian Foster wrote:
>> Add the XFS_ICI_EOFBLOCKS_TAG inode tag to identify inodes with
>> speculatively preallocated blocks beyond EOF. An inode is tagged
>> when speculative preallocation occurs and untagged either via
>> truncate down or when post-EOF blocks are freed via release or
>> reclaim.
>>
>> The tag management is intentionally not aggressive to prefer
>> simplicity over the complexity of handling all the corner cases
>> under which post-EOF blocks could be freed (i.e., forward
>> truncation, fallocate, write error conditions, etc.). This means
>> that a tagged inode may or may not have post-EOF blocks after a
>> period of time. The tag is eventually cleared when the inode is
>> released or reclaimed.
>>
>> Signed-off-by: Brian Foster <bfoster@redhat.com>
>
> Apart from the fact this conflicts with my xfssyncd killing patchset
> and xfs_sync.c no longer exists, it looks fine.
>
> Can you rebase this on top of my patch series? Mostly it is simply
> making your changes to xfs_icache.c rather than xfs_sync.c as that's
> where all this inode cache radix tree walking code is now.....
>
Sure. I skimmed through the first set and it didn't seem like a major
conflict. I'll rebase against the v3 xfssyncd set for my next version.
Brian
> Cheers,
>
> Dave.
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 2/8] xfs: support a tag-based inode_ag_iterator
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
2012-09-27 17:45 ` [PATCH v4 1/8] xfs: add EOFBLOCKS inode tagging/untagging Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 7:05 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 3/8] xfs: create helper to check whether to free eofblocks on inode Brian Foster
` (5 subsequent siblings)
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
Genericize xfs_inode_ag_walk() to support an optional radix tree tag
and args argument for the execute function. Create a new wrapper
called xfs_inode_ag_iterator_tag() that performs a tag based walk
of perag's and inodes.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_qm_syscalls.c | 5 ++-
fs/xfs/xfs_sync.c | 61 +++++++++++++++++++++++++++++++++++++++-------
fs/xfs/xfs_sync.h | 7 ++++-
3 files changed, 60 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 858a3b1..848bd8e 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -845,7 +845,8 @@ STATIC int
xfs_dqrele_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
- int flags)
+ int flags,
+ void *args)
{
/* skip quota inodes */
if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
@@ -881,5 +882,5 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
}
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 00c6224..0da93c9 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -101,8 +101,11 @@ xfs_inode_ag_walk(
struct xfs_mount *mp,
struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags),
- int flags)
+ struct xfs_perag *pag, int flags,
+ void *args),
+ int flags,
+ void *args,
+ int tag)
{
uint32_t first_index;
int last_error = 0;
@@ -121,9 +124,17 @@ restart:
int i;
rcu_read_lock();
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+
+ if (tag == -1)
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH);
+ else
+ nr_found = radix_tree_gang_lookup_tag(
+ &pag->pag_ici_root,
+ (void **) batch, first_index,
+ XFS_LOOKUP_BATCH, tag);
+
if (!nr_found) {
rcu_read_unlock();
break;
@@ -164,7 +175,7 @@ restart:
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
- error = execute(batch[i], pag, flags);
+ error = execute(batch[i], pag, flags, args);
IRELE(batch[i]);
if (error == EAGAIN) {
skipped++;
@@ -193,8 +204,10 @@ int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags),
- int flags)
+ struct xfs_perag *pag, int flags,
+ void *args),
+ int flags,
+ void *args)
{
struct xfs_perag *pag;
int error = 0;
@@ -204,7 +217,36 @@ xfs_inode_ag_iterator(
ag = 0;
while ((pag = xfs_perag_get(mp, ag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags);
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
+ xfs_perag_put(pag);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
+}
+
+int
+xfs_inode_ag_iterator_tag(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags,
+ void *args),
+ int flags,
+ void *args,
+ int tag)
+{
+ struct xfs_perag *pag;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+
+ ag = 0;
+ while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
+ ag = pag->pag_agno + 1;
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -219,7 +261,8 @@ STATIC int
xfs_sync_inode_data(
struct xfs_inode *ip,
struct xfs_perag *pag,
- int flags)
+ int flags,
+ void *args)
{
struct inode *inode = VFS_I(ip);
struct address_space *mapping = inode->i_mapping;
@@ -252,7 +295,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
- error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, NULL);
if (error)
return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 4486491..463ea0a 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -48,7 +48,10 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags);
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags, void *args),
+ int flags, void *args);
+int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags, void *args),
+ int flags, void *args, int tag);
#endif
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 2/8] xfs: support a tag-based inode_ag_iterator
2012-09-27 17:45 ` [PATCH v4 2/8] xfs: support a tag-based inode_ag_iterator Brian Foster
@ 2012-09-28 7:05 ` Dave Chinner
0 siblings, 0 replies; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 7:05 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:46PM -0400, Brian Foster wrote:
> Genericize xfs_inode_ag_walk() to support an optional radix tree tag
> and args argument for the execute function. Create a new wrapper
> called xfs_inode_ag_iterator_tag() that performs a tag based walk
> of perag's and inodes.
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
> fs/xfs/xfs_qm_syscalls.c | 5 ++-
> fs/xfs/xfs_sync.c | 61 +++++++++++++++++++++++++++++++++++++++-------
> fs/xfs/xfs_sync.h | 7 ++++-
> 3 files changed, 60 insertions(+), 13 deletions(-)
Looks fine, but needs the same xfs_icache.c rebase...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 3/8] xfs: create helper to check whether to free eofblocks on inode
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
2012-09-27 17:45 ` [PATCH v4 1/8] xfs: add EOFBLOCKS inode tagging/untagging Brian Foster
2012-09-27 17:45 ` [PATCH v4 2/8] xfs: support a tag-based inode_ag_iterator Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 6:59 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 4/8] xfs: export xfs_free_eofblocks() and return EAGAIN on trylock failure Brian Foster
` (4 subsequent siblings)
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
This check is used in multiple places to determine whether we
should check for (and potentially free) post EOF blocks on an
inode. Add a helper to consolidate the check.
Note that when we remove an inode from the cache (xfs_inactive()),
we are required to trim post-EOF blocks even if the inode is marked
preallocated or append-only to maintain correct space accounting.
The 'force' parameter to xfs_can_free_eofblocks() specifies whether
we should ignore the prealloc/append-only status of the inode.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_vnodeops.c | 19 +++++++------------
fs/xfs/xfs_vnodeops.h | 40 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 46 insertions(+), 13 deletions(-)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d883881..12f5087 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -435,11 +435,7 @@ xfs_release(
if (ip->i_d.di_nlink == 0)
return 0;
- if ((S_ISREG(ip->i_d.di_mode) &&
- (VFS_I(ip)->i_size > 0 ||
- (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
- (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
+ if (xfs_can_free_eofblocks(ip, false)) {
/*
* If we can't get the iolock just skip truncating the blocks
@@ -515,13 +511,12 @@ xfs_inactive(
goto out;
if (ip->i_d.di_nlink != 0) {
- if ((S_ISREG(ip->i_d.di_mode) &&
- (VFS_I(ip)->i_size > 0 ||
- (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
- (ip->i_df.if_flags & XFS_IFEXTENTS) &&
- (!(ip->i_d.di_flags &
- (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
- ip->i_delayed_blks != 0))) {
+ /*
+ * force is true because we are evicting an inode from the
+ * cache. Post-eof blocks must be freed, lest we end up with
+ * broken free space accounting.
+ */
+ if (xfs_can_free_eofblocks(ip, true)) {
error = xfs_free_eofblocks(mp, ip, false);
if (error)
return VN_INACTIVE_CACHE;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 447e146..d5701e3 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -1,6 +1,10 @@
#ifndef _XFS_VNODEOPS_H
#define _XFS_VNODEOPS_H 1
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+
struct attrlist_cursor_kern;
struct file;
struct iattr;
@@ -9,8 +13,42 @@ struct iovec;
struct kiocb;
struct pipe_inode_info;
struct uio;
-struct xfs_inode;
+/*
+ * Test whether it is appropriate to check an inode for and free post EOF
+ * blocks. The 'force' parameter determines whether we should also consider
+ * regular files that are marked preallocated or append-only.
+ */
+static inline bool
+xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
+{
+ /* prealloc/delalloc exists only on regular files */
+ if (!S_ISREG(ip->i_d.di_mode))
+ return false;
+
+ /*
+ * Zero sized files with no cached pages and delalloc blocks will not
+ * have speculative prealloc/delalloc blocks to remove.
+ */
+ if (VFS_I(ip)->i_size == 0 &&
+ VN_CACHED(VFS_I(ip)) == 0 &&
+ ip->i_delayed_blks == 0)
+ return false;
+
+ /* If we haven't read in the extent list, then don't do it now. */
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+ return false;
+
+ /*
+ * Do not free real preallocated or append-only files unless the file
+ * has delalloc blocks and we are forced to remove them.
+ */
+ if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
+ if (!force || ip->i_delayed_blks == 0)
+ return false;
+
+ return true;
+}
int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 3/8] xfs: create helper to check whether to free eofblocks on inode
2012-09-27 17:45 ` [PATCH v4 3/8] xfs: create helper to check whether to free eofblocks on inode Brian Foster
@ 2012-09-28 6:59 ` Dave Chinner
2012-09-28 20:41 ` Brian Foster
0 siblings, 1 reply; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 6:59 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:47PM -0400, Brian Foster wrote:
> This check is used in multiple places to determine whether we
> should check for (and potentially free) post EOF blocks on an
> inode. Add a helper to consolidate the check.
>
> Note that when we remove an inode from the cache (xfs_inactive()),
> we are required to trim post-EOF blocks even if the inode is marked
> preallocated or append-only to maintain correct space accounting.
> The 'force' parameter to xfs_can_free_eofblocks() specifies whether
> we should ignore the prealloc/append-only status of the inode.
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
....
> diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
> index 447e146..d5701e3 100644
> --- a/fs/xfs/xfs_vnodeops.h
> +++ b/fs/xfs/xfs_vnodeops.h
> @@ -1,6 +1,10 @@
> #ifndef _XFS_VNODEOPS_H
> #define _XFS_VNODEOPS_H 1
>
> +#include "xfs_bmap_btree.h"
> +#include "xfs_dinode.h"
> +#include "xfs_inode.h"
> +
> struct attrlist_cursor_kern;
> struct file;
> struct iattr;
Generally we try to avoid including XFS header files in XFS
header files, and instead order the header files in the .c files
appropriately.
> @@ -9,8 +13,42 @@ struct iovec;
> struct kiocb;
> struct pipe_inode_info;
> struct uio;
> -struct xfs_inode;
>
> +/*
> + * Test whether it is appropriate to check an inode for and free post EOF
> + * blocks. The 'force' parameter determines whether we should also consider
> + * regular files that are marked preallocated or append-only.
> + */
> +static inline bool
> +xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
> +{
> + /* prealloc/delalloc exists only on regular files */
> + if (!S_ISREG(ip->i_d.di_mode))
> + return false;
> +
> + /*
> + * Zero sized files with no cached pages and delalloc blocks will not
> + * have speculative prealloc/delalloc blocks to remove.
> + */
> + if (VFS_I(ip)->i_size == 0 &&
> + VN_CACHED(VFS_I(ip)) == 0 &&
> + ip->i_delayed_blks == 0)
> + return false;
> +
> + /* If we haven't read in the extent list, then don't do it now. */
> + if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
> + return false;
> +
> + /*
> + * Do not free real preallocated or append-only files unless the file
> + * has delalloc blocks and we are forced to remove them.
> + */
> + if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
> + if (!force || ip->i_delayed_blks == 0)
> + return false;
> +
> + return true;
> +}
And I'd say that function is too large for a static inline function
in a header file, so moving it to xfs_inode.c is probably
appropriate. I know, I asked you to write it like this, but
not looking at it for a couple weeks brings new persepctive ;)
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v4 3/8] xfs: create helper to check whether to free eofblocks on inode
2012-09-28 6:59 ` Dave Chinner
@ 2012-09-28 20:41 ` Brian Foster
0 siblings, 0 replies; 22+ messages in thread
From: Brian Foster @ 2012-09-28 20:41 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 09/28/2012 02:59 AM, Dave Chinner wrote:
> On Thu, Sep 27, 2012 at 01:45:47PM -0400, Brian Foster wrote:
>> This check is used in multiple places to determine whether we
>> should check for (and potentially free) post EOF blocks on an
>> inode. Add a helper to consolidate the check.
>>
>> Note that when we remove an inode from the cache (xfs_inactive()),
>> we are required to trim post-EOF blocks even if the inode is marked
>> preallocated or append-only to maintain correct space accounting.
>> The 'force' parameter to xfs_can_free_eofblocks() specifies whether
>> we should ignore the prealloc/append-only status of the inode.
>>
>> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ....
>> diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
>> index 447e146..d5701e3 100644
>> --- a/fs/xfs/xfs_vnodeops.h
>> +++ b/fs/xfs/xfs_vnodeops.h
>> @@ -1,6 +1,10 @@
>> #ifndef _XFS_VNODEOPS_H
>> #define _XFS_VNODEOPS_H 1
>>
>> +#include "xfs_bmap_btree.h"
>> +#include "xfs_dinode.h"
>> +#include "xfs_inode.h"
>> +
>> struct attrlist_cursor_kern;
>> struct file;
>> struct iattr;
>
> Generally we try to avoid including XFS header files in XFS
> header files, and instead order the header files in the .c files
> appropriately.
>
Ok, good to know. I'll try to clean that up.
>> @@ -9,8 +13,42 @@ struct iovec;
>> struct kiocb;
>> struct pipe_inode_info;
>> struct uio;
>> -struct xfs_inode;
>>
>> +/*
>> + * Test whether it is appropriate to check an inode for and free post EOF
>> + * blocks. The 'force' parameter determines whether we should also consider
>> + * regular files that are marked preallocated or append-only.
>> + */
>> +static inline bool
>> +xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
>> +{
>> + /* prealloc/delalloc exists only on regular files */
>> + if (!S_ISREG(ip->i_d.di_mode))
>> + return false;
>> +
>> + /*
>> + * Zero sized files with no cached pages and delalloc blocks will not
>> + * have speculative prealloc/delalloc blocks to remove.
>> + */
>> + if (VFS_I(ip)->i_size == 0 &&
>> + VN_CACHED(VFS_I(ip)) == 0 &&
>> + ip->i_delayed_blks == 0)
>> + return false;
>> +
>> + /* If we haven't read in the extent list, then don't do it now. */
>> + if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
>> + return false;
>> +
>> + /*
>> + * Do not free real preallocated or append-only files unless the file
>> + * has delalloc blocks and we are forced to remove them.
>> + */
>> + if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
>> + if (!force || ip->i_delayed_blks == 0)
>> + return false;
>> +
>> + return true;
>> +}
>
> And I'd say that function is too large for a static inline function
> in a header file, so moving it to xfs_inode.c is probably
> appropriate. I know, I asked you to write it like this, but
> not looking at it for a couple weeks brings new persepctive ;)
>
No problem. I guess that takes care of the include problem. :)
Brian
> Cheers,
>
> Dave.
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 4/8] xfs: export xfs_free_eofblocks() and return EAGAIN on trylock failure
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
` (2 preceding siblings ...)
2012-09-27 17:45 ` [PATCH v4 3/8] xfs: create helper to check whether to free eofblocks on inode Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 7:00 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 5/8] xfs: create function to scan and clear EOFBLOCKS inodes Brian Foster
` (3 subsequent siblings)
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
Turn xfs_free_eofblocks() into a non-static function, return EAGAIN to
indicate trylock failure and make sure this error is not propagated in
xfs_release().
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_vnodeops.c | 6 +++---
fs/xfs/xfs_vnodeops.h | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 12f5087..a61e852 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -150,7 +150,7 @@ xfs_readlink(
* when the link count isn't zero and by xfs_dm_punch_hole() when
* punching a hole to EOF.
*/
-STATIC int
+int
xfs_free_eofblocks(
xfs_mount_t *mp,
xfs_inode_t *ip,
@@ -199,7 +199,7 @@ xfs_free_eofblocks(
if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
- return 0;
+ return EAGAIN;
}
}
@@ -462,7 +462,7 @@ xfs_release(
return 0;
error = xfs_free_eofblocks(mp, ip, true);
- if (error)
+ if (error && error != EAGAIN)
return error;
/* delalloc blocks after truncation means it really is dirty */
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index d5701e3..1e03c4b 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -95,5 +95,6 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+int xfs_free_eofblocks(struct xfs_mount *, struct xfs_inode *, bool);
#endif /* _XFS_VNODEOPS_H */
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v4 5/8] xfs: create function to scan and clear EOFBLOCKS inodes
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
` (3 preceding siblings ...)
2012-09-27 17:45 ` [PATCH v4 4/8] xfs: export xfs_free_eofblocks() and return EAGAIN on trylock failure Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 7:21 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl Brian Foster
` (2 subsequent siblings)
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
xfs_inodes_free_eofblocks() implements scanning functionality for
EOFBLOCKS inodes. It uses the AG iterator to walk the tagged inodes
and free post-EOF blocks via the xfs_inode_free_eofblocks() execute
function. The scan can be invoked in best-effort mode or wait
(force) mode.
A best-effort scan (default) handles all inodes that do not have a
dirty cache and we successfully acquire the io lock via trylock. In
wait mode, we continue to cycle through an AG until all inodes are
handled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_sync.c | 40 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_sync.h | 1 +
fs/xfs/xfs_trace.h | 1 +
3 files changed, 42 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 0da93c9..6854800 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -1014,6 +1014,46 @@ xfs_reclaim_inodes_count(
return reclaimable;
}
+STATIC int
+xfs_inode_free_eofblocks(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags,
+ void *args)
+{
+ int ret;
+ bool force = flags & SYNC_WAIT;
+
+ if (!xfs_can_free_eofblocks(ip, false)) {
+ /* inode could be preallocated or append-only */
+ trace_xfs_inode_free_eofblocks_invalid(ip);
+ xfs_inode_clear_eofblocks_tag(ip);
+ return 0;
+ }
+
+ if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
+ PAGECACHE_TAG_DIRTY))
+ return 0;
+
+ ret = xfs_free_eofblocks(ip->i_mount, ip, true);
+
+ /* ignore EAGAIN on a best effort scan */
+ if (!force && (ret == EAGAIN))
+ ret = 0;
+
+ return ret;
+}
+
+int
+xfs_inodes_free_eofblocks(
+ struct xfs_mount *mp,
+ int flags)
+{
+ ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+ return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
+ NULL, XFS_ICI_EOFBLOCKS_TAG);
+}
+
void
xfs_inode_set_eofblocks_tag(
xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 463ea0a..307654a 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -45,6 +45,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
+int xfs_inodes_free_eofblocks(struct xfs_mount *, int);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6f46e03..cb52346 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -589,6 +589,7 @@ DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
+DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 5/8] xfs: create function to scan and clear EOFBLOCKS inodes
2012-09-27 17:45 ` [PATCH v4 5/8] xfs: create function to scan and clear EOFBLOCKS inodes Brian Foster
@ 2012-09-28 7:21 ` Dave Chinner
2012-09-28 20:41 ` Brian Foster
0 siblings, 1 reply; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 7:21 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:49PM -0400, Brian Foster wrote:
> xfs_inodes_free_eofblocks() implements scanning functionality for
> EOFBLOCKS inodes. It uses the AG iterator to walk the tagged inodes
> and free post-EOF blocks via the xfs_inode_free_eofblocks() execute
> function. The scan can be invoked in best-effort mode or wait
> (force) mode.
>
> A best-effort scan (default) handles all inodes that do not have a
> dirty cache and we successfully acquire the io lock via trylock. In
> wait mode, we continue to cycle through an AG until all inodes are
> handled.
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
xfs_icache.c rebase, and...
> ---
> fs/xfs/xfs_sync.c | 40 ++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_sync.h | 1 +
> fs/xfs/xfs_trace.h | 1 +
> 3 files changed, 42 insertions(+), 0 deletions(-)
>
> diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
> index 0da93c9..6854800 100644
> --- a/fs/xfs/xfs_sync.c
> +++ b/fs/xfs/xfs_sync.c
> @@ -1014,6 +1014,46 @@ xfs_reclaim_inodes_count(
> return reclaimable;
> }
>
> +STATIC int
> +xfs_inode_free_eofblocks(
> + struct xfs_inode *ip,
> + struct xfs_perag *pag,
> + int flags,
> + void *args)
> +{
> + int ret;
> + bool force = flags & SYNC_WAIT;
> +
> + if (!xfs_can_free_eofblocks(ip, false)) {
> + /* inode could be preallocated or append-only */
> + trace_xfs_inode_free_eofblocks_invalid(ip);
> + xfs_inode_clear_eofblocks_tag(ip);
> + return 0;
> + }
> +
> + if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
> + PAGECACHE_TAG_DIRTY))
> + return 0;
This reads rather strangely. I'd prefer that you don't use a "force"
variable because we're not really "forcing" anything. SYNC_WAIT is
telling us if we should block (wait) or not. i.e.
/*
* if the mapping is dirty the operation can block and wait
* for some time. So unless we are waiting, skip it.
*/
if (!(flags & SYNC_WAIT) &&
(mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
return 0;
makes more sense and is consistent with xfs_reclaim_inode() usage.
> + ret = xfs_free_eofblocks(ip->i_mount, ip, true);
> +
> + /* ignore EAGAIN on a best effort scan */
> + if (!force && (ret == EAGAIN))
> + ret = 0;
/* don't revisit the inode if we not waiting */
if (ret == EAGAIN && !(flags & SYNC_WAIT))
return 0;
return ret;
> +
> + return ret;
> +}
> +
> +int
> +xfs_inodes_free_eofblocks(
> + struct xfs_mount *mp,
> + int flags)
> +{
> + ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
> + return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
> + NULL, XFS_ICI_EOFBLOCKS_TAG);
> +}
TWo functions very similarly named. Perhaps the latter would be
better named xfs_icache_free_eofblocks() to indicate it is an inode
cache operation, rather than an inode operation.
Then at some point in another patch set we can rename
xfs_reclaim_inodes* to xfs_icache_reclaim_* and
xfs_inode_ag_iterator* to xfs_icache_iterator* and so one so that
there is a clear naming difference between operations on the inode
cache and individual inodes...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v4 5/8] xfs: create function to scan and clear EOFBLOCKS inodes
2012-09-28 7:21 ` Dave Chinner
@ 2012-09-28 20:41 ` Brian Foster
0 siblings, 0 replies; 22+ messages in thread
From: Brian Foster @ 2012-09-28 20:41 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 09/28/2012 03:21 AM, Dave Chinner wrote:
> On Thu, Sep 27, 2012 at 01:45:49PM -0400, Brian Foster wrote:
>> xfs_inodes_free_eofblocks() implements scanning functionality for
>> EOFBLOCKS inodes. It uses the AG iterator to walk the tagged inodes
>> and free post-EOF blocks via the xfs_inode_free_eofblocks() execute
>> function. The scan can be invoked in best-effort mode or wait
>> (force) mode.
>>
>> A best-effort scan (default) handles all inodes that do not have a
>> dirty cache and we successfully acquire the io lock via trylock. In
>> wait mode, we continue to cycle through an AG until all inodes are
>> handled.
>>
>> Signed-off-by: Brian Foster <bfoster@redhat.com>
>
> xfs_icache.c rebase, and...
>
>> ---
>> fs/xfs/xfs_sync.c | 40 ++++++++++++++++++++++++++++++++++++++++
>> fs/xfs/xfs_sync.h | 1 +
>> fs/xfs/xfs_trace.h | 1 +
>> 3 files changed, 42 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
>> index 0da93c9..6854800 100644
>> --- a/fs/xfs/xfs_sync.c
>> +++ b/fs/xfs/xfs_sync.c
>> @@ -1014,6 +1014,46 @@ xfs_reclaim_inodes_count(
>> return reclaimable;
>> }
>>
>> +STATIC int
>> +xfs_inode_free_eofblocks(
>> + struct xfs_inode *ip,
>> + struct xfs_perag *pag,
>> + int flags,
>> + void *args)
>> +{
>> + int ret;
>> + bool force = flags & SYNC_WAIT;
>> +
>> + if (!xfs_can_free_eofblocks(ip, false)) {
>> + /* inode could be preallocated or append-only */
>> + trace_xfs_inode_free_eofblocks_invalid(ip);
>> + xfs_inode_clear_eofblocks_tag(ip);
>> + return 0;
>> + }
>> +
>> + if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
>> + PAGECACHE_TAG_DIRTY))
>> + return 0;
>
> This reads rather strangely. I'd prefer that you don't use a "force"
> variable because we're not really "forcing" anything. SYNC_WAIT is
> telling us if we should block (wait) or not. i.e.
>
> /*
> * if the mapping is dirty the operation can block and wait
> * for some time. So unless we are waiting, skip it.
> */
> if (!(flags & SYNC_WAIT) &&
> (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
> return 0;
>
> makes more sense and is consistent with xfs_reclaim_inode() usage.
>
Fair enough. I was thinking of the "force" scan mode as I called it, but
as you point out in the next patch that's inconsistently named as well.
Will fix.
>> + ret = xfs_free_eofblocks(ip->i_mount, ip, true);
>> +
>> + /* ignore EAGAIN on a best effort scan */
>> + if (!force && (ret == EAGAIN))
>> + ret = 0;
>
> /* don't revisit the inode if we not waiting */
> if (ret == EAGAIN && !(flags & SYNC_WAIT))
> return 0;
> return ret;
>> +
>> + return ret;
>> +}
>> +
>> +int
>> +xfs_inodes_free_eofblocks(
>> + struct xfs_mount *mp,
>> + int flags)
>> +{
>> + ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
>> + return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
>> + NULL, XFS_ICI_EOFBLOCKS_TAG);
>> +}
>
> TWo functions very similarly named. Perhaps the latter would be
> better named xfs_icache_free_eofblocks() to indicate it is an inode
> cache operation, rather than an inode operation.
>
Ok, so correct me if I misread your comment. xfs_inodes_free_eofblocks()
goes to xfs_icache_free_eofblocks() and xfs_inode_free_eofblocks()
remains as is.
> Then at some point in another patch set we can rename
> xfs_reclaim_inodes* to xfs_icache_reclaim_* and
> xfs_inode_ag_iterator* to xfs_icache_iterator* and so one so that
> there is a clear naming difference between operations on the inode
> cache and individual inodes...
>
Sounds logical.
Brian
> Cheers,
>
> Dave.
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
` (4 preceding siblings ...)
2012-09-27 17:45 ` [PATCH v4 5/8] xfs: create function to scan and clear EOFBLOCKS inodes Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 7:25 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 7/8] xfs: add enhanced filtering to EOFBLOCKS scan Brian Foster
2012-09-27 17:45 ` [PATCH v4 8/8] xfs: add background scanning to clear EOFBLOCKS inodes Brian Foster
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
The XFS_IOC_FREE_EOFBLOCKS ioctl allows users to invoke an EOFBLOCKS
scan. The xfs_eofblocks structure is defined to support the command
parameters (scan mode).
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_fs.h | 14 ++++++++++++++
fs/xfs/xfs_ioctl.c | 12 ++++++++++++
2 files changed, 26 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index c13fed8..32bb2e8 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -339,6 +339,19 @@ typedef struct xfs_error_injection {
/*
+ * Speculative preallocation trimming.
+ */
+struct xfs_eofblocks {
+ __u32 eof_flags;
+ __s32 version;
+ unsigned char pad[12];
+};
+
+/* eof_flags values */
+#define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
+
+
+/*
* The user-level Handle Request interface structure.
*/
typedef struct xfs_fsop_handlereq {
@@ -456,6 +469,7 @@ typedef struct xfs_handle {
/* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
+#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0e0232c..216ca7a 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1602,6 +1602,18 @@ xfs_file_ioctl(
error = xfs_errortag_clearall(mp, 1);
return -error;
+ case XFS_IOC_FREE_EOFBLOCKS: {
+ struct xfs_eofblocks eofb;
+ int flags;
+
+ if (copy_from_user(&eofb, arg, sizeof(eofb)))
+ return -XFS_ERROR(EFAULT);
+
+ flags = (eofb.eof_flags & XFS_EOF_FLAGS_FORCE) ? SYNC_WAIT : SYNC_TRYLOCK;
+ error = xfs_inodes_free_eofblocks(mp, flags);
+ return -error;
+ }
+
default:
return -ENOTTY;
}
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl
2012-09-27 17:45 ` [PATCH v4 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl Brian Foster
@ 2012-09-28 7:25 ` Dave Chinner
0 siblings, 0 replies; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 7:25 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:50PM -0400, Brian Foster wrote:
> The XFS_IOC_FREE_EOFBLOCKS ioctl allows users to invoke an EOFBLOCKS
> scan. The xfs_eofblocks structure is defined to support the command
> parameters (scan mode).
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
> fs/xfs/xfs_fs.h | 14 ++++++++++++++
> fs/xfs/xfs_ioctl.c | 12 ++++++++++++
> 2 files changed, 26 insertions(+), 0 deletions(-)
>
> diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
> index c13fed8..32bb2e8 100644
> --- a/fs/xfs/xfs_fs.h
> +++ b/fs/xfs/xfs_fs.h
> @@ -339,6 +339,19 @@ typedef struct xfs_error_injection {
>
>
> /*
> + * Speculative preallocation trimming.
> + */
> +struct xfs_eofblocks {
> + __u32 eof_flags;
> + __s32 version;
__u32
> + unsigned char pad[12];
> +};
> +
> +/* eof_flags values */
> +#define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
Might be better defined as XFS_EOF_FLAGS_SYNC as that is more
consistent with what users see as a blocking operation that waits
for completion to occur.
Also, you need to define the structure version here as well (i.e. v1).
> +
> +
> +/*
> * The user-level Handle Request interface structure.
> */
> typedef struct xfs_fsop_handlereq {
> @@ -456,6 +469,7 @@ typedef struct xfs_handle {
> /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */
> #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
> #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
> +#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_eofblocks)
>
> /*
> * ioctl commands that replace IRIX syssgi()'s
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 0e0232c..216ca7a 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1602,6 +1602,18 @@ xfs_file_ioctl(
> error = xfs_errortag_clearall(mp, 1);
> return -error;
>
> + case XFS_IOC_FREE_EOFBLOCKS: {
> + struct xfs_eofblocks eofb;
> + int flags;
> +
> + if (copy_from_user(&eofb, arg, sizeof(eofb)))
> + return -XFS_ERROR(EFAULT);
And check the version here, too.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 7/8] xfs: add enhanced filtering to EOFBLOCKS scan
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
` (5 preceding siblings ...)
2012-09-27 17:45 ` [PATCH v4 6/8] xfs: add XFS_IOC_FREE_EOFBLOCKS ioctl Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 7:53 ` Dave Chinner
2012-09-27 17:45 ` [PATCH v4 8/8] xfs: add background scanning to clear EOFBLOCKS inodes Brian Foster
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
Support EOFBLOCKS scan filtering by quota ID or minimum file size.
Add the appropriate fields/flags to the xfs_eofblocks structure and
pass it down to xfs_inode_free_eofblocks() where filtering
functionality is implemented.
A (user requested) quota ID based scan requires the associated
quota mode be enabled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_fs.h | 9 +++++++++
fs/xfs/xfs_ioctl.c | 10 +++++++++-
fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++----
fs/xfs/xfs_sync.h | 2 +-
4 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 32bb2e8..54c0f39 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -343,12 +343,21 @@ typedef struct xfs_error_injection {
*/
struct xfs_eofblocks {
__u32 eof_flags;
+ __u32 eof_id;
+ __u64 eof_min_file_size;
__s32 version;
unsigned char pad[12];
};
/* eof_flags values */
#define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
+#define XFS_EOF_FLAGS_USRID 0x02 /* filter by user id */
+#define XFS_EOF_FLAGS_GRPID 0x04 /* filter by group id */
+#define XFS_EOF_FLAGS_PROJID 0x08 /* filter by project id */
+#define XFS_EOF_FLAGS_MINFILESIZE 0x10 /* minimum file size */
+
+#define XFS_EOF_VALID_QUOTA (XFS_EOF_FLAGS_USRID|XFS_EOF_FLAGS_GRPID| \
+ XFS_EOF_FLAGS_PROJID)
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 216ca7a..a7bf847 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1609,8 +1609,16 @@ xfs_file_ioctl(
if (copy_from_user(&eofb, arg, sizeof(eofb)))
return -XFS_ERROR(EFAULT);
+ if (((eofb.eof_flags & XFS_EOF_FLAGS_USRID) &&
+ !XFS_IS_UQUOTA_ON(mp)) ||
+ ((eofb.eof_flags & XFS_EOF_FLAGS_GRPID) &&
+ !XFS_IS_GQUOTA_ON(mp)) ||
+ ((eofb.eof_flags & XFS_EOF_FLAGS_PROJID) &&
+ !XFS_IS_PQUOTA_ON(mp)))
+ return -XFS_ERROR(EINVAL);
+
flags = (eofb.eof_flags & XFS_EOF_FLAGS_FORCE) ? SYNC_WAIT : SYNC_TRYLOCK;
- error = xfs_inodes_free_eofblocks(mp, flags);
+ error = xfs_inodes_free_eofblocks(mp, flags, &eofb);
return -error;
}
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 6854800..c9e1c16 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -1015,6 +1015,21 @@ xfs_reclaim_inodes_count(
}
STATIC int
+xfs_inode_match_quota_id(
+ struct xfs_inode *ip,
+ struct xfs_eofblocks *eofb)
+{
+ if (eofb->eof_flags & XFS_EOF_FLAGS_USRID)
+ return ip->i_d.di_uid == eofb->eof_id;
+ else if (eofb->eof_flags & XFS_EOF_FLAGS_GRPID)
+ return ip->i_d.di_gid == eofb->eof_id;
+ else if (eofb->eof_flags & XFS_EOF_FLAGS_PROJID)
+ return xfs_get_projid(ip) == eofb->eof_id;
+
+ return 0;
+}
+
+STATIC int
xfs_inode_free_eofblocks(
struct xfs_inode *ip,
struct xfs_perag *pag,
@@ -1022,6 +1037,7 @@ xfs_inode_free_eofblocks(
void *args)
{
int ret;
+ struct xfs_eofblocks *eofb = args;
bool force = flags & SYNC_WAIT;
if (!xfs_can_free_eofblocks(ip, false)) {
@@ -1031,8 +1047,13 @@ xfs_inode_free_eofblocks(
return 0;
}
- if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
- PAGECACHE_TAG_DIRTY))
+ if ((eofb &&
+ (((eofb->eof_flags & XFS_EOF_VALID_QUOTA) &&
+ !xfs_inode_match_quota_id(ip, eofb)) ||
+ ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
+ (XFS_ISIZE(ip) < eofb->eof_min_file_size)))) ||
+ (!force && mapping_tagged(VFS_I(ip)->i_mapping,
+ PAGECACHE_TAG_DIRTY)))
return 0;
ret = xfs_free_eofblocks(ip->i_mount, ip, true);
@@ -1047,11 +1068,12 @@ xfs_inode_free_eofblocks(
int
xfs_inodes_free_eofblocks(
struct xfs_mount *mp,
- int flags)
+ int flags,
+ struct xfs_eofblocks *eofb)
{
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
- NULL, XFS_ICI_EOFBLOCKS_TAG);
+ eofb, XFS_ICI_EOFBLOCKS_TAG);
}
void
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 307654a..167f06c 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -45,7 +45,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
-int xfs_inodes_free_eofblocks(struct xfs_mount *, int);
+int xfs_inodes_free_eofblocks(struct xfs_mount *, int, struct xfs_eofblocks *);
int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 7/8] xfs: add enhanced filtering to EOFBLOCKS scan
2012-09-27 17:45 ` [PATCH v4 7/8] xfs: add enhanced filtering to EOFBLOCKS scan Brian Foster
@ 2012-09-28 7:53 ` Dave Chinner
2012-09-28 20:42 ` Brian Foster
0 siblings, 1 reply; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 7:53 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:51PM -0400, Brian Foster wrote:
> Support EOFBLOCKS scan filtering by quota ID or minimum file size.
> Add the appropriate fields/flags to the xfs_eofblocks structure and
> pass it down to xfs_inode_free_eofblocks() where filtering
> functionality is implemented.
>
> A (user requested) quota ID based scan requires the associated
> quota mode be enabled.
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
> fs/xfs/xfs_fs.h | 9 +++++++++
> fs/xfs/xfs_ioctl.c | 10 +++++++++-
> fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++----
> fs/xfs/xfs_sync.h | 2 +-
> 4 files changed, 45 insertions(+), 6 deletions(-)
>
> diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
> index 32bb2e8..54c0f39 100644
> --- a/fs/xfs/xfs_fs.h
> +++ b/fs/xfs/xfs_fs.h
> @@ -343,12 +343,21 @@ typedef struct xfs_error_injection {
> */
> struct xfs_eofblocks {
> __u32 eof_flags;
> + __u32 eof_id;
> + __u64 eof_min_file_size;
> __s32 version;
> unsigned char pad[12];
> };
ACtually, looking at this the version needs to be the first field of
the structure, so we can guarantee that it can be read by any kernel
that supports the ioctl regardless of how the rest of the structure
changes.
> /* eof_flags values */
> #define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
> +#define XFS_EOF_FLAGS_USRID 0x02 /* filter by user id */
> +#define XFS_EOF_FLAGS_GRPID 0x04 /* filter by group id */
> +#define XFS_EOF_FLAGS_PROJID 0x08 /* filter by project id */
I'm wondering if it would be better to pass real quota fields (as
per dqblk_xfs.h) than make up a new way to pass the same
information). That way we might be able to use standard quota
functions rather for checks and comparisons rather than duplicating
them. That way if we ever add new quota types, we don't have to add
flags and validation to this ioctl....
i.e. we have XFS_EOF_FLAGS_QUOTA to say "filter by quota fields",
similar to the XFS_EOF_FLAGS_MINFILESIZE flag...
And it becomes much easier to convert to userns kqids that are not
that far away:
https://lkml.org/lkml/2012/9/19/587
> +#define XFS_EOF_FLAGS_MINFILESIZE 0x10 /* minimum file size */
> +
> +#define XFS_EOF_VALID_QUOTA (XFS_EOF_FLAGS_USRID|XFS_EOF_FLAGS_GRPID| \
> + XFS_EOF_FLAGS_PROJID)
>
>
> /*
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index 216ca7a..a7bf847 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -1609,8 +1609,16 @@ xfs_file_ioctl(
> if (copy_from_user(&eofb, arg, sizeof(eofb)))
> return -XFS_ERROR(EFAULT);
>
> + if (((eofb.eof_flags & XFS_EOF_FLAGS_USRID) &&
> + !XFS_IS_UQUOTA_ON(mp)) ||
> + ((eofb.eof_flags & XFS_EOF_FLAGS_GRPID) &&
> + !XFS_IS_GQUOTA_ON(mp)) ||
> + ((eofb.eof_flags & XFS_EOF_FLAGS_PROJID) &&
> + !XFS_IS_PQUOTA_ON(mp)))
> + return -XFS_ERROR(EINVAL);
> +
> flags = (eofb.eof_flags & XFS_EOF_FLAGS_FORCE) ? SYNC_WAIT : SYNC_TRYLOCK;
> - error = xfs_inodes_free_eofblocks(mp, flags);
> + error = xfs_inodes_free_eofblocks(mp, flags, &eofb);
You probably shoul djust pass the &eofb in the first patch, rather
than having to change the implementation here again.
> return -error;
> }
>
> diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
> index 6854800..c9e1c16 100644
> --- a/fs/xfs/xfs_sync.c
> +++ b/fs/xfs/xfs_sync.c
> @@ -1015,6 +1015,21 @@ xfs_reclaim_inodes_count(
> }
>
> STATIC int
> +xfs_inode_match_quota_id(
> + struct xfs_inode *ip,
> + struct xfs_eofblocks *eofb)
> +{
> + if (eofb->eof_flags & XFS_EOF_FLAGS_USRID)
> + return ip->i_d.di_uid == eofb->eof_id;
> + else if (eofb->eof_flags & XFS_EOF_FLAGS_GRPID)
> + return ip->i_d.di_gid == eofb->eof_id;
> + else if (eofb->eof_flags & XFS_EOF_FLAGS_PROJID)
> + return xfs_get_projid(ip) == eofb->eof_id;
> +
> + return 0;
> +}
> +
> +STATIC int
> xfs_inode_free_eofblocks(
> struct xfs_inode *ip,
> struct xfs_perag *pag,
> @@ -1022,6 +1037,7 @@ xfs_inode_free_eofblocks(
> void *args)
> {
> int ret;
> + struct xfs_eofblocks *eofb = args;
> bool force = flags & SYNC_WAIT;
>
> if (!xfs_can_free_eofblocks(ip, false)) {
> @@ -1031,8 +1047,13 @@ xfs_inode_free_eofblocks(
> return 0;
> }
>
> - if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
> - PAGECACHE_TAG_DIRTY))
> + if ((eofb &&
> + (((eofb->eof_flags & XFS_EOF_VALID_QUOTA) &&
> + !xfs_inode_match_quota_id(ip, eofb)) ||
> + ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
> + (XFS_ISIZE(ip) < eofb->eof_min_file_size)))) ||
> + (!force && mapping_tagged(VFS_I(ip)->i_mapping,
> + PAGECACHE_TAG_DIRTY)))
> return 0;
Break that up into multiple "if() return 0" statements so it is
possible to read the logic. ;)
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v4 7/8] xfs: add enhanced filtering to EOFBLOCKS scan
2012-09-28 7:53 ` Dave Chinner
@ 2012-09-28 20:42 ` Brian Foster
0 siblings, 0 replies; 22+ messages in thread
From: Brian Foster @ 2012-09-28 20:42 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 09/28/2012 03:53 AM, Dave Chinner wrote:
> On Thu, Sep 27, 2012 at 01:45:51PM -0400, Brian Foster wrote:
>> Support EOFBLOCKS scan filtering by quota ID or minimum file size.
>> Add the appropriate fields/flags to the xfs_eofblocks structure and
>> pass it down to xfs_inode_free_eofblocks() where filtering
>> functionality is implemented.
>>
>> A (user requested) quota ID based scan requires the associated
>> quota mode be enabled.
>>
>> Signed-off-by: Brian Foster <bfoster@redhat.com>
>> ---
>> fs/xfs/xfs_fs.h | 9 +++++++++
>> fs/xfs/xfs_ioctl.c | 10 +++++++++-
>> fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++----
>> fs/xfs/xfs_sync.h | 2 +-
>> 4 files changed, 45 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
>> index 32bb2e8..54c0f39 100644
>> --- a/fs/xfs/xfs_fs.h
>> +++ b/fs/xfs/xfs_fs.h
>> @@ -343,12 +343,21 @@ typedef struct xfs_error_injection {
>> */
>> struct xfs_eofblocks {
>> __u32 eof_flags;
>> + __u32 eof_id;
>> + __u64 eof_min_file_size;
>> __s32 version;
>> unsigned char pad[12];
>> };
>
> ACtually, looking at this the version needs to be the first field of
> the structure, so we can guarantee that it can be read by any kernel
> that supports the ioctl regardless of how the rest of the structure
> changes.
>
Ah, right.
>
>> /* eof_flags values */
>> #define XFS_EOF_FLAGS_FORCE 0x01 /* force/wait mode scan */
>> +#define XFS_EOF_FLAGS_USRID 0x02 /* filter by user id */
>> +#define XFS_EOF_FLAGS_GRPID 0x04 /* filter by group id */
>> +#define XFS_EOF_FLAGS_PROJID 0x08 /* filter by project id */
>
> I'm wondering if it would be better to pass real quota fields (as
> per dqblk_xfs.h) than make up a new way to pass the same
> information). That way we might be able to use standard quota
> functions rather for checks and comparisons rather than duplicating
> them. That way if we ever add new quota types, we don't have to add
> flags and validation to this ioctl....
>
> i.e. we have XFS_EOF_FLAGS_QUOTA to say "filter by quota fields",
> similar to the XFS_EOF_FLAGS_MINFILESIZE flag...
>
> And it becomes much easier to convert to userns kqids that are not
> that far away:
>
> https://lkml.org/lkml/2012/9/19/587
>
I think that means I would replace the id (and associated id bits in
the flags field) in the structure to something like:
struct xfs_eofblocks {
...
__u32 eof_q_id;
u8 eof_q_type;
...
};
... to mirror the values passed through quotactl(). I would check these
only if the aforementioned XFS_EOF_FLAGS_QUOTA bit is set. These
implicitly convert to the qid_t and {USR,GRP,PRJ}QUOTA values used
currently and presumably will convert over nicely to kqid when that hits.
I don't think this changes the code too much. Perhaps I can open up and
make use of xfs_quota_type() as well, and it cleans up the duplication
you referred to in terms of all the quota definitions we have already.
>> +#define XFS_EOF_FLAGS_MINFILESIZE 0x10 /* minimum file size */
>> +
>> +#define XFS_EOF_VALID_QUOTA (XFS_EOF_FLAGS_USRID|XFS_EOF_FLAGS_GRPID| \
>> + XFS_EOF_FLAGS_PROJID)
>>
>>
>> /*
>> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
>> index 216ca7a..a7bf847 100644
>> --- a/fs/xfs/xfs_ioctl.c
>> +++ b/fs/xfs/xfs_ioctl.c
>> @@ -1609,8 +1609,16 @@ xfs_file_ioctl(
>> if (copy_from_user(&eofb, arg, sizeof(eofb)))
>> return -XFS_ERROR(EFAULT);
>>
>> + if (((eofb.eof_flags & XFS_EOF_FLAGS_USRID) &&
>> + !XFS_IS_UQUOTA_ON(mp)) ||
>> + ((eofb.eof_flags & XFS_EOF_FLAGS_GRPID) &&
>> + !XFS_IS_GQUOTA_ON(mp)) ||
>> + ((eofb.eof_flags & XFS_EOF_FLAGS_PROJID) &&
>> + !XFS_IS_PQUOTA_ON(mp)))
>> + return -XFS_ERROR(EINVAL);
>> +
>> flags = (eofb.eof_flags & XFS_EOF_FLAGS_FORCE) ? SYNC_WAIT : SYNC_TRYLOCK;
>> - error = xfs_inodes_free_eofblocks(mp, flags);
>> + error = xfs_inodes_free_eofblocks(mp, flags, &eofb);
>
> You probably shoul djust pass the &eofb in the first patch, rather
> than having to change the implementation here again.
>
I introduce the xfs_inodes_free_eofblocks() call in patch 5 and
xfs_eofblocks in patch 6 for the ioctl support. I could start passing
eofb down in patch 6, but it would be unused unless I replaced the flags
parameter and checked the scan mode based on the eof_flags instead. This
means the background scanner invocation would now pass an xfs_eofblocks
as well (as well as future callers). Thoughts?
If it's just a matter of ordering and patch succinctness, of course, I
can leave the function signature alone and just pass the structure down
unused. ;)
>> return -error;
>> }
>>
>> diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
>> index 6854800..c9e1c16 100644
>> --- a/fs/xfs/xfs_sync.c
>> +++ b/fs/xfs/xfs_sync.c
>> @@ -1015,6 +1015,21 @@ xfs_reclaim_inodes_count(
>> }
>>
>> STATIC int
>> +xfs_inode_match_quota_id(
>> + struct xfs_inode *ip,
>> + struct xfs_eofblocks *eofb)
>> +{
>> + if (eofb->eof_flags & XFS_EOF_FLAGS_USRID)
>> + return ip->i_d.di_uid == eofb->eof_id;
>> + else if (eofb->eof_flags & XFS_EOF_FLAGS_GRPID)
>> + return ip->i_d.di_gid == eofb->eof_id;
>> + else if (eofb->eof_flags & XFS_EOF_FLAGS_PROJID)
>> + return xfs_get_projid(ip) == eofb->eof_id;
>> +
>> + return 0;
>> +}
>> +
>> +STATIC int
>> xfs_inode_free_eofblocks(
>> struct xfs_inode *ip,
>> struct xfs_perag *pag,
>> @@ -1022,6 +1037,7 @@ xfs_inode_free_eofblocks(
>> void *args)
>> {
>> int ret;
>> + struct xfs_eofblocks *eofb = args;
>> bool force = flags & SYNC_WAIT;
>>
>> if (!xfs_can_free_eofblocks(ip, false)) {
>> @@ -1031,8 +1047,13 @@ xfs_inode_free_eofblocks(
>> return 0;
>> }
>>
>> - if (!force && mapping_tagged(VFS_I(ip)->i_mapping,
>> - PAGECACHE_TAG_DIRTY))
>> + if ((eofb &&
>> + (((eofb->eof_flags & XFS_EOF_VALID_QUOTA) &&
>> + !xfs_inode_match_quota_id(ip, eofb)) ||
>> + ((eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE) &&
>> + (XFS_ISIZE(ip) < eofb->eof_min_file_size)))) ||
>> + (!force && mapping_tagged(VFS_I(ip)->i_mapping,
>> + PAGECACHE_TAG_DIRTY)))
>> return 0;
>
> Break that up into multiple "if() return 0" statements so it is
> possible to read the logic. ;)
>
Ok. ;)
Brian
> Cheers,
>
> Dave.
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 8/8] xfs: add background scanning to clear EOFBLOCKS inodes
2012-09-27 17:45 [PATCH v4 0/8] speculative preallocation inode tracking Brian Foster
` (6 preceding siblings ...)
2012-09-27 17:45 ` [PATCH v4 7/8] xfs: add enhanced filtering to EOFBLOCKS scan Brian Foster
@ 2012-09-27 17:45 ` Brian Foster
2012-09-28 8:00 ` Dave Chinner
7 siblings, 1 reply; 22+ messages in thread
From: Brian Foster @ 2012-09-27 17:45 UTC (permalink / raw)
To: xfs
Create a delayed_work to enable background scanning and freeing
of EOFBLOCKS inodes. The scanner kicks in once speculative
preallocation occurs and stops requeueing itself when no EOFBLOCKS
inodes exist.
Scans are queued on the existing syncd workqueue and the interval
is based on the new eofb_timer tunable (default to 5m). The
background scanner performs unfiltered, best effort scans (which
skips inodes under lock contention or with a dirty cache mapping).
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_globals.c | 1 +
fs/xfs/xfs_linux.h | 1 +
fs/xfs/xfs_mount.h | 2 ++
fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++++++
fs/xfs/xfs_sysctl.c | 9 +++++++++
fs/xfs/xfs_sysctl.h | 1 +
6 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 76e81cf..fda9a66 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -40,4 +40,5 @@ xfs_param_t xfs_params = {
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
.fstrm_timer = { 1, 30*100, 3600*100},
+ .eofb_timer = { 1*100, 300*100, 7200*100},
};
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 828662f..bbad99b 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -118,6 +118,7 @@
#define xfs_rotorstep xfs_params.rotorstep.val
#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
+#define xfs_eofb_centisecs xfs_params.eofb_timer.val
#define current_cpu() (raw_smp_processor_id())
#define current_pid() (current->pid)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index deee09e..bf5ecfa 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -199,6 +199,8 @@ typedef struct xfs_mount {
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_sync_work; /* background sync work */
struct delayed_work m_reclaim_work; /* background inode reclaim */
+ struct delayed_work m_eofblocks_work; /* background eof blocks
+ trimming */
struct work_struct m_flush_work; /* background inode flush */
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index c9e1c16..31f678a 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -532,6 +532,31 @@ xfs_flush_worker(
xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
}
+/*
+ * Background scanning to trim post-EOF preallocated space. This is queued
+ * based on the 'eofb_centisecs' tunable (5m by default).
+ */
+STATIC void
+xfs_queue_eofblocks(
+ struct xfs_mount *mp)
+{
+ rcu_read_lock();
+ if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
+ queue_delayed_work(xfs_syncd_wq, &mp->m_eofblocks_work,
+ msecs_to_jiffies(xfs_eofb_centisecs * 10));
+ rcu_read_unlock();
+}
+
+STATIC void
+xfs_eofblocks_worker(
+struct work_struct *work)
+{
+ struct xfs_mount *mp = container_of(to_delayed_work(work),
+ struct xfs_mount, m_eofblocks_work);
+ xfs_inodes_free_eofblocks(mp, SYNC_TRYLOCK, NULL);
+ xfs_queue_eofblocks(mp);
+}
+
int
xfs_syncd_init(
struct xfs_mount *mp)
@@ -539,6 +564,7 @@ xfs_syncd_init(
INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+ INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
xfs_syncd_queue_sync(mp);
@@ -551,6 +577,7 @@ xfs_syncd_stop(
{
cancel_delayed_work_sync(&mp->m_sync_work);
cancel_delayed_work_sync(&mp->m_reclaim_work);
+ cancel_delayed_work_sync(&mp->m_eofblocks_work);
cancel_work_sync(&mp->m_flush_work);
}
@@ -1101,6 +1128,9 @@ xfs_inode_set_eofblocks_tag(
XFS_ICI_EOFBLOCKS_TAG);
spin_unlock(&ip->i_mount->m_perag_lock);
+ /* kick off background trimming */
+ xfs_queue_eofblocks(ip->i_mount);
+
trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
-1, _RET_IP_);
}
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index ee2d2ad..45d74fc 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -202,6 +202,15 @@ static ctl_table xfs_table[] = {
.extra1 = &xfs_params.fstrm_timer.min,
.extra2 = &xfs_params.fstrm_timer.max,
},
+ {
+ .procname = "eofb_centisecs",
+ .data = &xfs_params.eofb_timer.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &xfs_params.eofb_timer.min,
+ .extra2 = &xfs_params.eofb_timer.max,
+ },
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
{
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index b9937d4..bd8e157 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -47,6 +47,7 @@ typedef struct xfs_param {
xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
+ xfs_sysctl_val_t eofb_timer; /* Interval between eofb scan wakeups */
} xfs_param_t;
/*
--
1.7.7.6
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 8/8] xfs: add background scanning to clear EOFBLOCKS inodes
2012-09-27 17:45 ` [PATCH v4 8/8] xfs: add background scanning to clear EOFBLOCKS inodes Brian Foster
@ 2012-09-28 8:00 ` Dave Chinner
2012-09-28 20:42 ` Brian Foster
0 siblings, 1 reply; 22+ messages in thread
From: Dave Chinner @ 2012-09-28 8:00 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Sep 27, 2012 at 01:45:52PM -0400, Brian Foster wrote:
> Create a delayed_work to enable background scanning and freeing
> of EOFBLOCKS inodes. The scanner kicks in once speculative
> preallocation occurs and stops requeueing itself when no EOFBLOCKS
> inodes exist.
>
> Scans are queued on the existing syncd workqueue and the interval
> is based on the new eofb_timer tunable (default to 5m). The
> background scanner performs unfiltered, best effort scans (which
> skips inodes under lock contention or with a dirty cache mapping).
>
> Signed-off-by: Brian Foster <bfoster@redhat.com>
> ---
> fs/xfs/xfs_globals.c | 1 +
> fs/xfs/xfs_linux.h | 1 +
> fs/xfs/xfs_mount.h | 2 ++
> fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++++++
> fs/xfs/xfs_sysctl.c | 9 +++++++++
> fs/xfs/xfs_sysctl.h | 1 +
> 6 files changed, 44 insertions(+), 0 deletions(-)
>
> diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
> index 76e81cf..fda9a66 100644
> --- a/fs/xfs/xfs_globals.c
> +++ b/fs/xfs/xfs_globals.c
> @@ -40,4 +40,5 @@ xfs_param_t xfs_params = {
> .rotorstep = { 1, 1, 255 },
> .inherit_nodfrg = { 0, 1, 1 },
> .fstrm_timer = { 1, 30*100, 3600*100},
> + .eofb_timer = { 1*100, 300*100, 7200*100},
> };
> diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
> index 828662f..bbad99b 100644
> --- a/fs/xfs/xfs_linux.h
> +++ b/fs/xfs/xfs_linux.h
> @@ -118,6 +118,7 @@
> #define xfs_rotorstep xfs_params.rotorstep.val
> #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
> #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
> +#define xfs_eofb_centisecs xfs_params.eofb_timer.val
Let's not propagate that stupid "centiseconds" unit any further.
Nobody uses it, and it was only introduced because jiffie was 10ms
and there were 100 to a second so it was easy to convert in the
code. I don't think there is any reason for needing sub-second
granularity for this background function, so seconds shoul dbe just
fine for it. If you think we nee dfiner granularity, milliseconds is
the nex tunit to choose....
>
> #define current_cpu() (raw_smp_processor_id())
> #define current_pid() (current->pid)
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index deee09e..bf5ecfa 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -199,6 +199,8 @@ typedef struct xfs_mount {
> struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
> struct delayed_work m_sync_work; /* background sync work */
> struct delayed_work m_reclaim_work; /* background inode reclaim */
> + struct delayed_work m_eofblocks_work; /* background eof blocks
> + trimming */
> struct work_struct m_flush_work; /* background inode flush */
> __int64_t m_update_flags; /* sb flags we need to update
> on the next remount,rw */
> diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
> index c9e1c16..31f678a 100644
> --- a/fs/xfs/xfs_sync.c
> +++ b/fs/xfs/xfs_sync.c
> @@ -532,6 +532,31 @@ xfs_flush_worker(
> xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
> }
>
> +/*
> + * Background scanning to trim post-EOF preallocated space. This is queued
> + * based on the 'eofb_centisecs' tunable (5m by default).
> + */
> +STATIC void
> +xfs_queue_eofblocks(
> + struct xfs_mount *mp)
> +{
> + rcu_read_lock();
> + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
> + queue_delayed_work(xfs_syncd_wq, &mp->m_eofblocks_work,
> + msecs_to_jiffies(xfs_eofb_centisecs * 10));
> + rcu_read_unlock();
> +}
This will all need reworking for the new xfs_icache.c and per-mount
workqueue structuring. Fundamentally there is nothing wrong with
what you've done, it's just been reworked...
> + {
> + .procname = "eofb_centisecs",
Ugh. Call it something users might understand. Say
"background_prealloc_discard_period", or something similarly
informative...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v4 8/8] xfs: add background scanning to clear EOFBLOCKS inodes
2012-09-28 8:00 ` Dave Chinner
@ 2012-09-28 20:42 ` Brian Foster
0 siblings, 0 replies; 22+ messages in thread
From: Brian Foster @ 2012-09-28 20:42 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On 09/28/2012 04:00 AM, Dave Chinner wrote:
> On Thu, Sep 27, 2012 at 01:45:52PM -0400, Brian Foster wrote:
>> Create a delayed_work to enable background scanning and freeing
>> of EOFBLOCKS inodes. The scanner kicks in once speculative
>> preallocation occurs and stops requeueing itself when no EOFBLOCKS
>> inodes exist.
>>
>> Scans are queued on the existing syncd workqueue and the interval
>> is based on the new eofb_timer tunable (default to 5m). The
>> background scanner performs unfiltered, best effort scans (which
>> skips inodes under lock contention or with a dirty cache mapping).
>>
>> Signed-off-by: Brian Foster <bfoster@redhat.com>
>> ---
>> fs/xfs/xfs_globals.c | 1 +
>> fs/xfs/xfs_linux.h | 1 +
>> fs/xfs/xfs_mount.h | 2 ++
>> fs/xfs/xfs_sync.c | 30 ++++++++++++++++++++++++++++++
>> fs/xfs/xfs_sysctl.c | 9 +++++++++
>> fs/xfs/xfs_sysctl.h | 1 +
>> 6 files changed, 44 insertions(+), 0 deletions(-)
>>
>> diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
>> index 76e81cf..fda9a66 100644
>> --- a/fs/xfs/xfs_globals.c
>> +++ b/fs/xfs/xfs_globals.c
>> @@ -40,4 +40,5 @@ xfs_param_t xfs_params = {
>> .rotorstep = { 1, 1, 255 },
>> .inherit_nodfrg = { 0, 1, 1 },
>> .fstrm_timer = { 1, 30*100, 3600*100},
>> + .eofb_timer = { 1*100, 300*100, 7200*100},
>> };
>> diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
>> index 828662f..bbad99b 100644
>> --- a/fs/xfs/xfs_linux.h
>> +++ b/fs/xfs/xfs_linux.h
>> @@ -118,6 +118,7 @@
>> #define xfs_rotorstep xfs_params.rotorstep.val
>> #define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
>> #define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
>> +#define xfs_eofb_centisecs xfs_params.eofb_timer.val
>
> Let's not propagate that stupid "centiseconds" unit any further.
> Nobody uses it, and it was only introduced because jiffie was 10ms
> and there were 100 to a second so it was easy to convert in the
> code. I don't think there is any reason for needing sub-second
> granularity for this background function, so seconds shoul dbe just
> fine for it. If you think we nee dfiner granularity, milliseconds is
> the nex tunit to choose....
>
I think seconds is fine. I chose 1s for a minimum, but even that is
pathological and really only useful for focused stress testing.
>>
>> #define current_cpu() (raw_smp_processor_id())
>> #define current_pid() (current->pid)
>> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
>> index deee09e..bf5ecfa 100644
>> --- a/fs/xfs/xfs_mount.h
>> +++ b/fs/xfs/xfs_mount.h
>> @@ -199,6 +199,8 @@ typedef struct xfs_mount {
>> struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
>> struct delayed_work m_sync_work; /* background sync work */
>> struct delayed_work m_reclaim_work; /* background inode reclaim */
>> + struct delayed_work m_eofblocks_work; /* background eof blocks
>> + trimming */
>> struct work_struct m_flush_work; /* background inode flush */
>> __int64_t m_update_flags; /* sb flags we need to update
>> on the next remount,rw */
>> diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
>> index c9e1c16..31f678a 100644
>> --- a/fs/xfs/xfs_sync.c
>> +++ b/fs/xfs/xfs_sync.c
>> @@ -532,6 +532,31 @@ xfs_flush_worker(
>> xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
>> }
>>
>> +/*
>> + * Background scanning to trim post-EOF preallocated space. This is queued
>> + * based on the 'eofb_centisecs' tunable (5m by default).
>> + */
>> +STATIC void
>> +xfs_queue_eofblocks(
>> + struct xfs_mount *mp)
>> +{
>> + rcu_read_lock();
>> + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_EOFBLOCKS_TAG))
>> + queue_delayed_work(xfs_syncd_wq, &mp->m_eofblocks_work,
>> + msecs_to_jiffies(xfs_eofb_centisecs * 10));
>> + rcu_read_unlock();
>> +}
>
> This will all need reworking for the new xfs_icache.c and per-mount
> workqueue structuring. Fundamentally there is nothing wrong with
> what you've done, it's just been reworked...
>
>> + {
>> + .procname = "eofb_centisecs",
>
> Ugh. Call it something users might understand. Say
> "background_prealloc_discard_period", or something similarly
> informative...
>
Ok. Thanks for the review.
Brian
> Cheers,
>
> Dave.
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 22+ messages in thread