* [PATCH 1/6] xfs: remove if_real_bytes
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
2018-07-12 13:49 ` [PATCH 2/6] xfs: simplify xfs_idata_realloc Christoph Hellwig
` (4 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
To: linux-xfs
The field is only used for asserts, and to track if we really need to do
realloc when growing the inode fork data. But the krealloc function
already performs this check internally, so there is no need to keep track
of the real allocation size.
This will free space in the inode fork for keeping a sequence counter of
changes to the extent list.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_inode_fork.c | 19 ++++---------------
fs/xfs/libxfs/xfs_inode_fork.h | 1 -
fs/xfs/xfs_inode.c | 3 +--
fs/xfs/xfs_inode_item.c | 4 ----
4 files changed, 5 insertions(+), 22 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 183ec0cb8921..dee85b0f8846 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -158,7 +158,6 @@ xfs_init_local_fork(
}
ifp->if_bytes = size;
- ifp->if_real_bytes = real_size;
ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
ifp->if_flags |= XFS_IFINLINE;
}
@@ -226,7 +225,6 @@ xfs_iformat_extents(
return -EFSCORRUPTED;
}
- ifp->if_real_bytes = 0;
ifp->if_bytes = 0;
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
@@ -317,7 +315,6 @@ xfs_iformat_btree(
ifp->if_flags &= ~XFS_IFEXTENTS;
ifp->if_flags |= XFS_IFBROOT;
- ifp->if_real_bytes = 0;
ifp->if_bytes = 0;
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
@@ -501,7 +498,6 @@ xfs_idata_realloc(
*/
real_size = roundup(new_size, 4);
if (ifp->if_u1.if_data == NULL) {
- ASSERT(ifp->if_real_bytes == 0);
ifp->if_u1.if_data = kmem_alloc(real_size,
KM_SLEEP | KM_NOFS);
} else {
@@ -509,15 +505,12 @@ xfs_idata_realloc(
* Only do the realloc if the underlying size
* is really changing.
*/
- if (ifp->if_real_bytes != real_size) {
- ifp->if_u1.if_data =
- kmem_realloc(ifp->if_u1.if_data,
- real_size,
- KM_SLEEP | KM_NOFS);
- }
+ ifp->if_u1.if_data =
+ kmem_realloc(ifp->if_u1.if_data,
+ real_size,
+ KM_SLEEP | KM_NOFS);
}
}
- ifp->if_real_bytes = real_size;
ifp->if_bytes = new_size;
ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
}
@@ -543,17 +536,13 @@ xfs_idestroy_fork(
*/
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
if (ifp->if_u1.if_data != NULL) {
- ASSERT(ifp->if_real_bytes != 0);
kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = NULL;
- ifp->if_real_bytes = 0;
}
} else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
xfs_iext_destroy(ifp);
}
- ASSERT(ifp->if_real_bytes == 0);
-
if (whichfork == XFS_ATTR_FORK) {
kmem_zone_free(xfs_ifork_zone, ip->i_afp);
ip->i_afp = NULL;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 781b1603df5e..46242052aad0 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -14,7 +14,6 @@ struct xfs_dinode;
*/
typedef struct xfs_ifork {
int if_bytes; /* bytes in if_u1 */
- int if_real_bytes; /* bytes allocated in if_u1 */
struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7b2694d3901a..d6cd545f5d1d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -927,7 +927,7 @@ xfs_ialloc(
case S_IFLNK:
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_df.if_flags = XFS_IFEXTENTS;
- ip->i_df.if_bytes = ip->i_df.if_real_bytes = 0;
+ ip->i_df.if_bytes = 0;
ip->i_df.if_u1.if_root = NULL;
break;
default:
@@ -1872,7 +1872,6 @@ xfs_inactive(
* to clean up here.
*/
if (VFS_I(ip)->i_mode == 0) {
- ASSERT(ip->i_df.if_real_bytes == 0);
ASSERT(ip->i_df.if_broot_bytes == 0);
return;
}
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 2389c34c172d..fa1c4fe2ffbf 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -194,8 +194,6 @@ xfs_inode_item_format_data_fork(
* to be there by xfs_idata_realloc().
*/
data_bytes = roundup(ip->i_df.if_bytes, 4);
- ASSERT(ip->i_df.if_real_bytes == 0 ||
- ip->i_df.if_real_bytes >= data_bytes);
ASSERT(ip->i_df.if_u1.if_data != NULL);
ASSERT(ip->i_d.di_size > 0);
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
@@ -280,8 +278,6 @@ xfs_inode_item_format_attr_fork(
* to be there by xfs_idata_realloc().
*/
data_bytes = roundup(ip->i_afp->if_bytes, 4);
- ASSERT(ip->i_afp->if_real_bytes == 0 ||
- ip->i_afp->if_real_bytes >= data_bytes);
ASSERT(ip->i_afp->if_u1.if_data != NULL);
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
ip->i_afp->if_u1.if_data,
--
2.18.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 2/6] xfs: simplify xfs_idata_realloc
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
2018-07-12 13:49 ` [PATCH 1/6] xfs: remove if_real_bytes Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
2018-07-12 13:49 ` [PATCH 3/6] xfs: remove the xfs_ifork_t typedef Christoph Hellwig
` (3 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
To: linux-xfs
Streamline the code and take advantage of the fact that kmem_realloc
through krealloc will be have like a normal allocation if passing in a
NULL old pointer.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_inode_fork.c | 55 ++++++++++++----------------------
1 file changed, 19 insertions(+), 36 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index dee85b0f8846..a0e3fb804605 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -468,51 +468,34 @@ xfs_iroot_realloc(
*/
void
xfs_idata_realloc(
- xfs_inode_t *ip,
- int byte_diff,
- int whichfork)
+ struct xfs_inode *ip,
+ int byte_diff,
+ int whichfork)
{
- xfs_ifork_t *ifp;
- int new_size;
- int real_size;
-
- if (byte_diff == 0) {
- return;
- }
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ int new_size = (int)ifp->if_bytes + byte_diff;
- ifp = XFS_IFORK_PTR(ip, whichfork);
- new_size = (int)ifp->if_bytes + byte_diff;
ASSERT(new_size >= 0);
+ ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork));
+
+ if (byte_diff == 0)
+ return;
if (new_size == 0) {
kmem_free(ifp->if_u1.if_data);
ifp->if_u1.if_data = NULL;
- real_size = 0;
- } else {
- /*
- * Stuck with malloc/realloc.
- * For inline data, the underlying buffer must be
- * a multiple of 4 bytes in size so that it can be
- * logged and stay on word boundaries. We enforce
- * that here.
- */
- real_size = roundup(new_size, 4);
- if (ifp->if_u1.if_data == NULL) {
- ifp->if_u1.if_data = kmem_alloc(real_size,
- KM_SLEEP | KM_NOFS);
- } else {
- /*
- * Only do the realloc if the underlying size
- * is really changing.
- */
- ifp->if_u1.if_data =
- kmem_realloc(ifp->if_u1.if_data,
- real_size,
- KM_SLEEP | KM_NOFS);
- }
+ ifp->if_bytes = 0;
+ return;
}
+
+ /*
+ * For inline data, the underlying buffer must be a multiple of 4 bytes
+ * in size so that it can be logged and stay on word boundaries.
+ * We enforce that here.
+ */
+ ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data,
+ roundup(new_size, 4), KM_SLEEP | KM_NOFS);
ifp->if_bytes = new_size;
- ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
}
void
--
2.18.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 3/6] xfs: remove the xfs_ifork_t typedef
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
2018-07-12 13:49 ` [PATCH 1/6] xfs: remove if_real_bytes Christoph Hellwig
2018-07-12 13:49 ` [PATCH 2/6] xfs: simplify xfs_idata_realloc Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
2018-07-12 13:49 ` [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper Christoph Hellwig
` (2 subsequent siblings)
5 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
To: linux-xfs
We only have a few more callers left, so seize the opportunity and kill
it off.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_attr_leaf.c | 6 +++---
fs/xfs/libxfs/xfs_bmap.c | 18 +++++++++---------
fs/xfs/libxfs/xfs_inode_fork.c | 8 ++++----
fs/xfs/libxfs/xfs_inode_fork.h | 4 ++--
fs/xfs/xfs_icache.c | 2 +-
fs/xfs/xfs_inode.h | 6 +++---
fs/xfs/xfs_super.c | 2 +-
7 files changed, 23 insertions(+), 23 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 251304f3bc5d..bdf971c1fd77 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -506,7 +506,7 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
{
xfs_attr_sf_hdr_t *hdr;
xfs_inode_t *dp;
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
trace_xfs_attr_sf_create(args);
@@ -541,7 +541,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
int i, offset, size;
xfs_mount_t *mp;
xfs_inode_t *dp;
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
trace_xfs_attr_sf_add(args);
@@ -682,7 +682,7 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
xfs_attr_shortform_t *sf;
xfs_attr_sf_entry_t *sfe;
int i;
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
trace_xfs_attr_sf_lookup(args);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7b93b1e16ad9..fa6a704ac1c0 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -326,7 +326,7 @@ xfs_bmap_check_leaf_extents(
xfs_buf_t *bp; /* buffer for "block" */
int error; /* error return value */
xfs_extnum_t i=0, j; /* index into the extents list */
- xfs_ifork_t *ifp; /* fork structure */
+ struct xfs_ifork *ifp; /* fork structure */
int level; /* btree level, for checking */
xfs_mount_t *mp; /* file system mount structure */
__be64 *pp; /* pointer to block address */
@@ -594,7 +594,7 @@ xfs_bmap_btree_to_extents(
xfs_fsblock_t cbno; /* child block number */
xfs_buf_t *cbp; /* child block's buffer */
int error; /* error return value */
- xfs_ifork_t *ifp; /* inode fork data */
+ struct xfs_ifork *ifp; /* inode fork data */
xfs_mount_t *mp; /* mount point structure */
__be64 *pp; /* ptr to block address */
struct xfs_btree_block *rblock;/* root btree block */
@@ -817,7 +817,7 @@ xfs_bmap_local_to_extents(
{
int error = 0;
int flags; /* logging flags returned */
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
xfs_alloc_arg_t args; /* allocation arguments */
xfs_buf_t *bp; /* buffer for extent block */
struct xfs_bmbt_irec rec;
@@ -1479,7 +1479,7 @@ xfs_bmap_one_block(
xfs_inode_t *ip, /* incore inode */
int whichfork) /* data or attr fork */
{
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
int rval; /* return value */
xfs_bmbt_irec_t s; /* internal version of extent */
struct xfs_iext_cursor icur;
@@ -1517,7 +1517,7 @@ xfs_bmap_add_extent_delay_real(
struct xfs_bmbt_irec *new = &bma->got;
int error; /* error return value */
int i; /* temp state */
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
xfs_fileoff_t new_endoff; /* end offset of new entry */
xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
@@ -2026,7 +2026,7 @@ xfs_bmap_add_extent_unwritten_real(
xfs_btree_cur_t *cur; /* btree cursor */
int error; /* error return value */
int i; /* temp state */
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
xfs_fileoff_t new_endoff; /* end offset of new entry */
xfs_bmbt_irec_t r[3]; /* neighbor extent entries */
/* left is 0, right is 1, prev is 2 */
@@ -2494,7 +2494,7 @@ xfs_bmap_add_extent_hole_delay(
struct xfs_iext_cursor *icur,
xfs_bmbt_irec_t *new) /* new data to add to file extents */
{
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
xfs_bmbt_irec_t left; /* left neighbor extent entry */
xfs_filblks_t newlen=0; /* new indirect size */
xfs_filblks_t oldlen=0; /* old indirect size */
@@ -4855,7 +4855,7 @@ xfs_bmap_del_extent_real(
struct xfs_bmbt_irec got; /* current extent entry */
xfs_fileoff_t got_endoff; /* first offset past got */
int i; /* temp state */
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
xfs_mount_t *mp; /* mount structure */
xfs_filblks_t nblks; /* quota/sb block count */
xfs_bmbt_irec_t new; /* new record to be inserted */
@@ -5103,7 +5103,7 @@ __xfs_bunmapi(
int error; /* error return value */
xfs_extnum_t extno; /* extent number in list */
struct xfs_bmbt_irec got; /* current extent record */
- xfs_ifork_t *ifp; /* inode fork pointer */
+ struct xfs_ifork *ifp; /* inode fork pointer */
int isrt; /* freeing in rt area */
int logflags; /* transaction logging flags */
xfs_extlen_t mod; /* rt extent offset */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index a0e3fb804605..f9acf1d436f6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -269,7 +269,7 @@ xfs_iformat_btree(
{
struct xfs_mount *mp = ip->i_mount;
xfs_bmdr_block_t *dfp;
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
/* REFERENCED */
int nrecs;
int size;
@@ -347,7 +347,7 @@ xfs_iroot_realloc(
{
struct xfs_mount *mp = ip->i_mount;
int cur_max;
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
struct xfs_btree_block *new_broot;
int new_max;
size_t new_size;
@@ -503,7 +503,7 @@ xfs_idestroy_fork(
xfs_inode_t *ip,
int whichfork)
{
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
ifp = XFS_IFORK_PTR(ip, whichfork);
if (ifp->if_broot != NULL) {
@@ -592,7 +592,7 @@ xfs_iflush_fork(
int whichfork)
{
char *cp;
- xfs_ifork_t *ifp;
+ struct xfs_ifork *ifp;
xfs_mount_t *mp;
static const short brootflag[2] =
{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 46242052aad0..1492143371f3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -12,7 +12,7 @@ struct xfs_dinode;
/*
* File incore extent information, present for each of data & attr forks.
*/
-typedef struct xfs_ifork {
+struct xfs_ifork {
int if_bytes; /* bytes in if_u1 */
struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
@@ -22,7 +22,7 @@ typedef struct xfs_ifork {
void *if_root; /* extent tree root */
char *if_data; /* inline file data */
} if_u1;
-} xfs_ifork_t;
+};
/*
* Per-fork incore inode flags.
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 47f417d20a30..79f344fa8b14 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -66,7 +66,7 @@ xfs_inode_alloc(
ip->i_cowfp = NULL;
ip->i_cnextents = 0;
ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
- memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
+ memset(&ip->i_df, 0, sizeof(ip->i_df));
ip->i_flags = 0;
ip->i_delayed_blks = 0;
memset(&ip->i_d, 0, sizeof(ip->i_d));
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b1f0e8394f3b..51ae5d79de0d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -34,9 +34,9 @@ typedef struct xfs_inode {
struct xfs_imap i_imap; /* location for xfs_imap() */
/* Extent information. */
- xfs_ifork_t *i_afp; /* attribute fork pointer */
- xfs_ifork_t *i_cowfp; /* copy on write extents */
- xfs_ifork_t i_df; /* data fork */
+ struct xfs_ifork *i_afp; /* attribute fork pointer */
+ struct xfs_ifork *i_cowfp; /* copy on write extents */
+ struct xfs_ifork i_df; /* data fork */
/* operations vectors */
const struct xfs_dir_ops *d_ops; /* directory ops vector */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f9f8dc490d3d..62312b4cef64 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1886,7 +1886,7 @@ xfs_init_zones(void)
if (!xfs_da_state_zone)
goto out_destroy_btree_cur_zone;
- xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
+ xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
if (!xfs_ifork_zone)
goto out_destroy_da_state_zone;
--
2.18.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
` (2 preceding siblings ...)
2018-07-12 13:49 ` [PATCH 3/6] xfs: remove the xfs_ifork_t typedef Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
2018-07-12 13:49 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
5 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
To: linux-xfs
We have a few places that already check if an inode has actual data in
the COW fork to avoid work on reflink inodes that do not actually have
outstanding COW blocks. There are a few more places that can avoid
working if doing the same check, so add a documented helper for this
condition and use it in all places where it makes sense.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_aops.c | 4 ++--
fs/xfs/xfs_bmap_util.c | 2 +-
fs/xfs/xfs_icache.c | 10 ++++------
fs/xfs/xfs_inode.c | 3 +--
fs/xfs/xfs_inode.h | 9 +++++++++
fs/xfs/xfs_reflink.c | 2 +-
6 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f4d3252236c1..814100d27343 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -338,7 +338,7 @@ xfs_map_blocks(
imap_valid = offset_fsb >= wpc->imap.br_startoff &&
offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
if (imap_valid &&
- (!xfs_is_reflink_inode(ip) || wpc->io_type == XFS_IO_COW))
+ (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
return 0;
if (XFS_FORCED_SHUTDOWN(mp))
@@ -363,7 +363,7 @@ xfs_map_blocks(
* Check if this is offset is covered by a COW extents, and if yes use
* it directly instead of looking up anything in the data fork.
*/
- if (xfs_is_reflink_inode(ip) &&
+ if (xfs_inode_has_cow_data(ip) &&
xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
imap.br_startoff <= offset_fsb) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index d3a314fd721f..5c918e8bf496 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1277,7 +1277,7 @@ xfs_prepare_shift(
* we've flushed all the dirty data out to disk to avoid having
* CoW extents at the wrong offsets.
*/
- if (xfs_is_reflink_inode(ip)) {
+ if (xfs_inode_has_cow_data(ip)) {
error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
true);
if (error)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 79f344fa8b14..fdae4c2d461e 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1697,14 +1697,13 @@ xfs_inode_clear_eofblocks_tag(
*/
static bool
xfs_prep_free_cowblocks(
- struct xfs_inode *ip,
- struct xfs_ifork *ifp)
+ struct xfs_inode *ip)
{
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
*/
- if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
+ if (!xfs_inode_has_cow_data(ip)) {
trace_xfs_inode_free_cowblocks_invalid(ip);
xfs_inode_clear_cowblocks_tag(ip);
return false;
@@ -1742,11 +1741,10 @@ xfs_inode_free_cowblocks(
void *args)
{
struct xfs_eofblocks *eofb = args;
- struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
int match;
int ret = 0;
- if (!xfs_prep_free_cowblocks(ip, ifp))
+ if (!xfs_prep_free_cowblocks(ip))
return 0;
if (eofb) {
@@ -1771,7 +1769,7 @@ xfs_inode_free_cowblocks(
* Check again, nobody else should be able to dirty blocks or change
* the reflink iflag now that we have the first two locks held.
*/
- if (xfs_prep_free_cowblocks(ip, ifp))
+ if (xfs_prep_free_cowblocks(ip))
ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d6cd545f5d1d..b5c6464b2307 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1863,7 +1863,6 @@ xfs_inactive(
xfs_inode_t *ip)
{
struct xfs_mount *mp;
- struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
int error;
int truncate = 0;
@@ -1884,7 +1883,7 @@ xfs_inactive(
return;
/* Try to clean out the cow blocks if there are any. */
- if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
+ if (xfs_inode_has_cow_data(ip))
xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (VFS_I(ip)->i_nlink != 0) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 51ae5d79de0d..9cd02852d193 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -198,6 +198,15 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
return ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
}
+/*
+ * Check if an inode has any data in the COW fork. This might be often false
+ * even for inodes with the reflink flag when there is no pending COW operation.
+ */
+static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
+{
+ return ip->i_cowfp && ip->i_cowfp->if_bytes;
+}
+
/*
* In-core inode flags.
*/
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3143889097f1..1c59814d8bb2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -487,7 +487,7 @@ xfs_reflink_cancel_cow_blocks(
struct xfs_defer_ops *odfops = (*tpp)->t_dfops;
int error = 0;
- if (!xfs_is_reflink_inode(ip))
+ if (!xfs_inode_has_cow_data(ip))
return 0;
if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
return 0;
--
2.18.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
` (3 preceding siblings ...)
2018-07-12 13:49 ` [PATCH 4/6] xfs: introduce a new xfs_inode_has_cow_data helper Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
2018-07-13 22:50 ` Darrick J. Wong
2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
5 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
To: linux-xfs
Add a simple 32-bit unsigned integer as the sequence count for
modifications to the extent list in the inode fork. This will be
used to optimize away extent list lookups in the writeback code.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_iext_tree.c | 4 ++++
fs/xfs/libxfs/xfs_inode_fork.h | 1 +
2 files changed, 5 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index b80c63faace2..cce7e8024f46 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -650,6 +650,7 @@ xfs_iext_insert(
cur->leaf->recs[i] = cur->leaf->recs[i - 1];
xfs_iext_set(cur_rec(cur), irec);
ifp->if_bytes += sizeof(struct xfs_iext_rec);
+ ifp->if_seq++;
trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
@@ -869,6 +870,7 @@ xfs_iext_remove(
leaf->recs[i] = leaf->recs[i + 1];
xfs_iext_rec_clear(&leaf->recs[nr_entries]);
ifp->if_bytes -= sizeof(struct xfs_iext_rec);
+ ifp->if_seq++;
if (cur->pos == 0 && nr_entries > 0) {
xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
@@ -983,6 +985,8 @@ xfs_iext_update_extent(
trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
xfs_iext_set(cur_rec(cur), new);
trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
+
+ ifp->if_seq++;
}
/*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 1492143371f3..f20b2468ca35 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -14,6 +14,7 @@ struct xfs_dinode;
*/
struct xfs_ifork {
int if_bytes; /* bytes in if_u1 */
+ unsigned int if_seq;
struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
--
2.18.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations
2018-07-12 13:49 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
@ 2018-07-13 22:50 ` Darrick J. Wong
0 siblings, 0 replies; 15+ messages in thread
From: Darrick J. Wong @ 2018-07-13 22:50 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Thu, Jul 12, 2018 at 03:49:09PM +0200, Christoph Hellwig wrote:
> Add a simple 32-bit unsigned integer as the sequence count for
> modifications to the extent list in the inode fork. This will be
> used to optimize away extent list lookups in the writeback code.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
--D
> ---
> fs/xfs/libxfs/xfs_iext_tree.c | 4 ++++
> fs/xfs/libxfs/xfs_inode_fork.h | 1 +
> 2 files changed, 5 insertions(+)
>
> diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
> index b80c63faace2..cce7e8024f46 100644
> --- a/fs/xfs/libxfs/xfs_iext_tree.c
> +++ b/fs/xfs/libxfs/xfs_iext_tree.c
> @@ -650,6 +650,7 @@ xfs_iext_insert(
> cur->leaf->recs[i] = cur->leaf->recs[i - 1];
> xfs_iext_set(cur_rec(cur), irec);
> ifp->if_bytes += sizeof(struct xfs_iext_rec);
> + ifp->if_seq++;
>
> trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
>
> @@ -869,6 +870,7 @@ xfs_iext_remove(
> leaf->recs[i] = leaf->recs[i + 1];
> xfs_iext_rec_clear(&leaf->recs[nr_entries]);
> ifp->if_bytes -= sizeof(struct xfs_iext_rec);
> + ifp->if_seq++;
>
> if (cur->pos == 0 && nr_entries > 0) {
> xfs_iext_update_node(ifp, offset, xfs_iext_leaf_key(leaf, 0), 1,
> @@ -983,6 +985,8 @@ xfs_iext_update_extent(
> trace_xfs_bmap_pre_update(ip, cur, state, _RET_IP_);
> xfs_iext_set(cur_rec(cur), new);
> trace_xfs_bmap_post_update(ip, cur, state, _RET_IP_);
> +
> + ifp->if_seq++;
> }
>
> /*
> diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
> index 1492143371f3..f20b2468ca35 100644
> --- a/fs/xfs/libxfs/xfs_inode_fork.h
> +++ b/fs/xfs/libxfs/xfs_inode_fork.h
> @@ -14,6 +14,7 @@ struct xfs_dinode;
> */
> struct xfs_ifork {
> int if_bytes; /* bytes in if_u1 */
> + unsigned int if_seq;
> struct xfs_btree_block *if_broot; /* file's incore btree root */
> short if_broot_bytes; /* bytes allocated for root */
> unsigned char if_flags; /* per-fork flags */
> --
> 2.18.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
2018-07-12 13:49 reduce lookups in the COW extent tree V2 Christoph Hellwig
` (4 preceding siblings ...)
2018-07-12 13:49 ` [PATCH 5/6] xfs: maintain a sequence count for inode fork manipulations Christoph Hellwig
@ 2018-07-12 13:49 ` Christoph Hellwig
2018-07-13 22:51 ` Darrick J. Wong
2018-07-14 0:03 ` Dave Chinner
5 siblings, 2 replies; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-12 13:49 UTC (permalink / raw)
To: linux-xfs
Used the per-fork sequence counter to avoid lookups in the writeback code
unless the COW fork actually changed.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/xfs_aops.c | 30 +++++++++++++++++++++++++-----
fs/xfs/xfs_iomap.c | 5 ++++-
fs/xfs/xfs_iomap.h | 2 +-
3 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 814100d27343..aff9d44fa338 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -29,6 +29,7 @@
struct xfs_writepage_ctx {
struct xfs_bmbt_irec imap;
unsigned int io_type;
+ unsigned int cow_seq;
struct xfs_ioend *ioend;
};
@@ -310,6 +311,7 @@ xfs_map_blocks(
struct xfs_mount *mp = ip->i_mount;
ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
+ xfs_fileoff_t cow_fsb = NULLFILEOFF;
struct xfs_bmbt_irec imap;
int whichfork = XFS_DATA_FORK;
struct xfs_iext_cursor icur;
@@ -333,12 +335,15 @@ xfs_map_blocks(
* COW fork blocks can overlap data fork blocks even if the blocks
* aren't shared. COW I/O always takes precedent, so we must always
* check for overlap on reflink inodes unless the mapping is already a
- * COW one.
+ * COW one, or the COW fork hasn't changed from the last time we looked
+ * at it.
*/
imap_valid = offset_fsb >= wpc->imap.br_startoff &&
offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
if (imap_valid &&
- (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
+ (!xfs_inode_has_cow_data(ip) ||
+ wpc->io_type == XFS_IO_COW ||
+ wpc->cow_seq == ip->i_cowfp->if_seq))
return 0;
if (XFS_FORCED_SHUTDOWN(mp))
@@ -364,8 +369,10 @@ xfs_map_blocks(
* it directly instead of looking up anything in the data fork.
*/
if (xfs_inode_has_cow_data(ip) &&
- xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
- imap.br_startoff <= offset_fsb) {
+ xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
+ cow_fsb = imap.br_startoff;
+ if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
+ wpc->cow_seq = ip->i_cowfp->if_seq;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
/*
* Truncate can race with writeback since writeback doesn't
@@ -411,6 +418,16 @@ xfs_map_blocks(
imap.br_startblock = HOLESTARTBLOCK;
wpc->io_type = XFS_IO_HOLE;
} else {
+ /*
+ * Truncate to the next COW extent if there is one. This is the
+ * only opportunity to do this because we can skip COW fork
+ * lookups for the subsequent blocks in the mapping; however,
+ * the requirement to treat the COW range separately remains.
+ */
+ if (cow_fsb != NULLFILEOFF &&
+ cow_fsb < imap.br_startoff + imap.br_blockcount)
+ imap.br_blockcount = cow_fsb - imap.br_startoff;
+
if (isnullstartblock(imap.br_startblock)) {
/* got a delalloc extent */
wpc->io_type = XFS_IO_DELALLOC;
@@ -427,9 +444,12 @@ xfs_map_blocks(
trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
return 0;
allocate_blocks:
- error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
+ error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
+ &wpc->cow_seq);
if (error)
return error;
+ ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
+ imap.br_startoff + imap.br_blockcount <= cow_fsb);
wpc->imap = imap;
trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
return 0;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 756694219f77..43a7ff7f450c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -658,7 +658,8 @@ xfs_iomap_write_allocate(
xfs_inode_t *ip,
int whichfork,
xfs_off_t offset,
- xfs_bmbt_irec_t *imap)
+ xfs_bmbt_irec_t *imap,
+ unsigned int *cow_seq)
{
xfs_mount_t *mp = ip->i_mount;
xfs_fileoff_t offset_fsb, last_block;
@@ -780,6 +781,8 @@ xfs_iomap_write_allocate(
if (error)
goto error0;
+ if (whichfork == XFS_COW_FORK)
+ *cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 83474c9cede9..c6170548831b 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
struct xfs_bmbt_irec *, int);
int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
- struct xfs_bmbt_irec *);
+ struct xfs_bmbt_irec *, unsigned int *);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
--
2.18.0
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
@ 2018-07-13 22:51 ` Darrick J. Wong
2018-07-14 0:03 ` Dave Chinner
1 sibling, 0 replies; 15+ messages in thread
From: Darrick J. Wong @ 2018-07-13 22:51 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Thu, Jul 12, 2018 at 03:49:10PM +0200, Christoph Hellwig wrote:
> Used the per-fork sequence counter to avoid lookups in the writeback code
> unless the COW fork actually changed.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Looks ok,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
--D
> ---
> fs/xfs/xfs_aops.c | 30 +++++++++++++++++++++++++-----
> fs/xfs/xfs_iomap.c | 5 ++++-
> fs/xfs/xfs_iomap.h | 2 +-
> 3 files changed, 30 insertions(+), 7 deletions(-)
>
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index 814100d27343..aff9d44fa338 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -29,6 +29,7 @@
> struct xfs_writepage_ctx {
> struct xfs_bmbt_irec imap;
> unsigned int io_type;
> + unsigned int cow_seq;
> struct xfs_ioend *ioend;
> };
>
> @@ -310,6 +311,7 @@ xfs_map_blocks(
> struct xfs_mount *mp = ip->i_mount;
> ssize_t count = i_blocksize(inode);
> xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset), end_fsb;
> + xfs_fileoff_t cow_fsb = NULLFILEOFF;
> struct xfs_bmbt_irec imap;
> int whichfork = XFS_DATA_FORK;
> struct xfs_iext_cursor icur;
> @@ -333,12 +335,15 @@ xfs_map_blocks(
> * COW fork blocks can overlap data fork blocks even if the blocks
> * aren't shared. COW I/O always takes precedent, so we must always
> * check for overlap on reflink inodes unless the mapping is already a
> - * COW one.
> + * COW one, or the COW fork hasn't changed from the last time we looked
> + * at it.
> */
> imap_valid = offset_fsb >= wpc->imap.br_startoff &&
> offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
> if (imap_valid &&
> - (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> + (!xfs_inode_has_cow_data(ip) ||
> + wpc->io_type == XFS_IO_COW ||
> + wpc->cow_seq == ip->i_cowfp->if_seq))
> return 0;
>
> if (XFS_FORCED_SHUTDOWN(mp))
> @@ -364,8 +369,10 @@ xfs_map_blocks(
> * it directly instead of looking up anything in the data fork.
> */
> if (xfs_inode_has_cow_data(ip) &&
> - xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap) &&
> - imap.br_startoff <= offset_fsb) {
> + xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
> + cow_fsb = imap.br_startoff;
> + if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
> + wpc->cow_seq = ip->i_cowfp->if_seq;
> xfs_iunlock(ip, XFS_ILOCK_SHARED);
> /*
> * Truncate can race with writeback since writeback doesn't
> @@ -411,6 +418,16 @@ xfs_map_blocks(
> imap.br_startblock = HOLESTARTBLOCK;
> wpc->io_type = XFS_IO_HOLE;
> } else {
> + /*
> + * Truncate to the next COW extent if there is one. This is the
> + * only opportunity to do this because we can skip COW fork
> + * lookups for the subsequent blocks in the mapping; however,
> + * the requirement to treat the COW range separately remains.
> + */
> + if (cow_fsb != NULLFILEOFF &&
> + cow_fsb < imap.br_startoff + imap.br_blockcount)
> + imap.br_blockcount = cow_fsb - imap.br_startoff;
> +
> if (isnullstartblock(imap.br_startblock)) {
> /* got a delalloc extent */
> wpc->io_type = XFS_IO_DELALLOC;
> @@ -427,9 +444,12 @@ xfs_map_blocks(
> trace_xfs_map_blocks_found(ip, offset, count, wpc->io_type, &imap);
> return 0;
> allocate_blocks:
> - error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap);
> + error = xfs_iomap_write_allocate(ip, whichfork, offset, &imap,
> + &wpc->cow_seq);
> if (error)
> return error;
> + ASSERT(whichfork == XFS_COW_FORK || cow_fsb == NULLFILEOFF ||
> + imap.br_startoff + imap.br_blockcount <= cow_fsb);
> wpc->imap = imap;
> trace_xfs_map_blocks_alloc(ip, offset, count, wpc->io_type, &imap);
> return 0;
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 756694219f77..43a7ff7f450c 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -658,7 +658,8 @@ xfs_iomap_write_allocate(
> xfs_inode_t *ip,
> int whichfork,
> xfs_off_t offset,
> - xfs_bmbt_irec_t *imap)
> + xfs_bmbt_irec_t *imap,
> + unsigned int *cow_seq)
> {
> xfs_mount_t *mp = ip->i_mount;
> xfs_fileoff_t offset_fsb, last_block;
> @@ -780,6 +781,8 @@ xfs_iomap_write_allocate(
> if (error)
> goto error0;
>
> + if (whichfork == XFS_COW_FORK)
> + *cow_seq = XFS_IFORK_PTR(ip, whichfork)->if_seq;
> xfs_iunlock(ip, XFS_ILOCK_EXCL);
> }
>
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 83474c9cede9..c6170548831b 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -14,7 +14,7 @@ struct xfs_bmbt_irec;
> int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
> struct xfs_bmbt_irec *, int);
> int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
> - struct xfs_bmbt_irec *);
> + struct xfs_bmbt_irec *, unsigned int *);
> int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
>
> void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
> --
> 2.18.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 15+ messages in thread* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
2018-07-12 13:49 ` [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change Christoph Hellwig
2018-07-13 22:51 ` Darrick J. Wong
@ 2018-07-14 0:03 ` Dave Chinner
2018-07-17 13:37 ` Christoph Hellwig
1 sibling, 1 reply; 15+ messages in thread
From: Dave Chinner @ 2018-07-14 0:03 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Thu, Jul 12, 2018 at 03:49:10PM +0200, Christoph Hellwig wrote:
> @@ -333,12 +335,15 @@ xfs_map_blocks(
> * COW fork blocks can overlap data fork blocks even if the blocks
> * aren't shared. COW I/O always takes precedent, so we must always
> * check for overlap on reflink inodes unless the mapping is already a
> - * COW one.
> + * COW one, or the COW fork hasn't changed from the last time we looked
> + * at it.
> */
> imap_valid = offset_fsb >= wpc->imap.br_startoff &&
> offset_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount;
> if (imap_valid &&
> - (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> + (!xfs_inode_has_cow_data(ip) ||
> + wpc->io_type == XFS_IO_COW ||
> + wpc->cow_seq == ip->i_cowfp->if_seq))
> return 0;
Isn't this racy? It's not an atomic variable, we hold no locks,
there are no memory barriers, etc. Hence we can miss changes made by
concurrent mapping changes...
Which makes me ask - is the sequence number bumped before or after
we modify the extent map? It seems to me that it needs to be done
before we make a modification so that we invalidate the current map
before we change the extent map to avoid issuing IO to an extent map
we are know we changing, right?
/me had prototype seqno patches like this from way back and kept
hitting races because of these "imap_valid checks are done without
locks" issues...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
2018-07-14 0:03 ` Dave Chinner
@ 2018-07-17 13:37 ` Christoph Hellwig
2018-07-17 23:13 ` Dave Chinner
0 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2018-07-17 13:37 UTC (permalink / raw)
To: Dave Chinner; +Cc: Christoph Hellwig, linux-xfs
On Sat, Jul 14, 2018 at 10:03:01AM +1000, Dave Chinner wrote:
> > if (imap_valid &&
> > - (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> > + (!xfs_inode_has_cow_data(ip) ||
> > + wpc->io_type == XFS_IO_COW ||
> > + wpc->cow_seq == ip->i_cowfp->if_seq))
> > return 0;
>
> Isn't this racy? It's not an atomic variable, we hold no locks,
> there are no memory barriers, etc. Hence we can miss changes made by
> concurrent mapping changes...
>
> Which makes me ask - is the sequence number bumped before or after
> we modify the extent map? It seems to me that it needs to be done
> before we make a modification so that we invalidate the current map
> before we change the extent map to avoid issuing IO to an extent map
> we are know we changing, right?
Right now they are bumped later, but they really should be first.
I've got a series that bumps them first now.
That being said at least for the COW fork I don't think we care
about the races too much. All the actual updates to the COW fork
are under the page lock, so for a given page we are synchronized
already. And for the bigger picture we either convert a COW
fork to a real fork, in which case the change doesn't matter, or
we drop it, in which case we already have higher level synchronization.
For the data fork things might be more nasty, and that could explain
why my trivial extension to that just didn't work at all..
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 6/6] xfs: avoid COW fork extent lookups in writeback if the fork didn't change
2018-07-17 13:37 ` Christoph Hellwig
@ 2018-07-17 23:13 ` Dave Chinner
0 siblings, 0 replies; 15+ messages in thread
From: Dave Chinner @ 2018-07-17 23:13 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-xfs
On Tue, Jul 17, 2018 at 03:37:10PM +0200, Christoph Hellwig wrote:
> On Sat, Jul 14, 2018 at 10:03:01AM +1000, Dave Chinner wrote:
> > > if (imap_valid &&
> > > - (!xfs_inode_has_cow_data(ip) || wpc->io_type == XFS_IO_COW))
> > > + (!xfs_inode_has_cow_data(ip) ||
> > > + wpc->io_type == XFS_IO_COW ||
> > > + wpc->cow_seq == ip->i_cowfp->if_seq))
> > > return 0;
> >
> > Isn't this racy? It's not an atomic variable, we hold no locks,
> > there are no memory barriers, etc. Hence we can miss changes made by
> > concurrent mapping changes...
> >
> > Which makes me ask - is the sequence number bumped before or after
> > we modify the extent map? It seems to me that it needs to be done
> > before we make a modification so that we invalidate the current map
> > before we change the extent map to avoid issuing IO to an extent map
> > we are know we changing, right?
>
> Right now they are bumped later, but they really should be first.
>
> I've got a series that bumps them first now.
Cool.
> That being said at least for the COW fork I don't think we care
> about the races too much. All the actual updates to the COW fork
> are under the page lock, so for a given page we are synchronized
> already. And for the bigger picture we either convert a COW
> fork to a real fork, in which case the change doesn't matter, or
> we drop it, in which case we already have higher level synchronization.
Ok.
>
> For the data fork things might be more nasty, and that could explain
> why my trivial extension to that just didn't work at all..
Yeah, my patch was catching data fork modifications as well.
IIRC I ended up putting the seqno bump and checks under the
ip->i_flags_lock as a quick method of serialising updates, and that
made the update vs check races go away. It didn't make all the
problems go away - just the ones that were easy to reproduce - but
those remaining bugs may have been in other parts of the patchset
that I never got to the bottom of...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 15+ messages in thread