* [PATCH 1/4] xfs: debug mode forced buffered write failure
2016-03-04 14:40 [PATCH v3 0/4] fix up indlen reservations on extent split Brian Foster
@ 2016-03-04 14:40 ` Brian Foster
2016-03-05 18:27 ` Christoph Hellwig
2016-03-04 14:40 ` [PATCH 2/4] xfs: update freeblocks counter after extent deletion Brian Foster
` (2 subsequent siblings)
3 siblings, 1 reply; 6+ messages in thread
From: Brian Foster @ 2016-03-04 14:40 UTC (permalink / raw)
To: xfs
Add a DEBUG mode-only sysfs knob to enable forced buffered write
failure. An additional side effect of this mode is brute force killing
of delayed allocation blocks in the range of the write. The latter is
the prime motiviation behind this patch, as userspace test
infrastructure requires a reliable mechanism to create and split
delalloc extents without causing extent conversion.
Certain fallocate operations (i.e., zero range) were used for this in
the past, but the implementations have changed such that delalloc
extents are flushed and converted to real blocks, rendering the test
useless.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_aops.c | 9 ++++++-
fs/xfs/xfs_mount.h | 25 +++++++++++++++++
fs/xfs/xfs_sysfs.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++-------
3 files changed, 101 insertions(+), 11 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7a467b3..e1d80b4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1473,6 +1473,7 @@ xfs_vm_write_failed(
loff_t from = pos & (PAGE_CACHE_SIZE - 1);
loff_t to = from + len;
struct buffer_head *bh, *head;
+ struct xfs_mount *mp = XFS_I(inode)->i_mount;
/*
* The request pos offset might be 32 or 64 bit, this is all fine
@@ -1514,7 +1515,8 @@ xfs_vm_write_failed(
if (!buffer_delay(bh) && !buffer_unwritten(bh))
continue;
- if (!buffer_new(bh) && block_offset < i_size_read(inode))
+ if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
+ block_offset < i_size_read(inode))
continue;
if (buffer_delay(bh))
@@ -1554,6 +1556,7 @@ xfs_vm_write_begin(
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
struct page *page;
int status;
+ struct xfs_mount *mp = XFS_I(mapping->host)->i_mount;
ASSERT(len <= PAGE_CACHE_SIZE);
@@ -1562,6 +1565,8 @@ xfs_vm_write_begin(
return -ENOMEM;
status = __block_write_begin(page, pos, len, xfs_get_blocks);
+ if (xfs_mp_fail_writes(mp))
+ status = -EIO;
if (unlikely(status)) {
struct inode *inode = mapping->host;
size_t isize = i_size_read(inode);
@@ -1574,6 +1579,8 @@ xfs_vm_write_begin(
* allocated in this write, not blocks that were previously
* written successfully.
*/
+ if (xfs_mp_fail_writes(mp))
+ isize = 0;
if (pos + len > isize) {
ssize_t start = max_t(ssize_t, pos, isize);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1c8611f..bac6b34 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -147,6 +147,17 @@ typedef struct xfs_mount {
* to various other kinds of pain inflicted on the pNFS server.
*/
__uint32_t m_generation;
+
+#ifdef DEBUG
+ /*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are forced to fail. All delalloc blocks in the range
+ * of the write (including pre-existing delalloc blocks!) are tossed as
+ * part of the write failure error handling sequence.
+ */
+ bool m_fail_writes;
+#endif
} xfs_mount_t;
/*
@@ -263,6 +274,20 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
+#ifdef DEBUG
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+ return mp->m_fail_writes;
+}
+#else
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+ return 0;
+}
+#endif
+
/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 641d625..5f2d28d 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -22,6 +22,9 @@
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_stats.h"
+#include "libxfs/xfs_trans_resv.h"
+#include "libxfs/xfs_format.h"
+#include "xfs_mount.h"
struct xfs_sysfs_attr {
struct attribute attr;
@@ -45,16 +48,6 @@ to_attr(struct attribute *attr)
#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
-/*
- * xfs_mount kobject. This currently has no attributes and thus no need for show
- * and store helpers. The mp kobject serves as the per-mount parent object that
- * is identified by the fsname under sysfs.
- */
-
-struct kobj_type xfs_mp_ktype = {
- .release = xfs_sysfs_release,
-};
-
STATIC ssize_t
xfs_sysfs_object_show(
struct kobject *kobject,
@@ -83,6 +76,71 @@ static const struct sysfs_ops xfs_sysfs_ops = {
.store = xfs_sysfs_object_store,
};
+/*
+ * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
+ * that is identified by the fsname under sysfs.
+ */
+
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+
+ return container_of(kobj, struct xfs_mount, m_kobj);
+}
+
+#ifdef DEBUG
+
+STATIC ssize_t
+fail_writes_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val == 1)
+ mp->m_fail_writes = true;
+ else if (val == 0)
+ mp->m_fail_writes = false;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+STATIC ssize_t
+fail_writes_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0);
+}
+XFS_SYSFS_ATTR_RW(fail_writes);
+
+#endif /* DEBUG */
+
+static struct attribute *xfs_mp_attrs[] = {
+#ifdef DEBUG
+ ATTR_LIST(fail_writes),
+#endif
+ NULL,
+};
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_sysfs_ops,
+ .default_attrs = xfs_mp_attrs,
+};
+
#ifdef DEBUG
/* debug */
--
2.4.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/4] xfs: update freeblocks counter after extent deletion
2016-03-04 14:40 [PATCH v3 0/4] fix up indlen reservations on extent split Brian Foster
2016-03-04 14:40 ` [PATCH 1/4] xfs: debug mode forced buffered write failure Brian Foster
@ 2016-03-04 14:40 ` Brian Foster
2016-03-04 14:40 ` [PATCH 3/4] xfs: refactor delalloc indlen reservation split into helper Brian Foster
2016-03-04 14:40 ` [PATCH 4/4] xfs: borrow indirect blocks from freed extent when available Brian Foster
3 siblings, 0 replies; 6+ messages in thread
From: Brian Foster @ 2016-03-04 14:40 UTC (permalink / raw)
To: xfs
xfs_bunmapi() currently updates the fdblocks counter, unreserves quota,
etc. before the extent is deleted by xfs_bmap_del_extent(). The function
has problems dividing up the indirect reserved blocks for scenarios
where a single delalloc extent is split in two. Particularly, there
aren't always enough blocks reserved for multiple extents in a single
extent reservation.
The solution to this problem is to allow the extent removal code to
steal from the deleted extent to meet indirect reservation requirements.
Move the block of code in xfs_bmapi() that updates the fdblocks counter
to after the call to xfs_bmap_del_extent() to allow the codepath to
update the extent record before the free blocks are accounted. Also,
reshuffle the code slightly so the delalloc accounting occurs near the
xfs_bmap_del_extent() call to provide context for the comments.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/xfs/libxfs/xfs_bmap.c | 58 ++++++++++++++++++++++++++++--------------------
1 file changed, 34 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index cb58d72..f57a9e9 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5296,31 +5296,7 @@ xfs_bunmapi(
goto nodelete;
}
}
- if (wasdel) {
- ASSERT(startblockval(del.br_startblock) > 0);
- /* Update realtime/data freespace, unreserve quota */
- if (isrt) {
- xfs_filblks_t rtexts;
- rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
- do_div(rtexts, mp->m_sb.sb_rextsize);
- xfs_mod_frextents(mp, (int64_t)rtexts);
- (void)xfs_trans_reserve_quota_nblks(NULL,
- ip, -((long)del.br_blockcount), 0,
- XFS_QMOPT_RES_RTBLKS);
- } else {
- xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
- false);
- (void)xfs_trans_reserve_quota_nblks(NULL,
- ip, -((long)del.br_blockcount), 0,
- XFS_QMOPT_RES_REGBLKS);
- }
- ip->i_delayed_blks -= del.br_blockcount;
- if (cur)
- cur->bc_private.b.flags |=
- XFS_BTCUR_BPRV_WASDEL;
- } else if (cur)
- cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
/*
* If it's the case where the directory code is running
* with no block reservation, and the deleted block is in
@@ -5342,11 +5318,45 @@ xfs_bunmapi(
error = -ENOSPC;
goto error0;
}
+
+ /*
+ * Unreserve quota and update realtime free space, if
+ * appropriate. If delayed allocation, update the inode delalloc
+ * counter now and wait to update the sb counters as
+ * xfs_bmap_del_extent() might need to borrow some blocks.
+ */
+ if (wasdel) {
+ ASSERT(startblockval(del.br_startblock) > 0);
+ if (isrt) {
+ xfs_filblks_t rtexts;
+
+ rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
+ do_div(rtexts, mp->m_sb.sb_rextsize);
+ xfs_mod_frextents(mp, (int64_t)rtexts);
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
+ XFS_QMOPT_RES_RTBLKS);
+ } else {
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
+ XFS_QMOPT_RES_REGBLKS);
+ }
+ ip->i_delayed_blks -= del.br_blockcount;
+ if (cur)
+ cur->bc_private.b.flags |=
+ XFS_BTCUR_BPRV_WASDEL;
+ } else if (cur)
+ cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
+
error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
&tmp_logflags, whichfork);
logflags |= tmp_logflags;
if (error)
goto error0;
+
+ if (!isrt && wasdel)
+ xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
+
bno = del.br_startoff - 1;
nodelete:
/*
--
2.4.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 3/4] xfs: refactor delalloc indlen reservation split into helper
2016-03-04 14:40 [PATCH v3 0/4] fix up indlen reservations on extent split Brian Foster
2016-03-04 14:40 ` [PATCH 1/4] xfs: debug mode forced buffered write failure Brian Foster
2016-03-04 14:40 ` [PATCH 2/4] xfs: update freeblocks counter after extent deletion Brian Foster
@ 2016-03-04 14:40 ` Brian Foster
2016-03-04 14:40 ` [PATCH 4/4] xfs: borrow indirect blocks from freed extent when available Brian Foster
3 siblings, 0 replies; 6+ messages in thread
From: Brian Foster @ 2016-03-04 14:40 UTC (permalink / raw)
To: xfs
The delayed allocation indirect reservation splitting code is not
sufficient in some cases where a delalloc extent is split in two. In
preparation for enhancements to this code, refactor the current indlen
distribution algorithm into a new helper function.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/libxfs/xfs_bmap.c | 77 ++++++++++++++++++++++++++++++++++++------------
1 file changed, 58 insertions(+), 19 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f57a9e9..27e6689 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4721,6 +4721,51 @@ error0:
}
/*
+ * When a delalloc extent is split (e.g., due to a hole punch), the original
+ * indlen reservation must be shared across the two new extents that are left
+ * behind.
+ *
+ * Given the original reservation and the worst case indlen for the two new
+ * extents (as calculated by xfs_bmap_worst_indlen()), split the original
+ * reservation fairly across the two new extents.
+ */
+static void
+xfs_bmap_split_indlen(
+ xfs_filblks_t ores, /* original res. */
+ xfs_filblks_t *indlen1, /* ext1 worst indlen */
+ xfs_filblks_t *indlen2) /* ext2 worst indlen */
+{
+ xfs_filblks_t nres; /* new total res. */
+ xfs_filblks_t temp;
+ xfs_filblks_t temp2;
+
+ temp = *indlen1;
+ temp2 = *indlen2;
+ nres = temp + temp2;
+
+ /*
+ * The only blocks available are those reserved for the original extent.
+ * Therefore, we have to skim blocks off each of the new reservations so
+ * long as the new total reservation is greater than the original.
+ */
+ while (nres > ores) {
+ if (temp) {
+ temp--;
+ nres--;
+ }
+ if (nres == ores)
+ break;
+ if (temp2) {
+ temp2--;
+ nres--;
+ }
+ }
+
+ *indlen1 = temp;
+ *indlen2 = temp2;
+}
+
+/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space (or undoing a delayed allocation).
*/
@@ -4985,27 +5030,21 @@ xfs_bmap_del_extent(
XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
} else {
ASSERT(whichfork == XFS_DATA_FORK);
- temp = xfs_bmap_worst_indlen(ip, temp);
+
+ /*
+ * Distribute the original indlen reservation across the
+ * two new extents.
+ */
+ temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
+ temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+ xfs_bmap_split_indlen(da_old, &temp, &temp2);
+ da_new = temp + temp2;
+
+ /*
+ * Set the reservation for each extent.
+ */
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- temp2 = xfs_bmap_worst_indlen(ip, temp2);
new.br_startblock = nullstartblock((int)temp2);
- da_new = temp + temp2;
- while (da_new > da_old) {
- if (temp) {
- temp--;
- da_new--;
- xfs_bmbt_set_startblock(ep,
- nullstartblock((int)temp));
- }
- if (da_new == da_old)
- break;
- if (temp2) {
- temp2--;
- da_new--;
- new.br_startblock =
- nullstartblock((int)temp2);
- }
- }
}
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_insert(ip, *idx + 1, 1, &new, state);
--
2.4.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 4/4] xfs: borrow indirect blocks from freed extent when available
2016-03-04 14:40 [PATCH v3 0/4] fix up indlen reservations on extent split Brian Foster
` (2 preceding siblings ...)
2016-03-04 14:40 ` [PATCH 3/4] xfs: refactor delalloc indlen reservation split into helper Brian Foster
@ 2016-03-04 14:40 ` Brian Foster
3 siblings, 0 replies; 6+ messages in thread
From: Brian Foster @ 2016-03-04 14:40 UTC (permalink / raw)
To: xfs
xfs_bmap_del_extent() handles extent removal from the in-core and
on-disk extent lists. When removing a delalloc range, it updates the
indirect block reservation appropriately based on the removal. It
currently enforces that the new indirect block reservation is less than
or equal to the original. This is normally the case in all situations
except for in certain cases when the removed range creates a hole in a
single delalloc extent, thus splitting a single delalloc extent in two.
It is possible with small enough extents to split an indlen==1 extent
into two such slightly smaller extents. This leaves one extent with 0
indirect blocks and leads to assert failures in other areas (e.g.,
xfs_bunmapi() if the extent happens to be removed).
Update the indlen distribution code to steal blocks from the deleted
extent, if necessary, to satisfy the worst case total indirect
reservation for the new extents. This is safe as the caller does not
update the fdblocks counters until the extent is removed. Blocks stolen
in this manner simply remain accounted as allocated, having ownership
transferred from the data extent to an indirect reservation.
As a precaution, fall back to the original reservation algorithm if the
new indlen requirement is not met and warn if we end up with extents
without any reservation at all to detect this more easily in the future.
Signed-off-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/libxfs/xfs_bmap.c | 46 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 36 insertions(+), 10 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 27e6689..2c3080f 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4727,26 +4727,43 @@ error0:
*
* Given the original reservation and the worst case indlen for the two new
* extents (as calculated by xfs_bmap_worst_indlen()), split the original
- * reservation fairly across the two new extents.
+ * reservation fairly across the two new extents. If necessary, steal available
+ * blocks from a deleted extent to make up a reservation deficiency (e.g., if
+ * ores == 1). The number of stolen blocks is returned. The availability and
+ * subsequent accounting of stolen blocks is the responsibility of the caller.
*/
-static void
+static xfs_filblks_t
xfs_bmap_split_indlen(
xfs_filblks_t ores, /* original res. */
xfs_filblks_t *indlen1, /* ext1 worst indlen */
- xfs_filblks_t *indlen2) /* ext2 worst indlen */
+ xfs_filblks_t *indlen2, /* ext2 worst indlen */
+ xfs_filblks_t avail) /* stealable blocks */
{
xfs_filblks_t nres; /* new total res. */
xfs_filblks_t temp;
xfs_filblks_t temp2;
+ xfs_filblks_t stolen = 0;
temp = *indlen1;
temp2 = *indlen2;
nres = temp + temp2;
/*
- * The only blocks available are those reserved for the original extent.
- * Therefore, we have to skim blocks off each of the new reservations so
- * long as the new total reservation is greater than the original.
+ * Steal as many blocks as we can to try and satisfy the worst case
+ * indlen for both new extents.
+ */
+ while (nres > ores && avail) {
+ nres--;
+ avail--;
+ stolen++;
+ }
+
+ /*
+ * The only blocks available are those reserved for the original
+ * extent and what we can steal from the extent being removed.
+ * If this still isn't enough to satisfy the combined
+ * requirements for the two new extents, skim blocks off of each
+ * of the new reservations until they match what is available.
*/
while (nres > ores) {
if (temp) {
@@ -4763,6 +4780,8 @@ xfs_bmap_split_indlen(
*indlen1 = temp;
*indlen2 = temp2;
+
+ return stolen;
}
/*
@@ -5029,20 +5048,27 @@ xfs_bmap_del_extent(
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
} else {
+ xfs_filblks_t stolen;
ASSERT(whichfork == XFS_DATA_FORK);
/*
* Distribute the original indlen reservation across the
- * two new extents.
+ * two new extents. Steal blocks from the deleted extent
+ * if necessary. Stealing blocks simply fudges the
+ * fdblocks accounting in xfs_bunmapi().
*/
temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
- xfs_bmap_split_indlen(da_old, &temp, &temp2);
- da_new = temp + temp2;
+ stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
+ del->br_blockcount);
+ da_new = temp + temp2 - stolen;
+ del->br_blockcount -= stolen;
/*
- * Set the reservation for each extent.
+ * Set the reservation for each extent. Warn if either
+ * is zero as this can lead to delalloc problems.
*/
+ WARN_ON_ONCE(!temp || !temp2);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
new.br_startblock = nullstartblock((int)temp2);
}
--
2.4.3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 6+ messages in thread