* [PATCH 1/8] vfs: add common GETFSMAP ioctl definitions
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
@ 2017-02-18 1:17 ` Darrick J. Wong
2017-02-18 1:17 ` [PATCH 2/8] xfs: plumb in needed functions for range querying of the freespace btrees Darrick J. Wong
` (7 subsequent siblings)
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:17 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Add the GETFSMAP headers to the VFS kernel headers
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
include/uapi/linux/fsmap.h | 110 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 110 insertions(+)
create mode 100644 include/uapi/linux/fsmap.h
diff --git a/include/uapi/linux/fsmap.h b/include/uapi/linux/fsmap.h
new file mode 100644
index 0000000..1e94856
--- /dev/null
+++ b/include/uapi/linux/fsmap.h
@@ -0,0 +1,110 @@
+/*
+ * FS_IOC_GETFSMAP ioctl infrastructure.
+ *
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef _LINUX_FSMAP_H
+#define _LINUX_FSMAP_H
+
+#include <linux/types.h>
+
+/*
+ * Structure for FS_IOC_GETFSMAP.
+ *
+ * The memory layout for this call are the scalar values defined in
+ * struct fsmap_head, followed by two struct fsmap that describe
+ * the lower and upper bound of mappings to return, followed by an
+ * array of struct fsmap mappings.
+ *
+ * fmh_iflags control the output of the call, whereas fmh_oflags report
+ * on the overall record output. fmh_count should be set to the
+ * length of the fmh_recs array, and fmh_entries will be set to the
+ * number of entries filled out during each call. If fmh_count is
+ * zero, the number of reverse mappings will be returned in
+ * fmh_entries, though no mappings will be returned. fmh_reserved
+ * must be set to zero.
+ *
+ * The two elements in the fmh_keys array are used to constrain the
+ * output. The first element in the array should represent the
+ * lowest disk mapping ("low key") that the user wants to learn
+ * about. If this value is all zeroes, the filesystem will return
+ * the first entry it knows about. For a subsequent call, the
+ * contents of fsmap_head.fmh_recs[fsmap_head.fmh_count - 1] should be
+ * copied into fmh_keys[0] to have the kernel start where it left off.
+ *
+ * The second element in the fmh_keys array should represent the
+ * highest disk mapping ("high key") that the user wants to learn
+ * about. If this value is all ones, the filesystem will not stop
+ * until it runs out of mapping to return or runs out of space in
+ * fmh_recs.
+ *
+ * fmr_device can be either a 32-bit cookie representing a device, or
+ * a 32-bit dev_t if the FMH_OF_DEV_T flag is set. fmr_physical,
+ * fmr_offset, and fmr_length are expressed in units of bytes.
+ * fmr_owner is either an inode number, or a special value if
+ * FMR_OF_SPECIAL_OWNER is set in fmr_flags.
+ */
+struct fsmap {
+ __u32 fmr_device; /* device id */
+ __u32 fmr_flags; /* mapping flags */
+ __u64 fmr_physical; /* device offset of segment */
+ __u64 fmr_owner; /* owner id */
+ __u64 fmr_offset; /* file offset of segment */
+ __u64 fmr_length; /* length of segment */
+ __u64 fmr_reserved[3]; /* must be zero */
+};
+
+struct fsmap_head {
+ __u32 fmh_iflags; /* control flags */
+ __u32 fmh_oflags; /* output flags */
+ __u32 fmh_count; /* # of entries in array incl. input */
+ __u32 fmh_entries; /* # of entries filled in (output). */
+ __u64 fmh_reserved[6]; /* must be zero */
+
+ struct fsmap fmh_keys[2]; /* low and high keys for the mapping search */
+ struct fsmap fmh_recs[]; /* returned records */
+};
+
+/* Size of an fsmap_head with room for nr records. */
+static inline size_t
+fsmap_sizeof(
+ unsigned int nr)
+{
+ return sizeof(struct fsmap_head) + nr * sizeof(struct fsmap);
+}
+
+/* Start the next fsmap query at the end of the current query results. */
+static inline void
+fsmap_advance(
+ struct fsmap_head *head)
+{
+ head->fmh_keys[0] = head->fmh_recs[head->fmh_entries - 1];
+}
+
+/* fmh_iflags values - set by FS_IOC_GETFSMAP caller in the header. */
+/* no flags defined yet */
+#define FMH_IF_VALID 0
+
+/* fmh_oflags values - returned in the header segment only. */
+#define FMH_OF_DEV_T 0x1 /* fmr_device values will be dev_t */
+
+/* fmr_flags values - returned for each non-header segment */
+#define FMR_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */
+#define FMR_OF_ATTR_FORK 0x2 /* segment = attribute fork */
+#define FMR_OF_EXTENT_MAP 0x4 /* segment = extent map */
+#define FMR_OF_SHARED 0x8 /* segment = shared with another file */
+#define FMR_OF_SPECIAL_OWNER 0x10 /* owner is a special value */
+#define FMR_OF_LAST 0x20 /* segment is the last in the FS */
+
+/* Each FS gets to define its own special owner codes. */
+#define FMR_OWNER(type, code) (((__u64)type << 32) | \
+ ((__u64)code & 0xFFFFFFFFULL))
+#define FMR_OWN_FREE FMR_OWNER(0, 1) /* free space */
+#define FMR_OWN_UNKNOWN FMR_OWNER(0, 2) /* unknown owner */
+#define FMR_OWN_METADATA FMR_OWNER(0, 3) /* metadata */
+
+#define FS_IOC_GETFSMAP _IOWR('X', 59, struct fsmap_head)
+
+#endif /* _LINUX_FSMAP_H */
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 2/8] xfs: plumb in needed functions for range querying of the freespace btrees
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
2017-02-18 1:17 ` [PATCH 1/8] vfs: add common GETFSMAP ioctl definitions Darrick J. Wong
@ 2017-02-18 1:17 ` Darrick J. Wong
2017-02-21 17:34 ` [PATCH v2 " Darrick J. Wong
2017-02-18 1:17 ` [PATCH 3/8] xfs: provide a query_range function for " Darrick J. Wong
` (6 subsequent siblings)
8 siblings, 1 reply; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:17 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Plumb in the pieces (init_high_key, diff_two_keys) necessary to call
query_range on the free space btrees. Remove the debugging asserts
so that we can make queries starting from block 0.
While we're at it, merge the redundant "if (btnum ==" hunks.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc_btree.c | 162 +++++++++++++++++++++++++++++----------
1 file changed, 119 insertions(+), 43 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index efb467b..ba3ec9c 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -205,19 +205,28 @@ xfs_allocbt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
- ASSERT(rec->alloc.ar_startblock != 0);
-
key->alloc.ar_startblock = rec->alloc.ar_startblock;
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
STATIC void
+xfs_bnobt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ __u32 x;
+
+ x = be32_to_cpu(rec->alloc.ar_startblock);
+ x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
+ key->alloc.ar_startblock = cpu_to_be32(x);
+ key->alloc.ar_blockcount = 0;
+}
+
+STATIC void
xfs_allocbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- ASSERT(cur->bc_rec.a.ar_startblock != 0);
-
rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
@@ -236,18 +245,24 @@ xfs_allocbt_init_ptr_from_cur(
}
STATIC __int64_t
-xfs_allocbt_key_diff(
+xfs_bnobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
- __int64_t diff;
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return (__int64_t)be32_to_cpu(kp->ar_startblock) -
- rec->ar_startblock;
- }
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}
+
+STATIC __int64_t
+xfs_cntbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ __int64_t diff;
diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
if (diff)
@@ -256,6 +271,33 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
+STATIC __int64_t
+xfs_bnobt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+ be32_to_cpu(k2->alloc.ar_startblock);
+}
+
+STATIC __int64_t
+xfs_cntbt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ __int64_t diff;
+
+ diff = be32_to_cpu(k1->alloc.ar_blockcount) -
+ be32_to_cpu(k2->alloc.ar_blockcount);
+ if (diff)
+ return diff;
+
+ return be32_to_cpu(k1->alloc.ar_startblock) -
+ be32_to_cpu(k2->alloc.ar_startblock);
+}
+
static bool
xfs_allocbt_verify(
struct xfs_buf *bp)
@@ -346,44 +388,78 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
-xfs_allocbt_keys_inorder(
+xfs_bnobt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(k1->alloc.ar_blockcount) <
- be32_to_cpu(k2->alloc.ar_blockcount) ||
- (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
- be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock));
- }
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
}
STATIC int
-xfs_allocbt_recs_inorder(
+xfs_bnobt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(r1->alloc.ar_startblock) +
- be32_to_cpu(r1->alloc.ar_blockcount) <=
- be32_to_cpu(r2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(r1->alloc.ar_blockcount) <
- be32_to_cpu(r2->alloc.ar_blockcount) ||
- (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
- be32_to_cpu(r1->alloc.ar_startblock) <
- be32_to_cpu(r2->alloc.ar_startblock));
- }
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+}
+
+STATIC int
+xfs_cntbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->alloc.ar_blockcount) <
+ be32_to_cpu(k2->alloc.ar_blockcount) ||
+ (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+ be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock));
+}
+
+STATIC int
+xfs_cntbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return be32_to_cpu(r1->alloc.ar_blockcount) <
+ be32_to_cpu(r2->alloc.ar_blockcount) ||
+ (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+ be32_to_cpu(r1->alloc.ar_startblock) <
+ be32_to_cpu(r2->alloc.ar_startblock));
}
-#endif /* DEBUG */
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_bnobt_ops = {
+ .rec_len = sizeof(xfs_alloc_rec_t),
+ .key_len = sizeof(xfs_alloc_key_t),
+
+ .dup_cursor = xfs_allocbt_dup_cursor,
+ .set_root = xfs_allocbt_set_root,
+ .alloc_block = xfs_allocbt_alloc_block,
+ .free_block = xfs_allocbt_free_block,
+ .update_lastrec = xfs_allocbt_update_lastrec,
+ .get_minrecs = xfs_allocbt_get_minrecs,
+ .get_maxrecs = xfs_allocbt_get_maxrecs,
+ .init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
+ .key_diff = xfs_bnobt_key_diff,
+ .buf_ops = &xfs_allocbt_buf_ops,
+ .diff_two_keys = xfs_bnobt_diff_two_keys,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_bnobt_keys_inorder,
+ .recs_inorder = xfs_bnobt_recs_inorder,
+#endif
+};
-static const struct xfs_btree_ops xfs_allocbt_ops = {
+static const struct xfs_btree_ops xfs_cntbt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -397,11 +473,12 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
- .key_diff = xfs_allocbt_key_diff,
+ .key_diff = xfs_cntbt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
+ .diff_two_keys = xfs_cntbt_diff_two_keys,
#if defined(DEBUG) || defined(XFS_WARN)
- .keys_inorder = xfs_allocbt_keys_inorder,
- .recs_inorder = xfs_allocbt_recs_inorder,
+ .keys_inorder = xfs_cntbt_keys_inorder,
+ .recs_inorder = xfs_cntbt_recs_inorder,
#endif
};
@@ -427,16 +504,15 @@ xfs_allocbt_init_cursor(
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
- cur->bc_ops = &xfs_allocbt_ops;
- if (btnum == XFS_BTNUM_BNO)
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
- else
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
+ cur->bc_ops = &xfs_cntbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
} else {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+ cur->bc_ops = &xfs_bnobt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
}
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v2 2/8] xfs: plumb in needed functions for range querying of the freespace btrees
2017-02-18 1:17 ` [PATCH 2/8] xfs: plumb in needed functions for range querying of the freespace btrees Darrick J. Wong
@ 2017-02-21 17:34 ` Darrick J. Wong
0 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-21 17:34 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-ext4; +Cc: Brian Foster
Plumb in the pieces (init_high_key, diff_two_keys) necessary to call
query_range on the free space btrees. Remove the debugging asserts
so that we can make queries starting from block 0.
While we're at it, merge the redundant "if (btnum ==" hunks.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
v2: Add forgotten cntbt high key function.
---
fs/xfs/libxfs/xfs_alloc_btree.c | 172 +++++++++++++++++++++++++++++----------
1 file changed, 129 insertions(+), 43 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index efb467b..e1fcfe7 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -205,19 +205,37 @@ xfs_allocbt_init_key_from_rec(
union xfs_btree_key *key,
union xfs_btree_rec *rec)
{
- ASSERT(rec->alloc.ar_startblock != 0);
-
key->alloc.ar_startblock = rec->alloc.ar_startblock;
key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
}
STATIC void
+xfs_bnobt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ __u32 x;
+
+ x = be32_to_cpu(rec->alloc.ar_startblock);
+ x += be32_to_cpu(rec->alloc.ar_blockcount) - 1;
+ key->alloc.ar_startblock = cpu_to_be32(x);
+ key->alloc.ar_blockcount = 0;
+}
+
+STATIC void
+xfs_cntbt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ union xfs_btree_rec *rec)
+{
+ key->alloc.ar_blockcount = rec->alloc.ar_blockcount;
+ key->alloc.ar_startblock = 0;
+}
+
+STATIC void
xfs_allocbt_init_rec_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_rec *rec)
{
- ASSERT(cur->bc_rec.a.ar_startblock != 0);
-
rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
}
@@ -236,18 +254,24 @@ xfs_allocbt_init_ptr_from_cur(
}
STATIC __int64_t
-xfs_allocbt_key_diff(
+xfs_bnobt_key_diff(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
xfs_alloc_key_t *kp = &key->alloc;
- __int64_t diff;
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return (__int64_t)be32_to_cpu(kp->ar_startblock) -
- rec->ar_startblock;
- }
+ return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
+}
+
+STATIC __int64_t
+xfs_cntbt_key_diff(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *key)
+{
+ xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a;
+ xfs_alloc_key_t *kp = &key->alloc;
+ __int64_t diff;
diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount;
if (diff)
@@ -256,6 +280,33 @@ xfs_allocbt_key_diff(
return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
}
+STATIC __int64_t
+xfs_bnobt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return (__int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
+ be32_to_cpu(k2->alloc.ar_startblock);
+}
+
+STATIC __int64_t
+xfs_cntbt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ __int64_t diff;
+
+ diff = be32_to_cpu(k1->alloc.ar_blockcount) -
+ be32_to_cpu(k2->alloc.ar_blockcount);
+ if (diff)
+ return diff;
+
+ return be32_to_cpu(k1->alloc.ar_startblock) -
+ be32_to_cpu(k2->alloc.ar_startblock);
+}
+
static bool
xfs_allocbt_verify(
struct xfs_buf *bp)
@@ -346,44 +397,54 @@ const struct xfs_buf_ops xfs_allocbt_buf_ops = {
#if defined(DEBUG) || defined(XFS_WARN)
STATIC int
-xfs_allocbt_keys_inorder(
+xfs_bnobt_keys_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(k1->alloc.ar_blockcount) <
- be32_to_cpu(k2->alloc.ar_blockcount) ||
- (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
- be32_to_cpu(k1->alloc.ar_startblock) <
- be32_to_cpu(k2->alloc.ar_startblock));
- }
+ return be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock);
}
STATIC int
-xfs_allocbt_recs_inorder(
+xfs_bnobt_recs_inorder(
struct xfs_btree_cur *cur,
union xfs_btree_rec *r1,
union xfs_btree_rec *r2)
{
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- return be32_to_cpu(r1->alloc.ar_startblock) +
- be32_to_cpu(r1->alloc.ar_blockcount) <=
- be32_to_cpu(r2->alloc.ar_startblock);
- } else {
- return be32_to_cpu(r1->alloc.ar_blockcount) <
- be32_to_cpu(r2->alloc.ar_blockcount) ||
- (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
- be32_to_cpu(r1->alloc.ar_startblock) <
- be32_to_cpu(r2->alloc.ar_startblock));
- }
+ return be32_to_cpu(r1->alloc.ar_startblock) +
+ be32_to_cpu(r1->alloc.ar_blockcount) <=
+ be32_to_cpu(r2->alloc.ar_startblock);
+}
+
+STATIC int
+xfs_cntbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_key *k1,
+ union xfs_btree_key *k2)
+{
+ return be32_to_cpu(k1->alloc.ar_blockcount) <
+ be32_to_cpu(k2->alloc.ar_blockcount) ||
+ (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount &&
+ be32_to_cpu(k1->alloc.ar_startblock) <
+ be32_to_cpu(k2->alloc.ar_startblock));
}
-#endif /* DEBUG */
-static const struct xfs_btree_ops xfs_allocbt_ops = {
+STATIC int
+xfs_cntbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *r1,
+ union xfs_btree_rec *r2)
+{
+ return be32_to_cpu(r1->alloc.ar_blockcount) <
+ be32_to_cpu(r2->alloc.ar_blockcount) ||
+ (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount &&
+ be32_to_cpu(r1->alloc.ar_startblock) <
+ be32_to_cpu(r2->alloc.ar_startblock));
+}
+#endif /* DEBUG */
+
+static const struct xfs_btree_ops xfs_bnobt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -395,13 +456,39 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.get_minrecs = xfs_allocbt_get_minrecs,
.get_maxrecs = xfs_allocbt_get_maxrecs,
.init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_bnobt_init_high_key_from_rec,
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
- .key_diff = xfs_allocbt_key_diff,
+ .key_diff = xfs_bnobt_key_diff,
.buf_ops = &xfs_allocbt_buf_ops,
+ .diff_two_keys = xfs_bnobt_diff_two_keys,
#if defined(DEBUG) || defined(XFS_WARN)
- .keys_inorder = xfs_allocbt_keys_inorder,
- .recs_inorder = xfs_allocbt_recs_inorder,
+ .keys_inorder = xfs_bnobt_keys_inorder,
+ .recs_inorder = xfs_bnobt_recs_inorder,
+#endif
+};
+
+static const struct xfs_btree_ops xfs_cntbt_ops = {
+ .rec_len = sizeof(xfs_alloc_rec_t),
+ .key_len = sizeof(xfs_alloc_key_t),
+
+ .dup_cursor = xfs_allocbt_dup_cursor,
+ .set_root = xfs_allocbt_set_root,
+ .alloc_block = xfs_allocbt_alloc_block,
+ .free_block = xfs_allocbt_free_block,
+ .update_lastrec = xfs_allocbt_update_lastrec,
+ .get_minrecs = xfs_allocbt_get_minrecs,
+ .get_maxrecs = xfs_allocbt_get_maxrecs,
+ .init_key_from_rec = xfs_allocbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_cntbt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
+ .key_diff = xfs_cntbt_key_diff,
+ .buf_ops = &xfs_allocbt_buf_ops,
+ .diff_two_keys = xfs_cntbt_diff_two_keys,
+#if defined(DEBUG) || defined(XFS_WARN)
+ .keys_inorder = xfs_cntbt_keys_inorder,
+ .recs_inorder = xfs_cntbt_recs_inorder,
#endif
};
@@ -427,16 +514,15 @@ xfs_allocbt_init_cursor(
cur->bc_mp = mp;
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
- cur->bc_ops = &xfs_allocbt_ops;
- if (btnum == XFS_BTNUM_BNO)
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
- else
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
+ cur->bc_ops = &xfs_cntbt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
} else {
+ cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
+ cur->bc_ops = &xfs_bnobt_ops;
cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
}
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 3/8] xfs: provide a query_range function for freespace btrees
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
2017-02-18 1:17 ` [PATCH 1/8] vfs: add common GETFSMAP ioctl definitions Darrick J. Wong
2017-02-18 1:17 ` [PATCH 2/8] xfs: plumb in needed functions for range querying of the freespace btrees Darrick J. Wong
@ 2017-02-18 1:17 ` Darrick J. Wong
2017-02-18 1:17 ` [PATCH 4/8] xfs: create a function to query all records in a btree Darrick J. Wong
` (5 subsequent siblings)
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:17 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Implement a query_range function for the bnobt and cntbt. This will
be used for getfsmap fallback if there is no rmapbt and by the online
scrub and repair code.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc.c | 42 ++++++++++++++++++++++++++++++++++++++++++
fs/xfs/libxfs/xfs_alloc.h | 10 ++++++++++
2 files changed, 52 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9f06a21..526df17 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2875,3 +2875,45 @@ xfs_free_extent(
xfs_trans_brelse(tp, agbp);
return error;
}
+
+struct xfs_alloc_query_range_info {
+ xfs_alloc_query_range_fn fn;
+ void *priv;
+};
+
+/* Format btree record and pass to our callback. */
+STATIC int
+xfs_alloc_query_range_helper(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_alloc_query_range_info *query = priv;
+ struct xfs_alloc_rec_incore irec;
+
+ irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
+ irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
+ return query->fn(cur, &irec, query->priv);
+}
+
+/* Find all free space within a given range of blocks. */
+int
+xfs_alloc_query_range(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *low_rec,
+ struct xfs_alloc_rec_incore *high_rec,
+ xfs_alloc_query_range_fn fn,
+ void *priv)
+{
+ union xfs_btree_irec low_brec;
+ union xfs_btree_irec high_brec;
+ struct xfs_alloc_query_range_info query;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+ low_brec.a = *low_rec;
+ high_brec.a = *high_rec;
+ query.priv = priv;
+ query.fn = fn;
+ return xfs_btree_query_range(cur, &low_brec, &high_brec,
+ xfs_alloc_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 1d0f48a..f534998 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -223,4 +223,14 @@ int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp);
+typedef int (*xfs_alloc_query_range_fn)(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv);
+
+int xfs_alloc_query_range(struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *low_rec,
+ struct xfs_alloc_rec_incore *high_rec,
+ xfs_alloc_query_range_fn fn, void *priv);
+
#endif /* __XFS_ALLOC_H__ */
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 4/8] xfs: create a function to query all records in a btree
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
` (2 preceding siblings ...)
2017-02-18 1:17 ` [PATCH 3/8] xfs: provide a query_range function for " Darrick J. Wong
@ 2017-02-18 1:17 ` Darrick J. Wong
2017-02-18 1:17 ` [PATCH 5/8] xfs: introduce the XFS_IOC_GETFSMAP ioctl Darrick J. Wong
` (4 subsequent siblings)
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:17 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Create a helper function that will query all records in a btree.
This will be used by the online repair functions to examine every
record in a btree to rebuild a second btree.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/libxfs/xfs_alloc.c | 15 +++++++++++++++
fs/xfs/libxfs/xfs_alloc.h | 2 ++
fs/xfs/libxfs/xfs_btree.c | 15 +++++++++++++++
fs/xfs/libxfs/xfs_btree.h | 2 ++
fs/xfs/libxfs/xfs_rmap.c | 28 +++++++++++++++++++++-------
fs/xfs/libxfs/xfs_rmap.h | 2 ++
6 files changed, 57 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 526df17..ba15f30 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2917,3 +2917,18 @@ xfs_alloc_query_range(
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_alloc_query_range_helper, &query);
}
+
+/* Find all free space records. */
+int
+xfs_alloc_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_alloc_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_alloc_query_range_info query;
+
+ ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+ query.priv = priv;
+ query.fn = fn;
+ return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index f534998..a2101de 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -232,5 +232,7 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
struct xfs_alloc_rec_incore *low_rec,
struct xfs_alloc_rec_incore *high_rec,
xfs_alloc_query_range_fn fn, void *priv);
+int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
+ void *priv);
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2849d3f..ca94b87 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4823,6 +4823,21 @@ xfs_btree_query_range(
fn, priv);
}
+/* Query a btree for all records. */
+int
+xfs_btree_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_btree_query_range_fn fn,
+ void *priv)
+{
+ union xfs_btree_irec low_rec;
+ union xfs_btree_irec high_rec;
+
+ memset(&low_rec, 0, sizeof(low_rec));
+ memset(&high_rec, 0xFF, sizeof(high_rec));
+ return xfs_btree_query_range(cur, &low_rec, &high_rec, fn, priv);
+}
+
/*
* Calculate the number of blocks needed to store a given number of records
* in a short-format (per-AG metadata) btree.
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 33a8f86..5114055 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -494,6 +494,8 @@ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
int xfs_btree_query_range(struct xfs_btree_cur *cur,
union xfs_btree_irec *low_rec, union xfs_btree_irec *high_rec,
xfs_btree_query_range_fn fn, void *priv);
+int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
+ void *priv);
typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
void *data);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 3a8cc71..3840556 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2001,14 +2001,14 @@ xfs_rmap_query_range_helper(
/* Find all rmaps between two keys. */
int
xfs_rmap_query_range(
- struct xfs_btree_cur *cur,
- struct xfs_rmap_irec *low_rec,
- struct xfs_rmap_irec *high_rec,
- xfs_rmap_query_range_fn fn,
- void *priv)
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *low_rec,
+ struct xfs_rmap_irec *high_rec,
+ xfs_rmap_query_range_fn fn,
+ void *priv)
{
- union xfs_btree_irec low_brec;
- union xfs_btree_irec high_brec;
+ union xfs_btree_irec low_brec;
+ union xfs_btree_irec high_brec;
struct xfs_rmap_query_range_info query;
low_brec.r = *low_rec;
@@ -2019,6 +2019,20 @@ xfs_rmap_query_range(
xfs_rmap_query_range_helper, &query);
}
+/* Find all rmaps. */
+int
+xfs_rmap_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_rmap_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_rmap_query_range_info query;
+
+ query.priv = priv;
+ query.fn = fn;
+ return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
+}
+
/* Clean up after calling xfs_rmap_finish_one. */
void
xfs_rmap_finish_one_cleanup(
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 7899305..faf2c1a 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -162,6 +162,8 @@ typedef int (*xfs_rmap_query_range_fn)(
int xfs_rmap_query_range(struct xfs_btree_cur *cur,
struct xfs_rmap_irec *low_rec, struct xfs_rmap_irec *high_rec,
xfs_rmap_query_range_fn fn, void *priv);
+int xfs_rmap_query_all(struct xfs_btree_cur *cur, xfs_rmap_query_range_fn fn,
+ void *priv);
enum xfs_rmap_intent_type {
XFS_RMAP_MAP,
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 5/8] xfs: introduce the XFS_IOC_GETFSMAP ioctl
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
` (3 preceding siblings ...)
2017-02-18 1:17 ` [PATCH 4/8] xfs: create a function to query all records in a btree Darrick J. Wong
@ 2017-02-18 1:17 ` Darrick J. Wong
2017-02-18 1:17 ` [PATCH 6/8] xfs: have getfsmap fall back to the freesp btrees when rmap is not present Darrick J. Wong
` (3 subsequent siblings)
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:17 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Introduce a new ioctl that uses the reverse mapping btree to return
information about the physical layout of the filesystem.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/Makefile | 1
fs/xfs/libxfs/xfs_fs.h | 13 +
fs/xfs/xfs_fsmap.c | 782 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_fsmap.h | 51 +++
fs/xfs/xfs_ioctl.c | 103 ++++++
fs/xfs/xfs_ioctl32.c | 2
fs/xfs/xfs_trace.h | 85 +++++
fs/xfs/xfs_trans.c | 22 +
fs/xfs/xfs_trans.h | 2
9 files changed, 1061 insertions(+)
create mode 100644 fs/xfs/xfs_fsmap.c
create mode 100644 fs/xfs/xfs_fsmap.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index c7515d4..0e7ee30 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -80,6 +80,7 @@ xfs-y += xfs_aops.o \
xfs_extent_busy.o \
xfs_file.o \
xfs_filestream.o \
+ xfs_fsmap.o \
xfs_fsops.o \
xfs_globals.o \
xfs_icache.o \
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index b72dc82..095bdf0 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -92,6 +92,18 @@ struct getbmapx {
#define BMV_OF_LAST 0x4 /* segment is the last in the file */
#define BMV_OF_SHARED 0x8 /* segment shared with another file */
+/* fmr_owner special values for FS_IOC_GETFSMAP */
+#define XFS_FMR_OWN_FREE FMR_OWN_FREE /* free space */
+#define XFS_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */
+#define XFS_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */
+#define XFS_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */
+#define XFS_FMR_OWN_AG FMR_OWNER('X', 3) /* per-AG metadata */
+#define XFS_FMR_OWN_INOBT FMR_OWNER('X', 4) /* inode btree blocks */
+#define XFS_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */
+#define XFS_FMR_OWN_REFC FMR_OWNER('X', 6) /* refcount tree */
+#define XFS_FMR_OWN_COW FMR_OWNER('X', 7) /* cow staging */
+#define XFS_FMR_OWN_DEFECTIVE FMR_OWNER('X', 8) /* bad blocks */
+
/*
* Structure for XFS_IOC_FSSETDM.
* For use by backup and restore programs to set the XFS on-disk inode
@@ -502,6 +514,7 @@ typedef struct xfs_swapext
#define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap)
#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64)
#define XFS_IOC_FREE_EOFBLOCKS _IOR ('X', 58, struct xfs_fs_eofblocks)
+/* XFS_IOC_GETFSMAP ------ hoisted 59 */
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
new file mode 100644
index 0000000..09d6b92
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.c
@@ -0,0 +1,782 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_error.h"
+#include "xfs_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bit.h"
+#include <linux/fsmap.h>
+#include "xfs_fsmap.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+
+/* Convert an xfs_fsmap to an fsmap. */
+void
+xfs_fsmap_from_internal(
+ struct fsmap *dest,
+ struct xfs_fsmap *src)
+{
+ dest->fmr_device = src->fmr_device;
+ dest->fmr_flags = src->fmr_flags;
+ dest->fmr_physical = BBTOB(src->fmr_physical);
+ dest->fmr_owner = src->fmr_owner;
+ dest->fmr_offset = BBTOB(src->fmr_offset);
+ dest->fmr_length = BBTOB(src->fmr_length);
+ dest->fmr_reserved[0] = 0;
+ dest->fmr_reserved[1] = 0;
+ dest->fmr_reserved[2] = 0;
+}
+
+/* Convert an fsmap to an xfs_fsmap. */
+void
+xfs_fsmap_to_internal(
+ struct xfs_fsmap *dest,
+ struct fsmap *src)
+{
+ dest->fmr_device = src->fmr_device;
+ dest->fmr_flags = src->fmr_flags;
+ dest->fmr_physical = BTOBBT(src->fmr_physical);
+ dest->fmr_owner = src->fmr_owner;
+ dest->fmr_offset = BTOBBT(src->fmr_offset);
+ dest->fmr_length = BTOBBT(src->fmr_length);
+}
+
+/* Convert an fsmap owner into an rmapbt owner. */
+static int
+xfs_fsmap_owner_to_rmap(
+ struct xfs_fsmap *fmr,
+ struct xfs_rmap_irec *rm)
+{
+ if (!(fmr->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
+ rm->rm_owner = fmr->fmr_owner;
+ return 0;
+ }
+
+ switch (fmr->fmr_owner) {
+ case 0: /* "lowest owner id possible" */
+ case -1ULL: /* "highest owner id possible" */
+ rm->rm_owner = 0;
+ break;
+ case XFS_FMR_OWN_FREE:
+ rm->rm_owner = XFS_RMAP_OWN_NULL;
+ break;
+ case XFS_FMR_OWN_UNKNOWN:
+ rm->rm_owner = XFS_RMAP_OWN_UNKNOWN;
+ break;
+ case XFS_FMR_OWN_FS:
+ rm->rm_owner = XFS_RMAP_OWN_FS;
+ break;
+ case XFS_FMR_OWN_LOG:
+ rm->rm_owner = XFS_RMAP_OWN_LOG;
+ break;
+ case XFS_FMR_OWN_AG:
+ rm->rm_owner = XFS_RMAP_OWN_AG;
+ break;
+ case XFS_FMR_OWN_INOBT:
+ rm->rm_owner = XFS_RMAP_OWN_INOBT;
+ break;
+ case XFS_FMR_OWN_INODES:
+ rm->rm_owner = XFS_RMAP_OWN_INODES;
+ break;
+ case XFS_FMR_OWN_REFC:
+ rm->rm_owner = XFS_RMAP_OWN_REFC;
+ break;
+ case XFS_FMR_OWN_COW:
+ rm->rm_owner = XFS_RMAP_OWN_COW;
+ break;
+ case XFS_FMR_OWN_DEFECTIVE: /* not implemented */
+ /* fall through */
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Convert an rmapbt owner into an fsmap owner. */
+static int
+xfs_fsmap_owner_from_rmap(
+ struct xfs_rmap_irec *rm,
+ struct xfs_fsmap *fmr)
+{
+ fmr->fmr_flags = 0;
+ if (!XFS_RMAP_NON_INODE_OWNER(rm->rm_owner)) {
+ fmr->fmr_owner = rm->rm_owner;
+ return 0;
+ }
+ fmr->fmr_flags |= FMR_OF_SPECIAL_OWNER;
+
+ switch (rm->rm_owner) {
+ case XFS_RMAP_OWN_FS:
+ fmr->fmr_owner = XFS_FMR_OWN_FS;
+ break;
+ case XFS_RMAP_OWN_LOG:
+ fmr->fmr_owner = XFS_FMR_OWN_LOG;
+ break;
+ case XFS_RMAP_OWN_AG:
+ fmr->fmr_owner = XFS_FMR_OWN_AG;
+ break;
+ case XFS_RMAP_OWN_INOBT:
+ fmr->fmr_owner = XFS_FMR_OWN_INOBT;
+ break;
+ case XFS_RMAP_OWN_INODES:
+ fmr->fmr_owner = XFS_FMR_OWN_INODES;
+ break;
+ case XFS_RMAP_OWN_REFC:
+ fmr->fmr_owner = XFS_FMR_OWN_REFC;
+ break;
+ case XFS_RMAP_OWN_COW:
+ fmr->fmr_owner = XFS_FMR_OWN_COW;
+ break;
+ default:
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
+
+/* getfsmap query state */
+struct xfs_getfsmap_info {
+ struct xfs_fsmap_head *head;
+ struct xfs_fsmap *rkey_low; /* lowest key */
+ xfs_fsmap_format_t formatter; /* formatting fn */
+ void *format_arg; /* format buffer */
+ bool last; /* last extent? */
+ xfs_daddr_t next_daddr; /* next daddr we expect */
+ u32 dev; /* device id */
+ u64 missing_owner; /* owner of holes */
+
+ xfs_agnumber_t agno; /* AG number, if applicable */
+ struct xfs_buf *agf_bp; /* AGF, for refcount queries */
+ struct xfs_rmap_irec low; /* low rmap key */
+ struct xfs_rmap_irec high; /* high rmap key */
+};
+
+/* Associate a device with a getfsmap handler. */
+struct xfs_getfsmap_dev {
+ u32 dev;
+ int (*fn)(struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info);
+};
+
+/* Compare two getfsmap device handlers. */
+static int
+xfs_getfsmap_dev_compare(
+ const void *p1,
+ const void *p2)
+{
+ const struct xfs_getfsmap_dev *d1 = p1;
+ const struct xfs_getfsmap_dev *d2 = p2;
+
+ return d1->dev - d2->dev;
+}
+
+/* Compare a record against our starting point */
+static bool
+xfs_getfsmap_rec_before_low_key(
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec)
+{
+ uint64_t x, y;
+
+ if (rec->rm_startblock < info->low.rm_startblock)
+ return true;
+ if (rec->rm_startblock > info->low.rm_startblock)
+ return false;
+
+ if (rec->rm_owner < info->low.rm_owner)
+ return true;
+ if (rec->rm_owner > info->low.rm_owner)
+ return false;
+
+ x = xfs_rmap_irec_offset_pack(rec);
+ y = xfs_rmap_irec_offset_pack(&info->low);
+ if (x < y)
+ return true;
+ return false;
+}
+
+/* Decide if this mapping is shared. */
+STATIC int
+xfs_getfsmap_is_shared(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec,
+ bool *stat)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *cur;
+ xfs_agblock_t fbno;
+ xfs_extlen_t flen;
+ int error;
+
+ *stat = false;
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return 0;
+ /* rt files will have agno set to NULLAGNUMBER */
+ if (info->agno == NULLAGNUMBER)
+ return 0;
+
+ /* Are there any shared blocks here? */
+ flen = 0;
+ cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
+ info->agno, NULL);
+
+ error = xfs_refcount_find_shared(cur, rec->rm_startblock,
+ rec->rm_blockcount, &fbno, &flen, false);
+
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ *stat = flen > 0;
+ return 0;
+}
+
+/*
+ * Format a reverse mapping for getfsmap, having translated rm_startblock
+ * into the appropriate daddr units.
+ */
+STATIC int
+xfs_getfsmap_helper(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_rmap_irec *rec,
+ xfs_daddr_t rec_daddr)
+{
+ struct xfs_fsmap fmr;
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_daddr_t key_end;
+ bool shared;
+ int error;
+
+ if (fatal_signal_pending(current))
+ return -EAGAIN;
+
+ /*
+ * Filter out records that start before our startpoint, if the
+ * caller requested that.
+ */
+ if (xfs_getfsmap_rec_before_low_key(info, rec)) {
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the caller passed in a length with the low record and
+ * the record represents a file data extent, we incremented
+ * the offset in the low key by the length in the hopes of
+ * finding reverse mappings for the physical blocks we just
+ * saw. We did /not/ increment next_daddr by the length
+ * because the range query would not be able to find shared
+ * extents within the same physical block range.
+ *
+ * However, the extent we've been fed could have a startblock
+ * past the passed-in low record. If this is the case,
+ * advance next_daddr to the end of the passed-in low record
+ * so we don't report the extent prior to this extent as
+ * free.
+ */
+ key_end = info->rkey_low->fmr_physical + info->rkey_low->fmr_length;
+ if (info->dev == info->rkey_low->fmr_device &&
+ info->next_daddr < key_end && rec_daddr >= key_end)
+ info->next_daddr = key_end;
+
+ /* Are we just counting mappings? */
+ if (info->head->fmh_count == 0) {
+ if (rec_daddr > info->next_daddr)
+ info->head->fmh_entries++;
+
+ if (info->last)
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+
+ info->head->fmh_entries++;
+
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the record starts past the last physical block we saw,
+ * then we've found some free space. Report that too.
+ */
+ if (rec_daddr > info->next_daddr) {
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+
+ trace_xfs_fsmap_mapping(mp, info->dev, info->agno,
+ XFS_DADDR_TO_FSB(mp, info->next_daddr),
+ XFS_DADDR_TO_FSB(mp, rec_daddr -
+ info->next_daddr),
+ info->missing_owner, 0);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = info->next_daddr;
+ fmr.fmr_owner = info->missing_owner;
+ fmr.fmr_offset = 0;
+ fmr.fmr_length = rec_daddr - info->next_daddr;
+ fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+ }
+
+ if (info->last)
+ goto out;
+
+ /* Fill out the extent we found */
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+
+ trace_xfs_fsmap_mapping(mp, info->dev, info->agno,
+ rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
+ rec->rm_offset);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = rec_daddr;
+ error = xfs_fsmap_owner_from_rmap(rec, &fmr);
+ if (error)
+ return error;
+ fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
+ fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
+ fmr.fmr_flags |= FMR_OF_PREALLOC;
+ if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+ fmr.fmr_flags |= FMR_OF_ATTR_FORK;
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
+ if (fmr.fmr_flags == 0) {
+ error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
+ if (error)
+ return error;
+ if (shared)
+ fmr.fmr_flags |= FMR_OF_SHARED;
+ }
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+
+out:
+ rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
+ if (info->next_daddr < rec_daddr)
+ info->next_daddr = rec_daddr;
+ return XFS_BTREE_QUERY_RANGE_CONTINUE;
+}
+
+/* Transform a rmapbt irec into a fsmap */
+STATIC int
+xfs_getfsmap_datadev_helper(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_getfsmap_info *info = priv;
+ xfs_fsblock_t fsb;
+ xfs_daddr_t rec_daddr;
+
+ fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->rm_startblock);
+ rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
+
+ return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
+}
+
+/* Transform a absolute-startblock rmap (rtdev, logdev) into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_helper(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_getfsmap_info *info = priv;
+ xfs_daddr_t rec_daddr;
+
+ rec_daddr = XFS_FSB_TO_BB(mp, rec->rm_startblock);
+
+ return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
+}
+
+/* Set rmap flags based on the getfsmap flags */
+static void
+xfs_getfsmap_set_irec_flags(
+ struct xfs_rmap_irec *irec,
+ struct xfs_fsmap *fmr)
+{
+ irec->rm_flags = 0;
+ if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
+ irec->rm_flags |= XFS_RMAP_ATTR_FORK;
+ if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
+ irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
+ if (fmr->fmr_flags & FMR_OF_PREALLOC)
+ irec->rm_flags |= XFS_RMAP_UNWRITTEN;
+}
+
+/* Execute a getfsmap query against the log device. */
+STATIC int
+xfs_getfsmap_logdev(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_fsmap *dkey_low = keys;
+ struct xfs_btree_cur cur;
+ struct xfs_rmap_irec rmap;
+ int error;
+
+ /* Set up search keys */
+ info->low.rm_startblock = XFS_BB_TO_FSBT(mp, dkey_low->fmr_physical);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(keys, &info->low);
+ if (error)
+ return error;
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+ error = xfs_fsmap_owner_to_rmap(keys + 1, &info->high);
+ if (error)
+ return error;
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+ info->missing_owner = XFS_FMR_OWN_FREE;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+ info->low.rm_startblock,
+ info->low.rm_blockcount,
+ info->low.rm_owner,
+ info->low.rm_offset);
+
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+ info->high.rm_startblock,
+ info->high.rm_blockcount,
+ info->high.rm_owner,
+ info->high.rm_offset);
+
+
+ if (dkey_low->fmr_physical > 0)
+ return 0;
+
+ rmap.rm_startblock = 0;
+ rmap.rm_blockcount = mp->m_sb.sb_logblocks;
+ rmap.rm_owner = XFS_RMAP_OWN_LOG;
+ rmap.rm_offset = 0;
+ rmap.rm_flags = 0;
+
+ cur.bc_mp = mp;
+ cur.bc_tp = tp;
+ return xfs_getfsmap_rtdev_helper(&cur, &rmap, info);
+}
+
+/* Execute a getfsmap query against the regular data device. */
+STATIC int
+xfs_getfsmap_datadev(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *bt_cur = NULL;
+ struct xfs_fsmap *dkey_low;
+ struct xfs_fsmap *dkey_high;
+ xfs_fsblock_t start_fsb;
+ xfs_fsblock_t end_fsb;
+ xfs_agnumber_t start_ag;
+ xfs_agnumber_t end_ag;
+ xfs_daddr_t eofs;
+ int error = 0;
+
+ dkey_low = keys;
+ dkey_high = keys + 1;
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (dkey_low->fmr_physical >= eofs)
+ return 0;
+ if (dkey_high->fmr_physical >= eofs)
+ dkey_high->fmr_physical = eofs - 1;
+ start_fsb = XFS_DADDR_TO_FSB(mp, dkey_low->fmr_physical);
+ end_fsb = XFS_DADDR_TO_FSB(mp, dkey_high->fmr_physical);
+
+ /* Set up search keys */
+ info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
+ if (error)
+ return error;
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+ info->missing_owner = XFS_FMR_OWN_FREE;
+
+ start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
+ end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
+
+ /* Query each AG */
+ for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
+ if (info->agno == end_ag) {
+ info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
+ end_fsb);
+ info->high.rm_offset = XFS_BB_TO_FSBT(mp,
+ dkey_high->fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
+ if (error)
+ goto err;
+ xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
+ }
+
+ if (bt_cur) {
+ xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+ bt_cur = NULL;
+ info->agf_bp = NULL;
+ }
+
+ error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
+ &info->agf_bp);
+ if (error)
+ goto err;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+ info->low.rm_startblock,
+ info->low.rm_blockcount,
+ info->low.rm_owner,
+ info->low.rm_offset);
+
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+ info->high.rm_startblock,
+ info->high.rm_blockcount,
+ info->high.rm_owner,
+ info->high.rm_offset);
+
+ bt_cur = xfs_rmapbt_init_cursor(mp, tp, info->agf_bp,
+ info->agno);
+ error = xfs_rmap_query_range(bt_cur, &info->low, &info->high,
+ xfs_getfsmap_datadev_helper, info);
+ if (error)
+ goto err;
+
+ if (info->agno == start_ag) {
+ info->low.rm_startblock = 0;
+ info->low.rm_owner = 0;
+ info->low.rm_offset = 0;
+ info->low.rm_flags = 0;
+ }
+ }
+
+ /* Report any free space at the end of the AG */
+ info->last = true;
+ error = xfs_getfsmap_datadev_helper(bt_cur, &info->high, info);
+ if (error)
+ goto err;
+
+err:
+ if (bt_cur)
+ xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ if (info->agf_bp)
+ info->agf_bp = NULL;
+
+ return error;
+}
+
+/* Do we recognize the device? */
+STATIC bool
+xfs_getfsmap_is_valid_device(
+ struct xfs_mount *mp,
+ struct xfs_fsmap *fm)
+{
+ if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
+ fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
+ return true;
+ if (mp->m_logdev_targp &&
+ fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
+ return true;
+ return false;
+}
+
+/* Ensure that the low key is less than the high key. */
+STATIC bool
+xfs_getfsmap_check_keys(
+ struct xfs_fsmap *low_key,
+ struct xfs_fsmap *high_key)
+{
+ if (low_key->fmr_device > high_key->fmr_device)
+ return false;
+ if (low_key->fmr_device < high_key->fmr_device)
+ return true;
+
+ if (low_key->fmr_physical > high_key->fmr_physical)
+ return false;
+ if (low_key->fmr_physical < high_key->fmr_physical)
+ return true;
+
+ if (low_key->fmr_owner > high_key->fmr_owner)
+ return false;
+ if (low_key->fmr_owner < high_key->fmr_owner)
+ return true;
+
+ if (low_key->fmr_offset > high_key->fmr_offset)
+ return false;
+ if (low_key->fmr_offset < high_key->fmr_offset)
+ return true;
+
+ return false;
+}
+
+#define XFS_GETFSMAP_DEVS 3
+/*
+ * Get filesystem's extents as described in head, and format for
+ * output. Calls formatter to fill the user's buffer until all
+ * extents are mapped, until the passed-in head->fmh_count slots have
+ * been filled, or until the formatter short-circuits the loop, if it
+ * is tracking filled-in extents on its own.
+ */
+int
+xfs_getfsmap(
+ struct xfs_mount *mp,
+ struct xfs_fsmap_head *head,
+ xfs_fsmap_format_t formatter,
+ void *arg)
+{
+ struct xfs_trans *tp = NULL;
+ struct xfs_fsmap *rkey_low; /* request keys */
+ struct xfs_fsmap *rkey_high;
+ struct xfs_fsmap dkeys[2]; /* per-dev keys */
+ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS];
+ struct xfs_getfsmap_info info = {0};
+ int i;
+ int error = 0;
+
+ if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+ return -EOPNOTSUPP;
+ if (head->fmh_iflags & ~FMH_IF_VALID)
+ return -EINVAL;
+ rkey_low = head->fmh_keys;
+ rkey_high = rkey_low + 1;
+ if (!xfs_getfsmap_is_valid_device(mp, rkey_low) ||
+ !xfs_getfsmap_is_valid_device(mp, rkey_high))
+ return -EINVAL;
+
+ head->fmh_entries = 0;
+
+ /* Set up our device handlers. */
+ memset(handlers, 0, sizeof(handlers));
+ handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
+ handlers[0].fn = xfs_getfsmap_datadev;
+ if (mp->m_logdev_targp != mp->m_ddev_targp) {
+ handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
+ handlers[1].fn = xfs_getfsmap_logdev;
+ }
+
+ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
+ xfs_getfsmap_dev_compare);
+
+ /*
+ * Since we allow the user to copy the last mapping from a previous
+ * call into the low key slot, we have to advance the low key by
+ * whatever the reported length is. If the offset field doesn't apply,
+ * move up the start block to the next extent and start over with the
+ * lowest owner/offset possible; otherwise it's file data, so move up
+ * the offset only.
+ */
+ dkeys[0] = *rkey_low;
+ if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
+ dkeys[0].fmr_physical += dkeys[0].fmr_length;
+ dkeys[0].fmr_owner = 0;
+ dkeys[0].fmr_offset = 0;
+ } else
+ dkeys[0].fmr_offset += dkeys[0].fmr_length;
+ memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
+
+ if (!xfs_getfsmap_check_keys(dkeys, rkey_high))
+ return -EINVAL;
+
+ info.rkey_low = rkey_low;
+ info.formatter = formatter;
+ info.format_arg = arg;
+ info.head = head;
+
+ /* For each device we support... */
+ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
+ /* Is this device within the range the user asked for? */
+ if (!handlers[i].fn)
+ continue;
+ if (rkey_low->fmr_device > handlers[i].dev)
+ continue;
+ if (rkey_high->fmr_device < handlers[i].dev)
+ break;
+
+ /*
+ * If this device number matches the high key, we have
+ * to pass the high key to the handler to limit the
+ * query results. If the device number exceeds the
+ * low key, zero out the low key so that we get
+ * everything from the beginning.
+ */
+ if (handlers[i].dev == rkey_high->fmr_device)
+ dkeys[1] = *rkey_high;
+ if (handlers[i].dev > rkey_low->fmr_device)
+ memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ break;
+
+ info.next_daddr = dkeys[0].fmr_physical;
+ info.dev = handlers[i].dev;
+ info.last = false;
+ info.agno = NULLAGNUMBER;
+ error = handlers[i].fn(tp, dkeys, &info);
+ if (error)
+ break;
+ xfs_trans_cancel(tp);
+ tp = NULL;
+ }
+
+ if (tp)
+ xfs_trans_cancel(tp);
+ head->fmh_oflags = FMH_OF_DEV_T;
+ return error;
+}
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
new file mode 100644
index 0000000..1943047
--- /dev/null
+++ b/fs/xfs/xfs_fsmap.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __XFS_FSMAP_H__
+#define __XFS_FSMAP_H__
+
+/* internal fsmap representation */
+struct xfs_fsmap {
+ dev_t fmr_device; /* device id */
+ uint32_t fmr_flags; /* mapping flags */
+ uint64_t fmr_physical; /* device offset of segment */
+ uint64_t fmr_owner; /* owner id */
+ xfs_fileoff_t fmr_offset; /* file offset of segment */
+ xfs_filblks_t fmr_length; /* length of segment, blocks */
+};
+
+struct xfs_fsmap_head {
+ uint32_t fmh_iflags; /* control flags */
+ uint32_t fmh_oflags; /* output flags */
+ unsigned int fmh_count; /* # of entries in array incl. input */
+ unsigned int fmh_entries; /* # of entries filled in (output). */
+
+ struct xfs_fsmap fmh_keys[2]; /* low and high keys */
+};
+
+void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
+void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
+
+/* fsmap to userspace formatter - copy to user & advance pointer */
+typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
+
+int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
+ xfs_fsmap_format_t formatter, void *arg);
+
+#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index c67cfb4..bbe1b58 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -41,6 +41,9 @@
#include "xfs_trans.h"
#include "xfs_pnfs.h"
#include "xfs_acl.h"
+#include "xfs_btree.h"
+#include <linux/fsmap.h>
+#include "xfs_fsmap.h"
#include <linux/capability.h>
#include <linux/dcache.h>
@@ -1607,6 +1610,103 @@ xfs_ioc_getbmapx(
return 0;
}
+struct getfsmap_info {
+ struct xfs_mount *mp;
+ struct fsmap __user *data;
+ __u32 last_flags;
+};
+
+STATIC int
+xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
+{
+ struct getfsmap_info *info = priv;
+ struct fsmap fm;
+
+ trace_xfs_getfsmap_mapping(info->mp, xfm->fmr_device, xfm->fmr_physical,
+ xfm->fmr_length, xfm->fmr_owner, xfm->fmr_offset,
+ xfm->fmr_flags);
+
+ info->last_flags = xfm->fmr_flags;
+ xfs_fsmap_from_internal(&fm, xfm);
+ if (copy_to_user(info->data, &fm, sizeof(struct fsmap)))
+ return -EFAULT;
+
+ info->data++;
+ return 0;
+}
+
+STATIC int
+xfs_ioc_getfsmap(
+ struct xfs_inode *ip,
+ void __user *arg)
+{
+ struct getfsmap_info info;
+ struct xfs_fsmap_head xhead = {0};
+ struct fsmap_head head;
+ bool aborted = false;
+ int error;
+
+ if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
+ return -EFAULT;
+ if (head.fmh_reserved[0] || head.fmh_reserved[1] ||
+ head.fmh_reserved[2] || head.fmh_reserved[3] ||
+ head.fmh_reserved[4] || head.fmh_reserved[5] ||
+ head.fmh_keys[0].fmr_reserved[0] ||
+ head.fmh_keys[0].fmr_reserved[1] ||
+ head.fmh_keys[0].fmr_reserved[2] ||
+ head.fmh_keys[1].fmr_reserved[0] ||
+ head.fmh_keys[1].fmr_reserved[1] ||
+ head.fmh_keys[1].fmr_reserved[2])
+ return -EINVAL;
+
+ xhead.fmh_iflags = head.fmh_iflags;
+ xhead.fmh_count = head.fmh_count;
+ xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
+ xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
+
+ trace_xfs_getfsmap_low_key(ip->i_mount,
+ xhead.fmh_keys[0].fmr_device,
+ xhead.fmh_keys[0].fmr_physical,
+ xhead.fmh_keys[0].fmr_length,
+ xhead.fmh_keys[0].fmr_owner,
+ xhead.fmh_keys[0].fmr_offset,
+ xhead.fmh_keys[0].fmr_flags);
+
+ trace_xfs_getfsmap_high_key(ip->i_mount,
+ xhead.fmh_keys[1].fmr_device,
+ xhead.fmh_keys[1].fmr_physical,
+ xhead.fmh_keys[1].fmr_length,
+ xhead.fmh_keys[1].fmr_owner,
+ xhead.fmh_keys[1].fmr_offset,
+ xhead.fmh_keys[1].fmr_flags);
+
+ info.mp = ip->i_mount;
+ info.data = ((__force struct fsmap_head *)arg)->fmh_recs;
+ error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
+ if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ error = 0;
+ aborted = true;
+ } else if (error)
+ return error;
+
+ /* If we didn't abort, set the "last" flag in the last fmx */
+ if (!aborted && xhead.fmh_entries) {
+ info.data--;
+ info.last_flags |= FMR_OF_LAST;
+ if (copy_to_user(&info.data->fmr_flags, &info.last_flags,
+ sizeof(info.last_flags)))
+ return -EFAULT;
+ }
+
+ /* copy back header */
+ head.fmh_entries = xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
+ return -EFAULT;
+
+ return 0;
+}
+
int
xfs_ioc_swapext(
xfs_swapext_t *sxp)
@@ -1787,6 +1887,9 @@ xfs_file_ioctl(
case XFS_IOC_GETBMAPX:
return xfs_ioc_getbmapx(ip, arg);
+ case FS_IOC_GETFSMAP:
+ return xfs_ioc_getfsmap(ip, arg);
+
case XFS_IOC_FD_TO_HANDLE:
case XFS_IOC_PATH_TO_HANDLE:
case XFS_IOC_PATH_TO_FSHANDLE: {
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 7c49938..fa0bc4d 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -20,6 +20,7 @@
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/fsmap.h>
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
@@ -554,6 +555,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_GOINGDOWN:
case XFS_IOC_ERROR_INJECTION:
case XFS_IOC_ERROR_CLEARALL:
+ case FS_IOC_GETFSMAP:
return xfs_file_ioctl(filp, cmd, p);
#ifndef BROKEN_X86_ALIGNMENT
/* These are handled fine if no alignment issues */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index d3d11905..125d568 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3270,6 +3270,91 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap);
DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
+/* fsmap traces */
+DECLARE_EVENT_CLASS(xfs_fsmap_class,
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
+ xfs_fsblock_t bno, xfs_filblks_t len, __uint64_t owner,
+ __uint64_t offset),
+ TP_ARGS(mp, keydev, agno, bno, len, owner, offset),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_fsblock_t, bno)
+ __field(xfs_filblks_t, len)
+ __field(__uint64_t, owner)
+ __field(__uint64_t, offset)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(keydev);
+ __entry->agno = agno;
+ __entry->bno = bno;
+ __entry->len = len;
+ __entry->owner = owner;
+ __entry->offset = offset;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld offset 0x%llx\n",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->agno,
+ __entry->bno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset)
+)
+#define DEFINE_FSMAP_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
+ xfs_fsblock_t bno, xfs_filblks_t len, __uint64_t owner, \
+ __uint64_t offset), \
+ TP_ARGS(mp, keydev, agno, bno, len, owner, offset))
+DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
+DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
+DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
+
+DECLARE_EVENT_CLASS(xfs_getfsmap_class,
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_daddr_t block,
+ xfs_daddr_t len, __uint64_t owner, __uint64_t offset,
+ __uint64_t flags),
+ TP_ARGS(mp, keydev, block, len, owner, offset, flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(xfs_daddr_t, block)
+ __field(xfs_daddr_t, len)
+ __field(__uint64_t, owner)
+ __field(__uint64_t, offset)
+ __field(__uint64_t, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(keydev);
+ __entry->block = block;
+ __entry->len = len;
+ __entry->owner = owner;
+ __entry->offset = offset;
+ __entry->flags = flags;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx\n",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->block,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+)
+#define DEFINE_GETFSMAP_EVENT(name) \
+DEFINE_EVENT(xfs_getfsmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_daddr_t block, \
+ xfs_daddr_t len, __uint64_t owner, __uint64_t offset, \
+ __uint64_t flags), \
+ TP_ARGS(mp, keydev, block, len, owner, offset, flags))
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
+DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 70f42ea..a280e12 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -263,6 +263,28 @@ xfs_trans_alloc(
}
/*
+ * Create an empty transaction with no reservation. This is a defensive
+ * mechanism for routines that query metadata without actually modifying
+ * them -- if the metadata being queried is somehow cross-linked (think a
+ * btree block pointer that points higher in the tree), we risk deadlock.
+ * However, blocks grabbed as part of a transaction can be re-grabbed.
+ * The verifiers will notice the corrupt block and the operation will fail
+ * back to userspace without deadlocking.
+ *
+ * Note the zero-length reservation; this transaction MUST be cancelled
+ * without any dirty data.
+ */
+int
+xfs_trans_alloc_empty(
+ struct xfs_mount *mp,
+ struct xfs_trans **tpp)
+{
+ struct xfs_trans_res resv = {0};
+
+ return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
+}
+
+/*
* Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits.
* For now, just store the change in the transaction structure.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 61b7fbd..98024cb 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -159,6 +159,8 @@ typedef struct xfs_trans {
int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
uint blocks, uint rtextents, uint flags,
struct xfs_trans **tpp);
+int xfs_trans_alloc_empty(struct xfs_mount *mp,
+ struct xfs_trans **tpp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 6/8] xfs: have getfsmap fall back to the freesp btrees when rmap is not present
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
` (4 preceding siblings ...)
2017-02-18 1:17 ` [PATCH 5/8] xfs: introduce the XFS_IOC_GETFSMAP ioctl Darrick J. Wong
@ 2017-02-18 1:17 ` Darrick J. Wong
2017-02-18 1:18 ` [PATCH 7/8] xfs: getfsmap should fall back to rtbitmap when rtrmapbt " Darrick J. Wong
` (2 subsequent siblings)
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:17 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
If the reverse-mapping btree isn't available, fall back to the
free space btrees to provide partial reverse mapping information.
The online scrub tool can make use of even partial information to
speed up the data block scan.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_fsmap.c | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 152 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 09d6b92..82e44a9 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -40,6 +40,7 @@
#include "xfs_fsmap.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
+#include "xfs_alloc_btree.h"
/* Convert an xfs_fsmap to an fsmap. */
void
@@ -158,6 +159,9 @@ xfs_fsmap_owner_from_rmap(
case XFS_RMAP_OWN_COW:
fmr->fmr_owner = XFS_FMR_OWN_COW;
break;
+ case XFS_RMAP_OWN_NULL: /* "free" */
+ fmr->fmr_owner = XFS_FMR_OWN_FREE;
+ break;
default:
return -EFSCORRUPTED;
}
@@ -433,6 +437,31 @@ xfs_getfsmap_rtdev_helper(
return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
}
+/* Transform a bnobt irec into a fsmap */
+STATIC int
+xfs_getfsmap_datadev_bnobt_helper(
+ struct xfs_btree_cur *cur,
+ struct xfs_alloc_rec_incore *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_getfsmap_info *info = priv;
+ struct xfs_rmap_irec irec;
+ xfs_fsblock_t fsb;
+ xfs_daddr_t rec_daddr;
+
+ fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->ar_startblock);
+ rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
+
+ irec.rm_startblock = rec->ar_startblock;
+ irec.rm_blockcount = rec->ar_blockcount;
+ irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
+ irec.rm_offset = 0;
+ irec.rm_flags = 0;
+
+ return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
+}
+
/* Set rmap flags based on the getfsmap flags */
static void
xfs_getfsmap_set_irec_flags(
@@ -621,6 +650,125 @@ xfs_getfsmap_datadev(
return error;
}
+/* Execute a getfsmap query against the regular data device's bnobt. */
+STATIC int
+xfs_getfsmap_datadev_bnobt(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_btree_cur *bt_cur = NULL;
+ struct xfs_fsmap *dkey_low;
+ struct xfs_fsmap *dkey_high;
+ struct xfs_alloc_rec_incore alow;
+ struct xfs_alloc_rec_incore ahigh;
+ xfs_fsblock_t start_fsb;
+ xfs_fsblock_t end_fsb;
+ xfs_agnumber_t start_ag;
+ xfs_agnumber_t end_ag;
+ xfs_daddr_t eofs;
+ int error = 0;
+
+ dkey_low = keys;
+ dkey_high = keys + 1;
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (dkey_low->fmr_physical >= eofs)
+ return 0;
+ if (dkey_high->fmr_physical >= eofs)
+ dkey_high->fmr_physical = eofs - 1;
+ start_fsb = XFS_DADDR_TO_FSB(mp, dkey_low->fmr_physical);
+ end_fsb = XFS_DADDR_TO_FSB(mp, dkey_high->fmr_physical);
+
+ /* Set up search keys */
+ info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
+ if (error)
+ return error;
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+
+ info->missing_owner = XFS_FMR_OWN_UNKNOWN;
+
+ start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
+ end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
+
+ /* Query each AG */
+ for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
+ if (info->agno == end_ag) {
+ info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
+ end_fsb);
+ info->high.rm_offset = XFS_BB_TO_FSBT(mp,
+ dkey_high->fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
+ if (error)
+ goto err;
+ xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
+ }
+
+ if (bt_cur) {
+ xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+ bt_cur = NULL;
+ info->agf_bp = NULL;
+ }
+
+ error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
+ &info->agf_bp);
+ if (error)
+ goto err;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+ info->low.rm_startblock,
+ info->low.rm_blockcount,
+ info->low.rm_owner,
+ info->low.rm_offset);
+
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+ info->high.rm_startblock,
+ info->high.rm_blockcount,
+ info->high.rm_owner,
+ info->high.rm_offset);
+
+ bt_cur = xfs_allocbt_init_cursor(mp, tp, info->agf_bp,
+ info->agno, XFS_BTNUM_BNO);
+ alow.ar_startblock = info->low.rm_startblock;
+ ahigh.ar_startblock = info->high.rm_startblock;
+ error = xfs_alloc_query_range(bt_cur, &alow, &ahigh,
+ xfs_getfsmap_datadev_bnobt_helper, info);
+ if (error)
+ goto err;
+
+ if (info->agno == start_ag) {
+ info->low.rm_startblock = 0;
+ info->low.rm_owner = 0;
+ info->low.rm_offset = 0;
+ info->low.rm_flags = 0;
+ }
+ }
+
+ /* Report any free space at the end of the AG */
+ info->last = true;
+ error = xfs_getfsmap_datadev_bnobt_helper(bt_cur, &ahigh, info);
+ if (error)
+ goto err;
+
+err:
+ if (bt_cur)
+ xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ if (info->agf_bp)
+ info->agf_bp = NULL;
+
+ return error;
+}
+
/* Do we recognize the device? */
STATIC bool
xfs_getfsmap_is_valid_device(
@@ -689,8 +837,6 @@ xfs_getfsmap(
int i;
int error = 0;
- if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
- return -EOPNOTSUPP;
if (head->fmh_iflags & ~FMH_IF_VALID)
return -EINVAL;
rkey_low = head->fmh_keys;
@@ -704,7 +850,10 @@ xfs_getfsmap(
/* Set up our device handlers. */
memset(handlers, 0, sizeof(handlers));
handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
- handlers[0].fn = xfs_getfsmap_datadev;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ handlers[0].fn = xfs_getfsmap_datadev;
+ else
+ handlers[0].fn = xfs_getfsmap_datadev_bnobt;
if (mp->m_logdev_targp != mp->m_ddev_targp) {
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 7/8] xfs: getfsmap should fall back to rtbitmap when rtrmapbt not present
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
` (5 preceding siblings ...)
2017-02-18 1:17 ` [PATCH 6/8] xfs: have getfsmap fall back to the freesp btrees when rmap is not present Darrick J. Wong
@ 2017-02-18 1:18 ` Darrick J. Wong
2017-02-18 1:18 ` [PATCH 8/8] ext4: support GETFSMAP ioctls Darrick J. Wong
2017-02-21 22:14 ` [PATCH] ioctl_getfsmap.2: document the GETFSMAP ioctl Darrick J. Wong
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:18 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Use the realtime bitmap to return freespace information when the
rtrmapbt isn't present. Note that the rtrmapbt fsmap implementation
will show up later with the rtrmapbt patchset.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_fsmap.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_rtalloc.h | 2 +
2 files changed, 136 insertions(+)
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 82e44a9..e769322 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -41,6 +41,7 @@
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
+#include "xfs_rtalloc.h"
/* Convert an xfs_fsmap to an fsmap. */
void
@@ -437,6 +438,30 @@ xfs_getfsmap_rtdev_helper(
return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
}
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+ struct xfs_trans *tp,
+ xfs_rtblock_t start,
+ xfs_rtblock_t end,
+ void *priv)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_getfsmap_info *info = priv;
+ struct xfs_rmap_irec irec;
+ xfs_daddr_t rec_daddr;
+
+ rec_daddr = XFS_FSB_TO_BB(mp, start);
+
+ irec.rm_startblock = start;
+ irec.rm_blockcount = end - start + 1;
+ irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
+ irec.rm_offset = 0;
+ irec.rm_flags = 0;
+
+ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
+
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
@@ -536,6 +561,108 @@ xfs_getfsmap_logdev(
return xfs_getfsmap_rtdev_helper(&cur, &rmap, info);
}
+/* Execute a getfsmap query against the realtime data device (rtbitmap). */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap(
+ struct xfs_trans *tp,
+ struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_fsmap *dkey_low;
+ struct xfs_fsmap *dkey_high;
+ xfs_fsblock_t start_fsb;
+ xfs_fsblock_t end_fsb;
+ xfs_rtblock_t rtstart;
+ xfs_rtblock_t rtend;
+ xfs_rtblock_t rem;
+ xfs_daddr_t eofs;
+ int is_free;
+ int error = 0;
+
+ dkey_low = keys;
+ dkey_high = keys + 1;
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ if (dkey_low->fmr_physical >= eofs)
+ return 0;
+ if (dkey_high->fmr_physical >= eofs)
+ dkey_high->fmr_physical = eofs - 1;
+ start_fsb = XFS_BB_TO_FSBT(mp, dkey_low->fmr_physical);
+ end_fsb = XFS_BB_TO_FSB(mp, dkey_high->fmr_physical);
+
+ /* Set up search keys */
+ info->low.rm_startblock = start_fsb;
+ error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
+ if (error)
+ return error;
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
+ info->low.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
+
+ info->high.rm_startblock = end_fsb;
+ error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
+ if (error)
+ return error;
+ info->high.rm_offset = XFS_BB_TO_FSBT(mp, dkey_high->fmr_offset);
+ info->high.rm_blockcount = 0;
+ xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
+
+ info->missing_owner = XFS_FMR_OWN_UNKNOWN;
+
+ trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
+ info->low.rm_startblock,
+ info->low.rm_blockcount,
+ info->low.rm_owner,
+ info->low.rm_offset);
+
+ trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
+ info->high.rm_startblock,
+ info->high.rm_blockcount,
+ info->high.rm_owner,
+ info->high.rm_offset);
+
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+
+ /* Iterate the bitmap, looking for discrepancies. */
+ rtstart = 0;
+ rem = mp->m_sb.sb_rblocks;
+ while (rem) {
+ /* Is the first block free? */
+ error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
+ &is_free);
+ if (error)
+ goto out_unlock;
+
+ /* How long does the extent go for? */
+ error = xfs_rtfind_forw(mp, tp, rtstart,
+ mp->m_sb.sb_rblocks - 1, &rtend);
+ if (error)
+ goto out_unlock;
+
+ if (is_free) {
+ error = xfs_getfsmap_rtdev_rtbitmap_helper(tp,
+ rtstart, rtend, info);
+ if (error)
+ goto out_unlock;
+ }
+
+ rem -= rtend - rtstart + 1;
+ rtstart = rtend + 1;
+ }
+
+out_unlock:
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+
+ /* Report any free space at the end of the rtdev */
+ info->last = true;
+ error = xfs_getfsmap_rtdev_rtbitmap_helper(tp, end_fsb, 0, info);
+ if (error)
+ goto err;
+
+err:
+ return error;
+}
+
/* Execute a getfsmap query against the regular data device. */
STATIC int
xfs_getfsmap_datadev(
@@ -781,6 +908,9 @@ xfs_getfsmap_is_valid_device(
if (mp->m_logdev_targp &&
fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
return true;
+ if (mp->m_rtdev_targp &&
+ fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
+ return true;
return false;
}
@@ -858,6 +988,10 @@ xfs_getfsmap(
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
}
+ if (mp->m_rtdev_targp) {
+ handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
+ handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
+ }
xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
xfs_getfsmap_dev_compare);
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 355dd9e..f798a3e 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -126,6 +126,8 @@ int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
+# define xfs_rtcheck_range(...) (ENOSYS)
+# define xfs_rtfind_forw(...) (ENOSYS)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH 8/8] ext4: support GETFSMAP ioctls
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
` (6 preceding siblings ...)
2017-02-18 1:18 ` [PATCH 7/8] xfs: getfsmap should fall back to rtbitmap when rtrmapbt " Darrick J. Wong
@ 2017-02-18 1:18 ` Darrick J. Wong
2017-02-21 22:14 ` [PATCH] ioctl_getfsmap.2: document the GETFSMAP ioctl Darrick J. Wong
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-18 1:18 UTC (permalink / raw)
To: darrick.wong; +Cc: linux-xfs, linux-fsdevel, linux-ext4
From: Darrick J. Wong <darrick.wong@oracle.com>
Support the GETFSMAP ioctls so that we can use the xfs free space
management tools to probe ext4 as well. Note that this is a partial
implementation -- we only report fixed-location metadata and free space;
everything else is reported as "unknown".
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/ext4/Makefile | 2
fs/ext4/fsmap.c | 742 +++++++++++++++++++++++++++++++++++++++++++
fs/ext4/fsmap.h | 72 ++++
fs/ext4/ioctl.c | 108 ++++++
fs/ext4/mballoc.c | 49 +++
fs/ext4/mballoc.h | 17 +
include/trace/events/ext4.h | 78 +++++
7 files changed, 1067 insertions(+), 1 deletion(-)
create mode 100644 fs/ext4/fsmap.c
create mode 100644 fs/ext4/fsmap.h
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 354103f..be515aa 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,7 +8,7 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
- xattr_trusted.o inline.o readpage.o sysfs.o
+ xattr_trusted.o inline.o readpage.o sysfs.o fsmap.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
new file mode 100644
index 0000000..a9033c8d
--- /dev/null
+++ b/fs/ext4/fsmap.c
@@ -0,0 +1,742 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "ext4.h"
+#include <linux/fsmap.h>
+#include "fsmap.h"
+#include "mballoc.h"
+#include <linux/sort.h>
+#include <linux/list_sort.h>
+#include <trace/events/ext4.h>
+
+/* Convert an ext4_fsmap to an fsmap. */
+void
+ext4_fsmap_from_internal(
+ struct super_block *sb,
+ struct fsmap *dest,
+ struct ext4_fsmap *src)
+{
+ dest->fmr_device = src->fmr_device;
+ dest->fmr_flags = src->fmr_flags;
+ dest->fmr_physical = src->fmr_physical << sb->s_blocksize_bits;
+ dest->fmr_owner = src->fmr_owner;
+ dest->fmr_offset = 0;
+ dest->fmr_length = src->fmr_length << sb->s_blocksize_bits;
+ dest->fmr_reserved[0] = 0;
+ dest->fmr_reserved[1] = 0;
+ dest->fmr_reserved[2] = 0;
+}
+
+/* Convert an fsmap to an ext4_fsmap. */
+void
+ext4_fsmap_to_internal(
+ struct super_block *sb,
+ struct ext4_fsmap *dest,
+ struct fsmap *src)
+{
+ dest->fmr_device = src->fmr_device;
+ dest->fmr_flags = src->fmr_flags;
+ dest->fmr_physical = src->fmr_physical >> sb->s_blocksize_bits;
+ dest->fmr_owner = src->fmr_owner;
+ dest->fmr_length = src->fmr_length >> sb->s_blocksize_bits;
+}
+
+/* getfsmap query state */
+struct ext4_getfsmap_info {
+ struct ext4_fsmap_head *head;
+ struct ext4_fsmap *rkey_low; /* lowest key */
+ ext4_fsmap_format_t formatter; /* formatting fn */
+ void *format_arg; /* format buffer */
+ bool last; /* last extent? */
+ ext4_fsblk_t next_fsblk; /* next fsblock we expect */
+ u32 dev; /* device id */
+
+ ext4_group_t agno; /* AG number, if applicable */
+ struct ext4_fsmap low; /* low rmap key */
+ struct ext4_fsmap high; /* high rmap key */
+ struct ext4_fsmap lastfree; /* free ext at end of last AG */
+ struct list_head meta_list; /* fixed metadata list */
+};
+
+/* Associate a device with a getfsmap handler. */
+struct ext4_getfsmap_dev {
+ u32 dev;
+ int (*fn)(struct super_block *sb,
+ struct ext4_fsmap *keys,
+ struct ext4_getfsmap_info *info);
+};
+
+/* Compare two getfsmap device handlers. */
+static int
+ext4_getfsmap_dev_compare(
+ const void *p1,
+ const void *p2)
+{
+ const struct ext4_getfsmap_dev *d1 = p1;
+ const struct ext4_getfsmap_dev *d2 = p2;
+
+ return d1->dev - d2->dev;
+}
+
+/* Compare a record against our starting point */
+static bool
+ext4_getfsmap_rec_before_low_key(
+ struct ext4_getfsmap_info *info,
+ struct ext4_fsmap *rec)
+{
+ return rec->fmr_physical < info->low.fmr_physical;
+}
+
+/*
+ * Format a reverse mapping for getfsmap, having translated rm_startblock
+ * into the appropriate daddr units.
+ */
+static int
+ext4_getfsmap_helper(
+ struct super_block *sb,
+ struct ext4_getfsmap_info *info,
+ struct ext4_fsmap *rec)
+{
+ struct ext4_fsmap fmr;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_fsblk_t rec_fsblk = rec->fmr_physical;
+ ext4_fsblk_t key_end;
+ ext4_group_t agno;
+ ext4_grpblk_t cno;
+ int error;
+
+ if (fatal_signal_pending(current))
+ return -EAGAIN;
+
+ /*
+ * Filter out records that start before our startpoint, if the
+ * caller requested that.
+ */
+ if (ext4_getfsmap_rec_before_low_key(info, rec)) {
+ rec_fsblk += rec->fmr_length;
+ if (info->next_fsblk < rec_fsblk)
+ info->next_fsblk = rec_fsblk;
+ return EXT4_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the caller passed in a length with the low record and
+ * the record represents a file data extent, we incremented
+ * the offset in the low key by the length in the hopes of
+ * finding reverse mappings for the physical blocks we just
+ * saw. We did /not/ increment next_daddr by the length
+ * because the range query would not be able to find shared
+ * extents within the same physical block range.
+ *
+ * However, the extent we've been fed could have a startblock
+ * past the passed-in low record. If this is the case,
+ * advance next_daddr to the end of the passed-in low record
+ * so we don't report the extent prior to this extent as
+ * free.
+ */
+ key_end = info->rkey_low->fmr_physical + info->rkey_low->fmr_length;
+ if (info->dev == info->rkey_low->fmr_device &&
+ info->next_fsblk < key_end && rec_fsblk >= key_end)
+ info->next_fsblk = key_end;
+
+ /* Are we just counting mappings? */
+ if (info->head->fmh_count == 0) {
+ if (rec_fsblk > info->next_fsblk)
+ info->head->fmh_entries++;
+
+ if (info->last)
+ return EXT4_QUERY_RANGE_CONTINUE;
+
+ info->head->fmh_entries++;
+
+ rec_fsblk += rec->fmr_length;
+ if (info->next_fsblk < rec_fsblk)
+ info->next_fsblk = rec_fsblk;
+ return EXT4_QUERY_RANGE_CONTINUE;
+ }
+
+ /*
+ * If the record starts past the last physical block we saw,
+ * then we've found some free space. Report that too.
+ */
+ if (rec_fsblk > info->next_fsblk) {
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return EXT4_QUERY_RANGE_ABORT;
+
+ ext4_get_group_no_and_offset(sb, info->next_fsblk, &agno, &cno);
+ trace_ext4_fsmap_mapping(sb, info->dev, agno,
+ EXT4_C2B(sbi, cno),
+ rec_fsblk - info->next_fsblk,
+ EXT4_FMR_OWN_UNKNOWN);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = info->next_fsblk;
+ fmr.fmr_owner = EXT4_FMR_OWN_UNKNOWN;
+ fmr.fmr_length = rec_fsblk - info->next_fsblk;
+ fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+ }
+
+ if (info->last)
+ goto out;
+
+ /* Fill out the extent we found */
+ if (info->head->fmh_entries >= info->head->fmh_count)
+ return EXT4_QUERY_RANGE_ABORT;
+
+ ext4_get_group_no_and_offset(sb, rec_fsblk, &agno, &cno);
+ trace_ext4_fsmap_mapping(sb, info->dev, agno, EXT4_C2B(sbi, cno),
+ rec->fmr_length, rec->fmr_owner);
+
+ fmr.fmr_device = info->dev;
+ fmr.fmr_physical = rec_fsblk;
+ fmr.fmr_owner = rec->fmr_owner;
+ fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
+ fmr.fmr_length = rec->fmr_length;
+ error = info->formatter(&fmr, info->format_arg);
+ if (error)
+ return error;
+ info->head->fmh_entries++;
+
+out:
+ rec_fsblk += rec->fmr_length;
+ if (info->next_fsblk < rec_fsblk)
+ info->next_fsblk = rec_fsblk;
+ return EXT4_QUERY_RANGE_CONTINUE;
+}
+
+static inline ext4_fsblk_t
+ext4_fsmap_next_pblk(
+ struct ext4_fsmap *f)
+{
+ return f->fmr_physical + f->fmr_length;
+}
+
+/* Transform a blockgroup's free record into a fsmap */
+static int
+ext4_getfsmap_datadev_helper(
+ struct super_block *sb,
+ ext4_group_t agno,
+ ext4_grpblk_t start,
+ ext4_grpblk_t len,
+ void *priv)
+{
+ struct ext4_fsmap irec;
+ struct ext4_getfsmap_info *info = priv;
+ struct ext4_metadata_fsmap *p;
+ struct ext4_metadata_fsmap *tmp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_fsblk_t fsb;
+ ext4_fsblk_t fslen;
+ int error;
+
+ fsb = (EXT4_C2B(sbi, start) + ext4_group_first_block_no(sb, agno));
+ fslen = EXT4_C2B(sbi, len);
+
+ /* If the retained free extent record is set... */
+ if (info->lastfree.fmr_owner) {
+ /* ...and abuts this one, lengthen it and return. */
+ if (ext4_fsmap_next_pblk(&info->lastfree) == fsb) {
+ info->lastfree.fmr_length += fslen;
+ return 0;
+ }
+
+ /*
+ * There's a gap between the two free extents; emit the
+ * retained extent prior to merging the meta_list.
+ */
+ error = ext4_getfsmap_helper(sb, info, &info->lastfree);
+ if (error)
+ return error;
+ info->lastfree.fmr_owner = 0;
+ }
+
+ /* Merge in any relevant extents from the meta_list */
+ list_for_each_entry_safe(p, tmp, &info->meta_list, mf_list) {
+ if (p->mf_physical + p->mf_length <= info->next_fsblk) {
+ list_del(&p->mf_list);
+ kfree(p);
+ } else if (p->mf_physical < fsb) {
+ irec.fmr_physical = p->mf_physical;
+ irec.fmr_length = p->mf_length;
+ irec.fmr_owner = p->mf_owner;
+ irec.fmr_flags = 0;
+
+ error = ext4_getfsmap_helper(sb, info, &irec);
+ if (error)
+ return error;
+
+ list_del(&p->mf_list);
+ kfree(p);
+ }
+ }
+
+ irec.fmr_physical = fsb;
+ irec.fmr_length = fslen;
+ irec.fmr_owner = EXT4_FMR_OWN_FREE;
+ irec.fmr_flags = 0;
+
+ /* If this is a free extent at the end of an AG, buffer it. */
+ if (ext4_fsmap_next_pblk(&irec) ==
+ ext4_group_first_block_no(sb, agno + 1)) {
+ info->lastfree = irec;
+ return 0;
+ }
+
+ /* Otherwise, emit it */
+ return ext4_getfsmap_helper(sb, info, &irec);
+}
+
+/* Execute a getfsmap query against the log device. */
+static int
+ext4_getfsmap_logdev(
+ struct super_block *sb,
+ struct ext4_fsmap *keys,
+ struct ext4_getfsmap_info *info)
+{
+ struct ext4_fsmap *dkey_low = keys;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
+ struct ext4_fsmap irec;
+
+ /* Set up search keys */
+ info->low = *dkey_low;
+ info->low.fmr_length = 0;
+
+ memset(&info->high, 0xFF, sizeof(info->high));
+
+ trace_ext4_fsmap_low_key(sb, info->dev, 0,
+ info->low.fmr_physical,
+ info->low.fmr_length,
+ info->low.fmr_owner);
+
+ trace_ext4_fsmap_high_key(sb, info->dev, 0,
+ info->high.fmr_physical,
+ info->high.fmr_length,
+ info->high.fmr_owner);
+
+ if (dkey_low->fmr_physical > 0)
+ return 0;
+ irec.fmr_physical = journal->j_blk_offset;
+ irec.fmr_length = journal->j_maxlen;
+ irec.fmr_owner = EXT4_FMR_OWN_LOG;
+ irec.fmr_flags = 0;
+
+ return ext4_getfsmap_helper(sb, info, &irec);
+}
+
+/*
+ * This function returns the number of file system metadata blocks at
+ * the beginning of a block group, including the reserved gdt blocks.
+ */
+static unsigned int
+ext4_getfsmap_count_group_meta_blocks(
+ struct super_block *sb,
+ ext4_group_t block_group)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ unsigned int num;
+
+ /* Check for superblock and gdt backups in this group */
+ num = ext4_bg_has_super(sb, block_group);
+
+ if (!ext4_has_feature_meta_bg(sb) ||
+ block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
+ sbi->s_desc_per_block) {
+ if (num) {
+ num += ext4_bg_num_gdb(sb, block_group);
+ num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
+ }
+ } else { /* For META_BG_BLOCK_GROUPS */
+ num += ext4_bg_num_gdb(sb, block_group);
+ }
+ return num;
+}
+
+/* Compare two fixed metadata items. */
+static int
+ext4_getfsmap_compare_fixed_metadata(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
+{
+ struct ext4_metadata_fsmap *fa;
+ struct ext4_metadata_fsmap *fb;
+
+ fa = container_of(a, struct ext4_metadata_fsmap, mf_list);
+ fb = container_of(b, struct ext4_metadata_fsmap, mf_list);
+ if (fa->mf_physical < fb->mf_physical)
+ return -1;
+ else if (fa->mf_physical > fb->mf_physical)
+ return 1;
+ return 0;
+}
+
+/* Merge adjacent extents of fixed metadata. */
+static void
+ext4_getfsmap_merge_fixed_metadata(
+ struct list_head *meta_list)
+{
+ struct ext4_metadata_fsmap *p;
+ struct ext4_metadata_fsmap *prev = NULL;
+ struct ext4_metadata_fsmap *tmp;
+
+ list_for_each_entry_safe(p, tmp, meta_list, mf_list) {
+ if (!prev) {
+ prev = p;
+ continue;
+ }
+
+ if (prev->mf_owner == p->mf_owner &&
+ prev->mf_physical + prev->mf_length == p->mf_physical) {
+ prev->mf_length += p->mf_length;
+ list_del(&p->mf_list);
+ kfree(p);
+ } else
+ prev = p;
+ }
+}
+
+/* Free a list of fixed metadata. */
+static void
+ext4_getfsmap_free_fixed_metadata(
+ struct list_head *meta_list)
+{
+ struct ext4_metadata_fsmap *p;
+ struct ext4_metadata_fsmap *tmp;
+
+ list_for_each_entry_safe(p, tmp, meta_list, mf_list) {
+ list_del(&p->mf_list);
+ kfree(p);
+ }
+}
+
+/* Find all the fixed metadata in the filesystem. */
+int
+ext4_getfsmap_find_fixed_metadata(
+ struct super_block *sb,
+ struct list_head *meta_list)
+{
+ struct ext4_metadata_fsmap *fsm;
+ struct ext4_group_desc *gdp;
+ ext4_group_t agno;
+ unsigned int nr_super;
+ int error;
+
+ INIT_LIST_HEAD(meta_list);
+
+ /* Collect everything. */
+ for (agno = 0; agno < EXT4_SB(sb)->s_groups_count; agno++) {
+ gdp = ext4_get_group_desc(sb, agno, NULL);
+ if (!gdp) {
+ error = -EFSCORRUPTED;
+ goto err;
+ }
+
+ /* Superblock & GDT */
+ nr_super = ext4_getfsmap_count_group_meta_blocks(sb, agno);
+ if (nr_super) {
+ fsm = kmalloc(sizeof(*fsm), GFP_NOFS);
+ if (!fsm) {
+ error = -ENOMEM;
+ goto err;
+ }
+ fsm->mf_physical = ext4_group_first_block_no(sb, agno);
+ fsm->mf_owner = EXT4_FMR_OWN_FS;
+ fsm->mf_length = nr_super;
+ list_add_tail(&fsm->mf_list, meta_list);
+ }
+
+ /* Block bitmap */
+ fsm = kmalloc(sizeof(*fsm), GFP_NOFS);
+ if (!fsm) {
+ error = -ENOMEM;
+ goto err;
+ }
+ fsm->mf_physical = ext4_block_bitmap(sb, gdp);
+ fsm->mf_owner = EXT4_FMR_OWN_BLKBM;
+ fsm->mf_length = 1;
+ list_add_tail(&fsm->mf_list, meta_list);
+
+ /* Inode bitmap */
+ fsm = kmalloc(sizeof(*fsm), GFP_NOFS);
+ if (!fsm) {
+ error = -ENOMEM;
+ goto err;
+ }
+ fsm->mf_physical = ext4_inode_bitmap(sb, gdp);
+ fsm->mf_owner = EXT4_FMR_OWN_INOBM;
+ fsm->mf_length = 1;
+ list_add_tail(&fsm->mf_list, meta_list);
+
+ /* Inodes */
+ fsm = kmalloc(sizeof(*fsm), GFP_NOFS);
+ if (!fsm) {
+ error = -ENOMEM;
+ goto err;
+ }
+ fsm->mf_physical = ext4_inode_table(sb, gdp);
+ fsm->mf_owner = EXT4_FMR_OWN_INODES;
+ fsm->mf_length = EXT4_SB(sb)->s_itb_per_group;
+ list_add_tail(&fsm->mf_list, meta_list);
+ }
+
+ /* Sort the list */
+ list_sort(NULL, meta_list, ext4_getfsmap_compare_fixed_metadata);
+
+ /* Merge adjacent extents */
+ ext4_getfsmap_merge_fixed_metadata(meta_list);
+
+ return 0;
+err:
+ ext4_getfsmap_free_fixed_metadata(meta_list);
+ return error;
+}
+
+/* Execute a getfsmap query against the buddy bitmaps */
+static int
+ext4_getfsmap_datadev(
+ struct super_block *sb,
+ struct ext4_fsmap *keys,
+ struct ext4_getfsmap_info *info)
+{
+ struct ext4_fsmap *dkey_low;
+ struct ext4_fsmap *dkey_high;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_fsblk_t start_fsb;
+ ext4_fsblk_t end_fsb;
+ ext4_fsblk_t eofs;
+ ext4_group_t start_ag;
+ ext4_group_t end_ag;
+ ext4_grpblk_t first_cluster;
+ ext4_grpblk_t last_cluster;
+ int error = 0;
+
+ dkey_low = keys;
+ dkey_high = keys + 1;
+ eofs = ext4_blocks_count(sbi->s_es);
+ if (dkey_low->fmr_physical >= eofs)
+ return 0;
+ if (dkey_high->fmr_physical >= eofs)
+ dkey_high->fmr_physical = eofs - 1;
+ start_fsb = dkey_low->fmr_physical;
+ end_fsb = dkey_high->fmr_physical;
+
+ /* Determine first and last group to examine based on start and end */
+ ext4_get_group_no_and_offset(sb, start_fsb, &start_ag, &first_cluster);
+ ext4_get_group_no_and_offset(sb, end_fsb, &end_ag, &last_cluster);
+
+ /* Set up search keys */
+ info->low = *dkey_low;
+ info->low.fmr_physical = EXT4_C2B(sbi, first_cluster);
+ info->low.fmr_length = 0;
+
+ memset(&info->high, 0xFF, sizeof(info->high));
+
+ /* Assemble a list of all the fixed-location metadata. */
+ error = ext4_getfsmap_find_fixed_metadata(sb, &info->meta_list);
+ if (error)
+ goto err;
+
+ /* Query each AG */
+ for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
+ if (info->agno == end_ag) {
+ info->high = *dkey_high;
+ info->high.fmr_physical = EXT4_C2B(sbi, last_cluster);
+ info->high.fmr_length = 0;
+ }
+
+ trace_ext4_fsmap_low_key(sb, info->dev, info->agno,
+ info->low.fmr_physical,
+ info->low.fmr_length,
+ info->low.fmr_owner);
+
+ trace_ext4_fsmap_high_key(sb, info->dev, info->agno,
+ info->high.fmr_physical,
+ info->high.fmr_length,
+ info->high.fmr_owner);
+
+ error = ext4_mballoc_query_range(sb, info->agno,
+ EXT4_B2C(sbi, info->low.fmr_physical),
+ EXT4_B2C(sbi, info->high.fmr_physical),
+ ext4_getfsmap_datadev_helper, info);
+ if (error)
+ goto err;
+
+ if (info->agno == start_ag)
+ memset(&info->low, 0, sizeof(info->low));
+ }
+
+ /* Do we have a retained free extent? */
+ if (info->lastfree.fmr_owner) {
+ error = ext4_getfsmap_helper(sb, info, &info->lastfree);
+ if (error)
+ goto err;
+ }
+
+ /* Report any free space at the end of the AG */
+ info->last = true;
+ error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster, 0, info);
+ if (error)
+ goto err;
+
+err:
+ ext4_getfsmap_free_fixed_metadata(&info->meta_list);
+ return error;
+}
+
+/* Do we recognize the device? */
+static bool
+ext4_getfsmap_is_valid_device(
+ struct super_block *sb,
+ struct ext4_fsmap *fm)
+{
+ if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
+ fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev))
+ return true;
+ if (EXT4_SB(sb)->journal_bdev &&
+ fm->fmr_device == new_encode_dev(EXT4_SB(sb)->journal_bdev->bd_dev))
+ return true;
+ return false;
+}
+
+/* Ensure that the low key is less than the high key. */
+static bool
+ext4_getfsmap_check_keys(
+ struct ext4_fsmap *low_key,
+ struct ext4_fsmap *high_key)
+{
+ if (low_key->fmr_device > high_key->fmr_device)
+ return false;
+ if (low_key->fmr_device < high_key->fmr_device)
+ return true;
+
+ if (low_key->fmr_physical > high_key->fmr_physical)
+ return false;
+ if (low_key->fmr_physical < high_key->fmr_physical)
+ return true;
+
+ if (low_key->fmr_owner > high_key->fmr_owner)
+ return false;
+ if (low_key->fmr_owner < high_key->fmr_owner)
+ return true;
+
+ return false;
+}
+
+#define EXT4_GETFSMAP_DEVS 2
+/*
+ * Get filesystem's extents as described in head, and format for
+ * output. Calls formatter to fill the user's buffer until all
+ * extents are mapped, until the passed-in head->fmh_count slots have
+ * been filled, or until the formatter short-circuits the loop, if it
+ * is tracking filled-in extents on its own.
+ */
+int
+ext4_getfsmap(
+ struct super_block *sb,
+ struct ext4_fsmap_head *head,
+ ext4_fsmap_format_t formatter,
+ void *arg)
+{
+ struct ext4_fsmap *rkey_low; /* request keys */
+ struct ext4_fsmap *rkey_high;
+ struct ext4_fsmap dkeys[2]; /* per-dev keys */
+ struct ext4_getfsmap_dev handlers[EXT4_GETFSMAP_DEVS];
+ struct ext4_getfsmap_info info = {0};
+ int i;
+ int error = 0;
+
+ if (head->fmh_iflags & ~FMH_IF_VALID)
+ return -EINVAL;
+ rkey_low = head->fmh_keys;
+ rkey_high = rkey_low + 1;
+ if (!ext4_getfsmap_is_valid_device(sb, rkey_low) ||
+ !ext4_getfsmap_is_valid_device(sb, rkey_high))
+ return -EINVAL;
+
+ head->fmh_entries = 0;
+
+ /* Set up our device handlers. */
+ memset(handlers, 0, sizeof(handlers));
+ handlers[0].dev = new_encode_dev(sb->s_bdev->bd_dev);
+ handlers[0].fn = ext4_getfsmap_datadev;
+ if (EXT4_SB(sb)->journal_bdev) {
+ handlers[1].dev = new_encode_dev(
+ EXT4_SB(sb)->journal_bdev->bd_dev);
+ handlers[1].fn = ext4_getfsmap_logdev;
+ }
+
+ sort(handlers, EXT4_GETFSMAP_DEVS, sizeof(struct ext4_getfsmap_dev),
+ ext4_getfsmap_dev_compare, NULL);
+
+ /*
+ * Since we allow the user to copy the last mapping from a previous
+ * call into the low key slot, we have to advance the low key by
+ * whatever the reported length is.
+ */
+ dkeys[0] = *rkey_low;
+ dkeys[0].fmr_physical += dkeys[0].fmr_length;
+ dkeys[0].fmr_owner = 0;
+ memset(&dkeys[1], 0xFF, sizeof(struct ext4_fsmap));
+
+ if (!ext4_getfsmap_check_keys(dkeys, rkey_high))
+ return -EINVAL;
+
+ info.rkey_low = rkey_low;
+ info.formatter = formatter;
+ info.format_arg = arg;
+ info.head = head;
+
+ /* For each device we support... */
+ for (i = 0; i < EXT4_GETFSMAP_DEVS; i++) {
+ /* Is this device within the range the user asked for? */
+ if (!handlers[i].fn)
+ continue;
+ if (rkey_low->fmr_device > handlers[i].dev)
+ continue;
+ if (rkey_high->fmr_device < handlers[i].dev)
+ break;
+
+ /*
+ * If this device number matches the high key, we have
+ * to pass the high key to the handler to limit the
+ * query results. If the device number exceeds the
+ * low key, zero out the low key so that we get
+ * everything from the beginning.
+ */
+ if (handlers[i].dev == rkey_high->fmr_device)
+ dkeys[1] = *rkey_high;
+ if (handlers[i].dev > rkey_low->fmr_device)
+ memset(&dkeys[0], 0, sizeof(struct ext4_fsmap));
+
+ info.next_fsblk = dkeys[0].fmr_physical;
+ info.dev = handlers[i].dev;
+ info.last = false;
+ info.agno = -1;
+ error = handlers[i].fn(sb, dkeys, &info);
+ if (error)
+ break;
+ }
+
+ head->fmh_oflags = FMH_OF_DEV_T;
+ return error;
+}
diff --git a/fs/ext4/fsmap.h b/fs/ext4/fsmap.h
new file mode 100644
index 0000000..e2e721d
--- /dev/null
+++ b/fs/ext4/fsmap.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2017 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef __EXT4_FSMAP_H__
+#define __EXT4_FSMAP_H__
+
+struct ext4_metadata_fsmap {
+ struct list_head mf_list;
+ uint64_t mf_physical; /* device offset of segment */
+ uint64_t mf_owner; /* owner id */
+ uint64_t mf_length; /* length of segment, blocks */
+};
+
+/* internal fsmap representation */
+struct ext4_fsmap {
+ struct list_head fmr_list;
+ dev_t fmr_device; /* device id */
+ uint32_t fmr_flags; /* mapping flags */
+ uint64_t fmr_physical; /* device offset of segment */
+ uint64_t fmr_owner; /* owner id */
+ uint64_t fmr_length; /* length of segment, blocks */
+};
+
+struct ext4_fsmap_head {
+ uint32_t fmh_iflags; /* control flags */
+ uint32_t fmh_oflags; /* output flags */
+ unsigned int fmh_count; /* # of entries in array incl. input */
+ unsigned int fmh_entries; /* # of entries filled in (output). */
+
+ struct ext4_fsmap fmh_keys[2]; /* low and high keys */
+};
+
+void ext4_fsmap_from_internal(struct super_block *sb, struct fsmap *dest,
+ struct ext4_fsmap *src);
+void ext4_fsmap_to_internal(struct super_block *sb, struct ext4_fsmap *dest,
+ struct fsmap *src);
+
+/* fsmap to userspace formatter - copy to user & advance pointer */
+typedef int (*ext4_fsmap_format_t)(struct ext4_fsmap *, void *);
+
+int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head,
+ ext4_fsmap_format_t formatter, void *arg);
+
+#define EXT4_QUERY_RANGE_ABORT 1
+#define EXT4_QUERY_RANGE_CONTINUE 0
+
+/* fmr_owner special values for FS_IOC_GETFSMAP; some share w/ XFS */
+#define EXT4_FMR_OWN_FREE FMR_OWN_FREE /* free space */
+#define EXT4_FMR_OWN_UNKNOWN FMR_OWN_UNKNOWN /* unknown owner */
+#define EXT4_FMR_OWN_FS FMR_OWNER('X', 1) /* static fs metadata */
+#define EXT4_FMR_OWN_LOG FMR_OWNER('X', 2) /* journalling log */
+#define EXT4_FMR_OWN_INODES FMR_OWNER('X', 5) /* inodes */
+#define EXT4_FMR_OWN_BLKBM FMR_OWNER('f', 1) /* inode bitmap */
+#define EXT4_FMR_OWN_INOBM FMR_OWNER('f', 2) /* block bitmap */
+
+#endif /* __EXT4_FSMAP_H__ */
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d534399..ed62465 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,9 @@
#include <linux/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
+#include <linux/fsmap.h>
+#include "fsmap.h"
+#include <trace/events/ext4.h>
/**
* Swap memory between @a and @b for @len bytes.
@@ -442,6 +445,108 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
return iflags;
}
+struct getfsmap_info {
+ struct super_block *sb;
+ struct fsmap __user *data;
+ __u32 last_flags;
+};
+
+static int
+ext4_getfsmap_format(
+ struct ext4_fsmap *xfm,
+ void *priv)
+{
+ struct getfsmap_info *info = priv;
+ struct fsmap fm;
+
+ trace_ext4_getfsmap_mapping(info->sb, xfm->fmr_device,
+ xfm->fmr_physical, xfm->fmr_length, xfm->fmr_owner,
+ 0, xfm->fmr_flags);
+
+ info->last_flags = xfm->fmr_flags;
+ ext4_fsmap_from_internal(info->sb, &fm, xfm);
+ if (copy_to_user(info->data, &fm, sizeof(struct fsmap)))
+ return -EFAULT;
+
+ info->data++;
+ return 0;
+}
+
+static int
+ext4_ioc_getfsmap(
+ struct super_block *sb,
+ void __user *arg)
+{
+ struct getfsmap_info info;
+ struct ext4_fsmap_head xhead = {0};
+ struct fsmap_head head;
+ bool aborted = false;
+ int error;
+
+ if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
+ return -EFAULT;
+ if (head.fmh_reserved[0] || head.fmh_reserved[1] ||
+ head.fmh_reserved[2] || head.fmh_reserved[3] ||
+ head.fmh_reserved[4] || head.fmh_reserved[5] ||
+ head.fmh_keys[0].fmr_offset ||
+ (head.fmh_keys[1].fmr_offset != 0 &&
+ head.fmh_keys[1].fmr_offset != -1ULL) ||
+ head.fmh_keys[0].fmr_reserved[0] ||
+ head.fmh_keys[0].fmr_reserved[1] ||
+ head.fmh_keys[0].fmr_reserved[2] ||
+ head.fmh_keys[1].fmr_reserved[0] ||
+ head.fmh_keys[1].fmr_reserved[1] ||
+ head.fmh_keys[1].fmr_reserved[2])
+ return -EINVAL;
+
+ xhead.fmh_iflags = head.fmh_iflags;
+ xhead.fmh_count = head.fmh_count;
+ ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]);
+ ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]);
+
+ trace_ext4_getfsmap_low_key(sb,
+ xhead.fmh_keys[0].fmr_device,
+ xhead.fmh_keys[0].fmr_physical,
+ xhead.fmh_keys[0].fmr_length,
+ xhead.fmh_keys[0].fmr_owner,
+ 0,
+ xhead.fmh_keys[0].fmr_flags);
+
+ trace_ext4_getfsmap_high_key(sb,
+ xhead.fmh_keys[1].fmr_device,
+ xhead.fmh_keys[1].fmr_physical,
+ xhead.fmh_keys[1].fmr_length,
+ xhead.fmh_keys[1].fmr_owner,
+ 0,
+ xhead.fmh_keys[1].fmr_flags);
+
+ info.sb = sb;
+ info.data = ((__force struct fsmap_head *)arg)->fmh_recs;
+ error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info);
+ if (error == EXT4_QUERY_RANGE_ABORT) {
+ error = 0;
+ aborted = true;
+ } else if (error)
+ return error;
+
+ /* If we didn't abort, set the "last" flag in the last fmx */
+ if (!aborted && xhead.fmh_entries) {
+ info.data--;
+ info.last_flags |= FMR_OF_LAST;
+ if (copy_to_user(&info.data->fmr_flags, &info.last_flags,
+ sizeof(info.last_flags)))
+ return -EFAULT;
+ }
+
+ /* copy back header */
+ head.fmh_entries = xhead.fmh_entries;
+ head.fmh_oflags = xhead.fmh_oflags;
+ if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
+ return -EFAULT;
+
+ return 0;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -452,6 +557,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
switch (cmd) {
+ case FS_IOC_GETFSMAP:
+ return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS:
ext4_get_inode_flags(ei);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
@@ -959,6 +1066,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY:
+ case FS_IOC_GETFSMAP:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7ae43c5..8813c54 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -5258,3 +5258,52 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
return ret;
}
+
+/* Iterate all the free extents in the group. */
+int
+ext4_mballoc_query_range(
+ struct super_block *sb,
+ ext4_group_t group,
+ ext4_grpblk_t start,
+ ext4_grpblk_t end,
+ ext4_mballoc_query_range_fn formatter,
+ void *priv)
+{
+ void *bitmap;
+ ext4_grpblk_t next;
+ struct ext4_buddy e4b;
+ int error;
+
+ error = ext4_mb_load_buddy(sb, group, &e4b);
+ if (error)
+ return error;
+ bitmap = e4b.bd_bitmap;
+
+ ext4_lock_group(sb, group);
+
+ start = (e4b.bd_info->bb_first_free > start) ?
+ e4b.bd_info->bb_first_free : start;
+ if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
+ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+
+ while (start <= end) {
+ start = mb_find_next_zero_bit(bitmap, end + 1, start);
+ if (start > end)
+ break;
+ next = mb_find_next_bit(bitmap, end + 1, start);
+
+ ext4_unlock_group(sb, group);
+ error = formatter(sb, group, start, next - start, priv);
+ if (error)
+ goto out_unload;
+ ext4_lock_group(sb, group);
+
+ start = next + 1;
+ }
+
+ ext4_unlock_group(sb, group);
+out_unload:
+ ext4_mb_unload_buddy(&e4b);
+
+ return error;
+}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 1aba469..2bed620 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -199,4 +199,21 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
return ext4_group_first_block_no(sb, fex->fe_group) +
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
}
+
+typedef int (*ext4_mballoc_query_range_fn)(
+ struct super_block *sb,
+ ext4_group_t agno,
+ ext4_grpblk_t start,
+ ext4_grpblk_t len,
+ void *priv);
+
+int
+ext4_mballoc_query_range(
+ struct super_block *sb,
+ ext4_group_t agno,
+ ext4_grpblk_t start,
+ ext4_grpblk_t end,
+ ext4_mballoc_query_range_fn formatter,
+ void *priv);
+
#endif
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 09c71e9..922baeb 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2529,6 +2529,84 @@ TRACE_EVENT(ext4_es_shrink,
__entry->scan_time, __entry->nr_skipped, __entry->retried)
);
+/* fsmap traces */
+DECLARE_EVENT_CLASS(ext4_fsmap_class,
+ TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len,
+ u64 owner),
+ TP_ARGS(sb, keydev, agno, bno, len, owner),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(u32, agno)
+ __field(u64, bno)
+ __field(u64, len)
+ __field(u64, owner)
+ ),
+ TP_fast_assign(
+ __entry->dev = sb->s_bdev->bd_dev;
+ __entry->keydev = new_decode_dev(keydev);
+ __entry->agno = agno;
+ __entry->bno = bno;
+ __entry->len = len;
+ __entry->owner = owner;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d agno %u bno %llu len %llu owner %lld\n",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->agno,
+ __entry->bno,
+ __entry->len,
+ __entry->owner)
+)
+#define DEFINE_FSMAP_EVENT(name) \
+DEFINE_EVENT(ext4_fsmap_class, name, \
+ TP_PROTO(struct super_block *sb, u32 keydev, u32 agno, u64 bno, u64 len, \
+ u64 owner), \
+ TP_ARGS(sb, keydev, agno, bno, len, owner))
+DEFINE_FSMAP_EVENT(ext4_fsmap_low_key);
+DEFINE_FSMAP_EVENT(ext4_fsmap_high_key);
+DEFINE_FSMAP_EVENT(ext4_fsmap_mapping);
+
+DECLARE_EVENT_CLASS(ext4_getfsmap_class,
+ TP_PROTO(struct super_block *sb, u32 keydev, u64 block, u64 len,
+ u64 owner, u64 offset, u64 flags),
+ TP_ARGS(sb, keydev, block, len, owner, offset, flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(u64, block)
+ __field(u64, len)
+ __field(u64, owner)
+ __field(u64, offset)
+ __field(u64, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = sb->s_bdev->bd_dev;
+ __entry->keydev = new_decode_dev(keydev);
+ __entry->block = block;
+ __entry->len = len;
+ __entry->owner = owner;
+ __entry->offset = offset;
+ __entry->flags = flags;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d block %llu len %llu owner %lld offset %llu flags 0x%llx\n",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->block,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+)
+#define DEFINE_GETFSMAP_EVENT(name) \
+DEFINE_EVENT(ext4_getfsmap_class, name, \
+ TP_PROTO(struct super_block *sb, u32 keydev, u64 block, u64 len, \
+ u64 owner, u64 offset, u64 flags), \
+ TP_ARGS(sb, keydev, block, len, owner, offset, flags))
+DEFINE_GETFSMAP_EVENT(ext4_getfsmap_low_key);
+DEFINE_GETFSMAP_EVENT(ext4_getfsmap_high_key);
+DEFINE_GETFSMAP_EVENT(ext4_getfsmap_mapping);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH] ioctl_getfsmap.2: document the GETFSMAP ioctl
2017-02-18 1:17 [RFC PATCH v6 0/8] vfs/xfs/ext4: GETFSMAP support Darrick J. Wong
` (7 preceding siblings ...)
2017-02-18 1:18 ` [PATCH 8/8] ext4: support GETFSMAP ioctls Darrick J. Wong
@ 2017-02-21 22:14 ` Darrick J. Wong
8 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2017-02-21 22:14 UTC (permalink / raw)
To: linux-xfs, linux-fsdevel, linux-ext4; +Cc: linux-api, linux-man, linux-btrfs
Document the new GETFSMAP ioctl that returns the physical layout of a
(disk-based) filesystem. This time around the fs-specific parts have
been moved to a separate section; I'll move move them into separate
xfsprogs/e2fsprogs manpages when we get closer to landing the ioctl.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
man2/ioctl_getfsmap.2 | 359 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 359 insertions(+)
create mode 100644 man2/ioctl_getfsmap.2
diff --git a/man2/ioctl_getfsmap.2 b/man2/ioctl_getfsmap.2
new file mode 100644
index 0000000..7121d61
--- /dev/null
+++ b/man2/ioctl_getfsmap.2
@@ -0,0 +1,359 @@
+.\" Copyright (c) 2017, Oracle. All rights reserved.
+.\"
+.\" %%%LICENSE_START(GPLv2+_DOC_FULL)
+.\" This is free documentation; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License as
+.\" published by the Free Software Foundation; either version 2 of
+.\" the License, or (at your option) any later version.
+.\"
+.\" The GNU General Public License's references to "object code"
+.\" and "executables" are to be interpreted as the output of any
+.\" document formatting or typesetting system, including
+.\" intermediate and printed output.
+.\"
+.\" This manual is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public
+.\" License along with this manual; if not, see
+.\" <http://www.gnu.org/licenses/>.
+.\" %%%LICENSE_END
+.TH IOCTL-GETFSMAP 2 2017-02-10 "Linux" "Linux Programmer's Manual"
+.SH NAME
+ioctl_getfsmap \- retrieve the physical layout of the filesystem
+.SH SYNOPSIS
+.br
+.B #include <sys/ioctl.h>
+.br
+.B #include <linux/fs.h>
+.br
+.B #include <linux/fsmap.h>
+.sp
+.BI "int ioctl(int " fd ", GETFSMAP, struct fsmap_head * " arg );
+.SH DESCRIPTION
+This
+.BR ioctl (2)
+retrieves physical extent mappings for a filesystem.
+This information can be used to discover which files are mapped to a physical
+block, examine free space, or find known bad blocks, among other things.
+
+The sole argument to this ioctl should be a pointer to a single
+.BR "struct fsmap_head" ":"
+.in +4n
+.nf
+
+struct fsmap {
+ __u32 fmr_device; /* device id */
+ __u32 fmr_flags; /* mapping flags */
+ __u64 fmr_physical; /* device offset of segment */
+ __u64 fmr_owner; /* owner id */
+ __u64 fmr_offset; /* file offset of segment */
+ __u64 fmr_length; /* length of segment */
+ __u64 fmr_reserved[3]; /* must be zero */
+};
+
+struct fsmap_head {
+ __u32 fmh_iflags; /* control flags */
+ __u32 fmh_oflags; /* output flags */
+ __u32 fmh_count; /* # of entries in array incl. input */
+ __u32 fmh_entries; /* # of entries filled in (output). */
+ __u64 fmh_reserved[6]; /* must be zero */
+
+ struct fsmap fmh_keys[2]; /* low and high keys for the mapping search */
+ struct fsmap fmh_recs[]; /* returned records */
+};
+
+.fi
+.in
+The two
+.I fmh_keys
+array elements specify the lowest and highest reverse-mapping
+keys, respectively, for which userspace would like physical mapping
+information.
+A reverse mapping key consists of the tuple (device, block, owner, offset).
+The owner and offset fields are part of the key because some filesystems
+support sharing physical blocks between multiple files and
+therefore may return multiple mappings for a given physical block.
+.PP
+Filesystem mappings are copied into the
+.I fmh_recs
+array, which immediately follows the header data.
+.SS Fields of struct fsmap_head
+.PP
+The
+.I fmh_iflags
+field is a bitmask passed to the kernel to alter the output.
+There are no flags defined, so this value must be zero.
+
+.PP
+The
+.I fmh_oflags
+field is a bitmask of flags that concern all output mappings.
+If
+.B FMH_OF_DEV_T
+is set, then the
+.I fmr_device
+field represents a
+.B dev_t
+structure containing the major and minor numbers of the block device.
+
+.PP
+The
+.I fmh_count
+field contains the number of elements in the array being passed to the
+kernel.
+If this value is 0,
+.I fmh_entries
+will be set to the number of records that would have been returned had
+the array been large enough;
+no mapping information will be returned.
+
+.PP
+The
+.I fmh_entries
+field contains the number of elements in the
+.I fmh_recs
+array that contain useful information.
+
+.PP
+The
+.I fmh_reserved
+fields must be set to zero.
+
+.SS Keys
+.PP
+The two key records in
+.B fsmap_head.fmh_keys
+specify the lowest and highest extent records in the keyspace that the caller
+wants returned.
+A filesystem that can share blocks between files likely requires the tuple
+.RI "(" "device" ", " "physical" ", " "owner" ", " "offset" ", " "flags" ")"
+to uniquely index any filesystem mapping record.
+Classic non-sharing filesystems might be able to identify any record with only
+.RI "(" "device" ", " "physical" ", " "flags" ")."
+For example, if the low key is set to (0, 36864, 0, 0, 0), the filesystem will
+only return records for extents starting at or above 36KiB on disk.
+If the high key is set to (0, 1048576, 0, 0, 0), only records below 1MiB will
+be returned.
+By convention, the field
+.B fsmap_head.fmh_keys[0]
+must contain the low key and
+.B fsmap_head.fmh_keys[1]
+must contain the high key for the request.
+.PP
+For convenience, if
+.B fmr_length
+is set in the low key, it will be added to
+.IR fmr_block " or " fmr_offset
+as appropriate.
+The caller can take advantage of this subtlety to set up subsequent calls
+by copying
+.B fsmap_head.fmh_recs[fsmap_head.fmh_entries - 1]
+into the low key.
+The function
+.B fsmap_advance
+provides this functionality.
+
+.SS Fields of struct fsmap
+.PP
+The
+.I fmr_device
+field contains a 32-bit cookie to uniquely identify the underlying storage
+device.
+If the
+.B FMH_OF_DEV_T
+flag is set in the header's
+.I fmh_oflags
+field, this field contains a
+.B dev_t
+from which major and minor numbers can be extracted.
+If the flag is not set, this field contains a value that must be unique
+for each unique storage device.
+
+.PP
+The
+.I fmr_physical
+field contains the disk address of the extent in bytes.
+
+.PP
+The
+.I fmr_owner
+field contains the owner of the extent.
+This is an inode number unless
+.B FMR_OF_SPECIAL_OWNER
+is set in the
+.I fmr_flags
+field, in which case the value is determined by the filesystem.
+See the section below about special owner values for more details.
+
+.PP
+The
+.I fmr_offset
+field contains the logical address in the mapping record in bytes.
+This field has no meaning if the
+.BR FMR_OF_SPECIAL_OWNER " or " FMR_OF_EXTENT_MAP
+flags are set in
+.IR fmr_flags "."
+
+.PP
+The
+.I fmr_length
+field contains the length of the extent in bytes.
+
+.PP
+The
+.I fmr_flags
+field is a bitmask of extent state flags.
+The bits are:
+.RS 0.4i
+.TP
+.B FMR_OF_PREALLOC
+The extent is allocated but not yet written.
+.TP
+.B FMR_OF_ATTR_FORK
+This extent contains extended attribute data.
+.TP
+.B FMR_OF_EXTENT_MAP
+This extent contains extent map information for the owner.
+.TP
+.B FMR_OF_SHARED
+Parts of this extent may be shared.
+.TP
+.B FMR_OF_SPECIAL_OWNER
+The
+.I fmr_owner
+field contains a special value instead of an inode number.
+.TP
+.B FMR_OF_LAST
+This is the last record in the filesystem.
+.RE
+
+.PP
+The
+.I fmr_reserved
+field will be set to zero.
+
+.SS Special Owner Values
+The following special owner values are generic to all filesystems:
+.RS 0.4i
+.TP
+.B FMR_OWN_FREE
+Free space.
+.TP
+.B FMR_OWN_UNKNOWN
+This extent is in use but its owner is not known.
+.TP
+.B FMR_OWN_METADATA
+This extent is filesystem metadata.
+.RE
+
+XFS can return the following special owner values:
+.RS 0.4i
+.TP
+.B XFS_FMR_OWN_FREE
+Free space.
+.TP
+.B XFS_FMR_OWN_UNKNOWN
+This extent is in use but its owner is not known.
+.TP
+.B XFS_FMR_OWN_FS
+Static filesystem metadata which exists at a fixed address.
+These are the AG superblock, the AGF, the AGFL, and the AGI headers.
+.TP
+.B XFS_FMR_OWN_LOG
+The filesystem journal.
+.TP
+.B XFS_FMR_OWN_AG
+Allocation group metadata, such as the free space btrees and the
+reverse mapping btrees.
+.TP
+.B XFS_FMR_OWN_INOBT
+The inode and free inode btrees.
+.TP
+.B XFS_FMR_OWN_INODES
+Inode records.
+.TP
+.B XFS_FMR_OWN_REFC
+Reference count information.
+.TP
+.B XFS_FMR_OWN_COW
+This extent is being used to stage a copy-on-write.
+.TP
+.B XFS_FMR_OWN_DEFECTIVE:
+This extent has been marked defective either by the filesystem or the
+underlying device.
+.RE
+
+ext4 can return the following special owner values:
+.RS 0.4i
+.TP
+.B EXT4_FMR_OWN_FREE
+Free space.
+.TP
+.B EXT4_FMR_OWN_UNKNOWN
+This extent is in use but its owner is not known.
+.TP
+.B EXT4_FMR_OWN_FS
+Static filesystem metadata which exists at a fixed address.
+This is the superblock and the group descriptors.
+.TP
+.B EXT4_FMR_OWN_LOG
+The filesystem journal.
+.TP
+.B EXT4_FMR_OWN_INODES
+Inode records.
+.TP
+.B EXT4_FMR_OWN_BLKBM
+Block bitmap.
+.TP
+.B EXT4_FMR_OWN_INOBM
+Inode bitmap.
+.RE
+
+.SH RETURN VALUE
+On error, \-1 is returned, and
+.I errno
+is set to indicate the error.
+.PP
+.SH ERRORS
+Error codes can be one of, but are not limited to, the following:
+.TP
+.B EINVAL
+The array is not long enough, or a non-zero value was passed in one of the
+fields that must be zero.
+.TP
+.B EFAULT
+The pointer passed in was not mapped to a valid memory address.
+.TP
+.B EBADF
+.IR fd
+is not open for reading.
+.TP
+.B EPERM
+This query is not allowed.
+.TP
+.B EOPNOTSUPP
+The filesystem does not support this command.
+.TP
+.B EUCLEAN
+The filesystem metadata is corrupt and needs repair.
+.TP
+.B EBADMSG
+The filesystem has detected a checksum error in the metadata.
+.TP
+.B ENOMEM
+Insufficient memory to process the request.
+
+.SH EXAMPLE
+.TP
+Please see io/fsmap.c in the xfsprogs distribution for a sample program.
+
+.SH CONFORMING TO
+This API is Linux-specific.
+Not all filesystems support it.
+.fi
+.in
+.SH SEE ALSO
+.BR ioctl (2)
^ permalink raw reply related [flat|nested] 27+ messages in thread