* [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl
@ 2015-03-03 14:41 Carlos Maiolino
2015-03-03 20:58 ` Eric Sandeen
2015-03-03 21:55 ` Dave Chinner
0 siblings, 2 replies; 4+ messages in thread
From: Carlos Maiolino @ 2015-03-03 14:41 UTC (permalink / raw)
To: xfs
This patch aims to implement a ranged bulkstat ioctl, which will make users able
to bulkstat inodes in a filesystem based on range instead on rely only in a
whole filesystem bulkstat.
This first implementation add a per AG bulkstat possibility, but it also adds
the necessary infrastructure to implement different kinds of ranged bulkstat,
like per block.
The patch is already working and I've been testing it for a while, so, I think
it's time to send this patch out and ask for comments on it.
The core of the implementation is very similar with what xfs_bulsktat() does by
now, which, instead bulkstat the whole filesystem, it start/stop on the
specified range.
As per Dave's suggesting, I added to rgbulkreq structure (used to pass data
to/from userland), a padding, so we can use the same structure for another kind
of ranged bulkstats without the need to actually change the structure size.
Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
---
fs/xfs/libxfs/xfs_fs.h | 12 +++
fs/xfs/xfs_ioctl.c | 56 +++++++++++++
fs/xfs/xfs_itable.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_itable.h | 16 ++++
4 files changed, 294 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 18dc721..88665aa 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -334,6 +334,17 @@ typedef struct xfs_fsop_bulkreq {
__s32 __user *ocount; /* output count pointer */
} xfs_fsop_bulkreq_t;
+typedef struct xfs_fsop_rgbulkreq {
+ __u64 __user *lastip; /* last inode # pointer */
+ __s32 icount; /* count of entries in buffer */
+ void __user *ubuffer;/* user buffer for inode desc. */
+ __s32 __user *ocount; /* output count pointer */
+ __u64 start; /* start point of rgbulkstat */
+ __u64 end; /* end point of rgbulkstat */
+ __u32 flags; /* multipurpose flag field */
+ __u32 pad[5]; /* reserved space */
+} xfs_fsop_rgbulkreq_t;
+
/*
* Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
@@ -556,6 +567,7 @@ typedef struct xfs_swapext
#define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
#define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom)
#define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t)
+#define XFS_IOC_FSBULKSTAT_RANGE _IOWR('X', 126, struct xfs_fsop_rgbulkreq)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f7afb86..34a4de5 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -790,6 +790,59 @@ xfs_ioc_bulkstat(
}
STATIC int
+xfs_ioc_bulkstat_range(
+ xfs_mount_t *mp,
+ unsigned int cmd,
+ void __user *arg)
+{
+
+ xfs_fsop_rgbulkreq_t rgbulkreq;
+ int count; /* # of records returned */
+ xfs_ino_t inlast; /* last inode number */
+ int done;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ /* We do not support another ranged calls yet */
+ if (cmd != XFS_IOC_FSBULKSTAT_RANGE)
+ return -EINVAL;
+
+ if (copy_from_user(&rgbulkreq, arg, sizeof(xfs_fsop_rgbulkreq_t)))
+ return -EFAULT;
+
+ if (copy_from_user(&inlast, rgbulkreq.lastip, sizeof(__s64)))
+ return -EFAULT;
+
+ if ((count = rgbulkreq.icount) <= 0)
+ return -EINVAL;
+
+ if (rgbulkreq.ubuffer == NULL)
+ return -EINVAL;
+
+ error = xfs_bulkstat_range(mp, &inlast, &count, xfs_bulkstat_one,
+ sizeof(xfs_bstat_t), rgbulkreq.ubuffer,
+ rgbulkreq.start, rgbulkreq.end, &done);
+
+ if (error)
+ return error;
+
+ if (rgbulkreq.ocount != NULL) {
+ if (copy_to_user(rgbulkreq.lastip, &inlast, sizeof(xfs_ino_t)))
+ return -EFAULT;
+
+ if (copy_to_user(rgbulkreq.ocount, &count, sizeof(count)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+STATIC int
xfs_ioc_fsgeometry_v1(
xfs_mount_t *mp,
void __user *arg)
@@ -1555,6 +1608,9 @@ xfs_file_ioctl(
case XFS_IOC_FSINUMBERS:
return xfs_ioc_bulkstat(mp, cmd, arg);
+ case XFS_IOC_FSBULKSTAT_RANGE:
+ return xfs_ioc_bulkstat_range(mp, cmd, arg);
+
case XFS_IOC_FSGEOMETRY_V1:
return xfs_ioc_fsgeometry_v1(mp, arg);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 82e3142..d6b27ee 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -530,6 +530,216 @@ del_cursor:
return error;
}
+/*
+ * Return stat information in bulk (by-inode) for specified
+ * filesystem range.
+ */
+int
+xfs_bulkstat_range(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t *lastinop, /* last inode returned */
+ int *ubcountp, /* size of buffer/count returned */
+ bulkstat_one_pf formatter, /* func that'd fill a single buf */
+ size_t statstruct_size, /* sizeof struct filling */
+ char __user *ubuffer, /* buffer with inode stats */
+ __u64 start, /* start bulkstat here */
+ __u64 end, /* end bulkstat here */
+ int *done) /* 1 if there are more stats to get */
+{
+ xfs_buf_t *agbp; /* agi header buffer */
+ xfs_agino_t agino; /* inode # in allocation group */
+ xfs_agnumber_t agno; /* allocation group number */
+ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
+ size_t irbsize; /* size of irec buffer in bytes */
+ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
+ int nirbuf; /* size of irbuf */
+ int ubcount; /* size of user's buffer */
+ struct xfs_bulkstat_agichunk ac;
+ int error = 0;
+
+ /*
+ * Get the last inode value, see if there is nothing to do.
+ */
+ if (*lastinop != 0)
+ agno = XFS_INO_TO_AGNO(mp, *lastinop);
+ else
+ agno = start;
+
+ agino = XFS_INO_TO_AGINO(mp, *lastinop);
+
+ /*
+ * If specified end is bigger than mp->m_sb.sb_agount, should we
+ * gracefully interpret as if there is nothing to do, or trigger
+ * an error?
+ */
+ if (agno >= mp->m_sb.sb_agcount) {
+ *done = 1;
+ *ubcountp = 0;
+ return 0;
+ }
+
+ ubcount = *ubcountp;
+ ac.ac_ubuffer = &ubuffer;
+ ac.ac_ubleft = ubcount * statstruct_size; /* bytes */
+ ac.ac_ubelem = 0;
+
+ *ubcountp = 0;
+ *done = 0;
+
+ irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
+ if (!irbuf)
+ return -ENOMEM;
+
+ nirbuf = irbsize / sizeof(*irbuf);
+
+ /*
+ * Loop over the allocation groups, starting from the last inode
+ * returned; - means start of the allocation group.
+ */
+ while (agno <= end) {
+ struct xfs_inobt_rec_incore *irbp = irbuf;
+ struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
+ bool end_of_ag = false;
+ int icount = 0;
+ int stat;
+
+ error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
+ if (error)
+ break;
+
+ /*
+ * Allocate and initialize a btree cursor for ialloc btree.
+ */
+ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
+ XFS_BTNUM_INO);
+
+ if (agino > 0) {
+ /*
+ * In the middle of an allocation group, we need to get
+ * the remainder of the chunk we are in.
+ */
+ struct xfs_inobt_rec_incore r;
+
+ error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
+ if (error)
+ goto del_cursor;
+ if (icount) {
+ irbp->ir_startino = r.ir_startino;
+ irbp->ir_freecount = r.ir_freecount;
+ irbp->ir_free = r.ir_free;
+ irbp++;
+ }
+ /* Increment to the next record */
+ error = xfs_btree_increment(cur, 0, &stat);
+ } else {
+ /* Start of ag. Lookup the first inode chunk */
+ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
+ }
+ if (error || stat == 0) {
+ end_of_ag = true;
+ goto del_cursor;
+ }
+
+ /*
+ * Loop through inode btree records in this ag,
+ * until we run out of inodes or space in the buffer.
+ */
+ while (irbp < irbufend && icount < ubcount) {
+ struct xfs_inobt_rec_incore r;
+
+ error = xfs_inobt_get_rec(cur, &r, &stat);
+ if (error || stat == 0) {
+ end_of_ag = true;
+ goto del_cursor;
+ }
+
+ /*
+ * If this chunk has any allocated inodes, save it.
+ * Also start read-ahead now for this chunk.
+ */
+ if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
+ xfs_bulkstat_ichunk_ra(mp, agno, &r);
+ irbp->ir_startino = r.ir_startino;
+ irbp->ir_freecount = r.ir_freecount;
+ irbp->ir_free = r.ir_free;
+ irbp++;
+ icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
+ }
+ error = xfs_btree_increment(cur, 0, &stat);
+ if (error || stat == 0) {
+ end_of_ag = true;
+ goto del_cursor;
+ }
+ cond_resched();
+ }
+
+ /*
+ * Drop the btree buffers and the agi buffer as we can't hold any of the
+ * locks these represent when calling iget. If there is a pending error,
+ * then we are done.
+ */
+del_cursor:
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ xfs_buf_relse(agbp);
+ if (error)
+ break;
+ /*
+ * Now format all the good inodes into the user's buffer. The
+ * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
+ * for the next loop iteration.
+ */
+ irbufend = irbp;
+ for (irbp = irbuf;
+ irbp < irbufend && ac.ac_ubleft >= statstruct_size;
+ irbp++) {
+ error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, formatter,
+ statstruct_size, &ac, &agino);
+ if (error)
+ break;
+
+ cond_resched();
+ }
+
+ /*
+ * If we have run out of space or had a formatting error, we
+ * are now done
+ */
+ if (ac.ac_ubleft < statstruct_size || error)
+ break;
+
+ if (end_of_ag) {
+ agno++;
+ agino = 0;
+ }
+ }
+ /*
+ * Done, we are either out of specified inode range or space to
+ * put the data.
+ */
+ kmem_free(irbuf);
+ *ubcountp = ac.ac_ubelem;
+
+ /*
+ * We found some inodes, so clear the error status and return them.
+ * The lastino pointer will point directly at the inode that triggered
+ * any error that occurred, so on the next call the error will be
+ * triggered again and propagated to userspace as there will be no
+ * formatted inodes in the buffer.
+ */
+ if (ac.ac_ubelem)
+ error = 0;
+
+ /*
+ * If we ran out of specified range, lastino will point off the end of
+ * the file range, so, the next call will return immediately.
+ */
+ *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
+ if (agno >= end)
+ *done = 1;
+
+ return error;
+}
+
int
xfs_inumbers_fmt(
void __user *ubuffer, /* buffer to write to */
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 6ea8b39..f8cf724 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -50,6 +50,22 @@ xfs_bulkstat(
char __user *ubuffer,/* buffer with inode stats */
int *done); /* 1 if there are more stats to get */
+/*
+ * Return stat information in bulk (by-inode) for the ispecified
+ * filesystem range.
+ */
+int
+xfs_bulkstat_range(
+ xfs_mount_t *mp, /* mount point for filesystem */
+ xfs_ino_t *lastinop, /* last inode returned */
+ int *ubcountp, /* size of buffer/count returned */
+ bulkstat_one_pf formatter, /* func that'd fill a single buf */
+ size_t statstruct_size,/* sizeof struct filling */
+ char __user *ubuffer,/* buffer with inode stats */
+ __u64 start, /* start bulkstat here */
+ __u64 end, /* end bulkstat here */
+ int *done); /* 1 if there are more stats to get */
+
typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
void __user *ubuffer, /* buffer to write to */
int ubsize, /* remaining user buffer sz */
--
2.1.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl
2015-03-03 14:41 [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl Carlos Maiolino
@ 2015-03-03 20:58 ` Eric Sandeen
2015-03-03 21:55 ` Dave Chinner
1 sibling, 0 replies; 4+ messages in thread
From: Eric Sandeen @ 2015-03-03 20:58 UTC (permalink / raw)
To: Carlos Maiolino, xfs
On 3/3/15 8:41 AM, Carlos Maiolino wrote:
> This patch aims to implement a ranged bulkstat ioctl, which will make users able
> to bulkstat inodes in a filesystem based on range instead on rely only in a
> whole filesystem bulkstat.
>
> This first implementation add a per AG bulkstat possibility, but it also adds
> the necessary infrastructure to implement different kinds of ranged bulkstat,
> like per block.
>
> The patch is already working and I've been testing it for a while, so, I think
> it's time to send this patch out and ask for comments on it.
>
> The core of the implementation is very similar with what xfs_bulsktat() does by
> now, which, instead bulkstat the whole filesystem, it start/stop on the
> specified range.
>
> As per Dave's suggesting, I added to rgbulkreq structure (used to pass data
> to/from userland), a padding, so we can use the same structure for another kind
> of ranged bulkstats without the need to actually change the structure size.
>
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
> fs/xfs/libxfs/xfs_fs.h | 12 +++
> fs/xfs/xfs_ioctl.c | 56 +++++++++++++
> fs/xfs/xfs_itable.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_itable.h | 16 ++++
> 4 files changed, 294 insertions(+)
>
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index 18dc721..88665aa 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -334,6 +334,17 @@ typedef struct xfs_fsop_bulkreq {
> __s32 __user *ocount; /* output count pointer */
> } xfs_fsop_bulkreq_t;
>
> +typedef struct xfs_fsop_rgbulkreq {
> + __u64 __user *lastip; /* last inode # pointer */
> + __s32 icount; /* count of entries in buffer */
> + void __user *ubuffer;/* user buffer for inode desc. */
> + __s32 __user *ocount; /* output count pointer */
> + __u64 start; /* start point of rgbulkstat */
> + __u64 end; /* end point of rgbulkstat */
Ok, so this is a generic start/end, but your patch interprets them as AGs,
right?
> + __u32 flags; /* multipurpose flag field */
unused... Was the intent to be able to specify inode numbers or blocks as
a range as well, and maybe the flag tells XFS what the start/end numbers
mean?
If you intend start/end to have multiple possible meanings, I wouldn't
merge this patch without at least a requirement that the flags specify i.e.
BULKREQ_RANGE_AGNO or something, so that it's clear to the user and to the
code how the range is specified and interpreted.
It'd be trivial, in the ioctl entry code, to read the flag, interpret
start/end as blocks/inodes/ags, and convert the given values to inode numbers
(the smallest granularity we'd have).
> + __u32 pad[5]; /* reserved space */
> +} xfs_fsop_rgbulkreq_t;
> +
>
> /*
> * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
> @@ -556,6 +567,7 @@ typedef struct xfs_swapext
> #define XFS_IOC_ATTRMULTI_BY_HANDLE _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
> #define XFS_IOC_FSGEOMETRY _IOR ('X', 124, struct xfs_fsop_geom)
> #define XFS_IOC_GOINGDOWN _IOR ('X', 125, __uint32_t)
> +#define XFS_IOC_FSBULKSTAT_RANGE _IOWR('X', 126, struct xfs_fsop_rgbulkreq)
> /* XFS_IOC_GETFSUUID ---------- deprecated 140 */
>
>
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index f7afb86..34a4de5 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -790,6 +790,59 @@ xfs_ioc_bulkstat(
> }
>
> STATIC int
> +xfs_ioc_bulkstat_range(
> + xfs_mount_t *mp,
> + unsigned int cmd,
> + void __user *arg)
> +{
> +
> + xfs_fsop_rgbulkreq_t rgbulkreq;
> + int count; /* # of records returned */
> + xfs_ino_t inlast; /* last inode number */
> + int done;
> + int error;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return -EIO;
> +
> + /* We do not support another ranged calls yet */
> + if (cmd != XFS_IOC_FSBULKSTAT_RANGE)
> + return -EINVAL;
I don't think this is necessary; the only way we can get here is if
cmd == XFS_IOC_FSBULKSTAT_RANGE. What other cases do you envision?
> +
> + if (copy_from_user(&rgbulkreq, arg, sizeof(xfs_fsop_rgbulkreq_t)))
> + return -EFAULT;
> +
> + if (copy_from_user(&inlast, rgbulkreq.lastip, sizeof(__s64)))
> + return -EFAULT;
> +
> + if ((count = rgbulkreq.icount) <= 0)
> + return -EINVAL;
> +
> + if (rgbulkreq.ubuffer == NULL)
> + return -EINVAL;
> +
> + error = xfs_bulkstat_range(mp, &inlast, &count, xfs_bulkstat_one,
> + sizeof(xfs_bstat_t), rgbulkreq.ubuffer,
> + rgbulkreq.start, rgbulkreq.end, &done);
> +
> + if (error)
> + return error;
> +
> + if (rgbulkreq.ocount != NULL) {
> + if (copy_to_user(rgbulkreq.lastip, &inlast, sizeof(xfs_ino_t)))
> + return -EFAULT;
> +
> + if (copy_to_user(rgbulkreq.ocount, &count, sizeof(count)))
> + return -EFAULT;
> + }
> +
> + return 0;
> +}
> +
> +STATIC int
> xfs_ioc_fsgeometry_v1(
> xfs_mount_t *mp,
> void __user *arg)
> @@ -1555,6 +1608,9 @@ xfs_file_ioctl(
> case XFS_IOC_FSINUMBERS:
> return xfs_ioc_bulkstat(mp, cmd, arg);
>
> + case XFS_IOC_FSBULKSTAT_RANGE:
> + return xfs_ioc_bulkstat_range(mp, cmd, arg);
There's only one possible cmd that gets passed to xfs_ioc_bulkstat_range
here, so there is no need to pass it in.
> +
> case XFS_IOC_FSGEOMETRY_V1:
> return xfs_ioc_fsgeometry_v1(mp, arg);
>
> diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> index 82e3142..d6b27ee 100644
> --- a/fs/xfs/xfs_itable.c
> +++ b/fs/xfs/xfs_itable.c
> @@ -530,6 +530,216 @@ del_cursor:
> return error;
> }
>
> +/*
> + * Return stat information in bulk (by-inode) for specified
> + * filesystem range.
> + */
> +int
> +xfs_bulkstat_range(
*way* too much cut & paste from xfs_bulkstat() here. Didn't Dave suggest a
refactoring which re-uses most of this code?
I had envisioned slightly extending xfs_bulkstat() to take an end inode
numbers by default it would do the entire fs. (lastinop already specifies
the starting inde number). With your ioctl entry point, lastinop could
be provided based on the start value, and end would be specified as well,
and all the same code would be re-used.
I think Dave had some other vision, but in any case, cut & paste of ~200 LOC
w/o any real changes won't fly.
Comments below, anyway. ;)
> + xfs_mount_t *mp, /* mount point for filesystem */
> + xfs_ino_t *lastinop, /* last inode returned */
> + int *ubcountp, /* size of buffer/count returned */
> + bulkstat_one_pf formatter, /* func that'd fill a single buf */
> + size_t statstruct_size, /* sizeof struct filling */
> + char __user *ubuffer, /* buffer with inode stats */
> + __u64 start, /* start bulkstat here */
> + __u64 end, /* end bulkstat here */
> + int *done) /* 1 if there are more stats to get */
> +{
> + xfs_buf_t *agbp; /* agi header buffer */
> + xfs_agino_t agino; /* inode # in allocation group */
> + xfs_agnumber_t agno; /* allocation group number */
> + xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
> + size_t irbsize; /* size of irec buffer in bytes */
> + xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
> + int nirbuf; /* size of irbuf */
> + int ubcount; /* size of user's buffer */
> + struct xfs_bulkstat_agichunk ac;
> + int error = 0;
> +
> + /*
> + * Get the last inode value, see if there is nothing to do.
> + */
this comment isn't relevant in your copied code, at least not
to the code following it.
> + if (*lastinop != 0)
> + agno = XFS_INO_TO_AGNO(mp, *lastinop);
> + else
> + agno = start;
> +
> + agino = XFS_INO_TO_AGINO(mp, *lastinop);
> +
> + /*
> + * If specified end is bigger than mp->m_sb.sb_agount, should we
> + * gracefully interpret as if there is nothing to do, or trigger
> + * an error?
> + */
I think EINVAL would be reasonable here.
> + if (agno >= mp->m_sb.sb_agcount) {
> + *done = 1;
> + *ubcountp = 0;
> + return 0;
> + }
> +
> + ubcount = *ubcountp;
> + ac.ac_ubuffer = &ubuffer;
> + ac.ac_ubleft = ubcount * statstruct_size; /* bytes */
> + ac.ac_ubelem = 0;
> +
> + *ubcountp = 0;
> + *done = 0;
> +
> + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
> + if (!irbuf)
> + return -ENOMEM;
> +
> + nirbuf = irbsize / sizeof(*irbuf);
> +
> + /*
> + * Loop over the allocation groups, starting from the last inode
> + * returned; - means start of the allocation group.
"- means start" ?
> + */
> + while (agno <= end) {
> + struct xfs_inobt_rec_incore *irbp = irbuf;
> + struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
> + bool end_of_ag = false;
> + int icount = 0;
> + int stat;
> +
> + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
> + if (error)
> + break;
> +
> + /*
> + * Allocate and initialize a btree cursor for ialloc btree.
> + */
> + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
> + XFS_BTNUM_INO);
> +
> + if (agino > 0) {
> + /*
> + * In the middle of an allocation group, we need to get
> + * the remainder of the chunk we are in.
> + */
> + struct xfs_inobt_rec_incore r;
> +
> + error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
> + if (error)
> + goto del_cursor;
> + if (icount) {
> + irbp->ir_startino = r.ir_startino;
> + irbp->ir_freecount = r.ir_freecount;
> + irbp->ir_free = r.ir_free;
> + irbp++;
> + }
> + /* Increment to the next record */
> + error = xfs_btree_increment(cur, 0, &stat);
> + } else {
> + /* Start of ag. Lookup the first inode chunk */
> + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
> + }
> + if (error || stat == 0) {
> + end_of_ag = true;
> + goto del_cursor;
> + }
> +
> + /*
> + * Loop through inode btree records in this ag,
> + * until we run out of inodes or space in the buffer.
> + */
> + while (irbp < irbufend && icount < ubcount) {
> + struct xfs_inobt_rec_incore r;
> +
> + error = xfs_inobt_get_rec(cur, &r, &stat);
> + if (error || stat == 0) {
> + end_of_ag = true;
> + goto del_cursor;
> + }
> +
> + /*
> + * If this chunk has any allocated inodes, save it.
> + * Also start read-ahead now for this chunk.
> + */
> + if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
> + xfs_bulkstat_ichunk_ra(mp, agno, &r);
> + irbp->ir_startino = r.ir_startino;
> + irbp->ir_freecount = r.ir_freecount;
> + irbp->ir_free = r.ir_free;
> + irbp++;
> + icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
> + }
> + error = xfs_btree_increment(cur, 0, &stat);
> + if (error || stat == 0) {
> + end_of_ag = true;
> + goto del_cursor;
> + }
> + cond_resched();
> + }
> +
> + /*
> + * Drop the btree buffers and the agi buffer as we can't hold any of the
> + * locks these represent when calling iget. If there is a pending error,
> + * then we are done.
> + */
odd indenting?
> +del_cursor:
> + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> + xfs_buf_relse(agbp);
> + if (error)
> + break;
> + /*
> + * Now format all the good inodes into the user's buffer. The
> + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
> + * for the next loop iteration.
> + */
> + irbufend = irbp;
> + for (irbp = irbuf;
> + irbp < irbufend && ac.ac_ubleft >= statstruct_size;
> + irbp++) {
> + error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, formatter,
> + statstruct_size, &ac, &agino);
> + if (error)
> + break;
> +
> + cond_resched();
> + }
> +
> + /*
> + * If we have run out of space or had a formatting error, we
> + * are now done
> + */
> + if (ac.ac_ubleft < statstruct_size || error)
> + break;
> +
> + if (end_of_ag) {
> + agno++;
> + agino = 0;
> + }
> + }
> + /*
> + * Done, we are either out of specified inode range or space to
> + * put the data.
> + */
> + kmem_free(irbuf);
> + *ubcountp = ac.ac_ubelem;
> +
> + /*
> + * We found some inodes, so clear the error status and return them.
> + * The lastino pointer will point directly at the inode that triggered
> + * any error that occurred, so on the next call the error will be
> + * triggered again and propagated to userspace as there will be no
> + * formatted inodes in the buffer.
> + */
> + if (ac.ac_ubelem)
> + error = 0;
> +
> + /*
> + * If we ran out of specified range, lastino will point off the end of
> + * the file range, so, the next call will return immediately.
> + */
> + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
> + if (agno >= end)
> + *done = 1;
> +
> + return error;
> +}
> +
> int
> xfs_inumbers_fmt(
> void __user *ubuffer, /* buffer to write to */
> diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
> index 6ea8b39..f8cf724 100644
> --- a/fs/xfs/xfs_itable.h
> +++ b/fs/xfs/xfs_itable.h
> @@ -50,6 +50,22 @@ xfs_bulkstat(
> char __user *ubuffer,/* buffer with inode stats */
> int *done); /* 1 if there are more stats to get */
>
> +/*
> + * Return stat information in bulk (by-inode) for the ispecified
> + * filesystem range.
> + */
> +int
> +xfs_bulkstat_range(
> + xfs_mount_t *mp, /* mount point for filesystem */
> + xfs_ino_t *lastinop, /* last inode returned */
> + int *ubcountp, /* size of buffer/count returned */
> + bulkstat_one_pf formatter, /* func that'd fill a single buf */
> + size_t statstruct_size,/* sizeof struct filling */
> + char __user *ubuffer,/* buffer with inode stats */
> + __u64 start, /* start bulkstat here */
> + __u64 end, /* end bulkstat here */
> + int *done); /* 1 if there are more stats to get */
> +
> typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
> void __user *ubuffer, /* buffer to write to */
> int ubsize, /* remaining user buffer sz */
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl
2015-03-03 14:41 [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl Carlos Maiolino
2015-03-03 20:58 ` Eric Sandeen
@ 2015-03-03 21:55 ` Dave Chinner
2015-03-06 17:34 ` Carlos Maiolino
1 sibling, 1 reply; 4+ messages in thread
From: Dave Chinner @ 2015-03-03 21:55 UTC (permalink / raw)
To: Carlos Maiolino; +Cc: xfs
On Tue, Mar 03, 2015 at 11:41:18AM -0300, Carlos Maiolino wrote:
> This patch aims to implement a ranged bulkstat ioctl, which will make users able
> to bulkstat inodes in a filesystem based on range instead on rely only in a
> whole filesystem bulkstat.
>
> This first implementation add a per AG bulkstat possibility, but it also adds
> the necessary infrastructure to implement different kinds of ranged bulkstat,
> like per block.
>
> The patch is already working and I've been testing it for a while, so, I think
> it's time to send this patch out and ask for comments on it.
>
> The core of the implementation is very similar with what xfs_bulsktat() does by
> now, which, instead bulkstat the whole filesystem, it start/stop on the
> specified range.
>
> As per Dave's suggesting, I added to rgbulkreq structure (used to pass data
> to/from userland), a padding, so we can use the same structure for another kind
> of ranged bulkstats without the need to actually change the structure size.
>
> Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> ---
> fs/xfs/libxfs/xfs_fs.h | 12 +++
> fs/xfs/xfs_ioctl.c | 56 +++++++++++++
> fs/xfs/xfs_itable.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_itable.h | 16 ++++
> 4 files changed, 294 insertions(+)
>
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index 18dc721..88665aa 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -334,6 +334,17 @@ typedef struct xfs_fsop_bulkreq {
> __s32 __user *ocount; /* output count pointer */
> } xfs_fsop_bulkreq_t;
>
> +typedef struct xfs_fsop_rgbulkreq {
> + __u64 __user *lastip; /* last inode # pointer */
> + __s32 icount; /* count of entries in buffer */
> + void __user *ubuffer;/* user buffer for inode desc. */
> + __s32 __user *ocount; /* output count pointer */
> + __u64 start; /* start point of rgbulkstat */
> + __u64 end; /* end point of rgbulkstat */
> + __u32 flags; /* multipurpose flag field */
> + __u32 pad[5]; /* reserved space */
> +} xfs_fsop_rgbulkreq_t;
No new typedefs. Also, call it struct xfs_fsop_bulkstat_range, so
it's clear what ioctl it belongs with.
And, as eric mentioned, the flag:
#define XFS_FSOP_BSRANGE_AGNUMBER 1
also needs to be defined to tell the kernel that start/end are ag
numbers.
> diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> index f7afb86..34a4de5 100644
> --- a/fs/xfs/xfs_ioctl.c
> +++ b/fs/xfs/xfs_ioctl.c
> @@ -790,6 +790,59 @@ xfs_ioc_bulkstat(
> }
>
> STATIC int
> +xfs_ioc_bulkstat_range(
> + xfs_mount_t *mp,
> + unsigned int cmd,
> + void __user *arg)
> +{
> +
> + xfs_fsop_rgbulkreq_t rgbulkreq;
Again, better name needed. Even just "bsreq" would be fine, because
it's just a bulkstat request structure....
> + int count; /* # of records returned */
> + xfs_ino_t inlast; /* last inode number */
> + int done;
> + int error;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return -EIO;
> +
> + /* We do not support another ranged calls yet */
> + if (cmd != XFS_IOC_FSBULKSTAT_RANGE)
> + return -EINVAL;
Not necessary.
> +
> + if (copy_from_user(&rgbulkreq, arg, sizeof(xfs_fsop_rgbulkreq_t)))
> + return -EFAULT;
> +
> + if (copy_from_user(&inlast, rgbulkreq.lastip, sizeof(__s64)))
> + return -EFAULT;
> +
> + if ((count = rgbulkreq.icount) <= 0)
> + return -EINVAL;
Assignments need to be on their own line.
> +
> + if (rgbulkreq.ubuffer == NULL)
> + return -EINVAL;
Missing is validation of the start, end and flags fields.
> +
> + error = xfs_bulkstat_range(mp, &inlast, &count, xfs_bulkstat_one,
> + sizeof(xfs_bstat_t), rgbulkreq.ubuffer,
> + rgbulkreq.start, rgbulkreq.end, &done);
Why do you need a "done" parameter? Also, just pass the bsreq
structure, not the individual elements. I don't see any point in
passing the formatter or size parameters at this point, either,
because xfs_bulkstat_range() can easily define them.
> diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> index 82e3142..d6b27ee 100644
> --- a/fs/xfs/xfs_itable.c
> +++ b/fs/xfs/xfs_itable.c
> @@ -530,6 +530,216 @@ del_cursor:
> return error;
> }
>
> +/*
> + * Return stat information in bulk (by-inode) for specified
> + * filesystem range.
> + */
> +int
> +xfs_bulkstat_range(
> + xfs_mount_t *mp, /* mount point for filesystem */
> + xfs_ino_t *lastinop, /* last inode returned */
> + int *ubcountp, /* size of buffer/count returned */
> + bulkstat_one_pf formatter, /* func that'd fill a single buf */
> + size_t statstruct_size, /* sizeof struct filling */
> + char __user *ubuffer, /* buffer with inode stats */
> + __u64 start, /* start bulkstat here */
> + __u64 end, /* end bulkstat here */
> + int *done) /* 1 if there are more stats to get */
> +{
> + xfs_buf_t *agbp; /* agi header buffer */
> + xfs_agino_t agino; /* inode # in allocation group */
> + xfs_agnumber_t agno; /* allocation group number */
> + xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
> + size_t irbsize; /* size of irec buffer in bytes */
> + xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
> + int nirbuf; /* size of irbuf */
> + int ubcount; /* size of user's buffer */
> + struct xfs_bulkstat_agichunk ac;
> + int error = 0;
> +
> + /*
> + * Get the last inode value, see if there is nothing to do.
> + */
> + if (*lastinop != 0)
> + agno = XFS_INO_TO_AGNO(mp, *lastinop);
> + else
> + agno = start;
> +
> + agino = XFS_INO_TO_AGINO(mp, *lastinop);
> +
> + /*
> + * If specified end is bigger than mp->m_sb.sb_agount, should we
> + * gracefully interpret as if there is nothing to do, or trigger
> + * an error?
> + */
We should have already errored out before we get here in this case.
> + if (agno >= mp->m_sb.sb_agcount) {
> + *done = 1;
> + *ubcountp = 0;
> + return 0;
> + }
>From this point onwards, you've pretty much copy-n-pasted the
xfs_bulkstat() implementation. What should happen here is:
for (agno = start; agno < end; agno++) {
error = xfs_bulkstat_range_ag(agno, ... );
if (error)
break;
}
and all this:
> + ubcount = *ubcountp;
> + ac.ac_ubuffer = &ubuffer;
> + ac.ac_ubleft = ubcount * statstruct_size; /* bytes */
> + ac.ac_ubelem = 0;
> +
> + *ubcountp = 0;
> + *done = 0;
> +
> + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
> + if (!irbuf)
> + return -ENOMEM;
> +
> + nirbuf = irbsize / sizeof(*irbuf);
> +
> + /*
> + * Loop over the allocation groups, starting from the last inode
> + * returned; - means start of the allocation group.
> + */
> + while (agno <= end) {
> + struct xfs_inobt_rec_incore *irbp = irbuf;
> + struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
> + bool end_of_ag = false;
> + int icount = 0;
> + int stat;
> +
> + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
> + if (error)
> + break;
> +
> + /*
> + * Allocate and initialize a btree cursor for ialloc btree.
> + */
> + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
> + XFS_BTNUM_INO);
> +
> + if (agino > 0) {
> + /*
> + * In the middle of an allocation group, we need to get
> + * the remainder of the chunk we are in.
> + */
> + struct xfs_inobt_rec_incore r;
> +
> + error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
> + if (error)
> + goto del_cursor;
> + if (icount) {
> + irbp->ir_startino = r.ir_startino;
> + irbp->ir_freecount = r.ir_freecount;
> + irbp->ir_free = r.ir_free;
> + irbp++;
> + }
> + /* Increment to the next record */
> + error = xfs_btree_increment(cur, 0, &stat);
> + } else {
> + /* Start of ag. Lookup the first inode chunk */
> + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
> + }
> + if (error || stat == 0) {
> + end_of_ag = true;
> + goto del_cursor;
> + }
> +
> + /*
> + * Loop through inode btree records in this ag,
> + * until we run out of inodes or space in the buffer.
> + */
> + while (irbp < irbufend && icount < ubcount) {
> + struct xfs_inobt_rec_incore r;
> +
> + error = xfs_inobt_get_rec(cur, &r, &stat);
> + if (error || stat == 0) {
> + end_of_ag = true;
> + goto del_cursor;
> + }
> +
> + /*
> + * If this chunk has any allocated inodes, save it.
> + * Also start read-ahead now for this chunk.
> + */
> + if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
> + xfs_bulkstat_ichunk_ra(mp, agno, &r);
> + irbp->ir_startino = r.ir_startino;
> + irbp->ir_freecount = r.ir_freecount;
> + irbp->ir_free = r.ir_free;
> + irbp++;
> + icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
> + }
> + error = xfs_btree_increment(cur, 0, &stat);
> + if (error || stat == 0) {
> + end_of_ag = true;
> + goto del_cursor;
> + }
> + cond_resched();
> + }
> +
> + /*
> + * Drop the btree buffers and the agi buffer as we can't hold any of the
> + * locks these represent when calling iget. If there is a pending error,
> + * then we are done.
> + */
> +del_cursor:
> + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> + xfs_buf_relse(agbp);
> + if (error)
> + break;
> + /*
> + * Now format all the good inodes into the user's buffer. The
> + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
> + * for the next loop iteration.
> + */
> + irbufend = irbp;
> + for (irbp = irbuf;
> + irbp < irbufend && ac.ac_ubleft >= statstruct_size;
> + irbp++) {
> + error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, formatter,
> + statstruct_size, &ac, &agino);
> + if (error)
> + break;
> +
> + cond_resched();
> + }
> +
> + /*
> + * If we have run out of space or had a formatting error, we
> + * are now done
> + */
> + if (ac.ac_ubleft < statstruct_size || error)
> + break;
> +
> + if (end_of_ag) {
> + agno++;
> + agino = 0;
> + }
> + }
> + /*
> + * Done, we are either out of specified inode range or space to
> + * put the data.
> + */
> + kmem_free(irbuf);
> + *ubcountp = ac.ac_ubelem;
Up to here is done by xfs_bulkstat_range_ag(). This factoring should
be done to xfs_bulkstat() as the first patch in the series, then you
can add the xfs_bulkstat_range() function that uses it, then finally
introduce the ioctl that wraps around xfs_bulkstat_range().
This avoids all of the copy/paste, and ensures that the bulkstat
APIs all use the same implementation which makes testing and
maintenance of the code in future much simpler.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl
2015-03-03 21:55 ` Dave Chinner
@ 2015-03-06 17:34 ` Carlos Maiolino
0 siblings, 0 replies; 4+ messages in thread
From: Carlos Maiolino @ 2015-03-06 17:34 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
Hi Eric and Dave,
thank you for the reply, I completely forgot about our talk regarding refactor
xfs_bulkstat(), the idea to send the patch here was to get more comments into
it, so, looks like I got what I wanted :-)
I'll work on it and refactor xfs_bulkstat() and also implement the ideas you
gave.
Thanks
On Wed, Mar 04, 2015 at 08:55:57AM +1100, Dave Chinner wrote:
> On Tue, Mar 03, 2015 at 11:41:18AM -0300, Carlos Maiolino wrote:
> > This patch aims to implement a ranged bulkstat ioctl, which will make users able
> > to bulkstat inodes in a filesystem based on range instead on rely only in a
> > whole filesystem bulkstat.
> >
> > This first implementation add a per AG bulkstat possibility, but it also adds
> > the necessary infrastructure to implement different kinds of ranged bulkstat,
> > like per block.
> >
> > The patch is already working and I've been testing it for a while, so, I think
> > it's time to send this patch out and ask for comments on it.
> >
> > The core of the implementation is very similar with what xfs_bulsktat() does by
> > now, which, instead bulkstat the whole filesystem, it start/stop on the
> > specified range.
> >
> > As per Dave's suggesting, I added to rgbulkreq structure (used to pass data
> > to/from userland), a padding, so we can use the same structure for another kind
> > of ranged bulkstats without the need to actually change the structure size.
> >
> > Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
> > ---
> > fs/xfs/libxfs/xfs_fs.h | 12 +++
> > fs/xfs/xfs_ioctl.c | 56 +++++++++++++
> > fs/xfs/xfs_itable.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++
> > fs/xfs/xfs_itable.h | 16 ++++
> > 4 files changed, 294 insertions(+)
> >
> > diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> > index 18dc721..88665aa 100644
> > --- a/fs/xfs/libxfs/xfs_fs.h
> > +++ b/fs/xfs/libxfs/xfs_fs.h
> > @@ -334,6 +334,17 @@ typedef struct xfs_fsop_bulkreq {
> > __s32 __user *ocount; /* output count pointer */
> > } xfs_fsop_bulkreq_t;
> >
> > +typedef struct xfs_fsop_rgbulkreq {
> > + __u64 __user *lastip; /* last inode # pointer */
> > + __s32 icount; /* count of entries in buffer */
> > + void __user *ubuffer;/* user buffer for inode desc. */
> > + __s32 __user *ocount; /* output count pointer */
> > + __u64 start; /* start point of rgbulkstat */
> > + __u64 end; /* end point of rgbulkstat */
> > + __u32 flags; /* multipurpose flag field */
> > + __u32 pad[5]; /* reserved space */
> > +} xfs_fsop_rgbulkreq_t;
>
> No new typedefs. Also, call it struct xfs_fsop_bulkstat_range, so
> it's clear what ioctl it belongs with.
>
> And, as eric mentioned, the flag:
>
> #define XFS_FSOP_BSRANGE_AGNUMBER 1
>
> also needs to be defined to tell the kernel that start/end are ag
> numbers.
>
> > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
> > index f7afb86..34a4de5 100644
> > --- a/fs/xfs/xfs_ioctl.c
> > +++ b/fs/xfs/xfs_ioctl.c
> > @@ -790,6 +790,59 @@ xfs_ioc_bulkstat(
> > }
> >
> > STATIC int
> > +xfs_ioc_bulkstat_range(
> > + xfs_mount_t *mp,
> > + unsigned int cmd,
> > + void __user *arg)
> > +{
> > +
> > + xfs_fsop_rgbulkreq_t rgbulkreq;
>
> Again, better name needed. Even just "bsreq" would be fine, because
> it's just a bulkstat request structure....
>
> > + int count; /* # of records returned */
> > + xfs_ino_t inlast; /* last inode number */
> > + int done;
> > + int error;
> > +
> > + if (!capable(CAP_SYS_ADMIN))
> > + return -EPERM;
> > +
> > + if (XFS_FORCED_SHUTDOWN(mp))
> > + return -EIO;
> > +
> > + /* We do not support another ranged calls yet */
> > + if (cmd != XFS_IOC_FSBULKSTAT_RANGE)
> > + return -EINVAL;
>
> Not necessary.
>
> > +
> > + if (copy_from_user(&rgbulkreq, arg, sizeof(xfs_fsop_rgbulkreq_t)))
> > + return -EFAULT;
> > +
> > + if (copy_from_user(&inlast, rgbulkreq.lastip, sizeof(__s64)))
> > + return -EFAULT;
> > +
> > + if ((count = rgbulkreq.icount) <= 0)
> > + return -EINVAL;
>
> Assignments need to be on their own line.
>
> > +
> > + if (rgbulkreq.ubuffer == NULL)
> > + return -EINVAL;
>
> Missing is validation of the start, end and flags fields.
>
> > +
> > + error = xfs_bulkstat_range(mp, &inlast, &count, xfs_bulkstat_one,
> > + sizeof(xfs_bstat_t), rgbulkreq.ubuffer,
> > + rgbulkreq.start, rgbulkreq.end, &done);
>
> Why do you need a "done" parameter? Also, just pass the bsreq
> structure, not the individual elements. I don't see any point in
> passing the formatter or size parameters at this point, either,
> because xfs_bulkstat_range() can easily define them.
>
> > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> > index 82e3142..d6b27ee 100644
> > --- a/fs/xfs/xfs_itable.c
> > +++ b/fs/xfs/xfs_itable.c
> > @@ -530,6 +530,216 @@ del_cursor:
> > return error;
> > }
> >
> > +/*
> > + * Return stat information in bulk (by-inode) for specified
> > + * filesystem range.
> > + */
> > +int
> > +xfs_bulkstat_range(
> > + xfs_mount_t *mp, /* mount point for filesystem */
> > + xfs_ino_t *lastinop, /* last inode returned */
> > + int *ubcountp, /* size of buffer/count returned */
> > + bulkstat_one_pf formatter, /* func that'd fill a single buf */
> > + size_t statstruct_size, /* sizeof struct filling */
> > + char __user *ubuffer, /* buffer with inode stats */
> > + __u64 start, /* start bulkstat here */
> > + __u64 end, /* end bulkstat here */
> > + int *done) /* 1 if there are more stats to get */
> > +{
> > + xfs_buf_t *agbp; /* agi header buffer */
> > + xfs_agino_t agino; /* inode # in allocation group */
> > + xfs_agnumber_t agno; /* allocation group number */
> > + xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
> > + size_t irbsize; /* size of irec buffer in bytes */
> > + xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
> > + int nirbuf; /* size of irbuf */
> > + int ubcount; /* size of user's buffer */
> > + struct xfs_bulkstat_agichunk ac;
> > + int error = 0;
> > +
> > + /*
> > + * Get the last inode value, see if there is nothing to do.
> > + */
> > + if (*lastinop != 0)
> > + agno = XFS_INO_TO_AGNO(mp, *lastinop);
> > + else
> > + agno = start;
> > +
> > + agino = XFS_INO_TO_AGINO(mp, *lastinop);
> > +
> > + /*
> > + * If specified end is bigger than mp->m_sb.sb_agount, should we
> > + * gracefully interpret as if there is nothing to do, or trigger
> > + * an error?
> > + */
>
> We should have already errored out before we get here in this case.
>
> > + if (agno >= mp->m_sb.sb_agcount) {
> > + *done = 1;
> > + *ubcountp = 0;
> > + return 0;
> > + }
>
> From this point onwards, you've pretty much copy-n-pasted the
> xfs_bulkstat() implementation. What should happen here is:
>
> for (agno = start; agno < end; agno++) {
> error = xfs_bulkstat_range_ag(agno, ... );
> if (error)
> break;
> }
>
> and all this:
>
> > + ubcount = *ubcountp;
> > + ac.ac_ubuffer = &ubuffer;
> > + ac.ac_ubleft = ubcount * statstruct_size; /* bytes */
> > + ac.ac_ubelem = 0;
> > +
> > + *ubcountp = 0;
> > + *done = 0;
> > +
> > + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
> > + if (!irbuf)
> > + return -ENOMEM;
> > +
> > + nirbuf = irbsize / sizeof(*irbuf);
> > +
> > + /*
> > + * Loop over the allocation groups, starting from the last inode
> > + * returned; - means start of the allocation group.
> > + */
> > + while (agno <= end) {
> > + struct xfs_inobt_rec_incore *irbp = irbuf;
> > + struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
> > + bool end_of_ag = false;
> > + int icount = 0;
> > + int stat;
> > +
> > + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
> > + if (error)
> > + break;
> > +
> > + /*
> > + * Allocate and initialize a btree cursor for ialloc btree.
> > + */
> > + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
> > + XFS_BTNUM_INO);
> > +
> > + if (agino > 0) {
> > + /*
> > + * In the middle of an allocation group, we need to get
> > + * the remainder of the chunk we are in.
> > + */
> > + struct xfs_inobt_rec_incore r;
> > +
> > + error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
> > + if (error)
> > + goto del_cursor;
> > + if (icount) {
> > + irbp->ir_startino = r.ir_startino;
> > + irbp->ir_freecount = r.ir_freecount;
> > + irbp->ir_free = r.ir_free;
> > + irbp++;
> > + }
> > + /* Increment to the next record */
> > + error = xfs_btree_increment(cur, 0, &stat);
> > + } else {
> > + /* Start of ag. Lookup the first inode chunk */
> > + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
> > + }
> > + if (error || stat == 0) {
> > + end_of_ag = true;
> > + goto del_cursor;
> > + }
> > +
> > + /*
> > + * Loop through inode btree records in this ag,
> > + * until we run out of inodes or space in the buffer.
> > + */
> > + while (irbp < irbufend && icount < ubcount) {
> > + struct xfs_inobt_rec_incore r;
> > +
> > + error = xfs_inobt_get_rec(cur, &r, &stat);
> > + if (error || stat == 0) {
> > + end_of_ag = true;
> > + goto del_cursor;
> > + }
> > +
> > + /*
> > + * If this chunk has any allocated inodes, save it.
> > + * Also start read-ahead now for this chunk.
> > + */
> > + if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
> > + xfs_bulkstat_ichunk_ra(mp, agno, &r);
> > + irbp->ir_startino = r.ir_startino;
> > + irbp->ir_freecount = r.ir_freecount;
> > + irbp->ir_free = r.ir_free;
> > + irbp++;
> > + icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
> > + }
> > + error = xfs_btree_increment(cur, 0, &stat);
> > + if (error || stat == 0) {
> > + end_of_ag = true;
> > + goto del_cursor;
> > + }
> > + cond_resched();
> > + }
> > +
> > + /*
> > + * Drop the btree buffers and the agi buffer as we can't hold any of the
> > + * locks these represent when calling iget. If there is a pending error,
> > + * then we are done.
> > + */
> > +del_cursor:
> > + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> > + xfs_buf_relse(agbp);
> > + if (error)
> > + break;
> > + /*
> > + * Now format all the good inodes into the user's buffer. The
> > + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
> > + * for the next loop iteration.
> > + */
> > + irbufend = irbp;
> > + for (irbp = irbuf;
> > + irbp < irbufend && ac.ac_ubleft >= statstruct_size;
> > + irbp++) {
> > + error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, formatter,
> > + statstruct_size, &ac, &agino);
> > + if (error)
> > + break;
> > +
> > + cond_resched();
> > + }
> > +
> > + /*
> > + * If we have run out of space or had a formatting error, we
> > + * are now done
> > + */
> > + if (ac.ac_ubleft < statstruct_size || error)
> > + break;
> > +
> > + if (end_of_ag) {
> > + agno++;
> > + agino = 0;
> > + }
> > + }
> > + /*
> > + * Done, we are either out of specified inode range or space to
> > + * put the data.
> > + */
> > + kmem_free(irbuf);
> > + *ubcountp = ac.ac_ubelem;
>
> Up to here is done by xfs_bulkstat_range_ag(). This factoring should
> be done to xfs_bulkstat() as the first patch in the series, then you
> can add the xfs_bulkstat_range() function that uses it, then finally
> introduce the ioctl that wraps around xfs_bulkstat_range().
>
> This avoids all of the copy/paste, and ensures that the bulkstat
> APIs all use the same implementation which makes testing and
> maintenance of the code in future much simpler.
>
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com
--
Carlos
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-03-06 17:34 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-03 14:41 [PATCH] [RFC] Implement XFS_IOC_FSBULKSTAT_RANGE ioctl Carlos Maiolino
2015-03-03 20:58 ` Eric Sandeen
2015-03-03 21:55 ` Dave Chinner
2015-03-06 17:34 ` Carlos Maiolino
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox