* [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-04 12:53 [PATCH 0/6] xfs: fix the bulkstat mess Dave Chinner
@ 2014-11-04 12:53 ` Dave Chinner
2014-11-04 19:00 ` Brian Foster
0 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-04 12:53 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The bulkstat main loop progress is tracked by the "lastino"
variable, which is a full 64 bit inode. However, the loop actually
works on agno/agino pairs, and so there's a significant disconnect
between the rest of the loop and the main cursor. Convert this to
use the agino, and pass the agino into the chunk formatting function
and convert it too.
This gets rid of the inconsistency in the loop processing, and
finally makes it simple for us to skip inodes at any point in the
loop simply by incrementing the agino cursor.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_itable.c | 50 +++++++++++++++++++-------------------------------
1 file changed, 19 insertions(+), 31 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 6a4ef8e..2a6f2e8 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -282,45 +282,39 @@ xfs_bulkstat_ag_ichunk(
bulkstat_one_pf formatter,
size_t statstruct_size,
struct xfs_bulkstat_agichunk *acp,
- xfs_ino_t *lastino)
+ xfs_agino_t *last_agino)
{
char __user **ubufp = acp->ac_ubuffer;
int chunkidx;
int error = 0;
- xfs_agino_t agino;
- agino = irbp->ir_startino;
+ *last_agino = irbp->ir_startino;
for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx++, agino++) {
+ chunkidx++, (*last_agino)++) {
int fmterror;
int ubused;
- xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
/* Skip if this inode is free */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- *lastino = ino;
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
continue;
- }
/* Get the inode and fill in a single buffer */
ubused = statstruct_size;
- error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
- &ubused, &fmterror);
+ error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, *last_agino),
+ *ubufp, acp->ac_ubleft, &ubused, &fmterror);
+
if (fmterror == BULKSTAT_RV_GIVEUP ||
(error && error != -ENOENT && error != -EINVAL)) {
acp->ac_ubleft = 0;
ASSERT(error);
break;
}
- if (fmterror == BULKSTAT_RV_NOTHING || error) {
- *lastino = ino;
+ if (fmterror == BULKSTAT_RV_NOTHING || error)
continue;
- }
*ubufp += ubused;
acp->ac_ubleft -= ubused;
acp->ac_ubelem++;
- *lastino = ino;
if (acp->ac_ubleft < statstruct_size)
break;
@@ -349,7 +343,6 @@ xfs_bulkstat(
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
struct xfs_bulkstat_agichunk ac;
@@ -358,11 +351,10 @@ xfs_bulkstat(
/*
* Get the last inode value, see if there's nothing to do.
*/
- lastino = *lastinop;
- agno = XFS_INO_TO_AGNO(mp, lastino);
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ agno = XFS_INO_TO_AGNO(mp, *lastinop);
+ agino = XFS_INO_TO_AGINO(mp, *lastinop);
if (agno >= mp->m_sb.sb_agcount ||
- lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+ *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
*done = 1;
*ubcountp = 0;
return 0;
@@ -486,7 +478,7 @@ del_cursor:
irbp++) {
error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
formatter, statstruct_size, &ac,
- &lastino);
+ &agino);
if (error)
break;
@@ -499,8 +491,7 @@ del_cursor:
if (end_of_ag) {
agno++;
agino = 0;
- } else
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ }
}
/*
* Done, we're either out of filesystem or space to put the data.
@@ -518,16 +509,13 @@ del_cursor:
if (ac.ac_ubelem)
error = 0;
- if (agno >= mp->m_sb.sb_agcount) {
- /*
- * If we ran out of filesystem, mark lastino as off
- * the end of the filesystem, so the next call
- * will return immediately.
- */
- *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
+ /*
+ * If we ran out of filesystem, lastino will point off the end of
+ * the filesystem so the next call will return immediately.
+ */
+ *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
+ if (agno >= mp->m_sb.sb_agcount)
*done = 1;
- } else
- *lastinop = (xfs_ino_t)lastino;
return error;
}
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-04 12:53 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
@ 2014-11-04 19:00 ` Brian Foster
2014-11-04 21:39 ` Dave Chinner
0 siblings, 1 reply; 21+ messages in thread
From: Brian Foster @ 2014-11-04 19:00 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Tue, Nov 04, 2014 at 11:53:21PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The bulkstat main loop progress is tracked by the "lastino"
> variable, which is a full 64 bit inode. However, the loop actually
> works on agno/agino pairs, and so there's a significant disconnect
> between the rest of the loop and the main cursor. Convert this to
> use the agino, and pass the agino into the chunk formatting function
> and convert it too.
>
> This gets rid of the inconsistency in the loop processing, and
> finally makes it simple for us to skip inodes at any point in the
> loop simply by incrementing the agino cursor.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> fs/xfs/xfs_itable.c | 50 +++++++++++++++++++-------------------------------
> 1 file changed, 19 insertions(+), 31 deletions(-)
>
> diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> index 6a4ef8e..2a6f2e8 100644
> --- a/fs/xfs/xfs_itable.c
> +++ b/fs/xfs/xfs_itable.c
> @@ -282,45 +282,39 @@ xfs_bulkstat_ag_ichunk(
> bulkstat_one_pf formatter,
> size_t statstruct_size,
> struct xfs_bulkstat_agichunk *acp,
> - xfs_ino_t *lastino)
> + xfs_agino_t *last_agino)
> {
> char __user **ubufp = acp->ac_ubuffer;
> int chunkidx;
> int error = 0;
> - xfs_agino_t agino;
>
> - agino = irbp->ir_startino;
> + *last_agino = irbp->ir_startino;
Shouldn't last_agino refer to the last inode written out to the
userspace buffer? I'm not familiar with the semantics that xfsdump
expects or works around here as you've discussed in the commit logs and
whatnot. What happens if we set last_agino here and error out in the
first iteration of the loop?
> for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
> - chunkidx++, agino++) {
> + chunkidx++, (*last_agino)++) {
Similar question here when we handle a full record and increment
*last_agino beyond the last agino in the record. Unless I'm missing
something, it looks to me that if the next record is adjacent,
xfs_bulkstat_grab_ichunk() would skip the first inode in that record.
> int fmterror;
> int ubused;
> - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
>
> /* Skip if this inode is free */
> - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
> - *lastino = ino;
> + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
> continue;
> - }
>
> /* Get the inode and fill in a single buffer */
> ubused = statstruct_size;
> - error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
> - &ubused, &fmterror);
> + error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, *last_agino),
> + *ubufp, acp->ac_ubleft, &ubused, &fmterror);
> +
> if (fmterror == BULKSTAT_RV_GIVEUP ||
> (error && error != -ENOENT && error != -EINVAL)) {
> acp->ac_ubleft = 0;
> ASSERT(error);
> break;
> }
> - if (fmterror == BULKSTAT_RV_NOTHING || error) {
> - *lastino = ino;
> + if (fmterror == BULKSTAT_RV_NOTHING || error)
> continue;
> - }
>
> *ubufp += ubused;
> acp->ac_ubleft -= ubused;
> acp->ac_ubelem++;
> - *lastino = ino;
>
> if (acp->ac_ubleft < statstruct_size)
> break;
> @@ -349,7 +343,6 @@ xfs_bulkstat(
> xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
> size_t irbsize; /* size of irec buffer in bytes */
> xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
> - xfs_ino_t lastino; /* last inode number returned */
> int nirbuf; /* size of irbuf */
> int ubcount; /* size of user's buffer */
> struct xfs_bulkstat_agichunk ac;
> @@ -358,11 +351,10 @@ xfs_bulkstat(
> /*
> * Get the last inode value, see if there's nothing to do.
> */
> - lastino = *lastinop;
> - agno = XFS_INO_TO_AGNO(mp, lastino);
> - agino = XFS_INO_TO_AGINO(mp, lastino);
> + agno = XFS_INO_TO_AGNO(mp, *lastinop);
> + agino = XFS_INO_TO_AGINO(mp, *lastinop);
> if (agno >= mp->m_sb.sb_agcount ||
> - lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
> + *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
> *done = 1;
> *ubcountp = 0;
> return 0;
> @@ -486,7 +478,7 @@ del_cursor:
> irbp++) {
> error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
> formatter, statstruct_size, &ac,
> - &lastino);
> + &agino);
> if (error)
> break;
>
> @@ -499,8 +491,7 @@ del_cursor:
> if (end_of_ag) {
> agno++;
> agino = 0;
> - } else
> - agino = XFS_INO_TO_AGINO(mp, lastino);
> + }
> }
> /*
> * Done, we're either out of filesystem or space to put the data.
> @@ -518,16 +509,13 @@ del_cursor:
> if (ac.ac_ubelem)
> error = 0;
>
> - if (agno >= mp->m_sb.sb_agcount) {
> - /*
> - * If we ran out of filesystem, mark lastino as off
> - * the end of the filesystem, so the next call
> - * will return immediately.
> - */
> - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
> + /*
> + * If we ran out of filesystem, lastino will point off the end of
> + * the filesystem so the next call will return immediately.
> + */
> + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
This means that the main loop should never move agino forward unless the
inodes up through agino are guaranteed to be copied out to the user
buffer. There are two places in the main loop where we do this:
agino = r.ir_startino + XFS_INODES_PER_CHUNK;
... and both of those lines execute before a potential error path that
breaks out of the main loop before that current record is formatted out.
Those lines appear to be spurious at this point, however, so I suspect
we can just kill them.
Brian
> + if (agno >= mp->m_sb.sb_agcount)
> *done = 1;
> - } else
> - *lastinop = (xfs_ino_t)lastino;
>
> return error;
> }
> --
> 2.0.0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-04 19:00 ` Brian Foster
@ 2014-11-04 21:39 ` Dave Chinner
0 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-04 21:39 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Tue, Nov 04, 2014 at 02:00:06PM -0500, Brian Foster wrote:
> On Tue, Nov 04, 2014 at 11:53:21PM +1100, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > The bulkstat main loop progress is tracked by the "lastino"
> > variable, which is a full 64 bit inode. However, the loop actually
> > works on agno/agino pairs, and so there's a significant disconnect
> > between the rest of the loop and the main cursor. Convert this to
> > use the agino, and pass the agino into the chunk formatting function
> > and convert it too.
> >
> > This gets rid of the inconsistency in the loop processing, and
> > finally makes it simple for us to skip inodes at any point in the
> > loop simply by incrementing the agino cursor.
> >
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> > fs/xfs/xfs_itable.c | 50 +++++++++++++++++++-------------------------------
> > 1 file changed, 19 insertions(+), 31 deletions(-)
> >
> > diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> > index 6a4ef8e..2a6f2e8 100644
> > --- a/fs/xfs/xfs_itable.c
> > +++ b/fs/xfs/xfs_itable.c
> > @@ -282,45 +282,39 @@ xfs_bulkstat_ag_ichunk(
> > bulkstat_one_pf formatter,
> > size_t statstruct_size,
> > struct xfs_bulkstat_agichunk *acp,
> > - xfs_ino_t *lastino)
> > + xfs_agino_t *last_agino)
> > {
> > char __user **ubufp = acp->ac_ubuffer;
> > int chunkidx;
> > int error = 0;
> > - xfs_agino_t agino;
> >
> > - agino = irbp->ir_startino;
> > + *last_agino = irbp->ir_startino;
>
> Shouldn't last_agino refer to the last inode written out to the
> userspace buffer? I'm not familiar with the semantics that xfsdump
> expects or works around here as you've discussed in the commit logs and
> whatnot. What happens if we set last_agino here and error out in the
> first iteration of the loop?
lastino is supposed to represent the last inode scanned, not
necessarily the last inode formatted into the buffer. This can be
seen that we update lastino even if the inode if free - it hasn't
been written into the user buffer, but we have processed it.
>
> > for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
> > - chunkidx++, agino++) {
> > + chunkidx++, (*last_agino)++) {
>
> Similar question here when we handle a full record and increment
> *last_agino beyond the last agino in the record. Unless I'm missing
> something, it looks to me that if the next record is adjacent,
> xfs_bulkstat_grab_ichunk() would skip the first inode in that record.
Hmmm - yes, that could occur. I have another patch that I didn't
post that changes the xfs_bulkstat_grab_ichunk() to not skip the
incoming lastino value, but I reasoned that it was a change of
behaviour and hence could cause xfsdump to break.
I'll rework *last_agino to do post-update rather than pre-update.
> > * Done, we're either out of filesystem or space to put the data.
> > @@ -518,16 +509,13 @@ del_cursor:
> > if (ac.ac_ubelem)
> > error = 0;
> >
> > - if (agno >= mp->m_sb.sb_agcount) {
> > - /*
> > - * If we ran out of filesystem, mark lastino as off
> > - * the end of the filesystem, so the next call
> > - * will return immediately.
> > - */
> > - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
> > + /*
> > + * If we ran out of filesystem, lastino will point off the end of
> > + * the filesystem so the next call will return immediately.
> > + */
> > + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
>
> This means that the main loop should never move agino forward unless the
> inodes up through agino are guaranteed to be copied out to the user
> buffer. There are two places in the main loop where we do this:
>
> agino = r.ir_startino + XFS_INODES_PER_CHUNK;
>
> ... and both of those lines execute before a potential error path that
> breaks out of the main loop before that current record is formatted out.
> Those lines appear to be spurious at this point, however, so I suspect
> we can just kill them.
*nod*
I was in two minds about that, but then figured agino would be
overwritten by the formatting callout if there was no error. I
didn't consider the error breakout case, so yes, those updates need
to die...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 0/6 v2] xfs: fix the bulkstat mess
@ 2014-11-05 0:05 Dave Chinner
2014-11-05 0:05 ` [PATCH 1/6] xfs: bulkstat btree walk doesn't terminate Dave Chinner
` (6 more replies)
0 siblings, 7 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
Hi folks, this is version 2 of the bulkstat fixup series first
posted here:
http://oss.sgi.com/archives/xfs/2014-11/msg00057.html
Version 2 fixes the issues Brian found during review:
- chunk formatter error leakage (patch 3)
- moved main loop chunk formatter error handling from patch 4 to
patch 5
- reworks last_agino updating in patch 6 to do post-formatting
updates and added comments.
Comeents and testing welcome.
-Dave.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 1/6] xfs: bulkstat btree walk doesn't terminate
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
@ 2014-11-05 0:05 ` Dave Chinner
2014-11-05 0:05 ` [PATCH 2/6] xfs: bulkstat chunk formatting cursor is broken Dave Chinner
` (5 subsequent siblings)
6 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The bulkstat code has several different ways of detecting the end of
an AG when doing a walk. They are not consistently detected, and the
code that checks for the end of AG conditions is not consistently
coded. Hence the are conditions where the walk code can get stuck in
an endless loop making no progress and not triggering any
termination conditions.
Convert all the "tmp/i" status return codes from btree operations
to a common name (stat) and apply end-of-ag detection to these
operations consistently.
cc: stable@vger.kernel.org
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_itable.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 7765ff7..16737cb 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -356,7 +356,6 @@ xfs_bulkstat(
int end_of_ag; /* set if we've seen the ag end */
int error; /* error code */
int fmterror;/* bulkstat formatter result */
- int i; /* loop index */
int icount; /* count of inodes good in irbuf */
size_t irbsize; /* size of irec buffer in bytes */
xfs_ino_t ino; /* inode number (filesystem) */
@@ -366,11 +365,11 @@ xfs_bulkstat(
xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
int rval; /* return value error code */
- int tmp; /* result value from btree calls */
int ubcount; /* size of user's buffer */
int ubleft; /* bytes left in user's buffer */
char __user *ubufp; /* pointer into user's buffer */
int ubelem; /* spaces used in user's buffer */
+ int stat;
/*
* Get the last inode value, see if there's nothing to do.
@@ -436,13 +435,15 @@ xfs_bulkstat(
agino = r.ir_startino + XFS_INODES_PER_CHUNK;
}
/* Increment to the next record */
- error = xfs_btree_increment(cur, 0, &tmp);
+ error = xfs_btree_increment(cur, 0, &stat);
} else {
/* Start of ag. Lookup the first inode chunk */
- error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
+ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
}
- if (error)
+ if (error || stat == 0) {
+ end_of_ag = 1;
goto del_cursor;
+ }
/*
* Loop through inode btree records in this ag,
@@ -451,8 +452,8 @@ xfs_bulkstat(
while (irbp < irbufend && icount < ubcount) {
struct xfs_inobt_rec_incore r;
- error = xfs_inobt_get_rec(cur, &r, &i);
- if (error || i == 0) {
+ error = xfs_inobt_get_rec(cur, &r, &stat);
+ if (error || stat == 0) {
end_of_ag = 1;
goto del_cursor;
}
@@ -473,8 +474,8 @@ xfs_bulkstat(
* Set agino to after this chunk and bump the cursor.
*/
agino = r.ir_startino + XFS_INODES_PER_CHUNK;
- error = xfs_btree_increment(cur, 0, &tmp);
- if (error) {
+ error = xfs_btree_increment(cur, 0, &stat);
+ if (error || stat == 0) {
end_of_ag = 1;
goto del_cursor;
}
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 2/6] xfs: bulkstat chunk formatting cursor is broken
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
2014-11-05 0:05 ` [PATCH 1/6] xfs: bulkstat btree walk doesn't terminate Dave Chinner
@ 2014-11-05 0:05 ` Dave Chinner
2014-11-05 0:05 ` [PATCH 3/6] xfs: bulkstat chunk-formatter has issues Dave Chinner
` (4 subsequent siblings)
6 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The xfs_bulkstat_agichunk formatting cursor takes buffer values from
the main loop and passes them via the structure to the chunk
formatter, and the writes the changed values back into the main loop
local variables. Unfortunately, this complex dance is full of corner
cases that aren't handled correctly.
The biggest problem is that it is double handling the information in
both the main loop and the chunk formatting function, leading to
inconsistent updates and endless loops where progress is not made.
To fix this, push the struct xfs_bulkstat_agichunk outwards to be
the primary holder of user buffer information. this removes the
double handling in the main loop.
Also, pass the last inode processed by the chunk formatter as a
separate parameter as it purely an output variable and is not
related to the user buffer consumption cursor.
Finally, the chunk formatting code is not shared by anyone, so make
it local to xfs_itable.c.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_itable.c | 59 +++++++++++++++++++++++++----------------------------
fs/xfs/xfs_itable.h | 16 ---------------
2 files changed, 28 insertions(+), 47 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 16737cb..50a3e59 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -262,20 +262,26 @@ xfs_bulkstat_grab_ichunk(
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
+struct xfs_bulkstat_agichunk {
+ char __user **ac_ubuffer;/* pointer into user's buffer */
+ int ac_ubleft; /* bytes left in user's buffer */
+ int ac_ubelem; /* spaces used in user's buffer */
+};
+
/*
* Process inodes in chunk with a pointer to a formatter function
* that will iget the inode and fill in the appropriate structure.
*/
-int
+static int
xfs_bulkstat_ag_ichunk(
struct xfs_mount *mp,
xfs_agnumber_t agno,
struct xfs_inobt_rec_incore *irbp,
bulkstat_one_pf formatter,
size_t statstruct_size,
- struct xfs_bulkstat_agichunk *acp)
+ struct xfs_bulkstat_agichunk *acp,
+ xfs_ino_t *lastino)
{
- xfs_ino_t lastino = acp->ac_lastino;
char __user **ubufp = acp->ac_ubuffer;
int ubleft = acp->ac_ubleft;
int ubelem = acp->ac_ubelem;
@@ -295,7 +301,7 @@ xfs_bulkstat_ag_ichunk(
/* Skip if this inode is free */
if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- lastino = ino;
+ *lastino = ino;
continue;
}
@@ -313,7 +319,7 @@ xfs_bulkstat_ag_ichunk(
ubleft = 0;
break;
}
- lastino = ino;
+ *lastino = ino;
continue;
}
if (fmterror == BULKSTAT_RV_GIVEUP) {
@@ -325,10 +331,9 @@ xfs_bulkstat_ag_ichunk(
*ubufp += ubused;
ubleft -= ubused;
ubelem++;
- lastino = ino;
+ *lastino = ino;
}
- acp->ac_lastino = lastino;
acp->ac_ubleft = ubleft;
acp->ac_ubelem = ubelem;
@@ -355,7 +360,6 @@ xfs_bulkstat(
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
int end_of_ag; /* set if we've seen the ag end */
int error; /* error code */
- int fmterror;/* bulkstat formatter result */
int icount; /* count of inodes good in irbuf */
size_t irbsize; /* size of irec buffer in bytes */
xfs_ino_t ino; /* inode number (filesystem) */
@@ -366,10 +370,8 @@ xfs_bulkstat(
int nirbuf; /* size of irbuf */
int rval; /* return value error code */
int ubcount; /* size of user's buffer */
- int ubleft; /* bytes left in user's buffer */
- char __user *ubufp; /* pointer into user's buffer */
- int ubelem; /* spaces used in user's buffer */
int stat;
+ struct xfs_bulkstat_agichunk ac;
/*
* Get the last inode value, see if there's nothing to do.
@@ -386,11 +388,13 @@ xfs_bulkstat(
}
ubcount = *ubcountp; /* statstruct's */
- ubleft = ubcount * statstruct_size; /* bytes */
- *ubcountp = ubelem = 0;
+ ac.ac_ubuffer = &ubuffer;
+ ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
+ ac.ac_ubelem = 0;
+
+ *ubcountp = 0;
*done = 0;
- fmterror = 0;
- ubufp = ubuffer;
+
irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
if (!irbuf)
return -ENOMEM;
@@ -402,7 +406,7 @@ xfs_bulkstat(
* inode returned; 0 means start of the allocation group.
*/
rval = 0;
- while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
+ while (XFS_BULKSTAT_UBLEFT(ac.ac_ubleft) && agno < mp->m_sb.sb_agcount) {
cond_resched();
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
if (error)
@@ -497,28 +501,21 @@ del_cursor:
*/
irbufend = irbp;
for (irbp = irbuf;
- irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
- struct xfs_bulkstat_agichunk ac;
-
- ac.ac_lastino = lastino;
- ac.ac_ubuffer = &ubuffer;
- ac.ac_ubleft = ubleft;
- ac.ac_ubelem = ubelem;
+ irbp < irbufend && XFS_BULKSTAT_UBLEFT(ac.ac_ubleft);
+ irbp++) {
error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
- formatter, statstruct_size, &ac);
+ formatter, statstruct_size, &ac,
+ &lastino);
if (error)
rval = error;
- lastino = ac.ac_lastino;
- ubleft = ac.ac_ubleft;
- ubelem = ac.ac_ubelem;
-
cond_resched();
}
+
/*
* Set up for the next loop iteration.
*/
- if (XFS_BULKSTAT_UBLEFT(ubleft)) {
+ if (XFS_BULKSTAT_UBLEFT(ac.ac_ubleft)) {
if (end_of_ag) {
agno++;
agino = 0;
@@ -531,11 +528,11 @@ del_cursor:
* Done, we're either out of filesystem or space to put the data.
*/
kmem_free(irbuf);
- *ubcountp = ubelem;
+ *ubcountp = ac.ac_ubelem;
/*
* Found some inodes, return them now and return the error next time.
*/
- if (ubelem)
+ if (ac.ac_ubelem)
rval = 0;
if (agno >= mp->m_sb.sb_agcount) {
/*
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index aaed080..6ea8b39 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
int *ubused,
int *stat);
-struct xfs_bulkstat_agichunk {
- xfs_ino_t ac_lastino; /* last inode returned */
- char __user **ac_ubuffer;/* pointer into user's buffer */
- int ac_ubleft; /* bytes left in user's buffer */
- int ac_ubelem; /* spaces used in user's buffer */
-};
-
-int
-xfs_bulkstat_ag_ichunk(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- struct xfs_inobt_rec_incore *irbp,
- bulkstat_one_pf formatter,
- size_t statstruct_size,
- struct xfs_bulkstat_agichunk *acp);
-
/*
* Values for stat return value.
*/
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 3/6] xfs: bulkstat chunk-formatter has issues
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
2014-11-05 0:05 ` [PATCH 1/6] xfs: bulkstat btree walk doesn't terminate Dave Chinner
2014-11-05 0:05 ` [PATCH 2/6] xfs: bulkstat chunk formatting cursor is broken Dave Chinner
@ 2014-11-05 0:05 ` Dave Chinner
2014-11-05 14:59 ` Brian Foster
2014-11-05 0:05 ` [PATCH 4/6] xfs: bulkstat main loop logic is a mess Dave Chinner
` (3 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The loop construct has issues:
- clustidx is completely unused, so remove it.
- the loop tries to be smart by terminating when the
"freecount" tells it that all inodes are free. Just drop
it as in most cases we have to scan all inodes in the
chunk anyway.
- move the "user buffer left" condition check to the only
point where we consume space int eh user buffer.
- move the initialisation of agino out of the loop, leaving
just a simple loop control logic using the clusteridx.
Also, double handling of the user buffer variables leads to problems
tracking the current state - use the cursor variables directly
rather than keeping local copies and then having to update the
cursor before returning.
cc: <stable@vger.kernel.org>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_itable.c | 58 ++++++++++++++++++++++-------------------------------
1 file changed, 24 insertions(+), 34 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 50a3e59..7ea2b11 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -283,59 +283,49 @@ xfs_bulkstat_ag_ichunk(
xfs_ino_t *lastino)
{
char __user **ubufp = acp->ac_ubuffer;
- int ubleft = acp->ac_ubleft;
- int ubelem = acp->ac_ubelem;
- int chunkidx, clustidx;
+ int chunkidx;
int error = 0;
xfs_agino_t agino;
- for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
- XFS_BULKSTAT_UBLEFT(ubleft) &&
- irbp->ir_freecount < XFS_INODES_PER_CHUNK;
- chunkidx++, clustidx++, agino++) {
- int fmterror; /* bulkstat formatter result */
+ agino = irbp->ir_startino;
+ for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
+ chunkidx++, agino++) {
+ int fmterror;
int ubused;
xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
- ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
-
/* Skip if this inode is free */
if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
*lastino = ino;
continue;
}
- /*
- * Count used inodes as free so we can tell when the
- * chunk is used up.
- */
- irbp->ir_freecount++;
-
/* Get the inode and fill in a single buffer */
ubused = statstruct_size;
- error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
- if (fmterror == BULKSTAT_RV_NOTHING) {
- if (error && error != -ENOENT && error != -EINVAL) {
- ubleft = 0;
- break;
- }
- *lastino = ino;
- continue;
- }
- if (fmterror == BULKSTAT_RV_GIVEUP) {
- ubleft = 0;
+ error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
+ &ubused, &fmterror);
+ if (fmterror == BULKSTAT_RV_GIVEUP ||
+ (error && error != -ENOENT && error != -EINVAL)) {
+ acp->ac_ubleft = 0;
ASSERT(error);
break;
}
- if (*ubufp)
- *ubufp += ubused;
- ubleft -= ubused;
- ubelem++;
+
+ /* be careful not to leak error if at end of chunk */
+ if (fmterror == BULKSTAT_RV_NOTHING || error) {
+ *lastino = ino;
+ error = 0;
+ continue;
+ }
+
+ *ubufp += ubused;
+ acp->ac_ubleft -= ubused;
+ acp->ac_ubelem++;
*lastino = ino;
- }
- acp->ac_ubleft = ubleft;
- acp->ac_ubelem = ubelem;
+ if (acp->ac_ubleft < statstruct_size)
+ break;
+ }
return error;
}
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 4/6] xfs: bulkstat main loop logic is a mess
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
` (2 preceding siblings ...)
2014-11-05 0:05 ` [PATCH 3/6] xfs: bulkstat chunk-formatter has issues Dave Chinner
@ 2014-11-05 0:05 ` Dave Chinner
2014-11-05 0:05 ` [PATCH 5/6] xfs: bulkstat error handling is broken Dave Chinner
` (2 subsequent siblings)
6 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
There are a bunch of variables tha tare more wildy scoped than they
need to be, obfuscated user buffer checks and tortured "next inode"
tracking. This all needs cleaning up to expose the real issues that
need fixing.
cc: <stable@vger.kernel.org>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_itable.c | 52 ++++++++++++++++++++--------------------------------
1 file changed, 20 insertions(+), 32 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 7ea2b11..a9c28d1 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -348,30 +348,23 @@ xfs_bulkstat(
xfs_agino_t agino; /* inode # in allocation group */
xfs_agnumber_t agno; /* allocation group number */
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- int end_of_ag; /* set if we've seen the ag end */
- int error; /* error code */
- int icount; /* count of inodes good in irbuf */
size_t irbsize; /* size of irec buffer in bytes */
- xfs_ino_t ino; /* inode number (filesystem) */
- xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */
xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
int rval; /* return value error code */
int ubcount; /* size of user's buffer */
- int stat;
struct xfs_bulkstat_agichunk ac;
+ int error = 0;
/*
* Get the last inode value, see if there's nothing to do.
*/
- ino = (xfs_ino_t)*lastinop;
- lastino = ino;
- agno = XFS_INO_TO_AGNO(mp, ino);
- agino = XFS_INO_TO_AGINO(mp, ino);
+ lastino = *lastinop;
+ agno = XFS_INO_TO_AGNO(mp, lastino);
+ agino = XFS_INO_TO_AGINO(mp, lastino);
if (agno >= mp->m_sb.sb_agcount ||
- ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+ lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
*done = 1;
*ubcountp = 0;
return 0;
@@ -396,8 +389,13 @@ xfs_bulkstat(
* inode returned; 0 means start of the allocation group.
*/
rval = 0;
- while (XFS_BULKSTAT_UBLEFT(ac.ac_ubleft) && agno < mp->m_sb.sb_agcount) {
- cond_resched();
+ while (ac.ac_ubleft >= statstruct_size && agno < mp->m_sb.sb_agcount) {
+ struct xfs_inobt_rec_incore *irbp = irbuf;
+ struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
+ bool end_of_ag = false;
+ int icount = 0;
+ int stat;
+
error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
if (error)
break;
@@ -407,10 +405,6 @@ xfs_bulkstat(
*/
cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
XFS_BTNUM_INO);
- irbp = irbuf;
- irbufend = irbuf + nirbuf;
- end_of_ag = 0;
- icount = 0;
if (agino > 0) {
/*
* In the middle of an allocation group, we need to get
@@ -435,7 +429,7 @@ xfs_bulkstat(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
}
if (error || stat == 0) {
- end_of_ag = 1;
+ end_of_ag = true;
goto del_cursor;
}
@@ -448,7 +442,7 @@ xfs_bulkstat(
error = xfs_inobt_get_rec(cur, &r, &stat);
if (error || stat == 0) {
- end_of_ag = 1;
+ end_of_ag = true;
goto del_cursor;
}
@@ -470,7 +464,7 @@ xfs_bulkstat(
agino = r.ir_startino + XFS_INODES_PER_CHUNK;
error = xfs_btree_increment(cur, 0, &stat);
if (error || stat == 0) {
- end_of_ag = 1;
+ end_of_ag = true;
goto del_cursor;
}
cond_resched();
@@ -491,7 +485,7 @@ del_cursor:
*/
irbufend = irbp;
for (irbp = irbuf;
- irbp < irbufend && XFS_BULKSTAT_UBLEFT(ac.ac_ubleft);
+ irbp < irbufend && ac.ac_ubleft >= statstruct_size;
irbp++) {
error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
formatter, statstruct_size, &ac,
@@ -502,17 +496,11 @@ del_cursor:
cond_resched();
}
- /*
- * Set up for the next loop iteration.
- */
- if (XFS_BULKSTAT_UBLEFT(ac.ac_ubleft)) {
- if (end_of_ag) {
- agno++;
- agino = 0;
- } else
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ if (end_of_ag) {
+ agno++;
+ agino = 0;
} else
- break;
+ agino = XFS_INO_TO_AGINO(mp, lastino);
}
/*
* Done, we're either out of filesystem or space to put the data.
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 5/6] xfs: bulkstat error handling is broken
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
` (3 preceding siblings ...)
2014-11-05 0:05 ` [PATCH 4/6] xfs: bulkstat main loop logic is a mess Dave Chinner
@ 2014-11-05 0:05 ` Dave Chinner
2014-11-05 0:05 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
2014-11-05 6:07 ` [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
6 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The error propagation is a horror - xfs_bulkstat() returns
a rval variable which is only set if there are formatter errors. Any
sort of btree walk error or corruption will cause the bulkstat walk
to terminate but will not pass an error back to userspace. Worse
is the fact that formatter errors will also be ignored if any inodes
were correctly formatted into the user buffer.
Hence bulkstat can fail badly yet still report success to userspace.
This causes significant issues with xfsdump not dumping everything
in the filesystem yet reporting success. It's not until a restore
fails that there is any indication that the dump was bad and tha
bulkstat failed. This patch now triggers xfsdump to fail with
bulkstat errors rather than silently missing files in the dump.
This now causes bulkstat to fail when the lastino cookie does not
fall inside an existing inode chunk. The pre-3.17 code tolerated
that error by allowing the code to move to the next inode chunk
as the agino target is guaranteed to fall into the next btree
record.
With the fixes up to this point in the series, xfsdump now passes on
the troublesome filesystem image that exposes all these bugs.
cc: <stable@vger.kernel.org>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
fs/xfs/xfs_itable.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index a9c28d1..be96754 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk(
XFS_WANT_CORRUPTED_RETURN(stat == 1);
/* Check if the record contains the inode in request */
- if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
- return -EINVAL;
+ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
+ *icount = 0;
+ return 0;
+ }
idx = agino - irec->ir_startino + 1;
if (idx < XFS_INODES_PER_CHUNK &&
@@ -352,7 +354,6 @@ xfs_bulkstat(
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
- int rval; /* return value error code */
int ubcount; /* size of user's buffer */
struct xfs_bulkstat_agichunk ac;
int error = 0;
@@ -388,7 +389,6 @@ xfs_bulkstat(
* Loop over the allocation groups, starting from the last
* inode returned; 0 means start of the allocation group.
*/
- rval = 0;
while (ac.ac_ubleft >= statstruct_size && agno < mp->m_sb.sb_agcount) {
struct xfs_inobt_rec_incore *irbp = irbuf;
struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
@@ -491,11 +491,14 @@ del_cursor:
formatter, statstruct_size, &ac,
&lastino);
if (error)
- rval = error;
+ break;
cond_resched();
}
+ if (error)
+ break;
+
if (end_of_ag) {
agno++;
agino = 0;
@@ -507,11 +510,17 @@ del_cursor:
*/
kmem_free(irbuf);
*ubcountp = ac.ac_ubelem;
+
/*
- * Found some inodes, return them now and return the error next time.
+ * We found some inodes, so clear the error status and return them.
+ * The lastino pointer will point directly at the inode that triggered
+ * any error that occurred, so on the next call the error will be
+ * triggered again and propagated to userspace as there will be no
+ * formatted inodes in the buffer.
*/
if (ac.ac_ubelem)
- rval = 0;
+ error = 0;
+
if (agno >= mp->m_sb.sb_agcount) {
/*
* If we ran out of filesystem, mark lastino as off
@@ -523,7 +532,7 @@ del_cursor:
} else
*lastinop = (xfs_ino_t)lastino;
- return rval;
+ return error;
}
int
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
` (4 preceding siblings ...)
2014-11-05 0:05 ` [PATCH 5/6] xfs: bulkstat error handling is broken Dave Chinner
@ 2014-11-05 0:05 ` Dave Chinner
2014-11-05 15:14 ` Brian Foster
2014-11-05 6:07 ` [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
6 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 0:05 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The bulkstat main loop progress is tracked by the "lastino"
variable, which is a full 64 bit inode. However, the loop actually
works on agno/agino pairs, and so there's a significant disconnect
between the rest of the loop and the main cursor. Convert this to
use the agino, and pass the agino into the chunk formatting function
and convert it too.
This gets rid of the inconsistency in the loop processing, and
finally makes it simple for us to skip inodes at any point in the
loop simply by incrementing the agino cursor.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_itable.c | 68 ++++++++++++++++++++++++++---------------------------
1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index be96754..a082bb7 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -282,30 +282,31 @@ xfs_bulkstat_ag_ichunk(
bulkstat_one_pf formatter,
size_t statstruct_size,
struct xfs_bulkstat_agichunk *acp,
- xfs_ino_t *lastino)
+ xfs_agino_t *last_agino)
{
char __user **ubufp = acp->ac_ubuffer;
int chunkidx;
int error = 0;
- xfs_agino_t agino;
+ xfs_agino_t agino = irbp->ir_startino;
- agino = irbp->ir_startino;
for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
chunkidx++, agino++) {
int fmterror;
int ubused;
- xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
+
+ /* inode won't fit in buffer, we are done */
+ if (acp->ac_ubleft < statstruct_size)
+ break;
/* Skip if this inode is free */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- *lastino = ino;
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
continue;
- }
/* Get the inode and fill in a single buffer */
ubused = statstruct_size;
- error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
- &ubused, &fmterror);
+ error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
+ *ubufp, acp->ac_ubleft, &ubused, &fmterror);
+
if (fmterror == BULKSTAT_RV_GIVEUP ||
(error && error != -ENOENT && error != -EINVAL)) {
acp->ac_ubleft = 0;
@@ -315,7 +316,6 @@ xfs_bulkstat_ag_ichunk(
/* be careful not to leak error if at end of chunk */
if (fmterror == BULKSTAT_RV_NOTHING || error) {
- *lastino = ino;
error = 0;
continue;
}
@@ -323,12 +323,18 @@ xfs_bulkstat_ag_ichunk(
*ubufp += ubused;
acp->ac_ubleft -= ubused;
acp->ac_ubelem++;
- *lastino = ino;
-
- if (acp->ac_ubleft < statstruct_size)
- break;
}
+ /*
+ * Post-update *last_agino. At this point, agino will always point one
+ * inode past the last inode we processed successfully. Hence we
+ * substract that inode when setting the *last_agino cursor so that we
+ * return the correct cookie to userspace. On the next bulkstat call,
+ * the inode under the lastino cookie will be skipped as we have already
+ * processed it here.
+ */
+ *last_agino = agino - 1;
+
return error;
}
@@ -352,7 +358,6 @@ xfs_bulkstat(
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
struct xfs_bulkstat_agichunk ac;
@@ -361,11 +366,10 @@ xfs_bulkstat(
/*
* Get the last inode value, see if there's nothing to do.
*/
- lastino = *lastinop;
- agno = XFS_INO_TO_AGNO(mp, lastino);
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ agno = XFS_INO_TO_AGNO(mp, *lastinop);
+ agino = XFS_INO_TO_AGINO(mp, *lastinop);
if (agno >= mp->m_sb.sb_agcount ||
- lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+ *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
*done = 1;
*ubcountp = 0;
return 0;
@@ -420,7 +424,6 @@ xfs_bulkstat(
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
}
/* Increment to the next record */
error = xfs_btree_increment(cur, 0, &stat);
@@ -461,7 +464,6 @@ xfs_bulkstat(
/*
* Set agino to after this chunk and bump the cursor.
*/
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
error = xfs_btree_increment(cur, 0, &stat);
if (error || stat == 0) {
end_of_ag = true;
@@ -481,7 +483,9 @@ del_cursor:
if (error)
break;
/*
- * Now format all the good inodes into the user's buffer.
+ * Now format all the good inodes into the user's buffer. The
+ * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
+ * for the next loop iteration.
*/
irbufend = irbp;
for (irbp = irbuf;
@@ -489,7 +493,7 @@ del_cursor:
irbp++) {
error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
formatter, statstruct_size, &ac,
- &lastino);
+ &agino);
if (error)
break;
@@ -502,8 +506,7 @@ del_cursor:
if (end_of_ag) {
agno++;
agino = 0;
- } else
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ }
}
/*
* Done, we're either out of filesystem or space to put the data.
@@ -521,16 +524,13 @@ del_cursor:
if (ac.ac_ubelem)
error = 0;
- if (agno >= mp->m_sb.sb_agcount) {
- /*
- * If we ran out of filesystem, mark lastino as off
- * the end of the filesystem, so the next call
- * will return immediately.
- */
- *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
+ /*
+ * If we ran out of filesystem, lastino will point off the end of
+ * the filesystem so the next call will return immediately.
+ */
+ *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
+ if (agno >= mp->m_sb.sb_agcount)
*done = 1;
- } else
- *lastinop = (xfs_ino_t)lastino;
return error;
}
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
` (5 preceding siblings ...)
2014-11-05 0:05 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
@ 2014-11-05 6:07 ` Dave Chinner
2014-11-05 6:32 ` Dave Chinner
6 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 6:07 UTC (permalink / raw)
To: xfs
On Wed, Nov 05, 2014 at 11:05:15AM +1100, Dave Chinner wrote:
> Hi folks, this is version 2 of the bulkstat fixup series first
> posted here:
>
> http://oss.sgi.com/archives/xfs/2014-11/msg00057.html
>
> Version 2 fixes the issues Brian found during review:
> - chunk formatter error leakage (patch 3)
> - moved main loop chunk formatter error handling from patch 4 to
> patch 5
> - reworks last_agino updating in patch 6 to do post-formatting
> updates and added comments.
>
> Comeents and testing welcome.
I'm not 100% convinced that this fixes all the problems. I just
created, dumped and restored a 10 million inode filesystem (about
50GB of dump file) and I found 102 missing files in the dump with no
errors from xfsdump or xfsrestore.
The files are missing from just 4 directories out of about 1000
directories containing equal numbers of files, so its not a common
trigger whatever the issue is. I'll keep digging...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-05 6:07 ` [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
@ 2014-11-05 6:32 ` Dave Chinner
2014-11-05 13:17 ` Brian Foster
0 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 6:32 UTC (permalink / raw)
To: xfs
On Wed, Nov 05, 2014 at 05:07:28PM +1100, Dave Chinner wrote:
> On Wed, Nov 05, 2014 at 11:05:15AM +1100, Dave Chinner wrote:
> > Hi folks, this is version 2 of the bulkstat fixup series first
> > posted here:
> >
> > http://oss.sgi.com/archives/xfs/2014-11/msg00057.html
> >
> > Version 2 fixes the issues Brian found during review:
> > - chunk formatter error leakage (patch 3)
> > - moved main loop chunk formatter error handling from patch 4 to
> > patch 5
> > - reworks last_agino updating in patch 6 to do post-formatting
> > updates and added comments.
> >
> > Comeents and testing welcome.
>
> I'm not 100% convinced that this fixes all the problems. I just
> created, dumped and restored a 10 million inode filesystem (about
> 50GB of dump file) and I found 102 missing files in the dump with no
> errors from xfsdump or xfsrestore.
>
> The files are missing from just 4 directories out of about 1000
> directories containing equal numbers of files, so its not a common
> trigger whatever the issue is. I'll keep digging...
OK, this looks like a problem with handling the last record in the
AGI btree:
$ for i in `cat s.diff | grep "^+/" | sed -e 's/^+//'` ; do ls -i $i; done |sort -n
163209114099 /mnt/scratch/2/dbc/5459605f~~~~~~~~RDJX8QBHPPMCGMD7YJQGYPD2
....
163209114129 /mnt/scratch/2/dbc/5459605f~~~~~~~~U820IYQFKS8A6QYCC8HU3ZBX
292057960758 /mnt/scratch/0/dcc/54596070~~~~~~~~9BUH5D5PZTGAC8BT1YL77OZ0
...
292057960769 /mnt/scratch/0/dcc/54596070~~~~~~~~DAO78GAAFNUZU8PH7Q0UZNRH
1395864555809 /mnt/scratch/1/e60/54596103~~~~~~~~GEMXGHYNREW409N7W9INBMVA
.....
1395864555841 /mnt/scratch/1/e60/54596103~~~~~~~~9XPK9FWHCE21AJ3EN023DU47
1653562593576 /mnt/scratch/5/e79/5459611c~~~~~~~~BSBZ6EUCT9HOIRQPMFZDVPQ5
.....
1653562593601 /mnt/scratch/5/e79/5459611c~~~~~~~~6QY1SO8ZGGNQESAGXSB3G3DH
$
xfs_db> convert inode 163209114099 agno
0x26 (38)
xfs_db> convert inode 163209114099 agino
0x571f3 (356851)
xfs_db> convert inode 163209114129 agino
0x57211 (356881)
xfs_db> agi 38
xfs_db> a root
xfs_db> a ptrs[2]
xfs_db> p
....
recs[1-234] = [startino,freecount,free]
......
228:[356352,0,0] 229:[356416,0,0] 230:[356512,0,0] 231:[356576,0,0]
232:[356672,0,0] 233:[356736,0,0] 234:[356832,14,0xfffc000000000000]
So the first contiguous inode range they all fall into the partial final record
in the AG.
xfs_db> convert inode 292057960758 agino
0x2d136 (184630)
.....
155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
Same.
xfs_db> convert inode 1395864555809 agino
0x2d121 (184609)
.....
155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
Same.
xfs_db> convert inode 1653562593576 agino
0x2d128 (184616)
....
155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
Same.
So they are all falling into the last btree record in the AG, and so
appear to have been skipped as a result of the same issue. At least
that gives me something to look at.
Still, please review the patches I've already posted - I'll push
them to linus if they are fine ASAP, and then add whatever I find
from this test later.
Cheers,
Dave.
PS: every AG I looked at had an identical inode allocation pattern.
Given that the directory entries and the file contents created are
all deterministic, it's reassuring to see that the allocator has
created identical metadata structure layouts on disk for a
repeating workload that creates identical user-visible
hierarchies...
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-05 6:32 ` Dave Chinner
@ 2014-11-05 13:17 ` Brian Foster
2014-11-05 21:21 ` Dave Chinner
0 siblings, 1 reply; 21+ messages in thread
From: Brian Foster @ 2014-11-05 13:17 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Wed, Nov 05, 2014 at 05:32:26PM +1100, Dave Chinner wrote:
> On Wed, Nov 05, 2014 at 05:07:28PM +1100, Dave Chinner wrote:
> > On Wed, Nov 05, 2014 at 11:05:15AM +1100, Dave Chinner wrote:
> > > Hi folks, this is version 2 of the bulkstat fixup series first
> > > posted here:
> > >
> > > http://oss.sgi.com/archives/xfs/2014-11/msg00057.html
> > >
> > > Version 2 fixes the issues Brian found during review:
> > > - chunk formatter error leakage (patch 3)
> > > - moved main loop chunk formatter error handling from patch 4 to
> > > patch 5
> > > - reworks last_agino updating in patch 6 to do post-formatting
> > > updates and added comments.
> > >
> > > Comeents and testing welcome.
> >
> > I'm not 100% convinced that this fixes all the problems. I just
> > created, dumped and restored a 10 million inode filesystem (about
> > 50GB of dump file) and I found 102 missing files in the dump with no
> > errors from xfsdump or xfsrestore.
> >
> > The files are missing from just 4 directories out of about 1000
> > directories containing equal numbers of files, so its not a common
> > trigger whatever the issue is. I'll keep digging...
>
> OK, this looks like a problem with handling the last record in the
> AGI btree:
>
> $ for i in `cat s.diff | grep "^+/" | sed -e 's/^+//'` ; do ls -i $i; done |sort -n
> 163209114099 /mnt/scratch/2/dbc/5459605f~~~~~~~~RDJX8QBHPPMCGMD7YJQGYPD2
> ....
> 163209114129 /mnt/scratch/2/dbc/5459605f~~~~~~~~U820IYQFKS8A6QYCC8HU3ZBX
> 292057960758 /mnt/scratch/0/dcc/54596070~~~~~~~~9BUH5D5PZTGAC8BT1YL77OZ0
> ...
> 292057960769 /mnt/scratch/0/dcc/54596070~~~~~~~~DAO78GAAFNUZU8PH7Q0UZNRH
> 1395864555809 /mnt/scratch/1/e60/54596103~~~~~~~~GEMXGHYNREW409N7W9INBMVA
> .....
> 1395864555841 /mnt/scratch/1/e60/54596103~~~~~~~~9XPK9FWHCE21AJ3EN023DU47
> 1653562593576 /mnt/scratch/5/e79/5459611c~~~~~~~~BSBZ6EUCT9HOIRQPMFZDVPQ5
> .....
> 1653562593601 /mnt/scratch/5/e79/5459611c~~~~~~~~6QY1SO8ZGGNQESAGXSB3G3DH
> $
>
> xfs_db> convert inode 163209114099 agno
> 0x26 (38)
> xfs_db> convert inode 163209114099 agino
> 0x571f3 (356851)
> xfs_db> convert inode 163209114129 agino
> 0x57211 (356881)
> xfs_db> agi 38
> xfs_db> a root
> xfs_db> a ptrs[2]
> xfs_db> p
> ....
> recs[1-234] = [startino,freecount,free]
> ......
> 228:[356352,0,0] 229:[356416,0,0] 230:[356512,0,0] 231:[356576,0,0]
> 232:[356672,0,0] 233:[356736,0,0] 234:[356832,14,0xfffc000000000000]
>
> So the first contiguous inode range they all fall into the partial final record
> in the AG.
>
> xfs_db> convert inode 292057960758 agino
> 0x2d136 (184630)
> .....
> 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
>
> Same.
>
> xfs_db> convert inode 1395864555809 agino
> 0x2d121 (184609)
> .....
> 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
>
> Same.
>
> xfs_db> convert inode 1653562593576 agino
> 0x2d128 (184616)
> ....
> 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
>
> Same.
>
> So they are all falling into the last btree record in the AG, and so
> appear to have been skipped as a result of the same issue. At least
> that gives me something to look at.
>
Interesting, though just to note... is it possible this is related to
records with free inodes? If this is a prepopulated fs for the purpose
of this test, it's conceivable that there's only a small set of such
records in the fs. The other records in your snippets here are fully
allocated, but of course this is only a small snippet of a larger set of
data.
It also might be interesting to know whether this repeats without the
last patch in the series. IIRC that one seemed to have the most
potential impact on the overall algorithm (by changing loop iteration
logic, etc.). Just a thought.
Brian
> Still, please review the patches I've already posted - I'll push
> them to linus if they are fine ASAP, and then add whatever I find
> from this test later.
>
> Cheers,
>
> Dave.
>
> PS: every AG I looked at had an identical inode allocation pattern.
> Given that the directory entries and the file contents created are
> all deterministic, it's reassuring to see that the allocator has
> created identical metadata structure layouts on disk for a
> repeating workload that creates identical user-visible
> hierarchies...
> --
> Dave Chinner
> david@fromorbit.com
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 3/6] xfs: bulkstat chunk-formatter has issues
2014-11-05 0:05 ` [PATCH 3/6] xfs: bulkstat chunk-formatter has issues Dave Chinner
@ 2014-11-05 14:59 ` Brian Foster
0 siblings, 0 replies; 21+ messages in thread
From: Brian Foster @ 2014-11-05 14:59 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Wed, Nov 05, 2014 at 11:05:18AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The loop construct has issues:
> - clustidx is completely unused, so remove it.
> - the loop tries to be smart by terminating when the
> "freecount" tells it that all inodes are free. Just drop
> it as in most cases we have to scan all inodes in the
> chunk anyway.
> - move the "user buffer left" condition check to the only
> point where we consume space int eh user buffer.
> - move the initialisation of agino out of the loop, leaving
> just a simple loop control logic using the clusteridx.
>
> Also, double handling of the user buffer variables leads to problems
> tracking the current state - use the cursor variables directly
> rather than keeping local copies and then having to update the
> cursor before returning.
>
> cc: <stable@vger.kernel.org>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
Reviewed-by: Brian Foster <bfoster@redhat.com>
> fs/xfs/xfs_itable.c | 58 ++++++++++++++++++++++-------------------------------
> 1 file changed, 24 insertions(+), 34 deletions(-)
>
> diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> index 50a3e59..7ea2b11 100644
> --- a/fs/xfs/xfs_itable.c
> +++ b/fs/xfs/xfs_itable.c
> @@ -283,59 +283,49 @@ xfs_bulkstat_ag_ichunk(
> xfs_ino_t *lastino)
> {
> char __user **ubufp = acp->ac_ubuffer;
> - int ubleft = acp->ac_ubleft;
> - int ubelem = acp->ac_ubelem;
> - int chunkidx, clustidx;
> + int chunkidx;
> int error = 0;
> xfs_agino_t agino;
>
> - for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
> - XFS_BULKSTAT_UBLEFT(ubleft) &&
> - irbp->ir_freecount < XFS_INODES_PER_CHUNK;
> - chunkidx++, clustidx++, agino++) {
> - int fmterror; /* bulkstat formatter result */
> + agino = irbp->ir_startino;
> + for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
> + chunkidx++, agino++) {
> + int fmterror;
> int ubused;
> xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
>
> - ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
> -
> /* Skip if this inode is free */
> if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
> *lastino = ino;
> continue;
> }
>
> - /*
> - * Count used inodes as free so we can tell when the
> - * chunk is used up.
> - */
> - irbp->ir_freecount++;
> -
> /* Get the inode and fill in a single buffer */
> ubused = statstruct_size;
> - error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
> - if (fmterror == BULKSTAT_RV_NOTHING) {
> - if (error && error != -ENOENT && error != -EINVAL) {
> - ubleft = 0;
> - break;
> - }
> - *lastino = ino;
> - continue;
> - }
> - if (fmterror == BULKSTAT_RV_GIVEUP) {
> - ubleft = 0;
> + error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
> + &ubused, &fmterror);
> + if (fmterror == BULKSTAT_RV_GIVEUP ||
> + (error && error != -ENOENT && error != -EINVAL)) {
> + acp->ac_ubleft = 0;
> ASSERT(error);
> break;
> }
> - if (*ubufp)
> - *ubufp += ubused;
> - ubleft -= ubused;
> - ubelem++;
> +
> + /* be careful not to leak error if at end of chunk */
> + if (fmterror == BULKSTAT_RV_NOTHING || error) {
> + *lastino = ino;
> + error = 0;
> + continue;
> + }
> +
> + *ubufp += ubused;
> + acp->ac_ubleft -= ubused;
> + acp->ac_ubelem++;
> *lastino = ino;
> - }
>
> - acp->ac_ubleft = ubleft;
> - acp->ac_ubelem = ubelem;
> + if (acp->ac_ubleft < statstruct_size)
> + break;
> + }
>
> return error;
> }
> --
> 2.0.0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-05 0:05 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
@ 2014-11-05 15:14 ` Brian Foster
0 siblings, 0 replies; 21+ messages in thread
From: Brian Foster @ 2014-11-05 15:14 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Wed, Nov 05, 2014 at 11:05:21AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The bulkstat main loop progress is tracked by the "lastino"
> variable, which is a full 64 bit inode. However, the loop actually
> works on agno/agino pairs, and so there's a significant disconnect
> between the rest of the loop and the main cursor. Convert this to
> use the agino, and pass the agino into the chunk formatting function
> and convert it too.
>
> This gets rid of the inconsistency in the loop processing, and
> finally makes it simple for us to skip inodes at any point in the
> loop simply by incrementing the agino cursor.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
The deltas in this version look Ok, but when I step back from the patch
and make another pass at the overall algorithm, I think that this
approach still has some problems. Particularly, consider the end_of_ag
agino update in the main xfs_bulkstat() loop.
In that block, we update agno and set agino = 0, but there's no
guarantee we've processed that inode and thus can push the cookie
forward in a manner that can skip inodes. Consider the case where we run
out of buffer space on the last record of the AG. We've set end_of_ag in
either of the preceding record scanning blocks, called
xfs_bulkstat_ag_ichunk() and then simply run out of space in the buffer.
It looks like we'd exit th ag_ichunk() loop, move agino forward to the
next ag and exit out of the main loop on the next iteration. The
lastinop cookie is now set to the first inode of the subsequent AG.
It's not clear what possibilities that introduces for xfsdump, but it
seems broken and I can reproduce some badness with xfstests/src/bstat on
a quick test:
# for i in $(seq 0 999); do touch /mnt/$i; done
# ./bstat /mnt/ | grep ino | wc -l
1001
# ./bstat /mnt/ 4 | grep ino | wc -l
960
So using a small batch size (default is 4096) definitely causes inode
skipping one way or another. This problem doesn't occur if I drop this
patch.
Either way, I'm thinking we could use an xfstests that uses bstat to
create some of these different inode layout, batch size, AG count, etc.
conditions and validates the expected bulkstat count. We could probably
do a lot more (and more quickly) in a single test than via xfsdump. ;)
Brian
> fs/xfs/xfs_itable.c | 68 ++++++++++++++++++++++++++---------------------------
> 1 file changed, 34 insertions(+), 34 deletions(-)
>
> diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> index be96754..a082bb7 100644
> --- a/fs/xfs/xfs_itable.c
> +++ b/fs/xfs/xfs_itable.c
> @@ -282,30 +282,31 @@ xfs_bulkstat_ag_ichunk(
> bulkstat_one_pf formatter,
> size_t statstruct_size,
> struct xfs_bulkstat_agichunk *acp,
> - xfs_ino_t *lastino)
> + xfs_agino_t *last_agino)
> {
> char __user **ubufp = acp->ac_ubuffer;
> int chunkidx;
> int error = 0;
> - xfs_agino_t agino;
> + xfs_agino_t agino = irbp->ir_startino;
>
> - agino = irbp->ir_startino;
> for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
> chunkidx++, agino++) {
> int fmterror;
> int ubused;
> - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
> +
> + /* inode won't fit in buffer, we are done */
> + if (acp->ac_ubleft < statstruct_size)
> + break;
>
> /* Skip if this inode is free */
> - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
> - *lastino = ino;
> + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
> continue;
> - }
>
> /* Get the inode and fill in a single buffer */
> ubused = statstruct_size;
> - error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
> - &ubused, &fmterror);
> + error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
> + *ubufp, acp->ac_ubleft, &ubused, &fmterror);
> +
> if (fmterror == BULKSTAT_RV_GIVEUP ||
> (error && error != -ENOENT && error != -EINVAL)) {
> acp->ac_ubleft = 0;
> @@ -315,7 +316,6 @@ xfs_bulkstat_ag_ichunk(
>
> /* be careful not to leak error if at end of chunk */
> if (fmterror == BULKSTAT_RV_NOTHING || error) {
> - *lastino = ino;
> error = 0;
> continue;
> }
> @@ -323,12 +323,18 @@ xfs_bulkstat_ag_ichunk(
> *ubufp += ubused;
> acp->ac_ubleft -= ubused;
> acp->ac_ubelem++;
> - *lastino = ino;
> -
> - if (acp->ac_ubleft < statstruct_size)
> - break;
> }
>
> + /*
> + * Post-update *last_agino. At this point, agino will always point one
> + * inode past the last inode we processed successfully. Hence we
> + * substract that inode when setting the *last_agino cursor so that we
> + * return the correct cookie to userspace. On the next bulkstat call,
> + * the inode under the lastino cookie will be skipped as we have already
> + * processed it here.
> + */
> + *last_agino = agino - 1;
> +
> return error;
> }
>
> @@ -352,7 +358,6 @@ xfs_bulkstat(
> xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
> size_t irbsize; /* size of irec buffer in bytes */
> xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
> - xfs_ino_t lastino; /* last inode number returned */
> int nirbuf; /* size of irbuf */
> int ubcount; /* size of user's buffer */
> struct xfs_bulkstat_agichunk ac;
> @@ -361,11 +366,10 @@ xfs_bulkstat(
> /*
> * Get the last inode value, see if there's nothing to do.
> */
> - lastino = *lastinop;
> - agno = XFS_INO_TO_AGNO(mp, lastino);
> - agino = XFS_INO_TO_AGINO(mp, lastino);
> + agno = XFS_INO_TO_AGNO(mp, *lastinop);
> + agino = XFS_INO_TO_AGINO(mp, *lastinop);
> if (agno >= mp->m_sb.sb_agcount ||
> - lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
> + *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
> *done = 1;
> *ubcountp = 0;
> return 0;
> @@ -420,7 +424,6 @@ xfs_bulkstat(
> irbp->ir_freecount = r.ir_freecount;
> irbp->ir_free = r.ir_free;
> irbp++;
> - agino = r.ir_startino + XFS_INODES_PER_CHUNK;
> }
> /* Increment to the next record */
> error = xfs_btree_increment(cur, 0, &stat);
> @@ -461,7 +464,6 @@ xfs_bulkstat(
> /*
> * Set agino to after this chunk and bump the cursor.
> */
> - agino = r.ir_startino + XFS_INODES_PER_CHUNK;
> error = xfs_btree_increment(cur, 0, &stat);
> if (error || stat == 0) {
> end_of_ag = true;
> @@ -481,7 +483,9 @@ del_cursor:
> if (error)
> break;
> /*
> - * Now format all the good inodes into the user's buffer.
> + * Now format all the good inodes into the user's buffer. The
> + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
> + * for the next loop iteration.
> */
> irbufend = irbp;
> for (irbp = irbuf;
> @@ -489,7 +493,7 @@ del_cursor:
> irbp++) {
> error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
> formatter, statstruct_size, &ac,
> - &lastino);
> + &agino);
> if (error)
> break;
>
> @@ -502,8 +506,7 @@ del_cursor:
> if (end_of_ag) {
> agno++;
> agino = 0;
> - } else
> - agino = XFS_INO_TO_AGINO(mp, lastino);
> + }
> }
> /*
> * Done, we're either out of filesystem or space to put the data.
> @@ -521,16 +524,13 @@ del_cursor:
> if (ac.ac_ubelem)
> error = 0;
>
> - if (agno >= mp->m_sb.sb_agcount) {
> - /*
> - * If we ran out of filesystem, mark lastino as off
> - * the end of the filesystem, so the next call
> - * will return immediately.
> - */
> - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
> + /*
> + * If we ran out of filesystem, lastino will point off the end of
> + * the filesystem so the next call will return immediately.
> + */
> + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
> + if (agno >= mp->m_sb.sb_agcount)
> *done = 1;
> - } else
> - *lastinop = (xfs_ino_t)lastino;
>
> return error;
> }
> --
> 2.0.0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-05 13:17 ` Brian Foster
@ 2014-11-05 21:21 ` Dave Chinner
2014-11-06 6:53 ` Dave Chinner
0 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-05 21:21 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Wed, Nov 05, 2014 at 08:17:06AM -0500, Brian Foster wrote:
> On Wed, Nov 05, 2014 at 05:32:26PM +1100, Dave Chinner wrote:
> > OK, this looks like a problem with handling the last record in the
> > AGI btree:
> >
> > $ for i in `cat s.diff | grep "^+/" | sed -e 's/^+//'` ; do ls -i $i; done |sort -n
> > 163209114099 /mnt/scratch/2/dbc/5459605f~~~~~~~~RDJX8QBHPPMCGMD7YJQGYPD2
> > ....
> > 163209114129 /mnt/scratch/2/dbc/5459605f~~~~~~~~U820IYQFKS8A6QYCC8HU3ZBX
> > 292057960758 /mnt/scratch/0/dcc/54596070~~~~~~~~9BUH5D5PZTGAC8BT1YL77OZ0
> > ...
> > 292057960769 /mnt/scratch/0/dcc/54596070~~~~~~~~DAO78GAAFNUZU8PH7Q0UZNRH
> > 1395864555809 /mnt/scratch/1/e60/54596103~~~~~~~~GEMXGHYNREW409N7W9INBMVA
> > .....
> > 1395864555841 /mnt/scratch/1/e60/54596103~~~~~~~~9XPK9FWHCE21AJ3EN023DU47
> > 1653562593576 /mnt/scratch/5/e79/5459611c~~~~~~~~BSBZ6EUCT9HOIRQPMFZDVPQ5
> > .....
> > 1653562593601 /mnt/scratch/5/e79/5459611c~~~~~~~~6QY1SO8ZGGNQESAGXSB3G3DH
> > $
> >
> > xfs_db> convert inode 163209114099 agno
> > 0x26 (38)
> > xfs_db> convert inode 163209114099 agino
> > 0x571f3 (356851)
> > xfs_db> convert inode 163209114129 agino
> > 0x57211 (356881)
> > xfs_db> agi 38
> > xfs_db> a root
> > xfs_db> a ptrs[2]
> > xfs_db> p
> > ....
> > recs[1-234] = [startino,freecount,free]
> > ......
> > 228:[356352,0,0] 229:[356416,0,0] 230:[356512,0,0] 231:[356576,0,0]
> > 232:[356672,0,0] 233:[356736,0,0] 234:[356832,14,0xfffc000000000000]
> >
> > So the first contiguous inode range they all fall into the partial final record
> > in the AG.
> >
> > xfs_db> convert inode 292057960758 agino
> > 0x2d136 (184630)
> > .....
> > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> >
> > Same.
> >
> > xfs_db> convert inode 1395864555809 agino
> > 0x2d121 (184609)
> > .....
> > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> >
> > Same.
> >
> > xfs_db> convert inode 1653562593576 agino
> > 0x2d128 (184616)
> > ....
> > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> >
> > Same.
> >
> > So they are all falling into the last btree record in the AG, and so
> > appear to have been skipped as a result of the same issue. At least
> > that gives me something to look at.
> >
>
> Interesting, though just to note... is it possible this is related to
> records with free inodes?
Oh, definitely possible - I haven't ruled that out yet. However, I
would have expected such an issue ot manifest itself during xfstests
(e.g. xfs/068) where random files are removed from the filesystem
and so leaving fre inodes in the inobt....
> If this is a prepopulated fs for the purpose
> of this test, it's conceivable that there's only a small set of such
> records in the fs. The other records in your snippets here are fully
> allocated, but of course this is only a small snippet of a larger set of
> data.
Right, they are the only partially allocated chunks in the AGs in
question, but the other 496 AGs all have partial inode chunks as
their last records, too, and they have dumped correctly.
> It also might be interesting to know whether this repeats without the
> last patch in the series. IIRC that one seemed to have the most
> potential impact on the overall algorithm (by changing loop iteration
> logic, etc.). Just a thought.
I'll try that, but the missing files don't seem aligned to the
record itself, nor is it consistent with the free inodes in the
record, so it's not an obvious loop start/end issue. I'll just have
to dig in and look at it more closely.
Thanks for the musings and thoughts - I'll have to narrow down the
failure first to see if I can trigger it easily...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-05 21:21 ` Dave Chinner
@ 2014-11-06 6:53 ` Dave Chinner
2014-11-06 12:49 ` Brian Foster
0 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-06 6:53 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Nov 06, 2014 at 08:21:00AM +1100, Dave Chinner wrote:
> On Wed, Nov 05, 2014 at 08:17:06AM -0500, Brian Foster wrote:
> > On Wed, Nov 05, 2014 at 05:32:26PM +1100, Dave Chinner wrote:
> > > OK, this looks like a problem with handling the last record in the
> > > AGI btree:
> > >
> > > $ for i in `cat s.diff | grep "^+/" | sed -e 's/^+//'` ; do ls -i $i; done |sort -n
> > > 163209114099 /mnt/scratch/2/dbc/5459605f~~~~~~~~RDJX8QBHPPMCGMD7YJQGYPD2
> > > ....
> > > 163209114129 /mnt/scratch/2/dbc/5459605f~~~~~~~~U820IYQFKS8A6QYCC8HU3ZBX
> > > 292057960758 /mnt/scratch/0/dcc/54596070~~~~~~~~9BUH5D5PZTGAC8BT1YL77OZ0
> > > ...
> > > 292057960769 /mnt/scratch/0/dcc/54596070~~~~~~~~DAO78GAAFNUZU8PH7Q0UZNRH
> > > 1395864555809 /mnt/scratch/1/e60/54596103~~~~~~~~GEMXGHYNREW409N7W9INBMVA
> > > .....
> > > 1395864555841 /mnt/scratch/1/e60/54596103~~~~~~~~9XPK9FWHCE21AJ3EN023DU47
> > > 1653562593576 /mnt/scratch/5/e79/5459611c~~~~~~~~BSBZ6EUCT9HOIRQPMFZDVPQ5
> > > .....
> > > 1653562593601 /mnt/scratch/5/e79/5459611c~~~~~~~~6QY1SO8ZGGNQESAGXSB3G3DH
> > > $
> > >
> > > xfs_db> convert inode 163209114099 agno
> > > 0x26 (38)
> > > xfs_db> convert inode 163209114099 agino
> > > 0x571f3 (356851)
> > > xfs_db> convert inode 163209114129 agino
> > > 0x57211 (356881)
> > > xfs_db> agi 38
> > > xfs_db> a root
> > > xfs_db> a ptrs[2]
> > > xfs_db> p
> > > ....
> > > recs[1-234] = [startino,freecount,free]
> > > ......
> > > 228:[356352,0,0] 229:[356416,0,0] 230:[356512,0,0] 231:[356576,0,0]
> > > 232:[356672,0,0] 233:[356736,0,0] 234:[356832,14,0xfffc000000000000]
> > >
> > > So the first contiguous inode range they all fall into the partial final record
> > > in the AG.
> > >
> > > xfs_db> convert inode 292057960758 agino
> > > 0x2d136 (184630)
> > > .....
> > > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> > >
> > > Same.
> > >
> > > xfs_db> convert inode 1395864555809 agino
> > > 0x2d121 (184609)
> > > .....
> > > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> > >
> > > Same.
> > >
> > > xfs_db> convert inode 1653562593576 agino
> > > 0x2d128 (184616)
> > > ....
> > > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> > >
> > > Same.
> > >
> > > So they are all falling into the last btree record in the AG, and so
> > > appear to have been skipped as a result of the same issue. At least
> > > that gives me something to look at.
> > >
> >
> > Interesting, though just to note... is it possible this is related to
> > records with free inodes?
>
> Oh, definitely possible - I haven't ruled that out yet. However, I
> would have expected such an issue ot manifest itself during xfstests
> (e.g. xfs/068) where random files are removed from the filesystem
> and so leaving fre inodes in the inobt....
It's not.
> > It also might be interesting to know whether this repeats without the
> > last patch in the series. IIRC that one seemed to have the most
> > potential impact on the overall algorithm (by changing loop iteration
> > logic, etc.). Just a thought.
>
> I'll try that, but the missing files don't seem aligned to the
And it's not that.
THe issue, as I suspected was to do with end of AG handling, in a
special corner case: the user buffer ran out of space part way
through processing the last records in the AG.
That is, we've scanned all the way to the end of the ag and records
the inobt records into the temp buffer. As this runs off the end of
the AG, it sets the end_of_ag flag. We then walk the temp
buffer to format the inodes, and while doing so we run out of space
before all the inodes are formatted. We then hit the loop
termination, which does agno++; agino = 0; and then we hit the
"user buffer full check".
And so the cookie we pass back to userspace doesn't match the last
inode we actually formatted, and so it skips a bunch of files in the
dump. So, we have missing inodes in the dump. THe scary part about
this is that the inodes are actually in the directory tree that
xfsdump builds but it does not give any warnings that
inodes weren't found during scan. And xfsrestore doesn't warn that
entries in the directory tree didn't have any corresponding inode in
the dump....
Hence I could look at the dump and find the path and inode number
corresponding to one of the missing files, mark it for extraction
and then extract it *successfully* but the file was not restored -
it is actually missing from the dump....
Anyway, the bug was in patch 3 - we have to check whether the user
buffer has space left before processing the end_of_ag state. I
removed the buffer space check from the for() loop logic because
it's just plain misleading and with the check in the formatter and
the end of the main loop is entires redundant. I'll post the series
again once I've confirmed a good dump and restore.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-06 6:53 ` Dave Chinner
@ 2014-11-06 12:49 ` Brian Foster
2014-11-06 13:02 ` Dave Chinner
0 siblings, 1 reply; 21+ messages in thread
From: Brian Foster @ 2014-11-06 12:49 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Thu, Nov 06, 2014 at 05:53:59PM +1100, Dave Chinner wrote:
> On Thu, Nov 06, 2014 at 08:21:00AM +1100, Dave Chinner wrote:
> > On Wed, Nov 05, 2014 at 08:17:06AM -0500, Brian Foster wrote:
> > > On Wed, Nov 05, 2014 at 05:32:26PM +1100, Dave Chinner wrote:
> > > > OK, this looks like a problem with handling the last record in the
> > > > AGI btree:
> > > >
> > > > $ for i in `cat s.diff | grep "^+/" | sed -e 's/^+//'` ; do ls -i $i; done |sort -n
> > > > 163209114099 /mnt/scratch/2/dbc/5459605f~~~~~~~~RDJX8QBHPPMCGMD7YJQGYPD2
> > > > ....
> > > > 163209114129 /mnt/scratch/2/dbc/5459605f~~~~~~~~U820IYQFKS8A6QYCC8HU3ZBX
> > > > 292057960758 /mnt/scratch/0/dcc/54596070~~~~~~~~9BUH5D5PZTGAC8BT1YL77OZ0
> > > > ...
> > > > 292057960769 /mnt/scratch/0/dcc/54596070~~~~~~~~DAO78GAAFNUZU8PH7Q0UZNRH
> > > > 1395864555809 /mnt/scratch/1/e60/54596103~~~~~~~~GEMXGHYNREW409N7W9INBMVA
> > > > .....
> > > > 1395864555841 /mnt/scratch/1/e60/54596103~~~~~~~~9XPK9FWHCE21AJ3EN023DU47
> > > > 1653562593576 /mnt/scratch/5/e79/5459611c~~~~~~~~BSBZ6EUCT9HOIRQPMFZDVPQ5
> > > > .....
> > > > 1653562593601 /mnt/scratch/5/e79/5459611c~~~~~~~~6QY1SO8ZGGNQESAGXSB3G3DH
> > > > $
> > > >
> > > > xfs_db> convert inode 163209114099 agno
> > > > 0x26 (38)
> > > > xfs_db> convert inode 163209114099 agino
> > > > 0x571f3 (356851)
> > > > xfs_db> convert inode 163209114129 agino
> > > > 0x57211 (356881)
> > > > xfs_db> agi 38
> > > > xfs_db> a root
> > > > xfs_db> a ptrs[2]
> > > > xfs_db> p
> > > > ....
> > > > recs[1-234] = [startino,freecount,free]
> > > > ......
> > > > 228:[356352,0,0] 229:[356416,0,0] 230:[356512,0,0] 231:[356576,0,0]
> > > > 232:[356672,0,0] 233:[356736,0,0] 234:[356832,14,0xfffc000000000000]
> > > >
> > > > So the first contiguous inode range they all fall into the partial final record
> > > > in the AG.
> > > >
> > > > xfs_db> convert inode 292057960758 agino
> > > > 0x2d136 (184630)
> > > > .....
> > > > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> > > >
> > > > Same.
> > > >
> > > > xfs_db> convert inode 1395864555809 agino
> > > > 0x2d121 (184609)
> > > > .....
> > > > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> > > >
> > > > Same.
> > > >
> > > > xfs_db> convert inode 1653562593576 agino
> > > > 0x2d128 (184616)
> > > > ....
> > > > 155:[184544,0,0] 156:[184608,30,0xfffffffc00000000]
> > > >
> > > > Same.
> > > >
> > > > So they are all falling into the last btree record in the AG, and so
> > > > appear to have been skipped as a result of the same issue. At least
> > > > that gives me something to look at.
> > > >
> > >
> > > Interesting, though just to note... is it possible this is related to
> > > records with free inodes?
> >
> > Oh, definitely possible - I haven't ruled that out yet. However, I
> > would have expected such an issue ot manifest itself during xfstests
> > (e.g. xfs/068) where random files are removed from the filesystem
> > and so leaving fre inodes in the inobt....
>
> It's not.
>
> > > It also might be interesting to know whether this repeats without the
> > > last patch in the series. IIRC that one seemed to have the most
> > > potential impact on the overall algorithm (by changing loop iteration
> > > logic, etc.). Just a thought.
> >
> > I'll try that, but the missing files don't seem aligned to the
>
> And it's not that.
>
> THe issue, as I suspected was to do with end of AG handling, in a
> special corner case: the user buffer ran out of space part way
> through processing the last records in the AG.
>
> That is, we've scanned all the way to the end of the ag and records
> the inobt records into the temp buffer. As this runs off the end of
> the AG, it sets the end_of_ag flag. We then walk the temp
> buffer to format the inodes, and while doing so we run out of space
> before all the inodes are formatted. We then hit the loop
> termination, which does agno++; agino = 0; and then we hit the
> "user buffer full check".
>
Ok, so this is pretty much what I noted in the comments to the v2 6/6
(though not directly related to the changes in 6/6 I suppose). ;)
> And so the cookie we pass back to userspace doesn't match the last
> inode we actually formatted, and so it skips a bunch of files in the
> dump. So, we have missing inodes in the dump. THe scary part about
> this is that the inodes are actually in the directory tree that
> xfsdump builds but it does not give any warnings that
> inodes weren't found during scan. And xfsrestore doesn't warn that
> entries in the directory tree didn't have any corresponding inode in
> the dump....
>
Yeah, that sounds scary. :/
> Hence I could look at the dump and find the path and inode number
> corresponding to one of the missing files, mark it for extraction
> and then extract it *successfully* but the file was not restored -
> it is actually missing from the dump....
>
> Anyway, the bug was in patch 3 - we have to check whether the user
> buffer has space left before processing the end_of_ag state. I
> removed the buffer space check from the for() loop logic because
> it's just plain misleading and with the check in the formatter and
> the end of the main loop is entires redundant. I'll post the series
> again once I've confirmed a good dump and restore.
>
There's a simple test for that condition, as noted in my previous mail
as well in case you missed it. Again, that probably calls out that we
could be doing better unit testing of bulkstat in xfstests. At the very
least we probably need some bulkstat inode count validation against a
known data set. xfsdump testing is obviously important, but if bulkstat
is broken then we clearly can't expect xfsdump to work (and debugging
the former via the latter appears to be quite painful).
Brian
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 0/6 v2] xfs: fix the bulkstat mess
2014-11-06 12:49 ` Brian Foster
@ 2014-11-06 13:02 ` Dave Chinner
0 siblings, 0 replies; 21+ messages in thread
From: Dave Chinner @ 2014-11-06 13:02 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Nov 06, 2014 at 07:49:08AM -0500, Brian Foster wrote:
> There's a simple test for that condition, as noted in my previous mail
> as well in case you missed it. Again, that probably calls out that we
> could be doing better unit testing of bulkstat in xfstests. At the very
> least we probably need some bulkstat inode count validation against a
> known data set.
That's exactly what I've been running to find this latest problem.
But a 500TB filesystem with 10 million inodes in it is a bit beyond
xfstests. ANd that only showed up the problem in 4 AGs out of 500,
so with smaller filesystems there's a good chance that this would
have also been missed....
> xfsdump testing is obviously important, but if bulkstat
> is broken then we clearly can't expect xfsdump to work (and debugging
> the former via the latter appears to be quite painful).
We have a bulkstat command in xfstests. And it can be used to
comapre the output against a stat of the file. But it can't detect
missing inodes and I don't think we can start at arbitrary inodes,
either. So it needs work to be able to be used in unit tests.
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-06 13:14 [PATCH 0/6 v3] " Dave Chinner
@ 2014-11-06 13:14 ` Dave Chinner
2014-11-06 16:32 ` Brian Foster
0 siblings, 1 reply; 21+ messages in thread
From: Dave Chinner @ 2014-11-06 13:14 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The bulkstat main loop progress is tracked by the "lastino"
variable, which is a full 64 bit inode. However, the loop actually
works on agno/agino pairs, and so there's a significant disconnect
between the rest of the loop and the main cursor. Convert this to
use the agino, and pass the agino into the chunk formatting function
and convert it too.
This gets rid of the inconsistency in the loop processing, and
finally makes it simple for us to skip inodes at any point in the
loop simply by incrementing the agino cursor.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
fs/xfs/xfs_itable.c | 68 ++++++++++++++++++++++++++---------------------------
1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index ff3f431..05e83a6 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -282,30 +282,31 @@ xfs_bulkstat_ag_ichunk(
bulkstat_one_pf formatter,
size_t statstruct_size,
struct xfs_bulkstat_agichunk *acp,
- xfs_ino_t *lastino)
+ xfs_agino_t *last_agino)
{
char __user **ubufp = acp->ac_ubuffer;
int chunkidx;
int error = 0;
- xfs_agino_t agino;
+ xfs_agino_t agino = irbp->ir_startino;
- agino = irbp->ir_startino;
for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
chunkidx++, agino++) {
int fmterror;
int ubused;
- xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
+
+ /* inode won't fit in buffer, we are done */
+ if (acp->ac_ubleft < statstruct_size)
+ break;
/* Skip if this inode is free */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
- *lastino = ino;
+ if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
continue;
- }
/* Get the inode and fill in a single buffer */
ubused = statstruct_size;
- error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
- &ubused, &fmterror);
+ error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
+ *ubufp, acp->ac_ubleft, &ubused, &fmterror);
+
if (fmterror == BULKSTAT_RV_GIVEUP ||
(error && error != -ENOENT && error != -EINVAL)) {
acp->ac_ubleft = 0;
@@ -315,7 +316,6 @@ xfs_bulkstat_ag_ichunk(
/* be careful not to leak error if at end of chunk */
if (fmterror == BULKSTAT_RV_NOTHING || error) {
- *lastino = ino;
error = 0;
continue;
}
@@ -323,12 +323,18 @@ xfs_bulkstat_ag_ichunk(
*ubufp += ubused;
acp->ac_ubleft -= ubused;
acp->ac_ubelem++;
- *lastino = ino;
-
- if (acp->ac_ubleft < statstruct_size)
- break;
}
+ /*
+ * Post-update *last_agino. At this point, agino will always point one
+ * inode past the last inode we processed successfully. Hence we
+ * substract that inode when setting the *last_agino cursor so that we
+ * return the correct cookie to userspace. On the next bulkstat call,
+ * the inode under the lastino cookie will be skipped as we have already
+ * processed it here.
+ */
+ *last_agino = agino - 1;
+
return error;
}
@@ -352,7 +358,6 @@ xfs_bulkstat(
xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
size_t irbsize; /* size of irec buffer in bytes */
xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- xfs_ino_t lastino; /* last inode number returned */
int nirbuf; /* size of irbuf */
int ubcount; /* size of user's buffer */
struct xfs_bulkstat_agichunk ac;
@@ -361,11 +366,10 @@ xfs_bulkstat(
/*
* Get the last inode value, see if there's nothing to do.
*/
- lastino = *lastinop;
- agno = XFS_INO_TO_AGNO(mp, lastino);
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ agno = XFS_INO_TO_AGNO(mp, *lastinop);
+ agino = XFS_INO_TO_AGINO(mp, *lastinop);
if (agno >= mp->m_sb.sb_agcount ||
- lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+ *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
*done = 1;
*ubcountp = 0;
return 0;
@@ -420,7 +424,6 @@ xfs_bulkstat(
irbp->ir_freecount = r.ir_freecount;
irbp->ir_free = r.ir_free;
irbp++;
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
}
/* Increment to the next record */
error = xfs_btree_increment(cur, 0, &stat);
@@ -461,7 +464,6 @@ xfs_bulkstat(
/*
* Set agino to after this chunk and bump the cursor.
*/
- agino = r.ir_startino + XFS_INODES_PER_CHUNK;
error = xfs_btree_increment(cur, 0, &stat);
if (error || stat == 0) {
end_of_ag = true;
@@ -481,7 +483,9 @@ del_cursor:
if (error)
break;
/*
- * Now format all the good inodes into the user's buffer.
+ * Now format all the good inodes into the user's buffer. The
+ * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
+ * for the next loop iteration.
*/
irbufend = irbp;
for (irbp = irbuf;
@@ -489,7 +493,7 @@ del_cursor:
irbp++) {
error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
formatter, statstruct_size, &ac,
- &lastino);
+ &agino);
if (error)
break;
@@ -506,8 +510,7 @@ del_cursor:
if (end_of_ag) {
agno++;
agino = 0;
- } else
- agino = XFS_INO_TO_AGINO(mp, lastino);
+ }
}
/*
* Done, we're either out of filesystem or space to put the data.
@@ -525,16 +528,13 @@ del_cursor:
if (ac.ac_ubelem)
error = 0;
- if (agno >= mp->m_sb.sb_agcount) {
- /*
- * If we ran out of filesystem, mark lastino as off
- * the end of the filesystem, so the next call
- * will return immediately.
- */
- *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
+ /*
+ * If we ran out of filesystem, lastino will point off the end of
+ * the filesystem so the next call will return immediately.
+ */
+ *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
+ if (agno >= mp->m_sb.sb_agcount)
*done = 1;
- } else
- *lastinop = (xfs_ino_t)lastino;
return error;
}
--
2.0.0
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 6/6] xfs: track bulkstat progress by agino
2014-11-06 13:14 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
@ 2014-11-06 16:32 ` Brian Foster
0 siblings, 0 replies; 21+ messages in thread
From: Brian Foster @ 2014-11-06 16:32 UTC (permalink / raw)
To: Dave Chinner; +Cc: xfs
On Fri, Nov 07, 2014 at 12:14:59AM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The bulkstat main loop progress is tracked by the "lastino"
> variable, which is a full 64 bit inode. However, the loop actually
> works on agno/agino pairs, and so there's a significant disconnect
> between the rest of the loop and the main cursor. Convert this to
> use the agino, and pass the agino into the chunk formatting function
> and convert it too.
>
> This gets rid of the inconsistency in the loop processing, and
> finally makes it simple for us to skip inodes at any point in the
> loop simply by incrementing the agino cursor.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> fs/xfs/xfs_itable.c | 68 ++++++++++++++++++++++++++---------------------------
> 1 file changed, 34 insertions(+), 34 deletions(-)
>
> diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
> index ff3f431..05e83a6 100644
> --- a/fs/xfs/xfs_itable.c
> +++ b/fs/xfs/xfs_itable.c
> @@ -282,30 +282,31 @@ xfs_bulkstat_ag_ichunk(
> bulkstat_one_pf formatter,
> size_t statstruct_size,
> struct xfs_bulkstat_agichunk *acp,
> - xfs_ino_t *lastino)
> + xfs_agino_t *last_agino)
> {
> char __user **ubufp = acp->ac_ubuffer;
> int chunkidx;
> int error = 0;
> - xfs_agino_t agino;
> + xfs_agino_t agino = irbp->ir_startino;
>
> - agino = irbp->ir_startino;
> for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
> chunkidx++, agino++) {
> int fmterror;
> int ubused;
> - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino);
> +
> + /* inode won't fit in buffer, we are done */
> + if (acp->ac_ubleft < statstruct_size)
> + break;
>
> /* Skip if this inode is free */
> - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
> - *lastino = ino;
> + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
> continue;
> - }
>
> /* Get the inode and fill in a single buffer */
> ubused = statstruct_size;
> - error = formatter(mp, ino, *ubufp, acp->ac_ubleft,
> - &ubused, &fmterror);
> + error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
> + *ubufp, acp->ac_ubleft, &ubused, &fmterror);
> +
> if (fmterror == BULKSTAT_RV_GIVEUP ||
> (error && error != -ENOENT && error != -EINVAL)) {
> acp->ac_ubleft = 0;
> @@ -315,7 +316,6 @@ xfs_bulkstat_ag_ichunk(
>
> /* be careful not to leak error if at end of chunk */
> if (fmterror == BULKSTAT_RV_NOTHING || error) {
> - *lastino = ino;
> error = 0;
> continue;
> }
> @@ -323,12 +323,18 @@ xfs_bulkstat_ag_ichunk(
> *ubufp += ubused;
> acp->ac_ubleft -= ubused;
> acp->ac_ubelem++;
> - *lastino = ino;
> -
> - if (acp->ac_ubleft < statstruct_size)
> - break;
> }
>
> + /*
> + * Post-update *last_agino. At this point, agino will always point one
> + * inode past the last inode we processed successfully. Hence we
> + * substract that inode when setting the *last_agino cursor so that we
> + * return the correct cookie to userspace. On the next bulkstat call,
> + * the inode under the lastino cookie will be skipped as we have already
> + * processed it here.
> + */
> + *last_agino = agino - 1;
> +
> return error;
> }
>
> @@ -352,7 +358,6 @@ xfs_bulkstat(
> xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
> size_t irbsize; /* size of irec buffer in bytes */
> xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
> - xfs_ino_t lastino; /* last inode number returned */
> int nirbuf; /* size of irbuf */
> int ubcount; /* size of user's buffer */
> struct xfs_bulkstat_agichunk ac;
> @@ -361,11 +366,10 @@ xfs_bulkstat(
> /*
> * Get the last inode value, see if there's nothing to do.
> */
> - lastino = *lastinop;
> - agno = XFS_INO_TO_AGNO(mp, lastino);
> - agino = XFS_INO_TO_AGINO(mp, lastino);
> + agno = XFS_INO_TO_AGNO(mp, *lastinop);
> + agino = XFS_INO_TO_AGINO(mp, *lastinop);
> if (agno >= mp->m_sb.sb_agcount ||
> - lastino != XFS_AGINO_TO_INO(mp, agno, agino)) {
> + *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
> *done = 1;
> *ubcountp = 0;
> return 0;
> @@ -420,7 +424,6 @@ xfs_bulkstat(
> irbp->ir_freecount = r.ir_freecount;
> irbp->ir_free = r.ir_free;
> irbp++;
> - agino = r.ir_startino + XFS_INODES_PER_CHUNK;
> }
> /* Increment to the next record */
> error = xfs_btree_increment(cur, 0, &stat);
> @@ -461,7 +464,6 @@ xfs_bulkstat(
> /*
> * Set agino to after this chunk and bump the cursor.
> */
This comment could be updated since it's not safe to set agino here any
longer. Other than that, this all looks pretty good to me now:
Reviewed-by: Brian Foster <bfoster@redhat.com>
> - agino = r.ir_startino + XFS_INODES_PER_CHUNK;
> error = xfs_btree_increment(cur, 0, &stat);
> if (error || stat == 0) {
> end_of_ag = true;
> @@ -481,7 +483,9 @@ del_cursor:
> if (error)
> break;
> /*
> - * Now format all the good inodes into the user's buffer.
> + * Now format all the good inodes into the user's buffer. The
> + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
> + * for the next loop iteration.
> */
> irbufend = irbp;
> for (irbp = irbuf;
> @@ -489,7 +493,7 @@ del_cursor:
> irbp++) {
> error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
> formatter, statstruct_size, &ac,
> - &lastino);
> + &agino);
> if (error)
> break;
>
> @@ -506,8 +510,7 @@ del_cursor:
> if (end_of_ag) {
> agno++;
> agino = 0;
> - } else
> - agino = XFS_INO_TO_AGINO(mp, lastino);
> + }
> }
> /*
> * Done, we're either out of filesystem or space to put the data.
> @@ -525,16 +528,13 @@ del_cursor:
> if (ac.ac_ubelem)
> error = 0;
>
> - if (agno >= mp->m_sb.sb_agcount) {
> - /*
> - * If we ran out of filesystem, mark lastino as off
> - * the end of the filesystem, so the next call
> - * will return immediately.
> - */
> - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
> + /*
> + * If we ran out of filesystem, lastino will point off the end of
> + * the filesystem so the next call will return immediately.
> + */
> + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
> + if (agno >= mp->m_sb.sb_agcount)
> *done = 1;
> - } else
> - *lastinop = (xfs_ino_t)lastino;
>
> return error;
> }
> --
> 2.0.0
>
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2014-11-06 16:32 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-11-05 0:05 [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
2014-11-05 0:05 ` [PATCH 1/6] xfs: bulkstat btree walk doesn't terminate Dave Chinner
2014-11-05 0:05 ` [PATCH 2/6] xfs: bulkstat chunk formatting cursor is broken Dave Chinner
2014-11-05 0:05 ` [PATCH 3/6] xfs: bulkstat chunk-formatter has issues Dave Chinner
2014-11-05 14:59 ` Brian Foster
2014-11-05 0:05 ` [PATCH 4/6] xfs: bulkstat main loop logic is a mess Dave Chinner
2014-11-05 0:05 ` [PATCH 5/6] xfs: bulkstat error handling is broken Dave Chinner
2014-11-05 0:05 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
2014-11-05 15:14 ` Brian Foster
2014-11-05 6:07 ` [PATCH 0/6 v2] xfs: fix the bulkstat mess Dave Chinner
2014-11-05 6:32 ` Dave Chinner
2014-11-05 13:17 ` Brian Foster
2014-11-05 21:21 ` Dave Chinner
2014-11-06 6:53 ` Dave Chinner
2014-11-06 12:49 ` Brian Foster
2014-11-06 13:02 ` Dave Chinner
-- strict thread matches above, loose matches on Subject: below --
2014-11-06 13:14 [PATCH 0/6 v3] " Dave Chinner
2014-11-06 13:14 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
2014-11-06 16:32 ` Brian Foster
2014-11-04 12:53 [PATCH 0/6] xfs: fix the bulkstat mess Dave Chinner
2014-11-04 12:53 ` [PATCH 6/6] xfs: track bulkstat progress by agino Dave Chinner
2014-11-04 19:00 ` Brian Foster
2014-11-04 21:39 ` Dave Chinner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox