* [PATCH 0/3] xfs_repair: fix discontiguous directory block
@ 2014-01-22 7:17 Dave Chinner
2014-01-22 7:17 ` [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map Dave Chinner
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Dave Chinner @ 2014-01-22 7:17 UTC (permalink / raw)
To: xfs
Hi folks,
This series fixes up one of the problems that was causing xfs/291
to fail on crc enabled filesystems. basically CRCs were failing on
directory buffers because they weren't being treated as a
discontiguous buffer correctly by prefetch and hence CRCs were only
eve calculated over a portion of the directory block. Hence it threw
lots of errors. Non CRC filesystems threw the same badness on lookup
errors, but repair managed to rebuild the directory buffers
sufficiently for things to work.
Anyway, this patchset fixes the xfs_repair prefetch code to handle
discontiguous buffers and so avoid all the noise that was being
generated and the errors that were being triggered on CRC
filesystems.
This does no make xfs/291 pass on CRC filesystems. Making xfs_reapir
work has pointed out that metadump's handling of multi-block
directory buffers on CRC enable filesystems also appears to be
broken. Fixing that is another patchset, however.
Cheers,
Dave.
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map
2014-01-22 7:17 [PATCH 0/3] xfs_repair: fix discontiguous directory block Dave Chinner
@ 2014-01-22 7:17 ` Dave Chinner
2014-01-23 17:14 ` Brian Foster
2014-01-22 7:17 ` [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map Dave Chinner
2014-01-22 7:17 ` [PATCH 3/3] repair: fix discontiguous directory block support Dave Chinner
2 siblings, 1 reply; 10+ messages in thread
From: Dave Chinner @ 2014-01-22 7:17 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
It will be needed to make the repair prefetch code aware of compound
buffers.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
include/libxfs.h | 8 ++++----
libxfs/rdwr.c | 15 +++++++++------
libxfs/trans.c | 4 ++--
3 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/include/libxfs.h b/include/libxfs.h
index 4bf331c..2872410 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -392,9 +392,9 @@ extern struct cache_operations libxfs_bcache_operations;
#define libxfs_getbuf(dev, daddr, len) \
libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
(dev), (daddr), (len))
-#define libxfs_getbuf_map(dev, map, nmaps) \
+#define libxfs_getbuf_map(dev, map, nmaps, flags) \
libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \
- (dev), (map), (nmaps))
+ (dev), (map), (nmaps), (flags))
#define libxfs_getbuf_flags(dev, daddr, len, flags) \
libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \
(dev), (daddr), (len), (flags))
@@ -412,7 +412,7 @@ extern int libxfs_trace_writebuf(const char *, const char *, int,
extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int,
struct xfs_buftarg *, xfs_daddr_t, int);
extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int,
- struct xfs_buftarg *, struct xfs_buf_map *, int);
+ struct xfs_buftarg *, struct xfs_buf_map *, int, int);
extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
struct xfs_buftarg *, xfs_daddr_t, int, unsigned int);
extern void libxfs_trace_putbuf (const char *, const char *, int,
@@ -427,7 +427,7 @@ extern xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
extern int libxfs_writebuf(xfs_buf_t *, int);
extern xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
extern xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *,
- struct xfs_buf_map *, int);
+ struct xfs_buf_map *, int, int);
extern xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t,
int, unsigned int);
extern void libxfs_putbuf (xfs_buf_t *);
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 0219a08..bf92788 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -203,7 +203,8 @@ xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
int, int, const struct xfs_buf_ops *);
int libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
-xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int);
+xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
+ int, int);
xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
unsigned int);
void libxfs_putbuf (xfs_buf_t *);
@@ -255,9 +256,10 @@ libxfs_trace_getbuf(const char *func, const char *file, int line,
xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
- struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
+ struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
+ int flags)
{
- xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps);
+ xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags);
__add_trace(bp, func, file, line);
return bp;
}
@@ -582,7 +584,8 @@ libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
}
struct xfs_buf *
-libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
+libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
+ int nmaps, int flags)
{
struct xfs_bufkey key = {0};
int i;
@@ -595,7 +598,7 @@ libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
key.map = map;
key.nmaps = nmaps;
- return __cache_lookup(&key, 0);
+ return __cache_lookup(&key, flags);
}
void
@@ -775,7 +778,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
flags, ops);
- bp = libxfs_getbuf_map(btp, map, nmaps);
+ bp = libxfs_getbuf_map(btp, map, nmaps, 0);
if (!bp)
return NULL;
diff --git a/libxfs/trans.c b/libxfs/trans.c
index 6a05673..6c9d202 100644
--- a/libxfs/trans.c
+++ b/libxfs/trans.c
@@ -511,7 +511,7 @@ libxfs_trans_get_buf_map(
xfs_buf_log_item_t *bip;
if (tp == NULL)
- return libxfs_getbuf_map(btp, map, nmaps);
+ return libxfs_getbuf_map(btp, map, nmaps, 0);
bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
if (bp != NULL) {
@@ -522,7 +522,7 @@ libxfs_trans_get_buf_map(
return bp;
}
- bp = libxfs_getbuf_map(btp, map, nmaps);
+ bp = libxfs_getbuf_map(btp, map, nmaps, 0);
if (bp == NULL)
return NULL;
#ifdef XACT_DEBUG
--
1.8.4.rc3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map
2014-01-22 7:17 [PATCH 0/3] xfs_repair: fix discontiguous directory block Dave Chinner
2014-01-22 7:17 ` [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map Dave Chinner
@ 2014-01-22 7:17 ` Dave Chinner
2014-01-23 17:15 ` Brian Foster
2014-01-22 7:17 ` [PATCH 3/3] repair: fix discontiguous directory block support Dave Chinner
2 siblings, 1 reply; 10+ messages in thread
From: Dave Chinner @ 2014-01-22 7:17 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
The map passed in to libxfs_readbufr_map is used to check the buffer
matches the map. However, the repair readahead code has no map it
can use to validate the buffer it set up previously, so just get rid
of the map being passed in because it serves no useful purpose.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
db/io.c | 4 +---
include/libxfs.h | 3 +--
libxfs/rdwr.c | 12 ++----------
3 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/db/io.c b/db/io.c
index 123214d..d29816c 100644
--- a/db/io.c
+++ b/db/io.c
@@ -449,9 +449,7 @@ write_cur_bbs(void)
/* re-read buffer from disk */
- ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp,
- iocur_top->bbmap->b, iocur_top->bbmap->nmaps,
- 0);
+ ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, 0);
if (ret != 0)
dbprintf(_("read error: %s\n"), strerror(ret));
}
diff --git a/include/libxfs.h b/include/libxfs.h
index 2872410..bb0369f 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -448,8 +448,7 @@ extern void libxfs_putbufr(xfs_buf_t *);
extern int libxfs_writebuf_int(xfs_buf_t *, int);
extern int libxfs_writebufr(struct xfs_buf *);
extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int);
-extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *,
- struct xfs_buf_map *, int, int);
+extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int);
extern int libxfs_bhash_size;
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index bf92788..ac7739f 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -727,27 +727,19 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
}
int
-libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp,
- struct xfs_buf_map *map, int nmaps, int flags)
+libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
{
int fd = libxfs_device_to_fd(btp->dev);
int error = 0;
char *buf;
int i;
- ASSERT(BBTOB(len) <= bp->b_bcount);
-
- ASSERT(bp->b_nmaps == nmaps);
-
fd = libxfs_device_to_fd(btp->dev);
buf = bp->b_addr;
for (i = 0; i < bp->b_nmaps; i++) {
off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn);
int len = BBTOB(bp->b_map[i].bm_len);
- ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn);
- ASSERT(bp->b_map[i].bm_len == map[i].bm_len);
-
error = __read_buf(fd, buf, len, offset, flags);
if (error) {
bp->b_error = error;
@@ -787,7 +779,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
return bp;
- error = libxfs_readbufr_map(btp, bp, map, nmaps, flags);
+ error = libxfs_readbufr_map(btp, bp, flags);
if (!error) {
bp->b_flags |= LIBXFS_B_UPTODATE;
if (bp->b_ops)
--
1.8.4.rc3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/3] repair: fix discontiguous directory block support
2014-01-22 7:17 [PATCH 0/3] xfs_repair: fix discontiguous directory block Dave Chinner
2014-01-22 7:17 ` [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map Dave Chinner
2014-01-22 7:17 ` [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map Dave Chinner
@ 2014-01-22 7:17 ` Dave Chinner
2014-01-23 17:15 ` Brian Foster
2 siblings, 1 reply; 10+ messages in thread
From: Dave Chinner @ 2014-01-22 7:17 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
xfs/291 tests fragmented multi-block directories, and xfs_repair
throws lots of lookup badness errors in phase 3:
- agno = 1
7fa3d2758740: Badness in key lookup (length)
bp=(bno 0x1e040, len 4096 bytes) key=(bno 0x1e040, len 16384 bytes)
- agno = 2
7fa3d2758740: Badness in key lookup (length)
bp=(bno 0x2d0e8, len 4096 bytes) key=(bno 0x2d0e8, len 16384 bytes)
7fa3d2758740: Badness in key lookup (length)
bp=(bno 0x2d068, len 4096 bytes) key=(bno 0x2d068, len 16384 bytes)
- agno = 3
7fa3d2758740: Badness in key lookup (length)
bp=(bno 0x39130, len 4096 bytes) key=(bno 0x39130, len 16384 bytes)
7fa3d2758740: Badness in key lookup (length)
bp=(bno 0x391b0, len 4096 bytes) key=(bno 0x391b0, len 16384 bytes)
7fa3d2758740: Badness in key lookup (length)
This is because it is trying to read a directory buffer in full
(16k), but is finding a single 4k block in the cache instead.
The opposite is happening in phase 6 - phase 6 is trying to read 4k
buffers but is finding a 16k buffer there instead.
The problem is caused by the fact that directory buffers can be
represented as compound buffers or as individual buffers depending
on the code reading the directory blocks. The main problem is that
the IO prefetch code doesn't understand compound buffers, so teach
it about compound buffers to make the problem go away.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
repair/prefetch.c | 121 ++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 100 insertions(+), 21 deletions(-)
diff --git a/repair/prefetch.c b/repair/prefetch.c
index d3491da..ab90b92 100644
--- a/repair/prefetch.c
+++ b/repair/prefetch.c
@@ -105,11 +105,12 @@ pf_start_io_workers(
static void
pf_queue_io(
prefetch_args_t *args,
- xfs_fsblock_t fsbno,
- int blen,
+ struct xfs_buf_map *map,
+ int nmaps,
int flag)
{
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, map[0].bm_bn);
/*
* Never block on a buffer lock here, given that the actual repair
@@ -117,8 +118,7 @@ pf_queue_io(
* the lock holder is either reading it from disk himself or
* completely overwriting it this behaviour is perfectly fine.
*/
- bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
- XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK);
+ bp = libxfs_getbuf_map(mp->m_dev, map, nmaps, LIBXFS_GETBUF_TRYLOCK);
if (!bp)
return;
@@ -167,6 +167,14 @@ pf_read_bmbt_reclist(
xfs_bmbt_irec_t irec;
xfs_dfilblks_t cp = 0; /* prev count */
xfs_dfiloff_t op = 0; /* prev offset */
+#define MAP_ARRAY_SZ 4
+ struct xfs_buf_map map_array[MAP_ARRAY_SZ];
+ struct xfs_buf_map *map = map_array;
+ int max_extents = MAP_ARRAY_SZ;
+ int nmaps = 0;;
+ unsigned int len = 0;
+ int ret = 0;
+
for (i = 0; i < numrecs; i++) {
libxfs_bmbt_disk_get_all(rp + i, &irec);
@@ -174,11 +182,11 @@ pf_read_bmbt_reclist(
if (((i > 0) && (op + cp > irec.br_startoff)) ||
(irec.br_blockcount == 0) ||
(irec.br_startoff >= fs_max_file_offset))
- return 0;
+ goto out_free;
if (!verify_dfsbno(mp, irec.br_startblock) || !verify_dfsbno(mp,
irec.br_startblock + irec.br_blockcount - 1))
- return 0;
+ goto out_free;
if (!args->dirs_only && ((irec.br_startoff +
irec.br_blockcount) >= mp->m_dirfreeblk))
@@ -188,18 +196,60 @@ pf_read_bmbt_reclist(
cp = irec.br_blockcount;
while (irec.br_blockcount) {
- unsigned int len;
+ unsigned int bm_len;
pftrace("queuing dir extent in AG %d", args->agno);
- len = (irec.br_blockcount > mp->m_dirblkfsbs) ?
- mp->m_dirblkfsbs : irec.br_blockcount;
- pf_queue_io(args, irec.br_startblock, len, B_DIR_META);
- irec.br_blockcount -= len;
- irec.br_startblock += len;
+ if (len + irec.br_blockcount >= mp->m_dirblkfsbs) {
+ bm_len = mp->m_dirblkfsbs - len;
+ len = 0;
+ } else {
+ len += irec.br_blockcount;
+ bm_len = irec.br_blockcount;
+ }
+
+ map[nmaps].bm_bn = XFS_FSB_TO_DADDR(mp,
+ irec.br_startblock);
+ map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len);
+ nmaps++;
+
+ if (len == 0) {
+ pf_queue_io(args, map, nmaps, B_DIR_META);
+ nmaps = 0;
+ }
+
+ irec.br_blockcount -= bm_len;
+ irec.br_startblock += bm_len;
+
+ /*
+ * Handle very fragmented dir2 blocks with dynamically
+ * allocated buffer maps.
+ */
+ if (nmaps >= max_extents) {
+ struct xfs_buf_map *old_map = NULL;
+
+ if (map == map_array) {
+ old_map = map;
+ map = NULL;
+ }
+ max_extents *= 2;
+ map = realloc(map, max_extents * sizeof(*map));
+ if (map == NULL) {
+ do_error(
+ _("couldn't malloc dir2 buffer list\n"));
+ exit(1);
+ }
+ if (old_map)
+ memcpy(map, old_map, sizeof(map_array));
+ }
+
}
}
- return 1;
+ ret = 1;
+out_free:
+ if (map != map_array)
+ free(map);
+ return ret;
}
/*
@@ -395,9 +445,28 @@ pf_read_inode_dirs(
}
/*
- * pf_batch_read must be called with the lock locked.
+ * Discontiguous buffers require multiple IOs to fill, so we can't use any
+ * linearising, hole filling algorithms on them to avoid seeks. Just remove them
+ * for the prefetch queue and read them straight into the cache and release
+ * them.
*/
+static void
+pf_read_discontig(
+ struct prefetch_args *args,
+ struct xfs_buf *bp)
+{
+ if (!btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp, bp->b_bn)))
+ do_error(_("prefetch corruption\n"));
+
+ pthread_mutex_unlock(&args->lock);
+ libxfs_readbufr_map(mp->m_ddev_targp, bp, 0);
+ libxfs_putbuf(bp);
+ pthread_mutex_lock(&args->lock);
+}
+/*
+ * pf_batch_read must be called with the lock locked.
+ */
static void
pf_batch_read(
prefetch_args_t *args,
@@ -426,8 +495,15 @@ pf_batch_read(
max_fsbno = fsbno + pf_max_fsbs;
}
while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) {
- if (which != PF_META_ONLY ||
- !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
+ /*
+ * Handle discontiguous buffers outside the seek
+ * optimised IO loop below.
+ */
+ if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) {
+ pf_read_discontig(args, bplist[num]);
+ bplist[num] = NULL;
+ } else if (which != PF_META_ONLY ||
+ !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
num++;
if (num == MAX_BUFS)
break;
@@ -664,10 +740,13 @@ pf_queuing_worker(
bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum);
do {
- pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno),
- blks_per_cluster,
- (cur_irec->ino_isa_dir != 0) ?
- B_DIR_INODE : B_INODE);
+ struct xfs_buf_map map;
+
+ map.bm_bn = XFS_AGB_TO_DADDR(mp, args->agno, bno);
+ map.bm_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
+ pf_queue_io(args, &map, 1,
+ (cur_irec->ino_isa_dir != 0) ? B_DIR_INODE
+ : B_INODE);
bno += blks_per_cluster;
num_inos += inodes_per_cluster;
} while (num_inos < XFS_IALLOC_INODES(mp));
--
1.8.4.rc3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map
2014-01-22 7:17 ` [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map Dave Chinner
@ 2014-01-23 17:14 ` Brian Foster
0 siblings, 0 replies; 10+ messages in thread
From: Brian Foster @ 2014-01-23 17:14 UTC (permalink / raw)
To: xfs
On 01/22/2014 02:17 AM, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> It will be needed to make the repair prefetch code aware of compound
> buffers.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
Looks Ok.
Reviewed-by: Brian Foster <bfoster@redhat.com>
> include/libxfs.h | 8 ++++----
> libxfs/rdwr.c | 15 +++++++++------
> libxfs/trans.c | 4 ++--
> 3 files changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/include/libxfs.h b/include/libxfs.h
> index 4bf331c..2872410 100644
> --- a/include/libxfs.h
> +++ b/include/libxfs.h
> @@ -392,9 +392,9 @@ extern struct cache_operations libxfs_bcache_operations;
> #define libxfs_getbuf(dev, daddr, len) \
> libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
> (dev), (daddr), (len))
> -#define libxfs_getbuf_map(dev, map, nmaps) \
> +#define libxfs_getbuf_map(dev, map, nmaps, flags) \
> libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \
> - (dev), (map), (nmaps))
> + (dev), (map), (nmaps), (flags))
> #define libxfs_getbuf_flags(dev, daddr, len, flags) \
> libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \
> (dev), (daddr), (len), (flags))
> @@ -412,7 +412,7 @@ extern int libxfs_trace_writebuf(const char *, const char *, int,
> extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int,
> struct xfs_buftarg *, xfs_daddr_t, int);
> extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int,
> - struct xfs_buftarg *, struct xfs_buf_map *, int);
> + struct xfs_buftarg *, struct xfs_buf_map *, int, int);
> extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
> struct xfs_buftarg *, xfs_daddr_t, int, unsigned int);
> extern void libxfs_trace_putbuf (const char *, const char *, int,
> @@ -427,7 +427,7 @@ extern xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
> extern int libxfs_writebuf(xfs_buf_t *, int);
> extern xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
> extern xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *,
> - struct xfs_buf_map *, int);
> + struct xfs_buf_map *, int, int);
> extern xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t,
> int, unsigned int);
> extern void libxfs_putbuf (xfs_buf_t *);
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index 0219a08..bf92788 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -203,7 +203,8 @@ xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
> int, int, const struct xfs_buf_ops *);
> int libxfs_writebuf(xfs_buf_t *, int);
> xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
> -xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int);
> +xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
> + int, int);
> xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
> unsigned int);
> void libxfs_putbuf (xfs_buf_t *);
> @@ -255,9 +256,10 @@ libxfs_trace_getbuf(const char *func, const char *file, int line,
>
> xfs_buf_t *
> libxfs_trace_getbuf_map(const char *func, const char *file, int line,
> - struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
> + struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
> + int flags)
> {
> - xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps);
> + xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags);
> __add_trace(bp, func, file, line);
> return bp;
> }
> @@ -582,7 +584,8 @@ libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
> }
>
> struct xfs_buf *
> -libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
> +libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
> + int nmaps, int flags)
> {
> struct xfs_bufkey key = {0};
> int i;
> @@ -595,7 +598,7 @@ libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
> key.map = map;
> key.nmaps = nmaps;
>
> - return __cache_lookup(&key, 0);
> + return __cache_lookup(&key, flags);
> }
>
> void
> @@ -775,7 +778,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
> return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
> flags, ops);
>
> - bp = libxfs_getbuf_map(btp, map, nmaps);
> + bp = libxfs_getbuf_map(btp, map, nmaps, 0);
> if (!bp)
> return NULL;
>
> diff --git a/libxfs/trans.c b/libxfs/trans.c
> index 6a05673..6c9d202 100644
> --- a/libxfs/trans.c
> +++ b/libxfs/trans.c
> @@ -511,7 +511,7 @@ libxfs_trans_get_buf_map(
> xfs_buf_log_item_t *bip;
>
> if (tp == NULL)
> - return libxfs_getbuf_map(btp, map, nmaps);
> + return libxfs_getbuf_map(btp, map, nmaps, 0);
>
> bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
> if (bp != NULL) {
> @@ -522,7 +522,7 @@ libxfs_trans_get_buf_map(
> return bp;
> }
>
> - bp = libxfs_getbuf_map(btp, map, nmaps);
> + bp = libxfs_getbuf_map(btp, map, nmaps, 0);
> if (bp == NULL)
> return NULL;
> #ifdef XACT_DEBUG
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map
2014-01-22 7:17 ` [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map Dave Chinner
@ 2014-01-23 17:15 ` Brian Foster
2014-01-23 21:27 ` Dave Chinner
0 siblings, 1 reply; 10+ messages in thread
From: Brian Foster @ 2014-01-23 17:15 UTC (permalink / raw)
To: Dave Chinner, xfs
On 01/22/2014 02:17 AM, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> The map passed in to libxfs_readbufr_map is used to check the buffer
> matches the map. However, the repair readahead code has no map it
> can use to validate the buffer it set up previously, so just get rid
> of the map being passed in because it serves no useful purpose.
>
The code looks fine, effectively just removing some assert code, but I'm
not following the reasoning. I'm probably missing some context. Is the
justification that no callers of libxfs_readbufr_map() will actually
pass a map, or that the checking is not useful (redundant)?
Brian
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
> db/io.c | 4 +---
> include/libxfs.h | 3 +--
> libxfs/rdwr.c | 12 ++----------
> 3 files changed, 4 insertions(+), 15 deletions(-)
>
> diff --git a/db/io.c b/db/io.c
> index 123214d..d29816c 100644
> --- a/db/io.c
> +++ b/db/io.c
> @@ -449,9 +449,7 @@ write_cur_bbs(void)
>
>
> /* re-read buffer from disk */
> - ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp,
> - iocur_top->bbmap->b, iocur_top->bbmap->nmaps,
> - 0);
> + ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, 0);
> if (ret != 0)
> dbprintf(_("read error: %s\n"), strerror(ret));
> }
> diff --git a/include/libxfs.h b/include/libxfs.h
> index 2872410..bb0369f 100644
> --- a/include/libxfs.h
> +++ b/include/libxfs.h
> @@ -448,8 +448,7 @@ extern void libxfs_putbufr(xfs_buf_t *);
> extern int libxfs_writebuf_int(xfs_buf_t *, int);
> extern int libxfs_writebufr(struct xfs_buf *);
> extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int);
> -extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *,
> - struct xfs_buf_map *, int, int);
> +extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int);
>
> extern int libxfs_bhash_size;
>
> diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
> index bf92788..ac7739f 100644
> --- a/libxfs/rdwr.c
> +++ b/libxfs/rdwr.c
> @@ -727,27 +727,19 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags,
> }
>
> int
> -libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp,
> - struct xfs_buf_map *map, int nmaps, int flags)
> +libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
> {
> int fd = libxfs_device_to_fd(btp->dev);
> int error = 0;
> char *buf;
> int i;
>
> - ASSERT(BBTOB(len) <= bp->b_bcount);
> -
> - ASSERT(bp->b_nmaps == nmaps);
> -
> fd = libxfs_device_to_fd(btp->dev);
> buf = bp->b_addr;
> for (i = 0; i < bp->b_nmaps; i++) {
> off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn);
> int len = BBTOB(bp->b_map[i].bm_len);
>
> - ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn);
> - ASSERT(bp->b_map[i].bm_len == map[i].bm_len);
> -
> error = __read_buf(fd, buf, len, offset, flags);
> if (error) {
> bp->b_error = error;
> @@ -787,7 +779,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
> if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY)))
> return bp;
>
> - error = libxfs_readbufr_map(btp, bp, map, nmaps, flags);
> + error = libxfs_readbufr_map(btp, bp, flags);
> if (!error) {
> bp->b_flags |= LIBXFS_B_UPTODATE;
> if (bp->b_ops)
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 3/3] repair: fix discontiguous directory block support
2014-01-22 7:17 ` [PATCH 3/3] repair: fix discontiguous directory block support Dave Chinner
@ 2014-01-23 17:15 ` Brian Foster
2014-01-23 21:41 ` Dave Chinner
0 siblings, 1 reply; 10+ messages in thread
From: Brian Foster @ 2014-01-23 17:15 UTC (permalink / raw)
To: Dave Chinner, xfs
On 01/22/2014 02:17 AM, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> xfs/291 tests fragmented multi-block directories, and xfs_repair
> throws lots of lookup badness errors in phase 3:
>
> - agno = 1
> 7fa3d2758740: Badness in key lookup (length)
> bp=(bno 0x1e040, len 4096 bytes) key=(bno 0x1e040, len 16384 bytes)
> - agno = 2
> 7fa3d2758740: Badness in key lookup (length)
> bp=(bno 0x2d0e8, len 4096 bytes) key=(bno 0x2d0e8, len 16384 bytes)
> 7fa3d2758740: Badness in key lookup (length)
> bp=(bno 0x2d068, len 4096 bytes) key=(bno 0x2d068, len 16384 bytes)
> - agno = 3
> 7fa3d2758740: Badness in key lookup (length)
> bp=(bno 0x39130, len 4096 bytes) key=(bno 0x39130, len 16384 bytes)
> 7fa3d2758740: Badness in key lookup (length)
> bp=(bno 0x391b0, len 4096 bytes) key=(bno 0x391b0, len 16384 bytes)
> 7fa3d2758740: Badness in key lookup (length)
>
> This is because it is trying to read a directory buffer in full
> (16k), but is finding a single 4k block in the cache instead.
>
> The opposite is happening in phase 6 - phase 6 is trying to read 4k
> buffers but is finding a 16k buffer there instead.
>
> The problem is caused by the fact that directory buffers can be
> represented as compound buffers or as individual buffers depending
> on the code reading the directory blocks. The main problem is that
> the IO prefetch code doesn't understand compound buffers, so teach
> it about compound buffers to make the problem go away.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
I think I get the gist of what's going on here. One potential issue
noted below along with notes to self to confirm whether I follow the
code correctly.
> repair/prefetch.c | 121 ++++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 100 insertions(+), 21 deletions(-)
>
> diff --git a/repair/prefetch.c b/repair/prefetch.c
> index d3491da..ab90b92 100644
> --- a/repair/prefetch.c
> +++ b/repair/prefetch.c
> @@ -105,11 +105,12 @@ pf_start_io_workers(
> static void
> pf_queue_io(
> prefetch_args_t *args,
> - xfs_fsblock_t fsbno,
> - int blen,
> + struct xfs_buf_map *map,
> + int nmaps,
> int flag)
> {
> - xfs_buf_t *bp;
> + struct xfs_buf *bp;
> + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, map[0].bm_bn);
>
> /*
> * Never block on a buffer lock here, given that the actual repair
> @@ -117,8 +118,7 @@ pf_queue_io(
> * the lock holder is either reading it from disk himself or
> * completely overwriting it this behaviour is perfectly fine.
> */
> - bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno),
> - XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK);
> + bp = libxfs_getbuf_map(mp->m_dev, map, nmaps, LIBXFS_GETBUF_TRYLOCK);
> if (!bp)
> return;
>
Interface change to support queuing a discontig buffer.
> @@ -167,6 +167,14 @@ pf_read_bmbt_reclist(
> xfs_bmbt_irec_t irec;
> xfs_dfilblks_t cp = 0; /* prev count */
> xfs_dfiloff_t op = 0; /* prev offset */
> +#define MAP_ARRAY_SZ 4
> + struct xfs_buf_map map_array[MAP_ARRAY_SZ];
> + struct xfs_buf_map *map = map_array;
> + int max_extents = MAP_ARRAY_SZ;
> + int nmaps = 0;;
> + unsigned int len = 0;
> + int ret = 0;
> +
So if I understand correctly, the idea here is to now batch extent reads
into buffers of the directory block size, quieting the messages
described in the commit log.
>
> for (i = 0; i < numrecs; i++) {
> libxfs_bmbt_disk_get_all(rp + i, &irec);
> @@ -174,11 +182,11 @@ pf_read_bmbt_reclist(
> if (((i > 0) && (op + cp > irec.br_startoff)) ||
> (irec.br_blockcount == 0) ||
> (irec.br_startoff >= fs_max_file_offset))
> - return 0;
> + goto out_free;
>
> if (!verify_dfsbno(mp, irec.br_startblock) || !verify_dfsbno(mp,
> irec.br_startblock + irec.br_blockcount - 1))
> - return 0;
> + goto out_free;
>
> if (!args->dirs_only && ((irec.br_startoff +
> irec.br_blockcount) >= mp->m_dirfreeblk))
> @@ -188,18 +196,60 @@ pf_read_bmbt_reclist(
> cp = irec.br_blockcount;
>
> while (irec.br_blockcount) {
> - unsigned int len;
> + unsigned int bm_len;
>
> pftrace("queuing dir extent in AG %d", args->agno);
>
> - len = (irec.br_blockcount > mp->m_dirblkfsbs) ?
> - mp->m_dirblkfsbs : irec.br_blockcount;
> - pf_queue_io(args, irec.br_startblock, len, B_DIR_META);
> - irec.br_blockcount -= len;
> - irec.br_startblock += len;
> + if (len + irec.br_blockcount >= mp->m_dirblkfsbs) {
> + bm_len = mp->m_dirblkfsbs - len;
> + len = 0;
> + } else {
> + len += irec.br_blockcount;
> + bm_len = irec.br_blockcount;
> + }
So len represents the total length of the maps attached to the current
array...
> +
> + map[nmaps].bm_bn = XFS_FSB_TO_DADDR(mp,
> + irec.br_startblock);
> + map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len);
> + nmaps++;
> +
> + if (len == 0) {
> + pf_queue_io(args, map, nmaps, B_DIR_META);
> + nmaps = 0;
> + }
Kind of a nit, but this looks a little weird to me. The logic would be a
bit more clear with something like:
if (len + irec.br_blockcount > mp->dirblkfsbs)
bm_len = mp->m_dirblkfsbs - len;
else
bm_len = irec.br_blockcount;
len += bm_len;
...
if (len == mp->dirblkfsbs) {
len = 0;
pf_queue_io(...)
}
... which then raises the question of what happens if the directory
we're reading doesn't end with len == mp->dirblkfsbs? If so, perhaps not
a performance regression, but it looks like we wouldn't queue the last
I/O. Some of the directory code suggests that we fail if we don't alloc
the dirblkfsbs block count, so maybe this doesn't happen.
> +
> + irec.br_blockcount -= bm_len;
> + irec.br_startblock += bm_len;
> +
> + /*
> + * Handle very fragmented dir2 blocks with dynamically
> + * allocated buffer maps.
> + */
> + if (nmaps >= max_extents) {
> + struct xfs_buf_map *old_map = NULL;
> +
> + if (map == map_array) {
> + old_map = map;
> + map = NULL;
> + }
> + max_extents *= 2;
> + map = realloc(map, max_extents * sizeof(*map));
> + if (map == NULL) {
> + do_error(
> + _("couldn't malloc dir2 buffer list\n"));
> + exit(1);
> + }
> + if (old_map)
> + memcpy(map, old_map, sizeof(map_array));
> + }
> +
> }
> }
> - return 1;
> + ret = 1;
> +out_free:
> + if (map != map_array)
> + free(map);
> + return ret;
> }
>
> /*
> @@ -395,9 +445,28 @@ pf_read_inode_dirs(
> }
>
> /*
> - * pf_batch_read must be called with the lock locked.
> + * Discontiguous buffers require multiple IOs to fill, so we can't use any
> + * linearising, hole filling algorithms on them to avoid seeks. Just remove them
> + * for the prefetch queue and read them straight into the cache and release
> + * them.
> */
> +static void
> +pf_read_discontig(
> + struct prefetch_args *args,
> + struct xfs_buf *bp)
> +{
> + if (!btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp, bp->b_bn)))
> + do_error(_("prefetch corruption\n"));
> +
> + pthread_mutex_unlock(&args->lock);
> + libxfs_readbufr_map(mp->m_ddev_targp, bp, 0);
> + libxfs_putbuf(bp);
> + pthread_mutex_lock(&args->lock);
> +}
>
> +/*
> + * pf_batch_read must be called with the lock locked.
> + */
> static void
> pf_batch_read(
> prefetch_args_t *args,
> @@ -426,8 +495,15 @@ pf_batch_read(
> max_fsbno = fsbno + pf_max_fsbs;
> }
> while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) {
> - if (which != PF_META_ONLY ||
> - !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
> + /*
> + * Handle discontiguous buffers outside the seek
> + * optimised IO loop below.
> + */
> + if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) {
> + pf_read_discontig(args, bplist[num]);
> + bplist[num] = NULL;
So we pull these out from the processing below (which appears to want to
issue largish reads comprised of multiple buffers, via bplist). Thanks
for the comment above pf_read_discontig().
> + } else if (which != PF_META_ONLY ||
> + !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
> num++;
> if (num == MAX_BUFS)
> break;
> @@ -664,10 +740,13 @@ pf_queuing_worker(
> bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum);
>
> do {
> - pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno),
> - blks_per_cluster,
> - (cur_irec->ino_isa_dir != 0) ?
> - B_DIR_INODE : B_INODE);
> + struct xfs_buf_map map;
> +
> + map.bm_bn = XFS_AGB_TO_DADDR(mp, args->agno, bno);
> + map.bm_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
> + pf_queue_io(args, &map, 1,
> + (cur_irec->ino_isa_dir != 0) ? B_DIR_INODE
> +
Straightforward change based on the new pf_queue_io().
Brian
: B_INODE);
> bno += blks_per_cluster;
> num_inos += inodes_per_cluster;
> } while (num_inos < XFS_IALLOC_INODES(mp));
>
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map
2014-01-23 17:15 ` Brian Foster
@ 2014-01-23 21:27 ` Dave Chinner
0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2014-01-23 21:27 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Jan 23, 2014 at 12:15:10PM -0500, Brian Foster wrote:
> On 01/22/2014 02:17 AM, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> >
> > The map passed in to libxfs_readbufr_map is used to check the buffer
> > matches the map. However, the repair readahead code has no map it
> > can use to validate the buffer it set up previously, so just get rid
> > of the map being passed in because it serves no useful purpose.
> >
>
> The code looks fine, effectively just removing some assert code, but I'm
> not following the reasoning. I'm probably missing some context. Is the
> justification that no callers of libxfs_readbufr_map() will actually
> pass a map, or that the checking is not useful (redundant)?
Lets start with "redundant".
The libxfs_readbuf_map() call passes in the map it just passed to
libxfs_getbuf_map() - there's no real point to testing it as we've
got bigger problems if libxfs_getbuf_map() doesn't build the buffer
correctly from the map.
The call in db/io.c passes in the same map that was
passed to libxfs_readbuf_map(), which means it's checking the buffer
multiple times against the same map. Again, redundant because
the only thing that has a reference to the buffer is the db IO code.
Now API consistency:
libxfs_writebufr() has no requirement for a map for checking,
and takes no parameters other than the buffer and trusts the buffer
to be set up correctly. It treats both contiguous and discontiguous
buffers the same way.
libxfs_readbufr() takes a blkno/len because there are use cases
for partial buffer reads which we don't have for discontiguous
buffers.
Hence we don't need to pass a map to libxfs_readbuf_map()
for any functional reason. And seeing as libxfs_writebufr() already
trusts libxfs_getbuf_map() to set up a discontiguous buffer
correctly, I don't see why the read path should be any different.
Now for correctness:
The ASSERT code is not built in by default. It's pretty
obvious it has never been used because:
> > +libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags)
> > {
> > int fd = libxfs_device_to_fd(btp->dev);
> > int error = 0;
> > char *buf;
> > int i;
> >
> > - ASSERT(BBTOB(len) <= bp->b_bcount);
It's obviously broken.
In summary, the map being passed in is unused, untested, redundant
and broken....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 3/3] repair: fix discontiguous directory block support
2014-01-23 17:15 ` Brian Foster
@ 2014-01-23 21:41 ` Dave Chinner
0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2014-01-23 21:41 UTC (permalink / raw)
To: Brian Foster; +Cc: xfs
On Thu, Jan 23, 2014 at 12:15:22PM -0500, Brian Foster wrote:
> On 01/22/2014 02:17 AM, Dave Chinner wrote:
> > @@ -167,6 +167,14 @@ pf_read_bmbt_reclist(
> > xfs_bmbt_irec_t irec;
> > xfs_dfilblks_t cp = 0; /* prev count */
> > xfs_dfiloff_t op = 0; /* prev offset */
> > +#define MAP_ARRAY_SZ 4
> > + struct xfs_buf_map map_array[MAP_ARRAY_SZ];
> > + struct xfs_buf_map *map = map_array;
> > + int max_extents = MAP_ARRAY_SZ;
> > + int nmaps = 0;;
> > + unsigned int len = 0;
> > + int ret = 0;
> > +
>
> So if I understand correctly, the idea here is to now batch extent reads
> into buffers of the directory block size, quieting the messages
> described in the commit log.
Yes.
> > @@ -188,18 +196,60 @@ pf_read_bmbt_reclist(
> > cp = irec.br_blockcount;
> >
> > while (irec.br_blockcount) {
> > - unsigned int len;
> > + unsigned int bm_len;
> >
> > pftrace("queuing dir extent in AG %d", args->agno);
> >
> > - len = (irec.br_blockcount > mp->m_dirblkfsbs) ?
> > - mp->m_dirblkfsbs : irec.br_blockcount;
> > - pf_queue_io(args, irec.br_startblock, len, B_DIR_META);
> > - irec.br_blockcount -= len;
> > - irec.br_startblock += len;
> > + if (len + irec.br_blockcount >= mp->m_dirblkfsbs) {
> > + bm_len = mp->m_dirblkfsbs - len;
> > + len = 0;
> > + } else {
> > + len += irec.br_blockcount;
> > + bm_len = irec.br_blockcount;
> > + }
>
> So len represents the total length of the maps attached to the current
> array...
>
> > +
> > + map[nmaps].bm_bn = XFS_FSB_TO_DADDR(mp,
> > + irec.br_startblock);
> > + map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len);
> > + nmaps++;
> > +
> > + if (len == 0) {
> > + pf_queue_io(args, map, nmaps, B_DIR_META);
> > + nmaps = 0;
> > + }
>
> Kind of a nit, but this looks a little weird to me. The logic would be a
> bit more clear with something like:
>
> if (len + irec.br_blockcount > mp->dirblkfsbs)
> bm_len = mp->m_dirblkfsbs - len;
> else
> bm_len = irec.br_blockcount;
> len += bm_len;
>
> ...
>
> if (len == mp->dirblkfsbs) {
> len = 0;
> pf_queue_io(...)
> }
Yeah, that's more obvious and consistent with other code. Will fix.
> ... which then raises the question of what happens if the directory
> we're reading doesn't end with len == mp->dirblkfsbs? If so, perhaps not
> a performance regression, but it looks like we wouldn't queue the last
> I/O. Some of the directory code suggests that we fail if we don't alloc
> the dirblkfsbs block count, so maybe this doesn't happen.
That's not an issue the prefetch code needs to handle. Prefetching
is just about walking the extent tree and pulling the necessary
buffers into the cache prior to scanning them. Other code is
responsible for checking that the block count/extent map is actually
valid.
Also, if we don't prefetch a block, then when it is required later
it will be read directly. Hence not doing IO here is does not affect
the behaviour of xfs_repair at all.
> > +/*
> > + * pf_batch_read must be called with the lock locked.
> > + */
> > static void
> > pf_batch_read(
> > prefetch_args_t *args,
> > @@ -426,8 +495,15 @@ pf_batch_read(
> > max_fsbno = fsbno + pf_max_fsbs;
> > }
> > while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) {
> > - if (which != PF_META_ONLY ||
> > - !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num])))
> > + /*
> > + * Handle discontiguous buffers outside the seek
> > + * optimised IO loop below.
> > + */
> > + if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) {
> > + pf_read_discontig(args, bplist[num]);
> > + bplist[num] = NULL;
>
> So we pull these out from the processing below (which appears to want to
> issue largish reads comprised of multiple buffers, via bplist). Thanks
> for the comment above pf_read_discontig().
Yes, that's right.
FYI, the concept behind the prefetch algorithm is to optimise the IO
if possible by doing a single large IO and cherry-picking the
metadata out of it rather than lots of small semi-random IOs. i.e.
use excess storage bandwidth instead of seeks to read dense
regionsof metadata. With dense enough metadata and multi-threading
this optimisation allows xfs_repair to pull in hundreds of megabytes
of metadata every second for processing and as such can keep
multiple CPUs busy even on seek-limited storage. The code is pretty
gnarly, though....
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map
2014-01-23 23:21 [PATCH 0/3 V2] repair: " Dave Chinner
@ 2014-01-23 23:21 ` Dave Chinner
0 siblings, 0 replies; 10+ messages in thread
From: Dave Chinner @ 2014-01-23 23:21 UTC (permalink / raw)
To: xfs
From: Dave Chinner <dchinner@redhat.com>
It will be needed to make the repair prefetch code aware of compound
buffers.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
include/libxfs.h | 8 ++++----
libxfs/rdwr.c | 15 +++++++++------
libxfs/trans.c | 4 ++--
3 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/include/libxfs.h b/include/libxfs.h
index 4bf331c..2872410 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -392,9 +392,9 @@ extern struct cache_operations libxfs_bcache_operations;
#define libxfs_getbuf(dev, daddr, len) \
libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \
(dev), (daddr), (len))
-#define libxfs_getbuf_map(dev, map, nmaps) \
+#define libxfs_getbuf_map(dev, map, nmaps, flags) \
libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \
- (dev), (map), (nmaps))
+ (dev), (map), (nmaps), (flags))
#define libxfs_getbuf_flags(dev, daddr, len, flags) \
libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \
(dev), (daddr), (len), (flags))
@@ -412,7 +412,7 @@ extern int libxfs_trace_writebuf(const char *, const char *, int,
extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int,
struct xfs_buftarg *, xfs_daddr_t, int);
extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int,
- struct xfs_buftarg *, struct xfs_buf_map *, int);
+ struct xfs_buftarg *, struct xfs_buf_map *, int, int);
extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int,
struct xfs_buftarg *, xfs_daddr_t, int, unsigned int);
extern void libxfs_trace_putbuf (const char *, const char *, int,
@@ -427,7 +427,7 @@ extern xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
extern int libxfs_writebuf(xfs_buf_t *, int);
extern xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
extern xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *,
- struct xfs_buf_map *, int);
+ struct xfs_buf_map *, int, int);
extern xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t,
int, unsigned int);
extern void libxfs_putbuf (xfs_buf_t *);
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 0219a08..bf92788 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -203,7 +203,8 @@ xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
int, int, const struct xfs_buf_ops *);
int libxfs_writebuf(xfs_buf_t *, int);
xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int);
-xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int);
+xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *,
+ int, int);
xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int,
unsigned int);
void libxfs_putbuf (xfs_buf_t *);
@@ -255,9 +256,10 @@ libxfs_trace_getbuf(const char *func, const char *file, int line,
xfs_buf_t *
libxfs_trace_getbuf_map(const char *func, const char *file, int line,
- struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
+ struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
+ int flags)
{
- xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps);
+ xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags);
__add_trace(bp, func, file, line);
return bp;
}
@@ -582,7 +584,8 @@ libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len)
}
struct xfs_buf *
-libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
+libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map,
+ int nmaps, int flags)
{
struct xfs_bufkey key = {0};
int i;
@@ -595,7 +598,7 @@ libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps)
key.map = map;
key.nmaps = nmaps;
- return __cache_lookup(&key, 0);
+ return __cache_lookup(&key, flags);
}
void
@@ -775,7 +778,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps,
return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len,
flags, ops);
- bp = libxfs_getbuf_map(btp, map, nmaps);
+ bp = libxfs_getbuf_map(btp, map, nmaps, 0);
if (!bp)
return NULL;
diff --git a/libxfs/trans.c b/libxfs/trans.c
index 6a05673..6c9d202 100644
--- a/libxfs/trans.c
+++ b/libxfs/trans.c
@@ -511,7 +511,7 @@ libxfs_trans_get_buf_map(
xfs_buf_log_item_t *bip;
if (tp == NULL)
- return libxfs_getbuf_map(btp, map, nmaps);
+ return libxfs_getbuf_map(btp, map, nmaps, 0);
bp = xfs_trans_buf_item_match(tp, btp, map, nmaps);
if (bp != NULL) {
@@ -522,7 +522,7 @@ libxfs_trans_get_buf_map(
return bp;
}
- bp = libxfs_getbuf_map(btp, map, nmaps);
+ bp = libxfs_getbuf_map(btp, map, nmaps, 0);
if (bp == NULL)
return NULL;
#ifdef XACT_DEBUG
--
1.8.4.rc3
_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2014-01-23 23:21 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-22 7:17 [PATCH 0/3] xfs_repair: fix discontiguous directory block Dave Chinner
2014-01-22 7:17 ` [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map Dave Chinner
2014-01-23 17:14 ` Brian Foster
2014-01-22 7:17 ` [PATCH 2/3] libxfs: remove map from libxfs_readbufr_map Dave Chinner
2014-01-23 17:15 ` Brian Foster
2014-01-23 21:27 ` Dave Chinner
2014-01-22 7:17 ` [PATCH 3/3] repair: fix discontiguous directory block support Dave Chinner
2014-01-23 17:15 ` Brian Foster
2014-01-23 21:41 ` Dave Chinner
-- strict thread matches above, loose matches on Subject: below --
2014-01-23 23:21 [PATCH 0/3 V2] repair: " Dave Chinner
2014-01-23 23:21 ` [PATCH 1/3] libxfs: add a flags field to libxfs_getbuf_map Dave Chinner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).