From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Price Date: Mon, 18 Feb 2013 10:55:59 +0000 Subject: [Cluster-devel] libgfs2: Add readahead for rgrp headers In-Reply-To: <1361183220.2696.6.camel@menhir> References: <1361183220.2696.6.camel@menhir> Message-ID: <512208BF.3070601@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, On 18/02/13 10:27, Steven Whitehouse wrote: > > This adds readahead to rgrp headers, greatly improving the speed with > which they can be read in during fsck. Also, the multiple reads which > were used before are replaced with a single read per resource group. > > This is an example of the kinds of speed up which may well be possible > elsewhere in the code. I started with this example simply because it was > the easiest one to do. > > An alternative implementation might use O_DIRECT and aio, but I'm not sure > that there would be much benefit compared with this method. A further > thought would be to use drop behind in places where we know that we will > not be looking at the data again. > > Taking timings for just the rgrp reading section of fsck, I see almost a > 10x speed up for that section of code using this patch on a 500G > filesystem. 
Looks good to me, Andy > > Signed-off-by: Steven Whitehouse > > diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c > index 5bc1a4e..68f0731 100644 > --- a/gfs2/libgfs2/buf.c > +++ b/gfs2/libgfs2/buf.c > @@ -7,6 +7,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -30,39 +31,54 @@ struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num) > return bh; > } > > -struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line, > - const char *caller) > +int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, > + uint64_t block, int line, const char *caller) > { > - struct gfs2_buffer_head *bh = bget(sdp, num); > - if (bh == NULL) > - return bh; > - if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) != > - num * sdp->bsize) { > - fprintf(stderr, "bad seek: %s from %s:%d: block " > - "%llu (0x%llx)\n", strerror(errno), > - caller, line, (unsigned long long)num, > - (unsigned long long)num); > - exit(-1); > + struct iovec *iov = alloca(n * sizeof(struct iovec)); > + struct iovec *iovbase = iov; > + uint64_t b = block; > + size_t size = 0; > + size_t i; > + int ret; > + > + for (i = 0; i < n; i++) { > + bhs[i] = bget(sdp, b++); > + if (bhs[i] == NULL) > + return -1; > + *iov++ = bhs[i]->iov; > + size += bhs[i]->iov.iov_len; > } > - if (readv(sdp->device_fd, &bh->iov, 1) < 0) { > + > + ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize); > + > + if (ret != size) { > fprintf(stderr, "bad read: %s from %s:%d: block " > - "%llu (0x%llx)\n", strerror(errno), > - caller, line, (unsigned long long)num, > - (unsigned long long)num); > + "%llu (0x%llx)\n", strerror(errno), > + caller, line, (unsigned long long)block, > + (unsigned long long)block); > exit(-1); > } > - return bh; > + > + return 0; > +} > + > +struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line, > + const char *caller) > +{ > + struct gfs2_buffer_head *bh; > + int ret; > + > + ret = __breadm(sdp, 
&bh, 1, num, line, caller); > + if (ret >= 0) > + return bh; > + return NULL; > } > > int bwrite(struct gfs2_buffer_head *bh) > { > struct gfs2_sbd *sdp = bh->sdp; > > - if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) != > - bh->b_blocknr * sdp->bsize) { > - return -1; > - } > - if (writev(sdp->device_fd, &bh->iov, 1) != bh->iov.iov_len) > + if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len) > return -1; > sdp->writes++; > bh->b_modified = 0; > diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h > index 2b109fb..46d4d67 100644 > --- a/gfs2/libgfs2/libgfs2.h > +++ b/gfs2/libgfs2/libgfs2.h > @@ -382,6 +382,7 @@ extern void gfs2_special_clear(struct special_blocks *blocklist, > extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num); > extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, > int line, const char *caller); > +extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller); > extern int bwrite(struct gfs2_buffer_head *bh); > extern int brelse(struct gfs2_buffer_head *bh); > extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh); > @@ -389,6 +390,7 @@ extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh); > #define bmodified(bh) do { bh->b_modified = 1; } while(0) > > #define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__) > +#define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__) > > /* device_geometry.c */ > extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i); > diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c > index cbab2a3..f7dc01e 100644 > --- a/gfs2/libgfs2/rgrp.c > +++ b/gfs2/libgfs2/rgrp.c > @@ -127,10 +127,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd) > return -1; > if (gfs2_check_range(sdp, rgd->ri.ri_addr)) > return -1; > + if (breadm(sdp, rgd->bh, length, 
rgd->ri.ri_addr)) > + return -1; > for (x = 0; x < length; x++){ > - rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x); > - if(gfs2_check_meta(rgd->bh[x], > - (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG)) > + if(gfs2_check_meta(rgd->bh[x], (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG)) > { > uint64_t error; > > diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c > index 8317862..21c9f7b 100644 > --- a/gfs2/libgfs2/super.c > +++ b/gfs2/libgfs2/super.c > @@ -7,6 +7,7 @@ > #include > #include > #include > +#include > > #include "libgfs2.h" > #include "osi_list.h" > @@ -198,6 +199,29 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane) > return 0; > } > > +#define RA_WINDOW 32 > + > +static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window, > + struct osi_node *n) > +{ > + struct rgrp_tree *rgd; > + unsigned i; > + off_t start, len; > + > + for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) { > + if (n == NULL) > + return i; > + if (i < cur_window) > + continue; > + rgd = (struct rgrp_tree *)n; > + start = rgd->ri.ri_addr * sdp->bsize; > + len = rgd->ri.ri_length * sdp->bsize; > + posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED); > + } > + > + return i; > +} > + > /** > * ri_update - attach rgrps to the super block > * @sdp: incore superblock data > @@ -218,15 +242,24 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane, > uint64_t errblock = 0; > uint64_t rmax = 0; > struct osi_node *n, *next = NULL; > + unsigned ra_window = 0; > + > + /* Turn off generic readhead */ > + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM); > > if (rindex_read(sdp, fd, &count1, sane)) > goto fail; > for (n = osi_first(&sdp->rgtree); n; n = next) { > next = osi_next(n); > rgd = (struct rgrp_tree *)n; > + /* Readahead resource group headers */ > + if (ra_window < RA_WINDOW/2) > + ra_window = gfs2_rgrp_reada(sdp, ra_window, n); > + /* Read resource group header */ > errblock = gfs2_rgrp_read(sdp, rgd); > if 
(errblock) > return errblock; > + ra_window--; > count2++; > if (!quiet && count2 % 100 == 0) { > printf("."); > @@ -242,9 +275,11 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane, > if (count1 != count2) > goto fail; > > + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); > return 0; > > fail: > + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL); > gfs2_rgrp_free(&sdp->rgtree); > return -1; > } > >