* [Cluster-devel] libgfs2: Add readahead for rgrp headers
@ 2013-02-18 10:27 Steven Whitehouse
2013-02-18 10:55 ` Andrew Price
0 siblings, 1 reply; 2+ messages in thread
From: Steven Whitehouse @ 2013-02-18 10:27 UTC (permalink / raw)
To: cluster-devel.redhat.com
This adds readahead to rgrp headers, greatly improving the speed with
which they can be read in during fsck. Also, the multiple reads which
were used before are replaced with a single read per resource group.
This is an example of the kinds of speed up which may well be possible
elsewhere in the code. I started with this example simply because it was
the easiest one to do.
An alternative implementation might use O_DIRECT and aio, but I'm not sure
that there would be much benefit compared with this method. A further
thought would be to use drop behind in places where we know that we will
not be looking at the data again.
Taking timings for just the rgrp reading section of fsck, I see almost a
10x speed up for that section of code using this patch on a 500G
filesystem.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c
index 5bc1a4e..68f0731 100644
--- a/gfs2/libgfs2/buf.c
+++ b/gfs2/libgfs2/buf.c
@@ -7,6 +7,7 @@
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/time.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
@@ -30,39 +31,54 @@ struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num)
return bh;
}
-struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
- const char *caller)
+int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n,
+ uint64_t block, int line, const char *caller)
{
- struct gfs2_buffer_head *bh = bget(sdp, num);
- if (bh == NULL)
- return bh;
- if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) !=
- num * sdp->bsize) {
- fprintf(stderr, "bad seek: %s from %s:%d: block "
- "%llu (0x%llx)\n", strerror(errno),
- caller, line, (unsigned long long)num,
- (unsigned long long)num);
- exit(-1);
+ struct iovec *iov = alloca(n * sizeof(struct iovec));
+ struct iovec *iovbase = iov;
+ uint64_t b = block;
+ size_t size = 0;
+ size_t i;
+ int ret;
+
+ for (i = 0; i < n; i++) {
+ bhs[i] = bget(sdp, b++);
+ if (bhs[i] == NULL)
+ return -1;
+ *iov++ = bhs[i]->iov;
+ size += bhs[i]->iov.iov_len;
}
- if (readv(sdp->device_fd, &bh->iov, 1) < 0) {
+
+ ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize);
+
+ if (ret != size) {
fprintf(stderr, "bad read: %s from %s:%d: block "
- "%llu (0x%llx)\n", strerror(errno),
- caller, line, (unsigned long long)num,
- (unsigned long long)num);
+ "%llu (0x%llx)\n", strerror(errno),
+ caller, line, (unsigned long long)block,
+ (unsigned long long)block);
exit(-1);
}
- return bh;
+
+ return 0;
+}
+
+struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
+ const char *caller)
+{
+ struct gfs2_buffer_head *bh;
+ int ret;
+
+ ret = __breadm(sdp, &bh, 1, num, line, caller);
+ if (ret >= 0)
+ return bh;
+ return NULL;
}
int bwrite(struct gfs2_buffer_head *bh)
{
struct gfs2_sbd *sdp = bh->sdp;
- if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) !=
- bh->b_blocknr * sdp->bsize) {
- return -1;
- }
- if (writev(sdp->device_fd, &bh->iov, 1) != bh->iov.iov_len)
+ if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len)
return -1;
sdp->writes++;
bh->b_modified = 0;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 2b109fb..46d4d67 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -382,6 +382,7 @@ extern void gfs2_special_clear(struct special_blocks *blocklist,
extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num);
extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num,
int line, const char *caller);
+extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller);
extern int bwrite(struct gfs2_buffer_head *bh);
extern int brelse(struct gfs2_buffer_head *bh);
extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
@@ -389,6 +390,7 @@ extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
#define bmodified(bh) do { bh->b_modified = 1; } while(0)
#define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__)
+#define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__)
/* device_geometry.c */
extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i);
diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
index cbab2a3..f7dc01e 100644
--- a/gfs2/libgfs2/rgrp.c
+++ b/gfs2/libgfs2/rgrp.c
@@ -127,10 +127,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
return -1;
if (gfs2_check_range(sdp, rgd->ri.ri_addr))
return -1;
+ if (breadm(sdp, rgd->bh, length, rgd->ri.ri_addr))
+ return -1;
for (x = 0; x < length; x++){
- rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
- if(gfs2_check_meta(rgd->bh[x],
- (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
+ if(gfs2_check_meta(rgd->bh[x], (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
{
uint64_t error;
diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
index 8317862..21c9f7b 100644
--- a/gfs2/libgfs2/super.c
+++ b/gfs2/libgfs2/super.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <fcntl.h>
#include "libgfs2.h"
#include "osi_list.h"
@@ -198,6 +199,29 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
return 0;
}
+#define RA_WINDOW 32
+
+static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window,
+ struct osi_node *n)
+{
+ struct rgrp_tree *rgd;
+ unsigned i;
+ off_t start, len;
+
+ for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) {
+ if (n == NULL)
+ return i;
+ if (i < cur_window)
+ continue;
+ rgd = (struct rgrp_tree *)n;
+ start = rgd->ri.ri_addr * sdp->bsize;
+ len = rgd->ri.ri_length * sdp->bsize;
+ posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED);
+ }
+
+ return i;
+}
+
/**
* ri_update - attach rgrps to the super block
* @sdp: incore superblock data
@@ -218,15 +242,24 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
uint64_t errblock = 0;
uint64_t rmax = 0;
struct osi_node *n, *next = NULL;
+ unsigned ra_window = 0;
+
+ /* Turn off generic readhead */
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
if (rindex_read(sdp, fd, &count1, sane))
goto fail;
for (n = osi_first(&sdp->rgtree); n; n = next) {
next = osi_next(n);
rgd = (struct rgrp_tree *)n;
+ /* Readahead resource group headers */
+ if (ra_window < RA_WINDOW/2)
+ ra_window = gfs2_rgrp_reada(sdp, ra_window, n);
+ /* Read resource group header */
errblock = gfs2_rgrp_read(sdp, rgd);
if (errblock)
return errblock;
+ ra_window--;
count2++;
if (!quiet && count2 % 100 == 0) {
printf(".");
@@ -242,9 +275,11 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
if (count1 != count2)
goto fail;
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
return 0;
fail:
+ posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
gfs2_rgrp_free(&sdp->rgtree);
return -1;
}
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [Cluster-devel] libgfs2: Add readahead for rgrp headers
2013-02-18 10:27 [Cluster-devel] libgfs2: Add readahead for rgrp headers Steven Whitehouse
@ 2013-02-18 10:55 ` Andrew Price
0 siblings, 0 replies; 2+ messages in thread
From: Andrew Price @ 2013-02-18 10:55 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
On 18/02/13 10:27, Steven Whitehouse wrote:
>
> This adds readahead to rgrp headers, greatly improving the speed with
> which they can be read in during fsck. Also, the multiple reads which
> were used before are replaced with a single read per resource group.
>
> This is an example of the kinds of speed up which may well be possible
> elsewhere in the code. I started with this example simply because it was
> the easiest one to do.
>
> An alternative implementation might O_DIRECT and aio, but I'm not sure
> that there would be much benefit compared with this method. A further
> thought would be to use drop behind in places where we know that we will
> not be looking at the data again.
>
> Taking timings for just the rgrp reading section of fsck, I see almost a
> 10x speed up for that section of code using this patch on a 500G
> filesystem.
Looks good to me,
Andy
>
> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
>
> diff --git a/gfs2/libgfs2/buf.c b/gfs2/libgfs2/buf.c
> index 5bc1a4e..68f0731 100644
> --- a/gfs2/libgfs2/buf.c
> +++ b/gfs2/libgfs2/buf.c
> @@ -7,6 +7,7 @@
> #include <inttypes.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> +#include <sys/time.h>
> #include <fcntl.h>
> #include <unistd.h>
> #include <errno.h>
> @@ -30,39 +31,54 @@ struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num)
> return bh;
> }
>
> -struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
> - const char *caller)
> +int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n,
> + uint64_t block, int line, const char *caller)
> {
> - struct gfs2_buffer_head *bh = bget(sdp, num);
> - if (bh == NULL)
> - return bh;
> - if (lseek(sdp->device_fd, num * sdp->bsize, SEEK_SET) !=
> - num * sdp->bsize) {
> - fprintf(stderr, "bad seek: %s from %s:%d: block "
> - "%llu (0x%llx)\n", strerror(errno),
> - caller, line, (unsigned long long)num,
> - (unsigned long long)num);
> - exit(-1);
> + struct iovec *iov = alloca(n * sizeof(struct iovec));
> + struct iovec *iovbase = iov;
> + uint64_t b = block;
> + size_t size = 0;
> + size_t i;
> + int ret;
> +
> + for (i = 0; i < n; i++) {
> + bhs[i] = bget(sdp, b++);
> + if (bhs[i] == NULL)
> + return -1;
> + *iov++ = bhs[i]->iov;
> + size += bhs[i]->iov.iov_len;
> }
> - if (readv(sdp->device_fd, &bh->iov, 1) < 0) {
> +
> + ret = preadv(sdp->device_fd, iovbase, n, block * sdp->bsize);
> +
> + if (ret != size) {
> fprintf(stderr, "bad read: %s from %s:%d: block "
> - "%llu (0x%llx)\n", strerror(errno),
> - caller, line, (unsigned long long)num,
> - (unsigned long long)num);
> + "%llu (0x%llx)\n", strerror(errno),
> + caller, line, (unsigned long long)block,
> + (unsigned long long)block);
> exit(-1);
> }
> - return bh;
> +
> + return 0;
> +}
> +
> +struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num, int line,
> + const char *caller)
> +{
> + struct gfs2_buffer_head *bh;
> + int ret;
> +
> + ret = __breadm(sdp, &bh, 1, num, line, caller);
> + if (ret >= 0)
> + return bh;
> + return NULL;
> }
>
> int bwrite(struct gfs2_buffer_head *bh)
> {
> struct gfs2_sbd *sdp = bh->sdp;
>
> - if (lseek(sdp->device_fd, bh->b_blocknr * sdp->bsize, SEEK_SET) !=
> - bh->b_blocknr * sdp->bsize) {
> - return -1;
> - }
> - if (writev(sdp->device_fd, &bh->iov, 1) != bh->iov.iov_len)
> + if (pwritev(sdp->device_fd, &bh->iov, 1, bh->b_blocknr * sdp->bsize) != bh->iov.iov_len)
> return -1;
> sdp->writes++;
> bh->b_modified = 0;
> diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
> index 2b109fb..46d4d67 100644
> --- a/gfs2/libgfs2/libgfs2.h
> +++ b/gfs2/libgfs2/libgfs2.h
> @@ -382,6 +382,7 @@ extern void gfs2_special_clear(struct special_blocks *blocklist,
> extern struct gfs2_buffer_head *bget(struct gfs2_sbd *sdp, uint64_t num);
> extern struct gfs2_buffer_head *__bread(struct gfs2_sbd *sdp, uint64_t num,
> int line, const char *caller);
> +extern int __breadm(struct gfs2_sbd *sdp, struct gfs2_buffer_head **bhs, size_t n, uint64_t block, int line, const char *caller);
> extern int bwrite(struct gfs2_buffer_head *bh);
> extern int brelse(struct gfs2_buffer_head *bh);
> extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
> @@ -389,6 +390,7 @@ extern uint32_t lgfs2_get_block_type(const struct gfs2_buffer_head *lbh);
> #define bmodified(bh) do { bh->b_modified = 1; } while(0)
>
> #define bread(bl, num) __bread(bl, num, __LINE__, __FUNCTION__)
> +#define breadm(bl, bhs, n, block) __breadm(bl, bhs, n, block, __LINE__, __FUNCTION__)
>
> /* device_geometry.c */
> extern int lgfs2_get_dev_info(int fd, struct lgfs2_dev_info *i);
> diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
> index cbab2a3..f7dc01e 100644
> --- a/gfs2/libgfs2/rgrp.c
> +++ b/gfs2/libgfs2/rgrp.c
> @@ -127,10 +127,10 @@ uint64_t gfs2_rgrp_read(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
> return -1;
> if (gfs2_check_range(sdp, rgd->ri.ri_addr))
> return -1;
> + if (breadm(sdp, rgd->bh, length, rgd->ri.ri_addr))
> + return -1;
> for (x = 0; x < length; x++){
> - rgd->bh[x] = bread(sdp, rgd->ri.ri_addr + x);
> - if(gfs2_check_meta(rgd->bh[x],
> - (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
> + if(gfs2_check_meta(rgd->bh[x], (x) ? GFS2_METATYPE_RB : GFS2_METATYPE_RG))
> {
> uint64_t error;
>
> diff --git a/gfs2/libgfs2/super.c b/gfs2/libgfs2/super.c
> index 8317862..21c9f7b 100644
> --- a/gfs2/libgfs2/super.c
> +++ b/gfs2/libgfs2/super.c
> @@ -7,6 +7,7 @@
> #include <stdlib.h>
> #include <string.h>
> #include <errno.h>
> +#include <fcntl.h>
>
> #include "libgfs2.h"
> #include "osi_list.h"
> @@ -198,6 +199,29 @@ int rindex_read(struct gfs2_sbd *sdp, int fd, int *count1, int *sane)
> return 0;
> }
>
> +#define RA_WINDOW 32
> +
> +static unsigned gfs2_rgrp_reada(struct gfs2_sbd *sdp, unsigned cur_window,
> + struct osi_node *n)
> +{
> + struct rgrp_tree *rgd;
> + unsigned i;
> + off_t start, len;
> +
> + for (i = 0; i < RA_WINDOW; i++, n = osi_next(n)) {
> + if (n == NULL)
> + return i;
> + if (i < cur_window)
> + continue;
> + rgd = (struct rgrp_tree *)n;
> + start = rgd->ri.ri_addr * sdp->bsize;
> + len = rgd->ri.ri_length * sdp->bsize;
> + posix_fadvise(sdp->device_fd, start, len, POSIX_FADV_WILLNEED);
> + }
> +
> + return i;
> +}
> +
> /**
> * ri_update - attach rgrps to the super block
> * @sdp: incore superblock data
> @@ -218,15 +242,24 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
> uint64_t errblock = 0;
> uint64_t rmax = 0;
> struct osi_node *n, *next = NULL;
> + unsigned ra_window = 0;
> +
> + /* Turn off generic readhead */
> + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
>
> if (rindex_read(sdp, fd, &count1, sane))
> goto fail;
> for (n = osi_first(&sdp->rgtree); n; n = next) {
> next = osi_next(n);
> rgd = (struct rgrp_tree *)n;
> + /* Readahead resource group headers */
> + if (ra_window < RA_WINDOW/2)
> + ra_window = gfs2_rgrp_reada(sdp, ra_window, n);
> + /* Read resource group header */
> errblock = gfs2_rgrp_read(sdp, rgd);
> if (errblock)
> return errblock;
> + ra_window--;
> count2++;
> if (!quiet && count2 % 100 == 0) {
> printf(".");
> @@ -242,9 +275,11 @@ static int __ri_update(struct gfs2_sbd *sdp, int fd, int *rgcount, int *sane,
> if (count1 != count2)
> goto fail;
>
> + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
> return 0;
>
> fail:
> + posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
> gfs2_rgrp_free(&sdp->rgtree);
> return -1;
> }
>
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2013-02-18 10:55 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-18 10:27 [Cluster-devel] libgfs2: Add readahead for rgrp headers Steven Whitehouse
2013-02-18 10:55 ` Andrew Price
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).