cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Andrew Price <anprice@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 19/19] mkfs.gfs2: Improve journal creation performance
Date: Tue,  2 Sep 2014 13:07:36 +0100	[thread overview]
Message-ID: <1409659656-23051-20-git-send-email-anprice@redhat.com> (raw)
In-Reply-To: <1409659656-23051-1-git-send-email-anprice@redhat.com>

Now that all of the building blocks for journal extent allocation and
writing are in place in libgfs2, we can use them in mkfs.gfs2 to write
the journals sequentially and in order with the resource group blocks.
This patch is a little messy because the changes had to be introduced
together to avoid mixing old and new behaviour, but the end result
should be fairly easy to follow.

Instead of writing all the resource group headers and then building the
journals afterwards, requiring the resource group headers to be read
back in, we now create a resource group header in memory, allocate the
blocks that we'll use for a journal in its bitmaps, write the resource
group header out, and then write the journal inode, indirect blocks and
data blocks in that order. This gives a substantial speed-up. For
example, running the test suite on my machine takes around 12 minutes
before and just over 2 minutes after this patch set.

Signed-off-by: Andrew Price <anprice@redhat.com>
---
 gfs2/mkfs/main_mkfs.c | 151 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 114 insertions(+), 37 deletions(-)

diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 39b9609..530383d 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -166,6 +166,8 @@ static void opts_init(struct mkfs_opts *opts)
 	opts->align = 1;
 }
 
+struct gfs2_inum *mkfs_journals = NULL;
+
 #ifndef BLKDISCARD
 #define BLKDISCARD      _IO(0x12,119)
 #endif
@@ -617,16 +619,11 @@ static lgfs2_rgrps_t rgs_init(struct mkfs_opts *opts, struct gfs2_sbd *sdp)
 	return rgs;
 }
 
-static int place_rgrp(struct gfs2_sbd *sdp, lgfs2_rgrps_t rgs, struct gfs2_rindex *ri, int debug)
+static int place_rgrp(struct gfs2_sbd *sdp, lgfs2_rgrp_t rg, int debug)
 {
 	int err = 0;
-	lgfs2_rgrp_t rg = NULL;
+	const struct gfs2_rindex *ri = lgfs2_rgrp_index(rg);
 
-	rg = lgfs2_rgrps_append(rgs, ri);
-	if (rg == NULL) {
-		perror(_("Failed to create resource group"));
-		return -1;
-	}
 	err = lgfs2_rgrp_write(sdp->device_fd, rg);
 	if (err != 0) {
 		perror(_("Failed to write resource group"));
@@ -642,46 +639,121 @@ static int place_rgrp(struct gfs2_sbd *sdp, lgfs2_rgrps_t rgs, struct gfs2_rinde
 	return 0;
 }
 
-static int place_rgrps(struct gfs2_sbd *sdp, lgfs2_rgrps_t rgs, struct mkfs_opts *opts)
+static int add_rgrp(lgfs2_rgrps_t rgs, uint64_t *addr, uint32_t len, lgfs2_rgrp_t *rg)
 {
 	struct gfs2_rindex ri;
-	uint64_t jfsize = lgfs2_space_for_data(sdp, sdp->bsize, opts->jsize << 20);
-	uint32_t jrgsize = lgfs2_rgsize_for_data(jfsize, sdp->bsize);
-	uint64_t rgaddr = lgfs2_rgrp_align_addr(rgs, sdp->sb_addr + 1);
-	uint32_t rgsize = lgfs2_rgrps_plan(rgs, sdp->device.length - rgaddr, ((opts->rgsize << 20) / sdp->bsize));
-	unsigned j;
 
-	if (rgsize >= jrgsize)
-		jrgsize = rgsize;
+	/* When we get to the end of the device, it's only an error if we have
+	   more structures left to write, i.e. when len is != 0. */
+	*addr = lgfs2_rindex_entry_new(rgs, &ri, *addr, len);
+	if (*addr == 0) {
+		if (len != 0) {
+			perror(_("Failed to create resource group index entry"));
+			return -1;
+		} else {
+			return 1;
+		}
+	}
 
-	if (rgsize < ((GFS2_MIN_RGSIZE << 20) / sdp->bsize)) {
-		fprintf(stderr, _("Resource group size is too small\n"));
+	*rg = lgfs2_rgrps_append(rgs, &ri);
+	if (*rg == NULL) {
+		perror(_("Failed to create resource group"));
 		return -1;
-	} else if (rgsize < ((GFS2_DEFAULT_RGSIZE << 20) / sdp->bsize)) {
-		fprintf(stderr, _("Warning: small resource group size could impact performance\n"));
 	}
+	return 0;
+}
+
+static int place_journals(struct gfs2_sbd *sdp, lgfs2_rgrps_t rgs, struct mkfs_opts *opts, uint64_t *rgaddr)
+{
+	uint64_t jfsize = lgfs2_space_for_data(sdp, sdp->bsize, opts->jsize << 20);
+	uint32_t rgsize = lgfs2_rgsize_for_data(jfsize, sdp->bsize);
+	unsigned j;
+
+	/* We'll build the jindex later so remember where we put the journals */
+	mkfs_journals = calloc(opts->journals, sizeof(*mkfs_journals));
+	if (mkfs_journals == NULL)
+		return 1;
+	*rgaddr = lgfs2_rgrp_align_addr(rgs, sdp->sb_addr + 1);
 
 	for (j = 0; j < opts->journals; j++) {
 		int result;
-		rgaddr = lgfs2_rindex_entry_new(rgs, &ri, rgaddr, jrgsize);
-		if (rgaddr == 0) /* Reached the end when we still have journals to write */
-			return 1;
-		result = place_rgrp(sdp, rgs, &ri, opts->debug);
+		lgfs2_rgrp_t rg;
+		struct gfs2_inode in = {0};
+
+		if (opts->debug)
+			printf(_("Placing resource group for journal%u\n"), j);
+
+		result = add_rgrp(rgs, rgaddr, rgsize, &rg);
+		if (result > 0)
+			break;
+		else if (result < 0)
+			return result;
+
+		result = lgfs2_rgrp_bitbuf_alloc(rg);
+		if (result != 0) {
+			perror(_("Failed to allocate space for bitmap buffer"));
+			return result;
+		}
+		/* In order to keep writes sequential here, we have to allocate
+		   the journal, then write the rgrp header (which is now in its
+		   final form) and then write the journal out */
+		result = lgfs2_file_alloc(rg, opts->jsize << 20, &in, GFS2_DIF_SYSTEM, S_IFREG | 0600);
+		if (result != 0) {
+			fprintf(stderr, _("Failed to allocate space for journal %u\n"), j);
+			return result;
+		}
+
+		if (opts->debug)
+			gfs2_dinode_print(&in.i_di);
+
+		result = place_rgrp(sdp, rg, opts->debug);
 		if (result != 0)
 			return result;
+
+		lgfs2_rgrp_bitbuf_free(rg);
+
+		result = lgfs2_write_filemeta(&in);
+		if (result != 0) {
+			fprintf(stderr, _("Failed to write journal %u\n"), j);
+			return result;
+		}
+
+		result = lgfs2_write_journal_data(&in);
+		if (result != 0) {
+			fprintf(stderr, _("Failed to write data blocks for journal %u\n"), j);
+			return result;
+		}
+		mkfs_journals[j] = in.i_di.di_num;
 	}
 
-	if (rgsize != jrgsize)
-		lgfs2_rgrps_plan(rgs, sdp->device.length - rgaddr, ((opts->rgsize << 20) / sdp->bsize));
+	return 0;
+}
+
+static int place_rgrps(struct gfs2_sbd *sdp, lgfs2_rgrps_t rgs, struct mkfs_opts *opts)
+{
+	uint64_t rgaddr = lgfs2_rgrp_align_addr(rgs, sdp->sb_addr + 1);
+	uint32_t rgblks = ((opts->rgsize << 20) / sdp->bsize);
+	int result;
+
+	result = place_journals(sdp, rgs, opts, &rgaddr);
+	if (result != 0)
+		return result;
+
+	lgfs2_rgrps_plan(rgs, sdp->device.length - rgaddr, rgblks);
 
 	while (1) {
-		int result;
-		rgaddr = lgfs2_rindex_entry_new(rgs, &ri, rgaddr, 0);
-		if (rgaddr == 0)
-			break; /* Done */
-		result = place_rgrp(sdp, rgs, &ri, opts->debug);
-		if (result)
+		lgfs2_rgrp_t rg;
+		result = add_rgrp(rgs, &rgaddr, 0, &rg);
+		if (result > 0)
+			break;
+		else if (result < 0)
+			return result;
+
+		result = place_rgrp(sdp, rg, opts->debug);
+		if (result != 0) {
+			fprintf(stderr, _("Failed to build resource groups\n"));
 			return result;
+		}
 	}
 	return 0;
 }
@@ -696,7 +768,7 @@ static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, unsigned bsiz
 	sdp->jsize = opts->jsize;
 	sdp->md.journals = opts->journals;
 	sdp->device_fd = opts->dev.fd;
-	sdp->bsize = bsize;
+	sdp->bsize = sdp->sd_sb.sb_bsize = bsize;
 
 	if (compute_constants(sdp)) {
 		perror(_("Failed to compute file system constants"));
@@ -848,17 +920,19 @@ void main_mkfs(int argc, char *argv[])
 	}
 	sbd.rgtree.osi_node = lgfs2_rgrps_root(rgs); // Temporary
 
-	build_root(&sbd);
-	sb.sb_root_dir = sbd.md.rooti->i_di.di_num;
-
-	build_master(&sbd);
+	error = build_master(&sbd);
+	if (error) {
+		fprintf(stderr, _("Error building '%s': %s\n"), "master", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
 	sb.sb_master_dir = sbd.master_dir->i_di.di_num;
 
-	error = build_jindex(&sbd);
+	error = lgfs2_build_jindex(sbd.master_dir, mkfs_journals, opts.journals);
 	if (error) {
 		fprintf(stderr, _("Error building '%s': %s\n"), "jindex", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
+	free(mkfs_journals);
 	error = build_per_node(&sbd);
 	if (error) {
 		fprintf(stderr, _("Error building '%s': %s\n"), "per_node", strerror(errno));
@@ -887,6 +961,9 @@ void main_mkfs(int argc, char *argv[])
 		exit(EXIT_FAILURE);
 	}
 
+	build_root(&sbd);
+	sb.sb_root_dir = sbd.md.rooti->i_di.di_num;
+
 	strcpy(sb.sb_lockproto, opts.lockproto);
 	strcpy(sb.sb_locktable, opts.locktable);
 
-- 
1.9.3



  parent reply	other threads:[~2014-09-02 12:07 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-02 12:07 [Cluster-devel] [PATCH 00/19] gfs2-utils: Introduce extent allocation and speed up journal creation Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 01/19] libgfs2: Keep a pointer to the sbd in lgfs2_rgrps_t Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 02/19] libgfs2: Move bitmap buffers inside struct gfs2_bitmap Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 03/19] libgfs2: Fix an impossible loop condition in gfs2_rgrp_read Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 04/19] libgfs2: Introduce struct lgfs2_rbm Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 05/19] libgfs2: Move struct _lgfs2_rgrps into rgrp.h Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 06/19] libgfs2: Add functions for finding free extents Andrew Price
2014-09-03 10:17   ` Steven Whitehouse
2014-09-03 12:13     ` Andrew Price
2014-09-03 12:24       ` Steven Whitehouse
2014-09-02 12:07 ` [Cluster-devel] [PATCH 07/19] tests: Add unit tests for the new extent search functions Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 08/19] libgfs2: Ignore an empty rgrp plan if a length is specified Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 09/19] libgfs2: Add back-pointer to rgrps in lgfs2_rgrp_t Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 10/19] libgfs2: Const-ify the parameters of print functions Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 11/19] libgfs2: Allow init_dinode to accept a preallocated bh Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 12/19] libgfs2: Add extent allocation functions Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 13/19] libgfs2: Add support for allocating entire rgrp headers Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 14/19] libgfs2: Write file metadata sequentially Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 15/19] libgfs2: Fix alignment in lgfs2_rgsize_for_data Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 16/19] libgfs2: Handle non-zero bitmaps in lgfs2_rgrp_write Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 17/19] libgfs2: Add a speedier journal data block writing function Andrew Price
2014-09-02 12:07 ` [Cluster-devel] [PATCH 18/19] libgfs2: Create jindex directory separately from journals Andrew Price
2014-09-02 12:07 ` Andrew Price [this message]
2014-09-02 14:06 ` [Cluster-devel] [PATCH 00/19] gfs2-utils: Introduce extent allocation and speed up journal creation Bob Peterson
2014-09-03 10:20 ` Steven Whitehouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409659656-23051-20-git-send-email-anprice@redhat.com \
    --to=anprice@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).