From mboxrd@z Thu Jan 1 00:00:00 1970 From: Steven Whitehouse Date: Tue, 05 Nov 2013 11:00:04 +0000 Subject: [Cluster-devel] [GFS2 PATCH] GFS2: If requested is too large, use the largest extent in the rgrp In-Reply-To: <477475972.15900404.1383583955692.JavaMail.root@redhat.com> References: <477475972.15900404.1383583955692.JavaMail.root@redhat.com> Message-ID: <1383649204.2713.14.camel@menhir> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, On Mon, 2013-11-04 at 11:52 -0500, Bob Peterson wrote: > Hi, > > Before this patch, GFS2 would keep searching through all the rgrps > until it found one that had a chunk of free blocks big enough to > satisfy the size hint, which is based on the file write size, > regardless of whether the chunk was big enough to perform the write. > However, when doing big writes there may not be a large enough > chunk of free blocks in any rgrp, due to file system fragmentation. > The largest chunk may be big enough to satisfy the write request, > but it may not meet the ideal reservation size from the "size hint". > The writes would slow to a crawl because every write would search > every rgrp, then finally give up and default to a single-block write. > In my case, performance would drop from 425MB/s to 18KB/s, or 24000 > times slower. > > This patch basically makes it so that if we can't find a contiguous > chunk of blocks big enough to satisfy the sizehint, we'll use the > largest chunk of blocks we found that will still contain the write. > It does so by keeping track of the largest run of blocks within the > rgrp. > > Regards, > > Bob Peterson > Red Hat File Systems > > Signed-off-by: Bob Peterson > --- > diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c > index 4d83abd..711e835 100644 > --- a/fs/gfs2/rgrp.c > +++ b/fs/gfs2/rgrp.c > @@ -57,6 +57,11 @@ > * 3 = Used (metadata) > */ > > +struct gfs2_extent { > + struct gfs2_rbm ex_rbm; > + u32 ex_len; > +}; > + > static const char valid_change[16] = { > /* current */ > /* n */ 0, 1, 1, 1, > @@ -65,8 +70,9 @@ static const char valid_change[16] = { > 1, 0, 0, 0 > }; > > -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, > - const struct gfs2_inode *ip, bool nowrap); > +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, > + const struct gfs2_inode *ip, bool nowrap, > + const struct gfs2_alloc_parms *ap); > > > /** > @@ -1455,7 +1461,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, > if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) > return; > > - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); > + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap); > if (ret == 0) { > rs->rs_rbm = rbm; > rs->rs_free = extlen; > @@ -1520,6 +1526,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, > * @rbm: The current position in the resource group > * @ip: The inode for which we are searching for blocks > * @minext: The minimum extent length > + * @maxext: A pointer to the maximum extent structure > * > * This checks the current position in the rgrp to see whether there is > * a reservation covering this block. If not then this function is a > @@ -1532,7 +1539,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, > > static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, > const struct gfs2_inode *ip, > - u32 minext) > + u32 minext, > + struct gfs2_extent *maxext) > { > u64 block = gfs2_rbm_to_block(rbm); > u32 extlen = 1; > @@ -1545,8 +1553,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, > */ > if (minext) { > extlen = gfs2_free_extlen(rbm, minext); > - nblock = block + extlen; > - if (extlen < minext) > + if (extlen <= maxext->ex_len) > goto fail; > } > > @@ -1555,9 +1562,17 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, > * and skip if parts of it are already reserved > */ > nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); > - if (nblock == block) > - return 0; > + if (nblock == block) { > + if (!minext || extlen >= minext) > + return 0; > + > + if (extlen > maxext->ex_len) { > + maxext->ex_len = extlen; > + maxext->ex_rbm = *rbm; > + } > fail: > + nblock = block + extlen; > + } > ret = gfs2_rbm_from_block(rbm, nblock); > if (ret < 0) > return ret; > @@ -1568,10 +1583,12 @@ fail: > * gfs2_rbm_find - Look for blocks of a particular state > * @rbm: Value/result starting position and final position > * @state: The state which we want to find > - * @minext: The requested extent length (0 for a single block) > + * @minext: Pointer to the requested extent length (NULL for a single block) > + * This is updated to be the actual reservation size. > * @ip: If set, check for reservations > * @nowrap: Stop looking at the end of the rgrp, rather than wrapping > * around until we've reached the starting point. > + * @ap: the allocation parameters > * > * Side effects: > * - If looking for free blocks, we set GBF_FULL on each bitmap which > @@ -1580,8 +1597,9 @@ fail: > * Returns: 0 on success, -ENOSPC if there is no block of the requested state > */ > > -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, > - const struct gfs2_inode *ip, bool nowrap) > +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, > + const struct gfs2_inode *ip, bool nowrap, > + const struct gfs2_alloc_parms *ap) > { > struct buffer_head *bh; > int initial_bii; > @@ -1592,6 +1610,12 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, > int iters = rbm->rgd->rd_length; > int ret; > struct gfs2_bitmap *bi; > + struct gfs2_extent maxext; > + > + maxext.ex_len = 0; > + maxext.ex_rbm.rgd = rbm->rgd; > + maxext.ex_rbm.bii = 0; > + maxext.ex_rbm.offset = 0; This can be shortened to: struct gfs2_extent maxext = { .ex_rbm.rgd = rbm->rgd, }; to save zeroing the fields explicitly. It might be a bit more readable without the ex_ prefix too, possibly? Steve. > > /* If we are not starting at the beginning of a bitmap, then we > * need to add one to the bitmap count to ensure that we search > @@ -1620,7 +1644,9 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, > return 0; > > initial_bii = rbm->bii; > - ret = gfs2_reservation_check_and_update(rbm, ip, minext); > + ret = gfs2_reservation_check_and_update(rbm, ip, > + minext ? *minext : 0, > + &maxext); > if (ret == 0) > return 0; > if (ret > 0) { > @@ -1655,6 +1681,17 @@ next_iter: > break; > } > > + if (minext == NULL || state != GFS2_BLKST_FREE) > + return -ENOSPC; > + > + /* If the maximum extent we found is big enough to fulfill the > + minimum requirements, use it anyway. */ > + if (maxext.ex_len) { > + *rbm = maxext.ex_rbm; > + *minext = maxext.ex_len; > + return 0; > + } > + > return -ENOSPC; > } > > @@ -1680,7 +1717,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip > > while (1) { > down_write(&sdp->sd_log_flush_lock); > - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); > + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, > + true, NULL); > up_write(&sdp->sd_log_flush_lock); > if (error == -ENOSPC) > break; > @@ -2184,11 +2222,12 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, > int error; > > gfs2_set_alloc_start(&rbm, ip, dinode); > - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); > + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL); > > if (error == -ENOSPC) { > gfs2_set_alloc_start(&rbm, ip, dinode); > - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); > + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false, > + NULL); > } > > /* Since all blocks are reserved in advance, this shouldn't happen */ >