* [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps [not found] <1726844332.50526668.1531512206762.JavaMail.zimbra@redhat.com> @ 2018-07-13 20:19 ` Bob Peterson 2018-07-24 12:59 ` Andreas Gruenbacher 0 siblings, 1 reply; 3+ messages in thread From: Bob Peterson @ 2018-07-13 20:19 UTC (permalink / raw) To: cluster-devel.redhat.com Hi, The previous version of my patch had a bug. Apparently function gfs2_check_blk_type does need to check the non-clone bitmap. The other two callers need to check the clone bitmap, if available. So this version adds a parameter to specify. Also, I got rid of function gfs2_get_block_type() in favor of calling it directly from its only caller, gfs2_check_blk_type. This simplifies the code. Bob Peterson --- Function gfs2_testbit is called in three places. Two of those places, gfs2_alloc_extent and gfs2_unaligned_extlen, should be using the clone bitmaps, not the "real" bitmaps. Function gfs2_unaligned_extlen is used by the block reservations scheme to determine the length of an extent of free blocks. Before this patch, it wasn't using the clone bitmap, which means soon-to-be-freed blocks will be treated as free blocks for the purposes of an extent, and that's clearly wrong. This patch adds a new parameter to gfs2_testbit to indicate whether or not the clone bitmaps should be used (if available). Signed-off-by: Bob Peterson <rpeterso@redhat.com> --- fs/gfs2/rgrp.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3821b3ab04fa..7e22918d32d6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -123,17 +123,27 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, /** * gfs2_testbit - test a bit in the bitmaps * @rbm: The bit to test + * @use_clone: If true, test the clone bitmap, not the official bitmap. + * + * Some callers, like gfs2_unaligned_extlen, need to test the clone bitmaps, + * not the "real" bitmaps, because we can't reserve blocks if they're still + * considered metadata in the journal. * * Returns: The two bit block state of the requested bit */ -static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) { struct gfs2_bitmap *bi = rbm_bi(rbm); - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; + const u8 *buffer; const u8 *byte; unsigned int bit; + if (use_clone && bi->bi_clone) + buffer = bi->bi_clone; + else + buffer = bi->bi_bh->b_data; + buffer += bi->bi_offset; byte = buffer + (rbm->offset / GFS2_NBBY); bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; @@ -322,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le u8 res; for (n = 0; n < n_unaligned; n++) { - res = gfs2_testbit(rbm); + res = gfs2_testbit(rbm, true); if (res != GFS2_BLKST_FREE) return true; (*len)--; @@ -2142,26 +2152,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); } -/** - * gfs2_get_block_type - Check a block in a RG is of given type - * @rgd: the resource group holding the block - * @block: the block number - * - * Returns: The block type (GFS2_BLKST_*) - */ - -static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) -{ - struct gfs2_rbm rbm = { .rgd = rgd, }; - int ret; - - ret = gfs2_rbm_from_block(&rbm, block); - WARN_ON_ONCE(ret != 0); - - return gfs2_testbit(&rbm); -} - - /** * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information @@ -2186,7 +2176,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE) break; gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); @@ -2543,6 +2533,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder rgd_gh; + struct gfs2_rbm rbm; int error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); @@ -2553,7 +2544,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) if (error) goto fail; - if (gfs2_get_block_type(rgd, no_addr) != type) + rbm.rgd = rgd; + error = gfs2_rbm_from_block(&rbm, no_addr); + WARN_ON_ONCE(error != 0); + + if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; gfs2_glock_dq_uninit(&rgd_gh); ^ permalink raw reply related [flat|nested] 3+ messages in thread
* [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps 2018-07-13 20:19 ` [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps Bob Peterson @ 2018-07-24 12:59 ` Andreas Gruenbacher 2018-07-26 16:08 ` Bob Peterson 0 siblings, 1 reply; 3+ messages in thread From: Andreas Gruenbacher @ 2018-07-24 12:59 UTC (permalink / raw) To: cluster-devel.redhat.com Bob, On 13 July 2018 at 22:19, Bob Peterson <rpeterso@redhat.com> wrote: > Hi, > > The previous version of my patch had a bug. Apparently function > gfs2_check_blk_type does need to check the non-clone bitmap. The other > two callers need to check the clone bitmap, if available. > So this version adds a parameter to specify. Also, I got rid of > function gfs2_get_block_type() in favor of calling it directly > from its only caller, gfs2_check_blk_type. This simplifies the code. could you please add some documentation about how those clone bitmaps are supposed to work? It's really hard to guess that from the code alone. > Bob Peterson > --- > Function gfs2_testbit is called in three places. Two of those places, > gfs2_alloc_extent and gfs2_unaligned_extlen, should be using the > clone bitmaps, not the "real" bitmaps. Function gfs2_unaligned_extlen > is used by the block reservations scheme to determine the length of > an extent of free blocks. Before this patch, it wasn't using the > clone bitmap, which means soon-to-be-freed blocks will be treated > as free blocks for the purposes of an extent, and that's clearly > wrong. > > This patch adds a new parameter to gfs2_testbit to indicate whether > or not the clone bitmaps should be used (if available). > > Signed-off-by: Bob Peterson <rpeterso@redhat.com> > --- > fs/gfs2/rgrp.c | 45 ++++++++++++++++++++------------------------- > 1 file changed, 20 insertions(+), 25 deletions(-) > > diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c > index 3821b3ab04fa..7e22918d32d6 100644 > --- a/fs/gfs2/rgrp.c > +++ b/fs/gfs2/rgrp.c > @@ -123,17 +123,27 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, > /** > * gfs2_testbit - test a bit in the bitmaps > * @rbm: The bit to test > + * @use_clone: If true, test the clone bitmap, not the official bitmap. > + * > + * Some callers, like gfs2_unaligned_extlen, need to test the clone bitmaps, > + * not the "real" bitmaps, because we can't reserve blocks if they're still > + * considered metadata in the journal. > * > * Returns: The two bit block state of the requested bit > */ > > -static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) > +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) > { > struct gfs2_bitmap *bi = rbm_bi(rbm); > - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; > + const u8 *buffer; > const u8 *byte; > unsigned int bit; > > + if (use_clone && bi->bi_clone) > + buffer = bi->bi_clone; > + else > + buffer = bi->bi_bh->b_data; > + buffer += bi->bi_offset; > byte = buffer + (rbm->offset / GFS2_NBBY); > bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; > > @@ -322,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le > u8 res; > > for (n = 0; n < n_unaligned; n++) { > - res = gfs2_testbit(rbm); > + res = gfs2_testbit(rbm, true); > if (res != GFS2_BLKST_FREE) > return true; > (*len)--; > @@ -2142,26 +2152,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip) > gfs2_glock_dq_uninit(&rs->rs_rgd_gh); > } > > -/** > - * gfs2_get_block_type - Check a block in a RG is of given type > - * @rgd: the resource group holding the block > - * @block: the block number > - * > - * Returns: The block type (GFS2_BLKST_*) > - */ > - > -static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) > -{ > - struct gfs2_rbm rbm = { .rgd = rgd, }; > - int ret; > - > - ret = gfs2_rbm_from_block(&rbm, block); > - WARN_ON_ONCE(ret != 0); > - > - return gfs2_testbit(&rbm); > -} > - > - > /** > * gfs2_alloc_extent - allocate an extent from a given bitmap > * @rbm: the resource group information > @@ -2186,7 +2176,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, > block++; > while (*n < elen) { > ret = gfs2_rbm_from_block(&pos, block); > - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) > + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE) > break; > gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); > gfs2_setbit(&pos, true, GFS2_BLKST_USED); > @@ -2543,6 +2533,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) > { > struct gfs2_rgrpd *rgd; > struct gfs2_holder rgd_gh; > + struct gfs2_rbm rbm; > int error = -EINVAL; > > rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); > @@ -2553,7 +2544,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) > if (error) > goto fail; > > - if (gfs2_get_block_type(rgd, no_addr) != type) > + rbm.rgd = rgd; > + error = gfs2_rbm_from_block(&rbm, no_addr); > + WARN_ON_ONCE(error != 0); > + > + if (gfs2_testbit(&rbm, false) != type) > error = -ESTALE; > > gfs2_glock_dq_uninit(&rgd_gh); > Thanks, Andreas ^ permalink raw reply [flat|nested] 3+ messages in thread
* [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps 2018-07-24 12:59 ` Andreas Gruenbacher @ 2018-07-26 16:08 ` Bob Peterson 0 siblings, 0 replies; 3+ messages in thread From: Bob Peterson @ 2018-07-26 16:08 UTC (permalink / raw) To: cluster-devel.redhat.com ----- Original Message ----- > Bob, > > On 13 July 2018 at 22:19, Bob Peterson <rpeterso@redhat.com> wrote: > > Hi, > > > > The previous version of my patch had a bug. Apparently function > > gfs2_check_blk_type does need to check the non-clone bitmap. The other > > two callers need to check the clone bitmap, if available. > > So this version adds a parameter to specify. Also, I got rid of > > function gfs2_get_block_type() in favor of calling it directly > > from its only caller, gfs2_check_blk_type. This simplifies the code. > > could you please add some documentation about how those clone bitmaps > are supposed to work? It's really hard to guess that from the code > alone. Hi Andreas, Here's an updated version of the patch with more details about clone bitmaps, as you requested. Other than the comments, the patch itself is unchanged. Regards, Bob Peterson --- gfs2: Fix gfs2_testbit to use clone bitmaps In the gfs2 block allocator there's a concept of an in-core only "clone" bitmap that may (or may not) exist after blocks are freed or unlinked. When a block is freed or unlinked, memory for the clone bitmap is allocated for the bitmap affected, but only the real bitmap is updated. The clone bitmap will still reflect its prior (allocated) state. When blocks are assigned (free -> used or free -> dinode), it does not need to allocate a clone bitmap. However, if one exists, both the real bitmap and the clone bitmap are updated to reflect the new state of the block. So block assignments are more "sticky" than frees. When the bit search algorithms try to find blocks of a certain state, they use the clone bitmaps (if they exist). That way, the only blocks to be allocated are never ones that have been recently freed. The reason gfs2 goes through all this trouble is to make sure that journals never contain transactions with metadata that both free and reassign the same block. If they were allowed to do so, journal replay (due to a node failure) would introduce file system corruption. For example, an indirect block might end up pointing to a block that's in the "free" state because there's no way for journal replay to know the original intent for that block. I don't know offhand of a scenario in which this can happen, but that was the original intent. The management of the clone bitmaps is not done properly in the case of function gfs2_testbit, however. Function gfs2_testbit is called in three places. Two of those places, gfs2_alloc_extent and gfs2_unaligned_extlen, should be using the clone bitmaps, not the "real" bitmaps. Function gfs2_unaligned_extlen is used by the block reservations scheme to determine the length of an extent of free blocks. Before this patch, it wasn't using the clone bitmap, which means soon-to-be- freed blocks will be treated as free blocks for the purposes of an extent, and that's clearly wrong for the reasons stated above. This patch adds a new parameter to gfs2_testbit to indicate whether or not the clone bitmaps should be used (if available). Signed-off-by: Bob Peterson <rpeterso@redhat.com> --- fs/gfs2/rgrp.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 68a81afd3b4a..1651721fb2b2 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -123,17 +123,27 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, /** * gfs2_testbit - test a bit in the bitmaps * @rbm: The bit to test + * @use_clone: If true, test the clone bitmap, not the official bitmap. + * + * Some callers, like gfs2_unaligned_extlen, need to test the clone bitmaps, + * not the "real" bitmaps, because we can't reserve blocks if they're still + * considered metadata in the journal. * * Returns: The two bit block state of the requested bit */ -static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone) { struct gfs2_bitmap *bi = rbm_bi(rbm); - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; + const u8 *buffer; const u8 *byte; unsigned int bit; + if (use_clone && bi->bi_clone) + buffer = bi->bi_clone; + else + buffer = bi->bi_bh->b_data; + buffer += bi->bi_offset; byte = buffer + (rbm->offset / GFS2_NBBY); bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; @@ -322,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le u8 res; for (n = 0; n < n_unaligned; n++) { - res = gfs2_testbit(rbm); + res = gfs2_testbit(rbm, true); if (res != GFS2_BLKST_FREE) return true; (*len)--; @@ -2146,26 +2156,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); } -/** - * gfs2_get_block_type - Check a block in a RG is of given type - * @rgd: the resource group holding the block - * @block: the block number - * - * Returns: The block type (GFS2_BLKST_*) - */ - -static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) -{ - struct gfs2_rbm rbm = { .rgd = rgd, }; - int ret; - - ret = gfs2_rbm_from_block(&rbm, block); - WARN_ON_ONCE(ret != 0); - - return gfs2_testbit(&rbm); -} - - /** * gfs2_alloc_extent - allocate an extent from a given bitmap * @rbm: the resource group information @@ -2190,7 +2180,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE) break; gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); @@ -2547,6 +2537,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder rgd_gh; + struct gfs2_rbm rbm; int error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr, 1); @@ -2557,7 +2548,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) if (error) goto fail; - if (gfs2_get_block_type(rgd, no_addr) != type) + rbm.rgd = rgd; + error = gfs2_rbm_from_block(&rbm, no_addr); + WARN_ON_ONCE(error != 0); + + if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; gfs2_glock_dq_uninit(&rgd_gh); ^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-07-26 16:08 UTC | newest] Thread overview: 3+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- [not found] <1726844332.50526668.1531512206762.JavaMail.zimbra@redhat.com> 2018-07-13 20:19 ` [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps Bob Peterson 2018-07-24 12:59 ` Andreas Gruenbacher 2018-07-26 16:08 ` Bob Peterson
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).