* [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps
[not found] <1726844332.50526668.1531512206762.JavaMail.zimbra@redhat.com>
@ 2018-07-13 20:19 ` Bob Peterson
2018-07-24 12:59 ` Andreas Gruenbacher
0 siblings, 1 reply; 3+ messages in thread
From: Bob Peterson @ 2018-07-13 20:19 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi,
The previous version of my patch had a bug. Apparently function
gfs2_check_blk_type does need to check the non-clone bitmap. The other
two callers need to check the clone bitmap, if available.
So this version adds a parameter to specify. Also, I got rid of
function gfs2_get_block_type() in favor of calling it directly
from its only caller, gfs2_check_blk_type. This simplifies the code.
Bob Peterson
---
Function gfs2_testbit is called in three places. Two of those places,
gfs2_alloc_extent and gfs2_unaligned_extlen, should be using the
clone bitmaps, not the "real" bitmaps. Function gfs2_unaligned_extlen
is used by the block reservations scheme to determine the length of
an extent of free blocks. Before this patch, it wasn't using the
clone bitmap, which means soon-to-be-freed blocks will be treated
as free blocks for the purposes of an extent, and that's clearly
wrong.
This patch adds a new parameter to gfs2_testbit to indicate whether
or not the clone bitmaps should be used (if available).
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/rgrp.c | 45 ++++++++++++++++++++-------------------------
1 file changed, 20 insertions(+), 25 deletions(-)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3821b3ab04fa..7e22918d32d6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -123,17 +123,27 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
/**
* gfs2_testbit - test a bit in the bitmaps
* @rbm: The bit to test
+ * @use_clone: If true, test the clone bitmap, not the official bitmap.
+ *
+ * Some callers, like gfs2_unaligned_extlen, need to test the clone bitmaps,
+ * not the "real" bitmaps, because we can't reserve blocks if they're still
+ * considered metadata in the journal.
*
* Returns: The two bit block state of the requested bit
*/
-static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
{
struct gfs2_bitmap *bi = rbm_bi(rbm);
- const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
+ const u8 *buffer;
const u8 *byte;
unsigned int bit;
+ if (use_clone && bi->bi_clone)
+ buffer = bi->bi_clone;
+ else
+ buffer = bi->bi_bh->b_data;
+ buffer += bi->bi_offset;
byte = buffer + (rbm->offset / GFS2_NBBY);
bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
@@ -322,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
u8 res;
for (n = 0; n < n_unaligned; n++) {
- res = gfs2_testbit(rbm);
+ res = gfs2_testbit(rbm, true);
if (res != GFS2_BLKST_FREE)
return true;
(*len)--;
@@ -2142,26 +2152,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
}
-/**
- * gfs2_get_block_type - Check a block in a RG is of given type
- * @rgd: the resource group holding the block
- * @block: the block number
- *
- * Returns: The block type (GFS2_BLKST_*)
- */
-
-static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
-{
- struct gfs2_rbm rbm = { .rgd = rgd, };
- int ret;
-
- ret = gfs2_rbm_from_block(&rbm, block);
- WARN_ON_ONCE(ret != 0);
-
- return gfs2_testbit(&rbm);
-}
-
-
/**
* gfs2_alloc_extent - allocate an extent from a given bitmap
* @rbm: the resource group information
@@ -2186,7 +2176,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
block++;
while (*n < elen) {
ret = gfs2_rbm_from_block(&pos, block);
- if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
+ if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
break;
gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
gfs2_setbit(&pos, true, GFS2_BLKST_USED);
@@ -2543,6 +2533,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
{
struct gfs2_rgrpd *rgd;
struct gfs2_holder rgd_gh;
+ struct gfs2_rbm rbm;
int error = -EINVAL;
rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
@@ -2553,7 +2544,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
if (error)
goto fail;
- if (gfs2_get_block_type(rgd, no_addr) != type)
+ rbm.rgd = rgd;
+ error = gfs2_rbm_from_block(&rbm, no_addr);
+ WARN_ON_ONCE(error != 0);
+
+ if (gfs2_testbit(&rbm, false) != type)
error = -ESTALE;
gfs2_glock_dq_uninit(&rgd_gh);
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps
2018-07-13 20:19 ` [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps Bob Peterson
@ 2018-07-24 12:59 ` Andreas Gruenbacher
2018-07-26 16:08 ` Bob Peterson
0 siblings, 1 reply; 3+ messages in thread
From: Andreas Gruenbacher @ 2018-07-24 12:59 UTC (permalink / raw)
To: cluster-devel.redhat.com
Bob,
On 13 July 2018 at 22:19, Bob Peterson <rpeterso@redhat.com> wrote:
> Hi,
>
> The previous version of my patch had a bug. Apparently function
> gfs2_check_blk_type does need to check the non-clone bitmap. The other
> two callers need to check the clone bitmap, if available.
> So this version adds a parameter to specify. Also, I got rid of
> function gfs2_get_block_type() in favor of calling it directly
> from its only caller, gfs2_check_blk_type. This simplifies the code.
could you please add some documentation about how those clone bitmaps
are supposed to work? It's really hard to guess that from the code
alone.
> Bob Peterson
> ---
> Function gfs2_testbit is called in three places. Two of those places,
> gfs2_alloc_extent and gfs2_unaligned_extlen, should be using the
> clone bitmaps, not the "real" bitmaps. Function gfs2_unaligned_extlen
> is used by the block reservations scheme to determine the length of
> an extent of free blocks. Before this patch, it wasn't using the
> clone bitmap, which means soon-to-be-freed blocks will be treated
> as free blocks for the purposes of an extent, and that's clearly
> wrong.
>
> This patch adds a new parameter to gfs2_testbit to indicate whether
> or not the clone bitmaps should be used (if available).
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> fs/gfs2/rgrp.c | 45 ++++++++++++++++++++-------------------------
> 1 file changed, 20 insertions(+), 25 deletions(-)
>
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 3821b3ab04fa..7e22918d32d6 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -123,17 +123,27 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
> /**
> * gfs2_testbit - test a bit in the bitmaps
> * @rbm: The bit to test
> + * @use_clone: If true, test the clone bitmap, not the official bitmap.
> + *
> + * Some callers, like gfs2_unaligned_extlen, need to test the clone bitmaps,
> + * not the "real" bitmaps, because we can't reserve blocks if they're still
> + * considered metadata in the journal.
> *
> * Returns: The two bit block state of the requested bit
> */
>
> -static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
> +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
> {
> struct gfs2_bitmap *bi = rbm_bi(rbm);
> - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
> + const u8 *buffer;
> const u8 *byte;
> unsigned int bit;
>
> + if (use_clone && bi->bi_clone)
> + buffer = bi->bi_clone;
> + else
> + buffer = bi->bi_bh->b_data;
> + buffer += bi->bi_offset;
> byte = buffer + (rbm->offset / GFS2_NBBY);
> bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
>
> @@ -322,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
> u8 res;
>
> for (n = 0; n < n_unaligned; n++) {
> - res = gfs2_testbit(rbm);
> + res = gfs2_testbit(rbm, true);
> if (res != GFS2_BLKST_FREE)
> return true;
> (*len)--;
> @@ -2142,26 +2152,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
> gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
> }
>
> -/**
> - * gfs2_get_block_type - Check a block in a RG is of given type
> - * @rgd: the resource group holding the block
> - * @block: the block number
> - *
> - * Returns: The block type (GFS2_BLKST_*)
> - */
> -
> -static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
> -{
> - struct gfs2_rbm rbm = { .rgd = rgd, };
> - int ret;
> -
> - ret = gfs2_rbm_from_block(&rbm, block);
> - WARN_ON_ONCE(ret != 0);
> -
> - return gfs2_testbit(&rbm);
> -}
> -
> -
> /**
> * gfs2_alloc_extent - allocate an extent from a given bitmap
> * @rbm: the resource group information
> @@ -2186,7 +2176,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
> block++;
> while (*n < elen) {
> ret = gfs2_rbm_from_block(&pos, block);
> - if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
> + if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
> break;
> gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
> gfs2_setbit(&pos, true, GFS2_BLKST_USED);
> @@ -2543,6 +2533,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
> {
> struct gfs2_rgrpd *rgd;
> struct gfs2_holder rgd_gh;
> + struct gfs2_rbm rbm;
> int error = -EINVAL;
>
> rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
> @@ -2553,7 +2544,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
> if (error)
> goto fail;
>
> - if (gfs2_get_block_type(rgd, no_addr) != type)
> + rbm.rgd = rgd;
> + error = gfs2_rbm_from_block(&rbm, no_addr);
> + WARN_ON_ONCE(error != 0);
> +
> + if (gfs2_testbit(&rbm, false) != type)
> error = -ESTALE;
>
> gfs2_glock_dq_uninit(&rgd_gh);
>
Thanks,
Andreas
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps
2018-07-24 12:59 ` Andreas Gruenbacher
@ 2018-07-26 16:08 ` Bob Peterson
0 siblings, 0 replies; 3+ messages in thread
From: Bob Peterson @ 2018-07-26 16:08 UTC (permalink / raw)
To: cluster-devel.redhat.com
----- Original Message -----
> Bob,
>
> On 13 July 2018 at 22:19, Bob Peterson <rpeterso@redhat.com> wrote:
> > Hi,
> >
> > The previous version of my patch had a bug. Apparently function
> > gfs2_check_blk_type does need to check the non-clone bitmap. The other
> > two callers need to check the clone bitmap, if available.
> > So this version adds a parameter to specify. Also, I got rid of
> > function gfs2_get_block_type() in favor of calling it directly
> > from its only caller, gfs2_check_blk_type. This simplifies the code.
>
> could you please add some documentation about how those clone bitmaps
> are supposed to work? It's really hard to guess that from the code
> alone.
Hi Andreas,
Here's an updated version of the patch with more details about
clone bitmaps, as you requested. Other than the comments, the
patch itself is unchanged.
Regards,
Bob Peterson
---
gfs2: Fix gfs2_testbit to use clone bitmaps
In the gfs2 block allocator there's a concept of an in-core only
"clone" bitmap that may (or may not) exist after blocks are freed
or unlinked. When a block is freed or unlinked, memory for the
clone bitmap is allocated for the bitmap affected, but only the
real bitmap is updated. The clone bitmap will still reflect its
prior (allocated) state.
When blocks are assigned (free -> used or free -> dinode), it
does not need to allocate a clone bitmap. However, if one exists,
both the real bitmap and the clone bitmap are updated to reflect
the new state of the block. So block assignments are more
"sticky" than frees.
When the bit search algorithms try to find blocks of a certain
state, they use the clone bitmaps (if they exist). That way,
the only blocks to be allocated are never ones that have been
recently freed.
The reason gfs2 goes through all this trouble is to make sure
that journals never contain transactions with metadata that both
free and reassign the same block. If they were allowed to do so,
journal replay (due to a node failure) would introduce file
system corruption. For example, an indirect block might end up
pointing to a block that's in the "free" state because there's
no way for journal replay to know the original intent for that
block. I don't know offhand of a scenario in which this can
happen, but that was the original intent.
The management of the clone bitmaps is not done properly in the
case of function gfs2_testbit, however.
Function gfs2_testbit is called in three places. Two of those
places, gfs2_alloc_extent and gfs2_unaligned_extlen, should be
using the clone bitmaps, not the "real" bitmaps. Function
gfs2_unaligned_extlen is used by the block reservations scheme
to determine the length of an extent of free blocks. Before this
patch, it wasn't using the clone bitmap, which means soon-to-be-
freed blocks will be treated as free blocks for the purposes of
an extent, and that's clearly wrong for the reasons stated above.
This patch adds a new parameter to gfs2_testbit to indicate whether
or not the clone bitmaps should be used (if available).
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/rgrp.c | 45 ++++++++++++++++++++-------------------------
1 file changed, 20 insertions(+), 25 deletions(-)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 68a81afd3b4a..1651721fb2b2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -123,17 +123,27 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
/**
* gfs2_testbit - test a bit in the bitmaps
* @rbm: The bit to test
+ * @use_clone: If true, test the clone bitmap, not the official bitmap.
+ *
+ * Some callers, like gfs2_unaligned_extlen, need to test the clone bitmaps,
+ * not the "real" bitmaps, because we can't reserve blocks if they're still
+ * considered metadata in the journal.
*
* Returns: The two bit block state of the requested bit
*/
-static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm, bool use_clone)
{
struct gfs2_bitmap *bi = rbm_bi(rbm);
- const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset;
+ const u8 *buffer;
const u8 *byte;
unsigned int bit;
+ if (use_clone && bi->bi_clone)
+ buffer = bi->bi_clone;
+ else
+ buffer = bi->bi_bh->b_data;
+ buffer += bi->bi_offset;
byte = buffer + (rbm->offset / GFS2_NBBY);
bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
@@ -322,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
u8 res;
for (n = 0; n < n_unaligned; n++) {
- res = gfs2_testbit(rbm);
+ res = gfs2_testbit(rbm, true);
if (res != GFS2_BLKST_FREE)
return true;
(*len)--;
@@ -2146,26 +2156,6 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
}
-/**
- * gfs2_get_block_type - Check a block in a RG is of given type
- * @rgd: the resource group holding the block
- * @block: the block number
- *
- * Returns: The block type (GFS2_BLKST_*)
- */
-
-static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
-{
- struct gfs2_rbm rbm = { .rgd = rgd, };
- int ret;
-
- ret = gfs2_rbm_from_block(&rbm, block);
- WARN_ON_ONCE(ret != 0);
-
- return gfs2_testbit(&rbm);
-}
-
-
/**
* gfs2_alloc_extent - allocate an extent from a given bitmap
* @rbm: the resource group information
@@ -2190,7 +2180,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
block++;
while (*n < elen) {
ret = gfs2_rbm_from_block(&pos, block);
- if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
+ if (ret || gfs2_testbit(&pos, true) != GFS2_BLKST_FREE)
break;
gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
gfs2_setbit(&pos, true, GFS2_BLKST_USED);
@@ -2547,6 +2537,7 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
{
struct gfs2_rgrpd *rgd;
struct gfs2_holder rgd_gh;
+ struct gfs2_rbm rbm;
int error = -EINVAL;
rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);
@@ -2557,7 +2548,11 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
if (error)
goto fail;
- if (gfs2_get_block_type(rgd, no_addr) != type)
+ rbm.rgd = rgd;
+ error = gfs2_rbm_from_block(&rbm, no_addr);
+ WARN_ON_ONCE(error != 0);
+
+ if (gfs2_testbit(&rbm, false) != type)
error = -ESTALE;
gfs2_glock_dq_uninit(&rgd_gh);
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2018-07-26 16:08 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <1726844332.50526668.1531512206762.JavaMail.zimbra@redhat.com>
2018-07-13 20:19 ` [Cluster-devel] [PATCH v2] gfs2: Fix gfs2_testbit to use clone bitmaps Bob Peterson
2018-07-24 12:59 ` Andreas Gruenbacher
2018-07-26 16:08 ` Bob Peterson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).