From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH v2 2/2] gfs2: Rewrite gfs2_write_calc_reserv
Date: Mon, 16 Jul 2018 22:45:05 +0200 [thread overview]
Message-ID: <20180716204505.6712-3-agruenba@redhat.com> (raw)
In-Reply-To: <20180716204505.6712-1-agruenba@redhat.com>
For normal writes, replace the existing version of
gfs2_write_calc_reserv with one that takes the offset of the write into
account. Taking the alignment of that offset within the metadata tree
into account allows to determine a better lower bound for the maximum
number of indirect blocks required.
For __gfs2_fallocate, since gfs2_write_calc_reserv and calc_max_reserv
are tightly coupled, keep the old version of gfs2_write_calc_reserv to
avoid rewriting __gfs2_fallocate completely at this point.
The new gfs2_write_calc_reserv function still doesn't look at the actual
metadata tree, so it doesn't know which indirect blocks have already
been allocated; we still always assume the worst case.
This will eventually be fixed by getting rid of
gfs2_write_alloc_required and gfs2_write_calc_reserv in favor of
gsf2_iomap_get, followed by checking the metapath to determine which
indirect blocks are already allocated, followed by gfs2_iomap_alloc.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
fs/gfs2/bmap.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/gfs2/bmap.h | 34 ++++--------------------
fs/gfs2/file.c | 28 +++++++++++++++++---
3 files changed, 100 insertions(+), 32 deletions(-)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7d3bb327f8b7..5eb0c3d8149b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2453,3 +2453,73 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
gfs2_trans_end(sdp);
return error;
}
+
+/**
+ * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
+ * @ip: the file
+ * @pos: offset of the write
+ * @len: the number of bytes to be written to the file
+ * @data_blocks: returns the number of data blocks required
+ * @ind_blocks: returns the number of indirect blocks required
+ *
+ */
+
+void gfs2_write_calc_reserv(const struct gfs2_inode *ip, u64 pos,
+ unsigned int len, unsigned int *data_blocks,
+ unsigned int *ind_blocks)
+{
+ const struct inode *inode = &ip->i_inode;
+ unsigned int blkbits = inode->i_blkbits;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ unsigned int inptrs = sdp->sd_inptrs;
+ unsigned int h = ip->i_height;
+ u64 last;
+
+ BUG_ON(gfs2_is_dir(ip));
+
+ /* Calculate the height required for the new end of file */
+ while (pos + len > sdp->sd_heightsize[h])
+ h++;
+
+ /* Indirect blocks for growing the inode height */
+ *ind_blocks = h - ip->i_height;
+
+ /* Write range rounded to block boundaries */
+ last = (pos + len - 1) >> blkbits;
+ pos >>= blkbits;
+ *data_blocks = last - pos + 1;
+
+ /*
+ * Unstuffing (going from height 0 to 1) may require an additional data
+ * block, but won't require an indirect block.
+ */
+ if (gfs2_is_stuffed(ip)) {
+ (*ind_blocks)--;
+ if (i_size_read(inode) != 0 && pos != 0)
+ (*data_blocks)++;
+ }
+
+ /* Inodes with a height below 2 don't have indirect blocks. */
+ if (h < 2)
+ return;
+
+ /*
+ * Indirect blocks for filling the tree: each layer closer towards the
+ * root may require however many indirect blocks the write range still
+ * spans at that layer, which is at least one.
+ *
+ * We can ignore the data blocks@layer @h as well as the inode at
+ * layer 0.
+ *
+ * Note that we don't take into account which indirect blocks are
+ * already allocated here, so we overestimate the number of indirect
+ * blocks requires in most cases.
+ */
+ h--;
+ while (h >= 1) {
+ do_div(pos, inptrs);
+ do_div(last, inptrs);
+ *ind_blocks += last - pos + 1;
+ h--;
+ }
+}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 64970536c7d6..3b4ee1b6884b 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -18,35 +18,11 @@ struct inode;
struct gfs2_inode;
struct page;
-
-/**
- * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
- * @ip: the file
- * @pos: file offset of the write
- * @len: the number of bytes to be written to the file
- * @data_blocks: returns the number of data blocks required
- * @ind_blocks: returns the number of indirect blocks required
- *
- */
-
-static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
- u64 pos,
- unsigned int len,
- unsigned int *data_blocks,
- unsigned int *ind_blocks)
-{
- const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int tmp;
-
- BUG_ON(gfs2_is_dir(ip));
- *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
- *ind_blocks = 3 * (sdp->sd_max_height - 1);
-
- for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
- tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
- *ind_blocks += tmp;
- }
-}
+extern void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
+ u64 pos,
+ unsigned int len,
+ unsigned int *data_blocks,
+ unsigned int *ind_blocks);
extern const struct iomap_ops gfs2_iomap_ops;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 93f59f9eecbd..bf92f8454490 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -907,6 +907,25 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
brelse(dibh);
return error;
}
+
+void old_gfs2_write_calc_reserv(const struct gfs2_inode *ip,
+ unsigned int len,
+ unsigned int *data_blocks,
+ unsigned int *ind_blocks)
+{
+ const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ unsigned int tmp;
+
+ BUG_ON(gfs2_is_dir(ip));
+ *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
+ *ind_blocks = 3 * (sdp->sd_max_height - 1);
+
+ for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
+ tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+ *ind_blocks += tmp;
+ }
+}
+
/**
* calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
* blocks, determine how many bytes can be written.
@@ -936,7 +955,8 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t pos, loff_t *len,
*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
if (*len > max) {
*len = max;
- gfs2_write_calc_reserv(ip, pos, max, data_blocks, ind_blocks);
+ old_gfs2_write_calc_reserv(ip, max,
+ data_blocks, ind_blocks);
}
}
@@ -969,7 +989,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
gfs2_size_hint(file, offset, len);
- gfs2_write_calc_reserv(ip, offset, PAGE_SIZE, &data_blocks, &ind_blocks);
+ old_gfs2_write_calc_reserv(ip, PAGE_SIZE,
+ &data_blocks, &ind_blocks);
ap.min_target = data_blocks + ind_blocks;
while (len > 0) {
@@ -991,7 +1012,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
* calculate a more realistic 'bytes' to serve as a good
* starting point for the number of bytes we may be able
* to write */
- gfs2_write_calc_reserv(ip, offset, bytes, &data_blocks, &ind_blocks);
+ old_gfs2_write_calc_reserv(ip, bytes,
+ &data_blocks, &ind_blocks);
ap.target = data_blocks + ind_blocks;
error = gfs2_quota_lock_check(ip, &ap);
--
2.17.1
prev parent reply other threads:[~2018-07-16 20:45 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-07-16 20:45 [Cluster-devel] [PATCH v2 0/2] gfs2_write_calc_reserv rewrite Andreas Gruenbacher
2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 1/2] gfs2: Pass write offset to gfs2_write_calc_reserv Andreas Gruenbacher
2018-07-16 20:45 ` Andreas Gruenbacher [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180716204505.6712-3-agruenba@redhat.com \
--to=agruenba@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).