cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
* [Cluster-devel] [PATCH v2 0/2] gfs2_write_calc_reserv rewrite
@ 2018-07-16 20:45 Andreas Gruenbacher
  2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 1/2] gfs2: Pass write offset to gfs2_write_calc_reserv Andreas Gruenbacher
  2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 2/2] gfs2: Rewrite gfs2_write_calc_reserv Andreas Gruenbacher
  0 siblings, 2 replies; 3+ messages in thread
From: Andreas Gruenbacher @ 2018-07-16 20:45 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Here's an update on the two patches posted last Friday.

Changes since version 1:

 * Nate has reported a bug in the new gfs2_write_calc_reserv that causes
   it to consume a lot of time.  It turns out that the function was
   unnecessarily trying to compute the number of indirect blocks for
   stuffed files, underflowing a counter.

 * Abhi has pointed out that the quota changes were not quite right.

Andreas Gruenbacher (2):
  gfs2: Pass write offset to gfs2_write_calc_reserv
  gfs2: Rewrite gfs2_write_calc_reserv

 fs/gfs2/bmap.c  | 74 +++++++++++++++++++++++++++++++++++++++++++++++--
 fs/gfs2/bmap.h  | 32 ++++-----------------
 fs/gfs2/file.c  | 34 +++++++++++++++++++----
 fs/gfs2/quota.c | 37 +++++++++++++------------
 4 files changed, 125 insertions(+), 52 deletions(-)

-- 
2.17.1



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Cluster-devel] [PATCH v2 1/2] gfs2: Pass write offset to gfs2_write_calc_reserv
  2018-07-16 20:45 [Cluster-devel] [PATCH v2 0/2] gfs2_write_calc_reserv rewrite Andreas Gruenbacher
@ 2018-07-16 20:45 ` Andreas Gruenbacher
  2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 2/2] gfs2: Rewrite gfs2_write_calc_reserv Andreas Gruenbacher
  1 sibling, 0 replies; 3+ messages in thread
From: Andreas Gruenbacher @ 2018-07-16 20:45 UTC (permalink / raw)
  To: cluster-devel.redhat.com

Pass the offset of the write to gfs2_write_calc_reserv so that we can
then compute a better upper bound of the number of indirect blocks
required.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c  |  4 ++--
 fs/gfs2/bmap.h  |  2 ++
 fs/gfs2/file.c  | 12 ++++++------
 fs/gfs2/quota.c | 37 ++++++++++++++++++++-----------------
 4 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 89f1f7d3186d..7d3bb327f8b7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1003,8 +1003,8 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 	alloc_required = unstuff || iomap->type == IOMAP_HOLE;
 
 	if (alloc_required || gfs2_is_jdata(ip))
-		gfs2_write_calc_reserv(ip, iomap->length, &data_blocks,
-				       &ind_blocks);
+		gfs2_write_calc_reserv(ip, iomap->offset, iomap->length,
+				       &data_blocks, &ind_blocks);
 
 	if (alloc_required) {
 		struct gfs2_alloc_parms ap = {
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 6b18fb323f0a..64970536c7d6 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -22,6 +22,7 @@ struct page;
 /**
  * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
  * @ip: the file
+ * @pos: file offset of the write
  * @len: the number of bytes to be written to the file
  * @data_blocks: returns the number of data blocks required
  * @ind_blocks: returns the number of indirect blocks required
@@ -29,6 +30,7 @@ struct page;
  */
 
 static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
+					  u64 pos,
 					  unsigned int len,
 					  unsigned int *data_blocks,
 					  unsigned int *ind_blocks)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 08369c6cd127..93f59f9eecbd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -435,7 +435,7 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
 	if (ret)
 		goto out_unlock;
 
-	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
+	gfs2_write_calc_reserv(ip, pos, PAGE_SIZE, &data_blocks, &ind_blocks);
 	ap.target = data_blocks + ind_blocks;
 	ret = gfs2_quota_lock_check(ip, &ap);
 	if (ret)
@@ -918,7 +918,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
  *
  * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
  */
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t pos, loff_t *len,
 			    unsigned int *data_blocks, unsigned int *ind_blocks,
 			    unsigned int max_blocks)
 {
@@ -936,7 +936,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
 	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
 	if (*len > max) {
 		*len = max;
-		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
+		gfs2_write_calc_reserv(ip, pos, max, data_blocks, ind_blocks);
 	}
 }
 
@@ -969,7 +969,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 
 	gfs2_size_hint(file, offset, len);
 
-	gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
+	gfs2_write_calc_reserv(ip, offset, PAGE_SIZE, &data_blocks, &ind_blocks);
 	ap.min_target = data_blocks + ind_blocks;
 
 	while (len > 0) {
@@ -991,7 +991,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 		 * calculate a more realistic 'bytes' to serve as a good
 		 * starting point for the number of bytes we may be able
 		 * to write */
-		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+		gfs2_write_calc_reserv(ip, offset, bytes, &data_blocks, &ind_blocks);
 		ap.target = data_blocks + ind_blocks;
 
 		error = gfs2_quota_lock_check(ip, &ap);
@@ -1014,7 +1014,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 		/* Almost done. Calculate bytes that can be written using
 		 * max_blks. We also recompute max_bytes, data_blocks and
 		 * ind_blocks */
-		calc_max_reserv(ip, &max_bytes, &data_blocks,
+		calc_max_reserv(ip, offset, &max_bytes, &data_blocks,
 				&ind_blocks, max_blks);
 
 		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 0efae7a0ee80..0e411fbe1e4d 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -870,22 +870,18 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
 	struct gfs2_alloc_parms ap = { .aflags = 0, };
-	unsigned int data_blocks, ind_blocks;
 	struct gfs2_holder *ghs, i_gh;
 	unsigned int qx, x;
 	struct gfs2_quota_data *qd;
 	unsigned reserved;
 	loff_t offset;
-	unsigned int nalloc = 0, blocks;
+	unsigned int blocks;
 	int error;
 
 	error = gfs2_rsqa_alloc(ip);
 	if (error)
 		return error;
 
-	gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
-			      &data_blocks, &ind_blocks);
-
 	ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
 	if (!ghs)
 		return -ENOMEM;
@@ -903,13 +899,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	if (error)
 		goto out;
 
-	for (x = 0; x < num_qd; x++) {
-		offset = qd2offset(qda[x]);
-		if (gfs2_write_alloc_required(ip, offset,
-					      sizeof(struct gfs2_quota)))
-			nalloc++;
-	}
-
 	/* 
 	 * 1 blk for unstuffing inode if stuffed. We add this extra
 	 * block to the reservation unconditionally. If the inode
@@ -919,16 +908,30 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 	/* +3 in the end for unstuffing block, inode size update block
 	 * and another block in case quota straddles page boundary and 
 	 * two blocks need to be updated instead of 1 */
-	blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
 
-	reserved = 1 + (nalloc * (data_blocks + ind_blocks));
+	blocks = RES_DINODE + num_qd + 3;
+	reserved = 1;
+	for (x = 0; x < num_qd; x++) {
+		unsigned int data_blocks, ind_blocks;
+
+		offset = qd2offset(qda[x]);
+		gfs2_write_calc_reserv(ip, offset, sizeof(struct gfs2_quota),
+				       &data_blocks, &ind_blocks);
+		blocks += data_blocks;
+		if (gfs2_write_alloc_required(ip, offset,
+					      sizeof(struct gfs2_quota))) {
+			blocks += ind_blocks;
+			reserved += data_blocks + ind_blocks;
+		}
+	}
+
 	ap.target = reserved;
 	error = gfs2_inplace_reserve(ip, &ap);
 	if (error)
 		goto out_alloc;
 
-	if (nalloc)
-		blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS;
+	if (reserved > 1)
+		blocks += gfs2_rg_blocks(ip, reserved) + RES_STATFS;
 
 	error = gfs2_trans_begin(sdp, blocks, 0);
 	if (error)
@@ -1716,7 +1719,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
 		alloc_required = 1;
 	if (alloc_required) {
 		struct gfs2_alloc_parms ap = { .aflags = 0, };
-		gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
+		gfs2_write_calc_reserv(ip, offset, sizeof(struct gfs2_quota),
 				       &data_blocks, &ind_blocks);
 		blocks = 1 + data_blocks + ind_blocks;
 		ap.target = blocks;
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [Cluster-devel] [PATCH v2 2/2] gfs2: Rewrite gfs2_write_calc_reserv
  2018-07-16 20:45 [Cluster-devel] [PATCH v2 0/2] gfs2_write_calc_reserv rewrite Andreas Gruenbacher
  2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 1/2] gfs2: Pass write offset to gfs2_write_calc_reserv Andreas Gruenbacher
@ 2018-07-16 20:45 ` Andreas Gruenbacher
  1 sibling, 0 replies; 3+ messages in thread
From: Andreas Gruenbacher @ 2018-07-16 20:45 UTC (permalink / raw)
  To: cluster-devel.redhat.com

For normal writes, replace the existing version of
gfs2_write_calc_reserv with one that takes the offset of the write into
account.  Taking the alignment of that offset within the metadata tree
into account allows us to determine a tighter upper bound on the number
of indirect blocks required.

For __gfs2_fallocate, since gfs2_write_calc_reserv and calc_max_reserv
are tightly coupled, keep the old version of gfs2_write_calc_reserv to
avoid rewriting __gfs2_fallocate completely at this point.

The new gfs2_write_calc_reserv function still doesn't look at the actual
metadata tree, so it doesn't know which indirect blocks have already
been allocated; we still always assume the worst case.

This will eventually be fixed by getting rid of
gfs2_write_alloc_required and gfs2_write_calc_reserv in favor of
gfs2_iomap_get, followed by checking the metapath to determine which
indirect blocks are already allocated, followed by gfs2_iomap_alloc.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/bmap.h | 34 ++++--------------------
 fs/gfs2/file.c | 28 +++++++++++++++++---
 3 files changed, 100 insertions(+), 32 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7d3bb327f8b7..5eb0c3d8149b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2453,3 +2453,73 @@ int __gfs2_punch_hole(struct file *file, loff_t offset, loff_t length)
 		gfs2_trans_end(sdp);
 	return error;
 }
+
+/**
+ * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
+ * @ip: the file
+ * @pos: offset of the write
+ * @len: the number of bytes to be written to the file
+ * @data_blocks: returns the number of data blocks required
+ * @ind_blocks: returns the number of indirect blocks required
+ *
+ */
+
+void gfs2_write_calc_reserv(const struct gfs2_inode *ip, u64 pos,
+			    unsigned int len, unsigned int *data_blocks,
+			    unsigned int *ind_blocks)
+{
+	const struct inode *inode = &ip->i_inode;
+	unsigned int blkbits = inode->i_blkbits;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	unsigned int inptrs = sdp->sd_inptrs;
+	unsigned int h = ip->i_height;
+	u64 last;
+
+	BUG_ON(gfs2_is_dir(ip));
+
+	/* Calculate the height required for the new end of file */
+	while (pos + len > sdp->sd_heightsize[h])
+		h++;
+
+	/* Indirect blocks for growing the inode height */
+	*ind_blocks = h - ip->i_height;
+
+	/* Write range rounded to block boundaries */
+	last = (pos + len - 1) >> blkbits;
+	pos >>= blkbits;
+	*data_blocks = last - pos + 1;
+
+	/*
+	 * Unstuffing (going from height 0 to 1) may require an additional data
+	 * block, but won't require an indirect block.
+	 */
+	if (gfs2_is_stuffed(ip)) {
+		(*ind_blocks)--;
+		if (i_size_read(inode) != 0 && pos != 0)
+			(*data_blocks)++;
+	}
+
+	/* Inodes with a height below 2 don't have indirect blocks. */
+	if (h < 2)
+		return;
+
+	/*
+	 * Indirect blocks for filling the tree: each layer closer towards the
+	 * root may require however many indirect blocks the write range still
+	 * spans at that layer, which is at least one.
+	 *
+	 * We can ignore the data blocks at layer @h as well as the inode at
+	 * layer 0.
+	 *
+	 * Note that we don't take into account which indirect blocks are
+	 * already allocated here, so we overestimate the number of indirect
+	 * blocks required in most cases.
+	 */
+	h--;
+	while (h >= 1) {
+		do_div(pos, inptrs);
+		do_div(last, inptrs);
+		*ind_blocks += last - pos + 1;
+		h--;
+	}
+}
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index 64970536c7d6..3b4ee1b6884b 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -18,35 +18,11 @@ struct inode;
 struct gfs2_inode;
 struct page;
 
-
-/**
- * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
- * @ip: the file
- * @pos: file offset of the write
- * @len: the number of bytes to be written to the file
- * @data_blocks: returns the number of data blocks required
- * @ind_blocks: returns the number of indirect blocks required
- *
- */
-
-static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
-					  u64 pos,
-					  unsigned int len,
-					  unsigned int *data_blocks,
-					  unsigned int *ind_blocks)
-{
-	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	unsigned int tmp;
-
-	BUG_ON(gfs2_is_dir(ip));
-	*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
-	*ind_blocks = 3 * (sdp->sd_max_height - 1);
-
-	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
-		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
-		*ind_blocks += tmp;
-	}
-}
+extern void gfs2_write_calc_reserv(const struct gfs2_inode *ip,
+				   u64 pos,
+				   unsigned int len,
+				   unsigned int *data_blocks,
+				   unsigned int *ind_blocks);
 
 extern const struct iomap_ops gfs2_iomap_ops;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 93f59f9eecbd..bf92f8454490 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -907,6 +907,25 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	brelse(dibh);
 	return error;
 }
+
+void old_gfs2_write_calc_reserv(const struct gfs2_inode *ip,
+				unsigned int len,
+				unsigned int *data_blocks,
+				unsigned int *ind_blocks)
+{
+	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	unsigned int tmp;
+
+	BUG_ON(gfs2_is_dir(ip));
+	*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
+	*ind_blocks = 3 * (sdp->sd_max_height - 1);
+
+	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
+		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+		*ind_blocks += tmp;
+	}
+}
+
 /**
  * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
  *                     blocks, determine how many bytes can be written.
@@ -936,7 +955,8 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t pos, loff_t *len,
 	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
 	if (*len > max) {
 		*len = max;
-		gfs2_write_calc_reserv(ip, pos, max, data_blocks, ind_blocks);
+		old_gfs2_write_calc_reserv(ip, max,
+					   data_blocks, ind_blocks);
 	}
 }
 
@@ -969,7 +989,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 
 	gfs2_size_hint(file, offset, len);
 
-	gfs2_write_calc_reserv(ip, offset, PAGE_SIZE, &data_blocks, &ind_blocks);
+	old_gfs2_write_calc_reserv(ip, PAGE_SIZE,
+				   &data_blocks, &ind_blocks);
 	ap.min_target = data_blocks + ind_blocks;
 
 	while (len > 0) {
@@ -991,7 +1012,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
 		 * calculate a more realistic 'bytes' to serve as a good
 		 * starting point for the number of bytes we may be able
 		 * to write */
-		gfs2_write_calc_reserv(ip, offset, bytes, &data_blocks, &ind_blocks);
+		old_gfs2_write_calc_reserv(ip, bytes,
+					   &data_blocks, &ind_blocks);
 		ap.target = data_blocks + ind_blocks;
 
 		error = gfs2_quota_lock_check(ip, &ap);
-- 
2.17.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-07-16 20:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-16 20:45 [Cluster-devel] [PATCH v2 0/2] gfs2_write_calc_reserv rewrite Andreas Gruenbacher
2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 1/2] gfs2: Pass write offset to gfs2_write_calc_reserv Andreas Gruenbacher
2018-07-16 20:45 ` [Cluster-devel] [PATCH v2 2/2] gfs2: Rewrite gfs2_write_calc_reserv Andreas Gruenbacher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).