cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [RFC 16/17] gfs2: Limit the maximum amount of reserved space
Date: Wed, 14 Oct 2020 11:58:32 +0200	[thread overview]
Message-ID: <20201014095833.1035870-17-agruenba@redhat.com> (raw)
In-Reply-To: <20201014095833.1035870-1-agruenba@redhat.com>

When allocating blocks for an inode, gfs2 tries to keep future allocations
efficient by "reserving" the blocks adjacent to the allocated blocks for future
use by this inode.  These "reservations" are node-local, and they can be stolen
by other nodes or even other processes on the same node when necessary.

Stealing from those reservations is very inefficient because it involves
scanning the bitmaps repeatedly.  This slows down workloads that create many
small files, for example.  Fix this by discarding some of those "reservations"
as soon as they cover more than half of the remaining free space in a resource
group that is not already firmly reserved.

Fixes xfstests generic/488, generic/531.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/incore.h     |  1 +
 fs/gfs2/rgrp.c       | 30 ++++++++++++++++++++++++++++++
 fs/gfs2/trace_gfs2.h | 13 ++++++++++---
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 295c22441ade..4f68456da677 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -104,6 +104,7 @@ struct gfs2_rgrpd {
 	u32 rd_bitbytes;		/* number of bytes in data bitmaps */
 	u32 rd_free;
 	u32 rd_reserved;                /* number of blocks reserved */
+	u32 rd_wanted;
 	u32 rd_free_clone;
 	u32 rd_dinodes;
 	u64 rd_igeneration;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 751bd31cfa5d..1ed09e45738f 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -646,6 +646,7 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
 	RB_CLEAR_NODE(&rs->rs_node);
 
 	if (rs->rs_free) {
+		rgd->rd_wanted -= rs->rs_free;
 		/* The rgrp extent failure point is likely not to increase;
 		   it will only do so if the freed blocks are somehow
 		   contiguous with a span of free blocks that follows. Still,
@@ -1519,6 +1520,7 @@ static void rs_insert(struct gfs2_inode *ip)
 
 	rb_link_node(&rs->rs_node, parent, newn);
 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
+	rgd->rd_wanted += rs->rs_free;
 	spin_unlock(&rgd->rd_rsspin);
 	trace_gfs2_rs(rs, TRACE_RS_INSERT);
 }
@@ -1997,6 +1999,26 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
 	return 0;
 }
 
+/* Drop reservations until rd_wanted <= max_wanted; returns true if any dropped. */
+static bool gfs2_trim_wanted_blocks(struct gfs2_rgrpd *rgd)
+{
+	u32 max_wanted = (rgd->rd_free_clone - rgd->rd_reserved) / 2;
+	struct gfs2_blkreserv *rs, *next;
+	bool trimmed = false;
+
+	if (rgd->rd_wanted <= max_wanted)
+		return false;	/* already within the limit, nothing to trim */
+	rbtree_postorder_for_each_entry_safe(rs, next, &rgd->rd_rstree, rs_node) {
+		if (rs->rs_reserved)
+			continue;	/* keep reservations that hold reserved blocks */
+		__rs_deltree(rs);	/* NOTE(review): safe under a postorder walk? */
+		trimmed = true;		/* lets the caller retry its block search */
+		if (rgd->rd_wanted <= max_wanted)
+			break;
+	}
+	return trimmed;
+}
+
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
@@ -2046,6 +2068,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 
 	while (loops < 3) {
 		struct gfs2_rgrpd *rgd;
+		bool retry = false;
 
 		rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
 		if (rg_locked) {
@@ -2110,7 +2133,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 		if (rs->rs_reserved > blocks_available)
 			rs->rs_reserved = blocks_available;
 		rgd->rd_reserved += rs->rs_reserved;
+		if (!gfs2_rs_active(rs)) {
+			if (gfs2_trim_wanted_blocks(rgd))
+				retry = true;
+		}
 		spin_unlock(&rgd->rd_rsspin);
+		if (retry)
+			rg_mblk_search(rs->rs_rgd, ip, ap);
 		rgrp_unlock_local(rs->rs_rgd);
 		return 0;
 check_rgrp:
@@ -2330,6 +2359,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 			rs->rs_start += len;
 			rlen = min(rs->rs_free, len);
 			rs->rs_free -= rlen;
+			rgd->rd_wanted -= rlen;
 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
 			if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
 			    rs->rs_free)
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 282fcb1a242f..4fd1614274fd 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -560,6 +560,7 @@ TRACE_EVENT(gfs2_block_alloc,
 		__field(        u64,	rd_addr			)
 		__field(        u32,	rd_free_clone		)
 		__field(	u32,	rd_reserved		)
+		__field(	u32,	rd_wanted		)
 	),
 
 	TP_fast_assign(
@@ -571,16 +572,19 @@ TRACE_EVENT(gfs2_block_alloc,
 		__entry->rd_addr	= rgd->rd_addr;
 		__entry->rd_free_clone	= rgd->rd_free_clone;
 		__entry->rd_reserved	= rgd->rd_reserved;
+		__entry->rd_wanted	= rgd->rd_wanted;
 	),
 
-	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
+	TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu rw:%lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long)__entry->len,
 		  block_state_name(__entry->block_state),
 		  (unsigned long long)__entry->rd_addr,
-		  __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+		  __entry->rd_free_clone,
+		  (unsigned long)__entry->rd_reserved,
+		  (unsigned long)__entry->rd_wanted)
 );
 
 /* Keep track of multi-block reservations as they are allocated/freed */
@@ -595,6 +599,7 @@ TRACE_EVENT(gfs2_rs,
 		__field(	u64,	rd_addr			)
 		__field(	u32,	rd_free_clone		)
 		__field(	u32,	rd_reserved		)
+		__field(	u32,	rd_wanted		)
 		__field(	u64,	inum			)
 		__field(	u64,	start			)
 		__field(	u32,	free			)
@@ -607,6 +612,7 @@ TRACE_EVENT(gfs2_rs,
 		__entry->rd_addr	= rs->rs_rgd->rd_addr;
 		__entry->rd_free_clone	= rs->rs_rgd->rd_free_clone;
 		__entry->rd_reserved	= rs->rs_rgd->rd_reserved;
+		__entry->rd_wanted	= rs->rs_rgd->rd_wanted;
 		__entry->inum		= container_of(rs, struct gfs2_inode,
 						       i_res)->i_no_addr;
 		__entry->start		= rs->rs_start;
@@ -615,13 +621,14 @@ TRACE_EVENT(gfs2_rs,
 		__entry->func		= func;
 	),
 
-	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu r:%lu",
+	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu rw:%lu %s f:%lu r:%lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->inum,
 		  (unsigned long long)__entry->start,
 		  (unsigned long long)__entry->rd_addr,
 		  (unsigned long)__entry->rd_free_clone,
 		  (unsigned long)__entry->rd_reserved,
+		  (unsigned long)__entry->rd_wanted,
 		  rs_func_name(__entry->func),
 		  (unsigned long)__entry->free,
 		  (unsigned long)__entry->reserved)
-- 
2.26.2



  parent reply	other threads:[~2020-10-14  9:58 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-14  9:58 [Cluster-devel] [RFC 00/17] gfs2: resource group glock sharing Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 01/17] gfs2: Turn gfs2_rbm_incr into gfs2_rbm_add Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 02/17] gfs2: Only use struct gfs2_rbm for bitmap manipulations Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 03/17] gfs2: Get rid of unnecessary variable in gfs2_alloc_blocks Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 04/17] gfs2: Minor gfs2_inplace_reserve cleanup Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 05/17] Revert "gfs2: Don't reject a supposedly full bitmap if we have blocks reserved" Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 06/17] gfs2: Don't clear GBF_FULL flags in rs_deltree Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 07/17] gfs2: Set GBF_FULL flags when reading resource groups Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 08/17] gfs2: When looking for blocks to allocate, don't clamp at free blocks Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 09/17] gfs2: Also reflect single-block allocations in rgd->rd_extfail_pt Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 10/17] gfs2: Only pass reservation down to gfs2_rbm_find Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 11/17] gfs: Don't search for unreserved space twice Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 12/17] gfs2: Add per-reservation reserved block accounting Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 13/17] gfs2: Add local resource group locking Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 14/17] gfs2: Allow node-wide exclusive glock sharing Andreas Gruenbacher
2020-10-14  9:58 ` [Cluster-devel] [RFC 15/17] gfs2: Use resource group " Andreas Gruenbacher
2020-10-14  9:58 ` Andreas Gruenbacher [this message]
2020-10-14  9:58 ` [Cluster-devel] [RFC 17/17] gfs2: Rename rs_free to rs_wanted Andreas Gruenbacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201014095833.1035870-17-agruenba@redhat.com \
    --to=agruenba@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).