From: Bob Peterson <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH] GFS2: Rework rgrp glock congestion functions for intra-node
Date: Fri, 30 Jun 2017 12:43:40 -0400 (EDT) [thread overview]
Message-ID: <513776377.27803960.1498841020404.JavaMail.zimbra@redhat.com> (raw)
In-Reply-To: <1520935315.27803769.1498840932508.JavaMail.zimbra@redhat.com>
Hi,
This patch reworks the "congestion" algorithms for resource group
glocks to take into account intra-node demands as well as
inter-node demands.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ad62bfb..903f58b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1807,6 +1807,21 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
}
/**
+ * gfs2_rgrp_used_recently - test whether an rgrp glock was used recently
+ * @rgd: The rgrp whose glock is to be tested
+ * @msecs: The time limit in milliseconds
+ *
+ * Returns: True if the rgrp glock has been used within the time limit
+ */
+static inline bool gfs2_rgrp_used_recently(const struct gfs2_rgrpd *rgd,
+ u64 msecs)
+{
+ return (ktime_before(ktime_get_real(),
+ ktime_add(rgd->rd_gl->gl_dstamp,
+ ms_to_ktime(msecs))));
+}
+
+/**
* gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
* @rgd: The rgrp in question
* @loops: An indication of how picky we can be (0=very, 1=less so)
@@ -1833,7 +1848,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
* Returns: A boolean verdict on the congestion status
*/
-static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
+static bool gfs2_rgrp_congested_dlm(const struct gfs2_rgrpd *rgd, int loops)
{
const struct gfs2_glock *gl = rgd->rd_gl;
const struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -1845,6 +1860,11 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
u64 var;
int cpu, nonzero = 0;
+ /* If it hasn't been used recently we can't judge the statistics, so
+ assume it's not congested. */
+ if (!gfs2_rgrp_used_recently(rgd, HZ))
+ return false;
+
preempt_disable();
for_each_present_cpu(cpu) {
st = &per_cpu_ptr(sdp->sd_lkstats, cpu)->lkstats[LM_TYPE_RGRP];
@@ -1880,21 +1900,66 @@ static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
}
/**
- * gfs2_rgrp_used_recently
- * @rs: The block reservation with the rgrp to test
- * @msecs: The time limit in milliseconds
+ * fast_to_acquire - determine if a resource group will be fast to acquire
*
- * Returns: True if the rgrp glock has been used within the time limit
+ * If this is one of our preferred rgrps, it should be quicker to acquire,
+ * because we tried to set ourselves up as dlm lock master.
+ */
+static inline bool fast_to_acquire(const struct gfs2_rgrpd *rgd)
+{
+ struct gfs2_glock *gl = rgd->rd_gl;
+
+ if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
+ !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+ !test_bit(GLF_DEMOTE, &gl->gl_flags))
+ return true;
+ if (rgd->rd_flags & GFS2_RDF_PREFERRED)
+ return true;
+ return false;
+}
+
+/**
+ * gfs2_rgrp_congested - decide whether a rgrp glock is congested
+ * @rgd: The rgrp in question
+ * @loops: An indication of how picky we can be (0=very, 1=less so)
+ *
+ * There are two kinds of congestion: inter-node and intra-node.
+ *
+ * Inter-node congestion is where multiple nodes all want to allocate blocks
+ * inside the same rgrp, which means they need to trade the rgrp glock back
+ * and forth, which is a slow process. To mitigate this, we use glock
+ * statistics to predict whether the glock is historically fast to acquire.
+ *
+ * Intra-node congestion is where you have multiple processes on the same
+ * node, all trying to allocate blocks in the same rgrp. There's nothing wrong
+ * with doing so, but each process needs to wait for the other to release the
+ * rgrp glock before it may proceed. We can predict whether a rgrp glock is
+ * congested by how many block reservations are currently attached.
+ *
+ * Both kinds of congestion can hurt performance, but it's faster to check
+ * intra-node, so we do that first. After all, why bother to check if we can
+ * get the glock quickly from DLM if other processes have also used that
+ * same reasoning?
+ *
+ * We know the number of loops we've been around, so we know how desperate we
+ * are to find something. On first loop, call it congested if anyone else has
+ * a block reservation. On second loop, call it congested if it's not fast to
+ * acquire.
*/
-static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
- u64 msecs)
+static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
{
- u64 tdiff;
+ /* Check for intra-node congestion */
+ if (loops == 0 && !RB_EMPTY_ROOT(&rgd->rd_rstree))
+ return true;
+
+ if (loops == 1 && !fast_to_acquire(rgd))
+ return true;
- tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
- rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+ /* Check for inter-node congestion */
+ if (rgd->rd_sbd->sd_lockstruct.ls_ops->lm_lock) /* lock_dlm */
+ return gfs2_rgrp_congested_dlm(rgd, loops);
- return tdiff > (msecs * 1000 * 1000);
+ return false;
}
static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
@@ -1921,25 +1986,6 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
}
/**
- * fast_to_acquire - determine if a resource group will be fast to acquire
- *
- * If this is one of our preferred rgrps, it should be quicker to acquire,
- * because we tried to set ourselves up as dlm lock master.
- */
-static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
-{
- struct gfs2_glock *gl = rgd->rd_gl;
-
- if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
- !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
- !test_bit(GLF_DEMOTE, &gl->gl_flags))
- return 1;
- if (rgd->rd_flags & GFS2_RDF_PREFERRED)
- return 1;
- return 0;
-}
-
-/**
* gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
* @ap: the allocation parameters
@@ -1995,7 +2041,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
!fast_to_acquire(rs->rs_rbm.rgd))
goto next_rgrp;
if ((loops < 2) &&
- gfs2_rgrp_used_recently(rs, 1000) &&
gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
goto next_rgrp;
}
next parent reply other threads:[~2017-06-30 16:43 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1520935315.27803769.1498840932508.JavaMail.zimbra@redhat.com>
2017-06-30 16:43 ` Bob Peterson [this message]
2017-07-03 9:26 ` [Cluster-devel] [GFS2 PATCH] GFS2: Rework rgrp glock congestion functions for intra-node Steven Whitehouse
[not found] <1348332894.38934178.1501871083680.JavaMail.zimbra@redhat.com>
2017-08-04 18:43 ` Bob Peterson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=513776377.27803960.1498841020404.JavaMail.zimbra@redhat.com \
--to=rpeterso@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).