From: Mark Syms <mark.syms@citrix.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 2/2] GFS2: Avoid recently demoted rgrps.
Date: Thu, 20 Sep 2018 15:52:13 +0100 [thread overview]
Message-ID: <1537455133-48589-3-git-send-email-mark.syms@citrix.com> (raw)
In-Reply-To: <1537455133-48589-1-git-send-email-mark.syms@citrix.com>
When under heavy I/O load from two or more hosts, resource group
allocation can result in glocks being bounced around between hosts.
Follow the example of inodes: if we have local waiters when asked
to demote the glock on a resource group, add a delay. Additionally,
track when we were last asked to demote a lock and, when assessing
resource groups in the allocator, prefer in the first two loop iterations
not to use resource groups where we've been asked to demote the glock
within the last second.
Signed-off-by: Mark Syms <mark.syms@citrix.com>
---
fs/gfs2/glock.c | 7 +++++--
fs/gfs2/incore.h | 2 ++
fs/gfs2/main.c | 1 +
fs/gfs2/rgrp.c | 10 ++++++++++
fs/gfs2/trace_gfs2.h | 12 +++++++++---
5 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 4614ee2..94ef947 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -973,7 +973,9 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
}
if (gl->gl_ops->go_callback)
gl->gl_ops->go_callback(gl, remote);
- trace_gfs2_demote_rq(gl, remote);
+ trace_gfs2_demote_rq(gl, remote, delay);
+ if (remote && !delay)
+ gl->gl_last_demote = jiffies;
}
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
@@ -1339,7 +1341,8 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
gfs2_glock_hold(gl);
holdtime = gl->gl_tchange + gl->gl_hold_time;
if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
- gl->gl_name.ln_type == LM_TYPE_INODE) {
+ (gl->gl_name.ln_type == LM_TYPE_INODE ||
+ gl->gl_name.ln_type == LM_TYPE_RGRP)) {
if (time_before(now, holdtime))
delay = holdtime - now;
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b96d39c..e3d5b10 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -366,6 +366,8 @@ struct gfs2_glock {
gl_reply:8; /* Last reply from the dlm */
unsigned long gl_demote_time; /* time of first demote request */
+ unsigned long gl_last_demote; /* jiffies at last demote transition */
+
long gl_hold_time;
struct list_head gl_holders;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 2d55e2c..2183c73 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -58,6 +58,7 @@ static void gfs2_init_glock_once(void *foo)
INIT_LIST_HEAD(&gl->gl_ail_list);
atomic_set(&gl->gl_ail_count, 0);
atomic_set(&gl->gl_revokes, 0);
+ gl->gl_last_demote = jiffies - (2 * HZ);
}
static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 994eb7f..7b77bb2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1955,6 +1955,12 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
return tdiff > (msecs * 1000 * 1000);
}
+static bool gfs2_rgrp_demoted_recently(const struct gfs2_blkreserv *rs,
+ u32 max_age_jiffies, u32 loop)
+{
+ return loop < 2 && time_before(jiffies, rs->rs_rbm.rgd->rd_gl->gl_last_demote + max_age_jiffies); /* only avoid on passes 0 and 1; later passes consider every rgrp */
+}
+
static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -2077,6 +2083,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
goto next_rgrp;
}
}
+
+ if (gfs2_rgrp_demoted_recently(rs, HZ, loops))
+ goto next_rgrp;
+
if (gfs2_rgrp_used_recently(rs, 1000) &&
gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
goto next_rgrp;
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index e002525..79935dc 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -161,9 +161,9 @@ static inline u8 glock_trace_state(unsigned int state)
/* Callback (local or remote) requesting lock demotion */
TRACE_EVENT(gfs2_demote_rq,
- TP_PROTO(const struct gfs2_glock *gl, bool remote),
+ TP_PROTO(const struct gfs2_glock *gl, bool remote, unsigned long delay),
- TP_ARGS(gl, remote),
+ TP_ARGS(gl, remote, delay),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -173,6 +173,8 @@ static inline u8 glock_trace_state(unsigned int state)
__field( u8, dmt_state )
__field( unsigned long, flags )
__field( bool, remote )
+ __field( unsigned long, gl_last_demote )
+ __field( unsigned long, delay )
),
TP_fast_assign(
@@ -182,15 +184,19 @@ static inline u8 glock_trace_state(unsigned int state)
__entry->cur_state = glock_trace_state(gl->gl_state);
__entry->dmt_state = glock_trace_state(gl->gl_demote_state);
__entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
+ __entry->gl_last_demote = jiffies - gl->gl_last_demote;
__entry->remote = remote;
+ __entry->delay = delay;
),
- TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %s",
+ TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %lu delay %lu %s",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
(unsigned long long)__entry->glnum,
glock_trace_name(__entry->cur_state),
glock_trace_name(__entry->dmt_state),
show_glock_flags(__entry->flags),
+ __entry->gl_last_demote,
+ __entry->delay,
__entry->remote ? "remote" : "local")
);
--
1.8.3.1
next prev parent reply other threads:[~2018-09-20 14:52 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-09-20 14:52 [Cluster-devel] [PATCH 0/2] GFS2: inplace_reserve performance improvements Mark Syms
2018-09-20 14:52 ` [Cluster-devel] [PATCH 1/2] Add some randomisation to the GFS2 resource group allocator Mark Syms
2018-09-20 14:52 ` Mark Syms [this message]
2018-09-20 17:17 ` [Cluster-devel] [PATCH 0/2] GFS2: inplace_reserve performance improvements Bob Peterson
2018-09-20 17:47 ` Mark Syms
2018-09-20 18:16 ` Steven Whitehouse
2018-09-28 12:23 ` Bob Peterson
2018-09-28 12:36 ` Mark Syms
2018-09-28 12:50 ` Mark Syms
2018-09-28 13:18 ` Steven Whitehouse
2018-09-28 13:43 ` Tim Smith
2018-09-28 13:59 ` Bob Peterson
2018-09-28 14:11 ` Mark Syms
2018-09-28 15:09 ` Tim Smith
2018-09-28 15:09 ` Steven Whitehouse
2018-09-28 12:55 ` Bob Peterson
2018-09-28 13:56 ` Mark Syms
2018-10-02 13:50 ` Mark Syms
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1537455133-48589-3-git-send-email-mark.syms@citrix.com \
--to=mark.syms@citrix.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).