From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark Syms Date: Thu, 20 Sep 2018 15:52:13 +0100 Subject: [Cluster-devel] [PATCH 2/2] GFS2: Avoid recently demoted rgrps. In-Reply-To: <1537455133-48589-1-git-send-email-mark.syms@citrix.com> References: <1537455133-48589-1-git-send-email-mark.syms@citrix.com> Message-ID: <1537455133-48589-3-git-send-email-mark.syms@citrix.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit When under heavy I/O load from two or more hosts, resource group allocation can result in glocks being bounced around between hosts. Follow the example of inodes: if we have local waiters when asked to demote the glock on a resource group, add a delay. Additionally, track when we were last asked to demote a lock and, when assessing resource groups in the allocator, prefer, in the first two loop iterations, not to use resource groups where we've been asked to demote the glock within the last second. Signed-off-by: Mark Syms --- fs/gfs2/glock.c | 7 +++++-- fs/gfs2/incore.h | 2 ++ fs/gfs2/main.c | 1 + fs/gfs2/rgrp.c | 10 ++++++++++ fs/gfs2/trace_gfs2.h | 12 +++++++++--- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4614ee2..94ef947 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -973,7 +973,9 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, } if (gl->gl_ops->go_callback) gl->gl_ops->go_callback(gl, remote); - trace_gfs2_demote_rq(gl, remote); + trace_gfs2_demote_rq(gl, remote, delay); + if (remote && !delay) + gl->gl_last_demote = jiffies; } void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 
@@ -1339,7 +1341,8 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) gfs2_glock_hold(gl); holdtime = gl->gl_tchange + gl->gl_hold_time; if (test_bit(GLF_QUEUED, &gl->gl_flags) && - gl->gl_name.ln_type == LM_TYPE_INODE) { + (gl->gl_name.ln_type == LM_TYPE_INODE || + gl->gl_name.ln_type == LM_TYPE_RGRP)) { if (time_before(now, holdtime)) delay = holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index b96d39c..e3d5b10 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -366,6 +366,8 @@ struct gfs2_glock { gl_reply:8; /* Last reply from the dlm */ unsigned long gl_demote_time; /* time of first demote request */ + unsigned long gl_last_demote; /* jiffies at last demote transition */ + long gl_hold_time; struct list_head gl_holders; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 2d55e2c..2183c73 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -58,6 +58,7 @@ static void gfs2_init_glock_once(void *foo) INIT_LIST_HEAD(&gl->gl_ail_list); atomic_set(&gl->gl_ail_count, 0); atomic_set(&gl->gl_revokes, 0); + gl->gl_last_demote = jiffies - (2 * HZ); } static void gfs2_init_gl_aspace_once(void *foo) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 994eb7f..7b77bb2 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1955,6 +1955,12 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, return tdiff > (msecs * 1000 * 1000); } +static bool gfs2_rgrp_demoted_recently(const struct gfs2_blkreserv *rs, + u32 max_age_jiffies, u32 loop) +{ + return time_before(jiffies, rs->rs_rbm.rgd->rd_gl->gl_last_demote + max_age_jiffies); +} + static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) { const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); @@ -2077,6 +2083,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto next_rgrp; } } + + if (gfs2_rgrp_demoted_recently(rs, HZ, loops)) + goto next_rgrp; + if (gfs2_rgrp_used_recently(rs, 1000) && 
gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) goto next_rgrp; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index e002525..79935dc 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -161,9 +161,9 @@ static inline u8 glock_trace_state(unsigned int state) /* Callback (local or remote) requesting lock demotion */ TRACE_EVENT(gfs2_demote_rq, - TP_PROTO(const struct gfs2_glock *gl, bool remote), + TP_PROTO(const struct gfs2_glock *gl, bool remote, unsigned long delay), - TP_ARGS(gl, remote), + TP_ARGS(gl, remote, delay), TP_STRUCT__entry( __field( dev_t, dev ) @@ -173,6 +173,8 @@ static inline u8 glock_trace_state(unsigned int state) __field( u8, dmt_state ) __field( unsigned long, flags ) __field( bool, remote ) + __field( unsigned long, gl_last_demote ) + __field( unsigned long, delay ) ), TP_fast_assign( @@ -182,15 +184,19 @@ static inline u8 glock_trace_state(unsigned int state) __entry->cur_state = glock_trace_state(gl->gl_state); __entry->dmt_state = glock_trace_state(gl->gl_demote_state); __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0); + __entry->gl_last_demote = jiffies - gl->gl_last_demote; __entry->remote = remote; + __entry->delay = delay; ), - TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %s", + TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %lu delay %lu %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, (unsigned long long)__entry->glnum, glock_trace_name(__entry->cur_state), glock_trace_name(__entry->dmt_state), show_glock_flags(__entry->flags), + __entry->gl_last_demote, + __entry->delay, __entry->remote ? "remote" : "local") ); -- 1.8.3.1