From mboxrd@z Thu Jan 1 00:00:00 1970 From: Bob Peterson Date: Wed, 26 Jan 2011 21:25:44 -0500 (EST) Subject: [Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2 - Try #2 In-Reply-To: <1296075283.2575.88.camel@dolmen> Message-ID: <133260658.181376.1296095144124.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit ----- Original Message ----- | Hi, | | You shouldn't need to do the dual workqueue trick upstream since the | workqueues will already start as many threads as required (so even | better than just using the two here). If that isn't happening we | should | ask Tejun about it. | | So I think we only need the min hold time bit here, | | Steve. Hi, Based on your feedback, I reworked this patch a bit. This is take two. Rather than using a "qwork" function to police the delay based on glock type, I went back to the original queue_delayed_work and adjusted delay based on glock type. So this version has fewer lines changed, but still accomplishes the same thing. Note that I had to tweak function glock_work_func in order to preserve the rather odd logic that decides whether or not to actually queue the work. This patch is a performance improvement for GFS2 in a clustered environment. It makes the glock hold time self-adjusting. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson Bouncing locks in a cluster is slow in GFS2 -- [Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2 fs/gfs2/glock.c | 39 +++++++++++++++++++++++++++++---------- fs/gfs2/glock.h | 6 ++++++ fs/gfs2/glops.c | 2 -- fs/gfs2/incore.h | 2 +- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c75d499..0523b20 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -407,6 +407,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); + if (new_state != gl->gl_target) + /* shorten our minimum hold time */ + gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, + GL_GLOCK_MIN_HOLD); gl->gl_state = new_state; gl->gl_tchange = jiffies; } @@ -670,16 +674,21 @@ static void glock_work_func(struct work_struct *work) gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + holdtime = gl->gl_tchange + gl->gl_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); } run_queue(gl, 0); spin_unlock(&gl->gl_spin); - if (!delay || - queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + if (!delay) gfs2_glock_put(gl); + else { + if (gl->gl_name.ln_type != LM_TYPE_INODE) + delay = 0; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); + } if (drop_ref) gfs2_glock_put(gl); } @@ -741,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; + gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); @@ -852,8 +862,15 @@ static int gfs2_glock_demote_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { + unsigned long time1 = jiffies; + might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); + if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ + /* Lengthen the minimum hold time. */ + gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + + GL_GLOCK_HOLD_INCR, + GL_GLOCK_MAX_HOLD); } static void wait_on_demote(struct gfs2_glock *gl) @@ -1086,8 +1103,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && - !test_bit(GLF_DEMOTE, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; + !test_bit(GLF_DEMOTE, &gl->gl_flags) && + gl->gl_name.ln_type == LM_TYPE_INODE) + delay = gl->gl_hold_time; if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1270,12 +1288,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) unsigned long now = jiffies; gfs2_glock_hold(gl); - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; - if (test_bit(GLF_QUEUED, &gl->gl_flags)) { + holdtime = gl->gl_tchange + gl->gl_hold_time; + if (test_bit(GLF_QUEUED, &gl->gl_flags) && + gl->gl_name.ln_type == LM_TYPE_INODE) { if (time_before(now, holdtime)) delay = holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; + delay = gl->gl_hold_time; } spin_lock(&gl->gl_spin); @@ -1658,7 +1677,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d m:%ld\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, @@ -1666,7 +1685,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), - atomic_read(&gl->gl_ref)); + atomic_read(&gl->gl_ref), gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index afa8bfe..3233add 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -113,6 +113,12 @@ enum { #define GLR_TRYFAILED 13 +#define GL_GLOCK_MAX_HOLD (long)(HZ / 5) +#define GL_GLOCK_DFT_HOLD (long)(HZ / 5) +#define GL_GLOCK_MIN_HOLD (long)(0) +#define GL_GLOCK_HOLD_INCR (long)(HZ / 20) +#define GL_GLOCK_HOLD_DECR (long)(HZ / 40) + struct lm_lockops { const char *lm_proto_name; int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index ac5fac9..bba125e 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -399,7 +399,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_lock = inode_go_lock, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, - .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; @@ -410,7 +409,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, - .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 720c1e6..f21f075 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -163,7 +163,6 @@ struct gfs2_glock_operations { int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); void (*go_callback) (struct gfs2_glock *gl); const int go_type; - const unsigned long go_min_hold_time; const unsigned long go_flags; #define GLOF_ASPACE 1 }; @@ -237,6 +236,7 @@ struct gfs2_glock { struct delayed_work gl_work; struct work_struct gl_delete; struct rcu_head gl_rcu; + long gl_hold_time; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */