From mboxrd@z Thu Jan 1 00:00:00 1970 From: Bob Peterson Date: Wed, 26 Jan 2011 15:22:18 -0500 (EST) Subject: [Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2 In-Reply-To: <817613070.177010.1296073161976.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com> Message-ID: <195412653.177053.1296073338490.JavaMail.root@zmail06.collab.prod.int.phx2.redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, This patch is a performance improvement for GFS2 in a clustered environment. It makes the glock hold time self-adjusting. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson Bouncing locks in a cluster is slow in GFS2 -- fs/gfs2/glock.c | 89 ++++++++++++++++++++++++++++++++++++++++-------------- fs/gfs2/glock.h | 6 ++++ fs/gfs2/glops.c | 2 - fs/gfs2/incore.h | 2 +- 4 files changed, 73 insertions(+), 26 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c75d499..117d8e2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -58,7 +58,6 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); static struct dentry *gfs2_root; -static struct workqueue_struct *glock_workqueue; struct workqueue_struct *gfs2_delete_workqueue; static LIST_HEAD(lru_list); static atomic_t lru_count = ATOMIC_INIT(0); @@ -67,9 +66,23 @@ static DEFINE_SPINLOCK(lru_lock); #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) +#define GL_WORKQUEUES 0x2 +#define GL_WQ_MASK 0x1 static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; static struct dentry *gfs2_root; +static struct workqueue_struct *glock_workqueue[GL_WORKQUEUES]; + +static inline int qwork(struct gfs2_glock *gl, unsigned long delay) +{ + struct workqueue_struct *wq; + + wq = glock_workqueue[gl->gl_name.ln_type & GL_WQ_MASK]; + + if (gl->gl_name.ln_type != LM_TYPE_INODE) + delay = 0; + return queue_delayed_work(wq, &gl->gl_work, delay); +} /** * gl_hash() - Turn glock number into hash bucket number @@ -407,6 +420,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); + if (new_state != gl->gl_target) + /* shorten our minimum hold time */ + gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR, + GL_GLOCK_MIN_HOLD); gl->gl_state = new_state; gl->gl_tchange = jiffies; } @@ -550,7 +567,7 @@ __acquires(&gl->gl_spin) GLOCK_BUG_ON(gl, ret); } else { /* lock_nolock */ finish_xmote(gl, target); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (qwork(gl, 0) == 0) gfs2_glock_put(gl); } @@ -623,7 +640,7 @@ out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); smp_mb__after_clear_bit(); gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (qwork(gl, 0) == 0) gfs2_glock_put_nolock(gl); return; @@ -670,15 +687,14 @@ static void glock_work_func(struct work_struct *work) gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { unsigned long holdtime, now = jiffies; - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + holdtime = gl->gl_tchange + gl->gl_hold_time; if (time_before(now, holdtime)) delay = holdtime - now; set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); } run_queue(gl, 0); spin_unlock(&gl->gl_spin); - if (!delay || - queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + if (!delay || qwork(gl, delay) == 0) gfs2_glock_put(gl); if (drop_ref) gfs2_glock_put(gl); @@ -741,6 +757,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; + gl->gl_hold_time = GL_GLOCK_DFT_HOLD; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); @@ -852,8 +869,15 @@ static int gfs2_glock_demote_wait(void *word) static void wait_on_holder(struct gfs2_holder *gh) { + unsigned long time1 = jiffies; + might_sleep(); wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); + if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */ + /* Lengthen the minimum hold time. */ + gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + + GL_GLOCK_HOLD_INCR, + GL_GLOCK_MAX_HOLD); } static void wait_on_demote(struct gfs2_glock *gl) @@ -1087,8 +1111,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_hold(gl); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && !test_bit(GLF_DEMOTE, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; - if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + delay = gl->gl_hold_time; + if (qwork(gl, delay) == 0) gfs2_glock_put(gl); } @@ -1270,18 +1294,18 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) unsigned long now = jiffies; gfs2_glock_hold(gl); - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + holdtime = gl->gl_tchange + gl->gl_hold_time; if (test_bit(GLF_QUEUED, &gl->gl_flags)) { if (time_before(now, holdtime)) delay = holdtime - now; if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) - delay = gl->gl_ops->go_min_hold_time; + delay = gl->gl_hold_time; } spin_lock(&gl->gl_spin); handle_callback(gl, state, delay); spin_unlock(&gl->gl_spin); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + if (qwork(gl, delay) == 0) gfs2_glock_put(gl); } @@ -1343,7 +1367,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) set_bit(GLF_REPLY_PENDING, &gl->gl_flags); smp_wmb(); gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (qwork(gl, 0) == 0) gfs2_glock_put(gl); } @@ -1379,7 +1403,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m } clear_bit(GLF_LOCK, &gl->gl_flags); smp_mb__after_clear_bit(); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (qwork(gl, 0) == 0) gfs2_glock_put_nolock(gl); spin_unlock(&gl->gl_spin); spin_lock(&lru_lock); @@ -1447,7 +1471,7 @@ static void thaw_glock(struct gfs2_glock *gl) return; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (qwork(gl, 0) == 0) gfs2_glock_put(gl); } @@ -1471,7 +1495,7 @@ static void clear_glock(struct gfs2_glock *gl) handle_callback(gl, LM_ST_UNLOCKED, 0); spin_unlock(&gl->gl_spin); gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (qwork(gl, 0) == 0) gfs2_glock_put(gl); } @@ -1510,8 +1534,11 @@ static void dump_glock_func(struct gfs2_glock *gl) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { + unsigned int x; + glock_hash_walk(clear_glock, sdp); - flush_workqueue(glock_workqueue); + for (x = 0; x < GL_WORKQUEUES; x++) + flush_workqueue(glock_workqueue[x]); wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); glock_hash_walk(dump_glock_func, sdp); } @@ -1658,7 +1685,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d m:%ld\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, @@ -1666,7 +1693,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), - atomic_read(&gl->gl_ref)); + atomic_read(&gl->gl_ref), gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); @@ -1685,19 +1712,32 @@ out: int __init gfs2_glock_init(void) { unsigned i; + char qn[32]; + for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { INIT_HLIST_BL_HEAD(&gl_hash_table[i]); } - glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | + for (i = 0; i < GL_WORKQUEUES; i++) { + sprintf(qn, "gfs2workq%d", i); + glock_workqueue[i] = alloc_workqueue(qn, WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZEABLE, 0); - if (IS_ERR(glock_workqueue)) - return PTR_ERR(glock_workqueue); + if (IS_ERR(glock_workqueue[i])) { + int error = PTR_ERR(glock_workqueue[i]); + + while (i > 0) { + i--; + destroy_workqueue(glock_workqueue[i]); + } + return error; + } + } gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { - destroy_workqueue(glock_workqueue); + for (i = 0; i < GL_WORKQUEUES; i++) + destroy_workqueue(glock_workqueue[i]); return PTR_ERR(gfs2_delete_workqueue); } @@ -1708,9 +1748,12 @@ int __init gfs2_glock_init(void) void gfs2_glock_exit(void) { + int i; + unregister_shrinker(&glock_shrinker); - destroy_workqueue(glock_workqueue); destroy_workqueue(gfs2_delete_workqueue); + for (i = 0; i < GL_WORKQUEUES; i++) + destroy_workqueue(glock_workqueue[i]); } static inline struct gfs2_glock *glock_hash_chain(unsigned hash) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index afa8bfe..3233add 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -113,6 +113,12 @@ enum { #define GLR_TRYFAILED 13 +#define GL_GLOCK_MAX_HOLD (long)(HZ / 5) +#define GL_GLOCK_DFT_HOLD (long)(HZ / 5) +#define GL_GLOCK_MIN_HOLD (long)(0) +#define GL_GLOCK_HOLD_INCR (long)(HZ / 20) +#define GL_GLOCK_HOLD_DECR (long)(HZ / 40) + struct lm_lockops { const char *lm_proto_name; int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index ac5fac9..bba125e 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -399,7 +399,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_lock = inode_go_lock, .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, - .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; @@ -410,7 +409,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, - .go_min_hold_time = HZ / 5, .go_flags = GLOF_ASPACE, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 720c1e6..f21f075 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -163,7 +163,6 @@ struct gfs2_glock_operations { int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); void (*go_callback) (struct gfs2_glock *gl); const int go_type; - const unsigned long go_min_hold_time; const unsigned long go_flags; #define GLOF_ASPACE 1 }; @@ -237,6 +236,7 @@ struct gfs2_glock { struct delayed_work gl_work; struct work_struct gl_delete; struct rcu_head gl_rcu; + long gl_hold_time; }; #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */