From mboxrd@z Thu Jan  1 00:00:00 1970
From: Bob Peterson
Date: Fri, 30 Jun 2017 14:18:36 -0400 (EDT)
Subject: [Cluster-devel] [GFS2 PATCH] GFS2: Convert LRU list to use a hash table to reduce contention
In-Reply-To: <1321492262.27823235.1498846645921.JavaMail.zimbra@redhat.com>
Message-ID: <1075213831.27823397.1498846716685.JavaMail.zimbra@redhat.com>
List-Id:
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

Hi,

This patch changes GFS2's LRU list from a single linked list, protected
by one heavily contended spin_lock, to a hash table of linked lists,
each bucket with its own lock, so the same work is spread across many
locks with far less contention on each. Each glock's bucket key is
assigned round-robin at glock creation time, which distributes the
glocks evenly across the buckets.

Signed-off-by: Bob Peterson
---
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6cd71c5..e1e16c4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -63,10 +63,32 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
 
 static struct dentry *gfs2_root;
 static struct workqueue_struct *glock_workqueue;
+static atomic_t lru_bucket = ATOMIC_INIT(0);
+
 struct workqueue_struct *gfs2_delete_workqueue;
-static LIST_HEAD(lru_list);
+
+#define LRU_HASH_BITS 12
+#define LRU_HASH_BUCKETS BIT(LRU_HASH_BITS)
+#define LRU_HASH_MASK (LRU_HASH_BUCKETS - 1)
+
+struct gfs2_lru_bucket {
+	struct list_head lru_list;
+	rwlock_t lru_lock;
+};
+
+static struct gfs2_lru_bucket lru_locks[LRU_HASH_BUCKETS];
+
+static inline void lock_lru_bucket(unsigned bucket)
+{
+	write_lock(&lru_locks[bucket & LRU_HASH_MASK].lru_lock);
+}
+
+static inline void unlock_lru_bucket(unsigned bucket)
+{
+	write_unlock(&lru_locks[bucket & LRU_HASH_MASK].lru_lock);
+}
+
 static atomic_t lru_count = ATOMIC_INIT(0);
-static DEFINE_SPINLOCK(lru_lock);
 
 #define GFS2_GL_HASH_SHIFT 15
 #define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT)
@@ -126,30 +148,36 @@ static int demote_ok(const struct gfs2_glock *gl)
 	return 1;
 }
 
-
-void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
+static void remove_from_lru_locked(struct gfs2_glock *gl)
 {
-	spin_lock(&lru_lock);
-
-	if (!list_empty(&gl->gl_lru))
+	if (!list_empty(&gl->gl_lru)) {
 		list_del_init(&gl->gl_lru);
-	else
-		atomic_inc(&lru_count);
+		atomic_dec(&lru_count);
+	}
+}
+
+static void glock_remove_from_lru(struct gfs2_glock *gl)
+{
+	if (!test_and_clear_bit(GLF_LRU, &gl->gl_flags))
+		return;
 
-	list_add_tail(&gl->gl_lru, &lru_list);
-	set_bit(GLF_LRU, &gl->gl_flags);
-	spin_unlock(&lru_lock);
+	lock_lru_bucket(gl->gl_lru_bucket);
+	remove_from_lru_locked(gl);
+	unlock_lru_bucket(gl->gl_lru_bucket);
 }
 
-static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
 {
-	spin_lock(&lru_lock);
-	if (!list_empty(&gl->gl_lru)) {
-		list_del_init(&gl->gl_lru);
-		atomic_dec(&lru_count);
-		clear_bit(GLF_LRU, &gl->gl_flags);
-	}
-	spin_unlock(&lru_lock);
+	glock_remove_from_lru(gl);
+	if (test_and_set_bit(GLF_LRU, &gl->gl_flags))
+		return;
+
+	lock_lru_bucket(gl->gl_lru_bucket);
+	BUG_ON(!list_empty(&gl->gl_lru));
+	atomic_inc(&lru_count);
+	list_add_tail(&gl->gl_lru,
+		      &lru_locks[gl->gl_lru_bucket & LRU_HASH_MASK].lru_list);
+	unlock_lru_bucket(gl->gl_lru_bucket);
 }
 
 /*
@@ -182,7 +210,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
 
 	lockref_mark_dead(&gl->gl_lockref);
 
-	gfs2_glock_remove_from_lru(gl);
+	glock_remove_from_lru(gl);
 	spin_unlock(&gl->gl_lockref.lock);
 	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
 	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
@@ -746,6 +774,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
 	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
 	INIT_WORK(&gl->gl_delete, delete_work_func);
+	gl->gl_lru_bucket = atomic_inc_return(&lru_bucket);
+	if (gl->gl_lru_bucket >= LRU_HASH_BUCKETS) {
+		gl->gl_lru_bucket = 0;
+		atomic_set(&lru_bucket, gl->gl_lru_bucket);
+	}
 
 	mapping = gfs2_glock2aspace(gl);
 	if (mapping) {
@@ -1010,8 +1043,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
 		return -EIO;
 
-	if (test_bit(GLF_LRU, &gl->gl_flags))
-		gfs2_glock_remove_from_lru(gl);
+	glock_remove_from_lru(gl);
 
 	spin_lock(&gl->gl_lockref.lock);
 	add_to_queue(gh);
@@ -1343,24 +1375,20 @@ static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
 }
 
 /**
- * gfs2_dispose_glock_lru - Demote a list of glocks
+ * gfs2_dispose_glock_lru - Demote a list of glocks from the same LRU bucket
  * @list: The list to dispose of
  *
  * Disposing of glocks may involve disk accesses, so that here we sort
 * the glocks by number (i.e. disk location of the inodes) so that if
  * there are any such accesses, they'll be sent in order (mostly).
- *
- * Must be called under the lru_lock, but may drop and retake this
- * lock. While the lru_lock is dropped, entries may vanish from the
- * list, but no new entries will appear on the list (since it is
- * private)
  */
 
-static void gfs2_dispose_glock_lru(struct list_head *list)
-__releases(&lru_lock)
-__acquires(&lru_lock)
+static long gfs2_dispose_glock_lru(struct list_head *list)
 {
-	struct gfs2_glock *gl;
+	struct gfs2_glock *gl = NULL;
+	LIST_HEAD(rejects);
+	int reject_count = 0;
+	long disposed = 0;
 
 	list_sort(NULL, list, glock_cmp);
 
@@ -1369,23 +1397,30 @@ __acquires(&lru_lock)
 		list_del_init(&gl->gl_lru);
 		if (!spin_trylock(&gl->gl_lockref.lock)) {
 add_back_to_lru:
-			list_add(&gl->gl_lru, &lru_list);
-			atomic_inc(&lru_count);
+			list_add_tail(&gl->gl_lru, &rejects);
+			reject_count++;
 			continue;
 		}
 		if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
 			spin_unlock(&gl->gl_lockref.lock);
 			goto add_back_to_lru;
 		}
-		clear_bit(GLF_LRU, &gl->gl_flags);
+		disposed++;
 		gl->gl_lockref.count++;
 		if (demote_ok(gl))
 			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
 		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
 		__gfs2_glock_queue_work(gl, 0);
 		spin_unlock(&gl->gl_lockref.lock);
-		cond_resched_lock(&lru_lock);
 	}
+	if (!list_empty(&rejects)) {
+		gl = list_first_entry(&rejects, struct gfs2_glock, gl_lru);
+		lock_lru_bucket(gl->gl_lru_bucket);
+		list_splice(&rejects, &lru_locks[gl->gl_lru_bucket].lru_list);
+		atomic_add(reject_count, &lru_count);
+		unlock_lru_bucket(gl->gl_lru_bucket);
+	}
+	return disposed;
 }
 
 /**
@@ -1399,30 +1434,33 @@ __acquires(&lru_lock)
 
 static long gfs2_scan_glock_lru(int nr)
 {
-	struct gfs2_glock *gl;
-	LIST_HEAD(skipped);
+	struct gfs2_glock *gl, *tmp;
 	LIST_HEAD(dispose);
+	int start_bucket, bucket;
 	long freed = 0;
+	static int last_lru_scan = 0;
 
-	spin_lock(&lru_lock);
-	while ((nr-- >= 0) && !list_empty(&lru_list)) {
-		gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
-
-		/* Test for being demotable */
-		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
-			list_move(&gl->gl_lru, &dispose);
-			atomic_dec(&lru_count);
-			freed++;
-			continue;
+	start_bucket = bucket = (last_lru_scan & LRU_HASH_MASK);
+	do {
+		lock_lru_bucket(bucket);
+		list_for_each_entry_safe(gl, tmp, &lru_locks[bucket].lru_list,
+					 gl_lru) {
+			/* Test for being demotable */
+			if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
+				if (test_and_clear_bit(GLF_LRU, &gl->gl_flags))
+					remove_from_lru_locked(gl);
+				list_add_tail(&gl->gl_lru, &dispose);
+				nr--;
+				if (nr == 0)
+					break;
+			}
 		}
-
-		list_move(&gl->gl_lru, &skipped);
-	}
-	list_splice(&skipped, &lru_list);
-	if (!list_empty(&dispose))
-		gfs2_dispose_glock_lru(&dispose);
-	spin_unlock(&lru_lock);
-
+		unlock_lru_bucket(bucket);
+		if (!list_empty(&dispose))
+			freed += gfs2_dispose_glock_lru(&dispose);
+		bucket = (bucket + 1) & LRU_HASH_MASK;
+	} while (nr && (bucket != start_bucket));
+	last_lru_scan = bucket;
 	return freed;
 }
 
@@ -1504,7 +1542,7 @@ static void thaw_glock(struct gfs2_glock *gl)
 
 static void clear_glock(struct gfs2_glock *gl)
 {
-	gfs2_glock_remove_from_lru(gl);
+	glock_remove_from_lru(gl);
 
 	spin_lock(&gl->gl_lockref.lock);
 	if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1704,7 +1742,7 @@ void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 	dtime *= 1000000/HZ; /* demote time in uSec */
 	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
 		dtime = 0;
-	gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
+	gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld l:%d\n",
 		  state2str(gl->gl_state),
 		  gl->gl_name.ln_type,
 		  (unsigned long long)gl->gl_name.ln_number,
@@ -1713,7 +1751,8 @@ void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 		  state2str(gl->gl_demote_state), dtime,
 		  atomic_read(&gl->gl_ail_count),
 		  atomic_read(&gl->gl_revokes),
-		  (int)gl->gl_lockref.count, gl->gl_hold_time);
+		  (int)gl->gl_lockref.count, gl->gl_hold_time,
+		  gl->gl_lru_bucket);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list)
 		dump_holder(seq, gh);
 
@@ -1797,6 +1836,12 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
 int __init gfs2_glock_init(void)
 {
 	int ret;
+	unsigned i;
+
+	for (i = 0; i < LRU_HASH_BUCKETS; i++) {
+		INIT_LIST_HEAD(&lru_locks[i].lru_list);
+		rwlock_init(&lru_locks[i].lru_lock);
+	}
 
 	ret = rhashtable_init(&gl_hash_table, &ht_parms);
 	if (ret < 0)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 2ad003b..e8c0f2b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -374,6 +374,7 @@ struct gfs2_glock {
 		} gl_vm;
 	};
 	struct rhash_head gl_node;
+	int gl_lru_bucket;
 };
 
 #define GFS2_MIN_LVB_SIZE 32   /* Min size of LVB that gfs2 supports */
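
For anyone who wants to poke at the locking scheme outside the kernel,
below is a rough userspace sketch of the same idea: a fixed array of
buckets, each with its own list and lock, and entries assigned to
buckets round-robin so that concurrent adds and removes rarely fight
over the same lock. It is only an illustration, not code from the
patch: the names (lru_entry, lru_bucket_add, and so on) are made up,
it uses pthreads instead of kernel locking, and it masks the
round-robin counter instead of resetting it the way gfs2_glock_get()
does above, though the even distribution comes out the same.

/* Rough userspace sketch of a hashed LRU with per-bucket locks.
 * Illustrative only; these names do not appear in the GFS2 patch above. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define LRU_HASH_BITS    4
#define LRU_HASH_BUCKETS (1u << LRU_HASH_BITS)
#define LRU_HASH_MASK    (LRU_HASH_BUCKETS - 1)

struct lru_entry {
	struct lru_entry *prev, *next;	/* node in a circular doubly linked list */
	unsigned bucket;		/* bucket chosen once, at creation time */
};

struct lru_bucket {
	struct lru_entry head;		/* list head; head.next == &head means empty */
	pthread_mutex_t lock;		/* protects only this bucket's list */
};

static struct lru_bucket buckets[LRU_HASH_BUCKETS];
static atomic_uint next_bucket;		/* round-robin bucket selector */

static void lru_init(void)
{
	for (unsigned i = 0; i < LRU_HASH_BUCKETS; i++) {
		buckets[i].head.prev = buckets[i].head.next = &buckets[i].head;
		pthread_mutex_init(&buckets[i].lock, NULL);
	}
}

/* Pick a bucket round-robin, analogous to how the patch assigns
 * gl_lru_bucket in gfs2_glock_get(); here the counter is masked
 * rather than reset when it wraps. */
static void lru_entry_init(struct lru_entry *e)
{
	e->bucket = atomic_fetch_add(&next_bucket, 1) & LRU_HASH_MASK;
	e->prev = e->next = e;
}

static void lru_bucket_add(struct lru_entry *e)
{
	struct lru_bucket *b = &buckets[e->bucket];

	pthread_mutex_lock(&b->lock);	/* contention limited to one bucket */
	e->prev = b->head.prev;		/* insert at the tail, LRU order */
	e->next = &b->head;
	b->head.prev->next = e;
	b->head.prev = e;
	pthread_mutex_unlock(&b->lock);
}

static void lru_bucket_del(struct lru_entry *e)
{
	struct lru_bucket *b = &buckets[e->bucket];

	pthread_mutex_lock(&b->lock);
	e->prev->next = e->next;
	e->next->prev = e->prev;
	e->prev = e->next = e;		/* leave the entry self-linked */
	pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	struct lru_entry e1, e2;

	lru_init();
	lru_entry_init(&e1);
	lru_entry_init(&e2);
	lru_bucket_add(&e1);
	lru_bucket_add(&e2);
	printf("e1 -> bucket %u, e2 -> bucket %u\n", e1.bucket, e2.bucket);
	lru_bucket_del(&e1);
	lru_bucket_del(&e2);
	return 0;
}

In this toy version e1 and e2 land in consecutive buckets, so two
threads adding or removing them would take different locks, which is
the contention-spreading effect the patch is after.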