From mboxrd@z Thu Jan 1 00:00:00 1970 From: Steven Whitehouse Date: Mon, 13 Jul 2015 12:19:33 +0100 Subject: [Cluster-devel] [GFS2 PATCH 2/2] GFS2: Use resizable hash table for glocks In-Reply-To: <1436466345-11383-3-git-send-email-rpeterso@redhat.com> References: <1436466345-11383-1-git-send-email-rpeterso@redhat.com> <1436466345-11383-3-git-send-email-rpeterso@redhat.com> Message-ID: <55A39EC5.6010905@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, Looks good. Acked-by: Steven Whitehouse Steve. On 09/07/15 19:25, Bob Peterson wrote: > This patch changes the glock hash table from a normal hash table to > a resizable hash table, which scales better. This also simplifies > a lot of code. > --- > fs/gfs2/glock.c | 282 +++++++++++++++++++++---------------------------------- > fs/gfs2/incore.h | 4 +- > 2 files changed, 111 insertions(+), 175 deletions(-) > > diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c > index 25e0389..813de00 100644 > --- a/fs/gfs2/glock.c > +++ b/fs/gfs2/glock.c > @@ -34,6 +34,7 @@ > #include > #include > #include > +#include > > #include "gfs2.h" > #include "incore.h" > @@ -51,8 +52,12 @@ > > struct gfs2_glock_iter { > int hash; /* hash bucket index */ > - unsigned nhash; /* Index within current bucket */ > struct gfs2_sbd *sdp; /* incore superblock */ > +}; > + > +struct gfs2_glock_rht_iter { > + struct gfs2_sbd *sdp; /* incore superblock */ > + struct rhashtable_iter hti; /* rhashtable iterator */ > struct gfs2_glock *gl; /* current glock struct */ > loff_t last_pos; /* last position */ > }; > @@ -70,44 +75,20 @@ static DEFINE_SPINLOCK(lru_lock); > > #define GFS2_GL_HASH_SHIFT 15 > #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) > -#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) > - > -static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; > -static struct dentry *gfs2_root; > - > -/** > - * gl_hash() - Turn glock number into hash bucket number > - * @lock: The glock number > - * > - * Returns: The number of the corresponding hash bucket > - */ > - > -static unsigned int gl_hash(const struct gfs2_sbd *sdp, > - const struct lm_lockname *name) > -{ > - unsigned int h; > - > - h = jhash(&name->ln_number, sizeof(u64), 0); > - h = jhash(&name->ln_type, sizeof(unsigned int), h); > - h = jhash(&sdp, sizeof(struct gfs2_sbd *), h); > - h &= GFS2_GL_HASH_MASK; > - > - return h; > -} > > -static inline void spin_lock_bucket(unsigned int hash) > -{ > - hlist_bl_lock(&gl_hash_table[hash]); > -} > +struct rhashtable_params ht_parms = { > + .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4, > + .key_len = sizeof(struct lm_lockname), > + .key_offset = offsetof(struct gfs2_glock, gl_name), > + .head_offset = offsetof(struct gfs2_glock, gl_node), > + .hashfn = jhash, > +}; > > -static inline void spin_unlock_bucket(unsigned int hash) > -{ > - hlist_bl_unlock(&gl_hash_table[hash]); > -} > +static struct rhashtable gl_hash_table; > > -static void gfs2_glock_dealloc(struct rcu_head *rcu) > +void gfs2_glock_free(struct gfs2_glock *gl) > { > - struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); > + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; > > if (gl->gl_ops->go_flags & GLOF_ASPACE) { > kmem_cache_free(gfs2_glock_aspace_cachep, gl); > @@ -115,13 +96,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) > kfree(gl->gl_lksb.sb_lvbptr); > kmem_cache_free(gfs2_glock_cachep, gl); > } > -} > - > -void gfs2_glock_free(struct gfs2_glock *gl) > -{ > - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; > - > - call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); > if (atomic_dec_and_test(&sdp->sd_glock_disposal)) > wake_up(&sdp->sd_glock_wait); > } > @@ -202,9 +176,8 @@ void gfs2_glock_put(struct gfs2_glock *gl) > > gfs2_glock_remove_from_lru(gl); > spin_unlock(&gl->gl_lockref.lock); > - spin_lock_bucket(gl->gl_hash); > - hlist_bl_del_rcu(&gl->gl_list); > - spin_unlock_bucket(gl->gl_hash); > + if (gl->gl_node.next != NULL) > + rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); > GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); > GLOCK_BUG_ON(gl, mapping && mapping->nrpages); > trace_gfs2_glock_put(gl); > @@ -212,30 +185,6 @@ void gfs2_glock_put(struct gfs2_glock *gl) > } > > /** > - * search_bucket() - Find struct gfs2_glock by lock number > - * @bucket: the bucket to search > - * @name: The lock name > - * > - * Returns: NULL, or the struct gfs2_glock with the requested number > - */ > - > -static struct gfs2_glock *search_bucket(unsigned int hash, > - const struct lm_lockname *name) > -{ > - struct gfs2_glock *gl; > - struct hlist_bl_node *h; > - > - hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { > - if (!lm_name_equal(&gl->gl_name, name)) > - continue; > - if (lockref_get_not_dead(&gl->gl_lockref)) > - return gl; > - } > - > - return NULL; > -} > - > -/** > * may_grant - check if its ok to grant a new lock > * @gl: The glock > * @gh: The lock request which we wish to grant > @@ -704,14 +653,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, > struct lm_lockname name = { .ln_number = number, > .ln_type = glops->go_type, > .ln_sbd = sdp }; > - struct gfs2_glock *gl, *tmp; > - unsigned int hash = gl_hash(sdp, &name); > + struct gfs2_glock *gl, *tmp = NULL; > struct address_space *mapping; > struct kmem_cache *cachep; > + int ret, tries = 0; > > - rcu_read_lock(); > - gl = search_bucket(hash, &name); > - rcu_read_unlock(); > + gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); > + if (gl && !lockref_get_not_dead(&gl->gl_lockref)) > + gl = NULL; > > *glp = gl; > if (gl) > @@ -738,13 +687,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, > } > > atomic_inc(&sdp->sd_glock_disposal); > + gl->gl_node.next = NULL; > gl->gl_flags = 0; > gl->gl_name = name; > gl->gl_lockref.count = 1; > gl->gl_state = LM_ST_UNLOCKED; > gl->gl_target = LM_ST_UNLOCKED; > gl->gl_demote_state = LM_ST_EXCLUSIVE; > - gl->gl_hash = hash; > gl->gl_ops = glops; > gl->gl_dstamp = ktime_set(0, 0); > preempt_disable(); > @@ -769,22 +718,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, > mapping->writeback_index = 0; > } > > - spin_lock_bucket(hash); > - tmp = search_bucket(hash, &name); > - if (tmp) { > - spin_unlock_bucket(hash); > - kfree(gl->gl_lksb.sb_lvbptr); > - kmem_cache_free(cachep, gl); > - atomic_dec(&sdp->sd_glock_disposal); > - gl = tmp; > - } else { > - hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); > - spin_unlock_bucket(hash); > +again: > + ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node, > + ht_parms); > + if (ret == 0) { > + *glp = gl; > + return 0; > } > > - *glp = gl; > + if (ret == -EEXIST) { > + ret = 0; > + tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms); > + if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) { > + if (++tries < 100) { > + cond_resched(); > + goto again; > + } > + tmp = NULL; > + ret = -ENOMEM; > + } > + } else { > + WARN_ON_ONCE(ret); > + } > + kfree(gl->gl_lksb.sb_lvbptr); > + kmem_cache_free(cachep, gl); > + atomic_dec(&sdp->sd_glock_disposal); > + *glp = tmp; > > - return 0; > + return ret; > } > > /** > @@ -1460,31 +1421,24 @@ static struct shrinker glock_shrinker = { > * > */ > > -static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, > - unsigned int hash) > +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) > { > struct gfs2_glock *gl; > - struct hlist_bl_head *head = &gl_hash_table[hash]; > - struct hlist_bl_node *pos; > + struct rhash_head *pos, *next; > + const struct bucket_table *tbl; > + int i; > > - rcu_read_lock(); > - hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { > - if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) > - examiner(gl); > + tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); > + for (i = 0; i < tbl->size; i++) { > + rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) { > + if ((gl->gl_name.ln_sbd == sdp) && > + lockref_get_not_dead(&gl->gl_lockref)) > + examiner(gl); > + } > } > - rcu_read_unlock(); > cond_resched(); > } > > -static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) > -{ > - unsigned x; > - > - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) > - examine_bucket(examiner, sdp, x); > -} > - > - > /** > * thaw_glock - thaw out a glock which has an unprocessed reply waiting > * @gl: The glock to thaw > @@ -1802,20 +1756,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr) > > int __init gfs2_glock_init(void) > { > - unsigned i; > - for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { > - INIT_HLIST_BL_HEAD(&gl_hash_table[i]); > - } > + int ret; > + > + ret = rhashtable_init(&gl_hash_table, &ht_parms); > + if (ret < 0) > + return ret; > > glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | > WQ_HIGHPRI | WQ_FREEZABLE, 0); > - if (!glock_workqueue) > + if (!glock_workqueue) { > + rhashtable_destroy(&gl_hash_table); > return -ENOMEM; > + } > gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", > WQ_MEM_RECLAIM | WQ_FREEZABLE, > 0); > if (!gfs2_delete_workqueue) { > destroy_workqueue(glock_workqueue); > + rhashtable_destroy(&gl_hash_table); > return -ENOMEM; > } > > @@ -1827,72 +1785,36 @@ int __init gfs2_glock_init(void) > void gfs2_glock_exit(void) > { > unregister_shrinker(&glock_shrinker); > + rhashtable_destroy(&gl_hash_table); > destroy_workqueue(glock_workqueue); > destroy_workqueue(gfs2_delete_workqueue); > } > > -static inline struct gfs2_glock *glock_hash_chain(unsigned hash) > +static void gfs2_glock_iter_next(struct gfs2_glock_rht_iter *gi) > { > - return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), > - struct gfs2_glock, gl_list); > -} > - > -static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) > -{ > - return hlist_bl_entry(rcu_dereference(gl->gl_list.next), > - struct gfs2_glock, gl_list); > -} > - > -static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) > -{ > - struct gfs2_glock *gl; > - > do { > - gl = gi->gl; > - if (gl) { > - gi->gl = glock_hash_next(gl); > - gi->nhash++; > - } else { > - if (gi->hash >= GFS2_GL_HASH_SIZE) { > - rcu_read_unlock(); > - return 1; > - } > - gi->gl = glock_hash_chain(gi->hash); > - gi->nhash = 0; > - } > - while (gi->gl == NULL) { > - gi->hash++; > - if (gi->hash >= GFS2_GL_HASH_SIZE) { > - rcu_read_unlock(); > - return 1; > - } > - gi->gl = glock_hash_chain(gi->hash); > - gi->nhash = 0; > - } > + gi->gl = rhashtable_walk_next(&gi->hti); > /* Skip entries for other sb and dead entries */ > - } while (gi->sdp != gi->gl->gl_name.ln_sbd || > - __lockref_is_dead(&gi->gl->gl_lockref)); > - > - return 0; > + } while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) || > + __lockref_is_dead(&gi->gl->gl_lockref))); > } > > static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) > { > - struct gfs2_glock_iter *gi = seq->private; > + struct gfs2_glock_rht_iter *gi = seq->private; > loff_t n = *pos; > + int ret; > > if (gi->last_pos <= *pos) > - n = gi->nhash + (*pos - gi->last_pos); > - else > - gi->hash = 0; > + n = (*pos - gi->last_pos); > > - gi->nhash = 0; > - rcu_read_lock(); > + ret = rhashtable_walk_start(&gi->hti); > + if (ret) > + return NULL; > > do { > - if (gfs2_glock_iter_next(gi)) > - return NULL; > - } while (n--); > + gfs2_glock_iter_next(gi); > + } while (gi->gl && n--); > > gi->last_pos = *pos; > return gi->gl; > @@ -1901,23 +1823,20 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) > static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, > loff_t *pos) > { > - struct gfs2_glock_iter *gi = seq->private; > + struct gfs2_glock_rht_iter *gi = seq->private; > > (*pos)++; > gi->last_pos = *pos; > - if (gfs2_glock_iter_next(gi)) > - return NULL; > - > + gfs2_glock_iter_next(gi); > return gi->gl; > } > > static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) > { > - struct gfs2_glock_iter *gi = seq->private; > + struct gfs2_glock_rht_iter *gi = seq->private; > > - if (gi->gl) > - rcu_read_unlock(); > gi->gl = NULL; > + rhashtable_walk_stop(&gi->hti); > } > > static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) > @@ -1981,29 +1900,46 @@ static const struct seq_operations gfs2_sbstats_seq_ops = { > static int gfs2_glocks_open(struct inode *inode, struct file *file) > { > int ret = seq_open_private(file, &gfs2_glock_seq_ops, > - sizeof(struct gfs2_glock_iter)); > + sizeof(struct gfs2_glock_rht_iter)); > if (ret == 0) { > struct seq_file *seq = file->private_data; > - struct gfs2_glock_iter *gi = seq->private; > + struct gfs2_glock_rht_iter *gi = seq->private; > + > gi->sdp = inode->i_private; > + gi->last_pos = 0; > seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); > if (seq->buf) > seq->size = GFS2_SEQ_GOODSIZE; > + gi->gl = NULL; > + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); > } > return ret; > } > > +static int gfs2_glocks_release(struct inode *inode, struct file *file) > +{ > + struct seq_file *seq = file->private_data; > + struct gfs2_glock_rht_iter *gi = seq->private; > + > + gi->gl = NULL; > + rhashtable_walk_exit(&gi->hti); > + return seq_release_private(inode, file); > +} > + > static int gfs2_glstats_open(struct inode *inode, struct file *file) > { > int ret = seq_open_private(file, &gfs2_glstats_seq_ops, > - sizeof(struct gfs2_glock_iter)); > + sizeof(struct gfs2_glock_rht_iter)); > if (ret == 0) { > struct seq_file *seq = file->private_data; > - struct gfs2_glock_iter *gi = seq->private; > + struct gfs2_glock_rht_iter *gi = seq->private; > gi->sdp = inode->i_private; > + gi->last_pos = 0; > seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); > if (seq->buf) > seq->size = GFS2_SEQ_GOODSIZE; > + gi->gl = NULL; > + ret = rhashtable_walk_init(&gl_hash_table, &gi->hti); > } > return ret; > } > @@ -2025,7 +1961,7 @@ static const struct file_operations gfs2_glocks_fops = { > .open = gfs2_glocks_open, > .read = seq_read, > .llseek = seq_lseek, > - .release = seq_release_private, > + .release = gfs2_glocks_release, > }; > > static const struct file_operations gfs2_glstats_fops = { > @@ -2033,7 +1969,7 @@ static const struct file_operations gfs2_glstats_fops = { > .open = gfs2_glstats_open, > .read = seq_read, > .llseek = seq_lseek, > - .release = seq_release_private, > + .release = gfs2_glocks_release, > }; > > static const struct file_operations gfs2_sbstats_fops = { > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > index 4de7853..0d63715 100644 > --- a/fs/gfs2/incore.h > +++ b/fs/gfs2/incore.h > @@ -22,6 +22,7 @@ > #include > #include > #include > +#include > > #define DIO_WAIT 0x00000010 > #define DIO_METADATA 0x00000020 > @@ -342,7 +343,6 @@ struct gfs2_glock { > gl_req:2, /* State in last dlm request */ > gl_reply:8; /* Last reply from the dlm */ > > - unsigned int gl_hash; > unsigned long gl_demote_time; /* time of first demote request */ > long gl_hold_time; > struct list_head gl_holders; > @@ -368,7 +368,7 @@ struct gfs2_glock { > loff_t end; > } gl_vm; > }; > - struct rcu_head gl_rcu; > + struct rhash_head gl_node; > }; > > #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */