[Cluster-devel] [GFS2 PATCH 2/2] GFS2: Use resizable hash table for glocks

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

From: Steven Whitehouse <swhiteho@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH 2/2] GFS2: Use resizable hash table for glocks
Date: Mon, 13 Jul 2015 12:19:33 +0100	[thread overview]
Message-ID: <55A39EC5.6010905@redhat.com> (raw)
In-Reply-To: <1436466345-11383-3-git-send-email-rpeterso@redhat.com>

Hi,

Looks good. Acked-by: Steven Whitehouse <swhiteho@redhat.com>

Steve.

On 09/07/15 19:25, Bob Peterson wrote:
> This patch changes the glock hash table from a normal hash table to
> a resizable hash table, which scales better. This also simplifies
> a lot of code.
> ---
>   fs/gfs2/glock.c  | 282 +++++++++++++++++++++----------------------------------
>   fs/gfs2/incore.h |   4 +-
>   2 files changed, 111 insertions(+), 175 deletions(-)
>
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 25e0389..813de00 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -34,6 +34,7 @@
>   #include <linux/percpu.h>
>   #include <linux/list_sort.h>
>   #include <linux/lockref.h>
> +#include <linux/rhashtable.h>
>   
>   #include "gfs2.h"
>   #include "incore.h"
> @@ -51,8 +52,12 @@
>   
>   struct gfs2_glock_iter {
>   	int hash;			/* hash bucket index           */
> -	unsigned nhash;			/* Index within current bucket */
>   	struct gfs2_sbd *sdp;		/* incore superblock           */
> +};
> +
> +struct gfs2_glock_rht_iter {
> +	struct gfs2_sbd *sdp;		/* incore superblock           */
> +	struct rhashtable_iter hti;	/* rhashtable iterator         */
>   	struct gfs2_glock *gl;		/* current glock struct        */
>   	loff_t last_pos;		/* last position               */
>   };
> @@ -70,44 +75,20 @@ static DEFINE_SPINLOCK(lru_lock);
>   
>   #define GFS2_GL_HASH_SHIFT      15
>   #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
> -#define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)
> -
> -static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
> -static struct dentry *gfs2_root;
> -
> -/**
> - * gl_hash() - Turn glock number into hash bucket number
> - * @lock: The glock number
> - *
> - * Returns: The number of the corresponding hash bucket
> - */
> -
> -static unsigned int gl_hash(const struct gfs2_sbd *sdp,
> -			    const struct lm_lockname *name)
> -{
> -	unsigned int h;
> -
> -	h = jhash(&name->ln_number, sizeof(u64), 0);
> -	h = jhash(&name->ln_type, sizeof(unsigned int), h);
> -	h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
> -	h &= GFS2_GL_HASH_MASK;
> -
> -	return h;
> -}
>   
> -static inline void spin_lock_bucket(unsigned int hash)
> -{
> -	hlist_bl_lock(&gl_hash_table[hash]);
> -}
> +struct rhashtable_params ht_parms = {
> +	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
> +	.key_len = sizeof(struct lm_lockname),
> +	.key_offset = offsetof(struct gfs2_glock, gl_name),
> +	.head_offset = offsetof(struct gfs2_glock, gl_node),
> +	.hashfn = jhash,
> +};
>   
> -static inline void spin_unlock_bucket(unsigned int hash)
> -{
> -	hlist_bl_unlock(&gl_hash_table[hash]);
> -}
> +static struct rhashtable gl_hash_table;
>   
> -static void gfs2_glock_dealloc(struct rcu_head *rcu)
> +void gfs2_glock_free(struct gfs2_glock *gl)
>   {
> -	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
> +	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
>   
>   	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
>   		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
> @@ -115,13 +96,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
>   		kfree(gl->gl_lksb.sb_lvbptr);
>   		kmem_cache_free(gfs2_glock_cachep, gl);
>   	}
> -}
> -
> -void gfs2_glock_free(struct gfs2_glock *gl)
> -{
> -	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> -
> -	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
>   	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
>   		wake_up(&sdp->sd_glock_wait);
>   }
> @@ -202,9 +176,8 @@ void gfs2_glock_put(struct gfs2_glock *gl)
>   
>   	gfs2_glock_remove_from_lru(gl);
>   	spin_unlock(&gl->gl_lockref.lock);
> -	spin_lock_bucket(gl->gl_hash);
> -	hlist_bl_del_rcu(&gl->gl_list);
> -	spin_unlock_bucket(gl->gl_hash);
> +	if (gl->gl_node.next != NULL)
> +		rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
>   	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
>   	GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
>   	trace_gfs2_glock_put(gl);
> @@ -212,30 +185,6 @@ void gfs2_glock_put(struct gfs2_glock *gl)
>   }
>   
>   /**
> - * search_bucket() - Find struct gfs2_glock by lock number
> - * @bucket: the bucket to search
> - * @name: The lock name
> - *
> - * Returns: NULL, or the struct gfs2_glock with the requested number
> - */
> -
> -static struct gfs2_glock *search_bucket(unsigned int hash,
> -					const struct lm_lockname *name)
> -{
> -	struct gfs2_glock *gl;
> -	struct hlist_bl_node *h;
> -
> -	hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
> -		if (!lm_name_equal(&gl->gl_name, name))
> -			continue;
> -		if (lockref_get_not_dead(&gl->gl_lockref))
> -			return gl;
> -	}
> -
> -	return NULL;
> -}
> -
> -/**
>    * may_grant - check if its ok to grant a new lock
>    * @gl: The glock
>    * @gh: The lock request which we wish to grant
> @@ -704,14 +653,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>   	struct lm_lockname name = { .ln_number = number,
>   				    .ln_type = glops->go_type,
>   				    .ln_sbd = sdp };
> -	struct gfs2_glock *gl, *tmp;
> -	unsigned int hash = gl_hash(sdp, &name);
> +	struct gfs2_glock *gl, *tmp = NULL;
>   	struct address_space *mapping;
>   	struct kmem_cache *cachep;
> +	int ret, tries = 0;
>   
> -	rcu_read_lock();
> -	gl = search_bucket(hash, &name);
> -	rcu_read_unlock();
> +	gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
> +	if (gl && !lockref_get_not_dead(&gl->gl_lockref))
> +		gl = NULL;
>   
>   	*glp = gl;
>   	if (gl)
> @@ -738,13 +687,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>   	}
>   
>   	atomic_inc(&sdp->sd_glock_disposal);
> +	gl->gl_node.next = NULL;
>   	gl->gl_flags = 0;
>   	gl->gl_name = name;
>   	gl->gl_lockref.count = 1;
>   	gl->gl_state = LM_ST_UNLOCKED;
>   	gl->gl_target = LM_ST_UNLOCKED;
>   	gl->gl_demote_state = LM_ST_EXCLUSIVE;
> -	gl->gl_hash = hash;
>   	gl->gl_ops = glops;
>   	gl->gl_dstamp = ktime_set(0, 0);
>   	preempt_disable();
> @@ -769,22 +718,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>   		mapping->writeback_index = 0;
>   	}
>   
> -	spin_lock_bucket(hash);
> -	tmp = search_bucket(hash, &name);
> -	if (tmp) {
> -		spin_unlock_bucket(hash);
> -		kfree(gl->gl_lksb.sb_lvbptr);
> -		kmem_cache_free(cachep, gl);
> -		atomic_dec(&sdp->sd_glock_disposal);
> -		gl = tmp;
> -	} else {
> -		hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
> -		spin_unlock_bucket(hash);
> +again:
> +	ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
> +					    ht_parms);
> +	if (ret == 0) {
> +		*glp = gl;
> +		return 0;
>   	}
>   
> -	*glp = gl;
> +	if (ret == -EEXIST) {
> +		ret = 0;
> +		tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
> +		if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
> +			if (++tries < 100) {
> +				cond_resched();
> +				goto again;
> +			}
> +			tmp = NULL;
> +			ret = -ENOMEM;
> +		}
> +	} else {
> +		WARN_ON_ONCE(ret);
> +	}
> +	kfree(gl->gl_lksb.sb_lvbptr);
> +	kmem_cache_free(cachep, gl);
> +	atomic_dec(&sdp->sd_glock_disposal);
> +	*glp = tmp;
>   
> -	return 0;
> +	return ret;
>   }
>   
>   /**
> @@ -1460,31 +1421,24 @@ static struct shrinker glock_shrinker = {
>    *
>    */
>   
> -static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
> -			  unsigned int hash)
> +static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
>   {
>   	struct gfs2_glock *gl;
> -	struct hlist_bl_head *head = &gl_hash_table[hash];
> -	struct hlist_bl_node *pos;
> +	struct rhash_head *pos, *next;
> +	const struct bucket_table *tbl;
> +	int i;
>   
> -	rcu_read_lock();
> -	hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
> -		if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
> -			examiner(gl);
> +	tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
> +	for (i = 0; i < tbl->size; i++) {
> +		rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
> +			if ((gl->gl_name.ln_sbd == sdp) &&
> +			    lockref_get_not_dead(&gl->gl_lockref))
> +				examiner(gl);
> +		}
>   	}
> -	rcu_read_unlock();
>   	cond_resched();
>   }
>   
> -static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
> -{
> -	unsigned x;
> -
> -	for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
> -		examine_bucket(examiner, sdp, x);
> -}
> -
> -
>   /**
>    * thaw_glock - thaw out a glock which has an unprocessed reply waiting
>    * @gl: The glock to thaw
> @@ -1802,20 +1756,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
>   
>   int __init gfs2_glock_init(void)
>   {
> -	unsigned i;
> -	for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
> -		INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
> -	}
> +	int ret;
> +
> +	ret = rhashtable_init(&gl_hash_table, &ht_parms);
> +	if (ret < 0)
> +		return ret;
>   
>   	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
>   					  WQ_HIGHPRI | WQ_FREEZABLE, 0);
> -	if (!glock_workqueue)
> +	if (!glock_workqueue) {
> +		rhashtable_destroy(&gl_hash_table);
>   		return -ENOMEM;
> +	}
>   	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
>   						WQ_MEM_RECLAIM | WQ_FREEZABLE,
>   						0);
>   	if (!gfs2_delete_workqueue) {
>   		destroy_workqueue(glock_workqueue);
> +		rhashtable_destroy(&gl_hash_table);
>   		return -ENOMEM;
>   	}
>   
> @@ -1827,72 +1785,36 @@ int __init gfs2_glock_init(void)
>   void gfs2_glock_exit(void)
>   {
>   	unregister_shrinker(&glock_shrinker);
> +	rhashtable_destroy(&gl_hash_table);
>   	destroy_workqueue(glock_workqueue);
>   	destroy_workqueue(gfs2_delete_workqueue);
>   }
>   
> -static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
> +static void gfs2_glock_iter_next(struct gfs2_glock_rht_iter *gi)
>   {
> -	return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
> -			      struct gfs2_glock, gl_list);
> -}
> -
> -static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
> -{
> -	return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
> -			      struct gfs2_glock, gl_list);
> -}
> -
> -static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
> -{
> -	struct gfs2_glock *gl;
> -
>   	do {
> -		gl = gi->gl;
> -		if (gl) {
> -			gi->gl = glock_hash_next(gl);
> -			gi->nhash++;
> -		} else {
> -			if (gi->hash >= GFS2_GL_HASH_SIZE) {
> -				rcu_read_unlock();
> -				return 1;
> -			}
> -			gi->gl = glock_hash_chain(gi->hash);
> -			gi->nhash = 0;
> -		}
> -		while (gi->gl == NULL) {
> -			gi->hash++;
> -			if (gi->hash >= GFS2_GL_HASH_SIZE) {
> -				rcu_read_unlock();
> -				return 1;
> -			}
> -			gi->gl = glock_hash_chain(gi->hash);
> -			gi->nhash = 0;
> -		}
> +		gi->gl = rhashtable_walk_next(&gi->hti);
>   	/* Skip entries for other sb and dead entries */
> -	} while (gi->sdp != gi->gl->gl_name.ln_sbd ||
> -		 __lockref_is_dead(&gi->gl->gl_lockref));
> -
> -	return 0;
> +	} while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
> +			      __lockref_is_dead(&gi->gl->gl_lockref)));
>   }
>   
>   static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
>   {
> -	struct gfs2_glock_iter *gi = seq->private;
> +	struct gfs2_glock_rht_iter *gi = seq->private;
>   	loff_t n = *pos;
> +	int ret;
>   
>   	if (gi->last_pos <= *pos)
> -		n = gi->nhash + (*pos - gi->last_pos);
> -	else
> -		gi->hash = 0;
> +		n = (*pos - gi->last_pos);
>   
> -	gi->nhash = 0;
> -	rcu_read_lock();
> +	ret = rhashtable_walk_start(&gi->hti);
> +	if (ret)
> +		return NULL;
>   
>   	do {
> -		if (gfs2_glock_iter_next(gi))
> -			return NULL;
> -	} while (n--);
> +		gfs2_glock_iter_next(gi);
> +	} while (gi->gl && n--);
>   
>   	gi->last_pos = *pos;
>   	return gi->gl;
> @@ -1901,23 +1823,20 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
>   static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
>   				 loff_t *pos)
>   {
> -	struct gfs2_glock_iter *gi = seq->private;
> +	struct gfs2_glock_rht_iter *gi = seq->private;
>   
>   	(*pos)++;
>   	gi->last_pos = *pos;
> -	if (gfs2_glock_iter_next(gi))
> -		return NULL;
> -
> +	gfs2_glock_iter_next(gi);
>   	return gi->gl;
>   }
>   
>   static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
>   {
> -	struct gfs2_glock_iter *gi = seq->private;
> +	struct gfs2_glock_rht_iter *gi = seq->private;
>   
> -	if (gi->gl)
> -		rcu_read_unlock();
>   	gi->gl = NULL;
> +	rhashtable_walk_stop(&gi->hti);
>   }
>   
>   static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
> @@ -1981,29 +1900,46 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
>   static int gfs2_glocks_open(struct inode *inode, struct file *file)
>   {
>   	int ret = seq_open_private(file, &gfs2_glock_seq_ops,
> -				   sizeof(struct gfs2_glock_iter));
> +				   sizeof(struct gfs2_glock_rht_iter));
>   	if (ret == 0) {
>   		struct seq_file *seq = file->private_data;
> -		struct gfs2_glock_iter *gi = seq->private;
> +		struct gfs2_glock_rht_iter *gi = seq->private;
> +
>   		gi->sdp = inode->i_private;
> +		gi->last_pos = 0;
>   		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
>   		if (seq->buf)
>   			seq->size = GFS2_SEQ_GOODSIZE;
> +		gi->gl = NULL;
> +		ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
>   	}
>   	return ret;
>   }
>   
> +static int gfs2_glocks_release(struct inode *inode, struct file *file)
> +{
> +	struct seq_file *seq = file->private_data;
> +	struct gfs2_glock_rht_iter *gi = seq->private;
> +
> +	gi->gl = NULL;
> +	rhashtable_walk_exit(&gi->hti);
> +	return seq_release_private(inode, file);
> +}
> +
>   static int gfs2_glstats_open(struct inode *inode, struct file *file)
>   {
>   	int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
> -				   sizeof(struct gfs2_glock_iter));
> +				   sizeof(struct gfs2_glock_rht_iter));
>   	if (ret == 0) {
>   		struct seq_file *seq = file->private_data;
> -		struct gfs2_glock_iter *gi = seq->private;
> +		struct gfs2_glock_rht_iter *gi = seq->private;
>   		gi->sdp = inode->i_private;
> +		gi->last_pos = 0;
>   		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
>   		if (seq->buf)
>   			seq->size = GFS2_SEQ_GOODSIZE;
> +		gi->gl = NULL;
> +		ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
>   	}
>   	return ret;
>   }
> @@ -2025,7 +1961,7 @@ static const struct file_operations gfs2_glocks_fops = {
>   	.open    = gfs2_glocks_open,
>   	.read    = seq_read,
>   	.llseek  = seq_lseek,
> -	.release = seq_release_private,
> +	.release = gfs2_glocks_release,
>   };
>   
>   static const struct file_operations gfs2_glstats_fops = {
> @@ -2033,7 +1969,7 @@ static const struct file_operations gfs2_glstats_fops = {
>   	.open    = gfs2_glstats_open,
>   	.read    = seq_read,
>   	.llseek  = seq_lseek,
> -	.release = seq_release_private,
> +	.release = gfs2_glocks_release,
>   };
>   
>   static const struct file_operations gfs2_sbstats_fops = {
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 4de7853..0d63715 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -22,6 +22,7 @@
>   #include <linux/ktime.h>
>   #include <linux/percpu.h>
>   #include <linux/lockref.h>
> +#include <linux/rhashtable.h>
>   
>   #define DIO_WAIT	0x00000010
>   #define DIO_METADATA	0x00000020
> @@ -342,7 +343,6 @@ struct gfs2_glock {
>   		     gl_req:2,		/* State in last dlm request */
>   		     gl_reply:8;	/* Last reply from the dlm */
>   
> -	unsigned int gl_hash;
>   	unsigned long gl_demote_time; /* time of first demote request */
>   	long gl_hold_time;
>   	struct list_head gl_holders;
> @@ -368,7 +368,7 @@ struct gfs2_glock {
>   			loff_t end;
>   		} gl_vm;
>   	};
> -	struct rcu_head gl_rcu;
> +	struct rhash_head gl_node;
>   };
>   
>   #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */

next prev parent reply	other threads:[~2015-07-13 11:19 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-09 18:25 [Cluster-devel] [GFS2 PATCH 0/2] Increase GFS2 glock scalability Bob Peterson
2015-07-09 18:25 ` [Cluster-devel] [GFS2 PATCH 1/2] GFS2: Move glock superblock pointer to field gl_name Bob Peterson
2015-07-13 11:19   ` Steven Whitehouse
2015-07-13 13:39     ` [Cluster-devel] [GFS2 PATCH 1/2][TRY #2] " Bob Peterson
2015-07-13 19:19       ` Steven Whitehouse
2015-08-22 22:00         ` Andreas Gruenbacher
2015-08-23 10:53           ` Steven Whitehouse
2015-08-26 14:12           ` Bob Peterson
2015-07-09 18:25 ` [Cluster-devel] [GFS2 PATCH 2/2] GFS2: Use resizable hash table for glocks Bob Peterson
2015-07-13 11:19   ` Steven Whitehouse [this message]
2015-08-24 13:16   ` Andreas Gruenbacher
2015-08-24 13:31   ` Andreas Gruenbacher
2015-08-24 13:46   ` Andreas Gruenbacher
2015-08-27 13:34     ` Bob Peterson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55A39EC5.6010905@redhat.com \
    --to=swhiteho@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).