All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH 17/24] GFS2: Use RCU/hlist_bl based hash for quotas
Date: Tue, 21 Jan 2014 21:32:48 -0800	[thread overview]
Message-ID: <20140122053248.GX10038@linux.vnet.ibm.com> (raw)
In-Reply-To: <1390220627-1571-18-git-send-email-swhiteho@redhat.com>

On Mon, Jan 20, 2014 at 12:23:40PM +0000, Steven Whitehouse wrote:
> Prior to this patch, GFS2 kept all the quotas for each
> super block in a single linked list. This is rather slow
> when there are large numbers of quotas.
> 
> This patch introduces a hlist_bl based hash table, similar
> to the one used for glocks. The initial look up of the quota
> is now lockless in the case where it is already cached,
> although we still have to take the per quota spinlock in
> order to bump the ref count. Either way though, this is a
> big improvement on what was there before.
> 
> The qd_lock and the per super block list is preserved, for
> the time being. However it is intended that since this is no
> longer used for its original role, it should be possible to
> shrink the number of items on that list in due course and
> remove the requirement to take qd_lock in qd_get.
> 
> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
> Cc: Abhijith Das <adas@redhat.com>
> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Interesting!  I thought that Sasha Levin had a hash table in the works,
but I don't see it, so CCing him.

A few questions and comments below.

							Thanx, Paul

> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index a99f60c..59d99ec 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -428,10 +428,13 @@ enum {
>  };
> 
>  struct gfs2_quota_data {
> +	struct hlist_bl_node qd_hlist;
>  	struct list_head qd_list;
>  	struct kqid qd_id;
> +	struct gfs2_sbd *qd_sbd;
>  	struct lockref qd_lockref;
>  	struct list_head qd_lru;
> +	unsigned qd_hash;
> 
>  	unsigned long qd_flags;		/* QDF_... */
> 
> @@ -450,6 +453,7 @@ struct gfs2_quota_data {
> 
>  	u64 qd_sync_gen;
>  	unsigned long qd_last_warn;
> +	struct rcu_head qd_rcu;
>  };
> 
>  struct gfs2_trans {
> diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
> index 0650db2..c272e73 100644
> --- a/fs/gfs2/main.c
> +++ b/fs/gfs2/main.c
> @@ -76,6 +76,7 @@ static int __init init_gfs2_fs(void)
> 
>  	gfs2_str2qstr(&gfs2_qdot, ".");
>  	gfs2_str2qstr(&gfs2_qdotdot, "..");
> +	gfs2_quota_hash_init();
> 
>  	error = gfs2_sys_init();
>  	if (error)
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 1b6b367..a1df01d 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -52,6 +52,10 @@
>  #include <linux/dqblk_xfs.h>
>  #include <linux/lockref.h>
>  #include <linux/list_lru.h>
> +#include <linux/rcupdate.h>
> +#include <linux/rculist_bl.h>
> +#include <linux/bit_spinlock.h>
> +#include <linux/jhash.h>
> 
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -67,10 +71,43 @@
>  #include "inode.h"
>  #include "util.h"
> 
> -/* Lock order: qd_lock -> qd->lockref.lock -> lru lock */
> +#define GFS2_QD_HASH_SHIFT      12

Should this be a function of the number of CPUs?  (Might not be an issue
if the really big systems don't use GFS.)

> +#define GFS2_QD_HASH_SIZE       (1 << GFS2_QD_HASH_SHIFT)
> +#define GFS2_QD_HASH_MASK       (GFS2_QD_HASH_SIZE - 1)
> +
> +/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
>  static DEFINE_SPINLOCK(qd_lock);
>  struct list_lru gfs2_qd_lru;
> 
> +static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE];
> +
> +static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp,
> +				 const struct kqid qid)
> +{
> +	unsigned int h;
> +
> +	h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0);
> +	h = jhash(&qid, sizeof(struct kqid), h);
> +
> +	return h & GFS2_QD_HASH_MASK;
> +}
> +
> +static inline void spin_lock_bucket(unsigned int hash)
> +{
> +        hlist_bl_lock(&qd_hash_table[hash]);
> +}
> +
> +static inline void spin_unlock_bucket(unsigned int hash)
> +{
> +        hlist_bl_unlock(&qd_hash_table[hash]);
> +}
> +
> +static void gfs2_qd_dealloc(struct rcu_head *rcu)
> +{
> +	struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
> +	kmem_cache_free(gfs2_quotad_cachep, qd);
> +}
> +
>  static void gfs2_qd_dispose(struct list_head *list)
>  {
>  	struct gfs2_quota_data *qd;
> @@ -87,6 +124,10 @@ static void gfs2_qd_dispose(struct list_head *list)
>  		list_del(&qd->qd_list);
>  		spin_unlock(&qd_lock);
> 
> +		spin_lock_bucket(qd->qd_hash);
> +		hlist_bl_del_rcu(&qd->qd_hlist);
> +		spin_unlock_bucket(qd->qd_hash);
> +

Good, removed from the RCU-traversed list before invoking call_rcu().

>  		gfs2_assert_warn(sdp, !qd->qd_change);
>  		gfs2_assert_warn(sdp, !qd->qd_slot_count);
>  		gfs2_assert_warn(sdp, !qd->qd_bh_count);
> @@ -95,7 +136,7 @@ static void gfs2_qd_dispose(struct list_head *list)
>  		atomic_dec(&sdp->sd_quota_count);
> 
>  		/* Delete it from the common reclaim list */
> -		kmem_cache_free(gfs2_quotad_cachep, qd);
> +		call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
>  	}
>  }
> 
> @@ -165,83 +206,95 @@ static u64 qd2offset(struct gfs2_quota_data *qd)
>  	return offset;
>  }
> 
> -static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
> -		    struct gfs2_quota_data **qdp)
> +static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
>  {
>  	struct gfs2_quota_data *qd;
>  	int error;
> 
>  	qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
>  	if (!qd)
> -		return -ENOMEM;
> +		return NULL;
> 
> +	qd->qd_sbd = sdp;
>  	qd->qd_lockref.count = 1;
>  	spin_lock_init(&qd->qd_lockref.lock);
>  	qd->qd_id = qid;
>  	qd->qd_slot = -1;
>  	INIT_LIST_HEAD(&qd->qd_lru);
> +	qd->qd_hash = hash;
> 
>  	error = gfs2_glock_get(sdp, qd2index(qd),
>  			      &gfs2_quota_glops, CREATE, &qd->qd_gl);
>  	if (error)
>  		goto fail;
> 
> -	*qdp = qd;
> -
> -	return 0;
> +	return qd;
> 
>  fail:
>  	kmem_cache_free(gfs2_quotad_cachep, qd);
> -	return error;
> +	return NULL;
>  }
> 
> -static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
> -		  struct gfs2_quota_data **qdp)
> +static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash,
> +						     const struct gfs2_sbd *sdp,
> +						     struct kqid qid)
>  {
> -	struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
> -	int error, found;
> -
> -	*qdp = NULL;
> +	struct gfs2_quota_data *qd;
> +	struct hlist_bl_node *h;
> 
> -	for (;;) {
> -		found = 0;
> -		spin_lock(&qd_lock);
> -		list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
> -			if (qid_eq(qd->qd_id, qid) &&
> -			    lockref_get_not_dead(&qd->qd_lockref)) {
> -				list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
> -				found = 1;
> -				break;
> -			}
> +	hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) {

All callers of gfs2_qd_search_bucket() either hold the update-side lock
(acquired via spin_lock_bucket()) or have invoked rcu_read_lock(), so
this is good.

> +		if (!qid_eq(qd->qd_id, qid))
> +			continue;
> +		if (qd->qd_sbd != sdp)
> +			continue;
> +		if (lockref_get_not_dead(&qd->qd_lockref)) {
> +			list_lru_del(&gfs2_qd_lru, &qd->qd_lru);

list_lru_del() acquires a lock, but it is from an array whose size
depends on the NODES_SHIFT Kconfig variable.  The array size seems to
vary from 8 to 16 to 64 to 1024, depending on other configuration info,
so should be OK.

> +			return qd;
>  		}
> +	}
> 
> -		if (!found)
> -			qd = NULL;
> +	return NULL;
> +}
> 
> -		if (!qd && new_qd) {
> -			qd = new_qd;
> -			list_add(&qd->qd_list, &sdp->sd_quota_list);
> -			atomic_inc(&sdp->sd_quota_count);
> -			new_qd = NULL;
> -		}
> 
> -		spin_unlock(&qd_lock);
> +static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
> +		  struct gfs2_quota_data **qdp)
> +{
> +	struct gfs2_quota_data *qd, *new_qd;
> +	unsigned int hash = gfs2_qd_hash(sdp, qid);
> 
> -		if (qd) {
> -			if (new_qd) {
> -				gfs2_glock_put(new_qd->qd_gl);
> -				kmem_cache_free(gfs2_quotad_cachep, new_qd);
> -			}
> -			*qdp = qd;
> -			return 0;
> -		}
> +	rcu_read_lock();
> +	*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
> +	rcu_read_unlock();
> 
> -		error = qd_alloc(sdp, qid, &new_qd);
> -		if (error)
> -			return error;
> +	if (qd)
> +		return 0;
> +
> +	new_qd = qd_alloc(hash, sdp, qid);
> +	if (!new_qd)
> +		return -ENOMEM;
> +
> +	spin_lock(&qd_lock);
> +	spin_lock_bucket(hash);
> +	*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
> +	if (qd == NULL) {
> +		*qdp = new_qd;
> +		list_add(&new_qd->qd_list, &sdp->sd_quota_list);
> +		hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
> +		atomic_inc(&sdp->sd_quota_count);
> +	}
> +	spin_unlock_bucket(hash);
> +	spin_unlock(&qd_lock);
> +
> +	if (qd) {
> +		gfs2_glock_put(new_qd->qd_gl);
> +		kmem_cache_free(gfs2_quotad_cachep, new_qd);
>  	}
> +
> +	return 0;
>  }
> 
> +
>  static void qd_hold(struct gfs2_quota_data *qd)
>  {
>  	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
> @@ -1215,6 +1268,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
>  	unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift;
>  	unsigned int x, slot = 0;
>  	unsigned int found = 0;
> +	unsigned int hash;
>  	u64 dblock;
>  	u32 extlen = 0;
>  	int error;
> @@ -1272,8 +1326,9 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
>  			if (!qc_change)
>  				continue;
> 
> -			error = qd_alloc(sdp, qc_id, &qd);
> -			if (error) {
> +			hash = gfs2_qd_hash(sdp, qc_id);
> +			qd = qd_alloc(hash, sdp, qc_id);
> +			if (qd == NULL) {
>  				brelse(bh);
>  				goto fail;
>  			}
> @@ -1289,6 +1344,10 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
>  			atomic_inc(&sdp->sd_quota_count);
>  			spin_unlock(&qd_lock);
> 
> +			spin_lock_bucket(hash);
> +			hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]);
> +			spin_unlock_bucket(hash);
> +
>  			found++;
>  		}
> 
> @@ -1335,11 +1394,16 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
>  		spin_unlock(&qd->qd_lockref.lock);
> 
>  		list_del(&qd->qd_list);
> +
>  		/* Also remove if this qd exists in the reclaim list */
>  		list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
>  		atomic_dec(&sdp->sd_quota_count);
>  		spin_unlock(&qd_lock);
> 
> +		spin_lock_bucket(qd->qd_hash);
> +		hlist_bl_del_rcu(&qd->qd_hlist);

Might just be my unfamiliarity with this code, but it took me a bit
to see the difference between ->qd_hlist and ->qd_list.  Of course, until
I spotted the difference, I was wondering why you were removing the
item twice.  ;-)

> +		spin_unlock_bucket(qd->qd_hash);
> +
>  		if (!qd->qd_lockref.count) {
>  			gfs2_assert_warn(sdp, !qd->qd_change);
>  			gfs2_assert_warn(sdp, !qd->qd_slot_count);
> @@ -1348,7 +1412,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
>  		gfs2_assert_warn(sdp, !qd->qd_bh_count);
> 
>  		gfs2_glock_put(qd->qd_gl);
> -		kmem_cache_free(gfs2_quotad_cachep, qd);
> +		call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
> 
>  		spin_lock(&qd_lock);
>  	}
> @@ -1643,3 +1707,11 @@ const struct quotactl_ops gfs2_quotactl_ops = {
>  	.get_dqblk	= gfs2_get_dqblk,
>  	.set_dqblk	= gfs2_set_dqblk,
>  };
> +
> +void __init gfs2_quota_hash_init(void)
> +{
> +	unsigned i;
> +
> +	for(i = 0; i < GFS2_QD_HASH_SIZE; i++)
> +		INIT_HLIST_BL_HEAD(&qd_hash_table[i]);
> +}
> diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
> index 96e4f34..55d506e 100644
> --- a/fs/gfs2/quota.h
> +++ b/fs/gfs2/quota.h
> @@ -57,5 +57,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
>  extern const struct quotactl_ops gfs2_quotactl_ops;
>  extern struct shrinker gfs2_qd_shrinker;
>  extern struct list_lru gfs2_qd_lru;
> +extern void __init gfs2_quota_hash_init(void);
> 
>  #endif /* __QUOTA_DOT_H__ */
> -- 
> 1.8.3.1
> 



WARNING: multiple messages have this Message-ID (diff)
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
To: Steven Whitehouse <swhiteho@redhat.com>
Cc: linux-kernel@vger.kernel.org, cluster-devel@redhat.com,
	Abhijith Das <adas@redhat.com>,
	sasha.levin@oracle.com
Subject: Re: [PATCH 17/24] GFS2: Use RCU/hlist_bl based hash for quotas
Date: Tue, 21 Jan 2014 21:32:48 -0800	[thread overview]
Message-ID: <20140122053248.GX10038@linux.vnet.ibm.com> (raw)
In-Reply-To: <1390220627-1571-18-git-send-email-swhiteho@redhat.com>

On Mon, Jan 20, 2014 at 12:23:40PM +0000, Steven Whitehouse wrote:
> Prior to this patch, GFS2 kept all the quotas for each
> super block in a single linked list. This is rather slow
> when there are large numbers of quotas.
> 
> This patch introduces a hlist_bl based hash table, similar
> to the one used for glocks. The initial look up of the quota
> is now lockless in the case where it is already cached,
> although we still have to take the per quota spinlock in
> order to bump the ref count. Either way though, this is a
> big improvement on what was there before.
> 
> The qd_lock and the per super block list is preserved, for
> the time being. However it is intended that since this is no
> longer used for its original role, it should be possible to
> shrink the number of items on that list in due course and
> remove the requirement to take qd_lock in qd_get.
> 
> Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
> Cc: Abhijith Das <adas@redhat.com>
> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Interesting!  I thought that Sasha Levin had a hash table in the works,
but I don't see it, so CCing him.

A few questions and comments below.

							Thanx, Paul

> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index a99f60c..59d99ec 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -428,10 +428,13 @@ enum {
>  };
> 
>  struct gfs2_quota_data {
> +	struct hlist_bl_node qd_hlist;
>  	struct list_head qd_list;
>  	struct kqid qd_id;
> +	struct gfs2_sbd *qd_sbd;
>  	struct lockref qd_lockref;
>  	struct list_head qd_lru;
> +	unsigned qd_hash;
> 
>  	unsigned long qd_flags;		/* QDF_... */
> 
> @@ -450,6 +453,7 @@ struct gfs2_quota_data {
> 
>  	u64 qd_sync_gen;
>  	unsigned long qd_last_warn;
> +	struct rcu_head qd_rcu;
>  };
> 
>  struct gfs2_trans {
> diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
> index 0650db2..c272e73 100644
> --- a/fs/gfs2/main.c
> +++ b/fs/gfs2/main.c
> @@ -76,6 +76,7 @@ static int __init init_gfs2_fs(void)
> 
>  	gfs2_str2qstr(&gfs2_qdot, ".");
>  	gfs2_str2qstr(&gfs2_qdotdot, "..");
> +	gfs2_quota_hash_init();
> 
>  	error = gfs2_sys_init();
>  	if (error)
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 1b6b367..a1df01d 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -52,6 +52,10 @@
>  #include <linux/dqblk_xfs.h>
>  #include <linux/lockref.h>
>  #include <linux/list_lru.h>
> +#include <linux/rcupdate.h>
> +#include <linux/rculist_bl.h>
> +#include <linux/bit_spinlock.h>
> +#include <linux/jhash.h>
> 
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -67,10 +71,43 @@
>  #include "inode.h"
>  #include "util.h"
> 
> -/* Lock order: qd_lock -> qd->lockref.lock -> lru lock */
> +#define GFS2_QD_HASH_SHIFT      12

Should this be a function of the number of CPUs?  (Might not be an issue
if the really big systems don't use GFS.)

> +#define GFS2_QD_HASH_SIZE       (1 << GFS2_QD_HASH_SHIFT)
> +#define GFS2_QD_HASH_MASK       (GFS2_QD_HASH_SIZE - 1)
> +
> +/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
>  static DEFINE_SPINLOCK(qd_lock);
>  struct list_lru gfs2_qd_lru;
> 
> +static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE];
> +
> +static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp,
> +				 const struct kqid qid)
> +{
> +	unsigned int h;
> +
> +	h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0);
> +	h = jhash(&qid, sizeof(struct kqid), h);
> +
> +	return h & GFS2_QD_HASH_MASK;
> +}
> +
> +static inline void spin_lock_bucket(unsigned int hash)
> +{
> +        hlist_bl_lock(&qd_hash_table[hash]);
> +}
> +
> +static inline void spin_unlock_bucket(unsigned int hash)
> +{
> +        hlist_bl_unlock(&qd_hash_table[hash]);
> +}
> +
> +static void gfs2_qd_dealloc(struct rcu_head *rcu)
> +{
> +	struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
> +	kmem_cache_free(gfs2_quotad_cachep, qd);
> +}
> +
>  static void gfs2_qd_dispose(struct list_head *list)
>  {
>  	struct gfs2_quota_data *qd;
> @@ -87,6 +124,10 @@ static void gfs2_qd_dispose(struct list_head *list)
>  		list_del(&qd->qd_list);
>  		spin_unlock(&qd_lock);
> 
> +		spin_lock_bucket(qd->qd_hash);
> +		hlist_bl_del_rcu(&qd->qd_hlist);
> +		spin_unlock_bucket(qd->qd_hash);
> +

Good, removed from the RCU-traversed list before invoking call_rcu().

>  		gfs2_assert_warn(sdp, !qd->qd_change);
>  		gfs2_assert_warn(sdp, !qd->qd_slot_count);
>  		gfs2_assert_warn(sdp, !qd->qd_bh_count);
> @@ -95,7 +136,7 @@ static void gfs2_qd_dispose(struct list_head *list)
>  		atomic_dec(&sdp->sd_quota_count);
> 
>  		/* Delete it from the common reclaim list */
> -		kmem_cache_free(gfs2_quotad_cachep, qd);
> +		call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
>  	}
>  }
> 
> @@ -165,83 +206,95 @@ static u64 qd2offset(struct gfs2_quota_data *qd)
>  	return offset;
>  }
> 
> -static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid,
> -		    struct gfs2_quota_data **qdp)
> +static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
>  {
>  	struct gfs2_quota_data *qd;
>  	int error;
> 
>  	qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS);
>  	if (!qd)
> -		return -ENOMEM;
> +		return NULL;
> 
> +	qd->qd_sbd = sdp;
>  	qd->qd_lockref.count = 1;
>  	spin_lock_init(&qd->qd_lockref.lock);
>  	qd->qd_id = qid;
>  	qd->qd_slot = -1;
>  	INIT_LIST_HEAD(&qd->qd_lru);
> +	qd->qd_hash = hash;
> 
>  	error = gfs2_glock_get(sdp, qd2index(qd),
>  			      &gfs2_quota_glops, CREATE, &qd->qd_gl);
>  	if (error)
>  		goto fail;
> 
> -	*qdp = qd;
> -
> -	return 0;
> +	return qd;
> 
>  fail:
>  	kmem_cache_free(gfs2_quotad_cachep, qd);
> -	return error;
> +	return NULL;
>  }
> 
> -static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
> -		  struct gfs2_quota_data **qdp)
> +static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash,
> +						     const struct gfs2_sbd *sdp,
> +						     struct kqid qid)
>  {
> -	struct gfs2_quota_data *qd = NULL, *new_qd = NULL;
> -	int error, found;
> -
> -	*qdp = NULL;
> +	struct gfs2_quota_data *qd;
> +	struct hlist_bl_node *h;
> 
> -	for (;;) {
> -		found = 0;
> -		spin_lock(&qd_lock);
> -		list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
> -			if (qid_eq(qd->qd_id, qid) &&
> -			    lockref_get_not_dead(&qd->qd_lockref)) {
> -				list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
> -				found = 1;
> -				break;
> -			}
> +	hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) {

All callers of gfs2_qd_search_bucket() either hold the update-side lock
(acquired via spin_lock_bucket()) or have invoked rcu_read_lock(), so
this is good.

> +		if (!qid_eq(qd->qd_id, qid))
> +			continue;
> +		if (qd->qd_sbd != sdp)
> +			continue;
> +		if (lockref_get_not_dead(&qd->qd_lockref)) {
> +			list_lru_del(&gfs2_qd_lru, &qd->qd_lru);

list_lru_del() acquires a lock, but it is from an array whose size
depends on the NODES_SHIFT Kconfig variable.  The array size seems to
vary from 8 to 16 to 64 to 1024, depending on other configuration info,
so should be OK.

> +			return qd;
>  		}
> +	}
> 
> -		if (!found)
> -			qd = NULL;
> +	return NULL;
> +}
> 
> -		if (!qd && new_qd) {
> -			qd = new_qd;
> -			list_add(&qd->qd_list, &sdp->sd_quota_list);
> -			atomic_inc(&sdp->sd_quota_count);
> -			new_qd = NULL;
> -		}
> 
> -		spin_unlock(&qd_lock);
> +static int qd_get(struct gfs2_sbd *sdp, struct kqid qid,
> +		  struct gfs2_quota_data **qdp)
> +{
> +	struct gfs2_quota_data *qd, *new_qd;
> +	unsigned int hash = gfs2_qd_hash(sdp, qid);
> 
> -		if (qd) {
> -			if (new_qd) {
> -				gfs2_glock_put(new_qd->qd_gl);
> -				kmem_cache_free(gfs2_quotad_cachep, new_qd);
> -			}
> -			*qdp = qd;
> -			return 0;
> -		}
> +	rcu_read_lock();
> +	*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
> +	rcu_read_unlock();
> 
> -		error = qd_alloc(sdp, qid, &new_qd);
> -		if (error)
> -			return error;
> +	if (qd)
> +		return 0;
> +
> +	new_qd = qd_alloc(hash, sdp, qid);
> +	if (!new_qd)
> +		return -ENOMEM;
> +
> +	spin_lock(&qd_lock);
> +	spin_lock_bucket(hash);
> +	*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
> +	if (qd == NULL) {
> +		*qdp = new_qd;
> +		list_add(&new_qd->qd_list, &sdp->sd_quota_list);
> +		hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
> +		atomic_inc(&sdp->sd_quota_count);
> +	}
> +	spin_unlock_bucket(hash);
> +	spin_unlock(&qd_lock);
> +
> +	if (qd) {
> +		gfs2_glock_put(new_qd->qd_gl);
> +		kmem_cache_free(gfs2_quotad_cachep, new_qd);
>  	}
> +
> +	return 0;
>  }
> 
> +
>  static void qd_hold(struct gfs2_quota_data *qd)
>  {
>  	struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd;
> @@ -1215,6 +1268,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
>  	unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift;
>  	unsigned int x, slot = 0;
>  	unsigned int found = 0;
> +	unsigned int hash;
>  	u64 dblock;
>  	u32 extlen = 0;
>  	int error;
> @@ -1272,8 +1326,9 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
>  			if (!qc_change)
>  				continue;
> 
> -			error = qd_alloc(sdp, qc_id, &qd);
> -			if (error) {
> +			hash = gfs2_qd_hash(sdp, qc_id);
> +			qd = qd_alloc(hash, sdp, qc_id);
> +			if (qd == NULL) {
>  				brelse(bh);
>  				goto fail;
>  			}
> @@ -1289,6 +1344,10 @@ int gfs2_quota_init(struct gfs2_sbd *sdp)
>  			atomic_inc(&sdp->sd_quota_count);
>  			spin_unlock(&qd_lock);
> 
> +			spin_lock_bucket(hash);
> +			hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]);
> +			spin_unlock_bucket(hash);
> +
>  			found++;
>  		}
> 
> @@ -1335,11 +1394,16 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
>  		spin_unlock(&qd->qd_lockref.lock);
> 
>  		list_del(&qd->qd_list);
> +
>  		/* Also remove if this qd exists in the reclaim list */
>  		list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
>  		atomic_dec(&sdp->sd_quota_count);
>  		spin_unlock(&qd_lock);
> 
> +		spin_lock_bucket(qd->qd_hash);
> +		hlist_bl_del_rcu(&qd->qd_hlist);

Might just be my unfamiliarity with this code, but it took me a bit
to see the difference between ->qd_hlist and ->qd_list.  Of course, until
I spotted the difference, I was wondering why you were removing the
item twice.  ;-)

> +		spin_unlock_bucket(qd->qd_hash);
> +
>  		if (!qd->qd_lockref.count) {
>  			gfs2_assert_warn(sdp, !qd->qd_change);
>  			gfs2_assert_warn(sdp, !qd->qd_slot_count);
> @@ -1348,7 +1412,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
>  		gfs2_assert_warn(sdp, !qd->qd_bh_count);
> 
>  		gfs2_glock_put(qd->qd_gl);
> -		kmem_cache_free(gfs2_quotad_cachep, qd);
> +		call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
> 
>  		spin_lock(&qd_lock);
>  	}
> @@ -1643,3 +1707,11 @@ const struct quotactl_ops gfs2_quotactl_ops = {
>  	.get_dqblk	= gfs2_get_dqblk,
>  	.set_dqblk	= gfs2_set_dqblk,
>  };
> +
> +void __init gfs2_quota_hash_init(void)
> +{
> +	unsigned i;
> +
> +	for(i = 0; i < GFS2_QD_HASH_SIZE; i++)
> +		INIT_HLIST_BL_HEAD(&qd_hash_table[i]);
> +}
> diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
> index 96e4f34..55d506e 100644
> --- a/fs/gfs2/quota.h
> +++ b/fs/gfs2/quota.h
> @@ -57,5 +57,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
>  extern const struct quotactl_ops gfs2_quotactl_ops;
>  extern struct shrinker gfs2_qd_shrinker;
>  extern struct list_lru gfs2_qd_lru;
> +extern void __init gfs2_quota_hash_init(void);
> 
>  #endif /* __QUOTA_DOT_H__ */
> -- 
> 1.8.3.1
> 


  reply	other threads:[~2014-01-22  5:32 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-20 12:23 [Cluster-devel] GFS2: Pre-pull patch posting (merge window) Steven Whitehouse
2014-01-20 12:23 ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 01/24] GFS2: If requested is too large, use the largest extent in the rgrp Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 02/24] GFS2: Drop inadequate rgrps from the reservation tree Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 03/24] GFS2: Implement a "rgrp has no extents longer than X" scheme Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 04/24] GFS2: Clean up releasepage Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 05/24] GFS2: Remove gfs2_quota_change_host structure Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 06/24] GFS2: Remove test which is always true Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 07/24] GFS2: Use range based functions for rgrp sync/invalidation Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 08/24] GFS2: Use only a single address space for rgrps Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 09/24] GFS2: Add directory addition info structure Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 10/24] GFS2: Consolidate transaction blocks calculation for dir add Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 11/24] GFS2: Remember directory insert point Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 12/24] GFS2: Increase i_writecount during gfs2_setattr_chown Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 13/24] GFS2: For exhash conversion, only one block is needed Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 14/24] GFS2: Add hints to directory leaf blocks Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 15/24] GFS2: Add initialization for address space in super block Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 16/24] GFS2: No need to invalidate pages for a dio read Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 17/24] GFS2: Use RCU/hlist_bl based hash for quotas Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-22  5:32   ` Paul E. McKenney [this message]
2014-01-22  5:32     ` Paul E. McKenney
2014-01-22  6:06     ` [Cluster-devel] " Sasha Levin
2014-01-22  6:06       ` Sasha Levin
2014-01-22  9:43       ` [Cluster-devel] " Steven Whitehouse
2014-01-22  9:43         ` Steven Whitehouse
2014-01-22  9:58     ` [Cluster-devel] " Steven Whitehouse
2014-01-22  9:58       ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 18/24] GFS2: Only run logd and quota when mounted read/write Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 19/24] GFS2: Clean up quota slot allocation Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 20/24] GFS2: Move quota bitmap operations under their own lock Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 21/24] GFS2: Fix kbuild test robot reported warning Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 22/24] GFS2: Don't use ENOBUFS when ENOMEM is the correct error code Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 23/24] GFS2: Small cleanup Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse
2014-01-20 12:23 ` [Cluster-devel] [PATCH 24/24] GFS2: revert "GFS2: d_splice_alias() can't return error" Steven Whitehouse
2014-01-20 12:23   ` Steven Whitehouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140122053248.GX10038@linux.vnet.ibm.com \
    --to=paulmck@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.