All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benny Halevy <bhalevy@panasas.com>
To: andros@netapp.com
Cc: linux-nfs@vger.kernel.org
Subject: Re: [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache
Date: Mon, 03 May 2010 14:48:37 +0300	[thread overview]
Message-ID: <4BDEB815.6030200@panasas.com> (raw)
In-Reply-To: <1272298699-11411-2-git-send-email-andros@netapp.com>

On Apr. 26, 2010, 19:18 +0300, andros@netapp.com wrote:
> From: Andy Adamson <andros@netapp.com>
> 
> A shared RCU device ID cache servicing multiple mounts of a single layout type
> per meta data server (struct nfs_client).
> 
> Device IDs of type deviceid4 are required by all layout types, long lived and
> read at each I/O.  They are added to the deviceid cache at first reference by
> a layout via GETDEVICEINFO and (currently) are only removed at umount.
> 
> Reference count the device ID cache for each mounted file system
> in the initialize_mountpoint layoutdriver_io_operation.
> 
> Dereference the device ID cache for each mounted file system in the
> uninitialize_mountpoint layoutdriver_io_operation, called at umount.
> 
> Each layoutsegment assigns a pointer and takes a reference to the
> nfs4_deviceid structure identified by the layout deviceid.
> This is so that there are no deviceid lookups for the normal I/O path.
> 
> Even though required by all layout types, the deviceid is not exposed in the
> LAYOUTGET4res but is instead hidden in the opaque layouttype4.
> 
> Therefore, each layout type alloc_lseg calls nfs4_set_layout_deviceid,
> and free_lseg calls nfs4_unset_layout_deviceid.
> 
> While the file layout driver will not cache very many deviceid's, the object
> and block layout drivers could cache 100's for a large installation.
> Use an hlist.
> 
> Signed-off-by: Andy Adamson <andros@netapp.com>
> ---
>  fs/nfs/pnfs.c             |  167 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/nfs4_pnfs.h |   50 +++++++++++++
>  include/linux/nfs_fs_sb.h |    1 +
>  3 files changed, 218 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 91572aa..bf906cc 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -45,6 +45,7 @@
>  #include <linux/nfs4.h>
>  #include <linux/pnfs_xdr.h>
>  #include <linux/nfs4_pnfs.h>
> +#include <linux/rculist.h>
>  
>  #include "internal.h"
>  #include "nfs4_fs.h"
> @@ -2296,3 +2297,169 @@ struct pnfs_client_operations pnfs_ops = {
>  
>  EXPORT_SYMBOL(pnfs_unregister_layoutdriver);
>  EXPORT_SYMBOL(pnfs_register_layoutdriver);
> +
> +
> +/* Device ID cache. Supports one layout type per struct nfs_client */
> +int
> +nfs4_alloc_init_deviceid_cache(struct nfs_client *clp,
> +			 void (*free_callback)(struct kref *))
> +{
> +	struct nfs4_deviceid_cache *c;
> +
> +	c = kzalloc(sizeof(struct nfs4_deviceid_cache), GFP_KERNEL);
> +	if (!c)
> +		return -ENOMEM;
> +	spin_lock(&clp->cl_lock);
> +	if (clp->cl_devid_cache != NULL) {
> +		kref_get(&clp->cl_devid_cache->dc_kref);
> +		spin_unlock(&clp->cl_lock);
> +		dprintk("%s [kref [%d]]\n", __func__,
> +			atomic_read(&clp->cl_devid_cache->dc_kref.refcount));
> +		kfree(c);
> +	} else {
> +		int i;
> +
> +		spin_lock_init(&c->dc_lock);
> +		for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE ; i++)
> +			INIT_HLIST_HEAD(&c->dc_deviceids[i]);
> +		kref_init(&c->dc_kref);
> +		c->dc_free_callback = free_callback;
> +		clp->cl_devid_cache = c;
> +		spin_unlock(&clp->cl_lock);
> +		dprintk("%s [new]\n", __func__);
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL(nfs4_alloc_init_deviceid_cache);
> +
> +void
> +nfs4_init_deviceid_node(struct nfs4_deviceid *d)
> +{
> +	INIT_HLIST_NODE(&d->de_node);
> +	kref_init(&d->de_kref);
> +}
> +EXPORT_SYMBOL(nfs4_init_deviceid_node);
> +
> +/* Called from layoutdriver_io_operations->alloc_lseg */
> +void
> +nfs4_set_layout_deviceid(struct pnfs_layout_segment *l, struct nfs4_deviceid *d)
> +{
> +	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
> +	l->deviceid = d;
> +	kref_get(&d->de_kref);
> +}
> +EXPORT_SYMBOL(nfs4_set_layout_deviceid);
> +
> +/* Called from layoutdriver_io_operations->free_lseg */
> +void
> +nfs4_unset_layout_deviceid(struct pnfs_layout_segment *l,
> +			   struct nfs4_deviceid *d,
> +			   void (*free_callback)(struct kref *))
> +{
> +	dprintk("%s [%d]\n", __func__, atomic_read(&d->de_kref.refcount));
> +	l->deviceid = NULL;
> +	kref_put(&d->de_kref, free_callback);
> +}
> +EXPORT_SYMBOL(nfs4_unset_layout_deviceid);
> +
> +struct nfs4_deviceid *
> +nfs4_find_deviceid(struct nfs4_deviceid_cache *c, struct pnfs_deviceid *id)
> +{
> +	struct nfs4_deviceid *d;
> +	struct hlist_node *n;
> +	long hash = nfs4_deviceid_hash(id);
> +
> +	dprintk("--> %s hash %ld\n", __func__, hash);
> +	rcu_read_lock();
> +	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
> +		if (!memcmp(&d->de_id, id, NFS4_PNFS_DEVICEID4_SIZE)) {
> +			rcu_read_unlock();
> +			return d;
> +		}
> +	}
> +	rcu_read_unlock();
> +	return NULL;
> +}
> +EXPORT_SYMBOL(nfs4_find_deviceid);
> +
> +/*
> + * Add or kref_get a deviceid.
> + * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
> + */
> +struct nfs4_deviceid *
> +nfs4_add_deviceid(struct nfs4_deviceid_cache *c, struct nfs4_deviceid *new)
> +{
> +	struct nfs4_deviceid *d;
> +	struct hlist_node *n;
> +	long hash = nfs4_deviceid_hash(&new->de_id);
> +
> +	dprintk("--> %s hash %ld\n", __func__, hash);
> +	spin_lock(&c->dc_lock);
> +	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
> +		if (!memcmp(&d->de_id, &new->de_id, NFS4_PNFS_DEVICEID4_SIZE)) {
> +			spin_unlock(&c->dc_lock);
> +			dprintk("%s [discard]\n", __func__);
> +			c->dc_free_callback(&new->de_kref);
> +			return d;
> +		}
> +	}
> +	hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
> +	spin_unlock(&c->dc_lock);
> +	dprintk("%s [new]\n", __func__);
> +	return new;
> +}
> +EXPORT_SYMBOL(nfs4_add_deviceid);
> +
> +static int
> +nfs4_remove_deviceid(struct nfs4_deviceid_cache *c, long hash)
> +{
> +	struct nfs4_deviceid *d;
> +	struct hlist_node *n;
> +
> +	dprintk("--> %s hash %ld\n", __func__, hash);
> +	spin_lock(&c->dc_lock);
> +	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
> +		hlist_del_rcu(&d->de_node);
> +		spin_unlock(&c->dc_lock);
> +		synchronize_rcu();
> +		dprintk("%s [%d]\n", __func__,
> +			atomic_read(&d->de_kref.refcount));
> +		kref_put(&d->de_kref, c->dc_free_callback);
> +		return 1;
> +	}
> +	spin_unlock(&c->dc_lock);
> +	return 0;
> +}
> +
> +static void
> +nfs4_free_deviceid_cache(struct kref *kref)
> +{
> +	struct nfs4_deviceid_cache *cache =
> +		container_of(kref, struct nfs4_deviceid_cache, dc_kref);
> +	int more;
> +	long i;
> +
> +	for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) {
> +		more = 1;
> +		while (more)
> +			more = nfs4_remove_deviceid(cache, i);

Andy, this can be simplified to

		while (nfs4_remove_deviceid(cache, i))
			;

If ok with you, I'll make this change upon merging.

Benny

> +	}
> +	kfree(cache);
> +}
> +
> +void
> +nfs4_put_deviceid_cache(struct nfs_client *clp)
> +{
> +	struct nfs4_deviceid_cache *tmp = clp->cl_devid_cache;
> +	int refcount;
> +
> +	dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
> +	spin_lock(&clp->cl_lock);
> +	refcount = atomic_read(&clp->cl_devid_cache->dc_kref.refcount);
> +	if (refcount == 1)
> +		clp->cl_devid_cache = NULL;
> +	spin_unlock(&clp->cl_lock);
> +	dprintk("%s [%d]\n", __func__, refcount);
> +	kref_put(&tmp->dc_kref, nfs4_free_deviceid_cache);
> +}
> +EXPORT_SYMBOL(nfs4_put_deviceid_cache);
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 3caac60..3b7aeb7 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -106,6 +106,7 @@ struct pnfs_layout_segment {
>  	struct kref kref;
>  	bool valid;
>  	struct pnfs_layout_type *layout;
> +	struct nfs4_deviceid *deviceid;
>  	u8 ld_data[];			/* layout driver private data */
>  };
>  
> @@ -275,6 +276,55 @@ struct pnfs_devicelist {
>  	struct pnfs_deviceid	dev_id[NFS4_PNFS_GETDEVLIST_MAXNUM];
>  };
>  
> +/*
> + * Device ID RCU cache. A device ID is unique per client ID and layout type.
> + */
> +#define NFS4_DEVICE_ID_HASH_BITS	5
> +#define NFS4_DEVICE_ID_HASH_SIZE	(1 << NFS4_DEVICE_ID_HASH_BITS)
> +#define NFS4_DEVICE_ID_HASH_MASK	(NFS4_DEVICE_ID_HASH_SIZE - 1)
> +
> +static inline u32
> +nfs4_deviceid_hash(struct pnfs_deviceid *id)
> +{
> +	unsigned char *cptr = (unsigned char *)id->data;
> +	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
> +	u32 x = 0;
> +
> +	while (nbytes--) {
> +		x *= 37;
> +		x += *cptr++;
> +	}
> +	return x & NFS4_DEVICE_ID_HASH_MASK;
> +}
> +
> +struct nfs4_deviceid_cache {
> +	spinlock_t		dc_lock;
> +	struct kref		dc_kref;
> +	void			(*dc_free_callback)(struct kref *);
> +	struct hlist_head	dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
> +};
> +
> +/* Device ID cache node */
> +struct nfs4_deviceid {
> +	struct hlist_node	de_node;
> +	struct pnfs_deviceid	de_id;
> +	struct kref		de_kref;
> +};
> +
> +extern int nfs4_alloc_init_deviceid_cache(struct nfs_client *,
> +				void (*free_callback)(struct kref *));
> +extern void nfs4_put_deviceid_cache(struct nfs_client *);
> +extern void nfs4_init_deviceid_node(struct nfs4_deviceid *);
> +extern struct nfs4_deviceid *nfs4_find_deviceid(struct nfs4_deviceid_cache *,
> +				struct pnfs_deviceid *);
> +extern struct nfs4_deviceid *nfs4_add_deviceid(struct nfs4_deviceid_cache *,
> +				struct nfs4_deviceid *);
> +extern void nfs4_set_layout_deviceid(struct pnfs_layout_segment *,
> +				struct nfs4_deviceid *);
> +extern void nfs4_unset_layout_deviceid(struct pnfs_layout_segment *,
> +				struct nfs4_deviceid *,
> +				void (*free_callback)(struct kref *));
> +
>  /* pNFS client callback functions.
>   * These operations allow the layout driver to access pNFS client
>   * specific information or call pNFS client->server operations.
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index 8522461..ef2e18e 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -87,6 +87,7 @@ struct nfs_client {
>  	u32			cl_exchange_flags;
>  	struct nfs4_session	*cl_session; 	/* sharred session */
>  	struct list_head	cl_lo_inodes;	/* Inodes having layouts */
> +	struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
>  #endif /* CONFIG_NFS_V4_1 */
>  
>  #ifdef CONFIG_NFS_FSCACHE


  parent reply	other threads:[~2010-05-03 11:48 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-26 16:18 [PATCH 0/3] pNFS generic device ID cache version 3 andros
2010-04-26 16:18 ` [PATCH 1/3] SQUASHME pnfs_submit: generic device ID cache andros
2010-04-26 16:18   ` [PATCH 2/3] SQUASHME pnfs_submit: fix multiple mount set_pnfs_layoutdriver andros
2010-04-26 16:18     ` [PATCH 3/3] SQUASHME pnfs-submit: file layout driver generic device ID cache andros
2010-05-03 11:48   ` Benny Halevy [this message]
2010-05-03 13:57     ` [pnfs] [PATCH 1/3] SQUASHME pnfs_submit: " William A. (Andy) Adamson
  -- strict thread matches above, loose matches on Subject: below --
2010-04-16 15:52 [PATCH 0/3] pNFS " andros
2010-04-16 15:52 ` [PATCH 1/3] SQUASHME pnfs_submit: " andros
2010-04-16 16:04   ` William A. (Andy) Adamson
     [not found]     ` <u2n89c397151004160904m9e862360xcaf0e187640b0177-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-04-21  5:59       ` [pnfs] " Benny Halevy
2010-04-21 15:22         ` William A. (Andy) Adamson
     [not found]           ` <l2l89c397151004210822j8b43009o3a9e78ceed901fd9-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-04-22 11:20             ` Benny Halevy
2010-04-22 15:47               ` William A. (Andy) Adamson
     [not found]                 ` <v2h89c397151004220847v3a31c493s4089d0cd53cf3e19-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-04-22 15:51                   ` Benny Halevy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4BDEB815.6030200@panasas.com \
    --to=bhalevy@panasas.com \
    --cc=andros@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.