linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@poochiereds.net>
To: bfields@fieldses.org
Cc: linux-nfs@vger.kernel.org, hch@lst.de, kinglongmee@gmail.com
Subject: [PATCH v3 02/20] nfsd: add a new struct file caching facility to nfsd
Date: Thu, 20 Aug 2015 07:17:02 -0400	[thread overview]
Message-ID: <1440069440-27454-3-git-send-email-jeff.layton@primarydata.com> (raw)
In-Reply-To: <1440069440-27454-1-git-send-email-jeff.layton@primarydata.com>

Currently, NFSv2/3 reads and writes have to open a file, do the read or
write and then close it again for each RPC. This is highly inefficient,
especially when the underlying filesystem has a relatively slow open
routine.

This patch adds a new open file cache to knfsd. Rather than doing an
open for each RPC, the read/write handlers can call into this cache to
see if there is one already there for the correct filehandle and
NFSD_MAY_READ/WRITE flags.

If there isn't an entry, then we create a new one and attempt to
perform the open. If there is, then we wait until the entry is no
longer under construction and return it if the open succeeded. If the
open failed, then we attempt to take over construction.

Since the main goal is to speed up NFSv2/3 I/O, we don't want to
close these files on last put of these objects. We need to keep them
around for a little while since we never know when the next READ/WRITE
will come in.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
---
 fs/nfsd/Makefile    |   3 +-
 fs/nfsd/filecache.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/filecache.h |  29 ++++++
 fs/nfsd/nfssvc.c    |  10 +-
 4 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 fs/nfsd/filecache.c
 create mode 100644 fs/nfsd/filecache.h

diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9a6028e120c6..8908bb467727 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,7 +10,8 @@ obj-$(CONFIG_NFSD)	+= nfsd.o
 nfsd-y			+= trace.o
 
 nfsd-y 			+= nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
-			   export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+			   export.o auth.o lockd.o nfscache.o nfsxdr.o \
+			   stats.o filecache.o
 nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)	+= nfs3proc.o nfs3xdr.o
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..5bb56fa9002f
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,273 @@
+/*
+ * Open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "filecache.h"
+
+#define NFSDDBG_FACILITY	NFSDDBG_FH
+
+/* hash table for nfsd_file */
+#define NFSD_FILE_HASH_BITS                   8
+#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)
+
+struct nfsd_fcache_bucket {
+	struct hlist_head	nfb_head;
+	spinlock_t		nfb_lock;
+};
+
+static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
+
+static struct nfsd_file *
+nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval)
+{
+	struct nfsd_file *nf;
+
+	/* FIXME: create a new slabcache for these? */
+	nf = kzalloc(sizeof(*nf), GFP_KERNEL);
+	if (nf) {
+		INIT_HLIST_NODE(&nf->nf_node);
+		INIT_LIST_HEAD(&nf->nf_dispose);
+		nf->nf_inode = inode;
+		nf->nf_hashval = hashval;
+		atomic_set(&nf->nf_ref, 1);
+		nf->nf_may = NFSD_FILE_MAY_MASK & may;
+	}
+	return nf;
+}
+
+static void
+nfsd_file_put_final(struct nfsd_file *nf)
+{
+	if (nf->nf_file)
+		fput(nf->nf_file);
+	kfree_rcu(nf, nf_rcu);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+		clear_bit(NFSD_FILE_HASHED, &nf->nf_flags);
+		hlist_del_rcu(&nf->nf_node);
+		return true;
+	}
+	return false;
+}
+
+static void
+nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
+{
+	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
+
+	if (!nfsd_file_unhash(nf))
+		return;
+	if (!atomic_dec_and_test(&nf->nf_ref))
+		return;
+
+	list_add(&nf->nf_dispose, dispose);
+}
+
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+	if (!atomic_dec_and_test(&nf->nf_ref))
+		return;
+
+	WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
+	nfsd_file_put_final(nf);
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+	if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+		return nf;
+	return NULL;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+	struct nfsd_file *nf;
+
+	while(!list_empty(dispose)) {
+		nf = list_first_entry(dispose, struct nfsd_file, nf_dispose);
+		list_del(&nf->nf_dispose);
+		nfsd_file_put_final(nf);
+	}
+}
+
+int
+nfsd_file_cache_init(void)
+{
+	unsigned int i;
+
+	if (nfsd_file_hashtbl)
+		return 0;
+
+	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
+				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
+	if (!nfsd_file_hashtbl)
+		goto out_nomem;
+
+	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
+		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
+	}
+
+	return 0;
+out_nomem:
+	printk(KERN_ERR "nfsd: failed to init nfsd file cache\n");
+	return -ENOMEM;
+}
+
+void
+nfsd_file_cache_shutdown(void)
+{
+	unsigned int		i;
+	struct nfsd_file	*nf;
+	LIST_HEAD(dispose);
+
+	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
+		spin_lock(&nfsd_file_hashtbl[i].nfb_lock);
+		while(!hlist_empty(&nfsd_file_hashtbl[i].nfb_head)) {
+			nf = hlist_entry(nfsd_file_hashtbl[i].nfb_head.first,
+					 struct nfsd_file, nf_node);
+			nfsd_file_unhash_and_release_locked(nf, &dispose);
+		}
+		spin_unlock(&nfsd_file_hashtbl[i].nfb_lock);
+		nfsd_file_dispose_list(&dispose);
+	}
+	kfree(nfsd_file_hashtbl);
+	nfsd_file_hashtbl = NULL;
+}
+
+/*
+ * Search the nfsd_file_hashtbl[] bucket at hashval for an entry that matches
+ * inode and has every requested NFSD_MAY_READ/WRITE flag set (so a file open
+ * for r/w is usable for either). Returns it with a reference taken, or NULL.
+ */
+static struct nfsd_file *
+nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
+			unsigned int hashval)
+{
+	struct nfsd_file *nf;
+	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+
+	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
+				 nf_node) {
+		if ((need & nf->nf_may) != need)
+			continue;
+		if (nf->nf_inode == inode)
+			return nfsd_file_get(nf);
+	}
+	return NULL;
+}
+
+/* Find or create a cached open file for fhp; on nfs_ok, *pnf holds a ref */
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		  unsigned int may_flags, struct nfsd_file **pnf)
+{
+	__be32	status = nfs_ok;
+	struct nfsd_file *nf, *new = NULL;
+	struct inode *inode;
+	unsigned int hashval;
+
+	/* FIXME: skip this if fh_dentry is already set? */
+	status = fh_verify(rqstp, fhp, S_IFREG, may_flags);
+	if (status != nfs_ok)
+		return status;
+
+	/* Mask off any extraneous bits */
+	may_flags &= NFSD_FILE_MAY_MASK;
+
+	inode = d_inode(fhp->fh_dentry);
+	hashval = (unsigned int)hash_ptr(inode, NFSD_FILE_HASH_BITS);
+retry:
+	rcu_read_lock();
+	nf = nfsd_file_find_locked(inode, may_flags, hashval);
+	rcu_read_unlock();
+	if (nf)
+		goto wait_for_construction;
+
+	if (!new) {
+		new = nfsd_file_alloc(inode, may_flags, hashval);
+		if (!new)
+			return nfserr_jukebox;
+	}
+
+	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
+	nf = nfsd_file_find_locked(inode, may_flags, hashval);
+	if (likely(nf == NULL)) {
+		/* Take reference for the hashtable */
+		atomic_inc(&new->nf_ref);
+		__set_bit(NFSD_FILE_HASHED, &new->nf_flags);
+		__set_bit(NFSD_FILE_PENDING, &new->nf_flags);
+		hlist_add_head_rcu(&new->nf_node,
+				&nfsd_file_hashtbl[hashval].nfb_head);
+		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+		nf = new;
+		new = NULL;
+		goto open_file;
+	}
+	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
+
+wait_for_construction:
+	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+	/* Did construction of this file fail? */
+	if (!nf->nf_file) {
+		/*
+		 * We can only take over construction for this nfsd_file if
+		 * the MAY flags are equal. Otherwise, put the ref and retry.
+		 */
+		if (may_flags != nf->nf_may) {
+			nfsd_file_put(nf);
+			goto retry;
+		}
+
+		/* try to take over construction for this file */
+		if (test_and_set_bit(NFSD_FILE_PENDING, &nf->nf_flags))
+			goto wait_for_construction;
+		goto open_file;
+	}
+
+	/*
+	 * The file was opened in the context of another rqst, so we must check
+	 * permissions. For open files, always set the OWNER_OVERRIDE bit.
+	 */
+	status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+					may_flags|NFSD_MAY_OWNER_OVERRIDE);
+out:
+	if (status == nfs_ok)
+		*pnf = nf;
+	else
+		nfsd_file_put(nf);
+
+	if (new)
+		nfsd_file_put(new);
+	return status;
+open_file:
+	status = nfsd_open(rqstp, fhp, S_IFREG, may_flags, &nf->nf_file);
+	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
+	smp_mb__after_atomic();	/* wake_up_bit() needs a barrier before it */
+	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+	goto out;
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 000000000000..b0f500353ed4
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,29 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only be used for comparison.
+ */
+struct nfsd_file {
+	struct hlist_node	nf_node;
+	struct list_head	nf_dispose;
+	struct rcu_head		nf_rcu;
+	struct file		*nf_file;
+#define NFSD_FILE_HASHED	(0)
+#define NFSD_FILE_PENDING	(1)
+	unsigned long		nf_flags;
+	struct inode		*nf_inode;
+	unsigned int		nf_hashval;
+	atomic_t		nf_ref;
+	unsigned char		nf_may;
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_shutdown(void);
+void nfsd_file_put(struct nfsd_file *nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		  unsigned int may_flags, struct nfsd_file **nfp);
+#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ad4e2377dd63..d816bb3faa6e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -22,6 +22,7 @@
 #include "cache.h"
 #include "vfs.h"
 #include "netns.h"
+#include "filecache.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_SVC
 
@@ -224,11 +225,17 @@ static int nfsd_startup_generic(int nrservs)
 	if (ret)
 		goto dec_users;
 
-	ret = nfs4_state_start();
+	ret = nfsd_file_cache_init();
 	if (ret)
 		goto out_racache;
+
+	ret = nfs4_state_start();
+	if (ret)
+		goto out_file_cache;
 	return 0;
 
+out_file_cache:
+	nfsd_file_cache_shutdown();
 out_racache:
 	nfsd_racache_shutdown();
 dec_users:
@@ -242,6 +249,7 @@ static void nfsd_shutdown_generic(void)
 		return;
 
 	nfs4_state_shutdown();
+	nfsd_file_cache_shutdown();
 	nfsd_racache_shutdown();
 }
 
-- 
2.4.3


  parent reply	other threads:[~2015-08-20 11:17 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-20 11:17 [PATCH v3 00/20] nfsd: open file caching Jeff Layton
2015-08-20 11:17 ` [PATCH v3 01/20] nfsd: allow more than one laundry job to run at a time Jeff Layton
2015-08-20 11:17 ` Jeff Layton [this message]
2015-08-20 23:11   ` [PATCH v3 02/20] nfsd: add a new struct file caching facility to nfsd Peng Tao
2015-08-20 23:43     ` Jeff Layton
2015-08-20 11:17 ` [PATCH v3 03/20] list_lru: add list_lru_rotate Jeff Layton
2015-08-21  9:36   ` Vladimir Davydov
2015-08-20 11:17 ` [PATCH v3 04/20] nfsd: add a LRU list for nfsd_files Jeff Layton
2015-08-20 11:17 ` [PATCH v3 05/20] nfsd: add a shrinker to the nfsd_file cache Jeff Layton
2015-08-20 11:17 ` [PATCH v3 06/20] locks/nfsd: create a new notifier chain for lease attempts Jeff Layton
2015-08-26 19:49   ` J. Bruce Fields
2015-08-26 22:39     ` Jeff Layton
2015-08-20 11:17 ` [PATCH v3 07/20] nfsd: hook up nfsd_write to the new nfsd_file cache Jeff Layton
2015-08-26 19:53   ` J. Bruce Fields
2015-08-26 22:40     ` Jeff Layton
2015-08-20 11:17 ` [PATCH v3 08/20] nfsd: hook up nfsd_read to the " Jeff Layton
2015-08-20 11:17 ` [PATCH v3 09/20] sunrpc: add a new cache_detail operation for when a cache is flushed Jeff Layton
2015-08-20 11:17 ` [PATCH v3 10/20] nfsd: handle NFSD_MAY_NOT_BREAK_LEASE in open file cache Jeff Layton
2015-08-20 11:17 ` [PATCH v3 11/20] nfsd: hook nfsd_commit up to the nfsd_file cache Jeff Layton
2015-08-20 11:17 ` [PATCH v3 12/20] nfsd: move include of state.h from trace.c to trace.h Jeff Layton
2015-08-20 11:17 ` [PATCH v3 13/20] nfsd: add new tracepoints for nfsd_file cache Jeff Layton
2015-08-20 11:17 ` [PATCH v3 14/20] nfsd: close cached files prior to a REMOVE or RENAME that would replace target Jeff Layton
2015-08-26 20:00   ` J. Bruce Fields
2015-08-26 22:53     ` Jeff Layton
2015-08-27 13:38       ` J. Bruce Fields
2015-08-28 12:19         ` Jeff Layton
2015-08-28 17:58           ` J. Bruce Fields
2015-08-31 16:50             ` Jeff Layton
2015-08-20 11:17 ` [PATCH v3 15/20] nfsd: call flush_delayed_fput from nfsd_file_close_fh Jeff Layton
2015-08-21  1:01   ` Peng Tao
2015-08-21  2:18     ` Peng Tao
2015-08-21 11:21       ` Jeff Layton
2015-08-20 11:17 ` [PATCH v3 16/20] nfsd: convert nfs4_file->fi_fds array to use nfsd_files Jeff Layton
2015-08-20 11:17 ` [PATCH v3 17/20] nfsd: have nfsd_test_lock use the nfsd_file cache Jeff Layton
2015-08-20 11:17 ` [PATCH v3 18/20] nfsd: convert fi_deleg_file and ls_file fields to nfsd_file Jeff Layton
2015-08-20 11:17 ` [PATCH v3 19/20] nfsd: hook up nfs4_preprocess_stateid_op to the nfsd_file cache Jeff Layton
2015-08-21  1:28   ` Peng Tao
2015-08-21 11:23     ` Jeff Layton
2015-08-20 11:17 ` [PATCH v3 20/20] nfsd: rip out the raparms cache Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440069440-27454-3-git-send-email-jeff.layton@primarydata.com \
    --to=jlayton@poochiereds.net \
    --cc=bfields@fieldses.org \
    --cc=hch@lst.de \
    --cc=kinglongmee@gmail.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).