From: Krishna Kumar <krkumar2@in.ibm.com>
To: bfields@fieldses.org
Cc: linux-nfs@vger.kernel.org, Krishna Kumar <krkumar2@in.ibm.com>
Subject: [RFC] [PATCH v2 1/1]: nfsd: Change caching from ino/dev to file-handle
Date: Sat, 24 Jan 2009 18:05:11 +0530 [thread overview]
Message-ID: <20090124123511.10995.88449.sendpatchset@localhost.localdomain> (raw)
In-Reply-To: <20090124123457.10995.57636.sendpatchset-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
From: Krishna Kumar <krkumar2@in.ibm.com>
Implement the file handle caching. List of changes:
1. Remove all implementation and users of readahead, rename RA to FH,
parm to cache.
2. Add fields to struct fhcache to cache the file, svc_export, expiry list
and expiry time. Modify some other fields (e.g. p_count is now atomic).
3. Implement a daemon to clean up cached FHs.
4. Added four helper functions:
fh_cache_get: Hold a reference to dentry and svc_export.
fh_cache_put: Drop a reference to file, dentry and svc_export.
fh_get_cached_entries: Returns cached file and svc_export.
fh_cache_upd: Cache file and svc_export. Add entry to list for
daemon to cleanup.
5. nfsd_get_raparms is slightly rewritten (changed to nfsd_get_fhcache).
6. nfsd_read rewritten to use the cache, remove RA from nfsd_vfs_read.
7. A file remove operation from the client results in the server checking
the cache and dropping the reference immediately (a remove operation done
on the server itself still retains the reference for a short time before
the daemon frees up the entry).
8. init and shutdown are slightly modified.
(ra_size, ra_depth, nfsd_racache_init and nfsd_racache_shutdown still
retain the "ra" prefix for now)
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
fs/nfsd/vfs.c | 429 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 338 insertions(+), 91 deletions(-)
diff -ruNp 2.6.29-rc2.org/fs/nfsd/vfs.c 2.6.29-rc2.new/fs/nfsd/vfs.c
--- 2.6.29-rc2.org/fs/nfsd/vfs.c 2009-01-22 13:23:18.000000000 +0530
+++ 2.6.29-rc2.new/fs/nfsd/vfs.c 2009-01-22 13:23:18.000000000 +0530
@@ -55,38 +55,53 @@
#include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */
#include <linux/jhash.h>
+#include <linux/kthread.h>
#include <asm/uaccess.h>
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
+/* Number of jiffies to cache the file before releasing */
+#define NFSD_CACHE_JIFFIES 100
/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
+ * This is a cache of file handles to quicken file lookup. This also
+ * helps prevent multiple open/close of a file when a client reads it.
+ *
* If you increase the number of cached files very much, you'll need to
* add a hash table here.
*/
-struct raparms {
- struct raparms *p_next;
- unsigned int p_count;
- ino_t p_ino;
- dev_t p_dev;
- int p_set;
- struct file_ra_state p_ra;
+struct fhcache {
+ struct fhcache *p_next;
+
+ /* Hashed on this parameter */
+ __u32 p_auth;
+
+ /* Cached information */
+ struct file *p_filp;
+ struct svc_export *p_exp;
+
+ /* Refcount for overwrite */
+ atomic_t p_count;
+
+ /* When this entry expires */
+ unsigned long p_expires;
+
+ /* List of entries linked to 'nfsd_daemon_list' */
+ struct list_head p_list;
+
unsigned int p_hindex;
};
-struct raparm_hbucket {
- struct raparms *pb_head;
+struct fhcache_hbucket {
+ struct fhcache *pb_head;
spinlock_t pb_lock;
} ____cacheline_aligned_in_smp;
-#define RAPARM_HASH_BITS 4
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
+#define FHPARM_HASH_BITS 8
+#define FHPARM_HASH_SIZE (1<<FHPARM_HASH_BITS)
+#define FHPARM_HASH_MASK (FHPARM_HASH_SIZE-1)
+static struct fhcache_hbucket fhcache_hash[FHPARM_HASH_SIZE];
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -784,51 +799,223 @@ nfsd_sync_dir(struct dentry *dp)
return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
}
+/* Daemon to handle expired fh cache entries */
+static struct task_struct *k_nfsd_task;
+
+/* Synchronization between the daemon and enqueuers */
+static DEFINE_SPINLOCK(k_nfsd_lock);
+
+/* List of FH cache entries that have to be cleaned up when they expire */
+static struct list_head nfsd_daemon_list;
+
+/*
+ * Returns cached values of 'file' and svc_export; resets these entries
+ * to NULL.
+ */
+static inline void fh_get_cached_entries(struct fhcache *fh,
+ struct file **filep,
+ struct svc_export **expp)
+{
+ *filep = fh->p_filp;
+ *expp = fh->p_exp;
+
+ fh->p_filp = NULL;
+ fh->p_exp = NULL;
+}
+
+/*
+ * Hold a reference to dentry and svc_export (file already has an extra
+ * reference count as it is not closed normally).
+ */
+static inline void fh_cache_get(struct file *file, struct svc_export *exp)
+{
+ dget(file->f_dentry);
+ cache_get(&exp->h);
+}
+
+/* Drop a reference to file, dentry and svc_export */
+static inline void fh_cache_put(struct file *file, struct svc_export *exp)
+{
+ cache_put(&exp->h, &svc_export_cache);
+ dput(file->f_dentry);
+ fput(file);
+}
+
/*
- * Obtain the readahead parameters for the file
- * specified by (dev, ino).
+ * Holds a reference to dentry and svc_export, and caches both. Add fh entry
+ * to list for daemon to cleanup later. Once we add the entry to the list,
+ * we prefer that it expire prematurely instead of updating it on every read.
*/
+static inline void fh_cache_upd(struct fhcache *fh, struct file *file,
+ struct svc_export *exp)
+{
+ if (fh) {
+ if (!fh->p_filp && file) {
+ struct fhcache_hbucket *fhb;
+
+ fh_cache_get(file, exp);
+
+ fhb = &fhcache_hash[fh->p_hindex];
+
+ spin_lock(&fhb->pb_lock);
+ fh->p_filp = file;
+ fh->p_exp = exp;
+ fh->p_expires = jiffies + NFSD_CACHE_JIFFIES;
-static inline struct raparms *
-nfsd_get_raparms(dev_t dev, ino_t ino)
+ spin_lock(&k_nfsd_lock);
+ list_add_tail(&fh->p_list, &nfsd_daemon_list);
+ spin_unlock(&k_nfsd_lock);
+
+ spin_unlock(&fhb->pb_lock);
+ }
+
+ /* Drop our reference */
+ atomic_dec(&fh->p_count);
+ } else if (file)
+ nfsd_close(file);
+}
+
+/* Daemon cache cleanup handler */
+void daemon_free_entries(void)
{
- struct raparms *ra, **rap, **frap = NULL;
- int depth = 0;
- unsigned int hash;
- struct raparm_hbucket *rab;
+ unsigned long now = jiffies;
+
+ spin_lock(&k_nfsd_lock);
+ while (!list_empty(&nfsd_daemon_list)) {
+ struct fhcache *fh = list_entry(nfsd_daemon_list.next,
+ struct fhcache, p_list);
+ struct fhcache_hbucket *fhb;
+ struct file *file;
+ struct svc_export *exp;
+
+ if (time_after(fh->p_expires, now) || now != jiffies) {
+ /*
+ * This (and all subsequent entries) have not expired;
+ * or we have spent too long in this loop.
+ */
+ break;
+ }
+
+ fhb = &fhcache_hash[fh->p_hindex];
+
+ /*
+ * Make sure we do not deadlock with updaters - we can free
+ * entry next time in case of a race.
+ */
+ if (!spin_trylock(&fhb->pb_lock)) {
+ /*
+ * Entry is being used, no need to free this, try later
+ */
+ break;
+ }
+
+ if (atomic_read(&fh->p_count)) {
+ spin_unlock(&fhb->pb_lock);
+ break;
+ }
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
- rab = &raparm_hash[hash];
+ list_del(&fh->p_list);
+ fh_get_cached_entries(fh, &file, &exp);
+ spin_unlock(&fhb->pb_lock);
+ spin_unlock(&k_nfsd_lock);
+
+ fh_cache_put(file, exp);
+ spin_lock(&k_nfsd_lock);
+ }
+ spin_unlock(&k_nfsd_lock);
+}
+
+static int k_nfsd_thread(void *unused)
+{
+ while (!kthread_should_stop()) {
+ schedule_timeout_interruptible(NFSD_CACHE_JIFFIES);
- spin_lock(&rab->pb_lock);
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
- if (ra->p_ino == ino && ra->p_dev == dev)
+ if (kthread_should_stop())
+ break;
+
+ daemon_free_entries();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ return 0;
+}
+
+/*
+ * Obtain the cached file, export and d_inode values for the FH
+ * specified by fh_handle.fh_auth[3]
+ */
+static inline struct fhcache *
+nfsd_get_fhcache(__u32 auth)
+{
+ struct fhcache *fh, **fhp, **ffhp = NULL;
+ int depth = 0;
+ unsigned int hash;
+ struct fhcache_hbucket *fhb;
+
+ if (!auth)
+ return NULL;
+
+ hash = jhash_1word(auth, 0xfeedbeef) & FHPARM_HASH_MASK;
+ fhb = &fhcache_hash[hash];
+
+ spin_lock(&fhb->pb_lock);
+ for (fhp = &fhb->pb_head; (fh = *fhp); fhp = &fh->p_next) {
+ if (fh->p_auth == auth) {
+ /* Same inode */
+ if (unlikely(!fh->p_filp)) {
+ /*
+ * Someone is racing in the same code, and
+ * this is the first reference to this file.
+ */
+ spin_unlock(&fhb->pb_lock);
+ return NULL;
+ }
+
+ /*
+ * Hold an extra reference to dentry/exp since these
+ * are released in fh_put(). 'file' already has an
+ * extra hold from the first lookup which was never
+ * dropped.
+ */
+ fh_cache_get(fh->p_filp, fh->p_exp);
goto found;
+ }
+
depth++;
- if (ra->p_count == 0)
- frap = rap;
+
+ if (!ffhp && !fh->p_filp) {
+ /*
+ * This is an unused inode (or a different one), and no
+ * entry was found till now.
+ */
+ if (!atomic_read(&fh->p_count)) /* Entry is unused */
+ ffhp = fhp;
+ }
}
- depth = nfsdstats.ra_size*11/10;
- if (!frap) {
- spin_unlock(&rab->pb_lock);
+
+ if (!ffhp) {
+ spin_unlock(&fhb->pb_lock);
return NULL;
}
- rap = frap;
- ra = *frap;
- ra->p_dev = dev;
- ra->p_ino = ino;
- ra->p_set = 0;
- ra->p_hindex = hash;
+
+ depth = nfsdstats.ra_size*11/10;
+ fhp = ffhp;
+ fh = *ffhp;
+ fh->p_hindex = hash;
+ fh->p_auth = auth;
+
found:
- if (rap != &rab->pb_head) {
- *rap = ra->p_next;
- ra->p_next = rab->pb_head;
- rab->pb_head = ra;
+ if (fhp != &fhb->pb_head) {
+ *fhp = fh->p_next;
+ fh->p_next = fhb->pb_head;
+ fhb->pb_head = fh;
}
- ra->p_count++;
+
+ atomic_inc(&fh->p_count);
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&rab->pb_lock);
- return ra;
+ spin_unlock(&fhb->pb_lock);
+
+ return fh;
}
/*
@@ -892,7 +1079,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
struct inode *inode;
- struct raparms *ra;
mm_segment_t oldfs;
__be32 err;
int host_err;
@@ -903,11 +1089,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
goto out;
- /* Get readahead parameters */
- ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
-
- if (ra && ra->p_set)
- file->f_ra = ra->p_ra;
if (file->f_op->splice_read && rqstp->rq_splice_ok) {
struct splice_desc sd = {
@@ -926,16 +1107,6 @@ nfsd_vfs_read(struct svc_rqst *rqstp, st
set_fs(oldfs);
}
- /* Write back readahead params */
- if (ra) {
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
- }
-
if (host_err >= 0) {
nfsdstats.io_read += host_err;
*count = host_err;
@@ -1078,12 +1249,30 @@ nfsd_read(struct svc_rqst *rqstp, struct
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
} else {
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
- if (err)
- goto out;
- err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
- nfsd_close(file);
+ struct fhcache *fh;
+
+ /* Check if this fh is cached */
+ fh = nfsd_get_fhcache(fhp->fh_handle.fh_auth[3]);
+ if (fh && fh->p_filp) {
+ /* Got cached values */
+ file = fh->p_filp;
+ fhp->fh_dentry = file->f_dentry;
+ fhp->fh_export = fh->p_exp;
+ err = fh_verify(rqstp, fhp, S_IFREG, NFSD_MAY_READ);
+ } else {
+ /* Nothing in cache */
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ,
+ &file);
+ }
+
+ if (!err)
+ err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen,
+ count);
+
+ /* Update cached values if required, and clean up */
+ fh_cache_upd(fh, file, fhp->fh_export);
}
+
out:
return err;
}
@@ -1791,6 +1980,39 @@ nfsd_unlink(struct svc_rqst *rqstp, stru
goto out_nfserr;
if (type != S_IFDIR) { /* It's UNLINK */
+ int i, found = 0;
+
+ for (i = 0 ; i < FHPARM_HASH_SIZE && !found; i++) {
+ struct fhcache_hbucket *fhb = &fhcache_hash[i];
+ struct fhcache *fh;
+
+ spin_lock(&fhb->pb_lock);
+ for (fh = fhb->pb_head; fh; fh = fh->p_next) {
+ if (fh->p_filp &&
+ fh->p_filp->f_dentry == rdentry) {
+ /* Found the entry for removed file */
+ struct file *file;
+ struct svc_export *exp;
+
+ fh_get_cached_entries(fh, &file, &exp);
+
+ spin_lock(&k_nfsd_lock);
+ list_del(&fh->p_list);
+ spin_unlock(&k_nfsd_lock);
+
+ spin_unlock(&fhb->pb_lock);
+
+ /* Drop reference to this entry */
+ fh_cache_put(file, exp);
+
+ spin_lock(&fhb->pb_lock);
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock(&fhb->pb_lock);
+ }
+
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
(atomic_read(&rdentry->d_count) > 1)) {
@@ -2061,23 +2283,36 @@ nfsd_permission(struct svc_rqst *rqstp,
void
nfsd_racache_shutdown(void)
{
- struct raparms *raparm, *last_raparm;
unsigned int i;
- dprintk("nfsd: freeing readahead buffers.\n");
+ dprintk("nfsd: freeing FH buffers.\n");
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- raparm = raparm_hash[i].pb_head;
- while(raparm) {
- last_raparm = raparm;
- raparm = raparm->p_next;
- kfree(last_raparm);
+ /* Stop the daemon and free the list entries */
+ kthread_stop(k_nfsd_task);
+ k_nfsd_task = NULL;
+
+ for (i = 0; i < FHPARM_HASH_SIZE; i++) {
+ struct fhcache *fhcache, *last_fhcache;
+
+ fhcache = fhcache_hash[i].pb_head;
+ while(fhcache) {
+ last_fhcache = fhcache;
+ if (fhcache->p_filp) {
+ struct file *file;
+ struct svc_export *exp;
+
+ fh_get_cached_entries(fhcache, &file, &exp);
+ list_del(&fhcache->p_list);
+ fh_cache_put(file, exp);
+ }
+ fhcache = fhcache->p_next;
+ kfree(last_fhcache);
}
- raparm_hash[i].pb_head = NULL;
+ fhcache_hash[i].pb_head = NULL;
}
}
/*
- * Initialize readahead param cache
+ * Initialize file handle cache
*/
int
nfsd_racache_init(int cache_size)
@@ -2085,36 +2320,48 @@ nfsd_racache_init(int cache_size)
int i;
int j = 0;
int nperbucket;
- struct raparms **raparm = NULL;
+ struct fhcache **fhcache = NULL;
- if (raparm_hash[0].pb_head)
+ if (fhcache_hash[0].pb_head)
return 0;
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
+ nperbucket = DIV_ROUND_UP(cache_size, FHPARM_HASH_SIZE);
if (nperbucket < 2)
nperbucket = 2;
- cache_size = nperbucket * RAPARM_HASH_SIZE;
+ cache_size = nperbucket * FHPARM_HASH_SIZE;
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
+ dprintk("nfsd: allocating %d file handle cache buffers.\n", cache_size);
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- spin_lock_init(&raparm_hash[i].pb_lock);
+ for (i = 0; i < FHPARM_HASH_SIZE; i++) {
+ spin_lock_init(&fhcache_hash[i].pb_lock);
- raparm = &raparm_hash[i].pb_head;
+ fhcache = &fhcache_hash[i].pb_head;
for (j = 0; j < nperbucket; j++) {
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
- if (!*raparm)
+ *fhcache = kzalloc(sizeof(struct fhcache), GFP_KERNEL);
+ if (!*fhcache) {
+ dprintk("nfsd: kmalloc failed, freeing file cache buffers\n");
goto out_nomem;
- raparm = &(*raparm)->p_next;
+ }
+ INIT_LIST_HEAD(&(*fhcache)->p_list);
+ fhcache = &(*fhcache)->p_next;
}
- *raparm = NULL;
+ *fhcache = NULL;
}
nfsdstats.ra_size = cache_size;
+
+ INIT_LIST_HEAD(&nfsd_daemon_list);
+ k_nfsd_task = kthread_run(k_nfsd_thread, NULL, "nfsd_cacher");
+
+ if (IS_ERR(k_nfsd_task)) {
+ printk(KERN_ERR "%s: unable to create kernel thread: %ld\n",
+ __FUNCTION__, PTR_ERR(k_nfsd_task));
+ goto out_nomem;
+ }
+
return 0;
out_nomem:
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
nfsd_racache_shutdown();
return -ENOMEM;
}
next prev parent reply other threads:[~2009-01-24 12:35 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-24 12:34 [RFC] [PATCH v2 0/1] nfsd: Improve NFS server performance Krishna Kumar
[not found] ` <20090124123457.10995.57636.sendpatchset-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2009-01-24 12:35 ` Krishna Kumar [this message]
[not found] ` <20090124123511.10995.88449.sendpatchset-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2009-02-20 23:49 ` [RFC] [PATCH v2 1/1]: nfsd: Change caching from ino/dev to file-handle Jeff Layton
[not found] ` <20090220154903.1e0c6952-PC62bkCOHzGdMjc06nkz3ljfA9RmPOcC@public.gmane.org>
2009-02-21 7:27 ` Krishna Kumar2
2009-02-21 16:35 ` Jeff Layton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090124123511.10995.88449.sendpatchset@localhost.localdomain \
--to=krkumar2@in.ibm.com \
--cc=bfields@fieldses.org \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox