public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* READDIRPLUS max mount option
@ 2008-03-07 19:37 Kyle Rose
  2008-03-07 19:59 ` Trond Myklebust
  0 siblings, 1 reply; 6+ messages in thread
From: Kyle Rose @ 2008-03-07 19:37 UTC (permalink / raw)
  To: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 653 bytes --]

I have a very specific use for an NFS mount over a WAN, and allowing for 
much larger expected READDIRPLUS requests actually improves performance 
by at least a factor of 10 by eliminating the round-trip latency that 
results from the application's single-threaded 
readdir/stat/stat/stat/... behavior.  Rather than maintain a hacked 
kernel on my end, I'd rather the READDIRPLUS limit be a mount option.  
Hence, the following patch.  It defaults to the old behavior 
(8*PAGE_SIZE), but with a properly-prepared mount binary will allow the 
client to specify a limit.

I'm not subscribed to the list, so please CC me in any relevant discussion.

Kyle


[-- Attachment #2: readdirplusmax.patch --]
[-- Type: text/x-patch, Size: 10063 bytes --]

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c5c0175..97cb997 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -675,6 +675,8 @@ static int nfs_init_server(struct nfs_server *server,
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
 
+	server->readdirplusmax = data->readdirplusmax;
+
 	/* Start lockd here, before we might error out */
 	error = nfs_start_lockd(server);
 	if (error < 0)
@@ -806,6 +808,7 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
 	target->acregmax = source->acregmax;
 	target->acdirmin = source->acdirmin;
 	target->acdirmax = source->acdirmax;
+	target->readdirplusmax = source->readdirplusmax;
 	target->caps = source->caps;
 }
 
@@ -1062,6 +1065,8 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
 
+	server->readdirplusmax = data->readdirplusmax;
+
 	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
 
 error:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 966a885..644239f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -234,9 +234,6 @@ nfs_init_locked(struct inode *inode, void *opaque)
 	return 0;
 }
 
-/* Don't use READDIRPLUS on directories that we believe are too large */
-#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
-
 /*
  * This is our front-end to iget that looks up inodes by file handle
  * instead of inode number.
@@ -290,7 +287,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
-			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
+			    && (NFS_READDIRPLUS_MAX(inode) == 0 || fattr->size <= NFS_READDIRPLUS_MAX(inode)))
 				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			/* Deal with crossing mountpoints */
 			if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9319927..91426a0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -36,6 +36,7 @@ struct nfs_parsed_mount_data {
 	int			timeo, retrans;
 	int			acregmin, acregmax,
 				acdirmin, acdirmax;
+	int			readdirplusmax;
 	int			namlen;
 	unsigned int		bsize;
 	unsigned int		auth_flavor_len;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 531379d..c5a9eb5 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -119,7 +119,7 @@ static int mount_port __initdata = 0;		/* Mount daemon port number */
 enum {
 	/* Options that take integer arguments */
 	Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
-	Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
+	Opt_acregmax, Opt_acdirmin, Opt_acdirmax, Opt_readdirplusmax,
 	/* Options that take no arguments */
 	Opt_soft, Opt_hard, Opt_intr,
 	Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
@@ -139,6 +139,7 @@ static match_table_t __initdata tokens = {
 	{Opt_acregmax, "acregmax=%u"},
 	{Opt_acdirmin, "acdirmin=%u"},
 	{Opt_acdirmax, "acdirmax=%u"},
+	{Opt_readdirplusmax, "readdirplusmax=%u"},
 	{Opt_soft, "soft"},
 	{Opt_hard, "hard"},
 	{Opt_intr, "intr"},
@@ -221,6 +222,9 @@ static int __init root_nfs_parse(char *name, char *buf)
 			case Opt_acdirmax:
 				nfs_data.acdirmax = option;
 				break;
+			case Opt_readdirplusmax:
+				nfs_data.readdirplusmax = option;
+				break;
 			case Opt_soft:
 				nfs_data.flags |= NFS_MOUNT_SOFT;
 				break;
@@ -292,15 +296,17 @@ static int __init root_nfs_name(char *name)
 
 	/* Set some default values */
 	memset(&nfs_data, 0, sizeof(nfs_data));
-	nfs_port          = -1;
-	nfs_data.version  = NFS_MOUNT_VERSION;
-	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
-	nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;
-	nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE;
-	nfs_data.acregmin = 3;
-	nfs_data.acregmax = 60;
-	nfs_data.acdirmin = 30;
-	nfs_data.acdirmax = 60;
+	nfs_port                = -1;
+	nfs_data.version        = NFS_MOUNT_VERSION;
+	nfs_data.flags          = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
+	nfs_data.rsize          = NFS_DEF_FILE_IO_SIZE;
+	nfs_data.wsize          = NFS_DEF_FILE_IO_SIZE;
+	nfs_data.acregmin       = 3;
+	nfs_data.acregmax       = 60;
+	nfs_data.acdirmin       = 30;
+	nfs_data.acdirmax       = 60;
+	nfs_data.readdirplusmax = 8*PAGE_SIZE;
+
 	strcpy(buf, NFS_ROOT);
 
 	/* Process options received from the remote server */
@@ -348,6 +354,8 @@ static void __init root_nfs_print(void)
 	printk(KERN_NOTICE "Root-NFS:     acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
 		nfs_data.acregmin, nfs_data.acregmax,
 		nfs_data.acdirmin, nfs_data.acdirmax);
+	printk(KERN_NOTICE "Root-NFS:     readdirplusmax = %d\n",
+		nfs_data.readdirplusmax);
 	printk(KERN_NOTICE "Root-NFS:     nfsd port = %d, mountd port = %d, flags = %08x\n",
 		nfs_port, mount_port, nfs_data.flags);
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fcf4b98..87dfdfb 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -83,6 +83,7 @@ enum {
 	Opt_acregmin, Opt_acregmax,
 	Opt_acdirmin, Opt_acdirmax,
 	Opt_actimeo,
+	Opt_readdirplusmax,
 	Opt_namelen,
 	Opt_mountport,
 	Opt_mountvers,
@@ -136,6 +137,7 @@ static match_table_t nfs_mount_option_tokens = {
 	{ Opt_acdirmin, "acdirmin=%u" },
 	{ Opt_acdirmax, "acdirmax=%u" },
 	{ Opt_actimeo, "actimeo=%u" },
+	{ Opt_readdirplusmax, "readdirplusmax=%u" },
 	{ Opt_userspace, "retry=%u" },
 	{ Opt_namelen, "namlen=%u" },
 	{ Opt_mountport, "mountport=%u" },
@@ -474,6 +476,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
 	if (nfss->acdirmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+	seq_printf(m, ",readdirplusmax=%d", nfss->readdirplusmax);
 	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 		if (nfss->flags & nfs_infop->flag)
 			seq_puts(m, nfs_infop->str);
@@ -851,6 +854,10 @@ static int nfs_parse_mount_options(char *raw,
 			mnt->acdirmin =
 			mnt->acdirmax = option;
 			break;
+		case Opt_readdirplusmax:
+			if (match_int(args, &mnt->readdirplusmax))
+				return 0;
+			break;
 		case Opt_namelen:
 			if (match_int(args, &mnt->namlen))
 				return 0;
@@ -1190,6 +1197,8 @@ static int nfs_validate_mount_data(void *options,
 	case 5:
 		memset(data->context, 0, sizeof(data->context));
 	case 6:
+		data->readdirplusmax = 8*PAGE_SIZE;
+	case 7:
 		if (data->flags & NFS_MOUNT_VER3)
 			mntfh->size = data->root.size;
 		else
@@ -1217,6 +1226,7 @@ static int nfs_validate_mount_data(void *options,
 		args->acregmax		= data->acregmax;
 		args->acdirmin		= data->acdirmin;
 		args->acdirmax		= data->acdirmax;
+		args->readdirplusmax	= data->readdirplusmax;
 
 		memcpy(&args->nfs_server.address, &data->addr,
 		       sizeof(data->addr));
@@ -1428,6 +1438,8 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n
 		goto Ebusy;
 	if (a->acdirmax != b->acdirmax)
 		goto Ebusy;
+	if (a->readdirplusmax != b->readdirplusmax)
+		goto Ebusy;
 	if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
 		goto Ebusy;
 	return 1;
@@ -1757,6 +1769,8 @@ static int nfs4_validate_mount_data(void *options,
 
 	switch (data->version) {
 	case 1:
+		data->readdirplusmax = 8*PAGE_SIZE;
+	case 2:
 		ap = (struct sockaddr_in *)&args->nfs_server.address;
 		if (data->host_addrlen > sizeof(args->nfs_server.address))
 			goto out_no_address;
@@ -1816,6 +1830,7 @@ static int nfs4_validate_mount_data(void *options,
 		args->acregmax	= data->acregmax;
 		args->acdirmin	= data->acdirmin;
 		args->acdirmax	= data->acdirmax;
+		args->readdirplusmax = data->readdirplusmax;
 		args->nfs_server.protocol = data->proto;
 
 		break;
diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h
index a0dcf66..ee9c21d 100644
--- a/include/linux/nfs4_mount.h
+++ b/include/linux/nfs4_mount.h
@@ -16,7 +16,7 @@
  * mount-to-kernel version compatibility.  Some of these aren't used yet
  * but here they are anyway.
  */
-#define NFS4_MOUNT_VERSION	1
+#define NFS4_MOUNT_VERSION	2
 
 struct nfs_string {
 	unsigned int len;
@@ -53,6 +53,8 @@ struct nfs4_mount_data {
 	/* Pseudo-flavours to use for authentication. See RFC2623 */
 	int auth_flavourlen;			/* 1 */
 	int __user *auth_flavours;		/* 1 */
+
+	int readdirplusmax;			/* 2 */
 };
 
 /* bits in the flags field */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a69ba80..d7401b4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -274,6 +274,11 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
 	return NFS_SERVER(inode)->caps & cap;
 }
 
+static inline unsigned int NFS_READDIRPLUS_MAX(struct inode *inode)
+{
+	return NFS_SERVER(inode)->readdirplusmax;
+}
+
 static inline int NFS_USE_READDIRPLUS(struct inode *inode)
 {
 	return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3423c67..d596065 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -99,6 +99,8 @@ struct nfs_server {
 	unsigned int		acdirmin;
 	unsigned int		acdirmax;
 	unsigned int		namelen;
+	unsigned int		readdirplusmax;	/* max believed dir size for
+						   READDIRPLUS */
 
 	struct nfs_fsid		fsid;
 	__u64			maxfilesize;	/* maximum file size */
diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h
index df7c6b7..686d185 100644
--- a/include/linux/nfs_mount.h
+++ b/include/linux/nfs_mount.h
@@ -20,7 +20,7 @@
  * mount-to-kernel version compatibility.  Some of these aren't used yet
  * but here they are anyway.
  */
-#define NFS_MOUNT_VERSION	6
+#define NFS_MOUNT_VERSION	7
 #define NFS_MAX_CONTEXT_LEN	256
 
 struct nfs_mount_data {
@@ -43,6 +43,7 @@ struct nfs_mount_data {
 	struct nfs3_fh	root;			/* 4 */
 	int		pseudoflavor;		/* 5 */
 	char		context[NFS_MAX_CONTEXT_LEN + 1];	/* 6 */
+	int		readdirplusmax;		/* 7 */
 };
 
 /* bits in the flags field */

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-03-09 16:16 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-03-07 19:37 READDIRPLUS max mount option Kyle Rose
2008-03-07 19:59 ` Trond Myklebust
2008-03-07 20:09   ` Kyle Rose
2008-03-07 20:42     ` Trond Myklebust
2008-03-07 21:04       ` Kyle Rose
2008-03-09 16:16         ` Jan Engelhardt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox