* RFC: adding an "fs" directory under /sys
@ 2004-04-17 19:46 Lever, Charles
2004-04-17 20:07 ` viro
0 siblings, 1 reply; 11+ messages in thread
From: Lever, Charles @ 2004-04-17 19:46 UTC (permalink / raw)
To: linux-fsdevel; +Cc: Trond Myklebust, Patrick Mochel, nfs
[-- Attachment #1: Type: text/plain, Size: 1554 bytes --]
i've been working on a patch set that will expose I/O counters
for NFS mount points. we're considering exposing these counters
via /sys.
i'm thinking this should go under /sys/fs, where there is one
subdirectory in /sys/fs for each file system type available
on the system.
the patches are against 2.6.5.
attached patch 04-sys-fs adds the mechanicals to allow each
file system implementation to supply a subsystem structure
that can be registered in /sys/fs when the file system
registers with the kernel at module load / boot time.
attached patch 05-sys-fs-nfs demonstrates what might be done
with such a capability by providing specific attributes for
each NFS mount point.
attached patch 06-nfs-iostat implements NFS I/O statistics
counters and exports them via /sys/fs.
[ patch 07-rpc-metrics, not yet written, would extend this
idea to include RPC client I/O statistics for each mount point ]
i'd like some comments on this approach.
1. is it a good and appropriate usage of /sys to add an
fs/ subdirectory for file system types?
2. i've used the kobject embedded in the super_block
for the NFS specific pieces of this. should i instead
add and use an embedded kobject in the nfs_server
struct?
3. would it be better to have a /sys/mount kset that lists
all the mounts on the system, and leave file system
instance related info out of /sys/fs/<file-type> ?
- Chuck Lever
--
corporate: <cel at netapp dot com>
personal: <chucklever at bigfoot dot com>
[-- Attachment #2: 04-sys-fs.patch --]
[-- Type: application/octet-stream, Size: 3235 bytes --]
diff -X ../../dont-diff -Naurp 03-nfs-odirect/fs/dcache.c 04-sys-fs/fs/dcache.c
--- 03-nfs-odirect/fs/dcache.c 2004-04-03 22:36:24.000000000 -0500
+++ 04-sys-fs/fs/dcache.c 2004-04-16 11:35:16.557000000 -0400
@@ -1637,6 +1637,7 @@ void __init vfs_caches_init(unsigned lon
mnt_init(mempages);
bdev_cache_init();
chrdev_init();
+ filesystems_init();
}
EXPORT_SYMBOL(d_alloc);
diff -X ../../dont-diff -Naurp 03-nfs-odirect/fs/filesystems.c 04-sys-fs/fs/filesystems.c
--- 03-nfs-odirect/fs/filesystems.c 2004-04-03 22:37:42.000000000 -0500
+++ 04-sys-fs/fs/filesystems.c 2004-04-16 11:35:16.570004000 -0400
@@ -11,6 +11,7 @@
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/kobject.h>
#include <asm/uaccess.h>
/*
@@ -29,6 +30,8 @@
static struct file_system_type *file_systems;
static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
+static decl_subsys(fs, NULL, NULL);
+
/* WARNING: This can be used only if we _already_ own a reference */
void get_filesystem(struct file_system_type *fs)
{
@@ -76,8 +79,13 @@ int register_filesystem(struct file_syst
p = find_filesystem(fs->name);
if (*p)
res = -EBUSY;
else
*p = fs;
write_unlock(&file_systems_lock);
+ /* subsystem_register() can sleep; call it only after unlocking */
+ if (!res && fs->subsys) {
+ kset_set_kset_s(fs->subsys, fs_subsys);
+ res = subsystem_register(fs->subsys);
+ }
return res;
}
@@ -106,6 +114,8 @@ int unregister_filesystem(struct file_sy
if (fs == *tmp) {
*tmp = fs->next;
fs->next = NULL;
write_unlock(&file_systems_lock);
+ if (fs->subsys) /* may sleep, so the lock is dropped first */
+ subsystem_unregister(fs->subsys);
return 0;
}
@@ -233,3 +243,8 @@ struct file_system_type *get_fs_type(con
}
EXPORT_SYMBOL(get_fs_type);
+
+void filesystems_init(void)
+{
+ subsystem_register(&fs_subsys);
+}
diff -X ../../dont-diff -Naurp 03-nfs-odirect/fs/super.c 04-sys-fs/fs/super.c
--- 03-nfs-odirect/fs/super.c 2004-04-03 22:36:57.000000000 -0500
+++ 04-sys-fs/fs/super.c 2004-04-16 11:35:16.574000000 -0400
@@ -22,6 +22,7 @@
#include <linux/config.h>
#include <linux/module.h>
+#include <linux/kobject.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
@@ -81,6 +82,7 @@ static struct super_block *alloc_super(v
s->dq_op = sb_dquot_ops;
s->s_qcop = sb_quotactl_ops;
s->s_op = &default_op;
+ kobject_init(&s->kobj);
}
out:
return s;
diff -X ../../dont-diff -Naurp 03-nfs-odirect/include/linux/fs.h 04-sys-fs/include/linux/fs.h
--- 03-nfs-odirect/include/linux/fs.h 2004-04-03 22:36:52.000000000 -0500
+++ 04-sys-fs/include/linux/fs.h 2004-04-16 11:35:16.600000000 -0400
@@ -219,6 +219,7 @@ extern void update_atime (struct inode *
extern void inode_init(unsigned long);
extern void mnt_init(unsigned long);
extern void files_init(unsigned long);
+extern void filesystems_init(void);
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -1038,6 +1039,7 @@ struct file_system_type {
struct module *owner;
struct file_system_type * next;
struct list_head fs_supers;
+ struct subsystem *subsys;
};
struct super_block *get_sb_bdev(struct file_system_type *fs_type,
[-- Attachment #3: 05-sys-fs-nfs.patch --]
[-- Type: application/octet-stream, Size: 20789 bytes --]
diff -X ../../dont-diff -Naurp 04-sys-fs/fs/nfs/inode.c 05-sys-fs-nfs/fs/nfs/inode.c
--- 04-sys-fs/fs/nfs/inode.c 2004-04-12 21:12:06.715000000 -0400
+++ 05-sys-fs-nfs/fs/nfs/inode.c 2004-04-16 18:26:49.570000000 -0400
@@ -16,6 +16,7 @@
#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/kobject.h>
#include <linux/time.h>
#include <linux/kernel.h>
@@ -42,14 +43,13 @@
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_PARANOIA 1
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- * their needs. People that do NFS over a slow network, might for
- * instance want to reduce it to something closer to 1 for improved
- * interactive response.
- */
+/* Maximum number of readahead requests */
#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1)
+/* Don't use READDIRPLUS on directories that we believe are too large */
+#define NFS_LIMIT_READDIRPLUS (8 * PAGE_SIZE)
+
+
static void nfs_invalidate_inode(struct inode *);
static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long);
@@ -158,6 +158,8 @@ nfs_put_super(struct super_block *sb)
nfs4_renewd_prepare_shutdown(server);
+ nfs_super_unregister(sb);
+
if (server->client != NULL)
rpc_shutdown_client(server->client);
if (server->client_sys != NULL)
@@ -170,6 +172,7 @@ nfs_put_super(struct super_block *sb)
destroy_nfsv4_state(server);
kfree(server->hostname);
+ kfree(server->devname);
}
void
@@ -332,7 +335,7 @@ nfs_sb_init(struct super_block *sb, rpc_
if (sb->s_maxbytes > MAX_LFS_FILESIZE)
sb->s_maxbytes = MAX_LFS_FILESIZE;
- /* We're airborne Set socket buffersize */
+ /* We're airborne. Set socket buffersize */
rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
return 0;
/* Yargs. It didn't work out. */
@@ -432,6 +435,7 @@ nfs_fill_super(struct super_block *sb, s
#ifdef CONFIG_NFS_V3
server->rpc_ops = &nfs_v3_clientops;
server->caps |= NFS_CAP_READDIRPLUS;
+ server->readdirplus_limit = NFS_LIMIT_READDIRPLUS;
if (data->version < 4) {
printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n");
goto out_fail;
@@ -499,6 +503,8 @@ out_shutdown:
out_fail:
if (server->hostname)
kfree(server->hostname);
+ if (server->devname)
+ kfree(server->devname);
return err;
}
@@ -657,9 +663,6 @@ nfs_init_locked(struct inode *inode, voi
return 0;
}
-/* Don't use READDIRPLUS on directories that we believe are too large */
-#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
-
/*
* This is our front-end to iget that looks up inodes by file handle
* instead of inode number.
@@ -709,7 +712,7 @@ nfs_fhget(struct super_block *sb, struct
inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
- && fattr->size <= NFS_LIMIT_READDIRPLUS)
+ && fattr->size <= NFS_SB(sb)->readdirplus_limit)
NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS;
} else if (S_ISLNK(inode->i_mode))
inode->i_op = &nfs_symlink_inode_operations;
@@ -1360,6 +1363,13 @@ static struct super_block *nfs_get_sb(st
return ERR_PTR(-EINVAL);
}
+ server->devname = kmalloc(strlen(dev_name) + 1, GFP_KERNEL);
+ if (!server->devname) {
+ kfree(server);
+ return ERR_PTR(-ENOMEM);
+ }
+ strcpy(server->devname, dev_name);
+
s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
if (IS_ERR(s) || s->s_root) {
@@ -1375,6 +1385,13 @@ static struct super_block *nfs_get_sb(st
deactivate_super(s);
return ERR_PTR(error);
}
+
+ error = nfs_super_register(s, &nfs_subsys);
+ if (error) {
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ return ERR_PTR(error);
+ }
s->s_flags |= MS_ACTIVE;
return s;
}
@@ -1392,6 +1409,7 @@ static struct file_system_type nfs_fs_ty
.get_sb = nfs_get_sb,
.kill_sb = nfs_kill_super,
.fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .subsys = &nfs_subsys,
};
#ifdef CONFIG_NFS_V4
@@ -1679,6 +1697,13 @@ static struct super_block *nfs4_get_sb(s
goto out_free;
}
+ server->devname = kmalloc(strlen(dev_name) + 1, GFP_KERNEL);
+ if (!server->devname) {
+ s = ERR_PTR(-ENOMEM); /* out_free also releases mnt_path/hostname */
+ goto out_free;
+ }
+ strcpy(server->devname, dev_name);
+
s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
if (IS_ERR(s) || s->s_root)
@@ -1692,6 +1717,13 @@ static struct super_block *nfs4_get_sb(s
deactivate_super(s);
return ERR_PTR(error);
}
+
+ error = nfs_super_register(s, &nfs4_subsys);
+ if (error) {
+ up_write(&s->s_umount);
+ deactivate_super(s);
+ return ERR_PTR(error);
+ }
s->s_flags |= MS_ACTIVE;
return s;
out_err:
@@ -1701,6 +1733,8 @@ out_free:
kfree(server->mnt_path);
if (server->hostname)
kfree(server->hostname);
+ if (server->devname)
+ kfree(server->devname);
kfree(server);
return s;
}
@@ -1711,6 +1745,7 @@ static struct file_system_type nfs4_fs_t
.get_sb = nfs4_get_sb,
.kill_sb = nfs_kill_super,
.fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+ .subsys = &nfs4_subsys,
};
#define nfs4_zero_state(nfsi) \
diff -X ../../dont-diff -Naurp 04-sys-fs/fs/nfs/Makefile 05-sys-fs-nfs/fs/nfs/Makefile
--- 04-sys-fs/fs/nfs/Makefile 2004-04-03 22:36:26.000000000 -0500
+++ 05-sys-fs-nfs/fs/nfs/Makefile 2004-04-16 11:38:21.338003000 -0400
@@ -5,7 +5,7 @@
obj-$(CONFIG_NFS_FS) += nfs.o
nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \
- proc.o read.o symlink.o unlink.o write.o
+ proc.o read.o symlink.o unlink.o write.o sys.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff -X ../../dont-diff -Naurp 04-sys-fs/fs/nfs/sys.c 05-sys-fs-nfs/fs/nfs/sys.c
--- 04-sys-fs/fs/nfs/sys.c 1969-12-31 19:00:00.000000000 -0500
+++ 05-sys-fs-nfs/fs/nfs/sys.c 2004-04-17 15:00:58.235000000 -0400
@@ -0,0 +1,521 @@
+/*
+ * linux/fs/nfs/sys.c
+ *
+ * Copyright (C) 2004 Chuck Lever <cel@netapp.com>
+ *
+ * Export NFS client attributes via /sys
+ *
+ * NFS client has a kobject subsystem of its own, under which appears
+ * a kobject for each mount point; ie, each mount point is represented
+ * by a directory under /sys/fs/nfs .
+ *
+ * In each mount point directory, there are a set of files and
+ * directories which export statistical data related to that mount
+ * point, and provide methods for setting NFS client parameters that
+ * are not available via the mount command (the advantage being that
+ * one can change these while the file system is mounted).
+ */
+
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+
+
+/*
+ * Attributes that apply to the whole NFS client
+ */
+
+struct nfs_subsys_attr {
+ struct attribute attr;
+ ssize_t (*show)(char *buf);
+ ssize_t (*store)(const char *buf, size_t count);
+};
+
+#define NFS_SUBSYS_RO_ATTR(_name) \
+static struct nfs_subsys_attr nfs_subsys_##_name = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = S_IRUGO, \
+ .owner = THIS_MODULE, \
+ }, \
+ .show = nfs_subsys_##_name##_show, \
+}
+
+#define NFS_SUBSYS_RW_ATTR(_name) \
+static struct nfs_subsys_attr nfs_subsys_##_name = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = S_IWUSR | S_IRUGO, \
+ .owner = THIS_MODULE, \
+ }, \
+ .show = nfs_subsys_##_name##_show, \
+ .store = nfs_subsys_##_name##_store, \
+}
+
+static ssize_t
+nfs_subsys_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ ssize_t ret = 0;
+ struct nfs_subsys_attr *nfs_attr =
+ container_of(attr, struct nfs_subsys_attr, attr);
+
+ if (nfs_attr->show)
+ ret = nfs_attr->show(page);
+
+ return ret;
+}
+
+static ssize_t
+nfs_subsys_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ ssize_t ret = 0;
+ struct nfs_subsys_attr *nfs_attr =
+ container_of(attr, struct nfs_subsys_attr, attr);
+
+ if (nfs_attr->store)
+ ret = nfs_attr->store(buf, count);
+
+ return ret;
+}
+
+static struct sysfs_ops nfs_subsys_ops = {
+ .show = nfs_subsys_attr_show,
+ .store = nfs_subsys_attr_store,
+};
+
+/*
+ * debug
+ *
+ * This attribute enables trace debugging messages.
+ *
+ * To disable debugging, simply store a zero.
+ */
+static ssize_t
+nfs_subsys_debug_show(char *buf)
+{
+ return sprintf(buf, "0x%x\n",
+ nfs_debug);
+}
+
+static ssize_t
+nfs_subsys_debug_store(const char *buf, size_t count)
+{
+ u32 new_value;
+
+ if (sscanf(buf, "%u", &new_value) != 1)
+ return -EINVAL;
+ if (new_value <= NFSDBG_ALL)
+ nfs_debug = new_value;
+
+ return count;
+}
+
+NFS_SUBSYS_RW_ATTR(debug);
+
+static struct attribute *nfs_subsys_attrs[] = {
+ &nfs_subsys_debug.attr,
+ NULL, /* default_attrs is walked until a NULL entry */
+};
+static struct kobj_type ktype_nfs_subsys = {
+ .sysfs_ops = &nfs_subsys_ops,
+ .default_attrs = nfs_subsys_attrs,
+};
+
+struct subsystem nfs_subsys = {
+ .kset = {
+ .kobj = {
+ .name = "nfs",
+ .ktype = &ktype_nfs_subsys,
+ },
+ },
+};
+
+struct subsystem nfs4_subsys = {
+ .kset = {
+ .kobj = {
+ .name = "nfs4",
+ .ktype = &ktype_nfs_subsys,
+ },
+ },
+};
+
+
+/*
+ * Attributes that apply only to a single mount point
+ */
+
+struct nfs_mount_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct nfs_server *server, char *buf);
+ ssize_t (*store)(struct nfs_server *server,
+ const char *buf, size_t count);
+};
+
+#define NFS_MOUNT_RO_ATTR(_name) \
+static struct nfs_mount_attr nfs_mount_##_name = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = S_IRUGO, \
+ .owner = THIS_MODULE, \
+ }, \
+ .show = nfs_mount_##_name##_show, \
+}
+
+#define NFS_MOUNT_RW_ATTR(_name) \
+static struct nfs_mount_attr nfs_mount_##_name = { \
+ .attr = { \
+ .name = __stringify(_name), \
+ .mode = S_IWUSR | S_IRUGO, \
+ .owner = THIS_MODULE, \
+ }, \
+ .show = nfs_mount_##_name##_show, \
+ .store = nfs_mount_##_name##_store, \
+}
+
+/*
+ * These map the embedded kobject in a superblock into an nfs_server struct.
+ */
+
+static ssize_t
+nfs_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+ struct super_block *s = container_of(kobj, struct super_block, kobj);
+ struct nfs_mount_attr *nfs_attr = container_of(attr, struct nfs_mount_attr, attr);
+ ssize_t ret = 0;
+
+ if (nfs_attr->show)
+ ret = nfs_attr->show(NFS_SB(s), page);
+
+ return ret;
+}
+
+static ssize_t
+nfs_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct super_block *s = container_of(kobj, struct super_block, kobj);
+ struct nfs_mount_attr *nfs_attr = container_of(attr, struct nfs_mount_attr, attr);
+ ssize_t ret = 0;
+
+ if (nfs_attr->store)
+ ret = nfs_attr->store(NFS_SB(s), buf, count);
+
+ return ret;
+}
+
+static struct sysfs_ops nfs_mount_ops = {
+ .show = nfs_attr_show,
+ .store = nfs_attr_store,
+};
+
+
+/*
+ * Mount characteristics
+ */
+
+#define NFS_MOUNT_OPTION_SHOW(_name, _format, _var) \
+static ssize_t \
+nfs_mount_##_name##_show(struct nfs_server *server, char *buf) \
+{ \
+ return sprintf(buf, _format, \
+ _var); \
+}
+
+/*
+ * elapsed_time
+ *
+ * This attribute reports the number of seconds this file system
+ * has been mounted.
+ */
+NFS_MOUNT_OPTION_SHOW(elapsed_time, "%lu seconds\n",
+ (jiffies - server->mount_time) / HZ)
+NFS_MOUNT_RO_ATTR(elapsed_time);
+
+/*
+ * max_readahead
+ *
+ * This setting controls the maximum number of pages the VFS may
+ * choose to read ahead for files on this file system.
+ *
+ * To disable readahead on this mountpoint, simply set the number
+ * of pages to read ahead to zero.
+ */
+NFS_MOUNT_OPTION_SHOW(max_readahead, "%lu pages\n",
+ server->backing_dev_info.ra_pages)
+
+static ssize_t
+nfs_mount_max_readahead_store(struct nfs_server *server, const char *buf,
+ size_t count)
+{
+ unsigned long new_value; /* must match the "%lu" conversion below */
+
+ if (sscanf(buf, "%lu", &new_value) != 1)
+ return -EINVAL;
+ if (new_value < 2047UL) /* out-of-range values are silently ignored */
+ server->backing_dev_info.ra_pages = new_value;
+
+ return count;
+}
+
+NFS_MOUNT_RW_ATTR(max_readahead);
+
+/*
+ * readdirplus_limit
+ *
+ * This setting controls when the NFS client chooses to use
+ * v3 READDIRPLUS over v3 READDIR. The choice is based on the
+ * file size of the directory, as reported by the server.
+ *
+ * To disable the use of READDIRPLUS on this mount point, simply
+ * set the readdirplus_limit value to zero.
+ */
+NFS_MOUNT_OPTION_SHOW(readdirplus_limit, "%u bytes\n",
+ server->readdirplus_limit)
+
+static ssize_t
+nfs_mount_readdirplus_limit_store(struct nfs_server *server, const char *buf,
+ size_t count)
+{
+ u32 new_value;
+
+ if (sscanf(buf, "%u", &new_value) != 1)
+ return -EINVAL;
+ if (new_value < 16777216)
+ server->readdirplus_limit = new_value;
+
+ return count;
+}
+
+NFS_MOUNT_RW_ATTR(readdirplus_limit);
+
+/*
+ * Mount options
+ */
+
+/*
+ * version
+ *
+ * This attribute reports the protocol version for this file system.
+ */
+NFS_MOUNT_OPTION_SHOW(version, "%d\n", server->rpc_ops->version)
+NFS_MOUNT_RO_ATTR(version);
+
+/*
+ * rsize
+ *
+ * This attribute reports the maximum number of bytes allowed
+ * in an NFS read operation on this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(rsize, "%d bytes\n", server->rsize)
+NFS_MOUNT_RO_ATTR(rsize);
+
+/*
+ * wsize
+ *
+ * This attribute reports the maximum number of bytes allowed
+ * in an NFS write operation on this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(wsize, "%d bytes\n", server->wsize)
+NFS_MOUNT_RO_ATTR(wsize);
+
+/*
+ * acregmin
+ *
+ * This attribute reports the minimum attribute cache timeout
+ * value for regular files in effect for this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(acregmin, "%d seconds\n", server->acregmin / HZ)
+NFS_MOUNT_RO_ATTR(acregmin);
+
+/*
+ * acregmax
+ *
+ * This attribute reports the maximum attribute cache timeout
+ * value for regular files in effect for this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(acregmax, "%d seconds\n", server->acregmax / HZ)
+NFS_MOUNT_RO_ATTR(acregmax);
+
+/*
+ * acdirmin
+ *
+ * This attribute reports the minimum attribute cache timeout
+ * value for directories in effect for this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(acdirmin, "%d seconds\n", server->acdirmin / HZ)
+NFS_MOUNT_RO_ATTR(acdirmin);
+
+/*
+ * acdirmax
+ *
+ * This attribute reports the maximum attribute cache timeout
+ * value for directories in effect for this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(acdirmax, "%d seconds\n", server->acdirmax / HZ)
+NFS_MOUNT_RO_ATTR(acdirmax);
+
+/*
+ * hostname
+ *
+ * This attribute reports the NFS server's hostname.
+ */
+NFS_MOUNT_OPTION_SHOW(hostname, "%s\n", server->hostname)
+NFS_MOUNT_RO_ATTR(hostname);
+
+/*
+ * export_path
+ *
+ * This attribute reports the server export path for this file system.
+ */
+NFS_MOUNT_OPTION_SHOW(export_path, "%s\n", server->devname)
+NFS_MOUNT_RO_ATTR(export_path);
+
+/*
+ * soft
+ *
+ * This attribute reports whether soft timeouts are in effect for this
+ * mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(soft, "%s\n",
+ ((server->flags & NFS_MOUNT_SOFT) ? "yes" : "no"))
+NFS_MOUNT_RO_ATTR(soft);
+
+/*
+ * intr
+ *
+ * This attribute reports whether file waits can be interrupted on this
+ * mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(intr, "%s\n",
+ ((server->flags & NFS_MOUNT_INTR) ? "yes" : "no"));
+NFS_MOUNT_RO_ATTR(intr);
+
+/*
+ * cto
+ *
+ * This attribute reports whether close-to-open cache coherency is
+ * in effect for this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(cto, "%s\n",
+ ((server->flags & NFS_MOUNT_NOCTO) ? "no" : "yes"));
+NFS_MOUNT_RO_ATTR(cto);
+
+/*
+ * lock
+ *
+ * This attribute reports whether the NLM is in use on this mount point.
+ */
+NFS_MOUNT_OPTION_SHOW(lock, "%s\n",
+ ((server->flags & NFS_MOUNT_NONLM) ? "no" : "yes"));
+NFS_MOUNT_RO_ATTR(lock);
+
+/*
+ * sec_flavor
+ *
+ * This attribute reports which security flavor is in effect for this
+ * mount point. Needs work.
+ */
+static ssize_t
+nfs_mount_sec_flavor_show(struct nfs_server *server, char *buf)
+{
+ return sprintf(buf, "%s\n",
+ ((server->flags & NFS_MOUNT_SECFLAVOUR) ? "special" : "auth_sys"));
+}
+NFS_MOUNT_RO_ATTR(sec_flavor);
+
+
+static struct attribute *nfs_mount_attrs[] = {
+ &nfs_mount_version.attr,
+ &nfs_mount_rsize.attr,
+ &nfs_mount_wsize.attr,
+ &nfs_mount_acregmin.attr,
+ &nfs_mount_acregmax.attr,
+ &nfs_mount_acdirmin.attr,
+ &nfs_mount_acdirmax.attr,
+ &nfs_mount_hostname.attr,
+ &nfs_mount_export_path.attr,
+ &nfs_mount_soft.attr,
+ &nfs_mount_intr.attr,
+ &nfs_mount_cto.attr,
+ &nfs_mount_lock.attr,
+ &nfs_mount_sec_flavor.attr,
+ &nfs_mount_elapsed_time.attr,
+ &nfs_mount_max_readahead.attr,
+};
+
+static struct attribute *nfs3_mount_attrs[] = {
+ &nfs_mount_version.attr,
+ &nfs_mount_rsize.attr,
+ &nfs_mount_wsize.attr,
+ &nfs_mount_acregmin.attr,
+ &nfs_mount_acregmax.attr,
+ &nfs_mount_acdirmin.attr,
+ &nfs_mount_acdirmax.attr,
+ &nfs_mount_hostname.attr,
+ &nfs_mount_export_path.attr,
+ &nfs_mount_soft.attr,
+ &nfs_mount_intr.attr,
+ &nfs_mount_cto.attr,
+ &nfs_mount_lock.attr,
+ &nfs_mount_sec_flavor.attr,
+ &nfs_mount_elapsed_time.attr,
+ &nfs_mount_max_readahead.attr,
+ &nfs_mount_readdirplus_limit.attr,
+};
+
+static struct attribute *nfs4_mount_attrs[] = {
+ &nfs_mount_version.attr,
+ &nfs_mount_rsize.attr,
+ &nfs_mount_wsize.attr,
+ &nfs_mount_acregmin.attr,
+ &nfs_mount_acregmax.attr,
+ &nfs_mount_acdirmin.attr,
+ &nfs_mount_acdirmax.attr,
+ &nfs_mount_hostname.attr,
+ &nfs_mount_export_path.attr,
+ &nfs_mount_soft.attr,
+ &nfs_mount_intr.attr,
+ &nfs_mount_cto.attr,
+ &nfs_mount_sec_flavor.attr,
+ &nfs_mount_elapsed_time.attr,
+ &nfs_mount_max_readahead.attr,
+};
+
+static struct kobj_type ktype_nfs_mount[] = {
+ { }, { },
+ {
+ .sysfs_ops = &nfs_mount_ops,
+ .default_attrs = nfs_mount_attrs,
+ },
+ {
+ .sysfs_ops = &nfs_mount_ops,
+ .default_attrs = nfs3_mount_attrs,
+ },
+ {
+ .sysfs_ops = &nfs_mount_ops,
+ .default_attrs = nfs4_mount_attrs,
+ },
+};
+
+int nfs_super_register(struct super_block *super, struct subsystem *subsys)
+{
+ struct nfs_server *server = NFS_SB(super);
+ int version = server->rpc_ops->version;
+
+ server->mount_time = jiffies;
+
+ kobject_set_name(&super->kobj, "mount:%d,%d", MAJOR(super->s_dev),
+ MINOR(super->s_dev));
+ super->kobj.parent = &subsys->kset.kobj;
+ super->kobj.ktype = &ktype_nfs_mount[version];
+ return kobject_add(&super->kobj);
+}
+
+void nfs_super_unregister(struct super_block *super)
+{
+ kobject_unregister(&super->kobj);
+}
diff -X ../../dont-diff -Naurp 04-sys-fs/include/linux/nfs_fs.h 05-sys-fs-nfs/include/linux/nfs_fs.h
--- 04-sys-fs/include/linux/nfs_fs.h 2004-04-16 11:30:02.825000000 -0400
+++ 05-sys-fs-nfs/include/linux/nfs_fs.h 2004-04-16 17:29:50.351000000 -0400
@@ -319,6 +319,16 @@ extern struct file_operations nfs_dir_op
extern struct dentry_operations nfs_dentry_operations;
/*
+ * linux/fs/nfs/sys.c
+ */
+extern struct subsystem nfs_subsys;
+extern struct subsystem nfs4_subsys;
+
+extern int nfs_super_register(struct super_block *super,
+ struct subsystem *subsys);
+extern void nfs_super_unregister(struct super_block *super);
+
+/*
* linux/fs/nfs/symlink.c
*/
extern struct inode_operations nfs_symlink_inode_operations;
diff -X ../../dont-diff -Naurp 04-sys-fs/include/linux/nfs_fs_sb.h 05-sys-fs-nfs/include/linux/nfs_fs_sb.h
--- 04-sys-fs/include/linux/nfs_fs_sb.h 2004-04-12 21:12:06.814001000 -0400
+++ 05-sys-fs-nfs/include/linux/nfs_fs_sb.h 2004-04-16 16:16:43.218004000 -0400
@@ -28,6 +28,11 @@ struct nfs_server {
char * hostname; /* remote hostname */
struct nfs_fh fh;
struct sockaddr_in addr;
+ char * devname;
+ unsigned long mount_time;
+#ifdef CONFIG_NFS_V3
+ unsigned int readdirplus_limit;
+#endif
#ifdef CONFIG_NFS_V4
/* Our own IP address, as a null-terminated string.
* This is used to generate the clientid, and the callback address.
[-- Attachment #4: 06-nfs-iostat.patch --]
[-- Type: application/octet-stream, Size: 14890 bytes --]
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/fs/nfs/dir.c 06-nfs-iostat/fs/nfs/dir.c
--- 05-sys-fs-nfs/fs/nfs/dir.c 2004-04-16 10:58:17.543000000 -0400
+++ 06-nfs-iostat/fs/nfs/dir.c 2004-04-17 12:40:43.984001000 -0400
@@ -598,6 +598,7 @@ static int nfs_lookup_revalidate(struct
parent = dget_parent(dentry);
lock_kernel();
dir = parent->d_inode;
+ NFS_STATS(dir).dentry_revalidates++;
inode = dentry->d_inode;
if (nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_OPEN))
@@ -740,6 +741,7 @@ static struct dentry *nfs_lookup(struct
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
+ NFS_STATS(dir).vfs_lookups++;
error = -ENAMETOOLONG;
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -1530,6 +1532,8 @@ nfs_permission(struct inode *inode, int
int mode = inode->i_mode;
int res;
+ NFS_STATS(inode).vfs_accesses++;
+
if (mask == 0)
return 0;
if (mask & MAY_WRITE) {
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/fs/nfs/direct.c 06-nfs-iostat/fs/nfs/direct.c
--- 05-sys-fs-nfs/fs/nfs/direct.c 2004-04-16 11:31:42.182000000 -0400
+++ 06-nfs-iostat/fs/nfs/direct.c 2004-04-16 17:45:37.838000000 -0400
@@ -231,6 +231,8 @@ nfs_direct_read(struct inode *inode, str
break;
}
+ if (tot_bytes > 0)
+ NFS_STATS(inode).direct_bytes_read += tot_bytes;
return tot_bytes;
}
@@ -397,6 +399,9 @@ nfs_direct_write(struct inode *inode, st
if (result < size)
break;
}
+
+ if (tot_bytes > 0)
+ NFS_STATS(inode).direct_bytes_written += tot_bytes;
return tot_bytes;
}
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/fs/nfs/file.c 06-nfs-iostat/fs/nfs/file.c
--- 05-sys-fs-nfs/fs/nfs/file.c 2004-04-16 11:30:02.820000000 -0400
+++ 06-nfs-iostat/fs/nfs/file.c 2004-04-16 18:36:20.268000000 -0400
@@ -138,6 +138,8 @@ nfs_file_read(struct kiocb *iocb, char *
result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (!result)
result = generic_file_aio_read(iocb, buf, count, pos);
+ if (result > 0)
+ NFS_STATS(inode).sys_bytes_read += result;
return result;
}
@@ -267,6 +269,8 @@ nfs_file_write(struct kiocb *iocb, const
goto out;
result = generic_file_aio_write(iocb, buf, count, pos);
+ if (result > 0)
+ NFS_STATS(inode).sys_bytes_written += result;
out:
return result;
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/fs/nfs/read.c 06-nfs-iostat/fs/nfs/read.c
--- 05-sys-fs-nfs/fs/nfs/read.c 2004-04-12 21:12:06.795003000 -0400
+++ 06-nfs-iostat/fs/nfs/read.c 2004-04-16 17:22:41.079000000 -0400
@@ -149,6 +149,7 @@ nfs_readpage_sync(struct file *file, str
}
count -= result;
rdata.args.pgbase += result;
+ NFS_STATS(inode).bytes_read += result;
/* Note: result == 0 should only happen if we're caching
* a write that extends the file and punches a hole.
*/
@@ -488,6 +489,8 @@ void nfs_readpage_result(struct rpc_task
}
task->tk_status = -EIO;
}
+ if (task->tk_status >= 0)
+ NFS_STATS(data->inode).bytes_read += resp->count;
NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
data->complete(data, status);
}
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/fs/nfs/sys.c 06-nfs-iostat/fs/nfs/sys.c
--- 05-sys-fs-nfs/fs/nfs/sys.c 2004-04-17 15:00:58.235000000 -0400
+++ 06-nfs-iostat/fs/nfs/sys.c 2004-04-17 14:57:58.107001000 -0400
@@ -428,6 +428,183 @@ nfs_mount_sec_flavor_show(struct nfs_ser
NFS_MOUNT_RO_ATTR(sec_flavor);
+/*
+ * io_stat attributes
+ */
+
+#define NFS_MOUNT_IOSTAT_SHOW(_name, _type, _format) \
+static ssize_t \
+nfs_mount_##_name##_show(struct nfs_server *server, char *buf) \
+{ \
+ int i; \
+ _type total = 0; \
+ for (i = 0; i < NR_CPUS; i++) \
+ total += server->io_stats[i]._name; \
+ return sprintf(buf, _format, \
+ total); \
+}
+
+#define NFS_MOUNT_IOSTAT_STORE(_name) \
+static ssize_t \
+nfs_mount_##_name##_store(struct nfs_server *server, \
+ const char *buf, size_t count) \
+{ \
+ int i; \
+ for (i = 0; i < NR_CPUS; i++) \
+ server->io_stats[i]._name = 0; \
+ return count; \
+}
+
+
+/*
+ * bytes_read
+ *
+ * This attribute exposes the count of bytes read via NFS READ operations
+ * on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(bytes_read, unsigned long long, "%Lu bytes\n")
+NFS_MOUNT_IOSTAT_STORE(bytes_read)
+NFS_MOUNT_RW_ATTR(bytes_read);
+
+/*
+ * bytes_written
+ *
+ * This attribute exposes the count of bytes written via NFS WRITE operations
+ * on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(bytes_written, unsigned long long, "%Lu bytes\n")
+NFS_MOUNT_IOSTAT_STORE(bytes_written)
+NFS_MOUNT_RW_ATTR(bytes_written);
+
+/*
+ * sys_bytes_read
+ *
+ * This attribute exposes the count of bytes read via read() system calls
+ * on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(sys_bytes_read, unsigned long long, "%Lu bytes\n")
+NFS_MOUNT_IOSTAT_STORE(sys_bytes_read)
+NFS_MOUNT_RW_ATTR(sys_bytes_read);
+
+/*
+ * sys_bytes_written
+ *
+ * This attribute exposes the count of bytes written via write() system calls
+ * on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(sys_bytes_written, unsigned long long, "%Lu bytes\n")
+NFS_MOUNT_IOSTAT_STORE(sys_bytes_written)
+NFS_MOUNT_RW_ATTR(sys_bytes_written);
+
+/*
+ * direct_bytes_read
+ *
+ * This attribute exposes the count of bytes read directly
+ * on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(direct_bytes_read, unsigned long long, "%Lu bytes\n")
+NFS_MOUNT_IOSTAT_STORE(direct_bytes_read)
+NFS_MOUNT_RW_ATTR(direct_bytes_read);
+
+/*
+ * direct_bytes_written
+ *
+ * This attribute exposes the count of bytes written directly
+ * on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(direct_bytes_written, unsigned long long, "%Lu bytes\n")
+NFS_MOUNT_IOSTAT_STORE(direct_bytes_written)
+NFS_MOUNT_RW_ATTR(direct_bytes_written);
+
+/*
+ * inode_revalidates
+ *
+ * This attribute exposes the count of inode revalidations
+ * on this mount point. Each revalidation may or may not
+ * result in an on-the-wire NFS GETATTR operation.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(inode_revalidates, unsigned long, "%lu\n")
+NFS_MOUNT_IOSTAT_STORE(inode_revalidates)
+NFS_MOUNT_RW_ATTR(inode_revalidates);
+
+/*
+ * dentry_revalidates
+ *
+ * This attribute exposes the count of dentry revalidations
+ * on this mount point. Each revalidation may or may not
+ * result in an on-the-wire NFS LOOKUP operation.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(dentry_revalidates, unsigned long, "%lu\n")
+NFS_MOUNT_IOSTAT_STORE(dentry_revalidates)
+NFS_MOUNT_RW_ATTR(dentry_revalidates);
+
+/*
+ * vfs_lookups
+ *
+ * This attribute exposes the count of calls to nfs_lookup
+ * on this mount point. Each call may or may not result in
+ * an on-the-wire NFS LOOKUP operation.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(vfs_lookups, unsigned long, "%lu\n")
+NFS_MOUNT_IOSTAT_STORE(vfs_lookups)
+NFS_MOUNT_RW_ATTR(vfs_lookups);
+
+/*
+ * vfs_accesses
+ *
+ * This attribute exposes the count of calls to nfs_permission
+ * on this mount point. Each call may or may not result in an
+ * on-the-wire NFS ACCESS operation.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(vfs_accesses, unsigned long, "%lu\n")
+NFS_MOUNT_IOSTAT_STORE(vfs_accesses)
+NFS_MOUNT_RW_ATTR(vfs_accesses);
+
+/*
+ * setattr_trunc
+ *
+ * This attribute exposes the count of NFS SETATTR operations
+ * that cause a file truncation on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(setattr_trunc, unsigned long, "%lu\n")
+NFS_MOUNT_IOSTAT_STORE(setattr_trunc)
+NFS_MOUNT_RW_ATTR(setattr_trunc);
+
+/*
+ * extend_writes
+ *
+ * This attribute exposes the count of write requests that
+ * extend a file on this mount point.
+ *
+ * Writing anything to this attribute resets it to zero.
+ */
+NFS_MOUNT_IOSTAT_SHOW(extend_writes, unsigned long, "%lu\n")
+NFS_MOUNT_IOSTAT_STORE(extend_writes)
+NFS_MOUNT_RW_ATTR(extend_writes);
+
+
static struct attribute *nfs_mount_attrs[] = {
&nfs_mount_version.attr,
&nfs_mount_rsize.attr,
@@ -445,6 +622,18 @@ static struct attribute *nfs_mount_attrs
&nfs_mount_sec_flavor.attr,
&nfs_mount_elapsed_time.attr,
&nfs_mount_max_readahead.attr,
+ &nfs_mount_bytes_read.attr,
+ &nfs_mount_bytes_written.attr,
+ &nfs_mount_sys_bytes_read.attr,
+ &nfs_mount_sys_bytes_written.attr,
+ &nfs_mount_direct_bytes_read.attr,
+ &nfs_mount_direct_bytes_written.attr,
+ &nfs_mount_inode_revalidates.attr,
+ &nfs_mount_dentry_revalidates.attr,
+ &nfs_mount_vfs_lookups.attr,
+ &nfs_mount_vfs_accesses.attr,
+ &nfs_mount_setattr_trunc.attr,
+ &nfs_mount_extend_writes.attr,
};
static struct attribute *nfs3_mount_attrs[] = {
@@ -465,6 +654,18 @@ static struct attribute *nfs3_mount_attr
&nfs_mount_elapsed_time.attr,
&nfs_mount_max_readahead.attr,
&nfs_mount_readdirplus_limit.attr,
+ &nfs_mount_bytes_read.attr,
+ &nfs_mount_bytes_written.attr,
+ &nfs_mount_sys_bytes_read.attr,
+ &nfs_mount_sys_bytes_written.attr,
+ &nfs_mount_direct_bytes_read.attr,
+ &nfs_mount_direct_bytes_written.attr,
+ &nfs_mount_inode_revalidates.attr,
+ &nfs_mount_dentry_revalidates.attr,
+ &nfs_mount_vfs_lookups.attr,
+ &nfs_mount_vfs_accesses.attr,
+ &nfs_mount_setattr_trunc.attr,
+ &nfs_mount_extend_writes.attr,
};
static struct attribute *nfs4_mount_attrs[] = {
@@ -483,6 +684,18 @@ static struct attribute *nfs4_mount_attr
&nfs_mount_sec_flavor.attr,
&nfs_mount_elapsed_time.attr,
&nfs_mount_max_readahead.attr,
+ &nfs_mount_bytes_read.attr,
+ &nfs_mount_bytes_written.attr,
+ &nfs_mount_sys_bytes_read.attr,
+ &nfs_mount_sys_bytes_written.attr,
+ &nfs_mount_direct_bytes_read.attr,
+ &nfs_mount_direct_bytes_written.attr,
+ &nfs_mount_inode_revalidates.attr,
+ &nfs_mount_dentry_revalidates.attr,
+ &nfs_mount_vfs_lookups.attr,
+ &nfs_mount_vfs_accesses.attr,
+ &nfs_mount_setattr_trunc.attr,
+ &nfs_mount_extend_writes.attr,
};
static struct kobj_type ktype_nfs_mount[] = {
@@ -503,6 +716,8 @@ static struct kobj_type ktype_nfs_mount[
int nfs_super_register(struct super_block *super, struct subsystem *subsys)
{
+ int error;
+ unsigned int size;
struct nfs_server *server = NFS_SB(super);
int version = server->rpc_ops->version;
@@ -512,10 +727,24 @@ int nfs_super_register(struct super_bloc
MINOR(super->s_dev));
super->kobj.parent = &subsys->kset.kobj;
super->kobj.ktype = &ktype_nfs_mount[version];
- return kobject_add(&super->kobj);
+ error = kobject_add(&super->kobj);
+ if (error)
+ return error;
+
+ size = NR_CPUS * sizeof(struct nfs_iostat);
+ server->io_stats = kmalloc(size, GFP_KERNEL);
+ if (!server->io_stats) {
+ kobject_unregister(&super->kobj);
+ return -ENOMEM;
+ }
+ memset(server->io_stats, 0, size);
+
+ return 0;
}
void nfs_super_unregister(struct super_block *super)
{
+ if (NFS_SB(super)->io_stats) /* counter array allocated in nfs_super_register */
+ kfree(NFS_SB(super)->io_stats); /* NOTE(review): freed before kobject_unregister; confirm no sysfs attribute access can still reach it */
kobject_unregister(&super->kobj);
}
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/fs/nfs/write.c 06-nfs-iostat/fs/nfs/write.c
--- 05-sys-fs-nfs/fs/nfs/write.c 2004-04-12 21:12:06.802001000 -0400
+++ 06-nfs-iostat/fs/nfs/write.c 2004-04-17 12:42:43.676001000 -0400
@@ -135,6 +135,7 @@ static void nfs_grow_file(struct page *p
end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
if (i_size >= end)
return;
+ NFS_STATS(inode).extend_writes++;
i_size_write(inode, end);
}
@@ -223,6 +224,7 @@ static int nfs_writepage_sync(struct fil
wdata.args.pgbase += result;
written += result;
count -= result;
+ NFS_STATS(inode).bytes_written += result;
} while (count);
/* Update file length */
nfs_grow_file(page, offset, written);
@@ -1191,6 +1193,8 @@ void nfs_writeback_done(struct rpc_task
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
+ if (task->tk_status > 0)
+ NFS_STATS(data->inode).bytes_written += resp->count;
/*
* Process the nfs_page list
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/include/linux/nfs_fs.h 06-nfs-iostat/include/linux/nfs_fs.h
--- 05-sys-fs-nfs/include/linux/nfs_fs.h 2004-04-16 17:29:50.351000000 -0400
+++ 06-nfs-iostat/include/linux/nfs_fs.h 2004-04-17 12:41:17.526000000 -0400
@@ -187,6 +187,7 @@ static inline struct nfs_inode *NFS_I(st
#define NFS_FH(inode) (&NFS_I(inode)->fh)
#define NFS_SERVER(inode) (NFS_SB(inode->i_sb))
+#define NFS_STATS(inode) NFS_SERVER(inode)->io_stats[smp_processor_id()]
#define NFS_CLIENT(inode) (NFS_SERVER(inode)->client)
#define NFS_PROTO(inode) (NFS_SERVER(inode)->rpc_ops)
#define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode)))
@@ -444,6 +445,7 @@ static inline int nfs_attribute_timeout(
*/
static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
+ NFS_STATS(inode).inode_revalidates++;
if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
&& !nfs_attribute_timeout(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
diff -X ../../dont-diff -Naurp 05-sys-fs-nfs/include/linux/nfs_fs_sb.h 06-nfs-iostat/include/linux/nfs_fs_sb.h
--- 05-sys-fs-nfs/include/linux/nfs_fs_sb.h 2004-04-16 16:16:43.218004000 -0400
+++ 06-nfs-iostat/include/linux/nfs_fs_sb.h 2004-04-17 12:42:16.813001000 -0400
@@ -5,6 +5,24 @@
#include <linux/backing-dev.h>
/*
+ * One CPU's worth of NFS I/O statistics
+ */
+struct nfs_iostat {
+ unsigned long long bytes_read,
+ bytes_written, /* advanced by the write paths as data is written */
+ sys_bytes_read,
+ sys_bytes_written,
+ direct_bytes_read,
+ direct_bytes_written;
+ unsigned long inode_revalidates, /* calls to nfs_revalidate_inode */
+ dentry_revalidates, /* dentry revalidations */
+ vfs_lookups, /* calls to nfs_lookup */
+ vfs_accesses, /* calls to nfs_permission */
+ setattr_trunc, /* SETATTR operations that truncate a file */
+ extend_writes; /* write requests that extend a file */
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+/*
* NFS client parameters stored in the superblock.
*/
struct nfs_server {
@@ -30,6 +48,7 @@ struct nfs_server {
struct sockaddr_in addr;
char * devname;
unsigned long mount_time;
+ struct nfs_iostat * io_stats;
#ifdef CONFIG_NFS_V3
unsigned int readdirplus_limit;
#endif
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: RFC: adding an "fs" directory under /sys
2004-04-17 19:46 Lever, Charles
@ 2004-04-17 20:07 ` viro
0 siblings, 0 replies; 11+ messages in thread
From: viro @ 2004-04-17 20:07 UTC (permalink / raw)
To: Lever, Charles
Cc: linux-fsdevel, Trond Myklebust, Patrick Mochel, nfs,
Linus Torvalds
On Sat, Apr 17, 2004 at 12:46:05PM -0700, Lever, Charles wrote:
> i've been working on a patch set that will expose I/O counters
> for NFS mount points. we're considering exposing these counters
> via /sys.
>
> i'm thinking this should go under /sys/fs, where there is one
> subdirectory in /sys/fs for each file system type available
> on the system.
Hell, *no*. Goddamnit, people, just how many times should that story repeat
itself before it sinks into your skulls - DO NOT MESS WITH KOBJECTS UNLESS
YOU UNDERSTAND LIFETIME RULES.
Linus, please consider that as a formal veto on any patches embedding
kobjects into struct super_block unless they are accompanied by detailed
analysis of lifetime rules _AND_ had been reviewed and ACKed on linux-kernel
and linux-fsdevel.
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
@ 2004-04-17 21:51 Lever, Charles
2004-04-17 21:55 ` Christoph Hellwig
0 siblings, 1 reply; 11+ messages in thread
From: Lever, Charles @ 2004-04-17 21:51 UTC (permalink / raw)
To: viro; +Cc: linux-fsdevel, Trond Myklebust, Patrick Mochel, Linus Torvalds
so, i take it you would prefer me to add a kobject to the
nfs_server struct for my purposes, rather than use the
unused kobject that is *already* embedded in the
super_block?
> Hell, *no*. Goddamnit, people, just how many times should
> that story repeat
> itself before it sinks into your skulls - DO NOT MESS WITH
> KOBJECTS UNLESS
> YOU UNDERSTAND LIFETIME RULES.
>
> Linus, please consider that as a formal veto on any
> patches embedding
> kobjects into struct super_block unless they are accompanied
> by detailed
> analysis of lifetime rules _AND_ had been reviewed and ACKed
> on linux-kernel
> and linux-fsdevel.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: RFC: adding an "fs" directory under /sys
2004-04-17 21:51 Lever, Charles
@ 2004-04-17 21:55 ` Christoph Hellwig
2004-04-17 22:12 ` viro
0 siblings, 1 reply; 11+ messages in thread
From: Christoph Hellwig @ 2004-04-17 21:55 UTC (permalink / raw)
To: Lever, Charles
Cc: viro, linux-fsdevel, Trond Myklebust, Patrick Mochel,
Linus Torvalds
On Sat, Apr 17, 2004 at 02:51:48PM -0700, Lever, Charles wrote:
> so, i take it you would prefer me to add a kobject to the
> nfs_server struct for my purposes, rather than use the
> unused kobject that is *already* embedded in the
> super_block?
I think Al wants you to understand what you're trying to do first.
Please read up about the kobject lifetime rules first, if you can make
the nfs_server lifetime work with kobjects expectation go for it, but
don't think it's easy.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: RFC: adding an "fs" directory under /sys
2004-04-17 21:55 ` Christoph Hellwig
@ 2004-04-17 22:12 ` viro
0 siblings, 0 replies; 11+ messages in thread
From: viro @ 2004-04-17 22:12 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Lever, Charles, linux-fsdevel, Trond Myklebust, Patrick Mochel,
Linus Torvalds
On Sat, Apr 17, 2004 at 10:55:09PM +0100, Christoph Hellwig wrote:
> On Sat, Apr 17, 2004 at 02:51:48PM -0700, Lever, Charles wrote:
> > so, i take it you would prefer me to add a kobject to the
> > nfs_server struct for my purposes, rather than use the
> > unused kobject that is *already* embedded in the
> > super_block?
keyword being "unused". It has no business being there in the first
place and yes, it should be removed. And no, the way you are using it
is not safe - it creates a user-triggerable oops if you ever get an
NFS filesystem unmounted.
> I think Al wants you to understand what you're trying to do first.
> Please read up about the kobject lifetime rules first, if you can make
> the nfs_server lifetime work with kobjects expectation go for it, but
> don't think it's easy.
Precisely. Add to that the lifetime rules for module itself and you are
in for fun.
Linus, could you apply the patch below - the field in question is
a) unused
b) damn next to impossible to use correctly, due to struct super_block
lifetime and locking rules.
diff -urN RC6-rc1-bk1/include/linux/fs.h RC6-rc1-bk1-current/include/linux/fs.h
--- RC6-rc1-bk1/include/linux/fs.h Fri Apr 16 12:39:19 2004
+++ RC6-rc1-bk1-current/include/linux/fs.h Sat Apr 17 18:09:34 2004
@@ -751,7 +751,6 @@
char s_id[32]; /* Informational name */
- struct kobject kobj; /* anchor for sysfs */
void *s_fs_info; /* Filesystem private info */
/*
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
@ 2004-04-19 13:24 Lever, Charles
0 siblings, 0 replies; 11+ messages in thread
From: Lever, Charles @ 2004-04-19 13:24 UTC (permalink / raw)
To: Christoph Hellwig
Cc: viro, linux-fsdevel, Trond Myklebust, Patrick Mochel,
Linus Torvalds
> On Sat, Apr 17, 2004 at 02:51:48PM -0700, Lever, Charles wrote:
> > so, i take it you would prefer me to add a kobject to the
> > nfs_server struct for my purposes, rather than use the
> > unused kobject that is *already* embedded in the
> > super_block?
>
> I think Al wants you to understand what you're trying to do first.
> Please read up about the kobject lifetime rules first, if you can make
> the nfs_server lifetime work with kobjects expectation go for it, but
> don't think it's easy.
i never thought it would be easy, that's why i'm asking for some review
and comments to help me go in the right direction.
where are the lifetime rules documented? i've read the sysfs and
kobject.txt files already several times, as well as the kobject sample
implementations under linux/drivers/* .
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
@ 2004-04-19 13:51 Lever, Charles
2004-05-11 5:54 ` Neil Brown
0 siblings, 1 reply; 11+ messages in thread
From: Lever, Charles @ 2004-04-19 13:51 UTC (permalink / raw)
To: viro, Christoph Hellwig
Cc: linux-fsdevel, Trond Myklebust, Patrick Mochel, Linus Torvalds
> On Sat, Apr 17, 2004 at 10:55:09PM +0100, Christoph Hellwig wrote:
> > On Sat, Apr 17, 2004 at 02:51:48PM -0700, Lever, Charles wrote:
> > > so, i take it you would prefer me to add a kobject to the
> > > nfs_server struct for my purposes, rather than use the
> > > unused kobject that is *already* embedded in the
> > > super_block?
>
> keyword being "unused". It has no business being there in the first
> place and yes, it should be removed. And no, the way you are using it
> is not safe - it creates a user-triggerable oops if you ever get an
> NFS filesystem unmounted.
i haven't seen an Oops in my testing, but understood, i see how this
is incorrect.
> > I think Al wants you to understand what you're trying to do first.
> > Please read up about the kobject lifetime rules first, if
> you can make
> > the nfs_server lifetime work with kobjects expectation go
> for it, but
> > don't think it's easy.
>
> Precisely. Add to that the lifetime rules for module itself
> and you are in for fun.
forgetting /sys and kobjects for a moment, if you had to choose a way
to export I/O statistics for NFS mounts, how would you do it?
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
2004-04-19 13:51 Lever, Charles
@ 2004-05-11 5:54 ` Neil Brown
0 siblings, 0 replies; 11+ messages in thread
From: Neil Brown @ 2004-05-11 5:54 UTC (permalink / raw)
To: Lever, Charles
Cc: viro, Christoph Hellwig, linux-fsdevel, Trond Myklebust,
Patrick Mochel, Linus Torvalds
On Monday April 19, Charles.Lever@netapp.com wrote:
>
> forgetting /sys and kobjects for a moment, if you had to choose a way
> to export I/O statistics for NFS mounts, how would you do it?
I'm going to take a big risk and guess Al's preference:
Create a filesystem type: nfsstats
Arrange that
mount -t nfsstats /path/to/current/nfs/mount /path/to/somewhere
will mount at /path/to/somewhere a filesystem that contains trivial
files containing the relevant stats for /path/to/current/nfs/mount
This mountpoint should own a reference to the nfs filesystem (the
superblock or vfsmnt, not sure which) so that the filesystem, even
if unmounted, will hang around until the nfsstats filesystem were
unmounted.
My preference would be to come up with an alternative to sysfs which
didn't have the problematic lifetime rules. The linkage between the
newsysfs files and the underlying kernelobjects should be soft
linkages somewhat like a NFS filehandle. The kernelobject should be
able to go away at any time (except for a small spinlocked window when
data is being extracted or inserted) and if a related file in newsysfs
is still open, it starts getting ESTALE.
If the file-handle lookup was too expensive for some objects, then a
revocable link could be used: The object holds a list of these
references and can walk through invalidating them all.
NeilBrown
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
@ 2004-05-11 17:20 Lever, Charles
2004-05-13 6:18 ` Neil Brown
0 siblings, 1 reply; 11+ messages in thread
From: Lever, Charles @ 2004-05-11 17:20 UTC (permalink / raw)
To: Neil Brown
Cc: viro, Christoph Hellwig, linux-fsdevel, Trond Myklebust,
Patrick Mochel, Linus Torvalds
we can resolve the object lifetime issues.
the problem we can't yet resolve is how to show only the file systems
that are in a process's name space in /sys/fs/whatever.
> -----Original Message-----
> From: Neil Brown [mailto:neilb@cse.unsw.edu.au]
> Sent: Tuesday, May 11, 2004 1:54 AM
> To: Lever, Charles
> Cc: viro@parcelfarce.linux.theplanet.co.uk; Christoph
> Hellwig; linux-fsdevel; Trond Myklebust; Patrick Mochel;
> Linus Torvalds
> Subject: RE: RFC: adding an "fs" directory under /sys
>
>
> On Monday April 19, Charles.Lever@netapp.com wrote:
> >
> > forgetting /sys and kobjects for a moment, if you had to
> choose a way
> > to export I/O statistics for NFS mounts, how would you do it?
>
> I'm going to take a big risk and guess Al's preference:
>
> Create a filesystem type: nfsstats
> Arrange that
> mount -t nfsstats /path/to/current/nfs/mount /path/to/somewhere
>
> will mount at /path/to/somewhere a filesystem that contains trivial
> files containing the relevant stats for /path/to/current/nfs/mount
> This mountpoint should own a reference to the nfs filesystem (the
> superblock or vfsmnt, not sure which) so that the filesystem, even
> if unmounted, will hang around until the nfsstats filesystem were
> unmounted.
>
>
> My preference would be to come up with an alternative to
> sysfs which didn't have the problematic lifetime rules. The
> linkage between the newsysfs files and the underlying
> kernelobjects should be soft linkages somewhat like a NFS
> filehandle. The kernelobject should be able to go away at
> any time (except for a small spinlocked window when data is
> being extracted or inserted) and if a related file in
> newsysfs is still open, it starts getting ESTALE. If the
> file-handle lookup was too expensive for some objects, then a
> revocable link could be used: The object holds a list of
> these references and can walk through invalidating them all.
>
> NeilBrown
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
2004-05-11 17:20 RFC: adding an "fs" directory under /sys Lever, Charles
@ 2004-05-13 6:18 ` Neil Brown
0 siblings, 0 replies; 11+ messages in thread
From: Neil Brown @ 2004-05-13 6:18 UTC (permalink / raw)
To: Lever, Charles
Cc: viro, Christoph Hellwig, linux-fsdevel, Trond Myklebust,
Patrick Mochel, Linus Torvalds
On Tuesday May 11, Charles.Lever@netapp.com wrote:
> we can resolve the object lifetime issues.
>
> the problem we can't yet resolve is how to show only the file systems
> that are in a process's name space in /sys/fs/whatever.
>
I'm not sure why you would want to. After all, /proc still lists
processes that you cannot interact with at all. Why shouldn't
/sys/fs/whatever list filesystems that are beyond your ken.
But you presumably have a reason.
I tried thinking about it and I cannot even come up with a suitable
name to use to identify a filesystem in /sys/fs.
sprintf(buf, "%p", sb)
would work, but is not very admin-friendly.
maybe:
filesystemtype:filesystem-specific-name
where the filesystem-specific-name might be the name of a block
device, or an nfs-server+path (but slashes would be a problem) or a
simple sequential number, or... maybe something else.
Did you have thoughts about that?
NeilBrown
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: RFC: adding an "fs" directory under /sys
@ 2004-05-13 13:06 Lever, Charles
0 siblings, 0 replies; 11+ messages in thread
From: Lever, Charles @ 2004-05-13 13:06 UTC (permalink / raw)
To: Neil Brown
Cc: viro, Christoph Hellwig, linux-fsdevel, Trond Myklebust,
Patrick Mochel, Linus Torvalds
> On Tuesday May 11, Charles.Lever@netapp.com wrote:
> > we can resolve the object lifetime issues.
> >
> > the problem we can't yet resolve is how to show only the
> file systems
> > that are in a process's name space in /sys/fs/whatever.
> >
>
> I'm not sure why you would want to. After all, /proc still
> lists processes that you cannot interact with at all. Why
> shouldn't /sys/fs/whatever list filesystems that are beyond your ken.
>
> But you presumably have a reason.
i'll let al viro answer that, as this issue is his critique of /sys/fs.
> I tried thinking about it and I cannot even come up with a
> suitable name to use to identify a filesystem in /sys/fs.
> sprintf(buf, "%p", sb)
> would work, but is not very admin-friendly.
>
> maybe:
> filesystemtype:filesystem-specific-name
>
> where the filesystem-specific-name might be the name of a
> block device, or an nfs-server+path (but slashes would be a
> problem) or a simple sequential number, or... maybe something else.
>
> Did you have thoughts about that?
my patch creates a subsystem under /sys/fs for each file system type
that wants one. the lifetime of the subsystem matches the
lifetime of the file system module -- the subsystem is registered in
the same logic that loads a file system module, and is unregistered
in the logic that unloads a file system module. (i don't think i've
properly addressed the case where some process is in a subsystem
directory and that module gets unloaded, but i think we can address
that).
so in /sys/fs, you might find a directory called nfs/, and maybe
one called ext3/, one called xfs/, and one called tmpfs/. one
thing you won't find using this scheme is a directory called nfsd/
because the NFS server is not a file system type. i think nfsd
already has some directories under /var and /proc, right?
under each file system type subsystem directory, the file system can
register kobjects and choose a naming scheme however it likes. if
the file system is registering a kobject for each mounted file system
of that type, then the problem is how to expose to processes only those
file systems that are already in each process's namespace.
if the NFS client wants to register each NFS mount point, then it must
choose an arbitrary naming scheme. i used "mount:MAJ,MIN" ... and
the issue becomes how user space can match one of these subdirectories
to an actual mount point. the NFS client maintains the export path
and server hostname in attribute files in each of these mount point
directories, but this isn't helpful when the same export path is
mounted more than once on a client.
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2004-05-13 13:08 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-05-11 17:20 RFC: adding an "fs" directory under /sys Lever, Charles
2004-05-13 6:18 ` Neil Brown
-- strict thread matches above, loose matches on Subject: below --
2004-05-13 13:06 Lever, Charles
2004-04-19 13:51 Lever, Charles
2004-05-11 5:54 ` Neil Brown
2004-04-19 13:24 Lever, Charles
2004-04-17 21:51 Lever, Charles
2004-04-17 21:55 ` Christoph Hellwig
2004-04-17 22:12 ` viro
2004-04-17 19:46 Lever, Charles
2004-04-17 20:07 ` viro
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).