All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] RFC: NFS - filelayout DS rpc mountstats support
@ 2012-02-08 17:10 Weston Andros Adamson
  2012-02-08 17:26 ` Chuck Lever
  0 siblings, 1 reply; 14+ messages in thread
From: Weston Andros Adamson @ 2012-02-08 17:10 UTC (permalink / raw)
  To: Trond.Myklebust, chuck.lever; +Cc: linux-nfs, Weston Andros Adamson

The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
layer and are collected per rpc_client.  This has worked for nfs thus far
since only one nfs_client was ever associated with a mountpoint, but with
NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.

This can be seen with a pnfs mount where no data server is the same as the MDS -
all reads, writes and commits will never make it into the current mountstats
output.

I took the approach of keeping the stats from the MDS and DSs separate in the
/proc/self/mountstats output to avoid doing too much in the kernel, and to
expose the fact that these stats are from separate connections (maybe for later
use).

This method has issues:

 1) even changing the "statvers" of mountstats doesn't stop the userland
    mountstats(8) from trying to parse this output -- and it does so
    incorrectly.

 2) when using DS multipath this method fails to count operations that happened
    on a different path to the same DS (after a failure).

 3) currently this will display stats twice when DS == MDS.

So... What should I do here?

A different approach is to add support in net/sunrpc to specify a "parent"
stat structure so that operations on DS nfs_clients bump stats on
the main nfs_server->nfs_client->rpc_client.  This would take care of all the
issues with the current patch, but seems like a hack.

One task that seems like a good thing to do is to fix mountstats(8) to respect
the "statsvers" value.

Thoughts?
---

I cleaned up this patch, but I still have reservations (noted in commit message)

 fs/nfs/nfs4filelayout.c    |   23 +++++++++++++++++++++++
 fs/nfs/pnfs.h              |   20 ++++++++++++++++++++
 fs/nfs/pnfs_dev.c          |   25 +++++++++++++++++++++++++
 fs/nfs/super.c             |    1 +
 include/linux/nfs_iostat.h |    2 +-
 5 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 79be7ac..9410fd0 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -33,6 +33,8 @@
 #include <linux/nfs_page.h>
 #include <linux/module.h>
 
+#include <linux/sunrpc/metrics.h>
+
 #include "internal.h"
 #include "nfs4filelayout.h"
 
@@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
 	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
 }
 
+static void
+filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
+{
+	struct nfs4_file_layout_dsaddr *dsaddr;
+	struct nfs4_pnfs_ds *ds;
+	u32 i;
+
+	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+
+	for (i = 0; i < dsaddr->ds_num; i++) {
+		ds = dsaddr->ds_list[i];
+
+		if (ds && ds->ds_clp) {
+			seq_printf(m, "  pnfs dataserver %s\n",
+				   ds->ds_remotestr);
+			rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
+		}
+	}
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
@@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
 	.free_deviceid_node	= filelayout_free_deveiceid_node,
+	.ds_rpc_print_iostats	= filelayout_rpc_print_iostats,
 };
 
 static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 53d593a..0a5e020 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
 	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
 				     struct xdr_stream *xdr,
 				     const struct nfs4_layoutcommit_args *args);
+
+	void (*ds_rpc_print_iostats) (struct seq_file *,
+				      struct nfs4_deviceid_node *);
 };
 
 struct pnfs_layout_hdr {
@@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
 struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
 bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
 void nfs4_deviceid_purge_client(const struct nfs_client *);
+void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
+				     const struct nfs_client *clp,
+				     const struct pnfs_layoutdriver_type *ld);
+
 
 static inline int lo_fail_bit(u32 iomode)
 {
@@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
 	return 0;
 }
 
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+	if (!pnfs_enabled_sb(nfss))
+		return;
+	nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
+					nfss->pnfs_curr_ld);
+}
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
 {
 }
+
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 4f359d2..277de87 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
 }
 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
 
+
+void
+nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
+				const struct nfs_client *clp,
+				const struct pnfs_layoutdriver_type *ld)
+{
+	struct nfs4_deviceid_node *d;
+	struct hlist_node *n;
+	long h;
+
+	if (!ld->ds_rpc_print_iostats)
+		return;
+
+	rcu_read_lock();
+	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
+		hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
+			if (d->ld == ld && d->nfs_client == clp) {
+				if (atomic_read(&d->ref))
+					ld->ds_rpc_print_iostats(seq, d);
+			}
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
+
 /*
  * Remove a deviceid from cache
  *
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d18a90b..453e496 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
 	seq_printf(m, "\n");
 
 	rpc_print_iostats(m, nfss->client);
+	pnfs_rpc_print_iostats(m, nfss);
 
 	return 0;
 }
diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
index 8866bb3..7fe4605 100644
--- a/include/linux/nfs_iostat.h
+++ b/include/linux/nfs_iostat.h
@@ -21,7 +21,7 @@
 #ifndef _LINUX_NFS_IOSTAT
 #define _LINUX_NFS_IOSTAT
 
-#define NFS_IOSTAT_VERS		"1.0"
+#define NFS_IOSTAT_VERS		"2.0"
 
 /*
  * NFS byte counters
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread
* [PATCH] [RFC] NFS - filelayout DS rpc mountstats support
@ 2012-01-30 20:53 Weston Andros Adamson
  2012-01-30 20:57 ` Adamson, Dros
  2012-01-31 19:19 ` Adamson, Dros
  0 siblings, 2 replies; 14+ messages in thread
From: Weston Andros Adamson @ 2012-01-30 20:53 UTC (permalink / raw)
  To: Trond.Myklebust; +Cc: linux-nfs, Weston Andros Adamson

The RPC stats displayed in /self/proc/mountstats are collected in the sunrpc
layer and are collected per rpc_client.  This has worked for NFS thus far
since only one nfs_client (and it's associated rpc_client) was ever associated
with a mountpoint.

Now with NFS4.1+PNFS+filelayout, NFS can have more than one nfs_client
associated with a mountpoint!  Note that the rpc stats are hung off of the
rpc_client - so even if two connections share the same transport, they will
not use the same stats structure (ie when MDS == DS).

This patch -- admittedly ugly -- poles holes through the PNFS layout driver
to print the stats for rpc_client structures for the dataserver connections
with the filelayout driver.

I took the approach of keeping them separate in the /proc/self/mountstats
output to avoid doing too much in the kernel.  If we agree this is the way to
go, I'll update mountstats(1) to combine them for the default output (much in
the same way that averaging is done in userland instead of kernel here).
The alternative is to poke holes in the sunrpc layer to allow combining stats.

Again, I don't love this solution - it seems like a hack.  The other option
I can see is changing the rpc layer to have support for linking several
rpc_client structures to use the same stat struct, but this would likely look
like a huge hack too.

Thoughts?
---
 fs/nfs/nfs4filelayout.c    |   19 +++++++++++++++++++
 fs/nfs/nfs4filelayout.h    |    2 ++
 fs/nfs/nfs4filelayoutdev.c |   19 +++++++++++++++++++
 fs/nfs/pnfs.h              |   17 +++++++++++++++++
 fs/nfs/pnfs_dev.c          |   36 ++++++++++++++++++++++++++++++++++++
 fs/nfs/super.c             |    1 +
 6 files changed, 94 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b4f8f96..ce20ac3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -916,6 +916,24 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
 	nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
 }
 
+static void
+_rpc_print_iostats_cb(struct nfs4_deviceid_node *d, void *ctx)
+{
+	struct seq_file *m;
+	struct nfs4_file_layout_dsaddr *dsaddr;
+
+	m = (struct seq_file *)ctx;
+	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+	nfs4_fl_rpc_print_iostats(m, dsaddr);
+}
+
+static void
+filelayout_rpc_print_iostats(struct seq_file *m, const struct nfs_server *nfss)
+{
+	nfs4_foreach_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client,
+			      _rpc_print_iostats_cb, m);
+}
+
 static struct pnfs_layoutdriver_type filelayout_type = {
 	.id			= LAYOUT_NFSV4_1_FILES,
 	.name			= "LAYOUT_NFSV4_1_FILES",
@@ -930,6 +948,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
 	.read_pagelist		= filelayout_read_pagelist,
 	.write_pagelist		= filelayout_write_pagelist,
 	.free_deviceid_node	= filelayout_free_deveiceid_node,
+	.rpc_print_iostats	= filelayout_rpc_print_iostats,
 };
 
 static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2e42284..6942992 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -110,6 +110,8 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
 u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
 struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
 					u32 ds_idx);
+void nfs4_fl_rpc_print_iostats(struct seq_file *,
+			       struct nfs4_file_layout_dsaddr *);
 extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
 struct nfs4_file_layout_dsaddr *
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 6eb59b0..1a03a14 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -31,6 +31,8 @@
 #include <linux/nfs_fs.h>
 #include <linux/vmalloc.h>
 
+#include <linux/sunrpc/metrics.h>
+
 #include "internal.h"
 #include "nfs4filelayout.h"
 
@@ -829,6 +831,23 @@ filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
 	spin_unlock(&nfs4_ds_cache_lock);
 }
 
+void
+nfs4_fl_rpc_print_iostats(struct seq_file *m,
+			  struct nfs4_file_layout_dsaddr *dsaddr)
+{
+	struct nfs4_pnfs_ds *ds;
+	u32 i;
+
+	for (i = 0; i < dsaddr->ds_num; i++) {
+		ds = dsaddr->ds_list[i];
+
+		if (ds && ds->ds_clp) {
+			seq_printf(m, "PNFS Dataserver %s\n", ds->ds_remotestr);
+			rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
+		}
+	}
+}
+
 struct nfs4_pnfs_ds *
 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 53d593a..86433d8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
 	void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
 				     struct xdr_stream *xdr,
 				     const struct nfs4_layoutcommit_args *args);
+
+	void (*rpc_print_iostats) (struct seq_file *m,
+				   const struct nfs_server *nfss);
 };
 
 struct pnfs_layout_hdr {
@@ -231,6 +234,7 @@ struct nfs4_deviceid_node {
 
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
+void nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, void (*callback)(struct nfs4_deviceid_node *, void *), void *);
 void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
 			     const struct pnfs_layoutdriver_type *,
@@ -328,6 +332,13 @@ static inline int pnfs_return_layout(struct inode *ino)
 	return 0;
 }
 
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+	if (pnfs_enabled_sb(nfss) && nfss->pnfs_curr_ld->rpc_print_iostats)
+		nfss->pnfs_curr_ld->rpc_print_iostats(m, nfss);
+}
+
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -429,6 +440,12 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
 {
 }
+
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 4f359d2..5a65668 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -115,6 +115,42 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
 }
 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
 
+
+/*
+ * Apply callback function to each entry in the cache that matches
+ * the specified layout driver and nfs_client (of the MDS)
+ */
+void
+_nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *ld,
+		       const struct nfs_client *clp,
+		       void (*callback)(struct nfs4_deviceid_node *, void *),
+		       void *ctx)
+{
+	struct nfs4_deviceid_node *d;
+	struct hlist_node *n;
+	long hash;
+
+	for (hash = 0; hash < NFS4_DEVICE_ID_HASH_SIZE; hash++) {
+		hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+			if (d->ld == ld && d->nfs_client == clp) {
+				if (atomic_read(&d->ref))
+					callback(d, ctx);
+			}
+	}
+}
+
+void
+nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *ld,
+		      const struct nfs_client *clp,
+		      void (*callback)(struct nfs4_deviceid_node *, void *),
+		      void *ctx)
+{
+	rcu_read_lock();
+	_nfs4_foreach_deviceid(ld, clp, callback, ctx);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nfs4_foreach_deviceid);
+
 /*
  * Remove a deviceid from cache
  *
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b79f2a1..5d8e00b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -869,6 +869,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
 	seq_printf(m, "\n");
 
 	rpc_print_iostats(m, nfss->client);
+	pnfs_rpc_print_iostats(m, nfss);
 
 	return 0;
 }
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2012-02-14 16:32 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-08 17:10 [PATCH] RFC: NFS - filelayout DS rpc mountstats support Weston Andros Adamson
2012-02-08 17:26 ` Chuck Lever
2012-02-08 21:04   ` Adamson, Dros
2012-02-08 21:55     ` Adamson, Dros
2012-02-08 22:35       ` Chuck Lever
2012-02-13  8:05     ` Benny Halevy
2012-02-13 17:58       ` Adamson, Dros
2012-02-14  9:31         ` Benny Halevy
2012-02-14 15:44           ` Andy Adamson
2012-02-14 15:59             ` Adamson, Dros
2012-02-14 16:32               ` Andy Adamson
  -- strict thread matches above, loose matches on Subject: below --
2012-01-30 20:53 [PATCH] [RFC] " Weston Andros Adamson
2012-01-30 20:57 ` Adamson, Dros
2012-01-31 19:19 ` Adamson, Dros

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.