* [PATCH] RFC: NFS - filelayout DS rpc mountstats support
@ 2012-02-08 17:10 Weston Andros Adamson
2012-02-08 17:26 ` Chuck Lever
0 siblings, 1 reply; 14+ messages in thread
From: Weston Andros Adamson @ 2012-02-08 17:10 UTC (permalink / raw)
To: Trond.Myklebust, chuck.lever; +Cc: linux-nfs, Weston Andros Adamson
The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
layer and are collected per rpc_client. This has worked for nfs thus far
since only one nfs_client was ever associated with a mountpoint, but with
NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
This can be seen with a pnfs mount where no data server is the same as the MDS -
all reads, writes and commits will never make it into the current mountstats
output.
I took the approach of keeping the stats from the MDS and DSs separate in the
/proc/self/mountstats output to avoid doing too much in the kernel, and to
expose the fact that these stats are from separate connections (maybe for later
use).
This method has issues:
1) even changing the "statvers" of mountstats doesn't stop the userland
mountstats(8) from trying to parse this output -- and it does so
incorrectly.
2) when using DS multipath this method fails to count operations that happened
on a different path to the same DS (after a failure).
3) currently this will display stats twice when DS == MDS.
So... What should I do here?
A different approach is to add support in net/sunrpc to specify a "parent"
stat structure so that operations on DS nfs_clients bump stats on
the main nfs_server->nfs_client->rpc_client. This would take care of all the
issues with the current patch, but seems like a hack.
One task that seems like a good thing to do is to fix mountstats(8) to respect
the "statsvers" value.
Thoughts?
---
I cleaned up this patch, but I still have reservations (noted in commit message)
fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
fs/nfs/pnfs.h | 20 ++++++++++++++++++++
fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
fs/nfs/super.c | 1 +
include/linux/nfs_iostat.h | 2 +-
5 files changed, 70 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 79be7ac..9410fd0 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -33,6 +33,8 @@
#include <linux/nfs_page.h>
#include <linux/module.h>
+#include <linux/sunrpc/metrics.h>
+
#include "internal.h"
#include "nfs4filelayout.h"
@@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
}
+static void
+filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
+{
+ struct nfs4_file_layout_dsaddr *dsaddr;
+ struct nfs4_pnfs_ds *ds;
+ u32 i;
+
+ dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+
+ for (i = 0; i < dsaddr->ds_num; i++) {
+ ds = dsaddr->ds_list[i];
+
+ if (ds && ds->ds_clp) {
+ seq_printf(m, " pnfs dataserver %s\n",
+ ds->ds_remotestr);
+ rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
+ }
+ }
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
@@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
.free_deviceid_node = filelayout_free_deveiceid_node,
+ .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
};
static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 53d593a..0a5e020 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
+
+ void (*ds_rpc_print_iostats) (struct seq_file *,
+ struct nfs4_deviceid_node *);
};
struct pnfs_layout_hdr {
@@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
void nfs4_deviceid_purge_client(const struct nfs_client *);
+void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
+ const struct nfs_client *clp,
+ const struct pnfs_layoutdriver_type *ld);
+
static inline int lo_fail_bit(u32 iomode)
{
@@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+ if (!pnfs_enabled_sb(nfss))
+ return;
+ nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
+ nfss->pnfs_curr_ld);
+}
#else /* CONFIG_NFS_V4_1 */
static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
{
}
+
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 4f359d2..277de87 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
+
+void
+nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
+ const struct nfs_client *clp,
+ const struct pnfs_layoutdriver_type *ld)
+{
+ struct nfs4_deviceid_node *d;
+ struct hlist_node *n;
+ long h;
+
+ if (!ld->ds_rpc_print_iostats)
+ return;
+
+ rcu_read_lock();
+ for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
+ hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
+ if (d->ld == ld && d->nfs_client == clp) {
+ if (atomic_read(&d->ref))
+ ld->ds_rpc_print_iostats(seq, d);
+ }
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
+
/*
* Remove a deviceid from cache
*
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d18a90b..453e496 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
seq_printf(m, "\n");
rpc_print_iostats(m, nfss->client);
+ pnfs_rpc_print_iostats(m, nfss);
return 0;
}
diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
index 8866bb3..7fe4605 100644
--- a/include/linux/nfs_iostat.h
+++ b/include/linux/nfs_iostat.h
@@ -21,7 +21,7 @@
#ifndef _LINUX_NFS_IOSTAT
#define _LINUX_NFS_IOSTAT
-#define NFS_IOSTAT_VERS "1.0"
+#define NFS_IOSTAT_VERS "2.0"
/*
* NFS byte counters
--
1.7.4.4
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-08 17:10 [PATCH] RFC: NFS - filelayout DS rpc mountstats support Weston Andros Adamson
@ 2012-02-08 17:26 ` Chuck Lever
2012-02-08 21:04 ` Adamson, Dros
0 siblings, 1 reply; 14+ messages in thread
From: Chuck Lever @ 2012-02-08 17:26 UTC (permalink / raw)
To: Weston Andros Adamson; +Cc: Trond.Myklebust, linux-nfs
Hi-
On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
You mean /proc/self/mountstats here.
> layer and are collected per rpc_client. This has worked for nfs thus far
> since only one nfs_client was ever associated with a mountpoint, but with
> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>
> This can be seen with a pnfs mount where no data server is the same as the MDS -
> all reads, writes and commits will never make it into the current mountstats
> output.
>
> I took the approach of keeping the stats from the MDS and DSs separate in the
> /proc/self/mountstats output to avoid doing too much in the kernel, and to
> expose the fact that these stats are from separate connections (maybe for later
> use).
>
> This method has issues:
>
> 1) even changing the "statvers" of mountstats doesn't stop the userland
> mountstats(8) from trying to parse this output -- and it does so
> incorrectly.
User land is broken then. :-)
> 2) when using DS multipath this method fails to count operations that happened
> on a different path to the same DS (after a failure).
>
> 3) currently this will display stats twice when DS == MDS.
Why is this case hard to detect in the kernel or in mountstats?
>
> So... What should I do here?
>
> A different approach is to add support in net/sunrpc to specify a "parent"
> stat structure so that operations on DS nfs_clients bump stats on
> the main nfs_server->nfs_client->rpc_client. This would take care of all the
> issues with the current patch, but seems like a hack.
Is this the same as displaying all data in one set of stats?
Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
> One task that seems like a good thing to do is to fix mountstats(8) to respect
> the "statsvers" value.
Agree.
>
> Thoughts?
> ---
>
> I cleaned up this patch, but I still have reservations (noted in commit message)
>
> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
> fs/nfs/super.c | 1 +
> include/linux/nfs_iostat.h | 2 +-
> 5 files changed, 70 insertions(+), 1 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 79be7ac..9410fd0 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -33,6 +33,8 @@
> #include <linux/nfs_page.h>
> #include <linux/module.h>
>
> +#include <linux/sunrpc/metrics.h>
> +
> #include "internal.h"
> #include "nfs4filelayout.h"
>
> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
> }
>
> +static void
> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
> +{
> + struct nfs4_file_layout_dsaddr *dsaddr;
> + struct nfs4_pnfs_ds *ds;
> + u32 i;
> +
> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
> +
> + for (i = 0; i < dsaddr->ds_num; i++) {
> + ds = dsaddr->ds_list[i];
> +
> + if (ds && ds->ds_clp) {
> + seq_printf(m, " pnfs dataserver %s\n",
> + ds->ds_remotestr);
> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
> + }
> + }
> +}
> +
> static struct pnfs_layoutdriver_type filelayout_type = {
> .id = LAYOUT_NFSV4_1_FILES,
> .name = "LAYOUT_NFSV4_1_FILES",
> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
> .read_pagelist = filelayout_read_pagelist,
> .write_pagelist = filelayout_write_pagelist,
> .free_deviceid_node = filelayout_free_deveiceid_node,
> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
> };
>
> static int __init nfs4filelayout_init(void)
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 53d593a..0a5e020 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
> struct xdr_stream *xdr,
> const struct nfs4_layoutcommit_args *args);
> +
> + void (*ds_rpc_print_iostats) (struct seq_file *,
> + struct nfs4_deviceid_node *);
> };
>
> struct pnfs_layout_hdr {
> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
> void nfs4_deviceid_purge_client(const struct nfs_client *);
> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
> + const struct nfs_client *clp,
> + const struct pnfs_layoutdriver_type *ld);
> +
>
> static inline int lo_fail_bit(u32 iomode)
> {
> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
> return 0;
> }
>
> +static inline void
> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
> +{
> + if (!pnfs_enabled_sb(nfss))
> + return;
> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
> + nfss->pnfs_curr_ld);
> +}
> #else /* CONFIG_NFS_V4_1 */
>
> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
> {
> }
> +
> +static inline void
> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
> +{
> +}
> #endif /* CONFIG_NFS_V4_1 */
>
> #endif /* FS_NFS_PNFS_H */
> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
> index 4f359d2..277de87 100644
> --- a/fs/nfs/pnfs_dev.c
> +++ b/fs/nfs/pnfs_dev.c
> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
> }
> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>
> +
> +void
> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
> + const struct nfs_client *clp,
> + const struct pnfs_layoutdriver_type *ld)
> +{
> + struct nfs4_deviceid_node *d;
> + struct hlist_node *n;
> + long h;
> +
> + if (!ld->ds_rpc_print_iostats)
> + return;
> +
> + rcu_read_lock();
> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
> + if (d->ld == ld && d->nfs_client == clp) {
> + if (atomic_read(&d->ref))
> + ld->ds_rpc_print_iostats(seq, d);
> + }
> + }
> + rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
> +
> /*
> * Remove a deviceid from cache
> *
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index d18a90b..453e496 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
> seq_printf(m, "\n");
>
> rpc_print_iostats(m, nfss->client);
> + pnfs_rpc_print_iostats(m, nfss);
>
> return 0;
> }
> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
> index 8866bb3..7fe4605 100644
> --- a/include/linux/nfs_iostat.h
> +++ b/include/linux/nfs_iostat.h
> @@ -21,7 +21,7 @@
> #ifndef _LINUX_NFS_IOSTAT
> #define _LINUX_NFS_IOSTAT
>
> -#define NFS_IOSTAT_VERS "1.0"
> +#define NFS_IOSTAT_VERS "2.0"
>
> /*
> * NFS byte counters
> --
> 1.7.4.4
>
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-08 17:26 ` Chuck Lever
@ 2012-02-08 21:04 ` Adamson, Dros
2012-02-08 21:55 ` Adamson, Dros
2012-02-13 8:05 ` Benny Halevy
0 siblings, 2 replies; 14+ messages in thread
From: Adamson, Dros @ 2012-02-08 21:04 UTC (permalink / raw)
To: Chuck Lever; +Cc: Myklebust, Trond, <linux-nfs@vger.kernel.org>
[-- Attachment #1: Type: text/plain, Size: 8691 bytes --]
On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
> Hi-
>
> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>
>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>
> You mean /proc/self/mountstats here.
oops!
>
>> layer and are collected per rpc_client. This has worked for nfs thus far
>> since only one nfs_client was ever associated with a mountpoint, but with
>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>
>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>> all reads, writes and commits will never make it into the current mountstats
>> output.
>>
>> I took the approach of keeping the stats from the MDS and DSs separate in the
>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>> expose the fact that these stats are from separate connections (maybe for later
>> use).
>>
>> This method has issues:
>>
>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>> mountstats(8) from trying to parse this output -- and it does so
>> incorrectly.
>
> User land is broken then. :-)
Agreed!
>
>> 2) when using DS multipath this method fails to count operations that happened
>> on a different path to the same DS (after a failure).
I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>
>> 3) currently this will display stats twice when DS == MDS.
>
> Why is this case hard to detect in the kernel or in mountstats?
Not hard, just an issue with the current patch.
>
>>
>> So... What should I do here?
>>
>> A different approach is to add support in net/sunrpc to specify a "parent"
>> stat structure so that operations on DS nfs_clients bump stats on
>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>> issues with the current patch, but seems like a hack.
>
> Is this the same as displaying all data in one set of stats?
Yes
>
> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
Ok, I'll try to do that.
Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>
>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>> the "statsvers" value.
>
> Agree.
I'll do this first.
Thanks Chuck!
-dros
>
>>
>> Thoughts?
>> ---
>>
>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>
>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>> fs/nfs/super.c | 1 +
>> include/linux/nfs_iostat.h | 2 +-
>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>> index 79be7ac..9410fd0 100644
>> --- a/fs/nfs/nfs4filelayout.c
>> +++ b/fs/nfs/nfs4filelayout.c
>> @@ -33,6 +33,8 @@
>> #include <linux/nfs_page.h>
>> #include <linux/module.h>
>>
>> +#include <linux/sunrpc/metrics.h>
>> +
>> #include "internal.h"
>> #include "nfs4filelayout.h"
>>
>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>> }
>>
>> +static void
>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>> +{
>> + struct nfs4_file_layout_dsaddr *dsaddr;
>> + struct nfs4_pnfs_ds *ds;
>> + u32 i;
>> +
>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>> +
>> + for (i = 0; i < dsaddr->ds_num; i++) {
>> + ds = dsaddr->ds_list[i];
>> +
>> + if (ds && ds->ds_clp) {
>> + seq_printf(m, " pnfs dataserver %s\n",
>> + ds->ds_remotestr);
>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>> + }
>> + }
>> +}
>> +
>> static struct pnfs_layoutdriver_type filelayout_type = {
>> .id = LAYOUT_NFSV4_1_FILES,
>> .name = "LAYOUT_NFSV4_1_FILES",
>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>> .read_pagelist = filelayout_read_pagelist,
>> .write_pagelist = filelayout_write_pagelist,
>> .free_deviceid_node = filelayout_free_deveiceid_node,
>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>> };
>>
>> static int __init nfs4filelayout_init(void)
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 53d593a..0a5e020 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>> struct xdr_stream *xdr,
>> const struct nfs4_layoutcommit_args *args);
>> +
>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>> + struct nfs4_deviceid_node *);
>> };
>>
>> struct pnfs_layout_hdr {
>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>> + const struct nfs_client *clp,
>> + const struct pnfs_layoutdriver_type *ld);
>> +
>>
>> static inline int lo_fail_bit(u32 iomode)
>> {
>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>> return 0;
>> }
>>
>> +static inline void
>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>> +{
>> + if (!pnfs_enabled_sb(nfss))
>> + return;
>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>> + nfss->pnfs_curr_ld);
>> +}
>> #else /* CONFIG_NFS_V4_1 */
>>
>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>> {
>> }
>> +
>> +static inline void
>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>> +{
>> +}
>> #endif /* CONFIG_NFS_V4_1 */
>>
>> #endif /* FS_NFS_PNFS_H */
>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>> index 4f359d2..277de87 100644
>> --- a/fs/nfs/pnfs_dev.c
>> +++ b/fs/nfs/pnfs_dev.c
>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>> }
>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>
>> +
>> +void
>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>> + const struct nfs_client *clp,
>> + const struct pnfs_layoutdriver_type *ld)
>> +{
>> + struct nfs4_deviceid_node *d;
>> + struct hlist_node *n;
>> + long h;
>> +
>> + if (!ld->ds_rpc_print_iostats)
>> + return;
>> +
>> + rcu_read_lock();
>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>> + if (d->ld == ld && d->nfs_client == clp) {
>> + if (atomic_read(&d->ref))
>> + ld->ds_rpc_print_iostats(seq, d);
>> + }
>> + }
>> + rcu_read_unlock();
>> +}
>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>> +
>> /*
>> * Remove a deviceid from cache
>> *
>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>> index d18a90b..453e496 100644
>> --- a/fs/nfs/super.c
>> +++ b/fs/nfs/super.c
>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>> seq_printf(m, "\n");
>>
>> rpc_print_iostats(m, nfss->client);
>> + pnfs_rpc_print_iostats(m, nfss);
>>
>> return 0;
>> }
>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>> index 8866bb3..7fe4605 100644
>> --- a/include/linux/nfs_iostat.h
>> +++ b/include/linux/nfs_iostat.h
>> @@ -21,7 +21,7 @@
>> #ifndef _LINUX_NFS_IOSTAT
>> #define _LINUX_NFS_IOSTAT
>>
>> -#define NFS_IOSTAT_VERS "1.0"
>> +#define NFS_IOSTAT_VERS "2.0"
>>
>> /*
>> * NFS byte counters
>> --
>> 1.7.4.4
>>
>
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
>
>
>
>
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 1374 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-08 21:04 ` Adamson, Dros
@ 2012-02-08 21:55 ` Adamson, Dros
2012-02-08 22:35 ` Chuck Lever
2012-02-13 8:05 ` Benny Halevy
1 sibling, 1 reply; 14+ messages in thread
From: Adamson, Dros @ 2012-02-08 21:55 UTC (permalink / raw)
To: Adamson, Dros
Cc: Chuck Lever, Myklebust, Trond, <linux-nfs@vger.kernel.org>
[-- Attachment #1: Type: text/plain, Size: 9197 bytes --]
On Feb 8, 2012, at 4:04 PM, Adamson, Dros wrote:
>
> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>
>> Hi-
>>
>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>
>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>
>> You mean /proc/self/mountstats here.
>
> oops!
>
>>
>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>> since only one nfs_client was ever associated with a mountpoint, but with
>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>
>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>> all reads, writes and commits will never make it into the current mountstats
>>> output.
>>>
>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>> expose the fact that these stats are from separate connections (maybe for later
>>> use).
>>>
>>> This method has issues:
>>>
>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>> mountstats(8) from trying to parse this output -- and it does so
>>> incorrectly.
>>
>> User land is broken then. :-)
>
> Agreed!
What do you think should happen if statsvers > supported? Just print an error, or print a warning and try to parse anyway?
Thanks!
-dros
>
>>
>>> 2) when using DS multipath this method fails to count operations that happened
>>> on a different path to the same DS (after a failure).
>
> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>
>>>
>>> 3) currently this will display stats twice when DS == MDS.
>>
>> Why is this case hard to detect in the kernel or in mountstats?
>
> Not hard, just an issue with the current patch.
>
>>
>>>
>>> So... What should I do here?
>>>
>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>> stat structure so that operations on DS nfs_clients bump stats on
>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>> issues with the current patch, but seems like a hack.
>>
>> Is this the same as displaying all data in one set of stats?
>
> Yes
>
>>
>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>
> Ok, I'll try to do that.
>
> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>
> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>
>>
>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>> the "statsvers" value.
>>
>> Agree.
>
> I'll do this first.
>
> Thanks Chuck!
>
> -dros
>
>>
>>>
>>> Thoughts?
>>> ---
>>>
>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>
>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>> fs/nfs/super.c | 1 +
>>> include/linux/nfs_iostat.h | 2 +-
>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>> index 79be7ac..9410fd0 100644
>>> --- a/fs/nfs/nfs4filelayout.c
>>> +++ b/fs/nfs/nfs4filelayout.c
>>> @@ -33,6 +33,8 @@
>>> #include <linux/nfs_page.h>
>>> #include <linux/module.h>
>>>
>>> +#include <linux/sunrpc/metrics.h>
>>> +
>>> #include "internal.h"
>>> #include "nfs4filelayout.h"
>>>
>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>> }
>>>
>>> +static void
>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>> +{
>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>> + struct nfs4_pnfs_ds *ds;
>>> + u32 i;
>>> +
>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>> +
>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>> + ds = dsaddr->ds_list[i];
>>> +
>>> + if (ds && ds->ds_clp) {
>>> + seq_printf(m, " pnfs dataserver %s\n",
>>> + ds->ds_remotestr);
>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>> + }
>>> + }
>>> +}
>>> +
>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>> .id = LAYOUT_NFSV4_1_FILES,
>>> .name = "LAYOUT_NFSV4_1_FILES",
>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>> .read_pagelist = filelayout_read_pagelist,
>>> .write_pagelist = filelayout_write_pagelist,
>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>> };
>>>
>>> static int __init nfs4filelayout_init(void)
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index 53d593a..0a5e020 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>> struct xdr_stream *xdr,
>>> const struct nfs4_layoutcommit_args *args);
>>> +
>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>> + struct nfs4_deviceid_node *);
>>> };
>>>
>>> struct pnfs_layout_hdr {
>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>> + const struct nfs_client *clp,
>>> + const struct pnfs_layoutdriver_type *ld);
>>> +
>>>
>>> static inline int lo_fail_bit(u32 iomode)
>>> {
>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>> return 0;
>>> }
>>>
>>> +static inline void
>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>> +{
>>> + if (!pnfs_enabled_sb(nfss))
>>> + return;
>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>> + nfss->pnfs_curr_ld);
>>> +}
>>> #else /* CONFIG_NFS_V4_1 */
>>>
>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>> {
>>> }
>>> +
>>> +static inline void
>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>> +{
>>> +}
>>> #endif /* CONFIG_NFS_V4_1 */
>>>
>>> #endif /* FS_NFS_PNFS_H */
>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>> index 4f359d2..277de87 100644
>>> --- a/fs/nfs/pnfs_dev.c
>>> +++ b/fs/nfs/pnfs_dev.c
>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>> }
>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>
>>> +
>>> +void
>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>> + const struct nfs_client *clp,
>>> + const struct pnfs_layoutdriver_type *ld)
>>> +{
>>> + struct nfs4_deviceid_node *d;
>>> + struct hlist_node *n;
>>> + long h;
>>> +
>>> + if (!ld->ds_rpc_print_iostats)
>>> + return;
>>> +
>>> + rcu_read_lock();
>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>> + if (d->ld == ld && d->nfs_client == clp) {
>>> + if (atomic_read(&d->ref))
>>> + ld->ds_rpc_print_iostats(seq, d);
>>> + }
>>> + }
>>> + rcu_read_unlock();
>>> +}
>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>> +
>>> /*
>>> * Remove a deviceid from cache
>>> *
>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>> index d18a90b..453e496 100644
>>> --- a/fs/nfs/super.c
>>> +++ b/fs/nfs/super.c
>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>> seq_printf(m, "\n");
>>>
>>> rpc_print_iostats(m, nfss->client);
>>> + pnfs_rpc_print_iostats(m, nfss);
>>>
>>> return 0;
>>> }
>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>> index 8866bb3..7fe4605 100644
>>> --- a/include/linux/nfs_iostat.h
>>> +++ b/include/linux/nfs_iostat.h
>>> @@ -21,7 +21,7 @@
>>> #ifndef _LINUX_NFS_IOSTAT
>>> #define _LINUX_NFS_IOSTAT
>>>
>>> -#define NFS_IOSTAT_VERS "1.0"
>>> +#define NFS_IOSTAT_VERS "2.0"
>>>
>>> /*
>>> * NFS byte counters
>>> --
>>> 1.7.4.4
>>>
>>
>> --
>> Chuck Lever
>> chuck[dot]lever[at]oracle[dot]com
>>
>>
>>
>>
>
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 1374 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-08 21:55 ` Adamson, Dros
@ 2012-02-08 22:35 ` Chuck Lever
0 siblings, 0 replies; 14+ messages in thread
From: Chuck Lever @ 2012-02-08 22:35 UTC (permalink / raw)
To: Adamson, Dros; +Cc: Myklebust, Trond, <linux-nfs@vger.kernel.org>
On Feb 8, 2012, at 4:55 PM, Adamson, Dros wrote:
>
> On Feb 8, 2012, at 4:04 PM, Adamson, Dros wrote:
>
>>
>> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>>
>>> Hi-
>>>
>>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>>
>>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>>
>>> You mean /proc/self/mountstats here.
>>
>> oops!
>>
>>>
>>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>>> since only one nfs_client was ever associated with a mountpoint, but with
>>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>>
>>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>>> all reads, writes and commits will never make it into the current mountstats
>>>> output.
>>>>
>>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>>> expose the fact that these stats are from separate connections (maybe for later
>>>> use).
>>>>
>>>> This method has issues:
>>>>
>>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>>> mountstats(8) from trying to parse this output -- and it does so
>>>> incorrectly.
>>>
>>> User land is broken then. :-)
>>
>> Agreed!
>
> What do you think should happen if statsvers > supported? Just print an error, or print a warning and try to parse anyway?
Hard to say. Do we want it to go on and possibly print wrong data, or stop and insist on an update? I prefer the latter, but some may not. I think distributions should do integration testing to prevent the latter from occurring on a customer's system.
>
> Thanks!
>
> -dros
>
>>
>>>
>>>> 2) when using DS multipath this method fails to count operations that happened
>>>> on a different path to the same DS (after a failure).
>>
>> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>
>>>>
>>>> 3) currently this will display stats twice when DS == MDS.
>>>
>>> Why is this case hard to detect in the kernel or in mountstats?
>>
>> Not hard, just an issue with the current patch.
>>
>>>
>>>>
>>>> So... What should I do here?
>>>>
>>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>>> stat structure so that operations on DS nfs_clients bump stats on
>>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>>> issues with the current patch, but seems like a hack.
>>>
>>> Is this the same as displaying all data in one set of stats?
>>
>> Yes
>>
>>>
>>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>>
>> Ok, I'll try to do that.
>>
>> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>>
>> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>>
>>>
>>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>>> the "statsvers" value.
>>>
>>> Agree.
>>
>> I'll do this first.
>>
>> Thanks Chuck!
>>
>> -dros
>>
>>>
>>>>
>>>> Thoughts?
>>>> ---
>>>>
>>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>>
>>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>>> fs/nfs/super.c | 1 +
>>>> include/linux/nfs_iostat.h | 2 +-
>>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>> index 79be7ac..9410fd0 100644
>>>> --- a/fs/nfs/nfs4filelayout.c
>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>> @@ -33,6 +33,8 @@
>>>> #include <linux/nfs_page.h>
>>>> #include <linux/module.h>
>>>>
>>>> +#include <linux/sunrpc/metrics.h>
>>>> +
>>>> #include "internal.h"
>>>> #include "nfs4filelayout.h"
>>>>
>>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>>> }
>>>>
>>>> +static void
>>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>>> +{
>>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>>> + struct nfs4_pnfs_ds *ds;
>>>> + u32 i;
>>>> +
>>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>>> +
>>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>>> + ds = dsaddr->ds_list[i];
>>>> +
>>>> + if (ds && ds->ds_clp) {
>>>> + seq_printf(m, " pnfs dataserver %s\n",
>>>> + ds->ds_remotestr);
>>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>>> .id = LAYOUT_NFSV4_1_FILES,
>>>> .name = "LAYOUT_NFSV4_1_FILES",
>>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>>> .read_pagelist = filelayout_read_pagelist,
>>>> .write_pagelist = filelayout_write_pagelist,
>>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>>> };
>>>>
>>>> static int __init nfs4filelayout_init(void)
>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>> index 53d593a..0a5e020 100644
>>>> --- a/fs/nfs/pnfs.h
>>>> +++ b/fs/nfs/pnfs.h
>>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>>> struct xdr_stream *xdr,
>>>> const struct nfs4_layoutcommit_args *args);
>>>> +
>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>> + struct nfs4_deviceid_node *);
>>>> };
>>>>
>>>> struct pnfs_layout_hdr {
>>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>> + const struct nfs_client *clp,
>>>> + const struct pnfs_layoutdriver_type *ld);
>>>> +
>>>>
>>>> static inline int lo_fail_bit(u32 iomode)
>>>> {
>>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>>> return 0;
>>>> }
>>>>
>>>> +static inline void
>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>> +{
>>>> + if (!pnfs_enabled_sb(nfss))
>>>> + return;
>>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>>> + nfss->pnfs_curr_ld);
>>>> +}
>>>> #else /* CONFIG_NFS_V4_1 */
>>>>
>>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>>> {
>>>> }
>>>> +
>>>> +static inline void
>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>> +{
>>>> +}
>>>> #endif /* CONFIG_NFS_V4_1 */
>>>>
>>>> #endif /* FS_NFS_PNFS_H */
>>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>>> index 4f359d2..277de87 100644
>>>> --- a/fs/nfs/pnfs_dev.c
>>>> +++ b/fs/nfs/pnfs_dev.c
>>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>>> }
>>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>>
>>>> +
>>>> +void
>>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>> + const struct nfs_client *clp,
>>>> + const struct pnfs_layoutdriver_type *ld)
>>>> +{
>>>> + struct nfs4_deviceid_node *d;
>>>> + struct hlist_node *n;
>>>> + long h;
>>>> +
>>>> + if (!ld->ds_rpc_print_iostats)
>>>> + return;
>>>> +
>>>> + rcu_read_lock();
>>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>>> + if (d->ld == ld && d->nfs_client == clp) {
>>>> + if (atomic_read(&d->ref))
>>>> + ld->ds_rpc_print_iostats(seq, d);
>>>> + }
>>>> + }
>>>> + rcu_read_unlock();
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>>> +
>>>> /*
>>>> * Remove a deviceid from cache
>>>> *
>>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>>> index d18a90b..453e496 100644
>>>> --- a/fs/nfs/super.c
>>>> +++ b/fs/nfs/super.c
>>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>>> seq_printf(m, "\n");
>>>>
>>>> rpc_print_iostats(m, nfss->client);
>>>> + pnfs_rpc_print_iostats(m, nfss);
>>>>
>>>> return 0;
>>>> }
>>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>>> index 8866bb3..7fe4605 100644
>>>> --- a/include/linux/nfs_iostat.h
>>>> +++ b/include/linux/nfs_iostat.h
>>>> @@ -21,7 +21,7 @@
>>>> #ifndef _LINUX_NFS_IOSTAT
>>>> #define _LINUX_NFS_IOSTAT
>>>>
>>>> -#define NFS_IOSTAT_VERS "1.0"
>>>> +#define NFS_IOSTAT_VERS "2.0"
>>>>
>>>> /*
>>>> * NFS byte counters
>>>> --
>>>> 1.7.4.4
>>>>
>>>
>>> --
>>> Chuck Lever
>>> chuck[dot]lever[at]oracle[dot]com
>>>
>>>
>>>
>>>
>>
>
--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-08 21:04 ` Adamson, Dros
2012-02-08 21:55 ` Adamson, Dros
@ 2012-02-13 8:05 ` Benny Halevy
2012-02-13 17:58 ` Adamson, Dros
1 sibling, 1 reply; 14+ messages in thread
From: Benny Halevy @ 2012-02-13 8:05 UTC (permalink / raw)
To: Adamson, Dros; +Cc: Chuck Lever, Myklebust, Trond, linux-nfs
Sorry for the late response.
On 2012-02-08 23:04, Adamson, Dros wrote:
>
> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>
>> Hi-
>>
>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>
>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>
>> You mean /proc/self/mountstats here.
>
> oops!
>
>>
>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>> since only one nfs_client was ever associated with a mountpoint, but with
>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>
>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>> all reads, writes and commits will never make it into the current mountstats
>>> output.
>>>
>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>> expose the fact that these stats are from separate connections (maybe for later
>>> use).
>>>
>>> This method has issues:
>>>
>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>> mountstats(8) from trying to parse this output -- and it does so
>>> incorrectly.
>>
>> User land is broken then. :-)
>
> Agreed!
>
>>
>>> 2) when using DS multipath this method fails to count operations that happened
>>> on a different path to the same DS (after a failure).
>
> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>
>>>
>>> 3) currently this will display stats twice when DS == MDS.
>>
>> Why is this case hard to detect in the kernel or in mountstats?
>
> Not hard, just an issue with the current patch.
>
>>
>>>
>>> So... What should I do here?
>>>
>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>> stat structure so that operations on DS nfs_clients bump stats on
>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>> issues with the current patch, but seems like a hack.
>>
>> Is this the same as displaying all data in one set of stats?
>
> Yes
>
>>
>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
Splitting up the MDS/DS stats is important to tell when DS I/O is being
done and how effective it is.
Benny
>
> Ok, I'll try to do that.
>
> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>
> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>
>>
>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>> the "statsvers" value.
>>
>> Agree.
>
> I'll do this first.
>
> Thanks Chuck!
>
> -dros
>
>>
>>>
>>> Thoughts?
>>> ---
>>>
>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>
>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>> fs/nfs/super.c | 1 +
>>> include/linux/nfs_iostat.h | 2 +-
>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>> index 79be7ac..9410fd0 100644
>>> --- a/fs/nfs/nfs4filelayout.c
>>> +++ b/fs/nfs/nfs4filelayout.c
>>> @@ -33,6 +33,8 @@
>>> #include <linux/nfs_page.h>
>>> #include <linux/module.h>
>>>
>>> +#include <linux/sunrpc/metrics.h>
>>> +
>>> #include "internal.h"
>>> #include "nfs4filelayout.h"
>>>
>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>> }
>>>
>>> +static void
>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>> +{
>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>> + struct nfs4_pnfs_ds *ds;
>>> + u32 i;
>>> +
>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>> +
>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>> + ds = dsaddr->ds_list[i];
>>> +
>>> + if (ds && ds->ds_clp) {
>>> + seq_printf(m, " pnfs dataserver %s\n",
>>> + ds->ds_remotestr);
>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>> + }
>>> + }
>>> +}
>>> +
>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>> .id = LAYOUT_NFSV4_1_FILES,
>>> .name = "LAYOUT_NFSV4_1_FILES",
>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>> .read_pagelist = filelayout_read_pagelist,
>>> .write_pagelist = filelayout_write_pagelist,
>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>> };
>>>
>>> static int __init nfs4filelayout_init(void)
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index 53d593a..0a5e020 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>> struct xdr_stream *xdr,
>>> const struct nfs4_layoutcommit_args *args);
>>> +
>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>> + struct nfs4_deviceid_node *);
>>> };
>>>
>>> struct pnfs_layout_hdr {
>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>> + const struct nfs_client *clp,
>>> + const struct pnfs_layoutdriver_type *ld);
>>> +
>>>
>>> static inline int lo_fail_bit(u32 iomode)
>>> {
>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>> return 0;
>>> }
>>>
>>> +static inline void
>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>> +{
>>> + if (!pnfs_enabled_sb(nfss))
>>> + return;
>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>> + nfss->pnfs_curr_ld);
>>> +}
>>> #else /* CONFIG_NFS_V4_1 */
>>>
>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>> {
>>> }
>>> +
>>> +static inline void
>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>> +{
>>> +}
>>> #endif /* CONFIG_NFS_V4_1 */
>>>
>>> #endif /* FS_NFS_PNFS_H */
>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>> index 4f359d2..277de87 100644
>>> --- a/fs/nfs/pnfs_dev.c
>>> +++ b/fs/nfs/pnfs_dev.c
>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>> }
>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>
>>> +
>>> +void
>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>> + const struct nfs_client *clp,
>>> + const struct pnfs_layoutdriver_type *ld)
>>> +{
>>> + struct nfs4_deviceid_node *d;
>>> + struct hlist_node *n;
>>> + long h;
>>> +
>>> + if (!ld->ds_rpc_print_iostats)
>>> + return;
>>> +
>>> + rcu_read_lock();
>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>> + if (d->ld == ld && d->nfs_client == clp) {
>>> + if (atomic_read(&d->ref))
>>> + ld->ds_rpc_print_iostats(seq, d);
>>> + }
>>> + }
>>> + rcu_read_unlock();
>>> +}
>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>> +
>>> /*
>>> * Remove a deviceid from cache
>>> *
>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>> index d18a90b..453e496 100644
>>> --- a/fs/nfs/super.c
>>> +++ b/fs/nfs/super.c
>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>> seq_printf(m, "\n");
>>>
>>> rpc_print_iostats(m, nfss->client);
>>> + pnfs_rpc_print_iostats(m, nfss);
>>>
>>> return 0;
>>> }
>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>> index 8866bb3..7fe4605 100644
>>> --- a/include/linux/nfs_iostat.h
>>> +++ b/include/linux/nfs_iostat.h
>>> @@ -21,7 +21,7 @@
>>> #ifndef _LINUX_NFS_IOSTAT
>>> #define _LINUX_NFS_IOSTAT
>>>
>>> -#define NFS_IOSTAT_VERS "1.0"
>>> +#define NFS_IOSTAT_VERS "2.0"
>>>
>>> /*
>>> * NFS byte counters
>>> --
>>> 1.7.4.4
>>>
>>
>> --
>> Chuck Lever
>> chuck[dot]lever[at]oracle[dot]com
>>
>>
>>
>>
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-13 8:05 ` Benny Halevy
@ 2012-02-13 17:58 ` Adamson, Dros
2012-02-14 9:31 ` Benny Halevy
0 siblings, 1 reply; 14+ messages in thread
From: Adamson, Dros @ 2012-02-13 17:58 UTC (permalink / raw)
To: Benny Halevy; +Cc: Adamson, Dros, Chuck Lever, Myklebust, Trond, linux-nfs
[-- Attachment #1: Type: text/plain, Size: 10045 bytes --]
On Feb 13, 2012, at 3:05 AM, Benny Halevy wrote:
> Sorry for the late response.
>
> On 2012-02-08 23:04, Adamson, Dros wrote:
>>
>> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>>
>>> Hi-
>>>
>>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>>
>>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>>
>>> You mean /proc/self/mountstats here.
>>
>> oops!
>>
>>>
>>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>>> since only one nfs_client was ever associated with a mountpoint, but with
>>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>>
>>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>>> all reads, writes and commits will never make it into the current mountstats
>>>> output.
>>>>
>>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>>> expose the fact that these stats are from separate connections (maybe for later
>>>> use).
>>>>
>>>> This method has issues:
>>>>
>>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>>> mountstats(8) from trying to parse this output -- and it does so
>>>> incorrectly.
>>>
>>> User land is broken then. :-)
>>
>> Agreed!
>>
>>>
>>>> 2) when using DS multipath this method fails to count operations that happened
>>>> on a different path to the same DS (after a failure).
>>
>> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>
>>>>
>>>> 3) currently this will display stats twice when DS == MDS.
>>>
>>> Why is this case hard to detect in the kernel or in mountstats?
>>
>> Not hard, just an issue with the current patch.
>>
>>>
>>>>
>>>> So... What should I do here?
>>>>
>>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>>> stat structure so that operations on DS nfs_clients bump stats on
>>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>>> issues with the current patch, but seems like a hack.
>>>
>>> Is this the same as displaying all data in one set of stats?
>>
>> Yes
>>
>>>
>>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>
> Splitting up the MDS/DS stats is important to tell when DS I/O is being
> done and how effective it is.
>
> Benny
>
Agreed! It just can't go into mountstats right away (due to statsvers= not being interpreted in mountstats(8)). I'll make a second patch on top of my mountstats changes to add per-DS stats in /proc/fs/nfsfs/ and we can figure out if we want to add it to mountstats later.
Does this sound OK?
-dros
>>
>> Ok, I'll try to do that.
>>
>> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>>
>> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>>
>>>
>>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>>> the "statsvers" value.
>>>
>>> Agree.
>>
>> I'll do this first.
>>
>> Thanks Chuck!
>>
>> -dros
>>
>>>
>>>>
>>>> Thoughts?
>>>> ---
>>>>
>>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>>
>>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>>> fs/nfs/super.c | 1 +
>>>> include/linux/nfs_iostat.h | 2 +-
>>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>> index 79be7ac..9410fd0 100644
>>>> --- a/fs/nfs/nfs4filelayout.c
>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>> @@ -33,6 +33,8 @@
>>>> #include <linux/nfs_page.h>
>>>> #include <linux/module.h>
>>>>
>>>> +#include <linux/sunrpc/metrics.h>
>>>> +
>>>> #include "internal.h"
>>>> #include "nfs4filelayout.h"
>>>>
>>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>>> }
>>>>
>>>> +static void
>>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>>> +{
>>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>>> + struct nfs4_pnfs_ds *ds;
>>>> + u32 i;
>>>> +
>>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>>> +
>>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>>> + ds = dsaddr->ds_list[i];
>>>> +
>>>> + if (ds && ds->ds_clp) {
>>>> + seq_printf(m, " pnfs dataserver %s\n",
>>>> + ds->ds_remotestr);
>>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>>> + }
>>>> + }
>>>> +}
>>>> +
>>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>>> .id = LAYOUT_NFSV4_1_FILES,
>>>> .name = "LAYOUT_NFSV4_1_FILES",
>>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>>> .read_pagelist = filelayout_read_pagelist,
>>>> .write_pagelist = filelayout_write_pagelist,
>>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>>> };
>>>>
>>>> static int __init nfs4filelayout_init(void)
>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>> index 53d593a..0a5e020 100644
>>>> --- a/fs/nfs/pnfs.h
>>>> +++ b/fs/nfs/pnfs.h
>>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>>> struct xdr_stream *xdr,
>>>> const struct nfs4_layoutcommit_args *args);
>>>> +
>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>> + struct nfs4_deviceid_node *);
>>>> };
>>>>
>>>> struct pnfs_layout_hdr {
>>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>> + const struct nfs_client *clp,
>>>> + const struct pnfs_layoutdriver_type *ld);
>>>> +
>>>>
>>>> static inline int lo_fail_bit(u32 iomode)
>>>> {
>>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>>> return 0;
>>>> }
>>>>
>>>> +static inline void
>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>> +{
>>>> + if (!pnfs_enabled_sb(nfss))
>>>> + return;
>>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>>> + nfss->pnfs_curr_ld);
>>>> +}
>>>> #else /* CONFIG_NFS_V4_1 */
>>>>
>>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>>> {
>>>> }
>>>> +
>>>> +static inline void
>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>> +{
>>>> +}
>>>> #endif /* CONFIG_NFS_V4_1 */
>>>>
>>>> #endif /* FS_NFS_PNFS_H */
>>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>>> index 4f359d2..277de87 100644
>>>> --- a/fs/nfs/pnfs_dev.c
>>>> +++ b/fs/nfs/pnfs_dev.c
>>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>>> }
>>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>>
>>>> +
>>>> +void
>>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>> + const struct nfs_client *clp,
>>>> + const struct pnfs_layoutdriver_type *ld)
>>>> +{
>>>> + struct nfs4_deviceid_node *d;
>>>> + struct hlist_node *n;
>>>> + long h;
>>>> +
>>>> + if (!ld->ds_rpc_print_iostats)
>>>> + return;
>>>> +
>>>> + rcu_read_lock();
>>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>>> + if (d->ld == ld && d->nfs_client == clp) {
>>>> + if (atomic_read(&d->ref))
>>>> + ld->ds_rpc_print_iostats(seq, d);
>>>> + }
>>>> + }
>>>> + rcu_read_unlock();
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>>> +
>>>> /*
>>>> * Remove a deviceid from cache
>>>> *
>>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>>> index d18a90b..453e496 100644
>>>> --- a/fs/nfs/super.c
>>>> +++ b/fs/nfs/super.c
>>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>>> seq_printf(m, "\n");
>>>>
>>>> rpc_print_iostats(m, nfss->client);
>>>> + pnfs_rpc_print_iostats(m, nfss);
>>>>
>>>> return 0;
>>>> }
>>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>>> index 8866bb3..7fe4605 100644
>>>> --- a/include/linux/nfs_iostat.h
>>>> +++ b/include/linux/nfs_iostat.h
>>>> @@ -21,7 +21,7 @@
>>>> #ifndef _LINUX_NFS_IOSTAT
>>>> #define _LINUX_NFS_IOSTAT
>>>>
>>>> -#define NFS_IOSTAT_VERS "1.0"
>>>> +#define NFS_IOSTAT_VERS "2.0"
>>>>
>>>> /*
>>>> * NFS byte counters
>>>> --
>>>> 1.7.4.4
>>>>
>>>
>>> --
>>> Chuck Lever
>>> chuck[dot]lever[at]oracle[dot]com
>>>
>>>
>>>
>>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 1374 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-13 17:58 ` Adamson, Dros
@ 2012-02-14 9:31 ` Benny Halevy
2012-02-14 15:44 ` Andy Adamson
0 siblings, 1 reply; 14+ messages in thread
From: Benny Halevy @ 2012-02-14 9:31 UTC (permalink / raw)
To: Adamson, Dros; +Cc: Chuck Lever, Myklebust, Trond, linux-nfs
On 2012-02-13 19:58, Adamson, Dros wrote:
>
> On Feb 13, 2012, at 3:05 AM, Benny Halevy wrote:
>
>> Sorry for the late response.
>>
>> On 2012-02-08 23:04, Adamson, Dros wrote:
>>>
>>> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>>>
>>>> Hi-
>>>>
>>>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>>>
>>>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>>>
>>>> You mean /proc/self/mountstats here.
>>>
>>> oops!
>>>
>>>>
>>>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>>>> since only one nfs_client was ever associated with a mountpoint, but with
>>>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>>>
>>>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>>>> all reads, writes and commits will never make it into the current mountstats
>>>>> output.
>>>>>
>>>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>>>> expose the fact that these stats are from separate connections (maybe for later
>>>>> use).
>>>>>
>>>>> This method has issues:
>>>>>
>>>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>>>> mountstats(8) from trying to parse this output -- and it does so
>>>>> incorrectly.
>>>>
>>>> User land is broken then. :-)
>>>
>>> Agreed!
>>>
>>>>
>>>>> 2) when using DS multipath this method fails to count operations that happened
>>>>> on a different path to the same DS (after a failure).
>>>
>>> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>>
>>>>>
>>>>> 3) currently this will display stats twice when DS == MDS.
>>>>
>>>> Why is this case hard to detect in the kernel or in mountstats?
>>>
>>> Not hard, just an issue with the current patch.
>>>
>>>>
>>>>>
>>>>> So... What should I do here?
>>>>>
>>>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>>>> stat structure so that operations on DS nfs_clients bump stats on
>>>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>>>> issues with the current patch, but seems like a hack.
>>>>
>>>> Is this the same as displaying all data in one set of stats?
>>>
>>> Yes
>>>
>>>>
>>>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>>
>> Splitting up the MDS/DS stats is important to tell when DS I/O is being
>> done and how effective it is.
>>
>> Benny
>>
>
> Agreed! It just can't go into mountstats right away (due to statsvers= not being interpreted in mountstats(8)). I'll make a second patch on top of my mountstats changes to add per-DS stats in /proc/fs/nfsfs/ and we can figure out if we want to add it to mountstats later.
>
> Does this sound OK?
Absolutely. Thanks!
Benny
>
> -dros
>
>>>
>>> Ok, I'll try to do that.
>>>
>>> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>>>
>>> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>>>
>>>>
>>>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>>>> the "statsvers" value.
>>>>
>>>> Agree.
>>>
>>> I'll do this first.
>>>
>>> Thanks Chuck!
>>>
>>> -dros
>>>
>>>>
>>>>>
>>>>> Thoughts?
>>>>> ---
>>>>>
>>>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>>>
>>>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>>>> fs/nfs/super.c | 1 +
>>>>> include/linux/nfs_iostat.h | 2 +-
>>>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>> index 79be7ac..9410fd0 100644
>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>> @@ -33,6 +33,8 @@
>>>>> #include <linux/nfs_page.h>
>>>>> #include <linux/module.h>
>>>>>
>>>>> +#include <linux/sunrpc/metrics.h>
>>>>> +
>>>>> #include "internal.h"
>>>>> #include "nfs4filelayout.h"
>>>>>
>>>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>>>> }
>>>>>
>>>>> +static void
>>>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>>>> +{
>>>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>>>> + struct nfs4_pnfs_ds *ds;
>>>>> + u32 i;
>>>>> +
>>>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>>>> +
>>>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>>>> + ds = dsaddr->ds_list[i];
>>>>> +
>>>>> + if (ds && ds->ds_clp) {
>>>>> + seq_printf(m, " pnfs dataserver %s\n",
>>>>> + ds->ds_remotestr);
>>>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>>>> + }
>>>>> + }
>>>>> +}
>>>>> +
>>>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>>>> .id = LAYOUT_NFSV4_1_FILES,
>>>>> .name = "LAYOUT_NFSV4_1_FILES",
>>>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>>>> .read_pagelist = filelayout_read_pagelist,
>>>>> .write_pagelist = filelayout_write_pagelist,
>>>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>>>> };
>>>>>
>>>>> static int __init nfs4filelayout_init(void)
>>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>>> index 53d593a..0a5e020 100644
>>>>> --- a/fs/nfs/pnfs.h
>>>>> +++ b/fs/nfs/pnfs.h
>>>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>>>> struct xdr_stream *xdr,
>>>>> const struct nfs4_layoutcommit_args *args);
>>>>> +
>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>> + struct nfs4_deviceid_node *);
>>>>> };
>>>>>
>>>>> struct pnfs_layout_hdr {
>>>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>> + const struct nfs_client *clp,
>>>>> + const struct pnfs_layoutdriver_type *ld);
>>>>> +
>>>>>
>>>>> static inline int lo_fail_bit(u32 iomode)
>>>>> {
>>>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>>>> return 0;
>>>>> }
>>>>>
>>>>> +static inline void
>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>> +{
>>>>> + if (!pnfs_enabled_sb(nfss))
>>>>> + return;
>>>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>>>> + nfss->pnfs_curr_ld);
>>>>> +}
>>>>> #else /* CONFIG_NFS_V4_1 */
>>>>>
>>>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>>>> {
>>>>> }
>>>>> +
>>>>> +static inline void
>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>> +{
>>>>> +}
>>>>> #endif /* CONFIG_NFS_V4_1 */
>>>>>
>>>>> #endif /* FS_NFS_PNFS_H */
>>>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>>>> index 4f359d2..277de87 100644
>>>>> --- a/fs/nfs/pnfs_dev.c
>>>>> +++ b/fs/nfs/pnfs_dev.c
>>>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>>>> }
>>>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>>>
>>>>> +
>>>>> +void
>>>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>> + const struct nfs_client *clp,
>>>>> + const struct pnfs_layoutdriver_type *ld)
>>>>> +{
>>>>> + struct nfs4_deviceid_node *d;
>>>>> + struct hlist_node *n;
>>>>> + long h;
>>>>> +
>>>>> + if (!ld->ds_rpc_print_iostats)
>>>>> + return;
>>>>> +
>>>>> + rcu_read_lock();
>>>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>>>> + if (d->ld == ld && d->nfs_client == clp) {
>>>>> + if (atomic_read(&d->ref))
>>>>> + ld->ds_rpc_print_iostats(seq, d);
>>>>> + }
>>>>> + }
>>>>> + rcu_read_unlock();
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>>>> +
>>>>> /*
>>>>> * Remove a deviceid from cache
>>>>> *
>>>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>>>> index d18a90b..453e496 100644
>>>>> --- a/fs/nfs/super.c
>>>>> +++ b/fs/nfs/super.c
>>>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>>>> seq_printf(m, "\n");
>>>>>
>>>>> rpc_print_iostats(m, nfss->client);
>>>>> + pnfs_rpc_print_iostats(m, nfss);
>>>>>
>>>>> return 0;
>>>>> }
>>>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>>>> index 8866bb3..7fe4605 100644
>>>>> --- a/include/linux/nfs_iostat.h
>>>>> +++ b/include/linux/nfs_iostat.h
>>>>> @@ -21,7 +21,7 @@
>>>>> #ifndef _LINUX_NFS_IOSTAT
>>>>> #define _LINUX_NFS_IOSTAT
>>>>>
>>>>> -#define NFS_IOSTAT_VERS "1.0"
>>>>> +#define NFS_IOSTAT_VERS "2.0"
>>>>>
>>>>> /*
>>>>> * NFS byte counters
>>>>> --
>>>>> 1.7.4.4
>>>>>
>>>>
>>>> --
>>>> Chuck Lever
>>>> chuck[dot]lever[at]oracle[dot]com
>>>>
>>>>
>>>>
>>>>
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-14 9:31 ` Benny Halevy
@ 2012-02-14 15:44 ` Andy Adamson
2012-02-14 15:59 ` Adamson, Dros
0 siblings, 1 reply; 14+ messages in thread
From: Andy Adamson @ 2012-02-14 15:44 UTC (permalink / raw)
To: Benny Halevy; +Cc: Adamson, Dros, Chuck Lever, Myklebust, Trond, linux-nfs
I have an early patch set that prints session stats for MDS and for
DS. The DS pnfs_layoutdriver_type interface for session stats takes
the same arguments as the RPC iostats interface, a seq_file* and a
struct nfs4_deviceid_node.
>>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>>> + struct nfs4_deviceid_node *);
We can use the same interface for RPC and session stats. I suggest
you rename this to "ds_stats" and add an enum type to identify which
stats are being requested.
-->Andy
On Tue, Feb 14, 2012 at 4:31 AM, Benny Halevy <bhalevy@tonian.com> wrote:
> On 2012-02-13 19:58, Adamson, Dros wrote:
>>
>> On Feb 13, 2012, at 3:05 AM, Benny Halevy wrote:
>>
>>> Sorry for the late response.
>>>
>>> On 2012-02-08 23:04, Adamson, Dros wrote:
>>>>
>>>> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>>>>
>>>>> Hi-
>>>>>
>>>>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>>>>
>>>>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>>>>
>>>>> You mean /proc/self/mountstats here.
>>>>
>>>> oops!
>>>>
>>>>>
>>>>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>>>>> since only one nfs_client was ever associated with a mountpoint, but with
>>>>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>>>>
>>>>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>>>>> all reads, writes and commits will never make it into the current mountstats
>>>>>> output.
>>>>>>
>>>>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>>>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>>>>> expose the fact that these stats are from separate connections (maybe for later
>>>>>> use).
>>>>>>
>>>>>> This method has issues:
>>>>>>
>>>>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>>>>> mountstats(8) from trying to parse this output -- and it does so
>>>>>> incorrectly.
>>>>>
>>>>> User land is broken then. :-)
>>>>
>>>> Agreed!
>>>>
>>>>>
>>>>>> 2) when using DS multipath this method fails to count operations that happened
>>>>>> on a different path to the same DS (after a failure).
>>>>
>>>> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>>>
>>>>>>
>>>>>> 3) currently this will display stats twice when DS == MDS.
>>>>>
>>>>> Why is this case hard to detect in the kernel or in mountstats?
>>>>
>>>> Not hard, just an issue with the current patch.
>>>>
>>>>>
>>>>>>
>>>>>> So... What should I do here?
>>>>>>
>>>>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>>>>> stat structure so that operations on DS nfs_clients bump stats on
>>>>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>>>>> issues with the current patch, but seems like a hack.
>>>>>
>>>>> Is this the same as displaying all data in one set of stats?
>>>>
>>>> Yes
>>>>
>>>>>
>>>>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>>>
>>> Splitting up the MDS/DS stats is important to tell when DS I/O is being
>>> done and how effective it is.
>>>
>>> Benny
>>>
>>
>> Agreed! It just can't go into mountstats right away (due to statsvers= not being interpreted in mountstats(8)). I'll make a second patch on top of my mountstats changes to add per-DS stats in /proc/fs/nfsfs/ and we can figure out if we want to add it to mountstats later.
>>
>> Does this sound OK?
>
> Absolutely. Thanks!
>
> Benny
>
>>
>> -dros
>>
>>>>
>>>> Ok, I'll try to do that.
>>>>
>>>> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>>>>
>>>> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>>>>
>>>>>
>>>>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>>>>> the "statsvers" value.
>>>>>
>>>>> Agree.
>>>>
>>>> I'll do this first.
>>>>
>>>> Thanks Chuck!
>>>>
>>>> -dros
>>>>
>>>>>
>>>>>>
>>>>>> Thoughts?
>>>>>> ---
>>>>>>
>>>>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>>>>
>>>>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>>>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>>>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>>>>> fs/nfs/super.c | 1 +
>>>>>> include/linux/nfs_iostat.h | 2 +-
>>>>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>>> index 79be7ac..9410fd0 100644
>>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>>> @@ -33,6 +33,8 @@
>>>>>> #include <linux/nfs_page.h>
>>>>>> #include <linux/module.h>
>>>>>>
>>>>>> +#include <linux/sunrpc/metrics.h>
>>>>>> +
>>>>>> #include "internal.h"
>>>>>> #include "nfs4filelayout.h"
>>>>>>
>>>>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>>>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>>>>> }
>>>>>>
>>>>>> +static void
>>>>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>>>>> +{
>>>>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>>>>> + struct nfs4_pnfs_ds *ds;
>>>>>> + u32 i;
>>>>>> +
>>>>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>>>>> +
>>>>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>>>>> + ds = dsaddr->ds_list[i];
>>>>>> +
>>>>>> + if (ds && ds->ds_clp) {
>>>>>> + seq_printf(m, " pnfs dataserver %s\n",
>>>>>> + ds->ds_remotestr);
>>>>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>>>>> + }
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>>>>> .id = LAYOUT_NFSV4_1_FILES,
>>>>>> .name = "LAYOUT_NFSV4_1_FILES",
>>>>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>>>>> .read_pagelist = filelayout_read_pagelist,
>>>>>> .write_pagelist = filelayout_write_pagelist,
>>>>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>>>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>>>>> };
>>>>>>
>>>>>> static int __init nfs4filelayout_init(void)
>>>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>>>> index 53d593a..0a5e020 100644
>>>>>> --- a/fs/nfs/pnfs.h
>>>>>> +++ b/fs/nfs/pnfs.h
>>>>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>>>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>>>>> struct xdr_stream *xdr,
>>>>>> const struct nfs4_layoutcommit_args *args);
>>>>>> +
>>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>>> + struct nfs4_deviceid_node *);
>>>>>> };
>>>>>>
>>>>>> struct pnfs_layout_hdr {
>>>>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>>>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>>>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>>>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>>>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>>> + const struct nfs_client *clp,
>>>>>> + const struct pnfs_layoutdriver_type *ld);
>>>>>> +
>>>>>>
>>>>>> static inline int lo_fail_bit(u32 iomode)
>>>>>> {
>>>>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>>>>> return 0;
>>>>>> }
>>>>>>
>>>>>> +static inline void
>>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>>> +{
>>>>>> + if (!pnfs_enabled_sb(nfss))
>>>>>> + return;
>>>>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>>>>> + nfss->pnfs_curr_ld);
>>>>>> +}
>>>>>> #else /* CONFIG_NFS_V4_1 */
>>>>>>
>>>>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>>>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>>>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>>>>> {
>>>>>> }
>>>>>> +
>>>>>> +static inline void
>>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>>> +{
>>>>>> +}
>>>>>> #endif /* CONFIG_NFS_V4_1 */
>>>>>>
>>>>>> #endif /* FS_NFS_PNFS_H */
>>>>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>>>>> index 4f359d2..277de87 100644
>>>>>> --- a/fs/nfs/pnfs_dev.c
>>>>>> +++ b/fs/nfs/pnfs_dev.c
>>>>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>>>>> }
>>>>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>>>>
>>>>>> +
>>>>>> +void
>>>>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>>> + const struct nfs_client *clp,
>>>>>> + const struct pnfs_layoutdriver_type *ld)
>>>>>> +{
>>>>>> + struct nfs4_deviceid_node *d;
>>>>>> + struct hlist_node *n;
>>>>>> + long h;
>>>>>> +
>>>>>> + if (!ld->ds_rpc_print_iostats)
>>>>>> + return;
>>>>>> +
>>>>>> + rcu_read_lock();
>>>>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>>>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>>>>> + if (d->ld == ld && d->nfs_client == clp) {
>>>>>> + if (atomic_read(&d->ref))
>>>>>> + ld->ds_rpc_print_iostats(seq, d);
>>>>>> + }
>>>>>> + }
>>>>>> + rcu_read_unlock();
>>>>>> +}
>>>>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>>>>> +
>>>>>> /*
>>>>>> * Remove a deviceid from cache
>>>>>> *
>>>>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>>>>> index d18a90b..453e496 100644
>>>>>> --- a/fs/nfs/super.c
>>>>>> +++ b/fs/nfs/super.c
>>>>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>>>>> seq_printf(m, "\n");
>>>>>>
>>>>>> rpc_print_iostats(m, nfss->client);
>>>>>> + pnfs_rpc_print_iostats(m, nfss);
>>>>>>
>>>>>> return 0;
>>>>>> }
>>>>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>>>>> index 8866bb3..7fe4605 100644
>>>>>> --- a/include/linux/nfs_iostat.h
>>>>>> +++ b/include/linux/nfs_iostat.h
>>>>>> @@ -21,7 +21,7 @@
>>>>>> #ifndef _LINUX_NFS_IOSTAT
>>>>>> #define _LINUX_NFS_IOSTAT
>>>>>>
>>>>>> -#define NFS_IOSTAT_VERS "1.0"
>>>>>> +#define NFS_IOSTAT_VERS "2.0"
>>>>>>
>>>>>> /*
>>>>>> * NFS byte counters
>>>>>> --
>>>>>> 1.7.4.4
>>>>>>
>>>>>
>>>>> --
>>>>> Chuck Lever
>>>>> chuck[dot]lever[at]oracle[dot]com
>>>>>
>>>>>
>>>>>
>>>>>
>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-14 15:44 ` Andy Adamson
@ 2012-02-14 15:59 ` Adamson, Dros
2012-02-14 16:32 ` Andy Adamson
0 siblings, 1 reply; 14+ messages in thread
From: Adamson, Dros @ 2012-02-14 15:59 UTC (permalink / raw)
To: Andy Adamson
Cc: Benny Halevy, Adamson, Dros, Chuck Lever, Myklebust, Trond,
linux-nfs
[-- Attachment #1: Type: text/plain, Size: 12780 bytes --]
On Feb 14, 2012, at 10:44 AM, Andy Adamson wrote:
> I have an early patch set that prints session stats for MDS and for
> DS. The DS pnfs_layoutdriver_type interface for session stats takes
> the same arguments as the RPC iostats interface, a seq_file* and a
> struct nfs4_deviceid_node.
>
>>>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>>>> + struct nfs4_deviceid_node *);
>
> We can use the same interface for RPC and session stats. I suggest
> you rename this to "ds_stats" and add an enum type to identify which
> stats are being requested.
>
> -->Andy
>
Well, this patch is dead. I reposted a better implementation yesterday (based on Trond's suggestions) that doesn't have to iterate through the DSs, but we can work together to make a common interface for more stats. Once we display per-DS rpc_iostats, there is no getting around the need to iterate through the DSs (like your session stats). I plan on looking at this later today.
-dros
> On Tue, Feb 14, 2012 at 4:31 AM, Benny Halevy <bhalevy@tonian.com> wrote:
>> On 2012-02-13 19:58, Adamson, Dros wrote:
>>>
>>> On Feb 13, 2012, at 3:05 AM, Benny Halevy wrote:
>>>
>>>> Sorry for the late response.
>>>>
>>>> On 2012-02-08 23:04, Adamson, Dros wrote:
>>>>>
>>>>> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>>>>>
>>>>>> Hi-
>>>>>>
>>>>>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>>>>>
>>>>>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>>>>>
>>>>>> You mean /proc/self/mountstats here.
>>>>>
>>>>> oops!
>>>>>
>>>>>>
>>>>>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>>>>>> since only one nfs_client was ever associated with a mountpoint, but with
>>>>>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>>>>>
>>>>>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>>>>>> all reads, writes and commits will never make it into the current mountstats
>>>>>>> output.
>>>>>>>
>>>>>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>>>>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>>>>>> expose the fact that these stats are from separate connections (maybe for later
>>>>>>> use).
>>>>>>>
>>>>>>> This method has issues:
>>>>>>>
>>>>>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>>>>>> mountstats(8) from trying to parse this output -- and it does so
>>>>>>> incorrectly.
>>>>>>
>>>>>> User land is broken then. :-)
>>>>>
>>>>> Agreed!
>>>>>
>>>>>>
>>>>>>> 2) when using DS multipath this method fails to count operations that happened
>>>>>>> on a different path to the same DS (after a failure).
>>>>>
>>>>> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>>>>
>>>>>>>
>>>>>>> 3) currently this will display stats twice when DS == MDS.
>>>>>>
>>>>>> Why is this case hard to detect in the kernel or in mountstats?
>>>>>
>>>>> Not hard, just an issue with the current patch.
>>>>>
>>>>>>
>>>>>>>
>>>>>>> So... What should I do here?
>>>>>>>
>>>>>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>>>>>> stat structure so that operations on DS nfs_clients bump stats on
>>>>>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>>>>>> issues with the current patch, but seems like a hack.
>>>>>>
>>>>>> Is this the same as displaying all data in one set of stats?
>>>>>
>>>>> Yes
>>>>>
>>>>>>
>>>>>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>>>>
>>>> Splitting up the MDS/DS stats is important to tell when DS I/O is being
>>>> done and how effective it is.
>>>>
>>>> Benny
>>>>
>>>
>>> Agreed! It just can't go into mountstats right away (due to statsvers= not being interpreted in mountstats(8)). I'll make a second patch on top of my mountstats changes to add per-DS stats in /proc/fs/nfsfs/ and we can figure out if we want to add it to mountstats later.
>>>
>>> Does this sound OK?
>>
>> Absolutely. Thanks!
>>
>> Benny
>>
>>>
>>> -dros
>>>
>>>>>
>>>>> Ok, I'll try to do that.
>>>>>
>>>>> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>>>>>
>>>>> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>>>>>
>>>>>>
>>>>>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>>>>>> the "statsvers" value.
>>>>>>
>>>>>> Agree.
>>>>>
>>>>> I'll do this first.
>>>>>
>>>>> Thanks Chuck!
>>>>>
>>>>> -dros
>>>>>
>>>>>>
>>>>>>>
>>>>>>> Thoughts?
>>>>>>> ---
>>>>>>>
>>>>>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>>>>>
>>>>>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>>>>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>>>>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>>>>>> fs/nfs/super.c | 1 +
>>>>>>> include/linux/nfs_iostat.h | 2 +-
>>>>>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>>>> index 79be7ac..9410fd0 100644
>>>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>>>> @@ -33,6 +33,8 @@
>>>>>>> #include <linux/nfs_page.h>
>>>>>>> #include <linux/module.h>
>>>>>>>
>>>>>>> +#include <linux/sunrpc/metrics.h>
>>>>>>> +
>>>>>>> #include "internal.h"
>>>>>>> #include "nfs4filelayout.h"
>>>>>>>
>>>>>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>>>>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>>>>>> }
>>>>>>>
>>>>>>> +static void
>>>>>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>>>>>> +{
>>>>>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>>>>>> + struct nfs4_pnfs_ds *ds;
>>>>>>> + u32 i;
>>>>>>> +
>>>>>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>>>>>> +
>>>>>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>>>>>> + ds = dsaddr->ds_list[i];
>>>>>>> +
>>>>>>> + if (ds && ds->ds_clp) {
>>>>>>> + seq_printf(m, " pnfs dataserver %s\n",
>>>>>>> + ds->ds_remotestr);
>>>>>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>>>>>> + }
>>>>>>> + }
>>>>>>> +}
>>>>>>> +
>>>>>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>>>>>> .id = LAYOUT_NFSV4_1_FILES,
>>>>>>> .name = "LAYOUT_NFSV4_1_FILES",
>>>>>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>>>>>> .read_pagelist = filelayout_read_pagelist,
>>>>>>> .write_pagelist = filelayout_write_pagelist,
>>>>>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>>>>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>>>>>> };
>>>>>>>
>>>>>>> static int __init nfs4filelayout_init(void)
>>>>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>>>>> index 53d593a..0a5e020 100644
>>>>>>> --- a/fs/nfs/pnfs.h
>>>>>>> +++ b/fs/nfs/pnfs.h
>>>>>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>>>>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>>>>>> struct xdr_stream *xdr,
>>>>>>> const struct nfs4_layoutcommit_args *args);
>>>>>>> +
>>>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>>>> + struct nfs4_deviceid_node *);
>>>>>>> };
>>>>>>>
>>>>>>> struct pnfs_layout_hdr {
>>>>>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>>>>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>>>>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>>>>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>>>>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>>>> + const struct nfs_client *clp,
>>>>>>> + const struct pnfs_layoutdriver_type *ld);
>>>>>>> +
>>>>>>>
>>>>>>> static inline int lo_fail_bit(u32 iomode)
>>>>>>> {
>>>>>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>>>>>> return 0;
>>>>>>> }
>>>>>>>
>>>>>>> +static inline void
>>>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>>>> +{
>>>>>>> + if (!pnfs_enabled_sb(nfss))
>>>>>>> + return;
>>>>>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>>>>>> + nfss->pnfs_curr_ld);
>>>>>>> +}
>>>>>>> #else /* CONFIG_NFS_V4_1 */
>>>>>>>
>>>>>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>>>>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>>>>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>>>>>> {
>>>>>>> }
>>>>>>> +
>>>>>>> +static inline void
>>>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>>>> +{
>>>>>>> +}
>>>>>>> #endif /* CONFIG_NFS_V4_1 */
>>>>>>>
>>>>>>> #endif /* FS_NFS_PNFS_H */
>>>>>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>>>>>> index 4f359d2..277de87 100644
>>>>>>> --- a/fs/nfs/pnfs_dev.c
>>>>>>> +++ b/fs/nfs/pnfs_dev.c
>>>>>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>>>>>> }
>>>>>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>>>>>
>>>>>>> +
>>>>>>> +void
>>>>>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>>>> + const struct nfs_client *clp,
>>>>>>> + const struct pnfs_layoutdriver_type *ld)
>>>>>>> +{
>>>>>>> + struct nfs4_deviceid_node *d;
>>>>>>> + struct hlist_node *n;
>>>>>>> + long h;
>>>>>>> +
>>>>>>> + if (!ld->ds_rpc_print_iostats)
>>>>>>> + return;
>>>>>>> +
>>>>>>> + rcu_read_lock();
>>>>>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>>>>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>>>>>> + if (d->ld == ld && d->nfs_client == clp) {
>>>>>>> + if (atomic_read(&d->ref))
>>>>>>> + ld->ds_rpc_print_iostats(seq, d);
>>>>>>> + }
>>>>>>> + }
>>>>>>> + rcu_read_unlock();
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>>>>>> +
>>>>>>> /*
>>>>>>> * Remove a deviceid from cache
>>>>>>> *
>>>>>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>>>>>> index d18a90b..453e496 100644
>>>>>>> --- a/fs/nfs/super.c
>>>>>>> +++ b/fs/nfs/super.c
>>>>>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>>>>>> seq_printf(m, "\n");
>>>>>>>
>>>>>>> rpc_print_iostats(m, nfss->client);
>>>>>>> + pnfs_rpc_print_iostats(m, nfss);
>>>>>>>
>>>>>>> return 0;
>>>>>>> }
>>>>>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>>>>>> index 8866bb3..7fe4605 100644
>>>>>>> --- a/include/linux/nfs_iostat.h
>>>>>>> +++ b/include/linux/nfs_iostat.h
>>>>>>> @@ -21,7 +21,7 @@
>>>>>>> #ifndef _LINUX_NFS_IOSTAT
>>>>>>> #define _LINUX_NFS_IOSTAT
>>>>>>>
>>>>>>> -#define NFS_IOSTAT_VERS "1.0"
>>>>>>> +#define NFS_IOSTAT_VERS "2.0"
>>>>>>>
>>>>>>> /*
>>>>>>> * NFS byte counters
>>>>>>> --
>>>>>>> 1.7.4.4
>>>>>>>
>>>>>>
>>>>>> --
>>>>>> Chuck Lever
>>>>>> chuck[dot]lever[at]oracle[dot]com
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 1374 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] RFC: NFS - filelayout DS rpc mountstats support
2012-02-14 15:59 ` Adamson, Dros
@ 2012-02-14 16:32 ` Andy Adamson
0 siblings, 0 replies; 14+ messages in thread
From: Andy Adamson @ 2012-02-14 16:32 UTC (permalink / raw)
To: Adamson, Dros; +Cc: Benny Halevy, Chuck Lever, Myklebust, Trond, linux-nfs
On Tue, Feb 14, 2012 at 10:59 AM, Adamson, Dros
<Weston.Adamson@netapp.com> wrote:
>
> On Feb 14, 2012, at 10:44 AM, Andy Adamson wrote:
>
>> I have an early patch set that prints session stats for MDS and for
>> DS. The DS pnfs_layoutdriver_type interface for session stats takes
>> the same arguments as the RPC iostats interface, a seq_file* and a
>> struct nfs4_deviceid_node.
>>
>>>>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>>>>> + struct nfs4_deviceid_node *);
>>
>> We can use the same interface for RPC and session stats. I suggest
>> you rename this to "ds_stats" and add an enum type to identify which
>> stats are being requested.
>>
>> -->Andy
>>
>
>
> Well, this patch is dead. I reposted a better implementation yesterday (based on Trond's suggestions) that doesn't have to iterate through the DSs, but we can work together to make a common interface for more stats. Once we display per-DS rpc_iostats, there is no getting around the need to iterate through the DSs (like your session stats). I plan on looking at this later today.
OK - cool.
-->Andy
>
> -dros
>
>
>> On Tue, Feb 14, 2012 at 4:31 AM, Benny Halevy <bhalevy@tonian.com> wrote:
>>> On 2012-02-13 19:58, Adamson, Dros wrote:
>>>>
>>>> On Feb 13, 2012, at 3:05 AM, Benny Halevy wrote:
>>>>
>>>>> Sorry for the late response.
>>>>>
>>>>> On 2012-02-08 23:04, Adamson, Dros wrote:
>>>>>>
>>>>>> On Feb 8, 2012, at 12:26 PM, Chuck Lever wrote:
>>>>>>
>>>>>>> Hi-
>>>>>>>
>>>>>>> On Feb 8, 2012, at 12:10 PM, Weston Andros Adamson wrote:
>>>>>>>
>>>>>>>> The per-op stats displayed in /self/proc/mountstats are collected in the sunrpc
>>>>>>>
>>>>>>> You mean /proc/self/mountstats here.
>>>>>>
>>>>>> oops!
>>>>>>
>>>>>>>
>>>>>>>> layer and are collected per rpc_client. This has worked for nfs thus far
>>>>>>>> since only one nfs_client was ever associated with a mountpoint, but with
>>>>>>>> NFS4.1+PNFS+filelayout an nfs mount can have more than one nfs_client.
>>>>>>>>
>>>>>>>> This can be seen with a pnfs mount where no data server is the same as the MDS -
>>>>>>>> all reads, writes and commits will never make it into the current mountstats
>>>>>>>> output.
>>>>>>>>
>>>>>>>> I took the approach of keeping the stats from the MDS and DSs separate in the
>>>>>>>> /proc/self/mountstats output to avoid doing too much in the kernel, and to
>>>>>>>> expose the fact that these stats are from separate connections (maybe for later
>>>>>>>> use).
>>>>>>>>
>>>>>>>> This method has issues:
>>>>>>>>
>>>>>>>> 1) even changing the "statvers" of mountstats doesn't stop the userland
>>>>>>>> mountstats(8) from trying to parse this output -- and it does so
>>>>>>>> incorrectly.
>>>>>>>
>>>>>>> User land is broken then. :-)
>>>>>>
>>>>>> Agreed!
>>>>>>
>>>>>>>
>>>>>>>> 2) when using DS multipath this method fails to count operations that happened
>>>>>>>> on a different path to the same DS (after a failure).
>>>>>>
>>>>>> I should have noted that this isn't a problem yet because I haven't submitted the multipath failover patches :)
>>>>>>
>>>>>>>>
>>>>>>>> 3) currently this will display stats twice when DS == MDS.
>>>>>>>
>>>>>>> Why is this case hard to detect in the kernel or in mountstats?
>>>>>>
>>>>>> Not hard, just an issue with the current patch.
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> So... What should I do here?
>>>>>>>>
>>>>>>>> A different approach is to add support in net/sunrpc to specify a "parent"
>>>>>>>> stat structure so that operations on DS nfs_clients bump stats on
>>>>>>>> the main nfs_server->nfs_client->rpc_client. This would take care of all the
>>>>>>>> issues with the current patch, but seems like a hack.
>>>>>>>
>>>>>>> Is this the same as displaying all data in one set of stats?
>>>>>>
>>>>>> Yes
>>>>>>
>>>>>>>
>>>>>>> Seems like that might be the best next step, then we can split up the MDS and DS stats at some later point.
>>>>>
>>>>> Splitting up the MDS/DS stats is important to tell when DS I/O is being
>>>>> done and how effective it is.
>>>>>
>>>>> Benny
>>>>>
>>>>
>>>> Agreed! It just can't go into mountstats right away (due to statsvers= not being interpreted in mountstats(8)). I'll make a second patch on top of my mountstats changes to add per-DS stats in /proc/fs/nfsfs/ and we can figure out if we want to add it to mountstats later.
>>>>
>>>> Does this sound OK?
>>>
>>> Absolutely. Thanks!
>>>
>>> Benny
>>>
>>>>
>>>> -dros
>>>>
>>>>>>
>>>>>> Ok, I'll try to do that.
>>>>>>
>>>>>> Any advice on how to handle the "xprt: …" line? Obviously the different underlying rpc_clients will have different values. I'd have to look at mountstats(8) some more, but I *think* we could just make it a comma separated list.
>>>>>>
>>>>>> For the 'next step', we can use this patch - we just need to figure out where it should go. Do we keep it in /proc/self/mountstats? I think because of problem #2 we might want to move them out to somewhere in /proc/fs/nfsfs/ along with other per-DS statistics.
>>>>>>
>>>>>>>
>>>>>>>> One task that seems like a good thing to do is to fix mountstats(8) to respect
>>>>>>>> the "statsvers" value.
>>>>>>>
>>>>>>> Agree.
>>>>>>
>>>>>> I'll do this first.
>>>>>>
>>>>>> Thanks Chuck!
>>>>>>
>>>>>> -dros
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Thoughts?
>>>>>>>> ---
>>>>>>>>
>>>>>>>> I cleaned up this patch, but I still have reservations (noted in commit message)
>>>>>>>>
>>>>>>>> fs/nfs/nfs4filelayout.c | 23 +++++++++++++++++++++++
>>>>>>>> fs/nfs/pnfs.h | 20 ++++++++++++++++++++
>>>>>>>> fs/nfs/pnfs_dev.c | 25 +++++++++++++++++++++++++
>>>>>>>> fs/nfs/super.c | 1 +
>>>>>>>> include/linux/nfs_iostat.h | 2 +-
>>>>>>>> 5 files changed, 70 insertions(+), 1 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
>>>>>>>> index 79be7ac..9410fd0 100644
>>>>>>>> --- a/fs/nfs/nfs4filelayout.c
>>>>>>>> +++ b/fs/nfs/nfs4filelayout.c
>>>>>>>> @@ -33,6 +33,8 @@
>>>>>>>> #include <linux/nfs_page.h>
>>>>>>>> #include <linux/module.h>
>>>>>>>>
>>>>>>>> +#include <linux/sunrpc/metrics.h>
>>>>>>>> +
>>>>>>>> #include "internal.h"
>>>>>>>> #include "nfs4filelayout.h"
>>>>>>>>
>>>>>>>> @@ -918,6 +920,26 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
>>>>>>>> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
>>>>>>>> }
>>>>>>>>
>>>>>>>> +static void
>>>>>>>> +filelayout_rpc_print_iostats(struct seq_file *m, struct nfs4_deviceid_node *d)
>>>>>>>> +{
>>>>>>>> + struct nfs4_file_layout_dsaddr *dsaddr;
>>>>>>>> + struct nfs4_pnfs_ds *ds;
>>>>>>>> + u32 i;
>>>>>>>> +
>>>>>>>> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
>>>>>>>> +
>>>>>>>> + for (i = 0; i < dsaddr->ds_num; i++) {
>>>>>>>> + ds = dsaddr->ds_list[i];
>>>>>>>> +
>>>>>>>> + if (ds && ds->ds_clp) {
>>>>>>>> + seq_printf(m, " pnfs dataserver %s\n",
>>>>>>>> + ds->ds_remotestr);
>>>>>>>> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
>>>>>>>> + }
>>>>>>>> + }
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> static struct pnfs_layoutdriver_type filelayout_type = {
>>>>>>>> .id = LAYOUT_NFSV4_1_FILES,
>>>>>>>> .name = "LAYOUT_NFSV4_1_FILES",
>>>>>>>> @@ -932,6 +954,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
>>>>>>>> .read_pagelist = filelayout_read_pagelist,
>>>>>>>> .write_pagelist = filelayout_write_pagelist,
>>>>>>>> .free_deviceid_node = filelayout_free_deveiceid_node,
>>>>>>>> + .ds_rpc_print_iostats = filelayout_rpc_print_iostats,
>>>>>>>> };
>>>>>>>>
>>>>>>>> static int __init nfs4filelayout_init(void)
>>>>>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>>>>>> index 53d593a..0a5e020 100644
>>>>>>>> --- a/fs/nfs/pnfs.h
>>>>>>>> +++ b/fs/nfs/pnfs.h
>>>>>>>> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
>>>>>>>> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
>>>>>>>> struct xdr_stream *xdr,
>>>>>>>> const struct nfs4_layoutcommit_args *args);
>>>>>>>> +
>>>>>>>> + void (*ds_rpc_print_iostats) (struct seq_file *,
>>>>>>>> + struct nfs4_deviceid_node *);
>>>>>>>> };
>>>>>>>>
>>>>>>>> struct pnfs_layout_hdr {
>>>>>>>> @@ -239,6 +242,10 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
>>>>>>>> struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
>>>>>>>> bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
>>>>>>>> void nfs4_deviceid_purge_client(const struct nfs_client *);
>>>>>>>> +void nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>>>>> + const struct nfs_client *clp,
>>>>>>>> + const struct pnfs_layoutdriver_type *ld);
>>>>>>>> +
>>>>>>>>
>>>>>>>> static inline int lo_fail_bit(u32 iomode)
>>>>>>>> {
>>>>>>>> @@ -328,6 +335,14 @@ static inline int pnfs_return_layout(struct inode *ino)
>>>>>>>> return 0;
>>>>>>>> }
>>>>>>>>
>>>>>>>> +static inline void
>>>>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>>>>> +{
>>>>>>>> + if (!pnfs_enabled_sb(nfss))
>>>>>>>> + return;
>>>>>>>> + nfs4_deviceid_rpc_print_iostats(m, nfss->nfs_client,
>>>>>>>> + nfss->pnfs_curr_ld);
>>>>>>>> +}
>>>>>>>> #else /* CONFIG_NFS_V4_1 */
>>>>>>>>
>>>>>>>> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
>>>>>>>> @@ -429,6 +444,11 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
>>>>>>>> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
>>>>>>>> {
>>>>>>>> }
>>>>>>>> +
>>>>>>>> +static inline void
>>>>>>>> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
>>>>>>>> +{
>>>>>>>> +}
>>>>>>>> #endif /* CONFIG_NFS_V4_1 */
>>>>>>>>
>>>>>>>> #endif /* FS_NFS_PNFS_H */
>>>>>>>> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
>>>>>>>> index 4f359d2..277de87 100644
>>>>>>>> --- a/fs/nfs/pnfs_dev.c
>>>>>>>> +++ b/fs/nfs/pnfs_dev.c
>>>>>>>> @@ -115,6 +115,31 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
>>>>>>>> }
>>>>>>>> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>>>>>>>>
>>>>>>>> +
>>>>>>>> +void
>>>>>>>> +nfs4_deviceid_rpc_print_iostats(struct seq_file *seq,
>>>>>>>> + const struct nfs_client *clp,
>>>>>>>> + const struct pnfs_layoutdriver_type *ld)
>>>>>>>> +{
>>>>>>>> + struct nfs4_deviceid_node *d;
>>>>>>>> + struct hlist_node *n;
>>>>>>>> + long h;
>>>>>>>> +
>>>>>>>> + if (!ld->ds_rpc_print_iostats)
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> + rcu_read_lock();
>>>>>>>> + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) {
>>>>>>>> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[h], node)
>>>>>>>> + if (d->ld == ld && d->nfs_client == clp) {
>>>>>>>> + if (atomic_read(&d->ref))
>>>>>>>> + ld->ds_rpc_print_iostats(seq, d);
>>>>>>>> + }
>>>>>>>> + }
>>>>>>>> + rcu_read_unlock();
>>>>>>>> +}
>>>>>>>> +EXPORT_SYMBOL_GPL(nfs4_deviceid_rpc_print_iostats);
>>>>>>>> +
>>>>>>>> /*
>>>>>>>> * Remove a deviceid from cache
>>>>>>>> *
>>>>>>>> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
>>>>>>>> index d18a90b..453e496 100644
>>>>>>>> --- a/fs/nfs/super.c
>>>>>>>> +++ b/fs/nfs/super.c
>>>>>>>> @@ -871,6 +871,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
>>>>>>>> seq_printf(m, "\n");
>>>>>>>>
>>>>>>>> rpc_print_iostats(m, nfss->client);
>>>>>>>> + pnfs_rpc_print_iostats(m, nfss);
>>>>>>>>
>>>>>>>> return 0;
>>>>>>>> }
>>>>>>>> diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
>>>>>>>> index 8866bb3..7fe4605 100644
>>>>>>>> --- a/include/linux/nfs_iostat.h
>>>>>>>> +++ b/include/linux/nfs_iostat.h
>>>>>>>> @@ -21,7 +21,7 @@
>>>>>>>> #ifndef _LINUX_NFS_IOSTAT
>>>>>>>> #define _LINUX_NFS_IOSTAT
>>>>>>>>
>>>>>>>> -#define NFS_IOSTAT_VERS "1.0"
>>>>>>>> +#define NFS_IOSTAT_VERS "2.0"
>>>>>>>>
>>>>>>>> /*
>>>>>>>> * NFS byte counters
>>>>>>>> --
>>>>>>>> 1.7.4.4
>>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Chuck Lever
>>>>>>> chuck[dot]lever[at]oracle[dot]com
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>>>> the body of a message to majordomo@vger.kernel.org
>>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH] [RFC] NFS - filelayout DS rpc mountstats support
@ 2012-01-30 20:53 Weston Andros Adamson
2012-01-30 20:57 ` Adamson, Dros
2012-01-31 19:19 ` Adamson, Dros
0 siblings, 2 replies; 14+ messages in thread
From: Weston Andros Adamson @ 2012-01-30 20:53 UTC (permalink / raw)
To: Trond.Myklebust; +Cc: linux-nfs, Weston Andros Adamson
The RPC stats displayed in /self/proc/mountstats are collected in the sunrpc
layer and are collected per rpc_client. This has worked for NFS thus far
since only one nfs_client (and it's associated rpc_client) was ever associated
with a mountpoint.
Now with NFS4.1+PNFS+filelayout, NFS can have more than one nfs_client
associated with a mountpoint! Note that the rpc stats are hung off of the
rpc_client - so even if two connections share the same transport, they will
not use the same stats structure (ie when MDS == DS).
This patch -- admittedly ugly -- poles holes through the PNFS layout driver
to print the stats for rpc_client structures for the dataserver connections
with the filelayout driver.
I took the approach of keeping them separate in the /proc/self/mountstats
output to avoid doing too much in the kernel. If we agree this is the way to
go, I'll update mountstats(1) to combine them for the default output (much in
the same way that averaging is done in userland instead of kernel here).
The alternative is to poke holes in the sunrpc layer to allow combining stats.
Again, I don't love this solution - it seems like a hack. The other option
I can see is changing the rpc layer to have support for linking several
rpc_client structures to use the same stat struct, but this would likely look
like a huge hack too.
Thoughts?
---
fs/nfs/nfs4filelayout.c | 19 +++++++++++++++++++
fs/nfs/nfs4filelayout.h | 2 ++
fs/nfs/nfs4filelayoutdev.c | 19 +++++++++++++++++++
fs/nfs/pnfs.h | 17 +++++++++++++++++
fs/nfs/pnfs_dev.c | 36 ++++++++++++++++++++++++++++++++++++
fs/nfs/super.c | 1 +
6 files changed, 94 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b4f8f96..ce20ac3 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -916,6 +916,24 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
}
+static void
+_rpc_print_iostats_cb(struct nfs4_deviceid_node *d, void *ctx)
+{
+ struct seq_file *m;
+ struct nfs4_file_layout_dsaddr *dsaddr;
+
+ m = (struct seq_file *)ctx;
+ dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+ nfs4_fl_rpc_print_iostats(m, dsaddr);
+}
+
+static void
+filelayout_rpc_print_iostats(struct seq_file *m, const struct nfs_server *nfss)
+{
+ nfs4_foreach_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client,
+ _rpc_print_iostats_cb, m);
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
@@ -930,6 +948,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
.free_deviceid_node = filelayout_free_deveiceid_node,
+ .rpc_print_iostats = filelayout_rpc_print_iostats,
};
static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2e42284..6942992 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -110,6 +110,8 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
u32 ds_idx);
+void nfs4_fl_rpc_print_iostats(struct seq_file *,
+ struct nfs4_file_layout_dsaddr *);
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 6eb59b0..1a03a14 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -31,6 +31,8 @@
#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
+#include <linux/sunrpc/metrics.h>
+
#include "internal.h"
#include "nfs4filelayout.h"
@@ -829,6 +831,23 @@ filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
spin_unlock(&nfs4_ds_cache_lock);
}
+void
+nfs4_fl_rpc_print_iostats(struct seq_file *m,
+ struct nfs4_file_layout_dsaddr *dsaddr)
+{
+ struct nfs4_pnfs_ds *ds;
+ u32 i;
+
+ for (i = 0; i < dsaddr->ds_num; i++) {
+ ds = dsaddr->ds_list[i];
+
+ if (ds && ds->ds_clp) {
+ seq_printf(m, "PNFS Dataserver %s\n", ds->ds_remotestr);
+ rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
+ }
+ }
+}
+
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 53d593a..86433d8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
struct xdr_stream *xdr,
const struct nfs4_layoutcommit_args *args);
+
+ void (*rpc_print_iostats) (struct seq_file *m,
+ const struct nfs_server *nfss);
};
struct pnfs_layout_hdr {
@@ -231,6 +234,7 @@ struct nfs4_deviceid_node {
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
+void nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, void (*callback)(struct nfs4_deviceid_node *, void *), void *);
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
const struct pnfs_layoutdriver_type *,
@@ -328,6 +332,13 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+ if (pnfs_enabled_sb(nfss) && nfss->pnfs_curr_ld->rpc_print_iostats)
+ nfss->pnfs_curr_ld->rpc_print_iostats(m, nfss);
+}
+
#else /* CONFIG_NFS_V4_1 */
static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -429,6 +440,12 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
{
}
+
+static inline void
+pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
+{
+}
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 4f359d2..5a65668 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -115,6 +115,42 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
+
+/*
+ * Apply callback function to each entry in the cache that matches
+ * the specified layout driver and nfs_client (of the MDS)
+ */
+void
+_nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *ld,
+ const struct nfs_client *clp,
+ void (*callback)(struct nfs4_deviceid_node *, void *),
+ void *ctx)
+{
+ struct nfs4_deviceid_node *d;
+ struct hlist_node *n;
+ long hash;
+
+ for (hash = 0; hash < NFS4_DEVICE_ID_HASH_SIZE; hash++) {
+ hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
+ if (d->ld == ld && d->nfs_client == clp) {
+ if (atomic_read(&d->ref))
+ callback(d, ctx);
+ }
+ }
+}
+
+void
+nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *ld,
+ const struct nfs_client *clp,
+ void (*callback)(struct nfs4_deviceid_node *, void *),
+ void *ctx)
+{
+ rcu_read_lock();
+ _nfs4_foreach_deviceid(ld, clp, callback, ctx);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nfs4_foreach_deviceid);
+
/*
* Remove a deviceid from cache
*
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b79f2a1..5d8e00b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -869,6 +869,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
seq_printf(m, "\n");
rpc_print_iostats(m, nfss->client);
+ pnfs_rpc_print_iostats(m, nfss);
return 0;
}
--
1.7.4.4
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH] [RFC] NFS - filelayout DS rpc mountstats support
2012-01-30 20:53 [PATCH] [RFC] " Weston Andros Adamson
@ 2012-01-30 20:57 ` Adamson, Dros
2012-01-31 19:19 ` Adamson, Dros
1 sibling, 0 replies; 14+ messages in thread
From: Adamson, Dros @ 2012-01-30 20:57 UTC (permalink / raw)
To: Myklebust, Trond; +Cc: <linux-nfs@vger.kernel.org>
[-- Attachment #1: Type: text/plain, Size: 8794 bytes --]
On Jan 30, 2012, at 3:53 PM, Weston Andros Adamson wrote:
> The RPC stats displayed in /self/proc/mountstats are collected in the sunrpc
> layer and are collected per rpc_client. This has worked for NFS thus far
> since only one nfs_client (and it's associated rpc_client) was ever associated
> with a mountpoint.
>
> Now with NFS4.1+PNFS+filelayout, NFS can have more than one nfs_client
> associated with a mountpoint! Note that the rpc stats are hung off of the
> rpc_client - so even if two connections share the same transport, they will
> not use the same stats structure (ie when MDS == DS).
>
> This patch -- admittedly ugly -- poles holes through the PNFS layout driver
> to print the stats for rpc_client structures for the dataserver connections
> with the filelayout driver.
>
> I took the approach of keeping them separate in the /proc/self/mountstats
> output to avoid doing too much in the kernel. If we agree this is the way to
> go, I'll update mountstats(1) to combine them for the default output (much in
> the same way that averaging is done in userland instead of kernel here).
> The alternative is to poke holes in the sunrpc layer to allow combining stats.
>
> Again, I don't love this solution - it seems like a hack. The other option
> I can see is changing the rpc layer to have support for linking several
> rpc_client structures to use the same stat struct, but this would likely look
> like a huge hack too.
>
> Thoughts?
> ---
> fs/nfs/nfs4filelayout.c | 19 +++++++++++++++++++
> fs/nfs/nfs4filelayout.h | 2 ++
> fs/nfs/nfs4filelayoutdev.c | 19 +++++++++++++++++++
> fs/nfs/pnfs.h | 17 +++++++++++++++++
> fs/nfs/pnfs_dev.c | 36 ++++++++++++++++++++++++++++++++++++
> fs/nfs/super.c | 1 +
> 6 files changed, 94 insertions(+), 0 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index b4f8f96..ce20ac3 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -916,6 +916,24 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
> nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
> }
>
> +static void
> +_rpc_print_iostats_cb(struct nfs4_deviceid_node *d, void *ctx)
> +{
> + struct seq_file *m;
> + struct nfs4_file_layout_dsaddr *dsaddr;
> +
> + m = (struct seq_file *)ctx;
> + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
> + nfs4_fl_rpc_print_iostats(m, dsaddr);
> +}
> +
> +static void
> +filelayout_rpc_print_iostats(struct seq_file *m, const struct nfs_server *nfss)
> +{
> + nfs4_foreach_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client,
> + _rpc_print_iostats_cb, m);
> +}
> +
> static struct pnfs_layoutdriver_type filelayout_type = {
> .id = LAYOUT_NFSV4_1_FILES,
> .name = "LAYOUT_NFSV4_1_FILES",
> @@ -930,6 +948,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
> .read_pagelist = filelayout_read_pagelist,
> .write_pagelist = filelayout_write_pagelist,
> .free_deviceid_node = filelayout_free_deveiceid_node,
> + .rpc_print_iostats = filelayout_rpc_print_iostats,
> };
>
> static int __init nfs4filelayout_init(void)
> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
> index 2e42284..6942992 100644
> --- a/fs/nfs/nfs4filelayout.h
> +++ b/fs/nfs/nfs4filelayout.h
> @@ -110,6 +110,8 @@ u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
> u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
> struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
> u32 ds_idx);
> +void nfs4_fl_rpc_print_iostats(struct seq_file *,
> + struct nfs4_file_layout_dsaddr *);
> extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
> extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
> struct nfs4_file_layout_dsaddr *
> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> index 6eb59b0..1a03a14 100644
> --- a/fs/nfs/nfs4filelayoutdev.c
> +++ b/fs/nfs/nfs4filelayoutdev.c
> @@ -31,6 +31,8 @@
> #include <linux/nfs_fs.h>
> #include <linux/vmalloc.h>
>
> +#include <linux/sunrpc/metrics.h>
> +
> #include "internal.h"
> #include "nfs4filelayout.h"
>
> @@ -829,6 +831,23 @@ filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
> spin_unlock(&nfs4_ds_cache_lock);
> }
>
> +void
> +nfs4_fl_rpc_print_iostats(struct seq_file *m,
> + struct nfs4_file_layout_dsaddr *dsaddr)
> +{
> + struct nfs4_pnfs_ds *ds;
> + u32 i;
> +
> + for (i = 0; i < dsaddr->ds_num; i++) {
> + ds = dsaddr->ds_list[i];
> +
> + if (ds && ds->ds_clp) {
> + seq_printf(m, "PNFS Dataserver %s\n", ds->ds_remotestr);
> + rpc_print_iostats(m, ds->ds_clp->cl_rpcclient);
> + }
> + }
> +}
> +
> struct nfs4_pnfs_ds *
> nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
> {
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 53d593a..86433d8 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -119,6 +119,9 @@ struct pnfs_layoutdriver_type {
> void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
> struct xdr_stream *xdr,
> const struct nfs4_layoutcommit_args *args);
> +
> + void (*rpc_print_iostats) (struct seq_file *m,
> + const struct nfs_server *nfss);
> };
>
> struct pnfs_layout_hdr {
> @@ -231,6 +234,7 @@ struct nfs4_deviceid_node {
>
> void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
> struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
> +void nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, void (*callback)(struct nfs4_deviceid_node *, void *), void *);
> void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
> void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
> const struct pnfs_layoutdriver_type *,
> @@ -328,6 +332,13 @@ static inline int pnfs_return_layout(struct inode *ino)
> return 0;
> }
>
> +static inline void
> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
> +{
> + if (pnfs_enabled_sb(nfss) && nfss->pnfs_curr_ld->rpc_print_iostats)
> + nfss->pnfs_curr_ld->rpc_print_iostats(m, nfss);
> +}
> +
> #else /* CONFIG_NFS_V4_1 */
>
> static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
> @@ -429,6 +440,12 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
> static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl)
> {
> }
> +
> +static inline void
> +pnfs_rpc_print_iostats(struct seq_file *m, struct nfs_server *nfss)
> +{
> +}
> +
> #endif /* CONFIG_NFS_V4_1 */
>
> #endif /* FS_NFS_PNFS_H */
> diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
> index 4f359d2..5a65668 100644
> --- a/fs/nfs/pnfs_dev.c
> +++ b/fs/nfs/pnfs_dev.c
> @@ -115,6 +115,42 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
> }
> EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
>
> +
> +/*
> + * Apply callback function to each entry in the cache that matches
> + * the specified layout driver and nfs_client (of the MDS)
> + */
> +void
> +_nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *ld,
> + const struct nfs_client *clp,
> + void (*callback)(struct nfs4_deviceid_node *, void *),
> + void *ctx)
> +{
> + struct nfs4_deviceid_node *d;
> + struct hlist_node *n;
> + long hash;
> +
> + for (hash = 0; hash < NFS4_DEVICE_ID_HASH_SIZE; hash++) {
> + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
> + if (d->ld == ld && d->nfs_client == clp) {
> + if (atomic_read(&d->ref))
> + callback(d, ctx);
> + }
> + }
> +}
FWIW, I'm not thrilled about this :)
> +
> +void
> +nfs4_foreach_deviceid(const struct pnfs_layoutdriver_type *ld,
> + const struct nfs_client *clp,
> + void (*callback)(struct nfs4_deviceid_node *, void *),
> + void *ctx)
> +{
> + rcu_read_lock();
> + _nfs4_foreach_deviceid(ld, clp, callback, ctx);
> + rcu_read_unlock();
> +}
> +EXPORT_SYMBOL_GPL(nfs4_foreach_deviceid);
> +
> /*
> * Remove a deviceid from cache
> *
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index b79f2a1..5d8e00b 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -869,6 +869,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
> seq_printf(m, "\n");
>
> rpc_print_iostats(m, nfss->client);
> + pnfs_rpc_print_iostats(m, nfss);
>
> return 0;
> }
> --
> 1.7.4.4
>
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 1374 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH] [RFC] NFS - filelayout DS rpc mountstats support
2012-01-30 20:53 [PATCH] [RFC] " Weston Andros Adamson
2012-01-30 20:57 ` Adamson, Dros
@ 2012-01-31 19:19 ` Adamson, Dros
1 sibling, 0 replies; 14+ messages in thread
From: Adamson, Dros @ 2012-01-31 19:19 UTC (permalink / raw)
To: Myklebust, Trond; +Cc: AND... open list:NFS SUNRPC
[-- Attachment #1: Type: text/plain, Size: 192 bytes --]
On Jan 30, 2012, at 3:53 PM, Weston Andros Adamson wrote:
> go, I'll update mountstats(1) to combine them for the default output (much in
i meant mountstats(8) -- sorry, too much coffee!
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 1374 bytes --]
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2012-02-14 16:32 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-08 17:10 [PATCH] RFC: NFS - filelayout DS rpc mountstats support Weston Andros Adamson
2012-02-08 17:26 ` Chuck Lever
2012-02-08 21:04 ` Adamson, Dros
2012-02-08 21:55 ` Adamson, Dros
2012-02-08 22:35 ` Chuck Lever
2012-02-13 8:05 ` Benny Halevy
2012-02-13 17:58 ` Adamson, Dros
2012-02-14 9:31 ` Benny Halevy
2012-02-14 15:44 ` Andy Adamson
2012-02-14 15:59 ` Adamson, Dros
2012-02-14 16:32 ` Andy Adamson
-- strict thread matches above, loose matches on Subject: below --
2012-01-30 20:53 [PATCH] [RFC] " Weston Andros Adamson
2012-01-30 20:57 ` Adamson, Dros
2012-01-31 19:19 ` Adamson, Dros
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.