* [PATCH RFC 0/4] nfsv41/pnfsblock: allow block client not to layoutget in pg_init
@ 2011-11-19 16:53 Peng Tao
2011-11-19 16:53 ` [PATCH RFC 1/4] nfsv41: export pnfs_find_alloc_layout Peng Tao
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Peng Tao @ 2011-11-19 16:53 UTC (permalink / raw)
To: linux-nfs; +Cc: Trond.Myklebust, bhalevy
Asking for a layout in pg_init always makes the client ask for only a 4KB
layout in every layoutget. This way, the client drops the IO size information.
In that case, the server needs to pre-allocate layout to achieve good performance.
However, IO size information is very useful for MDS to determine how much layout
it gives client. This is more important for block layout MDS because it needs to
pre-allocate disk for layoutget that extends file size.
Initial tests against a server that does not aggressively pre-allocate layout
show that passing IO size information to the server helps to improve performance
quite a lot.
The generic pnfs layer changes are trivial for the file layout and object layout drivers.
iozone cmd:
./iozone -r 1m -s 4G -w -W -c -t 10 -i 0 -F /mnt/iozone.data.1 /mnt/iozone.data.2 /mnt/iozone.data.3 /mnt/iozone.data.4 /mnt/iozone.data.5 /mnt/iozone.data.6 /mnt/iozone.data.7 /mnt/iozone.data.8 /mnt/iozone.data.9 /mnt/iozone.data.10
Before patch: around 12MB/s throughput
After patch: around 72MB/s throughput
Peng Tao (4):
nfsv41: export pnfs_find_alloc_layout
nfsv41: add and export pnfs_find_get_layout_locked
nfsv41: get lseg before issue LD IO if pgio doesn't carry one
pnfsblock: do not ask for layout in pg_init
fs/nfs/blocklayout/blocklayout.c | 54 +++++++++++++++++++++++++++-
fs/nfs/pnfs.c | 73 +++++++++++++++++++++++++++++++++++++-
fs/nfs/pnfs.h | 9 +++++
3 files changed, 133 insertions(+), 3 deletions(-)
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH RFC 1/4] nfsv41: export pnfs_find_alloc_layout
2011-11-19 16:53 [PATCH RFC 0/4] nfsv41/pnfsblock: allow block client not to layoutget in pg_init Peng Tao
@ 2011-11-19 16:53 ` Peng Tao
2011-11-19 16:53 ` [PATCH RFC 2/4] nfsv41: add and export pnfs_find_get_layout_locked Peng Tao
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Peng Tao @ 2011-11-19 16:53 UTC (permalink / raw)
To: linux-nfs; +Cc: Trond.Myklebust, bhalevy, Peng Tao
So that a layout driver can find the layout header, or allocate one when there is none.
Signed-off-by: Peng Tao <peng_tao@emc.com>
---
fs/nfs/pnfs.c | 3 ++-
fs/nfs/pnfs.h | 4 ++++
2 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index baf7353..3be29c7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -848,7 +848,7 @@ alloc_init_layout_hdr(struct inode *ino,
return lo;
}
-static struct pnfs_layout_hdr *
+struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
struct nfs_open_context *ctx,
gfp_t gfp_flags)
@@ -875,6 +875,7 @@ pnfs_find_alloc_layout(struct inode *ino,
pnfs_free_layout_hdr(new);
return nfsi->layout;
}
+EXPORT_SYMBOL_GPL(pnfs_find_alloc_layout);
/*
* iomode matching rules:
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1509530..9614ac9 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -209,6 +209,10 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
u64 count,
enum pnfs_iomode iomode,
gfp_t gfp_flags);
+struct pnfs_layout_hdr *
+pnfs_find_alloc_layout(struct inode *ino,
+ struct nfs_open_context *ctx,
+ gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
--
1.7.1.262.g5ef3d
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH RFC 2/4] nfsv41: add and export pnfs_find_get_layout_locked
2011-11-19 16:53 [PATCH RFC 0/4] nfsv41/pnfsblock: allow block client not to layoutget in pg_init Peng Tao
2011-11-19 16:53 ` [PATCH RFC 1/4] nfsv41: export pnfs_find_alloc_layout Peng Tao
@ 2011-11-19 16:53 ` Peng Tao
2011-11-19 16:53 ` [PATCH RFC 3/4] nfsv41: get lseg before issue LD IO if pgio doesn't carry one Peng Tao
2011-11-19 16:53 ` [PATCH RFC 4/4] pnfsblock: do not ask for layout in pg_init Peng Tao
3 siblings, 0 replies; 5+ messages in thread
From: Peng Tao @ 2011-11-19 16:53 UTC (permalink / raw)
To: linux-nfs; +Cc: Trond.Myklebust, bhalevy, Peng Tao
It tries to find the lseg in the local cache but does not retrieve a layout from the server.
Signed-off-by: Peng Tao <peng_tao@emc.com>
---
fs/nfs/pnfs.c | 25 +++++++++++++++++++++++++
fs/nfs/pnfs.h | 5 +++++
2 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3be29c7..734e670 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -933,6 +933,31 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
}
/*
+ * Find and reference lseg with ino->i_lock held.
+ */
+struct pnfs_layout_segment *
+pnfs_find_get_layout_locked(struct inode *ino,
+ loff_t pos,
+ u64 count,
+ enum pnfs_iomode iomode)
+{
+ struct pnfs_layout_segment *lseg = NULL;
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .offset = pos,
+ .length = count,
+ };
+
+ if (NFS_I(ino)->layout == NULL)
+ goto out;
+
+ lseg = pnfs_find_lseg(NFS_I(ino)->layout, &range);
+out:
+ return lseg;
+}
+EXPORT_SYMBOL_GPL(pnfs_find_get_layout_locked);
+
+/*
* Layout segment is retreived from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 9614ac9..0c55fc1 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -213,6 +213,11 @@ struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino,
struct nfs_open_context *ctx,
gfp_t gfp_flags);
+struct pnfs_layout_segment *
+pnfs_find_get_layout_locked(struct inode *ino,
+ loff_t pos,
+ u64 count,
+ enum pnfs_iomode iomode);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
--
1.7.1.262.g5ef3d
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH RFC 3/4] nfsv41: get lseg before issue LD IO if pgio doesn't carry one
2011-11-19 16:53 [PATCH RFC 0/4] nfsv41/pnfsblock: allow block client not to layoutget in pg_init Peng Tao
2011-11-19 16:53 ` [PATCH RFC 1/4] nfsv41: export pnfs_find_alloc_layout Peng Tao
2011-11-19 16:53 ` [PATCH RFC 2/4] nfsv41: add and export pnfs_find_get_layout_locked Peng Tao
@ 2011-11-19 16:53 ` Peng Tao
2011-11-19 16:53 ` [PATCH RFC 4/4] pnfsblock: do not ask for layout in pg_init Peng Tao
3 siblings, 0 replies; 5+ messages in thread
From: Peng Tao @ 2011-11-19 16:53 UTC (permalink / raw)
To: linux-nfs; +Cc: Trond.Myklebust, bhalevy, Peng Tao
This gives the layout driver (LD) the option not to ask for a layout in pg_init.
Signed-off-by: Peng Tao <peng_tao@emc.com>
---
fs/nfs/pnfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 46 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 734e670..c8dc0b1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1254,6 +1254,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
struct nfs_write_data *data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ const bool has_lseg = !!lseg;
desc->pg_lseg = NULL;
while (!list_empty(head)) {
@@ -1262,7 +1263,29 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
data = list_entry(head->next, struct nfs_write_data, list);
list_del_init(&data->list);
+ if (!has_lseg) {
+ struct nfs_page *req = nfs_list_entry(data->pages.next);
+ __u64 length = data->npages << PAGE_CACHE_SHIFT;
+
+ lseg = pnfs_update_layout(desc->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ length,
+ IOMODE_RW,
+ GFP_NOFS);
+ if (!lseg || length > (lseg->pls_range.length)) {
+ put_lseg(lseg);
+ lseg = NULL;
+ pnfs_write_through_mds(desc, data);
+ continue;
+ }
+ }
+
trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (!has_lseg) {
+ put_lseg(lseg);
+ lseg = NULL;
+ }
if (trypnfs == PNFS_NOT_ATTEMPTED)
pnfs_write_through_mds(desc, data);
}
@@ -1350,6 +1373,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
struct nfs_read_data *data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ const bool has_lseg = !!lseg;
desc->pg_lseg = NULL;
while (!list_empty(head)) {
@@ -1358,7 +1382,29 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
data = list_entry(head->next, struct nfs_read_data, list);
list_del_init(&data->list);
+ if (!has_lseg) {
+ struct nfs_page *req = nfs_list_entry(data->pages.next);
+ __u64 length = data->npages << PAGE_CACHE_SHIFT;
+
+ lseg = pnfs_update_layout(desc->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ length,
+ IOMODE_READ,
+ GFP_KERNEL);
+ if (!lseg || length > lseg->pls_range.length) {
+ put_lseg(lseg);
+ lseg = NULL;
+ pnfs_read_through_mds(desc, data);
+ continue;
+ }
+ }
+
trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (!has_lseg) {
+ put_lseg(lseg);
+ lseg = NULL;
+ }
if (trypnfs == PNFS_NOT_ATTEMPTED)
pnfs_read_through_mds(desc, data);
}
--
1.7.1.262.g5ef3d
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH RFC 4/4] pnfsblock: do not ask for layout in pg_init
2011-11-19 16:53 [PATCH RFC 0/4] nfsv41/pnfsblock: allow block client not to layoutget in pg_init Peng Tao
` (2 preceding siblings ...)
2011-11-19 16:53 ` [PATCH RFC 3/4] nfsv41: get lseg before issue LD IO if pgio doesn't carry one Peng Tao
@ 2011-11-19 16:53 ` Peng Tao
3 siblings, 0 replies; 5+ messages in thread
From: Peng Tao @ 2011-11-19 16:53 UTC (permalink / raw)
To: linux-nfs; +Cc: Trond.Myklebust, bhalevy, Peng Tao
Asking for layout in pg_init will always make client ask for only 4KB
layout in every layoutget. This way, client drops the IO size information
that is meaningful for MDS in handing out layout.
Instead, if the layout is not found in the cache, do not send layoutget
at once. Wait until just before issuing IO in pnfs_do_multiple_reads/writes
because that is where we know the real size of current IO. By telling the
real IO size to MDS, MDS will have a better chance to give proper layout.
Signed-off-by: Peng Tao <peng_tao@emc.com>
---
fs/nfs/blocklayout/blocklayout.c | 54 ++++++++++++++++++++++++++++++++++++-
1 files changed, 52 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 48cfac3..fd585fe 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -39,6 +39,7 @@
#include <linux/prefetch.h>
#include "blocklayout.h"
+#include "../internal.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -990,14 +991,63 @@ bl_clear_layoutdriver(struct nfs_server *server)
return 0;
}
+/* The RFC does not limit the maximum layout size, so we limit it ourselves. */
+#define PNFSBLK_MAXRSIZE (0x1<<22)
+#define PNFSBLK_MAXWSIZE (0x1<<21)
+static void
+bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ struct inode *ino = pgio->pg_inode;
+ struct pnfs_layout_hdr *lo;
+
+ BUG_ON(pgio->pg_lseg != NULL);
+ spin_lock(&ino->i_lock);
+ lo = pnfs_find_alloc_layout(ino, req->wb_context, GFP_KERNEL);
+ if (!lo || test_bit(lo_fail_bit(IOMODE_READ), &lo->plh_flags)) {
+ spin_unlock(&ino->i_lock);
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
+
+ pgio->pg_bsize = PNFSBLK_MAXRSIZE;
+ pgio->pg_lseg = pnfs_find_get_layout_locked(ino,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_READ);
+ spin_unlock(&ino->i_lock);
+}
+
+static void
+bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ struct inode *ino = pgio->pg_inode;
+ struct pnfs_layout_hdr *lo;
+
+ BUG_ON(pgio->pg_lseg != NULL);
+ spin_lock(&ino->i_lock);
+ lo = pnfs_find_alloc_layout(ino, req->wb_context, GFP_NOFS);
+ if (!lo || test_bit(lo_fail_bit(IOMODE_RW), &lo->plh_flags)) {
+ spin_unlock(&ino->i_lock);
+ nfs_pageio_reset_write_mds(pgio);
+ return;
+ }
+
+ pgio->pg_bsize = PNFSBLK_MAXWSIZE;
+ pgio->pg_lseg = pnfs_find_get_layout_locked(ino,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_RW);
+ spin_unlock(&ino->i_lock);
+}
+
static const struct nfs_pageio_ops bl_pg_read_ops = {
- .pg_init = pnfs_generic_pg_init_read,
+ .pg_init = bl_pg_init_read,
.pg_test = pnfs_generic_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops bl_pg_write_ops = {
- .pg_init = pnfs_generic_pg_init_write,
+ .pg_init = bl_pg_init_write,
.pg_test = pnfs_generic_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
--
1.7.1.262.g5ef3d
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2011-11-19 16:54 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2011-11-19 16:53 [PATCH RFC 0/4] nfsv41/pnfsblock: allow block client not to layoutget in pg_init Peng Tao
2011-11-19 16:53 ` [PATCH RFC 1/4] nfsv41: export pnfs_find_alloc_layout Peng Tao
2011-11-19 16:53 ` [PATCH RFC 2/4] nfsv41: add and export pnfs_find_get_layout_locked Peng Tao
2011-11-19 16:53 ` [PATCH RFC 3/4] nfsv41: get lseg before issue LD IO if pgio doesn't carry one Peng Tao
2011-11-19 16:53 ` [PATCH RFC 4/4] pnfsblock: do not ask for layout in pg_init Peng Tao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).