linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Peng Tao <bergwolf@gmail.com>
To: bharrosh@panasas.com
Cc: linux-nfs@vger.kernel.org, Peng Tao <tao.peng@emc.com>
Subject: [PATCH RFC 2/3] NFS41: send real write size in layoutget
Date: Wed,  8 Aug 2012 10:03:11 +0800	[thread overview]
Message-ID: <1344391392-1948-3-git-send-email-bergwolf@gmail.com> (raw)
In-Reply-To: <1344391392-1948-1-git-send-email-bergwolf@gmail.com>

From: Peng Tao <tao.peng@emc.com>

For buffered writes, scan the dirty pages to find the longest contiguous
run of dirty pages. In this case, also allow the layout driver to specify a
maximum layoutget size, which is useful to avoid busy scanning of dirty
pages for the block layout client.

For direct writes, just use dreq->bytes_left.

Signed-off-by: Peng Tao <tao.peng@emc.com>
---
 fs/nfs/direct.c   |    7 ++++++
 fs/nfs/internal.h |    1 +
 fs/nfs/pnfs.c     |   58 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c39f775..c1899dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -46,6 +46,7 @@
 #include <linux/kref.h>
 #include <linux/slab.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/module.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
@@ -191,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
 	kref_put(&dreq->kref, nfs_direct_req_free);
 }
 
+ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
+{
+	return dreq->bytes_left;
+}
+EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
+
 /*
  * Collects and returns the final error value/byte-count.
  */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31fdb03..e68d329 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
 {
 	inode_dio_wait(inode);
 }
+extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
 extern void __nfs4_read_done_cb(struct nfs_read_data *);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2e00fea..e61a373 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -29,6 +29,7 @@
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
+#include <linux/pagevec.h>
 #include <linux/module.h>
 #include "internal.h"
 #include "pnfs.h"
@@ -1172,19 +1173,72 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
 
+/*
+ * Return the number of contiguous bytes in dirty pages for a given inode
+ * starting at page frame idx.
+ */
+static u64 pnfs_num_dirty_bytes(struct inode *inode, pgoff_t idx)
+{
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t	index;
+	struct pagevec pvec;
+	pgoff_t num = 1; /* self */
+	int i, done = 0;
+
+	pagevec_init(&pvec, 0);
+	idx++; /* self */
+	while (!done) {
+		index = idx;
+		pagevec_lookup_tag(&pvec, mapping, &index,
+				   PAGECACHE_TAG_DIRTY, (pgoff_t)PAGEVEC_SIZE);
+		if (pagevec_count(&pvec) == 0)
+			break;
+
+		for (i = 0; i < pagevec_count(&pvec); i++) {
+			struct page *page = pvec.pages[i];
+
+			lock_page(page);
+			if (unlikely(page->mapping != mapping) ||
+			    !PageDirty(page) ||
+			    PageWriteback(page) ||
+			    page->index != idx) {
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+			unlock_page(page);
+			if (done)
+				break;
+			idx++;
+			num++;
+		}
+		pagevec_release(&pvec);
+	}
+	return num << PAGE_CACHE_SHIFT;
+}
+
 void
-pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
+			   struct nfs_page *req)
 {
+	u64 wb_size;
+
 	BUG_ON(pgio->pg_lseg != NULL);
 
 	if (req->wb_offset != req->wb_pgbase) {
 		nfs_pageio_reset_write_mds(pgio);
 		return;
 	}
+
+	if (pgio->pg_dreq == NULL)
+		wb_size = pnfs_num_dirty_bytes(pgio->pg_inode, req->wb_index);
+	else
+		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+
 	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 					   req->wb_context,
 					   req_offset(req),
-					   req->wb_bytes,
+					   wb_size?:req->wb_bytes,
 					   IOMODE_RW,
 					   GFP_NOFS);
 	/* If no lseg, fall back to write through mds */
-- 
1.7.1.262.g5ef3d


  parent reply	other threads:[~2012-08-08  2:03 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-08  2:03 [PATCH RFC 0/3] NFS41: optimize layoutget Peng Tao
2012-08-08  2:03 ` [PATCH RFC 1/3] NFS: track direct IO left bytes Peng Tao
2012-08-08  2:03 ` Peng Tao [this message]
2012-08-08 18:50   ` [PATCH RFC 2/3] NFS41: send real write size in layoutget Myklebust, Trond
2012-08-09  2:24     ` Peng Tao
2012-08-12 18:30   ` Boaz Harrosh
2012-08-12 18:40     ` Boaz Harrosh
2012-08-13  6:15     ` Peng Tao
2012-08-13  9:44     ` Peng Tao
2012-08-13 20:13       ` Boaz Harrosh
2012-08-13 20:21         ` Myklebust, Trond
2012-08-08  2:03 ` [PATCH RFC 3/3] NFS41: send real read size in layoutget for DIO Peng Tao
2012-08-08 18:57   ` Myklebust, Trond
2012-08-09  2:30     ` Peng Tao
2012-08-12 17:39       ` Boaz Harrosh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1344391392-1948-3-git-send-email-bergwolf@gmail.com \
    --to=bergwolf@gmail.com \
    --cc=bharrosh@panasas.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=tao.peng@emc.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).