Linux NFS development
 help / color / mirror / Atom feed
* [PATCH] Smooth out NFS client writeback
@ 2005-06-02  1:38 Shantanu Goel
  2005-06-02  1:56 ` Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Shantanu Goel @ 2005-06-02  1:38 UTC (permalink / raw)
  To: trond.myklebust, nfs

[-- Attachment #1: Type: text/plain, Size: 875 bytes --]

Hi Trond,

The current NFS client can cause a program to stall
for long periods of time because it flushes all dirty
pages at once.  The attached patch addresses this by
only writing back the amount requested by the VM
layer.  It also reduces the # commit requests by
waiting for some writeback to complete before issuing
a commit.  The patch also speeds up writebacks of
mmap'ed data by accumulating dirty pages but sending
commits earlier and omitting FLUSH_STABLE.

As part of the patch, I reinstated specifying the
commit range because I observed a marked speed up when
testing on a filesystem mounted from a Solaris 2.10
server.

The patch is against 2.6.12-rc5 w/NFS_ALL.

Thanks,
Shantanu

__________________________________________________
Do You Yahoo!?
Tired of spam?  Yahoo! Mail has the best spam protection around 
http://mail.yahoo.com 

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 2449169141-nfs-writeback.patch --]
[-- Type: text/x-patch; name="nfs-writeback.patch", Size: 18142 bytes --]

diff -ur -x '*~' 00-trond/fs/nfs/inode.c 01-nfs-writeback/fs/nfs/inode.c
--- 00-trond/fs/nfs/inode.c	2005-06-01 18:30:38.000000000 -0400
+++ 01-nfs-writeback/fs/nfs/inode.c	2005-06-01 18:31:00.000000000 -0400
@@ -60,7 +60,6 @@
 
 static struct inode *nfs_alloc_inode(struct super_block *sb);
 static void nfs_destroy_inode(struct inode *);
-static int nfs_write_inode(struct inode *,int);
 static void nfs_delete_inode(struct inode *);
 static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
@@ -74,7 +73,6 @@
 static struct super_operations nfs_sops = { 
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
-	.write_inode	= nfs_write_inode,
 	.delete_inode	= nfs_delete_inode,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs_clear_inode,
@@ -187,18 +185,6 @@
 	return nfs_fileid_to_ino_t(fattr->fileid);
 }
 
-static int
-nfs_write_inode(struct inode *inode, int sync)
-{
-	int flags = sync ? FLUSH_WAIT : 0;
-	int ret;
-
-	ret = nfs_commit_inode(inode, flags);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 static void
 nfs_delete_inode(struct inode * inode)
 {
@@ -1904,7 +1890,6 @@
 static struct super_operations nfs4_sops = { 
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
-	.write_inode	= nfs_write_inode,
 	.delete_inode	= nfs_delete_inode,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs4_clear_inode,
diff -ur -x '*~' 00-trond/fs/nfs/pagelist.c 01-nfs-writeback/fs/nfs/pagelist.c
--- 00-trond/fs/nfs/pagelist.c	2005-06-01 18:30:38.000000000 -0400
+++ 01-nfs-writeback/fs/nfs/pagelist.c	2005-06-01 18:31:00.000000000 -0400
@@ -282,7 +282,8 @@
  */
 int
 nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
-	      unsigned long idx_start, unsigned int npages)
+		    unsigned long idx_start, unsigned int npages,
+		    unsigned int max)
 {
 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 	struct nfs_page *req;
@@ -317,6 +318,8 @@
 				res++;
 			}
 		}
+		if (max > 0 && res >= max)
+			break;
 	}
 out:
 	return res;
diff -ur -x '*~' 00-trond/fs/nfs/write.c 01-nfs-writeback/fs/nfs/write.c
--- 00-trond/fs/nfs/write.c	2005-06-01 18:30:38.000000000 -0400
+++ 01-nfs-writeback/fs/nfs/write.c	2005-06-01 18:31:00.000000000 -0400
@@ -70,6 +70,9 @@
 #define MIN_POOL_WRITE		(32)
 #define MIN_POOL_COMMIT		(4)
 
+#define NFS_WRITE_CLUSTER	((256*1024) >> PAGE_CACHE_SHIFT)
+#define NFS_COMMIT_CLUSTER	((4*1024*1024) >> PAGE_CACHE_SHIFT)
+
 /*
  * Local function declarations
  */
@@ -79,16 +82,60 @@
 					    unsigned int, unsigned int);
 static void nfs_writeback_done_partial(struct nfs_write_data *, int);
 static void nfs_writeback_done_full(struct nfs_write_data *, int);
-static int nfs_wait_on_write_congestion(struct address_space *, int);
+static int nfs_wait_on_write_congestion(struct inode *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-			   unsigned int npages, int how);
+			   unsigned int npages, int how, unsigned int max);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int nfs_commit_inode(struct inode *inode, int how);
+static int nfs_commit_check(struct inode *inode, int how, unsigned int thresh);
+#else
+static inline int nfs_commit_inode(struct inode *inode, int how)
+{
+	return 0;
+}
+
+static inline int nfs_commit_check(struct inode *inode, int how, unsigned int thresh)
+{
+	return 0;
+}
+#endif
 
 static kmem_cache_t *nfs_wdata_cachep;
 mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
+static unsigned int nfs_write_throttle;
+
+static inline unsigned int min_wb(struct inode *inode)
+{
+	return max((unsigned int)NFS_WRITE_CLUSTER, NFS_SERVER(inode)->wpages);
+}
+
+static inline int is_congested(struct nfs_inode *nfsi)
+{
+	return (nfsi->npages - nfsi->ndirty - nfsi->ncommit) >= nfs_write_throttle;
+}
+
+static inline int mod_count(struct nfs_inode *nfsi, unsigned int *count, int x)
+{
+	int congested = is_congested(nfsi);
+
+	*count += x;
+	return congested && !is_congested(nfsi);
+}
+
+static int nfs_congested(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int congested;
+
+	spin_lock(&nfsi->req_lock);
+	congested = is_congested(nfsi);
+	spin_unlock(&nfsi->req_lock);
+	return congested;
+}
 
 static inline struct nfs_write_data *nfs_commit_alloc(void)
 {
@@ -269,6 +316,7 @@
 	loff_t i_size = i_size_read(inode);
 	int inode_referenced = 0;
 	int priority = wb_priority(wbc);
+	int is_async;
 	int err;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
@@ -284,6 +332,24 @@
 	if (igrab(inode) != 0)
 		inode_referenced = 1;
 	end_index = i_size >> PAGE_CACHE_SHIFT;
+	is_async = !IS_SYNC(inode) && inode_referenced;
+
+	/*
+	 * If we were called due to memory pressure, avoid
+	 * further pressure if there are many writebacks outstanding.
+	 */
+	if (wbc->for_reclaim && is_async) {
+		if (nfs_congested(inode)) {
+			if (wbc->nonblocking) {
+				err = 0;
+				redirty_page_for_writepage(wbc, page);
+				wbc->encountered_congestion = 1;
+				goto out;
+			}
+			nfs_wait_on_write_congestion(inode, 0);
+		}
+		nfs_commit_check(inode, priority, NFS_WRITE_CLUSTER);
+	}
 
 	/* Ensure we've flushed out any previous writes */
 	nfs_wb_page_priority(inode, page, priority);
@@ -305,12 +371,12 @@
 		goto out;
 	}
 	lock_kernel();
-	if (!IS_SYNC(inode) && inode_referenced) {
+	if (is_async) {
 		err = nfs_writepage_async(ctx, inode, page, 0, offset);
 		if (err >= 0) {
 			err = 0;
-			if (wbc->for_reclaim)
-				nfs_flush_inode(inode, 0, 0, FLUSH_STABLE);
+			if (wbc->for_reclaim && NFS_I(inode)->ndirty >= NFS_WRITE_CLUSTER)
+				nfs_flush_inode(inode, 0, 0, priority, min_wb(inode));
 		}
 	} else {
 		err = nfs_writepage_sync(ctx, inode, page, 0,
@@ -331,43 +397,77 @@
 }
 
 /*
- * Note: causes nfs_update_request() to block on the assumption
- * 	 that the writeback is generated due to memory pressure.
+ * Note: We make the assumption that pdflush()'s due to dirty
+ * thresholds are asynchronous so we can build up large commit lists
+ * during synchronous writes and still do the right thing when memory
+ * needs to be reclaimed.
  */
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct inode *inode = mapping->host;
-	int err;
+	long nr = wbc->nr_to_write;
+	int priority = wb_priority(wbc);
+	int nr_commit = 0;
+	int err, is_sync;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-	err = generic_writepages(mapping, wbc);
-	if (err)
-		return err;
-	while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
-		if (wbc->nonblocking)
-			return 0;
-		nfs_wait_on_write_congestion(mapping, 0);
+	is_sync = !wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL;
+
+	for (;;) {
+		if (nfs_congested(inode)) {
+			if (wbc->nonblocking) {
+				wbc->encountered_congestion = 1;
+				break;
+			}
+			nfs_wait_on_write_congestion(inode, 0);
+		}
+		if (!is_sync) {
+			err = nfs_commit_check(inode, priority, 0);
+			if (err < 0)
+				return err;
+			if (err > 0) {
+				nr_commit++;
+				nr -= err;
+				if (nr <= 0)
+					break;
+				continue;
+			}
+		}
+		err = nfs_flush_inode(inode, 0, 0, priority, min_wb(inode));
+		if (err < 0)
+			return err;
+		if (err > 0) {
+			nr -= err;
+			if (nr <= 0)
+				break;
+			continue;
+		}
+		if (wbc->nr_to_write > 0) {
+			long n = wbc->nr_to_write;
+
+			err = generic_writepages(mapping, wbc);
+			if (err)
+				return err;
+			if (n - wbc->nr_to_write > 0)
+				continue;
+		}
+		break;
 	}
-	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
-	if (err < 0)
-		goto out;
-	wbc->nr_to_write -= err;
-	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
+	if (is_sync) {
 		err = nfs_wait_on_requests(inode, 0, 0);
 		if (err < 0)
-			goto out;
+			return err;
+		priority |= FLUSH_SYNC;
 	}
-	err = nfs_commit_inode(inode, wb_priority(wbc));
-	if (err > 0) {
-		wbc->nr_to_write -= err;
-		err = 0;
+	if (is_sync || (wbc->for_kupdate && !nr_commit)) {
+		err = nfs_commit_inode(inode, priority);
+		if (err < 0)
+			return err;
+		nr -= err;
 	}
-out:
-	clear_bit(BDI_write_congested, &bdi->state);
-	wake_up_all(&nfs_write_congestion);
-	return err;
+	wbc->nr_to_write = nr;
+	return 0;
 }
 
 /*
@@ -400,12 +500,13 @@
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int need_wakeup;
 
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
 	spin_lock(&nfsi->req_lock);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
-	nfsi->npages--;
+	need_wakeup = mod_count(nfsi, &nfsi->npages, -1);
 	if (!nfsi->npages) {
 		spin_unlock(&nfsi->req_lock);
 		nfs_end_data_update(inode);
@@ -414,6 +515,8 @@
 		spin_unlock(&nfsi->req_lock);
 	nfs_clear_request(req);
 	nfs_release_request(req);
+	if (need_wakeup)
+		wake_up_all(&nfs_write_congestion);
 }
 
 /*
@@ -451,15 +554,18 @@
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int need_wakeup;
 
 	spin_lock(&nfsi->req_lock);
 	radix_tree_tag_set(&nfsi->nfs_page_tree,
 			req->wb_index, NFS_PAGE_TAG_DIRTY);
 	nfs_list_add_request(req, &nfsi->dirty);
-	nfsi->ndirty++;
+	need_wakeup = mod_count(nfsi, &nfsi->ndirty, 1);
 	spin_unlock(&nfsi->req_lock);
 	inc_page_state(nr_dirty);
 	mark_inode_dirty(inode);
+	if (need_wakeup)
+		wake_up_all(&nfs_write_congestion);
 }
 
 /*
@@ -481,13 +587,16 @@
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
+	int need_wakeup;
 
 	spin_lock(&nfsi->req_lock);
 	nfs_list_add_request(req, &nfsi->commit);
-	nfsi->ncommit++;
+	need_wakeup = mod_count(nfsi, &nfsi->ncommit, 1);
 	spin_unlock(&nfsi->req_lock);
 	inc_page_state(nr_unstable);
 	mark_inode_dirty(inode);
+	if (need_wakeup)
+		wake_up_all(&nfs_write_congestion);
 }
 #endif
 
@@ -543,13 +652,15 @@
  * The requests are *not* checked to ensure that they form a contiguous set.
  */
 static int
-nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+nfs_scan_dirty(struct inode *inode, struct list_head *dst,
+	       unsigned long idx_start, unsigned int npages,
+	       unsigned int max)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int res = 0;
 
 	if (nfsi->ndirty != 0) {
-		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
+		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages, max);
 		nfsi->ndirty -= res;
 		sub_page_state(nr_dirty,res);
 		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
@@ -585,35 +696,38 @@
 }
 #endif
 
-static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
+static int nfs_wait_on_write_congestion(struct inode *inode, int intr)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	DEFINE_WAIT(wait);
 	int ret = 0;
+	int done = 0;
 
 	might_sleep();
 
-	if (!bdi_write_congested(bdi))
-		return 0;
-	if (intr) {
-		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
-		sigset_t oldset;
-
-		rpc_clnt_sigmask(clnt, &oldset);
-		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
-		if (bdi_write_congested(bdi)) {
-			if (signalled())
-				ret = -ERESTARTSYS;
-			else
+	do {
+		if (intr) {
+			struct rpc_clnt *clnt = NFS_CLIENT(inode);
+			sigset_t oldset;
+
+			rpc_clnt_sigmask(clnt, &oldset);
+			prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
+			if (nfs_congested(inode)) {
+				if (signalled())
+					ret = -ERESTARTSYS;
+				else
+					schedule();
+			} else
+				done = 1;
+			rpc_clnt_sigunmask(clnt, &oldset);
+		} else {
+			prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
+			if (nfs_congested(inode))
 				schedule();
+			else
+				done = 1;
 		}
-		rpc_clnt_sigunmask(clnt, &oldset);
-	} else {
-		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
-		if (bdi_write_congested(bdi))
-			schedule();
-	}
-	finish_wait(&nfs_write_congestion, &wait);
+		finish_wait(&nfs_write_congestion, &wait);
+	} while (ret == 0 && !done);
 	return ret;
 }
 
@@ -629,15 +743,12 @@
 		struct inode *inode, struct page *page,
 		unsigned int offset, unsigned int bytes)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page		*req, *new = NULL;
 	unsigned long		rqend, end;
 
 	end = offset + bytes;
 
-	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
-		return ERR_PTR(-ERESTARTSYS);
 	for (;;) {
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
@@ -1228,24 +1339,36 @@
 		struct nfs_write_data *data, int how)
 {
 	struct rpc_task		*task = &data->task;
-	struct nfs_page		*first;
+	struct nfs_page		*first, *last;
 	struct inode		*inode;
+	loff_t			start, end, len;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
 	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
+	last = nfs_list_entry(data->pages.prev);
 	inode = first->wb_context->dentry->d_inode;
 
+	/*
+	 * Determine the offset range of requests in the COMMIT call.
+	 * We rely on the fact that data->pages is an ordered list...
+	 */
+	start = req_offset(first);
+	end = req_offset(last) + last->wb_bytes;
+	len = end - start;
+	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
+	if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
+		len = 0;
+
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
 
 	data->args.fh     = NFS_FH(data->inode);
-	/* Note: we always request a commit of the entire inode */
-	data->args.offset = 0;
-	data->args.count  = 0;
-	data->res.count   = 0;
+	data->args.offset = start;
+	data->args.count  = len;
+	data->res.count   = len;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	
@@ -1338,7 +1461,7 @@
 #endif
 
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-			   unsigned int npages, int how)
+			   unsigned int npages, int how, unsigned int max)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
@@ -1346,7 +1469,7 @@
 				error = 0;
 
 	spin_lock(&nfsi->req_lock);
-	res = nfs_scan_dirty(inode, &head, idx_start, npages);
+	res = nfs_scan_dirty(inode, &head, idx_start, npages, max);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
 		struct nfs_server *server = NFS_SERVER(inode);
@@ -1364,14 +1487,13 @@
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-int nfs_commit_inode(struct inode *inode, int how)
+static int nfs_commit_inode_locked(struct inode *inode, int how)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
 	int			res,
 				error = 0;
 
-	spin_lock(&nfsi->req_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
@@ -1381,33 +1503,59 @@
 	}
 	return res;
 }
+
+static int nfs_commit_inode(struct inode *inode, int how)
+{
+	spin_lock(&NFS_I(inode)->req_lock);
+	return nfs_commit_inode_locked(inode, how);
+}
+
+static int nfs_commit_check(struct inode *inode, int how, unsigned int thresh)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+
+	spin_lock(&nfsi->req_lock);
+	if (nfsi->npages > 0) {
+		if (thresh == 0)
+			thresh = NFS_COMMIT_CLUSTER;
+		if (thresh > nfsi->npages)
+			thresh = nfsi->npages;
+		if (nfsi->ncommit >= thresh)
+			return nfs_commit_inode_locked(inode, how);
+	}
+	spin_unlock(&nfsi->req_lock);
+	return 0;
+}
 #endif
 
 int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
 		  unsigned int npages, int how)
 {
-	int	error,
+	int	error = 0,
 		wait;
 
 	wait = how & FLUSH_WAIT;
 	how &= ~FLUSH_WAIT;
 
 	do {
-		error = 0;
-		if (wait)
+		if (nfs_congested(inode)) {
+			if (!wait)
+				break;
+			nfs_wait_on_write_congestion(inode, 0);
+		}
+		error = nfs_flush_inode(inode, idx_start, npages, how, min_wb(inode));
+		if (error == 0 && wait)
 			error = nfs_wait_on_requests(inode, idx_start, npages);
 		if (error == 0)
-			error = nfs_flush_inode(inode, idx_start, npages, how);
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (error == 0)
 			error = nfs_commit_inode(inode, how);
-#endif
 	} while (error > 0);
 	return error;
 }
 
 int nfs_init_writepagecache(void)
 {
+	struct sysinfo si;
+
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
 					     sizeof(struct nfs_write_data),
 					     0, SLAB_HWCACHE_ALIGN,
@@ -1429,6 +1577,13 @@
 	if (nfs_commit_mempool == NULL)
 		return -ENOMEM;
 
+	si_meminfo(&si);
+	nfs_write_throttle = ((si.totalram / 100) * si.mem_unit) >> PAGE_CACHE_SHIFT;
+	if (nfs_write_throttle < NFS_WRITE_CLUSTER)
+		nfs_write_throttle = NFS_WRITE_CLUSTER;
+	if (nfs_write_throttle > NFS_COMMIT_CLUSTER)
+		nfs_write_throttle = NFS_COMMIT_CLUSTER;
+
 	return 0;
 }
 
diff -ur -x '*~' 00-trond/include/linux/nfs_fs.h 01-nfs-writeback/include/linux/nfs_fs.h
--- 00-trond/include/linux/nfs_fs.h	2005-06-01 18:30:38.000000000 -0400
+++ 01-nfs-writeback/include/linux/nfs_fs.h	2005-06-01 18:31:00.000000000 -0400
@@ -408,15 +408,6 @@
  * return value!)
  */
 extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern int  nfs_commit_inode(struct inode *, int);
-#else
-static inline int
-nfs_commit_inode(struct inode *inode, int how)
-{
-	return 0;
-}
-#endif
 
 static inline int
 nfs_have_writebacks(struct inode *inode)
diff -ur -x '*~' 00-trond/include/linux/nfs_page.h 01-nfs-writeback/include/linux/nfs_page.h
--- 00-trond/include/linux/nfs_page.h	2005-06-01 18:30:38.000000000 -0400
+++ 01-nfs-writeback/include/linux/nfs_page.h	2005-06-01 18:31:00.000000000 -0400
@@ -61,7 +61,7 @@
 
 
 extern  int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
-				unsigned long idx_start, unsigned int npages);
+				unsigned long idx_start, unsigned int npages, unsigned int max);
 extern	int nfs_scan_list(struct list_head *, struct list_head *,
 			  unsigned long, unsigned int);
 extern	int nfs_coalesce_requests(struct list_head *, struct list_head *,

^ permalink raw reply	[flat|nested] 13+ messages in thread
* RE: Re: [PATCH] Smooth out NFS client writeback
@ 2005-06-29 14:25 Lever, Charles
  2005-06-29 15:07 ` Peter Staubach
  0 siblings, 1 reply; 13+ messages in thread
From: Lever, Charles @ 2005-06-29 14:25 UTC (permalink / raw)
  To: Peter Staubach, Shantanu Goel; +Cc: Trond Myklebust, nfs

hi peter-

> On Solaris, at least with UFS as the underlying file system,=20
> the COMMIT
> operations are processed by looking through the entire cached=20
> page list
> or by doing page lookup operations on each individual page. =20
> If the entire
> file is specified, ie. len =3D 0, then the page list is walked.=20
>  If a range
> is specified, then just the pages within the range are looked up.
>=20
> Specifying the range can result in significantly less CPU=20
> overhead on the
> server.  This is why the NFSv3 COMMIT operation has a range=20
> which can be specified...  :-)

a server CPU inefficiency hardly qualifies as a client bug.  in the most
common cases where the client is creating and writing to a file, then
closing with a COMMIT(0,0) request, the server should be changed to
behave in a more efficient manner.

in other words, i think the client should optimize the number of
requests on the wire, and the server should optimize for using its CPU
and disks most efficiently.  i haven't looked closely at shantanu's
patch, but i'm a little leary of the change if it means more wire
operations are generated than before.


-------------------------------------------------------
SF.Net email is sponsored by: Discover Easy Linux Migration Strategies
from IBM. Find simple to follow Roadmaps, straightforward articles,
informative Webcasts and more! Get everything you need to get up to
speed, fast. http://ads.osdn.com/?ad_id=7477&alloc_id=16492&op=click
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 13+ messages in thread
* RE: Re: [PATCH] Smooth out NFS client writeback
@ 2005-06-29 15:35 Lever, Charles
  2005-06-29 15:50 ` Peter Staubach
  0 siblings, 1 reply; 13+ messages in thread
From: Lever, Charles @ 2005-06-29 15:35 UTC (permalink / raw)
  To: Peter Staubach; +Cc: Shantanu Goel, Trond Myklebust, nfs

> I don't think that there will be more over the wire=20
> operations generated
> when using the range versus always specifying the entire=20
> file.

committing the whole file at once is simply more efficient from a
network perspective when an application is performing random writes
(unless it is using O_SYNC).  there are some cases where it won't make a
difference whether a range or a whole file commit is used, but i think
it would be really hard to figure out a client-side heuristic to decide
which is better.

> I would claim that a client,
> which simply issues a blanket COMMIT(0,0), without already=20
> having gathered
> up the buffers/pages/whatever that need committing, is=20
> broken.  It will be
> unsafe because it will be subject to races like more data=20
> getting written
> with UNSTABLE while the COMMIT is happening.  This new data=20
> may or may not
> have been committed by the COMMIT.

the linux client already keeps track of the order of writes and commits
well enough that this isn't an issue.

> Bottom line for me -- if the client can do something to help=20
> the server to
> help the client and it is an overall win, then I think that=20
> it should do so...

we're talking about potentially adding complexity to the client and
increasing the number of write and commit operations on the wire, which
could have a negative performance impact in other environments (think
WAN).  all this to optimize a particular workload against a particular
server implementation.

i'm not saying this type of work shouldn't be explored, but as we
consider the change, we should be very careful especially since we don't
have adequate performance regression test coverage yet.


-------------------------------------------------------
SF.Net email is sponsored by: Discover Easy Linux Migration Strategies
from IBM. Find simple to follow Roadmaps, straightforward articles,
informative Webcasts and more! Get everything you need to get up to
speed, fast. http://ads.osdn.com/?ad_id=7477&alloc_id=16492&op=click
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2005-06-29 22:35 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-02  1:38 [PATCH] Smooth out NFS client writeback Shantanu Goel
2005-06-02  1:56 ` Trond Myklebust
2005-06-02  3:26   ` Shantanu Goel
2005-06-02  4:38     ` Trond Myklebust
2005-06-02 12:16       ` Shantanu Goel
2005-06-07  4:12         ` Trond Myklebust
2005-06-28 22:43           ` Shantanu Goel
2005-06-29 14:11             ` Peter Staubach
  -- strict thread matches above, loose matches on Subject: below --
2005-06-29 14:25 Lever, Charles
2005-06-29 15:07 ` Peter Staubach
2005-06-29 15:35 Lever, Charles
2005-06-29 15:50 ` Peter Staubach
2005-06-29 22:34   ` Shantanu Goel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox