linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Fred Isaman <iisaman@netapp.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH 20/22] pnfs_submit: filelayout: rewrite filelayout_commit to use new API
Date: Sat, 15 May 2010 21:23:11 -0400	[thread overview]
Message-ID: <1273972993-15369-21-git-send-email-iisaman@netapp.com> (raw)
In-Reply-To: <1273972993-15369-20-git-send-email-iisaman@netapp.com>

In the process, give it a much-needed rewrite.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/nfs4filelayout.c |  192 ++++++++++++++++++++++++++---------------------
 fs/nfs/write.c          |    9 ++
 2 files changed, 115 insertions(+), 86 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 789706e..6edecc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -530,8 +530,7 @@ filelayout_clone_write_data(struct nfs_write_data *old)
 	nfs_fattr_init(&new->fattr);
 	new->res.verf    = &new->verf;
 	new->args.context = get_nfs_open_context(old->args.context);
-	new->pdata.lseg = old->pdata.lseg;
-	kref_get(&new->pdata.lseg->kref);
+	new->pdata.lseg = NULL;
 	new->pdata.call_ops = old->pdata.call_ops;
 	new->pdata.how = old->pdata.how;
 out:
@@ -559,103 +558,124 @@ enum pnfs_try_status
 filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
 		  struct nfs_write_data *data)
 {
-	struct nfs4_filelayout_segment *nfslay;
-	struct nfs_write_data   *dsdata = NULL;
+	LIST_HEAD(head);
+	struct nfs_page *req;
+	loff_t file_offset = 0;
+	u16 idx, i;
+	struct list_head **ds_page_list = NULL;
+	u16 *indices_used;
+	int num_indices_seen = 0;
+	const struct rpc_call_ops *call_ops;
+	struct rpc_clnt *clnt;
+	struct nfs_write_data **clone_list = NULL;
+	struct nfs_write_data *dsdata;
 	struct nfs4_pnfs_ds *ds;
-	struct nfs_page *req, *reqt;
-	struct list_head *pos, *tmp, head, head2;
-	loff_t file_offset, comp_offset;
-	enum pnfs_try_status trypnfs = PNFS_ATTEMPTED;
-	u32 idx1, idx2;
 
-	nfslay = LSEG_LD_DATA(data->pdata.lseg);
-
-	dprintk("%s data %p pnfs_client %p nfslay %p sync %d\n",
-		__func__, data, data->fldata.pnfs_client, nfslay, sync);
-
-	data->fldata.commit_through_mds = nfslay->commit_through_mds;
-	if (nfslay->commit_through_mds) {
-		dprintk("%s data %p commit through mds\n", __func__, data);
-		return PNFS_NOT_ATTEMPTED;
-	}
-
-	INIT_LIST_HEAD(&head);
-	INIT_LIST_HEAD(&head2);
-	list_add(&head, &data->pages);
-	list_del_init(&data->pages);
-
-	/* COMMIT to each Data Server */
-	while (!list_empty(&head)) {
-		req = nfs_list_entry(head.next);
-
-		file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
-
-		/* Get dserver for the current page */
-		idx1 = nfs4_fl_calc_ds_index(data->pdata.lseg, file_offset);
-		ds = nfs4_fl_prepare_ds(data->pdata.lseg, idx1);
-		if (!ds) {
-			data->pdata.pnfs_error = -EIO;
-			goto err_rewind;
+	dprintk("%s data %p pnfs_client %p sync %d\n",
+		__func__, data, data->fldata.pnfs_client, sync);
+
+	/* Alloc room for both in one go */
+	ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
+			       (sizeof(u16) + sizeof(struct list_head *)),
+			       GFP_KERNEL);
+	if (!ds_page_list)
+		goto mem_error;
+	indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
+
+	/* Sort pages based on which ds to send to.
+	 * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
+	 * Note we are assuming there is only a single lseg in play.
+	 * When that is not true, we could first sort on lseg, then
+	 * sort within each as we do here.
+	 */
+	while (!list_empty(&data->pages)) {
+		req = nfs_list_entry(data->pages.next);
+		nfs_list_remove_request(req);
+		if (!req->wb_lseg ||
+		    ((struct nfs4_filelayout_segment *)
+		     LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
+			idx = NFS4_PNFS_MAX_MULTI_CNT;
+		else {
+			file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
+			idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
 		}
-
-		/* Gather all pages going to the current data server by
-		 * comparing their indices.
-		 * XXX: This recalculates the indices unecessarily.
-		 *      One idea would be to calc the index for every page
-		 *      and then compare if they are the same. */
-		list_for_each_safe(pos, tmp, &head) {
-			reqt = nfs_list_entry(pos);
-			comp_offset = (loff_t)reqt->wb_index << PAGE_CACHE_SHIFT;
-			idx2 = nfs4_fl_calc_ds_index(data->pdata.lseg,
-						     comp_offset);
-			if (idx1 == idx2) {
-				nfs_list_remove_request(reqt);
-				nfs_list_add_request(reqt, &head2);
-			}
+		if (ds_page_list[idx]) {
+			/* Already seen this idx */
+			list_add(&req->wb_list, ds_page_list[idx]);
+		} else {
+			/* New idx not seen so far */
+			list_add_tail(&req->wb_list, &head);
+			indices_used[num_indices_seen++] = idx;
 		}
-
-		if (!list_empty(&head)) {
-			dsdata = filelayout_clone_write_data(data);
-			if (!dsdata) {
-				/* return pages back to head */
-				list_splice(&head2, &head);
-				INIT_LIST_HEAD(&head2);
-				data->pdata.pnfs_error = -ENOMEM;
-				goto err_rewind;
-			}
+		ds_page_list[idx] = &req->wb_list;
+	}
+	/* Once created, clone must be released via call_op */
+	clone_list = kzalloc(num_indices_seen *
+			     sizeof(struct nfs_write_data *), GFP_KERNEL);
+	if (!clone_list)
+		goto mem_error;
+	for (i = 0; i < num_indices_seen - 1; i++) {
+		clone_list[i] = filelayout_clone_write_data(data);
+		if (!clone_list[i])
+			goto mem_error;
+	}
+	clone_list[i] = data;
+	/* Now send off the RPCs to each ds.  Note that it is important
+	 * that any RPC to the MDS be sent last (or at least after all
+	 * clones have been made.)
+	 */
+	for (i = 0; i < num_indices_seen; i++) {
+		dsdata = clone_list[i];
+		idx = indices_used[i];
+		list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
+		if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
+			call_ops = data->pdata.call_ops;;
+			clnt = NFS_CLIENT(dsdata->inode);
+			ds = NULL;
 		} else {
-			dsdata = data;
+			call_ops = &filelayout_commit_call_ops;
+			req = nfs_list_entry(dsdata->pages.next);
+			ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
+			if (!ds) {
+				/* Trigger retry of this chunk through MDS */
+				dsdata->task.tk_status = -EIO;
+				data->pdata.call_ops->rpc_release(dsdata);
+				continue;
+			}
+			clnt = ds->ds_clp->cl_rpcclient;
+			dsdata->fldata.pnfs_client = clnt;
+			dsdata->fldata.ds_nfs_client = ds->ds_clp;
+			dsdata->args.fh = \
+				nfs4_fl_select_ds_fh(LSEG_LD_DATA(req->wb_lseg),
+						     idx);
 		}
-
-		list_add(&dsdata->pages, &head2);
-		list_del_init(&head2);
-
-		dsdata->fldata.pnfs_client = ds->ds_clp->cl_rpcclient;
-		dsdata->fldata.ds_nfs_client = ds->ds_clp;
-		dsdata->args.fh = nfs4_fl_select_ds_fh(nfslay, idx1);
-
 		dprintk("%s: Initiating commit: %llu USE DS:\n",
 			__func__, file_offset);
 		print_ds(ds);
 
 		/* Send COMMIT to data server */
-		nfs_initiate_commit(dsdata, dsdata->fldata.pnfs_client,
-				    &filelayout_commit_call_ops, sync);
+		nfs_initiate_commit(dsdata, clnt, call_ops, sync);
 	}
+	kfree(clone_list);
+	kfree(ds_page_list);
+	data->pdata.pnfs_error = 0;
+	return PNFS_ATTEMPTED;
 
-out:
-	if (data->pdata.pnfs_error)
-		printk(KERN_ERR "%s: ERROR %d\n", __func__,
-		       data->pdata.pnfs_error);
-
-	/* XXX should we send COMMIT to MDS e.g. not free data and return 1 ? */
-	return trypnfs;
-err_rewind:
-	/* put remaining pages back onto the original data->pages */
-	list_add(&data->pages, &head);
-	list_del_init(&head);
-	trypnfs = PNFS_NOT_ATTEMPTED;
-	goto out;
+ mem_error:
+	if (clone_list) {
+		for (i = 0; i < num_indices_seen - 1; i++) {
+			if (!clone_list[i])
+				break;
+			data->pdata.call_ops->rpc_release(clone_list[i]);
+		}
+		kfree(clone_list);
+	}
+	kfree(ds_page_list);
+	/* One of these will be empty, but doesn't hurt to do both */
+	nfs_mark_list_commit(&head);
+	nfs_mark_list_commit(&data->pages);
+	data->pdata.call_ops->rpc_release(data);
+	return PNFS_ATTEMPTED;
 }
 
 /* Return the stripesize for the specified file.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 28e4907..48aa4a9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1461,6 +1461,15 @@ static void nfs_commit_release(void *calldata)
 			req->wb_bytes,
 			(long long)req_offset(req));
 		if (status < 0) {
+			if (req->wb_lseg) {
+				struct pnfs_layout_segment *lseg = req->wb_lseg;
+
+				req->wb_lseg = NULL;
+				put_lseg(lseg);
+				dprintk(" retry through MDS\n");
+				nfs_mark_request_dirty(req);
+				goto next;
+			}
 			nfs_context_set_write_error(req->wb_context, status);
 			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", status);
-- 
1.6.6.1


  reply	other threads:[~2010-05-20 10:30 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-05-16  1:22 [PATCH 00/22] LAYOUTGET invocation Fred Isaman
2010-05-16  1:22 ` [PATCH 01/22] Revert "pnfs-nonfilelayout: Prelim support for non-file layout O_DIRECT" Fred Isaman
2010-05-16  1:22   ` [PATCH 02/22] Revert "pnfs: Enable O_DIRECT write path." Fred Isaman
2010-05-16  1:22     ` [PATCH 03/22] Revert "pnfs: Enable O_DIRECT read path." Fred Isaman
2010-05-16  1:22       ` [PATCH 04/22] Revert "pnfs: Add function to set up O_DIRECT I/O" Fred Isaman
2010-05-16  1:22         ` [PATCH 05/22] pnfs: filelayout: clean and breakup nfs4_pnfs_dserver_get Fred Isaman
2010-05-16  1:22           ` [PATCH 06/22] pnfs: filelayout: remove some dead code from filelayout_commit Fred Isaman
2010-05-16  1:22             ` [PATCH 07/22] pnfs: remove PNFS_LAYOUTGET_ON_OPEN Fred Isaman
2010-05-16  1:22               ` [PATCH 08/22] pnfs: track the number of outstanding commits Fred Isaman
2010-05-16  1:23                 ` [PATCH 09/22] pnfs_submit: mandate basic io path operations for layout drivers Fred Isaman
2010-05-16  1:23                   ` [PATCH 10/22] pnfs_submit: expose pnfs_update_layout, put_lseg, and get_lseg functions Fred Isaman
2010-05-16  1:23                     ` [PATCH 11/22] pnfs_submit: stash and refcount lseg in read path Fred Isaman
2010-05-16  1:23                       ` [PATCH 12/22] pnfs_submit: read path changeover Fred Isaman
2010-05-16  1:23                         ` [PATCH 13/22] pnfs_submit: use fsdata to pass lseg Fred Isaman
2010-05-16  1:23                           ` [PATCH 14/22] pnfs_submit: stash and refcount lseg in write path Fred Isaman
2010-05-16  1:23                             ` [PATCH 15/22] pnfs_submit: remove pnfs_file_operations Fred Isaman
2010-05-16  1:23                               ` [PATCH 16/22] pnfs_submit: remove pnfs_update_layout_commit Fred Isaman
2010-05-16  1:23                                 ` [PATCH 17/22] pnfs_submit: remove pnfs_writepages LAYOUTGET invocation Fred Isaman
2010-05-16  1:23                                   ` [PATCH 18/22] pnfs: export some commit error handling for use by layout drivers Fred Isaman
2010-05-16  1:23                                     ` [PATCH 19/22] pnfs_submit: API change: remove pnfs_commit layoutget invocation Fred Isaman
2010-05-16  1:23                                       ` Fred Isaman [this message]
2010-05-16  1:23                                         ` [PATCH 21/22] pnfs_submit: remove unecessary pnfs_fl_call_data field pnfs_client Fred Isaman
2010-05-16  1:23                                           ` [PATCH 22/22] pnfs_submit: remove unecessary pnfs_fl_call_data field commit_through_mds Fred Isaman
2010-05-25 18:27 ` [PATCH 00/22] LAYOUTGET invocation Dean Hildebrand
2010-05-25 19:03   ` Fred Isaman
2010-05-25 20:14     ` Dean Hildebrand
2010-05-26  8:43       ` Boaz Harrosh
2010-05-26 17:39         ` Dean Hildebrand
2010-05-26 17:58           ` Fred Isaman
2010-05-26 18:13           ` Boaz Harrosh
2010-05-26 18:53             ` Dean Hildebrand

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1273972993-15369-21-git-send-email-iisaman@netapp.com \
    --to=iisaman@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).