From: Fred Isaman <iisaman@netapp.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH 20/22] pnfs_submit: filelayout: rewrite filelayout_commit to use new API
Date: Sat, 15 May 2010 21:23:11 -0400 [thread overview]
Message-ID: <1273972993-15369-21-git-send-email-iisaman@netapp.com> (raw)
In-Reply-To: <1273972993-15369-20-git-send-email-iisaman@netapp.com>
In the process, give it a much-needed rewrite.
Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
fs/nfs/nfs4filelayout.c | 192 ++++++++++++++++++++++++++---------------------
fs/nfs/write.c | 9 ++
2 files changed, 115 insertions(+), 86 deletions(-)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 789706e..6edecc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -530,8 +530,7 @@ filelayout_clone_write_data(struct nfs_write_data *old)
nfs_fattr_init(&new->fattr);
new->res.verf = &new->verf;
new->args.context = get_nfs_open_context(old->args.context);
- new->pdata.lseg = old->pdata.lseg;
- kref_get(&new->pdata.lseg->kref);
+ new->pdata.lseg = NULL;
new->pdata.call_ops = old->pdata.call_ops;
new->pdata.how = old->pdata.how;
out:
@@ -559,103 +558,124 @@ enum pnfs_try_status
filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
struct nfs_write_data *data)
{
- struct nfs4_filelayout_segment *nfslay;
- struct nfs_write_data *dsdata = NULL;
+ LIST_HEAD(head);
+ struct nfs_page *req;
+ loff_t file_offset = 0;
+ u16 idx, i;
+ struct list_head **ds_page_list = NULL;
+ u16 *indices_used;
+ int num_indices_seen = 0;
+ const struct rpc_call_ops *call_ops;
+ struct rpc_clnt *clnt;
+ struct nfs_write_data **clone_list = NULL;
+ struct nfs_write_data *dsdata;
struct nfs4_pnfs_ds *ds;
- struct nfs_page *req, *reqt;
- struct list_head *pos, *tmp, head, head2;
- loff_t file_offset, comp_offset;
- enum pnfs_try_status trypnfs = PNFS_ATTEMPTED;
- u32 idx1, idx2;
- nfslay = LSEG_LD_DATA(data->pdata.lseg);
-
- dprintk("%s data %p pnfs_client %p nfslay %p sync %d\n",
- __func__, data, data->fldata.pnfs_client, nfslay, sync);
-
- data->fldata.commit_through_mds = nfslay->commit_through_mds;
- if (nfslay->commit_through_mds) {
- dprintk("%s data %p commit through mds\n", __func__, data);
- return PNFS_NOT_ATTEMPTED;
- }
-
- INIT_LIST_HEAD(&head);
- INIT_LIST_HEAD(&head2);
- list_add(&head, &data->pages);
- list_del_init(&data->pages);
-
- /* COMMIT to each Data Server */
- while (!list_empty(&head)) {
- req = nfs_list_entry(head.next);
-
- file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
-
- /* Get dserver for the current page */
- idx1 = nfs4_fl_calc_ds_index(data->pdata.lseg, file_offset);
- ds = nfs4_fl_prepare_ds(data->pdata.lseg, idx1);
- if (!ds) {
- data->pdata.pnfs_error = -EIO;
- goto err_rewind;
+ dprintk("%s data %p pnfs_client %p sync %d\n",
+ __func__, data, data->fldata.pnfs_client, sync);
+
+ /* Alloc room for both in one go */
+ ds_page_list = kzalloc((NFS4_PNFS_MAX_MULTI_CNT + 1) *
+ (sizeof(u16) + sizeof(struct list_head *)),
+ GFP_KERNEL);
+ if (!ds_page_list)
+ goto mem_error;
+ indices_used = (u16 *) (ds_page_list + NFS4_PNFS_MAX_MULTI_CNT + 1);
+
+ /* Sort pages based on which ds to send to.
+ * MDS is given index equal to NFS4_PNFS_MAX_MULTI_CNT.
+ * Note we are assuming there is only a single lseg in play.
+ * When that is not true, we could first sort on lseg, then
+ * sort within each as we do here.
+ */
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+ if (!req->wb_lseg ||
+ ((struct nfs4_filelayout_segment *)
+ LSEG_LD_DATA(req->wb_lseg))->commit_through_mds)
+ idx = NFS4_PNFS_MAX_MULTI_CNT;
+ else {
+ file_offset = (loff_t)req->wb_index << PAGE_CACHE_SHIFT;
+ idx = nfs4_fl_calc_ds_index(req->wb_lseg, file_offset);
}
-
- /* Gather all pages going to the current data server by
- * comparing their indices.
- * XXX: This recalculates the indices unecessarily.
- * One idea would be to calc the index for every page
- * and then compare if they are the same. */
- list_for_each_safe(pos, tmp, &head) {
- reqt = nfs_list_entry(pos);
- comp_offset = (loff_t)reqt->wb_index << PAGE_CACHE_SHIFT;
- idx2 = nfs4_fl_calc_ds_index(data->pdata.lseg,
- comp_offset);
- if (idx1 == idx2) {
- nfs_list_remove_request(reqt);
- nfs_list_add_request(reqt, &head2);
- }
+ if (ds_page_list[idx]) {
+ /* Already seen this idx */
+ list_add(&req->wb_list, ds_page_list[idx]);
+ } else {
+ /* New idx not seen so far */
+ list_add_tail(&req->wb_list, &head);
+ indices_used[num_indices_seen++] = idx;
}
-
- if (!list_empty(&head)) {
- dsdata = filelayout_clone_write_data(data);
- if (!dsdata) {
- /* return pages back to head */
- list_splice(&head2, &head);
- INIT_LIST_HEAD(&head2);
- data->pdata.pnfs_error = -ENOMEM;
- goto err_rewind;
- }
+ ds_page_list[idx] = &req->wb_list;
+ }
+ /* Once created, clone must be released via call_op */
+ clone_list = kzalloc(num_indices_seen *
+ sizeof(struct nfs_write_data *), GFP_KERNEL);
+ if (!clone_list)
+ goto mem_error;
+ for (i = 0; i < num_indices_seen - 1; i++) {
+ clone_list[i] = filelayout_clone_write_data(data);
+ if (!clone_list[i])
+ goto mem_error;
+ }
+ clone_list[i] = data;
+ /* Now send off the RPCs to each ds. Note that it is important
+ * that any RPC to the MDS be sent last (or at least after all
+ * clones have been made.)
+ */
+ for (i = 0; i < num_indices_seen; i++) {
+ dsdata = clone_list[i];
+ idx = indices_used[i];
+ list_cut_position(&dsdata->pages, &head, ds_page_list[idx]);
+ if (idx == NFS4_PNFS_MAX_MULTI_CNT) {
+ call_ops = data->pdata.call_ops;;
+ clnt = NFS_CLIENT(dsdata->inode);
+ ds = NULL;
} else {
- dsdata = data;
+ call_ops = &filelayout_commit_call_ops;
+ req = nfs_list_entry(dsdata->pages.next);
+ ds = nfs4_fl_prepare_ds(req->wb_lseg, idx);
+ if (!ds) {
+ /* Trigger retry of this chunk through MDS */
+ dsdata->task.tk_status = -EIO;
+ data->pdata.call_ops->rpc_release(dsdata);
+ continue;
+ }
+ clnt = ds->ds_clp->cl_rpcclient;
+ dsdata->fldata.pnfs_client = clnt;
+ dsdata->fldata.ds_nfs_client = ds->ds_clp;
+ dsdata->args.fh = \
+ nfs4_fl_select_ds_fh(LSEG_LD_DATA(req->wb_lseg),
+ idx);
}
-
- list_add(&dsdata->pages, &head2);
- list_del_init(&head2);
-
- dsdata->fldata.pnfs_client = ds->ds_clp->cl_rpcclient;
- dsdata->fldata.ds_nfs_client = ds->ds_clp;
- dsdata->args.fh = nfs4_fl_select_ds_fh(nfslay, idx1);
-
dprintk("%s: Initiating commit: %llu USE DS:\n",
__func__, file_offset);
print_ds(ds);
/* Send COMMIT to data server */
- nfs_initiate_commit(dsdata, dsdata->fldata.pnfs_client,
- &filelayout_commit_call_ops, sync);
+ nfs_initiate_commit(dsdata, clnt, call_ops, sync);
}
+ kfree(clone_list);
+ kfree(ds_page_list);
+ data->pdata.pnfs_error = 0;
+ return PNFS_ATTEMPTED;
-out:
- if (data->pdata.pnfs_error)
- printk(KERN_ERR "%s: ERROR %d\n", __func__,
- data->pdata.pnfs_error);
-
- /* XXX should we send COMMIT to MDS e.g. not free data and return 1 ? */
- return trypnfs;
-err_rewind:
- /* put remaining pages back onto the original data->pages */
- list_add(&data->pages, &head);
- list_del_init(&head);
- trypnfs = PNFS_NOT_ATTEMPTED;
- goto out;
+ mem_error:
+ if (clone_list) {
+ for (i = 0; i < num_indices_seen - 1; i++) {
+ if (!clone_list[i])
+ break;
+ data->pdata.call_ops->rpc_release(clone_list[i]);
+ }
+ kfree(clone_list);
+ }
+ kfree(ds_page_list);
+ /* One of these will be empty, but doesn't hurt to do both */
+ nfs_mark_list_commit(&head);
+ nfs_mark_list_commit(&data->pages);
+ data->pdata.call_ops->rpc_release(data);
+ return PNFS_ATTEMPTED;
}
/* Return the stripesize for the specified file.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 28e4907..48aa4a9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1461,6 +1461,15 @@ static void nfs_commit_release(void *calldata)
req->wb_bytes,
(long long)req_offset(req));
if (status < 0) {
+ if (req->wb_lseg) {
+ struct pnfs_layout_segment *lseg = req->wb_lseg;
+
+ req->wb_lseg = NULL;
+ put_lseg(lseg);
+ dprintk(" retry through MDS\n");
+ nfs_mark_request_dirty(req);
+ goto next;
+ }
nfs_context_set_write_error(req->wb_context, status);
nfs_inode_remove_request(req);
dprintk(", error = %d\n", status);
--
1.6.6.1
next prev parent reply other threads:[~2010-05-20 10:30 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-16 1:22 [PATCH 00/22] LAYOUTGET invocation Fred Isaman
2010-05-16 1:22 ` [PATCH 01/22] Revert "pnfs-nonfilelayout: Prelim support for non-file layout O_DIRECT" Fred Isaman
2010-05-16 1:22 ` [PATCH 02/22] Revert "pnfs: Enable O_DIRECT write path." Fred Isaman
2010-05-16 1:22 ` [PATCH 03/22] Revert "pnfs: Enable O_DIRECT read path." Fred Isaman
2010-05-16 1:22 ` [PATCH 04/22] Revert "pnfs: Add function to set up O_DIRECT I/O" Fred Isaman
2010-05-16 1:22 ` [PATCH 05/22] pnfs: filelayout: clean and breakup nfs4_pnfs_dserver_get Fred Isaman
2010-05-16 1:22 ` [PATCH 06/22] pnfs: filelayout: remove some dead code from filelayout_commit Fred Isaman
2010-05-16 1:22 ` [PATCH 07/22] pnfs: remove PNFS_LAYOUTGET_ON_OPEN Fred Isaman
2010-05-16 1:22 ` [PATCH 08/22] pnfs: track the number of outstanding commits Fred Isaman
2010-05-16 1:23 ` [PATCH 09/22] pnfs_submit: mandate basic io path operations for layout drivers Fred Isaman
2010-05-16 1:23 ` [PATCH 10/22] pnfs_submit: expose pnfs_update_layout, put_lseg, and get_lseg functions Fred Isaman
2010-05-16 1:23 ` [PATCH 11/22] pnfs_submit: stash and refcount lseg in read path Fred Isaman
2010-05-16 1:23 ` [PATCH 12/22] pnfs_submit: read path changeover Fred Isaman
2010-05-16 1:23 ` [PATCH 13/22] pnfs_submit: use fsdata to pass lseg Fred Isaman
2010-05-16 1:23 ` [PATCH 14/22] pnfs_submit: stash and refcount lseg in write path Fred Isaman
2010-05-16 1:23 ` [PATCH 15/22] pnfs_submit: remove pnfs_file_operations Fred Isaman
2010-05-16 1:23 ` [PATCH 16/22] pnfs_submit: remove pnfs_update_layout_commit Fred Isaman
2010-05-16 1:23 ` [PATCH 17/22] pnfs_submit: remove pnfs_writepages LAYOUTGET invocation Fred Isaman
2010-05-16 1:23 ` [PATCH 18/22] pnfs: export some commit error handling for use by layout drivers Fred Isaman
2010-05-16 1:23 ` [PATCH 19/22] pnfs_submit: API change: remove pnfs_commit layoutget invocation Fred Isaman
2010-05-16 1:23 ` Fred Isaman [this message]
2010-05-16 1:23 ` [PATCH 21/22] pnfs_submit: remove unecessary pnfs_fl_call_data field pnfs_client Fred Isaman
2010-05-16 1:23 ` [PATCH 22/22] pnfs_submit: remove unecessary pnfs_fl_call_data field commit_through_mds Fred Isaman
2010-05-25 18:27 ` [PATCH 00/22] LAYOUTGET invocation Dean Hildebrand
2010-05-25 19:03 ` Fred Isaman
2010-05-25 20:14 ` Dean Hildebrand
2010-05-26 8:43 ` Boaz Harrosh
2010-05-26 17:39 ` Dean Hildebrand
2010-05-26 17:58 ` Fred Isaman
2010-05-26 18:13 ` Boaz Harrosh
2010-05-26 18:53 ` Dean Hildebrand
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1273972993-15369-21-git-send-email-iisaman@netapp.com \
--to=iisaman@netapp.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).