Linux NFS development
 help / color / mirror / Atom feed
From: Tom Haynes <thomas.haynes@primarydata.com>
To: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Linux NFS Mailing list <linux-nfs@vger.kernel.org>
Subject: [PATCH v3 36/49] nfs: mirroring support for direct io
Date: Tue,  6 Jan 2015 16:28:41 -0800	[thread overview]
Message-ID: <1420590534-84063-37-git-send-email-loghyr@primarydata.com> (raw)
In-Reply-To: <1420590534-84063-1-git-send-email-loghyr@primarydata.com>

From: Weston Andros Adamson <dros@primarydata.com>

The current mirroring code only notices short writes to the first
mirror. This patch keeps per-mirror byte counts and only considers
a byte to be written once all mirrors report so.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
---
 fs/nfs/direct.c | 71 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 57 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0178d4f..651387b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -66,6 +66,10 @@ static struct kmem_cache *nfs_direct_cachep;
 /*
  * This represents a set of asynchronous requests that we're waiting on
  */
+struct nfs_direct_mirror {
+	ssize_t count;		/* good bytes accumulated for this mirror */
+};
+
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
@@ -78,6 +82,10 @@ struct nfs_direct_req {
 	/* completion state */
 	atomic_t		io_count;	/* i/os we're waiting for */
 	spinlock_t		lock;		/* protect completion state */
+
+	struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
+	int			mirror_count;
+
 	ssize_t			count,		/* bytes actually processed */
 				bytes_left,	/* bytes left to be sent */
 				error;		/* any reported error */
@@ -108,6 +116,29 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
+/* Add hdr->good_bytes to the total of mirror hdr->pgio_mirror_idx, then set
+ * dreq->count to the smallest per-mirror total, i.e. the bytes that every
+ * mirror has completed. */
+static void
+nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
+{
+	int i;
+	ssize_t count;
+
+	WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count);
+
+	dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
+
+	/* update the dreq->count by finding the minimum agreed count from all
+	 * mirrors */
+	count = dreq->mirrors[0].count;
+
+	for (i = 1; i < dreq->mirror_count; i++)
+		count = min(count, dreq->mirrors[i].count);
+
+	dreq->count = count;
+}
+
 /*
  * nfs_direct_select_verf - select the right verifier
  * @dreq - direct request possibly spanning multiple servers
@@ -241,6 +272,18 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
 }
 
+static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
+					     struct nfs_pageio_descriptor *pgio,
+					     struct nfs_page *req)
+{
+	int mirror_count = 1;	/* used when pg_ops has no pg_get_mirror_count */
+
+	if (pgio->pg_ops->pg_get_mirror_count)
+		mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
+
+	dreq->mirror_count = mirror_count;	/* stored unvalidated */
+}
+
 static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
 	struct nfs_direct_req *dreq;
@@ -255,6 +298,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
 	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
+	dreq->mirror_count = 1;
 	spin_lock_init(&dreq->lock);
 
 	return dreq;
@@ -360,14 +404,9 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 	spin_lock(&dreq->lock);
 	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
 		dreq->error = hdr->error;
-	else {
-		/*
-		 * FIXME: right now this only accounts for bytes written
-		 *        to the first mirror
-		 */
-		if (hdr->pgio_mirror_idx == 0)
-			dreq->count += hdr->good_bytes;
-	}
+	else
+		nfs_direct_good_bytes(dreq, hdr);
+
 	spin_unlock(&dreq->lock);
 
 	while (!list_empty(&hdr->pages)) {
@@ -598,17 +637,23 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	LIST_HEAD(reqs);
 	struct nfs_commit_info cinfo;
 	LIST_HEAD(failed);
+	int i;
 
 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
 
 	dreq->count = 0;
+	for (i = 0; i < dreq->mirror_count; i++)
+		dreq->mirrors[i].count = 0;
 	get_dreq(dreq);
 
 	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
+	req = nfs_list_entry(reqs.next);
+	nfs_direct_setup_mirroring(dreq, &desc, req);
+
 	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
 		if (!nfs_pageio_add_request(&desc, req)) {
 			nfs_list_remove_request(req);
@@ -730,12 +775,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		dreq->error = hdr->error;
 	}
 	if (dreq->error == 0) {
-		/*
-		 * FIXME: right now this only accounts for bytes written
-		 *        to the first mirror
-		 */
-		if (hdr->pgio_mirror_idx == 0)
-			dreq->count += hdr->good_bytes;
+		nfs_direct_good_bytes(dreq, hdr);
 		if (nfs_write_need_commit(hdr)) {
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				request_commit = true;
@@ -841,6 +881,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 				result = PTR_ERR(req);
 				break;
 			}
+
+			nfs_direct_setup_mirroring(dreq, &desc, req);
+
 			nfs_lock_request(req);
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
-- 
1.9.3


  parent reply	other threads:[~2015-01-07  0:29 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-07  0:28 [PATCH v3 00/49] *** Add Flexfile Layout Module *** Tom Haynes
2015-01-07  0:28 ` [PATCH v3 01/49] pnfs: Prepare for flexfiles by pulling out common code Tom Haynes
2015-01-12 18:59   ` Anna Schumaker
2015-01-12 19:10     ` Tom Haynes
2015-01-12 19:13       ` Christoph Hellwig
2015-01-07  0:28 ` [PATCH v3 02/49] pnfs: Do not grab the commit_info lock twice when rescheduling writes Tom Haynes
2015-01-07  0:28 ` [PATCH v3 03/49] nfs41: pull data server cache from file layout to generic pnfs Tom Haynes
2015-01-07 14:56   ` Christoph Hellwig
2015-01-07 18:28     ` Tom Haynes
2015-01-13 19:48       ` Tom Haynes
2015-01-07  0:28 ` [PATCH v3 04/49] nfs41: pull decode_ds_addr " Tom Haynes
2015-01-12 16:19   ` Anna Schumaker
2015-01-13 19:43     ` Tom Haynes
2015-01-07  0:28 ` [PATCH v3 05/49] nfs41: pull nfs4_ds_connect " Tom Haynes
2015-01-07 14:56   ` Christoph Hellwig
2015-01-07  0:28 ` [PATCH v3 06/49] nfs41: allow LD to choose DS connection auth flavor Tom Haynes
2015-01-07  0:28 ` [PATCH v3 07/49] nfs41: move file layout macros to generic pnfs Tom Haynes
2015-01-07  0:28 ` [PATCH v3 08/49] nfsv3: introduce nfs3_set_ds_client Tom Haynes
2015-01-07  0:28 ` [PATCH v3 09/49] nfs41: allow LD to choose DS connection version/minor_version Tom Haynes
2015-01-07  0:28 ` [PATCH v3 10/49] nfs41: create NFSv3 DS connection if specified Tom Haynes
2015-01-07  0:28 ` [PATCH v3 11/49] pnfs: Add nfs_rpc_ops in calls to nfs_initiate_pgio Tom Haynes
2015-01-12 18:41   ` Anna Schumaker
2015-01-12 18:51     ` Tom Haynes
2015-01-07  0:28 ` [PATCH v3 12/49] nfs: allow different protocol in nfs_initiate_commit Tom Haynes
2015-01-07  0:28 ` [PATCH v3 13/49] nfs4: pass slot table to nfs40_setup_sequence Tom Haynes
2015-01-07  0:28 ` [PATCH v3 14/49] nfs4: export nfs4_sequence_done Tom Haynes
2015-01-07  0:28 ` [PATCH v3 15/49] nfs: allow to specify cred in nfs_initiate_pgio Tom Haynes
2015-01-07  0:28 ` [PATCH v3 16/49] NFSv4.1/NFSv3: Add pNFS callbacks for nfs3_(read|write|commit)_done() Tom Haynes
2015-01-07  0:28 ` [PATCH v3 17/49] sunrpc: add rpc_count_iostats_idx Tom Haynes
2015-01-07  0:28 ` [PATCH v3 18/49] nfs: set hostname when creating nfsv3 ds connection Tom Haynes
2015-01-07  0:28 ` [PATCH v3 19/49] nfs/flexclient: export pnfs_layoutcommit_inode Tom Haynes
2015-01-07  0:28 ` [PATCH v3 20/49] nfs41: close a small race window when adding new layout to global list Tom Haynes
2015-01-07  0:28 ` [PATCH v3 21/49] nfs41: serialize first layoutget of a file Tom Haynes
2015-01-07  0:28 ` [PATCH v3 22/49] nfs: save server READ/WRITE/COMMIT status Tom Haynes
2015-01-07  0:28 ` [PATCH v3 23/49] nfs41: pass iomode through layoutreturn args Tom Haynes
2015-01-07  0:28 ` [PATCH v3 24/49] nfs41: make a helper function to send layoutreturn Tom Haynes
2015-01-07  0:28 ` [PATCH v3 25/49] nfs41: add a helper to mark layout for return Tom Haynes
2015-01-07  0:28 ` [PATCH v3 26/49] nfs41: don't use a layout if it is marked for returning Tom Haynes
2015-01-07  0:28 ` [PATCH v3 27/49] nfs41: send layoutreturn in last put_lseg Tom Haynes
2015-01-07  0:28 ` [PATCH v3 28/49] nfs41: clear NFS_LAYOUT_RETURN if layoutreturn is sent or failed to send Tom Haynes
2015-01-07  0:28 ` [PATCH v3 29/49] nfs/filelayout: use pnfs_error_mark_layout_for_return Tom Haynes
2015-01-07  0:28 ` [PATCH v3 30/49] nfs: introduce pg_cleanup op for pgio descriptors Tom Haynes
2015-01-07  0:28 ` [PATCH v3 31/49] pnfs: release lseg in pnfs_generic_pg_cleanup Tom Haynes
2015-01-07  0:28 ` [PATCH v3 32/49] nfs: handle overlapping reqs in lock_and_join Tom Haynes
2015-01-07  0:28 ` [PATCH v3 33/49] nfs: rename pgio header ds_idx to ds_commit_idx Tom Haynes
2015-01-07  0:28 ` [PATCH v3 34/49] pnfs: pass ds_commit_idx through the commit path Tom Haynes
2015-01-13 15:37   ` Anna Schumaker
2015-01-13 16:10     ` Weston Andros Adamson
2015-01-07  0:28 ` [PATCH v3 35/49] nfs: add mirroring support to pgio layer Tom Haynes
2015-01-07  0:28 ` Tom Haynes [this message]
2015-01-07  0:28 ` [PATCH v3 37/49] pnfs: fail comparison when bucket verifier not set Tom Haynes
2015-01-07  0:28 ` [PATCH v3 38/49] nfs41: add a debug warning if we destroy an unempty layout Tom Haynes
2015-01-07  0:28 ` [PATCH v3 39/49] nfs: only reset desc->pg_mirror_idx when mirroring is supported Tom Haynes
2015-01-07  0:28 ` [PATCH v3 40/49] nfs: add nfs_pgio_current_mirror helper Tom Haynes
2015-01-07  0:28 ` [PATCH v3 41/49] pnfs: allow LD to ask to resend read through pnfs Tom Haynes
2015-01-07  0:28 ` [PATCH v3 42/49] nfs41: add range to layoutreturn args Tom Haynes
2015-01-13 19:15   ` Anna Schumaker
2015-01-13 19:42     ` Tom Haynes
2015-01-13 19:51       ` Anna Schumaker
2015-01-13 22:50         ` Tom Haynes
2015-01-14 13:40           ` Anna Schumaker
2015-01-07  0:28 ` [PATCH v3 43/49] nfs41: allow async version layoutreturn Tom Haynes
2015-01-07  0:28 ` [PATCH v3 44/49] nfs41: introduce NFS_LAYOUT_RETURN_BEFORE_CLOSE Tom Haynes
2015-01-07  0:28 ` [PATCH v3 45/49] nfs/flexfiles: send layoutreturn before freeing lseg Tom Haynes
2015-01-07  0:28 ` [PATCH v3 46/49] nfs41: add NFS_LAYOUT_RETRY_LAYOUTGET to layout header flags Tom Haynes
2015-01-07  0:28 ` [PATCH v3 47/49] nfs: add a helper to set NFS_ODIRECT_RESCHED_WRITES to direct writes Tom Haynes
2015-01-07  0:28 ` [PATCH v3 48/49] nfs41: wait for LAYOUTRETURN before retrying LAYOUTGET Tom Haynes
2015-01-07  0:28 ` [PATCH v3 49/49] pnfs/flexfiles: Add the FlexFile Layout Driver Tom Haynes
2015-01-08 14:45 ` [PATCH v3 00/49] *** Add Flexfile Layout Module *** Christoph Hellwig
2015-01-08 15:20   ` Anna Schumaker
2015-01-08 18:13     ` Tom Haynes
2015-01-08 16:02   ` Weston Andros Adamson
2015-01-08 17:07   ` Tom Haynes
2015-01-08 19:36     ` Tom Haynes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1420590534-84063-37-git-send-email-loghyr@primarydata.com \
    --to=thomas.haynes@primarydata.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@primarydata.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.