linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/4] Fix the v4.7 LAYOUTGET regressions
@ 2016-07-19 20:33 Trond Myklebust
  2016-07-19 20:33 ` [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() Trond Myklebust
  0 siblings, 1 reply; 5+ messages in thread
From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw)
  To: linux-nfs

v2:
The first patch in the series needs to return on fatal errors. (Thanks
Ben Coddington)

Trond Myklebust (4):
  pNFS: Fix post-layoutget error handling in pnfs_update_layout()
  pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT
  pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET
  pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and
    NFS4ERR_EXPIRED

 fs/nfs/nfs4proc.c | 57 ++++++++++++++++++++++++++++---------------------------
 fs/nfs/pnfs.c     | 35 ++++++++++++++++++++++------------
 2 files changed, 52 insertions(+), 40 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout()
  2016-07-19 20:33 [PATCH v2 0/4] Fix the v4.7 LAYOUTGET regressions Trond Myklebust
@ 2016-07-19 20:33 ` Trond Myklebust
  2016-07-19 20:33   ` [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT Trond Myklebust
  0 siblings, 1 reply; 5+ messages in thread
From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw)
  To: linux-nfs

The non-retry error path is currently broken and ends up releasing the
reference to the layout twice. It also can end up clearing the
NFS_LAYOUT_FIRST_LAYOUTGET flag twice, causing a race.

In addition, the retry path will fail to decrement the plh_outstanding
counter.

Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: stable@vger.kernel.org # 4.7
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfs/pnfs.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0fbe734cc38c..563f131c9abe 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1645,6 +1645,7 @@ lookup_again:
 	lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags);
 	trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
 				 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
+	atomic_dec(&lo->plh_outstanding);
 	if (IS_ERR(lseg)) {
 		switch(PTR_ERR(lseg)) {
 		case -ERECALLCONFLICT:
@@ -1652,26 +1653,26 @@ lookup_again:
 				lseg = NULL;
 			/* Fallthrough */
 		case -EAGAIN:
-			pnfs_put_layout_hdr(lo);
-			if (first)
-				pnfs_clear_first_layoutget(lo);
-			if (lseg) {
-				trace_pnfs_update_layout(ino, pos, count,
-					iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
-				goto lookup_again;
-			}
-			/* Fallthrough */
+			break;
 		default:
 			if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
 				pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
 				lseg = NULL;
 			}
+			goto out_put_layout_hdr;
+		}
+		if (lseg) {
+			if (first)
+				pnfs_clear_first_layoutget(lo);
+			trace_pnfs_update_layout(ino, pos, count,
+				iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY);
+			pnfs_put_layout_hdr(lo);
+			goto lookup_again;
 		}
 	} else {
 		pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
 	}
 
-	atomic_dec(&lo->plh_outstanding);
 out_put_layout_hdr:
 	if (first)
 		pnfs_clear_first_layoutget(lo);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT
  2016-07-19 20:33 ` [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() Trond Myklebust
@ 2016-07-19 20:33   ` Trond Myklebust
  2016-07-19 20:33     ` [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET Trond Myklebust
  0 siblings, 1 reply; 5+ messages in thread
From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw)
  To: linux-nfs

They are not the same error, and need to be handled differently.

Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: stable@vger.kernel.org # 4.7
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfs/nfs4proc.c | 23 ++++++++++++++---------
 fs/nfs/pnfs.c     |  1 +
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 519368b98762..ee8efe0a5202 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -437,6 +437,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
 		case -NFS4ERR_DELAY:
 			nfs_inc_server_stats(server, NFSIOS_DELAY);
 		case -NFS4ERR_GRACE:
+		case -NFS4ERR_LAYOUTTRYLATER:
 		case -NFS4ERR_RECALLCONFLICT:
 			exception->delay = 1;
 			return 0;
@@ -7883,11 +7884,12 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 	struct inode *inode = lgp->args.inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct pnfs_layout_hdr *lo;
-	int status = task->tk_status;
+	int nfs4err = task->tk_status;
+	int err, status = 0;
 
 	dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
 
-	switch (status) {
+	switch (nfs4err) {
 	case 0:
 		goto out;
 
@@ -7919,12 +7921,11 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 			status = -EOVERFLOW;
 			goto out;
 		}
-		/* Fallthrough */
+		status = -EBUSY;
+		break;
 	case -NFS4ERR_RECALLCONFLICT:
-		nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT,
-					exception);
 		status = -ERECALLCONFLICT;
-		goto out;
+		break;
 	case -NFS4ERR_EXPIRED:
 	case -NFS4ERR_BAD_STATEID:
 		exception->timeout = 0;
@@ -7955,9 +7956,13 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 			spin_unlock(&inode->i_lock);
 	}
 
-	status = nfs4_handle_exception(server, status, exception);
-	if (exception->retry)
-		status = -EAGAIN;
+	err = nfs4_handle_exception(server, nfs4err, exception);
+	if (!status) {
+		if (exception->retry)
+			status = -EAGAIN;
+		else
+			status = err;
+	}
 out:
 	dprintk("<-- %s\n", __func__);
 	return status;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 563f131c9abe..c50d4ebab5c5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1648,6 +1648,7 @@ lookup_again:
 	atomic_dec(&lo->plh_outstanding);
 	if (IS_ERR(lseg)) {
 		switch(PTR_ERR(lseg)) {
+		case -EBUSY:
 		case -ERECALLCONFLICT:
 			if (time_after(jiffies, giveup))
 				lseg = NULL;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET
  2016-07-19 20:33   ` [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT Trond Myklebust
@ 2016-07-19 20:33     ` Trond Myklebust
  2016-07-19 20:33       ` [PATCH v2 4/4] pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED Trond Myklebust
  0 siblings, 1 reply; 5+ messages in thread
From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw)
  To: linux-nfs

Instead of giving up altogether and falling back to doing I/O
through the MDS, which may make the situation worse, wait for
2 lease periods for the callback to resolve itself, and then
try destroying the existing layout.

Only if this was an attempt at getting a first layout, do we
give up altogether, as the server is clearly crazy.

Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: stable@vger.kernel.org # 4.7
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfs/pnfs.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c50d4ebab5c5..7d992362ff04 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1505,7 +1505,7 @@ pnfs_update_layout(struct inode *ino,
 	struct pnfs_layout_segment *lseg = NULL;
 	nfs4_stateid stateid;
 	long timeout = 0;
-	unsigned long giveup = jiffies + rpc_get_timeout(server->client);
+	unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
 	bool first;
 
 	if (!pnfs_enabled_sb(NFS_SERVER(ino))) {
@@ -1649,9 +1649,18 @@ lookup_again:
 	if (IS_ERR(lseg)) {
 		switch(PTR_ERR(lseg)) {
 		case -EBUSY:
-		case -ERECALLCONFLICT:
 			if (time_after(jiffies, giveup))
 				lseg = NULL;
+			break;
+		case -ERECALLCONFLICT:
+			/* Huh? We hold no layouts, how is there a recall? */
+			if (first) {
+				lseg = NULL;
+				break;
+			}
+			/* Destroy the existing layout and start over */
+			if (time_after(jiffies, giveup))
+				pnfs_destroy_layout(NFS_I(ino));
 			/* Fallthrough */
 		case -EAGAIN:
 			break;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 4/4] pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED
  2016-07-19 20:33     ` [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET Trond Myklebust
@ 2016-07-19 20:33       ` Trond Myklebust
  0 siblings, 0 replies; 5+ messages in thread
From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw)
  To: linux-nfs

We want to recover the open stateid if there is no layout stateid
and/or the stateid argument matches an open stateid.
Otherwise throw out the existing layout and recover from scratch, as
the layout stateid is bad.

Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling")
Cc: stable@vger.kernel.org # 4.7
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
---
 fs/nfs/nfs4proc.c | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee8efe0a5202..a1a3b4c9a563 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7886,6 +7886,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 	struct pnfs_layout_hdr *lo;
 	int nfs4err = task->tk_status;
 	int err, status = 0;
+	LIST_HEAD(head);
 
 	dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
 
@@ -7930,30 +7931,25 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 	case -NFS4ERR_BAD_STATEID:
 		exception->timeout = 0;
 		spin_lock(&inode->i_lock);
-		if (nfs4_stateid_match(&lgp->args.stateid,
+		lo = NFS_I(inode)->layout;
+		/* If the open stateid was bad, then recover it. */
+		if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) ||
+		    nfs4_stateid_match_other(&lgp->args.stateid,
 					&lgp->args.ctx->state->stateid)) {
 			spin_unlock(&inode->i_lock);
-			/* If the open stateid was bad, then recover it. */
 			exception->state = lgp->args.ctx->state;
 			break;
 		}
-		lo = NFS_I(inode)->layout;
-		if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
-		    nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
-			LIST_HEAD(head);
-
-			/*
-			 * Mark the bad layout state as invalid, then retry
-			 * with the current stateid.
-			 */
-			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-			pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
-			spin_unlock(&inode->i_lock);
-			pnfs_free_lseg_list(&head);
-			status = -EAGAIN;
-			goto out;
-		} else
-			spin_unlock(&inode->i_lock);
+
+		/*
+		 * Mark the bad layout state as invalid, then retry
+		 */
+		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+		pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
+		spin_unlock(&inode->i_lock);
+		pnfs_free_lseg_list(&head);
+		status = -EAGAIN;
+		goto out;
 	}
 
 	err = nfs4_handle_exception(server, nfs4err, exception);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-07-19 20:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-07-19 20:33 [PATCH v2 0/4] Fix the v4.7 LAYOUTGET regressions Trond Myklebust
2016-07-19 20:33 ` [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() Trond Myklebust
2016-07-19 20:33   ` [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT Trond Myklebust
2016-07-19 20:33     ` [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET Trond Myklebust
2016-07-19 20:33       ` [PATCH v2 4/4] pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED Trond Myklebust

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).