* [PATCH v2 0/4] Fix the v4.7 LAYOUTGET regressions
@ 2016-07-19 20:33 Trond Myklebust
2016-07-19 20:33 ` [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() Trond Myklebust
0 siblings, 1 reply; 5+ messages in thread
From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw)
To: linux-nfs
v2:
The first patch in the series needs to return on fatal errors. (Thanks
Ben Coddington)
Trond Myklebust (4):
pNFS: Fix post-layoutget error handling in pnfs_update_layout()
pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT
pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET
pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and
NFS4ERR_EXPIRED
fs/nfs/nfs4proc.c | 57 ++++++++++++++++++++++++++++---------------------------
fs/nfs/pnfs.c | 35 ++++++++++++++++++++++------------
2 files changed, 52 insertions(+), 40 deletions(-)
--
2.7.4
^ permalink raw reply [flat|nested] 5+ messages in thread* [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() 2016-07-19 20:33 [PATCH v2 0/4] Fix the v4.7 LAYOUTGET regressions Trond Myklebust @ 2016-07-19 20:33 ` Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw) To: linux-nfs The non-retry error path is currently broken and ends up releasing the reference to the layout twice. It also can end up clearing the NFS_LAYOUT_FIRST_LAYOUTGET flag twice, causing a race. In addition, the retry path will fail to decrement the plh_outstanding counter. Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling") Cc: stable@vger.kernel.org # 4.7 Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> --- fs/nfs/pnfs.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fbe734cc38c..563f131c9abe 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1645,6 +1645,7 @@ lookup_again: lseg = send_layoutget(lo, ctx, &stateid, &arg, &timeout, gfp_flags); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); + atomic_dec(&lo->plh_outstanding); if (IS_ERR(lseg)) { switch(PTR_ERR(lseg)) { case -ERECALLCONFLICT: @@ -1652,26 +1653,26 @@ lookup_again: lseg = NULL; /* Fallthrough */ case -EAGAIN: - pnfs_put_layout_hdr(lo); - if (first) - pnfs_clear_first_layoutget(lo); - if (lseg) { - trace_pnfs_update_layout(ino, pos, count, - iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); - goto lookup_again; - } - /* Fallthrough */ + break; default: if (!nfs_error_is_fatal(PTR_ERR(lseg))) { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); lseg = NULL; } + goto out_put_layout_hdr; + } + if (lseg) { + if (first) + pnfs_clear_first_layoutget(lo); + trace_pnfs_update_layout(ino, pos, count, + iomode, lo, lseg, PNFS_UPDATE_LAYOUT_RETRY); + pnfs_put_layout_hdr(lo); + goto lookup_again; } } else { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); } - atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: if (first) pnfs_clear_first_layoutget(lo); -- 2.7.4 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT 2016-07-19 20:33 ` [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() Trond Myklebust @ 2016-07-19 20:33 ` Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw) To: linux-nfs They are not the same error, and need to be handled differently. Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling") Cc: stable@vger.kernel.org # 4.7 Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> --- fs/nfs/nfs4proc.c | 23 ++++++++++++++--------- fs/nfs/pnfs.c | 1 + 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 519368b98762..ee8efe0a5202 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -437,6 +437,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: + case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: exception->delay = 1; return 0; @@ -7883,11 +7884,12 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; - int status = task->tk_status; + int nfs4err = task->tk_status; + int err, status = 0; dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); - switch (status) { + switch (nfs4err) { case 0: goto out; @@ -7919,12 +7921,11 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, status = -EOVERFLOW; goto out; } - /* Fallthrough */ + status = -EBUSY; + break; case -NFS4ERR_RECALLCONFLICT: - nfs4_handle_exception(server, -NFS4ERR_RECALLCONFLICT, - exception); status = -ERECALLCONFLICT; - goto out; + break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: exception->timeout = 0; @@ -7955,9 +7956,13 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, spin_unlock(&inode->i_lock); } - status = nfs4_handle_exception(server, status, exception); - if (exception->retry) - status = -EAGAIN; + err = nfs4_handle_exception(server, nfs4err, exception); + if (!status) { + if (exception->retry) + status = -EAGAIN; + else + status = err; + } out: dprintk("<-- %s\n", __func__); return status; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 563f131c9abe..c50d4ebab5c5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1648,6 +1648,7 @@ lookup_again: atomic_dec(&lo->plh_outstanding); if (IS_ERR(lseg)) { switch(PTR_ERR(lseg)) { + case -EBUSY: case -ERECALLCONFLICT: if (time_after(jiffies, giveup)) lseg = NULL; -- 2.7.4 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET 2016-07-19 20:33 ` [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT Trond Myklebust @ 2016-07-19 20:33 ` Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 4/4] pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw) To: linux-nfs Instead of giving up altogether and falling back to doing I/O through the MDS, which may make the situation worse, wait for 2 lease periods for the callback to resolve itself, and then try destroying the existing layout. Only if this was an attempt at getting a first layout, do we give up altogether, as the server is clearly crazy. Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling") Cc: stable@vger.kernel.org # 4.7 Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> --- fs/nfs/pnfs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c50d4ebab5c5..7d992362ff04 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1505,7 +1505,7 @@ pnfs_update_layout(struct inode *ino, struct pnfs_layout_segment *lseg = NULL; nfs4_stateid stateid; long timeout = 0; - unsigned long giveup = jiffies + rpc_get_timeout(server->client); + unsigned long giveup = jiffies + (clp->cl_lease_time << 1); bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) { @@ -1649,9 +1649,18 @@ lookup_again: if (IS_ERR(lseg)) { switch(PTR_ERR(lseg)) { case -EBUSY: - case -ERECALLCONFLICT: if (time_after(jiffies, giveup)) lseg = NULL; + break; + case -ERECALLCONFLICT: + /* Huh? We hold no layouts, how is there a recall? */ + if (first) { + lseg = NULL; + break; + } + /* Destroy the existing layout and start over */ + if (time_after(jiffies, giveup)) + pnfs_destroy_layout(NFS_I(ino)); /* Fallthrough */ case -EAGAIN: break; -- 2.7.4 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2 4/4] pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED 2016-07-19 20:33 ` [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET Trond Myklebust @ 2016-07-19 20:33 ` Trond Myklebust 0 siblings, 0 replies; 5+ messages in thread From: Trond Myklebust @ 2016-07-19 20:33 UTC (permalink / raw) To: linux-nfs We want to recover the open stateid if there is no layout stateid and/or the stateid argument matches an open stateid. Otherwise throw out the existing layout and recover from scratch, as the layout stateid is bad. Fixes: 183d9e7b112aa ("pnfs: rework LAYOUTGET retry handling") Cc: stable@vger.kernel.org # 4.7 Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> --- fs/nfs/nfs4proc.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ee8efe0a5202..a1a3b4c9a563 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7886,6 +7886,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, struct pnfs_layout_hdr *lo; int nfs4err = task->tk_status; int err, status = 0; + LIST_HEAD(head); dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); @@ -7930,30 +7931,25 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, case -NFS4ERR_BAD_STATEID: exception->timeout = 0; spin_lock(&inode->i_lock); - if (nfs4_stateid_match(&lgp->args.stateid, + lo = NFS_I(inode)->layout; + /* If the open stateid was bad, then recover it. */ + if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + nfs4_stateid_match_other(&lgp->args.stateid, &lgp->args.ctx->state->stateid)) { spin_unlock(&inode->i_lock); - /* If the open stateid was bad, then recover it. */ exception->state = lgp->args.ctx->state; break; } - lo = NFS_I(inode)->layout; - if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) && - nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { - LIST_HEAD(head); - - /* - * Mark the bad layout state as invalid, then retry - * with the current stateid. - */ - set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); - spin_unlock(&inode->i_lock); - pnfs_free_lseg_list(&head); - status = -EAGAIN; - goto out; - } else - spin_unlock(&inode->i_lock); + + /* + * Mark the bad layout state as invalid, then retry + */ + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&head); + status = -EAGAIN; + goto out; } err = nfs4_handle_exception(server, nfs4err, exception); -- 2.7.4 ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2016-07-19 20:34 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2016-07-19 20:33 [PATCH v2 0/4] Fix the v4.7 LAYOUTGET regressions Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 1/4] pNFS: Fix post-layoutget error handling in pnfs_update_layout() Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 2/4] pNFS: Separate handling of NFS4ERR_LAYOUTTRYLATER and RECALLCONFLICT Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 3/4] pNFS: Handle NFS4ERR_RECALLCONFLICT correctly in LAYOUTGET Trond Myklebust 2016-07-19 20:33 ` [PATCH v2 4/4] pNFS: Fix LAYOUTGET handling of NFS4ERR_BAD_STATEID and NFS4ERR_EXPIRED Trond Myklebust
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).