* [PATCH 1/5] pNFS: Layoutreturn must invalidate all existing layout segments. @ 2015-07-13 5:20 Trond Myklebust 2015-07-13 5:20 ` [PATCH 2/5] pNFS: pnfs_roc_drain should return 'true' when sleeping Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2015-07-13 5:20 UTC (permalink / raw) To: linux-nfs Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9f24238032f8..671498ca36d7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7964,16 +7964,19 @@ static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; + LIST_HEAD(freeme); dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_clear_layoutreturn_waitbit(lo); clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); + pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); kfree(calldata); -- 2.4.3 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/5] pNFS: pnfs_roc_drain should return 'true' when sleeping 2015-07-13 5:20 [PATCH 1/5] pNFS: Layoutreturn must invalidate all existing layout segments Trond Myklebust @ 2015-07-13 5:20 ` Trond Myklebust 2015-07-13 5:20 ` [PATCH 3/5] pNFS: Fix races between return-on-close and layoutreturn Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2015-07-13 5:20 UTC (permalink / raw) To: linux-nfs Also clean up the case where we don't find a return-on-close layout segment. Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> --- fs/nfs/pnfs.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0ba9a02c9566..8e9f467e409c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1146,14 +1146,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) struct pnfs_layout_segment *lseg; nfs4_stateid stateid; u32 current_seqid; - bool found = false, layoutreturn = false; + bool layoutreturn = false; spin_lock(&ino->i_lock); list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - found = true; - goto out; + spin_unlock(&ino->i_lock); + return true; } lo = nfsi->layout; current_seqid = be32_to_cpu(lo->plh_stateid.seqid); @@ -1162,23 +1162,21 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) * a barrier, we choose the worst-case barrier. */ *barrier = current_seqid + atomic_read(&lo->plh_outstanding); -out: - if (!found) { - stateid = lo->plh_stateid; - layoutreturn = - test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + stateid = lo->plh_stateid; + layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } + if (layoutreturn) { + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); } + spin_unlock(&ino->i_lock); if (layoutreturn) { rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); + return true; } - return found; + return false; } /* -- 2.4.3 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/5] pNFS: Fix races between return-on-close and layoutreturn. 2015-07-13 5:20 ` [PATCH 2/5] pNFS: pnfs_roc_drain should return 'true' when sleeping Trond Myklebust @ 2015-07-13 5:20 ` Trond Myklebust 2015-07-13 5:20 ` [PATCH 4/5] pNFS: pnfs_roc_drain() fix a race with open Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2015-07-13 5:20 UTC (permalink / raw) To: linux-nfs If one or more of the layout segments reports an error during I/O, then we may have to send a layoutreturn to report the error back to the NFS metadata server. This patch ensures that the return-on-close code can detect the outstanding layoutreturn, and not preempt it. Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> --- fs/nfs/nfs4proc.c | 2 -- fs/nfs/pnfs.c | 63 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 671498ca36d7..c5c9e0d070f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_clear_layoutreturn_waitbit(lo); - clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); - rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); pnfs_free_lseg_list(&freeme); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8e9f467e409c..27e2bcaa88da 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, { struct pnfs_layout_segment *s; - if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) return false; list_for_each_entry(s, &lo->plh_segs, pls_list) @@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, return true; } +static bool +pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) +{ + if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return false; + lo->plh_return_iomode = 0; + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); + return true; +} + static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, struct pnfs_layout_hdr *lo, struct inode *inode) { @@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, if (pnfs_layout_need_return(lo, lseg)) { nfs4_stateid stateid; enum pnfs_iomode iomode; + bool send; stateid = lo->plh_stateid; iomode = lo->plh_return_iomode; - /* decreased in pnfs_send_layoutreturn() */ - lo->plh_block_lgets++; - lo->plh_return_iomode = 0; + send = pnfs_prepare_layoutreturn(lo); spin_unlock(&inode->i_lock); - pnfs_get_layout_hdr(lo); - - /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, stateid, iomode, false); + if (send) { + /* Send an async layoutreturn so we dont deadlock */ + pnfs_send_layoutreturn(lo, stateid, iomode, false); + } } else spin_unlock(&inode->i_lock); } @@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } static int @@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino) LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0, empty; + bool send; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino) /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { spin_unlock(&ino->i_lock); - pnfs_put_layout_hdr(lo); dprintk("NFS: %s no layout segments to return\n", __func__); - goto out; + goto out_put_layout_hdr; } set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - lo->plh_block_lgets++; + send = pnfs_prepare_layoutreturn(lo); spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); + if (send) + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); +out_put_layout_hdr: + pnfs_put_layout_hdr(lo); out: dprintk("<-- %s status: %d\n", __func__, status); return status; @@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino) out_noroc: if (lo) { stateid = lo->plh_stateid; - layoutreturn = - test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, - &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } + if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags)) + layoutreturn = pnfs_prepare_layoutreturn(lo); } spin_unlock(&ino->i_lock); if (layoutreturn) { @@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) */ *barrier = current_seqid + atomic_read(&lo->plh_outstanding); stateid = lo->plh_stateid; - layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, - &lo->plh_flags); - if (layoutreturn) { - lo->plh_block_lgets++; - pnfs_get_layout_hdr(lo); - } + if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags)) + layoutreturn = pnfs_prepare_layoutreturn(lo); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); spin_unlock(&ino->i_lock); if (layoutreturn) { - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); return true; } @@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); /* set failure bit so that pnfs path will be retried later */ pnfs_layout_set_fail_bit(lo, iomode); - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); if (lo->plh_return_iomode == 0) lo->plh_return_iomode = range.iomode; else if (lo->plh_return_iomode != range.iomode) -- 2.4.3 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 4/5] pNFS: pnfs_roc_drain() fix a race with open 2015-07-13 5:20 ` [PATCH 3/5] pNFS: Fix races between return-on-close and layoutreturn Trond Myklebust @ 2015-07-13 5:20 ` Trond Myklebust 2015-07-13 5:20 ` [PATCH 5/5] pNFS: Don't throw out valid layout segments Trond Myklebust 0 siblings, 1 reply; 5+ messages in thread From: Trond Myklebust @ 2015-07-13 5:20 UTC (permalink / raw) To: linux-nfs If a process reopens the file before we can send off the CLOSE/DELEGRETURN, then pnfs_roc_drain() may end up waiting for a new set of layout segments that are marked as return-on-close, but haven't yet been returned. Fix this by only waiting for those layout segments that were invalidated in pnfs_roc(). Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> --- fs/nfs/pnfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 27e2bcaa88da..b02e32e2abeb 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1159,12 +1159,15 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) bool layoutreturn = false; spin_lock(&ino->i_lock); - list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) - if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - spin_unlock(&ino->i_lock); - return true; - } + list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) { + if (!test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) + continue; + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + continue; + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + spin_unlock(&ino->i_lock); + return true; + } lo = nfsi->layout; current_seqid = be32_to_cpu(lo->plh_stateid.seqid); -- 2.4.3 ^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 5/5] pNFS: Don't throw out valid layout segments 2015-07-13 5:20 ` [PATCH 4/5] pNFS: pnfs_roc_drain() fix a race with open Trond Myklebust @ 2015-07-13 5:20 ` Trond Myklebust 0 siblings, 0 replies; 5+ messages in thread From: Trond Myklebust @ 2015-07-13 5:20 UTC (permalink / raw) To: linux-nfs It is OK for layout segments to remain hashed even if no-one holds any references to them, provided that the segments are still valid. Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com> --- fs/nfs/pnfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b02e32e2abeb..18aa3b7962eb 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -422,6 +422,10 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { + spin_unlock(&inode->i_lock); + return; + } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); spin_unlock(&inode->i_lock); @@ -462,6 +466,8 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); if (atomic_dec_and_test(&lseg->pls_refcount)) { struct pnfs_layout_hdr *lo = lseg->pls_layout; + if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + return; pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); pnfs_free_lseg_async(lseg); -- 2.4.3 ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-07-13 5:20 UTC | newest] Thread overview: 5+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2015-07-13 5:20 [PATCH 1/5] pNFS: Layoutreturn must invalidate all existing layout segments Trond Myklebust 2015-07-13 5:20 ` [PATCH 2/5] pNFS: pnfs_roc_drain should return 'true' when sleeping Trond Myklebust 2015-07-13 5:20 ` [PATCH 3/5] pNFS: Fix races between return-on-close and layoutreturn Trond Myklebust 2015-07-13 5:20 ` [PATCH 4/5] pNFS: pnfs_roc_drain() fix a race with open Trond Myklebust 2015-07-13 5:20 ` [PATCH 5/5] pNFS: Don't throw out valid layout segments Trond Myklebust
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox