* [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit
@ 2013-03-20 17:39 Trond Myklebust
2013-03-20 17:39 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
2013-03-21 11:22 ` [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Benny Halevy
0 siblings, 2 replies; 9+ messages in thread
From: Trond Myklebust @ 2013-03-20 17:39 UTC (permalink / raw)
To: linux-nfs; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao
We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
where the two are out of sync.
The first half of the problem is to ensure that pnfs_layoutcommit_inode
clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
We still need to keep the reference to those segments until the RPC call
is finished, so in order to make it clear _where_ those references come
from, we add a helper pnfs_list_write_lseg_done() that cleans up after
pnfs_list_write_lseg.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Benny Halevy <bhalevy@tonian.com>
Cc: stable@vger.kernel.org
---
fs/nfs/nfs4proc.c | 14 --------------
fs/nfs/pnfs.c | 19 ++++++++++++++++++-
2 files changed, 18 insertions(+), 15 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dcda2fa..5122753 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6568,22 +6568,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutcommit_release(void *calldata)
{
struct nfs4_layoutcommit_data *data = calldata;
- struct pnfs_layout_segment *lseg, *tmp;
- unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
pnfs_cleanup_layoutcommit(data);
- /* Matched by references in pnfs_set_layoutcommit */
- list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
- list_del_init(&lseg->pls_lc_list);
- if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
- &lseg->pls_flags))
- pnfs_put_lseg(lseg);
- }
-
- clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
- smp_mb__after_clear_bit();
- wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
-
put_rpccred(data->cred);
kfree(data);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e86e35f..6f6b356 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1748,11 +1748,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
if (lseg->pls_range.iomode == IOMODE_RW &&
- test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
list_add(&lseg->pls_lc_list, listp);
}
}
+void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
+{
+ struct pnfs_layout_segment *lseg, *tmp;
+ unsigned long *bitlock = &NFS_I(inode)->flags;
+
+ /* Matched by references in pnfs_set_layoutcommit */
+ list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
+ list_del_init(&lseg->pls_lc_list);
+ pnfs_put_lseg(lseg);
+ }
+
+ clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+ smp_mb__after_clear_bit();
+ wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1797,6 +1813,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+ pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
}
/*
--
1.8.1.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
2013-03-20 17:39 [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
@ 2013-03-20 17:39 ` Trond Myklebust
2013-03-20 17:39 ` [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
2013-03-21 11:25 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Benny Halevy
2013-03-21 11:22 ` [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Benny Halevy
1 sibling, 2 replies; 9+ messages in thread
From: Trond Myklebust @ 2013-03-20 17:39 UTC (permalink / raw)
To: linux-nfs; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao
Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires
you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout
segments.
The only two sites that need to do this are the ones that call
pnfs_return_layout() without first doing a layout commit.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Benny Halevy <bhalevy@tonian.com>
Cc: stable@vger.kernel.org
---
fs/nfs/nfs4filelayout.c | 1 -
fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++--------
2 files changed, 27 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index c5656c5..22d1062 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
{
if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return;
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
pnfs_return_layout(inode);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6f6b356..45badca 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
lo_seg_intersecting(lseg_range, recall_range);
}
+static bool lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ if (!atomic_dec_and_test(&lseg->pls_refcount))
+ return false;
+ pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
+ list_add(&lseg->pls_list, tmp_list);
+ return true;
+}
+
/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
*/
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
- pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
- list_add(&lseg->pls_list, tmp_list);
+ if (lseg_dec_and_remove_zero(lseg, tmp_list))
rv = 1;
- }
}
return rv;
}
@@ -779,6 +786,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
return lseg;
}
+static void pnfs_clear_layoutcommit(struct inode *inode,
+ struct list_head *head)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+ return;
+ list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
+ if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+ continue;
+ lseg_dec_and_remove_zero(lseg, head);
+ }
+}
+
/*
* Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
* when the layout segment list is empty.
@@ -810,6 +832,7 @@ _pnfs_return_layout(struct inode *ino)
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
+ pnfs_clear_layoutcommit(ino, &tmp_list);
pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
@@ -822,8 +845,6 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
- WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
-
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
@@ -1460,7 +1481,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1615,7 +1635,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
--
1.8.1.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout
2013-03-20 17:39 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
@ 2013-03-20 17:39 ` Trond Myklebust
2013-03-21 11:25 ` Boaz Harrosh
2013-03-21 11:27 ` Benny Halevy
2013-03-21 11:25 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Benny Halevy
1 sibling, 2 replies; 9+ messages in thread
From: Trond Myklebust @ 2013-03-20 17:39 UTC (permalink / raw)
To: linux-nfs; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao
In order to be able to safely return the layout in nfs4_proc_setattr,
we need to block new uses of the layout, wait for all outstanding
users of the layout to complete, commit the layout and then return it.
This patch adds a helper in order to do all this safely.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
---
fs/nfs/nfs4proc.c | 2 +-
fs/nfs/pnfs.c | 22 ++++++++++++++++++++++
fs/nfs/pnfs.h | 6 ++++++
3 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5122753..c560c8f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2695,7 +2695,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
int status;
if (pnfs_ld_layoutret_on_setattr(inode))
- pnfs_return_layout(inode);
+ pnfs_commit_and_return_layout(inode);
nfs_fattr_init(fattr);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 45badca..5a5e14d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -868,6 +868,28 @@ out:
}
EXPORT_SYMBOL_GPL(_pnfs_return_layout);
+int
+pnfs_commit_and_return_layout(struct inode *inode)
+{
+ struct pnfs_layout_hdr *lo;
+ int ret;
+
+ spin_lock(&inode->i_lock);
+ lo = NFS_I(inode)->layout;
+ if (lo == NULL) {
+ spin_unlock(&inode->i_lock);
+ return 0;
+ }
+ /* Block new layoutgets and read/write to ds */
+ lo->plh_block_lgets++;
+ spin_unlock(&inode->i_lock);
+ filemap_fdatawait(inode->i_mapping);
+ ret = pnfs_layoutcommit_inode(inode, true);
+ if (ret == 0)
+ ret = _pnfs_return_layout(inode);
+ return ret;
+}
+
bool pnfs_roc(struct inode *ino)
{
struct pnfs_layout_hdr *lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba804..f5f8a47 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
+int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_write_data *);
void pnfs_ld_read_done(struct nfs_read_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}
+static inline int pnfs_commit_and_return_layout(struct inode *inode)
+{
+ return 0;
+}
+
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
{
--
1.8.1.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit
2013-03-20 17:39 [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
2013-03-20 17:39 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
@ 2013-03-21 11:22 ` Benny Halevy
1 sibling, 0 replies; 9+ messages in thread
From: Benny Halevy @ 2013-03-21 11:22 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs, Boaz Harrosh, Peng Tao
On 2013-03-20 19:39, Trond Myklebust wrote:
> We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
> NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
> where the two are out of sync.
> The first half of the problem is to ensure that pnfs_layoutcommit_inode
> clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
> We still need to keep the reference to those segments until the RPC call
> is finished, so in order to make it clear _where_ those references come
> from, we add a helper pnfs_list_write_lseg_done() that cleans up after
> pnfs_list_write_lseg.
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> Cc: Benny Halevy <bhalevy@tonian.com>
ACK
> Cc: stable@vger.kernel.org
> ---
> fs/nfs/nfs4proc.c | 14 --------------
> fs/nfs/pnfs.c | 19 ++++++++++++++++++-
> 2 files changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index dcda2fa..5122753 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6568,22 +6568,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
> static void nfs4_layoutcommit_release(void *calldata)
> {
> struct nfs4_layoutcommit_data *data = calldata;
> - struct pnfs_layout_segment *lseg, *tmp;
> - unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
>
> pnfs_cleanup_layoutcommit(data);
> - /* Matched by references in pnfs_set_layoutcommit */
> - list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
> - list_del_init(&lseg->pls_lc_list);
> - if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
> - &lseg->pls_flags))
> - pnfs_put_lseg(lseg);
> - }
> -
> - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
> - smp_mb__after_clear_bit();
> - wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
> -
> put_rpccred(data->cred);
> kfree(data);
> }
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index e86e35f..6f6b356 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1748,11 +1748,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
>
> list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
> if (lseg->pls_range.iomode == IOMODE_RW &&
> - test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> + test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> list_add(&lseg->pls_lc_list, listp);
> }
> }
>
> +void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
> +{
> + struct pnfs_layout_segment *lseg, *tmp;
> + unsigned long *bitlock = &NFS_I(inode)->flags;
> +
> + /* Matched by references in pnfs_set_layoutcommit */
> + list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
> + list_del_init(&lseg->pls_lc_list);
> + pnfs_put_lseg(lseg);
> + }
> +
> + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
> + smp_mb__after_clear_bit();
Out of curiousity, should this be done the same way in pnfs_layoutcommit_inode()
in the following block?
spin_lock(&inode->i_lock);
if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags);
spin_unlock(&inode->i_lock);
wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING);
goto out_free;
}
Benny
> + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
> +}
> +
> void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
> {
> pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
> @@ -1797,6 +1813,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
>
> if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
> nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
> + pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
> }
>
> /*
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
2013-03-20 17:39 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
2013-03-20 17:39 ` [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
@ 2013-03-21 11:25 ` Benny Halevy
1 sibling, 0 replies; 9+ messages in thread
From: Benny Halevy @ 2013-03-21 11:25 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs, Boaz Harrosh, Peng Tao
On 2013-03-20 19:39, Trond Myklebust wrote:
> Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires
> you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout
> segments.
> The only two sites that need to do this are the ones that call
> pnfs_return_layout() without first doing a layout commit.
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> Cc: Benny Halevy <bhalevy@tonian.com>
ACK
> Cc: stable@vger.kernel.org
> ---
> fs/nfs/nfs4filelayout.c | 1 -
> fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++--------
> 2 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index c5656c5..22d1062 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
> {
> if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
> return;
> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
> pnfs_return_layout(inode);
> }
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 6f6b356..45badca 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
> lo_seg_intersecting(lseg_range, recall_range);
> }
>
> +static bool lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
> + struct list_head *tmp_list)
> +{
> + if (!atomic_dec_and_test(&lseg->pls_refcount))
> + return false;
> + pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
> + list_add(&lseg->pls_list, tmp_list);
> + return true;
> +}
> +
> /* Returns 1 if lseg is removed from list, 0 otherwise */
> static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
> struct list_head *tmp_list)
> @@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
> */
> dprintk("%s: lseg %p ref %d\n", __func__, lseg,
> atomic_read(&lseg->pls_refcount));
> - if (atomic_dec_and_test(&lseg->pls_refcount)) {
> - pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
> - list_add(&lseg->pls_list, tmp_list);
> + if (lseg_dec_and_remove_zero(lseg, tmp_list))
> rv = 1;
> - }
> }
> return rv;
> }
> @@ -779,6 +786,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
> return lseg;
> }
>
> +static void pnfs_clear_layoutcommit(struct inode *inode,
> + struct list_head *head)
> +{
> + struct nfs_inode *nfsi = NFS_I(inode);
> + struct pnfs_layout_segment *lseg, *tmp;
> +
> + if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
> + return;
> + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
> + if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> + continue;
> + lseg_dec_and_remove_zero(lseg, head);
> + }
> +}
> +
> /*
> * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
> * when the layout segment list is empty.
> @@ -810,6 +832,7 @@ _pnfs_return_layout(struct inode *ino)
> /* Reference matched in nfs4_layoutreturn_release */
> pnfs_get_layout_hdr(lo);
> empty = list_empty(&lo->plh_segs);
> + pnfs_clear_layoutcommit(ino, &tmp_list);
> pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
> /* Don't send a LAYOUTRETURN if list was initially empty */
> if (empty) {
> @@ -822,8 +845,6 @@ _pnfs_return_layout(struct inode *ino)
> spin_unlock(&ino->i_lock);
> pnfs_free_lseg_list(&tmp_list);
>
> - WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
> -
> lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
> if (unlikely(lrp == NULL)) {
> status = -ENOMEM;
> @@ -1460,7 +1481,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
> dprintk("pnfs write error = %d\n", hdr->pnfs_error);
> if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
> PNFS_LAYOUTRET_ON_ERROR) {
> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
> pnfs_return_layout(hdr->inode);
> }
> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
> @@ -1615,7 +1635,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
> dprintk("pnfs read error = %d\n", hdr->pnfs_error);
> if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
> PNFS_LAYOUTRET_ON_ERROR) {
> - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
> pnfs_return_layout(hdr->inode);
> }
> if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout
2013-03-20 17:39 ` [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
@ 2013-03-21 11:25 ` Boaz Harrosh
2013-03-21 14:07 ` Myklebust, Trond
2013-03-21 11:27 ` Benny Halevy
1 sibling, 1 reply; 9+ messages in thread
From: Boaz Harrosh @ 2013-03-21 11:25 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs, Benny Halevy, Peng Tao
On 03/20/2013 07:39 PM, Trond Myklebust wrote:
> In order to be able to safely return the layout in nfs4_proc_setattr,
> we need to block new uses of the layout, wait for all outstanding
> users of the layout to complete, commit the layout and then return it.
>
> This patch adds a helper in order to do all this safely.
>
Hi Trond
It looks like this patchset might actually fix my problem as well.
I've been super swamped with internal work at Panasas, and never got
a chance to cleanup and review my experimental fixes. So it looks like
you bit me to it. Your work looks much better, and deeper then what I
had. Though one concern I have is that I need those for @stable so my
change was as minimal as possible, single patch.
I have just arrived back to Israel today, and will only have time to test
all this Next week. I have just the right setup for this. I understand that
for you it might be harder because Files-layout does not support segments and
my deadlock can only happen with two segments and up.
[BTW: I did not even reviewed the code yet will do ASAP]
Thanks
Boaz
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> Cc: Boaz Harrosh <bharrosh@panasas.com>
> ---
> fs/nfs/nfs4proc.c | 2 +-
> fs/nfs/pnfs.c | 22 ++++++++++++++++++++++
> fs/nfs/pnfs.h | 6 ++++++
> 3 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 5122753..c560c8f 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -2695,7 +2695,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
> int status;
>
> if (pnfs_ld_layoutret_on_setattr(inode))
> - pnfs_return_layout(inode);
> + pnfs_commit_and_return_layout(inode);
>
> nfs_fattr_init(fattr);
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 45badca..5a5e14d 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -868,6 +868,28 @@ out:
> }
> EXPORT_SYMBOL_GPL(_pnfs_return_layout);
>
> +int
> +pnfs_commit_and_return_layout(struct inode *inode)
> +{
> + struct pnfs_layout_hdr *lo;
> + int ret;
> +
> + spin_lock(&inode->i_lock);
> + lo = NFS_I(inode)->layout;
> + if (lo == NULL) {
> + spin_unlock(&inode->i_lock);
> + return 0;
> + }
> + /* Block new layoutgets and read/write to ds */
> + lo->plh_block_lgets++;
> + spin_unlock(&inode->i_lock);
> + filemap_fdatawait(inode->i_mapping);
> + ret = pnfs_layoutcommit_inode(inode, true);
> + if (ret == 0)
> + ret = _pnfs_return_layout(inode);
> + return ret;
> +}
> +
> bool pnfs_roc(struct inode *ino)
> {
> struct pnfs_layout_hdr *lo;
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 94ba804..f5f8a47 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
> void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
> int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
> int _pnfs_return_layout(struct inode *);
> +int pnfs_commit_and_return_layout(struct inode *);
> void pnfs_ld_write_done(struct nfs_write_data *);
> void pnfs_ld_read_done(struct nfs_read_data *);
> struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
> @@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
> return 0;
> }
>
> +static inline int pnfs_commit_and_return_layout(struct inode *inode)
> +{
> + return 0;
> +}
> +
> static inline bool
> pnfs_ld_layoutret_on_setattr(struct inode *inode)
> {
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout
2013-03-20 17:39 ` [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
2013-03-21 11:25 ` Boaz Harrosh
@ 2013-03-21 11:27 ` Benny Halevy
2013-03-21 13:59 ` Myklebust, Trond
1 sibling, 1 reply; 9+ messages in thread
From: Benny Halevy @ 2013-03-21 11:27 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs, Boaz Harrosh, Peng Tao
On 2013-03-20 19:39, Trond Myklebust wrote:
> In order to be able to safely return the layout in nfs4_proc_setattr,
> we need to block new uses of the layout, wait for all outstanding
> users of the layout to complete, commit the layout and then return it.
>
> This patch adds a helper in order to do all this safely.
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> Cc: Boaz Harrosh <bharrosh@panasas.com>
> ---
> fs/nfs/nfs4proc.c | 2 +-
> fs/nfs/pnfs.c | 22 ++++++++++++++++++++++
> fs/nfs/pnfs.h | 6 ++++++
> 3 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 5122753..c560c8f 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -2695,7 +2695,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
> int status;
>
> if (pnfs_ld_layoutret_on_setattr(inode))
> - pnfs_return_layout(inode);
> + pnfs_commit_and_return_layout(inode);
>
> nfs_fattr_init(fattr);
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 45badca..5a5e14d 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -868,6 +868,28 @@ out:
> }
> EXPORT_SYMBOL_GPL(_pnfs_return_layout);
>
> +int
> +pnfs_commit_and_return_layout(struct inode *inode)
> +{
> + struct pnfs_layout_hdr *lo;
> + int ret;
> +
> + spin_lock(&inode->i_lock);
> + lo = NFS_I(inode)->layout;
> + if (lo == NULL) {
> + spin_unlock(&inode->i_lock);
> + return 0;
> + }
> + /* Block new layoutgets and read/write to ds */
> + lo->plh_block_lgets++;
> + spin_unlock(&inode->i_lock);
> + filemap_fdatawait(inode->i_mapping);
> + ret = pnfs_layoutcommit_inode(inode, true);
> + if (ret == 0)
> + ret = _pnfs_return_layout(inode);
else {
spin_lock(&inode->i_lock);
lo->plh_block_lgets--;
spin_unlock(&inode->i_lock);
}
?
Benny
> + return ret;
> +}
> +
> bool pnfs_roc(struct inode *ino)
> {
> struct pnfs_layout_hdr *lo;
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 94ba804..f5f8a47 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
> void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
> int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
> int _pnfs_return_layout(struct inode *);
> +int pnfs_commit_and_return_layout(struct inode *);
> void pnfs_ld_write_done(struct nfs_write_data *);
> void pnfs_ld_read_done(struct nfs_read_data *);
> struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
> @@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
> return 0;
> }
>
> +static inline int pnfs_commit_and_return_layout(struct inode *inode)
> +{
> + return 0;
> +}
> +
> static inline bool
> pnfs_ld_layoutret_on_setattr(struct inode *inode)
> {
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout
2013-03-21 11:27 ` Benny Halevy
@ 2013-03-21 13:59 ` Myklebust, Trond
0 siblings, 0 replies; 9+ messages in thread
From: Myklebust, Trond @ 2013-03-21 13:59 UTC (permalink / raw)
To: Benny Halevy; +Cc: linux-nfs@vger.kernel.org, Boaz Harrosh, Peng Tao
On Thu, 2013-03-21 at 13:27 +0200, Benny Halevy wrote:
> On 2013-03-20 19:39, Trond Myklebust wrote:
> > In order to be able to safely return the layout in nfs4_proc_setattr,
> > we need to block new uses of the layout, wait for all outstanding
> > users of the layout to complete, commit the layout and then return it.
> >
> > This patch adds a helper in order to do all this safely.
> >
> > Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> > Cc: Boaz Harrosh <bharrosh@panasas.com>
> > ---
> > fs/nfs/nfs4proc.c | 2 +-
> > fs/nfs/pnfs.c | 22 ++++++++++++++++++++++
> > fs/nfs/pnfs.h | 6 ++++++
> > 3 files changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 5122753..c560c8f 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -2695,7 +2695,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
> > int status;
> >
> > if (pnfs_ld_layoutret_on_setattr(inode))
> > - pnfs_return_layout(inode);
> > + pnfs_commit_and_return_layout(inode);
> >
> > nfs_fattr_init(fattr);
> >
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 45badca..5a5e14d 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -868,6 +868,28 @@ out:
> > }
> > EXPORT_SYMBOL_GPL(_pnfs_return_layout);
> >
> > +int
> > +pnfs_commit_and_return_layout(struct inode *inode)
> > +{
> > + struct pnfs_layout_hdr *lo;
> > + int ret;
> > +
> > + spin_lock(&inode->i_lock);
> > + lo = NFS_I(inode)->layout;
> > + if (lo == NULL) {
> > + spin_unlock(&inode->i_lock);
> > + return 0;
> > + }
> > + /* Block new layoutgets and read/write to ds */
> > + lo->plh_block_lgets++;
> > + spin_unlock(&inode->i_lock);
> > + filemap_fdatawait(inode->i_mapping);
> > + ret = pnfs_layoutcommit_inode(inode, true);
> > + if (ret == 0)
> > + ret = _pnfs_return_layout(inode);
>
> else {
> spin_lock(&inode->i_lock);
> lo->plh_block_lgets--;
> spin_unlock(&inode->i_lock);
> }
>
> ?
Yeah... We should probably do that unconditionally. That means we need
to pin the layout, though. I'll fix and resend.
--
Trond Myklebust
Linux NFS client maintainer
NetApp
Trond.Myklebust@netapp.com
www.netapp.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout
2013-03-21 11:25 ` Boaz Harrosh
@ 2013-03-21 14:07 ` Myklebust, Trond
0 siblings, 0 replies; 9+ messages in thread
From: Myklebust, Trond @ 2013-03-21 14:07 UTC (permalink / raw)
To: Boaz Harrosh; +Cc: linux-nfs@vger.kernel.org, Benny Halevy, Peng Tao
On Thu, 2013-03-21 at 13:25 +0200, Boaz Harrosh wrote:
> On 03/20/2013 07:39 PM, Trond Myklebust wrote:
> > In order to be able to safely return the layout in nfs4_proc_setattr,
> > we need to block new uses of the layout, wait for all outstanding
> > users of the layout to complete, commit the layout and then return it.
> >
> > This patch adds a helper in order to do all this safely.
> >
>
> Hi Trond
>
> It looks like this patchset might actually fix my problem as well.
>
> I've been super swamped with internal work at Panasas, and never got
> a chance to cleanup and review my experimental fixes. So it looks like
> you bit me to it. Your work looks much better, and deeper then what I
> had. Though one concern I have is that I need those for @stable so my
> change was as minimal as possible, single patch.
I chose to split it into 3 patches because these are really 3 different
problems. The first patch (hopefully) fixes Benny's Oops. The second
fixes a memory leak, while the last one fixes a potential data
corruption.
> I have just arrived back to Israel today, and will only have time to test
> all this Next week. I have just the right setup for this. I understand that
> for you it might be harder because Files-layout does not support segments and
> my deadlock can only happen with two segments and up.
>
> [BTW: I did not even reviewed the code yet will do ASAP]
Thanks!
--
Trond Myklebust
Linux NFS client maintainer
NetApp
Trond.Myklebust@netapp.com
www.netapp.com
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2013-03-21 14:07 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-20 17:39 [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
2013-03-20 17:39 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
2013-03-20 17:39 ` [PATCH 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
2013-03-21 11:25 ` Boaz Harrosh
2013-03-21 14:07 ` Myklebust, Trond
2013-03-21 11:27 ` Benny Halevy
2013-03-21 13:59 ` Myklebust, Trond
2013-03-21 11:25 ` [PATCH 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Benny Halevy
2013-03-21 11:22 ` [PATCH 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Benny Halevy
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.