public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit
@ 2013-03-21 14:12 Trond Myklebust
  2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Trond Myklebust @ 2013-03-21 14:12 UTC (permalink / raw)
  To: linux-nfs; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao

We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
where the two are out of sync.
The first half of the problem is to ensure that pnfs_layoutcommit_inode
clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
We still need to keep the reference to those segments until the RPC call
is finished, so in order to make it clear _where_ those references come
from, we add a helper pnfs_list_write_lseg_done() that cleans up after
pnfs_list_write_lseg.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Benny Halevy <bhalevy@tonian.com>
Cc: stable@vger.kernel.org
---
 fs/nfs/nfs4proc.c | 14 --------------
 fs/nfs/pnfs.c     | 19 ++++++++++++++++++-
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b2671cb..6ccdd4f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 static void nfs4_layoutcommit_release(void *calldata)
 {
 	struct nfs4_layoutcommit_data *data = calldata;
-	struct pnfs_layout_segment *lseg, *tmp;
-	unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
 
 	pnfs_cleanup_layoutcommit(data);
-	/* Matched by references in pnfs_set_layoutcommit */
-	list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
-		list_del_init(&lseg->pls_lc_list);
-		if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
-				       &lseg->pls_flags))
-			pnfs_put_lseg(lseg);
-	}
-
-	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
-	smp_mb__after_clear_bit();
-	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
-
 	put_rpccred(data->cred);
 	kfree(data);
 }
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 48ac5aa..c6bbb53 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1746,11 +1746,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
 
 	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
 		if (lseg->pls_range.iomode == IOMODE_RW &&
-		    test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
 			list_add(&lseg->pls_lc_list, listp);
 	}
 }
 
+void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)
+{
+	struct pnfs_layout_segment *lseg, *tmp;
+	unsigned long *bitlock = &NFS_I(inode)->flags;
+
+	/* Matched by references in pnfs_set_layoutcommit */
+	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
+		list_del_init(&lseg->pls_lc_list);
+		pnfs_put_lseg(lseg);
+	}
+
+	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
+	smp_mb__after_clear_bit();
+	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
+}
+
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 {
 	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
@@ -1795,6 +1811,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
 
 	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
 		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
 }
 
 /*
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
  2013-03-21 14:12 [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
@ 2013-03-21 14:13 ` Trond Myklebust
  2013-03-21 14:13   ` [PATCH v2 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
                     ` (2 more replies)
  2013-03-21 14:32 ` [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Myklebust, Trond
  2013-03-21 17:18 ` Peng Tao
  2 siblings, 3 replies; 9+ messages in thread
From: Trond Myklebust @ 2013-03-21 14:13 UTC (permalink / raw)
  To: linux-nfs; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao

Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires
you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout
segments.
The only two sites that need to do this are the ones that call
pnfs_return_layout() without first doing a layout commit.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Benny Halevy <bhalevy@tonian.com>
Cc: stable@vger.kernel.org
---
 fs/nfs/nfs4filelayout.c |  1 -
 fs/nfs/pnfs.c           | 35 +++++++++++++++++++++++++++--------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 49eeb04..4fb234d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
 {
 	if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
 		return;
-	clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
 	pnfs_return_layout(inode);
 }
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c6bbb53..b9d0682 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
 	       lo_seg_intersecting(lseg_range, recall_range);
 }
 
+static bool lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
+		struct list_head *tmp_list)
+{
+	if (!atomic_dec_and_test(&lseg->pls_refcount))
+		return false;
+	pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
+	list_add(&lseg->pls_list, tmp_list);
+	return true;
+}
+
 /* Returns 1 if lseg is removed from list, 0 otherwise */
 static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 			     struct list_head *tmp_list)
@@ -430,11 +440,8 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
 		 */
 		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
 			atomic_read(&lseg->pls_refcount));
-		if (atomic_dec_and_test(&lseg->pls_refcount)) {
-			pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
-			list_add(&lseg->pls_list, tmp_list);
+		if (lseg_dec_and_remove_zero(lseg, tmp_list))
 			rv = 1;
-		}
 	}
 	return rv;
 }
@@ -777,6 +784,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	return lseg;
 }
 
+static void pnfs_clear_layoutcommit(struct inode *inode,
+		struct list_head *head)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct pnfs_layout_segment *lseg, *tmp;
+
+	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
+		return;
+	list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) {
+		if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+			continue;
+		lseg_dec_and_remove_zero(lseg, head);
+	}
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -808,6 +830,7 @@ _pnfs_return_layout(struct inode *ino)
 	/* Reference matched in nfs4_layoutreturn_release */
 	pnfs_get_layout_hdr(lo);
 	empty = list_empty(&lo->plh_segs);
+	pnfs_clear_layoutcommit(ino, &tmp_list);
 	pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
 	/* Don't send a LAYOUTRETURN if list was initially empty */
 	if (empty) {
@@ -820,8 +843,6 @@ _pnfs_return_layout(struct inode *ino)
 	spin_unlock(&ino->i_lock);
 	pnfs_free_lseg_list(&tmp_list);
 
-	WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
-
 	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
 	if (unlikely(lrp == NULL)) {
 		status = -ENOMEM;
@@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
 	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
-		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
 		pnfs_return_layout(hdr->inode);
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -1613,7 +1633,6 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 	dprintk("pnfs read error = %d\n", hdr->pnfs_error);
 	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
 	    PNFS_LAYOUTRET_ON_ERROR) {
-		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
 		pnfs_return_layout(hdr->inode);
 	}
 	if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout
  2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
@ 2013-03-21 14:13   ` Trond Myklebust
  2013-03-21 14:33   ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Myklebust, Trond
  2013-03-21 17:06   ` Peng Tao
  2 siblings, 0 replies; 9+ messages in thread
From: Trond Myklebust @ 2013-03-21 14:13 UTC (permalink / raw)
  To: linux-nfs; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao

In order to be able to safely return the layout in nfs4_proc_setattr,
we need to block new uses of the layout, wait for all outstanding
users of the layout to complete, commit the layout and then return it.

This patch adds a helper in order to do all this safely.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/nfs4proc.c |  2 +-
 fs/nfs/pnfs.c     | 27 +++++++++++++++++++++++++++
 fs/nfs/pnfs.h     |  6 ++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6ccdd4f..26431cf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2632,7 +2632,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	int status;
 
 	if (pnfs_ld_layoutret_on_setattr(inode))
-		pnfs_return_layout(inode);
+		pnfs_commit_and_return_layout(inode);
 
 	nfs_fattr_init(fattr);
 	
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b9d0682..2b5d98e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -866,6 +866,33 @@ out:
 }
 EXPORT_SYMBOL_GPL(_pnfs_return_layout);
 
+int
+pnfs_commit_and_return_layout(struct inode *inode)
+{
+	struct pnfs_layout_hdr *lo;
+	int ret;
+
+	spin_lock(&inode->i_lock);
+	lo = NFS_I(inode)->layout;
+	if (lo == NULL) {
+		spin_unlock(&inode->i_lock);
+		return 0;
+	}
+	pnfs_get_layout_hdr(lo);
+	/* Block new layoutgets and read/write to ds */
+	lo->plh_block_lgets++;
+	spin_unlock(&inode->i_lock);
+	filemap_fdatawait(inode->i_mapping);
+	ret = pnfs_layoutcommit_inode(inode, true);
+	if (ret == 0)
+		ret = _pnfs_return_layout(inode);
+	spin_lock(&inode->i_lock);
+	lo->plh_block_lgets--;
+	spin_unlock(&inode->i_lock);
+	pnfs_put_layout_hdr(lo);
+	return ret;
+}
+
 bool pnfs_roc(struct inode *ino)
 {
 	struct pnfs_layout_hdr *lo;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94ba804..f5f8a47 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -219,6 +219,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
+int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_write_data *);
 void pnfs_ld_read_done(struct nfs_read_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
@@ -407,6 +408,11 @@ static inline int pnfs_return_layout(struct inode *ino)
 	return 0;
 }
 
+static inline int pnfs_commit_and_return_layout(struct inode *inode)
+{
+	return 0;
+}
+
 static inline bool
 pnfs_ld_layoutret_on_setattr(struct inode *inode)
 {
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit
  2013-03-21 14:12 [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
  2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
@ 2013-03-21 14:32 ` Myklebust, Trond
  2013-03-21 17:18 ` Peng Tao
  2 siblings, 0 replies; 9+ messages in thread
From: Myklebust, Trond @ 2013-03-21 14:32 UTC (permalink / raw)
  To: linux-nfs@vger.kernel.org; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao

On Thu, 2013-03-21 at 10:12 -0400, Trond Myklebust wrote:
> We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
> NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
> where the two are out of sync.
> The first half of the problem is to ensure that pnfs_layoutcommit_inode
> clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
> We still need to keep the reference to those segments until the RPC call
> is finished, so in order to make it clear _where_ those references come
> from, we add a helper pnfs_list_write_lseg_done() that cleans up after
> pnfs_list_write_lseg.
> 
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> Acked-by: Benny Halevy <bhalevy@tonian.com>
> Cc: stable@vger.kernel.org
> ---
>  fs/nfs/nfs4proc.c | 14 --------------
>  fs/nfs/pnfs.c     | 19 ++++++++++++++++++-
>  2 files changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index b2671cb..6ccdd4f 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -6416,22 +6416,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
>  static void nfs4_layoutcommit_release(void *calldata)
>  {
>  	struct nfs4_layoutcommit_data *data = calldata;
> -	struct pnfs_layout_segment *lseg, *tmp;
> -	unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
>  
>  	pnfs_cleanup_layoutcommit(data);
> -	/* Matched by references in pnfs_set_layoutcommit */
> -	list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
> -		list_del_init(&lseg->pls_lc_list);
> -		if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
> -				       &lseg->pls_flags))
> -			pnfs_put_lseg(lseg);
> -	}
> -
> -	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
> -	smp_mb__after_clear_bit();
> -	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
> -
>  	put_rpccred(data->cred);
>  	kfree(data);
>  }
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 48ac5aa..c6bbb53 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1746,11 +1746,27 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
>  
>  	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
>  		if (lseg->pls_range.iomode == IOMODE_RW &&
> -		    test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
> +		    test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
>  			list_add(&lseg->pls_lc_list, listp);
>  	}
>  }
>  
> +void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp)

Doh! This needs to be declared static... Will fix.

> +{
> +	struct pnfs_layout_segment *lseg, *tmp;
> +	unsigned long *bitlock = &NFS_I(inode)->flags;
> +
> +	/* Matched by references in pnfs_set_layoutcommit */
> +	list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) {
> +		list_del_init(&lseg->pls_lc_list);
> +		pnfs_put_lseg(lseg);
> +	}
> +
> +	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
> +	smp_mb__after_clear_bit();
> +	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
> +}
> +
>  void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
>  {
>  	pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
> @@ -1795,6 +1811,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
>  
>  	if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
>  		nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
> +	pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list);
>  }
>  
>  /*


-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
  2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
  2013-03-21 14:13   ` [PATCH v2 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
@ 2013-03-21 14:33   ` Myklebust, Trond
  2013-03-21 17:06   ` Peng Tao
  2 siblings, 0 replies; 9+ messages in thread
From: Myklebust, Trond @ 2013-03-21 14:33 UTC (permalink / raw)
  To: linux-nfs@vger.kernel.org; +Cc: Benny Halevy, Boaz Harrosh, Peng Tao

On Thu, 2013-03-21 at 10:13 -0400, Trond Myklebust wrote:
> Note that clearing NFS_INO_LAYOUTCOMMIT is tricky, since it requires
> you to also clear the NFS_LSEG_LAYOUTCOMMIT bits from the layout
> segments.
> The only two sites that need to do this are the ones that call
> pnfs_return_layout() without first doing a layout commit.
> 
> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
> Acked-by: Benny Halevy <bhalevy@tonian.com>
> Cc: stable@vger.kernel.org
> ---
>  fs/nfs/nfs4filelayout.c |  1 -
>  fs/nfs/pnfs.c           | 35 +++++++++++++++++++++++++++--------
>  2 files changed, 27 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 49eeb04..4fb234d 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -129,7 +129,6 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo)
>  {
>  	if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
>  		return;
> -	clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
>  	pnfs_return_layout(inode);
>  }
>  
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index c6bbb53..b9d0682 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -417,6 +417,16 @@ should_free_lseg(struct pnfs_layout_range *lseg_range,
>  	       lo_seg_intersecting(lseg_range, recall_range);
>  }
>  
> +static bool lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,

I'm adding a 'pnfs' prefix for looks...

-- 
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@netapp.com
www.netapp.com

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
  2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
  2013-03-21 14:13   ` [PATCH v2 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
  2013-03-21 14:33   ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Myklebust, Trond
@ 2013-03-21 17:06   ` Peng Tao
  2013-03-21 21:59     ` Boaz Harrosh
  2 siblings, 1 reply; 9+ messages in thread
From: Peng Tao @ 2013-03-21 17:06 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, Benny Halevy, Boaz Harrosh

On Thu, Mar 21, 2013 at 10:13:00AM -0400, Trond Myklebust wrote:
<snip>
> @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
>  	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
>  	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
>  	    PNFS_LAYOUTRET_ON_ERROR) {
> -		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
Hi Trond and Boaz,

If object layout requires layout being committed before returned (as fixed in
the 3/3 patch), is it a potential problem to directly return layout here as
well? e.g., if one lseg is successfully written and pending layoutcommit,
then another lseg of the same file failed read/write, then layout will be
returned w/o layoutcommit. For blocklayout, it is a potential data corruption
and that's why block layout doesn't set PNFS_LAYOUTRET_ON_ERROR bit. So
I'm wondering if object will suffer from the same issue?

Thanks,
Tao

-- 
Here lieth one whos name was writ on water.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit
  2013-03-21 14:12 [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
  2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
  2013-03-21 14:32 ` [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Myklebust, Trond
@ 2013-03-21 17:18 ` Peng Tao
  2 siblings, 0 replies; 9+ messages in thread
From: Peng Tao @ 2013-03-21 17:18 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-nfs, Benny Halevy, Boaz Harrosh

On Thu, Mar 21, 2013 at 10:12:59AM -0400, Trond Myklebust wrote:
> We need to clear the NFS_LSEG_LAYOUTCOMMIT bits atomically with the
> NFS_INO_LAYOUTCOMMIT bit, otherwise we may end up with situations
> where the two are out of sync.
> The first half of the problem is to ensure that pnfs_layoutcommit_inode
> clears the NFS_LSEG_LAYOUTCOMMIT bit through pnfs_list_write_lseg.
> We still need to keep the reference to those segments until the RPC call
> is finished, so in order to make it clear _where_ those references come
> from, we add a helper pnfs_list_write_lseg_done() that cleans up after
> pnfs_list_write_lseg.
> 
I don't see any harm to block layout client in all the three patches.
Thanks for sorting it out.

Best,
Tao

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
  2013-03-21 17:06   ` Peng Tao
@ 2013-03-21 21:59     ` Boaz Harrosh
  2013-03-22  1:28       ` Peng Tao
  0 siblings, 1 reply; 9+ messages in thread
From: Boaz Harrosh @ 2013-03-21 21:59 UTC (permalink / raw)
  To: Peng Tao; +Cc: Trond Myklebust, linux-nfs, Benny Halevy

On 03/21/2013 07:06 PM, Peng Tao wrote:
> On Thu, Mar 21, 2013 at 10:13:00AM -0400, Trond Myklebust wrote:
> <snip>
>> @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
>>  	dprintk("pnfs write error = %d\n", hdr->pnfs_error);
>>  	if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
>>  	    PNFS_LAYOUTRET_ON_ERROR) {
>> -		clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
> Hi Trond and Boaz,
> 
> If object layout requires layout being committed before returned (as fixed in
> the 3/3 patch), is it a potential problem to directly return layout here as
> well? e.g., if one lseg is successfully written and pending layoutcommit,
> then another lseg of the same file failed read/write, then layout will be
> returned w/o layoutcommit. For blocklayout, it is a potential data corruption
> and that's why block layout doesn't set PNFS_LAYOUTRET_ON_ERROR bit. So
> I'm wondering if object will suffer from the same issue?
> 

Hi Tao

No, not at all. The objects layout has error reported as part of the
layout_return OPT. With exact devices that failed and why. In fact the
data should not be "committed" per ce, but a recovery process must be
preformed because we know that not all data of a stripe was committed
including parity, and the raid5 check-some is surly wrong. This is why
there is an error bit in layout_commit OPT to denote that this is not
a true commit and that there is an Error report on the way, for those
clients that must always lo_commit before lo_return even on Error.

(I know that 4.2 has plans for error-report RETURNs for other layout
 types as well, this is part of why)

> Thanks,
> Tao
> 

Cheers
Boaz


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn
  2013-03-21 21:59     ` Boaz Harrosh
@ 2013-03-22  1:28       ` Peng Tao
  0 siblings, 0 replies; 9+ messages in thread
From: Peng Tao @ 2013-03-22  1:28 UTC (permalink / raw)
  To: Boaz Harrosh; +Cc: Trond Myklebust, linux-nfs, Benny Halevy

On Fri, Mar 22, 2013 at 5:59 AM, Boaz Harrosh <bharrosh@panasas.com> wrote:
> On 03/21/2013 07:06 PM, Peng Tao wrote:
>> On Thu, Mar 21, 2013 at 10:13:00AM -0400, Trond Myklebust wrote:
>> <snip>
>>> @@ -1458,7 +1479,6 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
>>>      dprintk("pnfs write error = %d\n", hdr->pnfs_error);
>>>      if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
>>>          PNFS_LAYOUTRET_ON_ERROR) {
>>> -            clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
>> Hi Trond and Boaz,
>>
>> If object layout requires layout being committed before returned (as fixed in
>> the 3/3 patch), is it a potential problem to directly return layout here as
>> well? e.g., if one lseg is successfully written and pending layoutcommit,
>> then another lseg of the same file failed read/write, then layout will be
>> returned w/o layoutcommit. For blocklayout, it is a potential data corruption
>> and that's why block layout doesn't set PNFS_LAYOUTRET_ON_ERROR bit. So
>> I'm wondering if object will suffer from the same issue?
>>
>
> Hi Tao
>
> No, not at all. The objects layout has error reported as part of the
> layout_return OPT. With exact devices that failed and why. In fact the
> data should not be "committed" per ce, but a recovery process must be
> preformed because we know that not all data of a stripe was committed
> including parity, and the raid5 check-some is surly wrong. This is why
> there is an error bit in layout_commit OPT to denote that this is not
> a true commit and that there is an Error report on the way, for those
> clients that must always lo_commit before lo_return even on Error.
>
Thanks for explaining. I learned more about objects :)

Cheers,
Tao

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-03-22  1:28 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-03-21 14:12 [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Trond Myklebust
2013-03-21 14:13 ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Trond Myklebust
2013-03-21 14:13   ` [PATCH v2 3/3] NFSv4.1: Add a helper pnfs_commit_and_return_layout Trond Myklebust
2013-03-21 14:33   ` [PATCH v2 2/3] NFSv4.1: Always clear the NFS_INO_LAYOUTCOMMIT in layoutreturn Myklebust, Trond
2013-03-21 17:06   ` Peng Tao
2013-03-21 21:59     ` Boaz Harrosh
2013-03-22  1:28       ` Peng Tao
2013-03-21 14:32 ` [PATCH v2 1/3] NFSv4.1: Fix a race in pNFS layoutcommit Myklebust, Trond
2013-03-21 17:18 ` Peng Tao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox