All of lore.kernel.org
 help / color / mirror / Atom feed
* [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs
@ 2019-08-12 18:58 Liu Bo
  2019-08-12 18:58 ` [Virtio-fs] [PATCH 2/2] Virtio-fs: align down start offset when reclaim dmap range Liu Bo
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Liu Bo @ 2019-08-12 18:58 UTC (permalink / raw)
  To: virtio-fs

Currently fuse/virtio-fs de-allocation doesn't clean up dax mapping range,
which may result in hang problems when the shared backend fs experiences
"NO Space Error".

The root cause is that the first writing to a dax mapping range triggers a
WRITE page fault on host side, which calls ->page_mkwrite() where block
allocation is required, if the fs is already full, ->page_mkwrite() returns
error so that page fault fails, however, for kvm is not able to propogate
errors while handling EPT_VIOLATION, thus guest keeps trying to resolve the
fault.

Fortunately, we can fix/work around the problem by dropping dax mapping
range for de-allocation operations.

Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
 fs/fuse/dir.c    | 3 +++
 fs/fuse/file.c   | 9 +++++++--
 fs/fuse/fuse_i.h | 2 +-
 fs/fuse/inode.c  | 2 +-
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ed740a5..99a218c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1805,6 +1805,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 
 		truncate_pagecache(inode, outarg.attr.size);
 		invalidate_inode_pages2(inode->i_mapping);
+		if (IS_DAX(inode) && oldsize > outarg.attr.size)
+			fuse_cleanup_inode_mappings(inode, outarg.attr.size,
+						    (loff_t)-1);
 		up_write(&fi->i_mmap_sem);
 	}
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c52260c..4f2d908 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -417,6 +417,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
 	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
 	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
 
+	down_write(&fi->i_dmap_sem);
 	while (1) {
 		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
 							 end);
@@ -426,6 +427,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
 		num++;
 		list_add(&dmap->list, &to_remove);
 	}
+	up_write(&fi->i_dmap_sem);
 
 	/* Nothing to remove */
 	if (list_empty(&to_remove))
@@ -478,7 +480,7 @@ static int dmap_removemapping_one(struct inode *inode,
  * that fuse inode interval tree. If that lock is taken then lock validator
  * complains of deadlock situation w.r.t fs_reclaim lock.
  */
-void fuse_cleanup_inode_mappings(struct inode *inode)
+void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	/*
@@ -486,7 +488,7 @@ void fuse_cleanup_inode_mappings(struct inode *inode)
 	 * before we arrive here. So we should not have to worry about
 	 * any pages/exception entries still associated with inode.
 	 */
-	inode_reclaim_dmap_range(fc, inode, 0, -1);
+	inode_reclaim_dmap_range(fc, inode, start, end);
 }
 
 void fuse_finish_open(struct inode *inode, struct file *file)
@@ -3867,6 +3869,9 @@ static long __fuse_file_fallocate(struct file *file, int mode,
 		}
 
 		truncate_pagecache_range(inode, offset, offset + length - 1);
+		if (IS_DAX(inode))
+			fuse_cleanup_inode_mappings(inode, offset,
+						    offset + length - 1);
 		up_write(&fi->i_mmap_sem);
 	}
 	fuse_invalidate_attr(inode);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6956b62..6104d61 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -1213,6 +1213,6 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
  */
 u64 fuse_get_unique(struct fuse_iqueue *fiq);
 void fuse_dax_free_mem_worker(struct work_struct *work);
-void fuse_cleanup_inode_mappings(struct inode *inode);
+void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end);
 
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e0d792b..629c1a7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -125,7 +125,7 @@ static void fuse_evict_inode(struct inode *inode)
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
 		if (IS_DAX(inode)) {
-			fuse_cleanup_inode_mappings(inode);
+			fuse_cleanup_inode_mappings(inode, 0, (loff_t)-1);
 			WARN_ON(fi->nr_dmaps);
 		}
 		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Virtio-fs] [PATCH 2/2] Virtio-fs: align down start offset when reclaim dmap range
  2019-08-12 18:58 [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Liu Bo
@ 2019-08-12 18:58 ` Liu Bo
  2019-08-13  9:39 ` [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Eric Ren
  2019-08-13 15:41 ` piaojun
  2 siblings, 0 replies; 6+ messages in thread
From: Liu Bo @ 2019-08-12 18:58 UTC (permalink / raw)
  To: virtio-fs

Given a share directory is mounted on host like,
---
Filesystem     1K-blocks    Used Available Use% Mounted on
/dev/loop1       1998672 1982288         0 100% /home/b.liu/1_source/2_github/virtio-fs-all/qemu/share_dir2
---

Here is the reproducer,
---

umount /mnt/test/
mount -t virtio_fs btest /mnt/test/ -otag=myfs-2,rootmode=040000,user_id=0,group_id=0,dax

rm -f /mnt/test/*
fallocate -l 1500M /mnt/test/dummy

truncate -s 10G /mnt/test/foobar
xfs_io -c "pwrite 0 2G" /mnt/test/foobar
truncate -s 4k /mnt/test/foobar

fallocate -l 2G /mnt/test/foo1

truncate -s 10G /mnt/test/foobar
xfs_io -c "pwrite 0 2M" /mnt/test/foobar
---

File 'foobar' is truncated down from 10G to 4k but the dmap [0, 2M] is
still in fi->dmap_tree.  Now that all space has been allocated for file
'dummy' and 'foo1', the very last write to file 'foobar' will hang since
the underlying page_mkwrite encounters -ENOSPC error.

This fixes the problem by aligning down the start offset in
inode_reclaim_dmap_range().

Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
---
 fs/fuse/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4f2d908..0033ace 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -414,7 +414,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
 	 * Interval tree search matches intersecting entries. Adjust the range
 	 * to avoid dropping partial valid entries.
 	 */
-	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
+	start = ALIGN_DOWN(start, FUSE_DAX_MEM_RANGE_SZ);
 	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
 
 	down_write(&fi->i_dmap_sem);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs
  2019-08-12 18:58 [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Liu Bo
  2019-08-12 18:58 ` [Virtio-fs] [PATCH 2/2] Virtio-fs: align down start offset when reclaim dmap range Liu Bo
@ 2019-08-13  9:39 ` Eric Ren
  2019-08-13 15:41 ` piaojun
  2 siblings, 0 replies; 6+ messages in thread
From: Eric Ren @ 2019-08-13  9:39 UTC (permalink / raw)
  To: Liu Bo; +Cc: virtio-fs

On Tue, Aug 13, 2019 at 02:58:54AM +0800, Liu Bo wrote:
> Currently fuse/virtio-fs de-allocation doesn't clean up dax mapping range,
> which may result in hang problems when the shared backend fs experiences
> "NO Space Error".
> 
> The root cause is that the first writing to a dax mapping range triggers a
> WRITE page fault on host side, which calls ->page_mkwrite() where block
> allocation is required, if the fs is already full, ->page_mkwrite() returns
> error so that page fault fails, however, for kvm is not able to propogate
> errors while handling EPT_VIOLATION, thus guest keeps trying to resolve the
> fault.
> 
> Fortunately, we can fix/work around the problem by dropping dax mapping
> range for de-allocation operations.
> 
> Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>

Hi,

This issue can be reproduced by xfstest generic/476 on a share dir
hosted by 10G loop device.

With this patch, this testcase can pass.

Tested-by: Eric Ren <renzhen@linux.alibaba.com>

Eric

> ---
>  fs/fuse/dir.c    | 3 +++
>  fs/fuse/file.c   | 9 +++++++--
>  fs/fuse/fuse_i.h | 2 +-
>  fs/fuse/inode.c  | 2 +-
>  4 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index ed740a5..99a218c 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1805,6 +1805,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>  
>  		truncate_pagecache(inode, outarg.attr.size);
>  		invalidate_inode_pages2(inode->i_mapping);
> +		if (IS_DAX(inode) && oldsize > outarg.attr.size)
> +			fuse_cleanup_inode_mappings(inode, outarg.attr.size,
> +						    (loff_t)-1);
>  		up_write(&fi->i_mmap_sem);
>  	}
>  
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index c52260c..4f2d908 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -417,6 +417,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
>  	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
>  	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
>  
> +	down_write(&fi->i_dmap_sem);
>  	while (1) {
>  		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
>  							 end);
> @@ -426,6 +427,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
>  		num++;
>  		list_add(&dmap->list, &to_remove);
>  	}
> +	up_write(&fi->i_dmap_sem);
>  
>  	/* Nothing to remove */
>  	if (list_empty(&to_remove))
> @@ -478,7 +480,7 @@ static int dmap_removemapping_one(struct inode *inode,
>   * that fuse inode interval tree. If that lock is taken then lock validator
>   * complains of deadlock situation w.r.t fs_reclaim lock.
>   */
> -void fuse_cleanup_inode_mappings(struct inode *inode)
> +void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	/*
> @@ -486,7 +488,7 @@ void fuse_cleanup_inode_mappings(struct inode *inode)
>  	 * before we arrive here. So we should not have to worry about
>  	 * any pages/exception entries still associated with inode.
>  	 */
> -	inode_reclaim_dmap_range(fc, inode, 0, -1);
> +	inode_reclaim_dmap_range(fc, inode, start, end);
>  }
>  
>  void fuse_finish_open(struct inode *inode, struct file *file)
> @@ -3867,6 +3869,9 @@ static long __fuse_file_fallocate(struct file *file, int mode,
>  		}
>  
>  		truncate_pagecache_range(inode, offset, offset + length - 1);
> +		if (IS_DAX(inode))
> +			fuse_cleanup_inode_mappings(inode, offset,
> +						    offset + length - 1);
>  		up_write(&fi->i_mmap_sem);
>  	}
>  	fuse_invalidate_attr(inode);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 6956b62..6104d61 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -1213,6 +1213,6 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
>   */
>  u64 fuse_get_unique(struct fuse_iqueue *fiq);
>  void fuse_dax_free_mem_worker(struct work_struct *work);
> -void fuse_cleanup_inode_mappings(struct inode *inode);
> +void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end);
>  
>  #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index e0d792b..629c1a7 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -125,7 +125,7 @@ static void fuse_evict_inode(struct inode *inode)
>  		struct fuse_conn *fc = get_fuse_conn(inode);
>  		struct fuse_inode *fi = get_fuse_inode(inode);
>  		if (IS_DAX(inode)) {
> -			fuse_cleanup_inode_mappings(inode);
> +			fuse_cleanup_inode_mappings(inode, 0, (loff_t)-1);
>  			WARN_ON(fi->nr_dmaps);
>  		}
>  		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
> -- 
> 1.8.3.1
> 
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs
  2019-08-12 18:58 [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Liu Bo
  2019-08-12 18:58 ` [Virtio-fs] [PATCH 2/2] Virtio-fs: align down start offset when reclaim dmap range Liu Bo
  2019-08-13  9:39 ` [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Eric Ren
@ 2019-08-13 15:41 ` piaojun
  2019-08-13 18:24   ` Liu Bo
  2 siblings, 1 reply; 6+ messages in thread
From: piaojun @ 2019-08-13 15:41 UTC (permalink / raw)
  To: Liu Bo, virtio-fs

Hi Bo,

On 2019/8/13 2:58, Liu Bo wrote:
> Currently fuse/virtio-fs de-allocation doesn't clean up dax mapping range,
> which may result in hang problems when the shared backend fs experiences
> "NO Space Error".
> 
> The root cause is that the first writing to a dax mapping range triggers a
> WRITE page fault on host side, which calls ->page_mkwrite() where block
> allocation is required, if the fs is already full, ->page_mkwrite() returns
> error so that page fault fails, however, for kvm is not able to propogate
> errors while handling EPT_VIOLATION, thus guest keeps trying to resolve the
> fault.
> 
> Fortunately, we can fix/work around the problem by dropping dax mapping
> range for de-allocation operations.
> 
> Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
> ---
>  fs/fuse/dir.c    | 3 +++
>  fs/fuse/file.c   | 9 +++++++--
>  fs/fuse/fuse_i.h | 2 +-
>  fs/fuse/inode.c  | 2 +-
>  4 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index ed740a5..99a218c 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1805,6 +1805,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>  
>  		truncate_pagecache(inode, outarg.attr.size);
>  		invalidate_inode_pages2(inode->i_mapping);
> +		if (IS_DAX(inode) && oldsize > outarg.attr.size)
> +			fuse_cleanup_inode_mappings(inode, outarg.attr.size,
> +						    (loff_t)-1);
>  		up_write(&fi->i_mmap_sem);
>  	}
>  
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index c52260c..4f2d908 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -417,6 +417,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
>  	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
>  	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
>  
> +	down_write(&fi->i_dmap_sem);

I'm not sure if this is related with the hang bug. Or it's another lock
missing bug which could be extracted as a new patch.

>  	while (1) {
>  		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
>  							 end);
> @@ -426,6 +427,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
>  		num++;
>  		list_add(&dmap->list, &to_remove);
>  	}
> +	up_write(&fi->i_dmap_sem);
>  
>  	/* Nothing to remove */
>  	if (list_empty(&to_remove))
> @@ -478,7 +480,7 @@ static int dmap_removemapping_one(struct inode *inode,
>   * that fuse inode interval tree. If that lock is taken then lock validator
>   * complains of deadlock situation w.r.t fs_reclaim lock.
>   */
> -void fuse_cleanup_inode_mappings(struct inode *inode)
> +void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end)
>  {
>  	struct fuse_conn *fc = get_fuse_conn(inode);
>  	/*
> @@ -486,7 +488,7 @@ void fuse_cleanup_inode_mappings(struct inode *inode)
>  	 * before we arrive here. So we should not have to worry about
>  	 * any pages/exception entries still associated with inode.
>  	 */
> -	inode_reclaim_dmap_range(fc, inode, 0, -1);
> +	inode_reclaim_dmap_range(fc, inode, start, end);
>  }
>  
>  void fuse_finish_open(struct inode *inode, struct file *file)
> @@ -3867,6 +3869,9 @@ static long __fuse_file_fallocate(struct file *file, int mode,

It's strange that I could not find *__fuse_file_fallocate()* at
virtio-fs-dev-5.1 code.

>  		}
>  
>  		truncate_pagecache_range(inode, offset, offset + length - 1);
> +		if (IS_DAX(inode))
> +			fuse_cleanup_inode_mappings(inode, offset,
> +						    offset + length - 1);

I prefer using "loff_t endbyte" to replace "offset + length - 1" which
makes code easier.

Jun


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs
  2019-08-13 15:41 ` piaojun
@ 2019-08-13 18:24   ` Liu Bo
  2019-08-14  0:29     ` piaojun
  0 siblings, 1 reply; 6+ messages in thread
From: Liu Bo @ 2019-08-13 18:24 UTC (permalink / raw)
  To: piaojun; +Cc: virtio-fs

On Tue, Aug 13, 2019 at 11:41:23PM +0800, piaojun wrote:
> Hi Bo,
> 
> On 2019/8/13 2:58, Liu Bo wrote:
> > Currently fuse/virtio-fs de-allocation doesn't clean up dax mapping range,
> > which may result in hang problems when the shared backend fs experiences
> > "NO Space Error".
> > 
> > The root cause is that the first writing to a dax mapping range triggers a
> > WRITE page fault on host side, which calls ->page_mkwrite() where block
> > allocation is required, if the fs is already full, ->page_mkwrite() returns
> > error so that page fault fails, however, for kvm is not able to propogate
> > errors while handling EPT_VIOLATION, thus guest keeps trying to resolve the
> > fault.
> > 
> > Fortunately, we can fix/work around the problem by dropping dax mapping
> > range for de-allocation operations.
> > 
> > Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
> > ---
> >  fs/fuse/dir.c    | 3 +++
> >  fs/fuse/file.c   | 9 +++++++--
> >  fs/fuse/fuse_i.h | 2 +-
> >  fs/fuse/inode.c  | 2 +-
> >  4 files changed, 12 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > index ed740a5..99a218c 100644
> > --- a/fs/fuse/dir.c
> > +++ b/fs/fuse/dir.c
> > @@ -1805,6 +1805,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
> >  
> >  		truncate_pagecache(inode, outarg.attr.size);
> >  		invalidate_inode_pages2(inode->i_mapping);
> > +		if (IS_DAX(inode) && oldsize > outarg.attr.size)
> > +			fuse_cleanup_inode_mappings(inode, outarg.attr.size,
> > +						    (loff_t)-1);
> >  		up_write(&fi->i_mmap_sem);
> >  	}
> >  
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index c52260c..4f2d908 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -417,6 +417,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
> >  	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
> >  	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
> >  
> > +	down_write(&fi->i_dmap_sem);
> 
> I'm not sure if this is related with the hang bug. Or it's another lock
> missing bug which could be extracted as a new patch.
>

inode_reclaim_dmap_range was only used in inode evict path where
others are unable to access fi->dmap_tree.

This patch adds two callers for the function, as such we need the lock
to protect the tree operations.

> >  	while (1) {
> >  		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
> >  							 end);
> > @@ -426,6 +427,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
> >  		num++;
> >  		list_add(&dmap->list, &to_remove);
> >  	}
> > +	up_write(&fi->i_dmap_sem);
> >  
> >  	/* Nothing to remove */
> >  	if (list_empty(&to_remove))
> > @@ -478,7 +480,7 @@ static int dmap_removemapping_one(struct inode *inode,
> >   * that fuse inode interval tree. If that lock is taken then lock validator
> >   * complains of deadlock situation w.r.t fs_reclaim lock.
> >   */
> > -void fuse_cleanup_inode_mappings(struct inode *inode)
> > +void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end)
> >  {
> >  	struct fuse_conn *fc = get_fuse_conn(inode);
> >  	/*
> > @@ -486,7 +488,7 @@ void fuse_cleanup_inode_mappings(struct inode *inode)
> >  	 * before we arrive here. So we should not have to worry about
> >  	 * any pages/exception entries still associated with inode.
> >  	 */
> > -	inode_reclaim_dmap_range(fc, inode, 0, -1);
> > +	inode_reclaim_dmap_range(fc, inode, start, end);
> >  }
> >  
> >  void fuse_finish_open(struct inode *inode, struct file *file)
> > @@ -3867,6 +3869,9 @@ static long __fuse_file_fallocate(struct file *file, int mode,
> 
> It's strange that I could not find *__fuse_file_fallocate()* at
> virtio-fs-dev-5.1 code.
>

Oh OK, I didn't realize that this piece of code has been refactored,
it was also used by dax write path, anyway __fuse_file_fallocate() was
basically identical to fuse_file_fallocate().

> >  		}
> >  
> >  		truncate_pagecache_range(inode, offset, offset + length - 1);
> > +		if (IS_DAX(inode))
> > +			fuse_cleanup_inode_mappings(inode, offset,
> > +						    offset + length - 1);
> 
> I prefer using "loff_t endbyte" to replace "offset + length - 1" which
> makes code easier.
>

Well, I'd like to leave it for a later cleanup patch in which I'll
convert all (offset+length-1) here.

thanks,
-liubo


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs
  2019-08-13 18:24   ` Liu Bo
@ 2019-08-14  0:29     ` piaojun
  0 siblings, 0 replies; 6+ messages in thread
From: piaojun @ 2019-08-14  0:29 UTC (permalink / raw)
  To: bo.liu; +Cc: virtio-fs



On 2019/8/14 2:24, Liu Bo wrote:
> On Tue, Aug 13, 2019 at 11:41:23PM +0800, piaojun wrote:
>> Hi Bo,
>>
>> On 2019/8/13 2:58, Liu Bo wrote:
>>> Currently fuse/virtio-fs de-allocation doesn't clean up dax mapping range,
>>> which may result in hang problems when the shared backend fs experiences
>>> "NO Space Error".
>>>
>>> The root cause is that the first writing to a dax mapping range triggers a
>>> WRITE page fault on host side, which calls ->page_mkwrite() where block
>>> allocation is required, if the fs is already full, ->page_mkwrite() returns
>>> error so that page fault fails, however, for kvm is not able to propogate
>>> errors while handling EPT_VIOLATION, thus guest keeps trying to resolve the
>>> fault.
>>>
>>> Fortunately, we can fix/work around the problem by dropping dax mapping
>>> range for de-allocation operations.
>>>
>>> Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
>>> ---
>>>  fs/fuse/dir.c    | 3 +++
>>>  fs/fuse/file.c   | 9 +++++++--
>>>  fs/fuse/fuse_i.h | 2 +-
>>>  fs/fuse/inode.c  | 2 +-
>>>  4 files changed, 12 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
>>> index ed740a5..99a218c 100644
>>> --- a/fs/fuse/dir.c
>>> +++ b/fs/fuse/dir.c
>>> @@ -1805,6 +1805,9 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>>>  
>>>  		truncate_pagecache(inode, outarg.attr.size);
>>>  		invalidate_inode_pages2(inode->i_mapping);
>>> +		if (IS_DAX(inode) && oldsize > outarg.attr.size)
>>> +			fuse_cleanup_inode_mappings(inode, outarg.attr.size,
>>> +						    (loff_t)-1);
>>>  		up_write(&fi->i_mmap_sem);
>>>  	}
>>>  
>>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>>> index c52260c..4f2d908 100644
>>> --- a/fs/fuse/file.c
>>> +++ b/fs/fuse/file.c
>>> @@ -417,6 +417,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
>>>  	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
>>>  	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
>>>  
>>> +	down_write(&fi->i_dmap_sem);
>>
>> I'm not sure if this is related with the hang bug. Or it's another lock
>> missing bug which could be extracted as a new patch.
>>
> 
> inode_reclaim_dmap_range was only used in inode evict path where
> others are unable to access fi->dmap_tree.
> 
> This patch adds two callers for the function, as such we need the lock
> to protect the tree operations.

Got it, thanks.

> 
>>>  	while (1) {
>>>  		dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
>>>  							 end);
>>> @@ -426,6 +427,7 @@ static void inode_reclaim_dmap_range(struct fuse_conn *fc, struct inode *inode,
>>>  		num++;
>>>  		list_add(&dmap->list, &to_remove);
>>>  	}
>>> +	up_write(&fi->i_dmap_sem);
>>>  
>>>  	/* Nothing to remove */
>>>  	if (list_empty(&to_remove))
>>> @@ -478,7 +480,7 @@ static int dmap_removemapping_one(struct inode *inode,
>>>   * that fuse inode interval tree. If that lock is taken then lock validator
>>>   * complains of deadlock situation w.r.t fs_reclaim lock.
>>>   */
>>> -void fuse_cleanup_inode_mappings(struct inode *inode)
>>> +void fuse_cleanup_inode_mappings(struct inode *inode, loff_t start, loff_t end)
>>>  {
>>>  	struct fuse_conn *fc = get_fuse_conn(inode);
>>>  	/*
>>> @@ -486,7 +488,7 @@ void fuse_cleanup_inode_mappings(struct inode *inode)
>>>  	 * before we arrive here. So we should not have to worry about
>>>  	 * any pages/exception entries still associated with inode.
>>>  	 */
>>> -	inode_reclaim_dmap_range(fc, inode, 0, -1);
>>> +	inode_reclaim_dmap_range(fc, inode, start, end);
>>>  }
>>>  
>>>  void fuse_finish_open(struct inode *inode, struct file *file)
>>> @@ -3867,6 +3869,9 @@ static long __fuse_file_fallocate(struct file *file, int mode,
>>
>> It's strange that I could not find *__fuse_file_fallocate()* at
>> virtio-fs-dev-5.1 code.
>>
> 
> Oh OK, I didn't realize that this piece of code has been refactored,
> it was also used by dax write path, anyway __fuse_file_fallocate() was
> basically identical to fuse_file_fallocate().
> 
>>>  		}
>>>  
>>>  		truncate_pagecache_range(inode, offset, offset + length - 1);
>>> +		if (IS_DAX(inode))
>>> +			fuse_cleanup_inode_mappings(inode, offset,
>>> +						    offset + length - 1);
>>
>> I prefer using "loff_t endbyte" to replace "offset + length - 1" which
>> makes code easier.
>>
> 
> Well, I'd like to leave it for a later cleanup patch in which I'll
> convert all (offset+length-1) here.

OK, that will be good to me.

Jun


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-08-14  0:29 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-08-12 18:58 [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Liu Bo
2019-08-12 18:58 ` [Virtio-fs] [PATCH 2/2] Virtio-fs: align down start offset when reclaim dmap range Liu Bo
2019-08-13  9:39 ` [Virtio-fs] [PATCH 1/2] Virtio-fs: fix hang due to ENOSPC in shared backend fs Eric Ren
2019-08-13 15:41 ` piaojun
2019-08-13 18:24   ` Liu Bo
2019-08-14  0:29     ` piaojun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.