All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vivek Goyal <vgoyal@redhat.com>
To: Peng Tao <tao.peng@linux.alibaba.com>
Cc: virtio-fs@redhat.com
Subject: Re: [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode
Date: Fri, 10 May 2019 11:23:38 -0400	[thread overview]
Message-ID: <20190510152338.GA15465@redhat.com> (raw)
In-Reply-To: <20190510102957.8705-3-tao.peng@linux.alibaba.com>

On Fri, May 10, 2019 at 06:29:57PM +0800, Peng Tao wrote:
> The obseleted dmap entries can be put back to global free list
> immediately and we don't have to rely on reclaim code to free them.
> 
> Signed-off-by: Peng Tao <tao.peng@linux.alibaba.com>
> ---
>  fs/fuse/dir.c    |  5 ++++
>  fs/fuse/file.c   | 69 +++++++++++++++++++++++++++++++++++++++---------
>  fs/fuse/fuse_i.h |  2 ++
>  3 files changed, 63 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 3f923fe7841a..233c3ed391f1 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -1751,6 +1751,11 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
>  		down_write(&fi->i_mmap_sem);
>  		truncate_pagecache(inode, outarg.attr.size);
>  		invalidate_inode_pages2(inode->i_mapping);
> +		// Free dmap beyond i_size
> +		if (IS_DAX(inode) && oldsize > outarg.attr.size)
> +			fuse_dax_free_mappings_range(fc, inode,
> +						     outarg.attr.size, -1);
> +
>  		up_write(&fi->i_mmap_sem);
>  	}
>  
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 51faed351c7c..6d130f2f3a23 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -30,6 +30,8 @@ static long __fuse_file_fallocate(struct file *file, int mode,
>  					loff_t offset, loff_t length);
>  static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
>  					struct inode *inode);
> +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi,
> +					struct fuse_dax_mapping *dmap);
>  
>  static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
>  			  int opcode, struct fuse_open_out *outargp)
> @@ -3752,9 +3754,7 @@ int fuse_dax_reclaim_dmap_locked(struct fuse_conn *fc, struct inode *inode,
>  		return ret;
>  	}
>  
> -	/* Remove dax mapping from inode interval tree now */
> -	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> -	fi->nr_dmaps--;
> +	fuse_dax_do_remove_mapping_locked(fi, dmap);
>  	return 0;
>  }
>  
> @@ -3831,6 +3831,58 @@ static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
>  	}
>  }
>  
> +/* Cleanup dmap entry and add back to free list */
> +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct fuse_dax_mapping *dmap)
> +{
> +	pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> +		"window_offset=0x%llx length=0x%llx\n", dmap->start,
> +		dmap->end, dmap->window_offset, dmap->length);
> +	spin_lock(&fc->lock);
> +	__dmap_remove_busy_list(fc, dmap);
> +	dmap->inode = NULL;
> +	dmap->start = dmap->end = 0;
> +	__free_dax_mapping(fc, dmap);
> +	spin_unlock(&fc->lock);
> +}
> +
> +/* Remove dax mapping from inode interval tree */
> +static void fuse_dax_do_remove_mapping_locked(struct fuse_inode *fi, struct fuse_dax_mapping *dmap)
> +{
> +	fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> +	fi->nr_dmaps--;
> +}

this is just a two line funciton. I think lets open code it and we don't
need a separate function for this.


> +
> +/*
> + * Free inode dmap entries whose range falls entirely inside [start, end].
> + * Called with inode->i_rwsem and fuse_inode->i_mmap_sem held.
> + *
> + * No need to send FUSE_REMOVEMAPPING to userspace because the userspace mappings
> + * will be overridden next time dmap is used -- same logic is applied in
> + * fuse_dax_reclaim_first_mapping().

This is true that sending FUSE_REMOVEMAPPING is not strictly needed. But,
thinking more about it, what if range does not get used soon. Then it
will keep resources unnecessarily busy on host? (fd, file, inode, dentry,
vma etc).

I think it probably is a good idea to send FUSE_REMOVEMAPPING to daemon
when range is being freed. Only case where we are reclaiming a range
and reusing immediately, we can skip sending FUSE_REMOVEMAPPING.

This will especially be more meaniningful if dax window is large and
has potential to create lots of mappings.


Thanks
Vivek

> + */
> +void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode, loff_t start, loff_t end)
> +{
> +	struct fuse_inode *fi = get_fuse_inode(inode);
> +	struct fuse_dax_mapping *dmap;
> +
> +	WARN_ON(!inode_is_locked(inode));
> +	WARN_ON(!rwsem_is_locked(&fi->i_mmap_sem));
> +
> +	/* interval tree search matches intersecting entries.
> +	 * Adjust the range to avoid dropping partial valid entries. */
> +	start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
> +	end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
> +
> +	pr_debug("fuse: fuse_dax_free_mappings_range start=0x%llx, end=0x%llx\n", start, end);
> +	/* Lock ordering follows fuse_dax_free_one_mapping() */
> +	down_write(&fi->i_dmap_sem);
> +	while ((dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start, end))) {
> +		fuse_dax_do_remove_mapping_locked(fi, dmap);
> +		fuse_dax_do_free_mapping_locked(fc, dmap);
> +	}
> +	up_write(&fi->i_dmap_sem);
> +}
> +
>  int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
>  				u64 dmap_start)
>  {
> @@ -3852,17 +3904,8 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
>  	if (ret < 0)
>  		return ret;
>  
> -	/* Cleanup dmap entry and add back to free list */
> -	spin_lock(&fc->lock);
> -	__dmap_remove_busy_list(fc, dmap);
> -	dmap->inode = NULL;
> -	dmap->start = dmap->end = 0;
> -	__free_dax_mapping(fc, dmap);
> -	spin_unlock(&fc->lock);
> +	fuse_dax_do_free_mapping_locked(fc, dmap);
>  
> -	pr_debug("fuse: freed memory range window_offset=0x%llx,"
> -				" length=0x%llx\n", dmap->window_offset,
> -				dmap->length);
>  	return ret;
>  }
>  
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 1149281ab1e8..e273f1209f5b 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -1189,5 +1189,7 @@ unsigned fuse_len_args(unsigned numargs, struct fuse_arg *args);
>  u64 fuse_get_unique(struct fuse_iqueue *fiq);
>  void fuse_dax_free_mem_worker(struct work_struct *work);
>  void fuse_removemapping(struct inode *inode);
> +void fuse_dax_free_mappings_range(struct fuse_conn *fc, struct inode *inode,
> +			loff_t start, loff_t end);
>  
>  #endif /* _FS_FUSE_I_H */
> -- 
> 2.17.1
> 


  reply	other threads:[~2019-05-10 15:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-10 10:29 [Virtio-fs] [PATCH 0/2] virtiofs: remove dmap when truncating inode Peng Tao
2019-05-10 10:29 ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Peng Tao
2019-05-10 10:29   ` [Virtio-fs] [PATCH 2/2] fuse: remove dmap when truncating inode Peng Tao
2019-05-10 15:23     ` Vivek Goyal [this message]
2019-05-10 16:16       ` Tao Peng
2019-05-10 17:31         ` Vivek Goyal
2019-05-11 15:24           ` Tao Peng
2019-05-10 15:24   ` [Virtio-fs] [PATCH 1/2] fuse: remove unnecessary allocated_ranges Vivek Goyal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190510152338.GA15465@redhat.com \
    --to=vgoyal@redhat.com \
    --cc=tao.peng@linux.alibaba.com \
    --cc=virtio-fs@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.