All of lore.kernel.org
 help / color / mirror / Atom feed
From: Goldwyn Rodrigues <RGoldwyn@suse.com>
To: "hch@lst.de" <hch@lst.de>,
	"linux-xfs@vger.kernel.org" <linux-xfs@vger.kernel.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>
Subject: Re: [PATCH 07/19] iomap: use a srcmap for a read-modify-write I/O
Date: Tue, 10 Sep 2019 12:48:14 +0000	[thread overview]
Message-ID: <1568119693.12944.16.camel@suse.com> (raw)
In-Reply-To: <20190909182722.16783-8-hch@lst.de>

On Mon, 2019-09-09 at 20:27 +0200, Christoph Hellwig wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> The srcmap is used to identify where the read is to be performed
> from.
> It is passed to ->iomap_begin, which can fill it in if we need to
> read
> data for partially written blocks from a different location than the
> write target.  The srcmap is only supported for buffered writes so
> far.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as
>       srcmap if not set, adjust length down to srcmap end as well]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c               |  9 ++++--
>  fs/ext2/inode.c        |  2 +-
>  fs/ext4/inode.c        |  2 +-
>  fs/gfs2/bmap.c         |  3 +-
>  fs/iomap/apply.c       | 23 +++++++++++----
>  fs/iomap/buffered-io.c | 65 +++++++++++++++++++++++-----------------
> --
>  fs/iomap/direct-io.c   |  2 +-
>  fs/iomap/fiemap.c      |  4 +--
>  fs/iomap/seek.c        |  4 +--
>  fs/iomap/swapfile.c    |  3 +-
>  fs/xfs/xfs_iomap.c     |  9 ++++--
>  include/linux/iomap.h  |  5 ++--
>  12 files changed, 79 insertions(+), 52 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index a237141d8787..8016be24bbd9 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
>  
>  static loff_t
>  dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void
> *data,
> -		struct iomap *iomap)
> +		struct iomap *iomap, struct iomap *srcmap)
>  {
>  	struct block_device *bdev = iomap->bdev;
>  	struct dax_device *dax_dev = iomap->dax_dev;
> @@ -1248,6 +1248,7 @@ static vm_fault_t dax_iomap_pte_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	unsigned long vaddr = vmf->address;
>  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { 0 };
>  	unsigned flags = IOMAP_FAULT;
>  	int error, major = 0;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> @@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	 * the file system block size to be equal the page size,
> which means
>  	 * that we never have to deal with more than a single extent
> here.
>  	 */
> -	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags,
> &iomap);
> +	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags,
> &iomap, &srcmap);
>  	if (iomap_errp)
>  		*iomap_errp = error;
>  	if (error) {
> @@ -1472,6 +1473,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	struct inode *inode = mapping->host;
>  	vm_fault_t result = VM_FAULT_FALLBACK;
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { 0 };
>  	pgoff_t max_pgoff;
>  	void *entry;
>  	loff_t pos;
> @@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct
> vm_fault *vmf, pfn_t *pfnp,
>  	 * to look up our filesystem block.
>  	 */
>  	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
> -	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags,
> &iomap);
> +	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags,
> &iomap,
> +			&srcmap);
>  	if (error)
>  		goto unlock_entry;
>  
> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
> index 7004ce581a32..467c13ff6b40 100644
> --- a/fs/ext2/inode.c
> +++ b/fs/ext2/inode.c
> @@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t
> iblock,
>  
>  #ifdef CONFIG_FS_DAX
>  static int ext2_iomap_begin(struct inode *inode, loff_t offset,
> loff_t length,
> -		unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap
> *srcmap)
>  {
>  	unsigned int blkbits = inode->i_blkbits;
>  	unsigned long first_block = offset >> blkbits;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 420fe3deed39..e2116e8228d2 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3453,7 +3453,7 @@ static bool ext4_inode_datasync_dirty(struct
> inode *inode)
>  }
>  
>  static int ext4_iomap_begin(struct inode *inode, loff_t offset,
> loff_t length,
> -			    unsigned flags, struct iomap *iomap)
> +		unsigned flags, struct iomap *iomap, struct iomap
> *srcmap)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
>  	unsigned int blkbits = inode->i_blkbits;
> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
> index 79581b9bdebb..0bf8e8fa82bd 100644
> --- a/fs/gfs2/bmap.c
> +++ b/fs/gfs2/bmap.c
> @@ -1123,7 +1123,8 @@ static int gfs2_iomap_begin_write(struct inode
> *inode, loff_t pos,
>  }
>  
>  static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t
> length,
> -			    unsigned flags, struct iomap *iomap)
> +			    unsigned flags, struct iomap *iomap,
> +			    struct iomap *srcmap)
>  {
>  	struct gfs2_inode *ip = GFS2_I(inode);
>  	struct metapath mp = { .mp_aheight = 1, };
> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c
> index 54c02aecf3cd..67efd86675e6 100644
> --- a/fs/iomap/apply.c
> +++ b/fs/iomap/apply.c
> @@ -24,7 +24,9 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t
> length, unsigned flags,
>  		const struct iomap_ops *ops, void *data,
> iomap_actor_t actor)
>  {
>  	struct iomap iomap = { 0 };
> +	struct iomap srcmap = { 0 };
>  	loff_t written = 0, ret;
> +	u64 end;
>  
>  	/*
>  	 * Need to map a range from start position for length bytes.
> This can
> @@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t
> length, unsigned flags,
>  	 * expose transient stale data. If the reserve fails, we can
> safely
>  	 * back out at this point as there is nothing to undo.
>  	 */
> -	ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
> +	ret = ops->iomap_begin(inode, pos, length, flags, &iomap,
> &srcmap);
>  	if (ret)
>  		return ret;
>  	if (WARN_ON(iomap.offset > pos))
> @@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos,
> loff_t length, unsigned flags,
>  	 * Cut down the length to the one actually provided by the
> filesystem,
>  	 * as it might not be able to give us the whole size that we
> requested.
>  	 */
> -	if (iomap.offset + iomap.length < pos + length)
> -		length = iomap.offset + iomap.length - pos;
> +	end = iomap.offset + iomap.length;
> +	if (srcmap.type)
> +		end = min(end, srcmap.offset + srcmap.length);
> +	if (pos + length > end)
> +		length = end - pos;
>  


Yes, that looks more correct. However, can we be smart and not bother
setting the minimum of end and srcmap.offset + srcmap.length if it is
not required? ie in situations where end coincides with block boundary.
 Or if srcmap.length falls short, until the last block boundary of
iomap? 

I did think about this scenario. This case is specific to CoW and
thought this is best handled by filesystem's iomap_begin(). If this
goes in, the filesystems would have to "falsify" srcmap length
information to maximize the amount of I/O that goes in one iteration.

-- 
Goldwyn

  reply	other threads:[~2019-09-10 12:51 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-09 18:27 iomap and xfs COW cleanups Christoph Hellwig
2019-09-09 18:27 ` [PATCH 01/19] iomap: better document the IOMAP_F_* flags Christoph Hellwig
2019-09-14  0:42   ` Allison Collins
2019-09-16 18:08   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 02/19] iomap: remove the unused iomap argument to __iomap_write_end Christoph Hellwig
2019-09-14  0:42   ` Allison Collins
2019-09-16 18:10   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 03/19] iomap: always use AOP_FLAG_NOFS in iomap_write_begin Christoph Hellwig
2019-09-16 18:11   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 04/19] iomap: ignore non-shared or non-data blocks in xfs_file_dirty Christoph Hellwig
2019-09-16 18:12   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 05/19] iomap: move the zeroing case out of iomap_read_page_sync Christoph Hellwig
2019-09-16 18:17   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 06/19] iomap: use write_begin to read pages to unshare Christoph Hellwig
2019-09-16 18:34   ` Darrick J. Wong
2019-09-30 11:07     ` Christoph Hellwig
2019-10-08 15:12       ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 07/19] iomap: use a srcmap for a read-modify-write I/O Christoph Hellwig
2019-09-10 12:48   ` Goldwyn Rodrigues [this message]
2019-09-10 14:39     ` hch
2019-09-16 17:57       ` Darrick J. Wong
2019-09-16 18:42   ` Darrick J. Wong
2019-09-18 18:15     ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 08/19] xfs: also call xfs_file_iomap_end_delalloc for zeroing operations Christoph Hellwig
2019-09-18 17:09   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 09/19] xfs: remove xfs_reflink_dirty_extents Christoph Hellwig
2019-09-18 17:17   ` Darrick J. Wong
2019-09-18 17:25     ` Christoph Hellwig
2019-09-18 17:31       ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 10/19] xfs: pass two imaps to xfs_reflink_allocate_cow Christoph Hellwig
2019-09-18 17:26   ` Darrick J. Wong
2019-09-30 11:10     ` Christoph Hellwig
2019-09-09 18:27 ` [PATCH 11/19] xfs: refactor xfs_file_iomap_begin_delay Christoph Hellwig
2019-09-18 17:30   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 12/19] xfs: fill out the srcmap in iomap_begin Christoph Hellwig
2019-09-18 17:52   ` Darrick J. Wong
2019-10-01  6:26     ` Christoph Hellwig
2019-09-09 18:27 ` [PATCH 13/19] xfs: factor out a helper to calculate the end_fsb Christoph Hellwig
2019-09-14  0:42   ` Allison Collins
2019-09-18 17:55   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 14/19] xfs: split out a new set of read-only iomap ops Christoph Hellwig
2019-09-18 17:56   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 15/19] xfs: move xfs_file_iomap_begin_delay around Christoph Hellwig
2019-09-18 17:59   ` Darrick J. Wong
2019-09-30 11:14     ` Christoph Hellwig
2019-09-09 18:27 ` [PATCH 16/19] xfs: split the iomap ops for buffered vs direct writes Christoph Hellwig
2019-09-18 18:00   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 17/19] xfs: rename the whichfork variable in xfs_buffered_write_iomap_begin Christoph Hellwig
2019-09-14  0:42   ` Allison Collins
2019-09-18 18:00   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 18/19] xfs: cleanup xfs_iomap_write_unwritten Christoph Hellwig
2019-09-18 18:06   ` Darrick J. Wong
2019-09-09 18:27 ` [PATCH 19/19] xfs: improve the IOMAP_NOWAIT check for COW inodes Christoph Hellwig
2019-09-18 18:09   ` Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1568119693.12944.16.camel@suse.com \
    --to=rgoldwyn@suse.com \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.