public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@infradead.org>
To: axboe@kernel.dk
Cc: linux-kernel@vger.kernel.org
Subject: Re: [PATCH] loop: remove the incorrect write_begin/write_end shortcut
Date: Mon, 17 Oct 2011 03:49:18 -0400	[thread overview]
Message-ID: <20111017074918.GA14337@infradead.org> (raw)
In-Reply-To: <20110920160922.GA2491@infradead.org>

ping?

On Tue, Sep 20, 2011 at 12:09:22PM -0400, Christoph Hellwig wrote:
> Currently the loop device tries to call directly into write_begin/write_end
> instead of going through ->write if it can.  This is a fairly nasty shortcut
> as write_begin and write_end are only callbacks for the generic write code
> and expect to be called with filesystem specific locks held.  
> 
> This code currently causes various issues for clustered filesystems as it
> doesn't take the required cluster locks, and it also causes issues for XFS
> as it doesn't properly lock against the swapext ioctl as called by the
> defragmentation tools.  This in case causes data corruption if
> defragmentation hits a busy loop device in the wrong time window, as
> reported by RH QA.
> 
> The reason why we have this shortcut is that it saves a data copy when
> doing a transformation on the loop device, which is the technical term
> for using cryptoloop (or an XOR transformation).  Given that cryptoloop
> has been deprecated in favour of dm-crypt my opinion is that we should
> simply drop this shortcut instead of finding complicated ways to to
> introduce a formal interface for this shortcut.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Index: linux-2.6/drivers/block/loop.c
> ===================================================================
> --- linux-2.6.orig/drivers/block/loop.c	2011-09-19 12:21:06.725231729 -0400
> +++ linux-2.6/drivers/block/loop.c	2011-09-19 12:28:01.671730150 -0400
> @@ -203,74 +203,6 @@ lo_do_transfer(struct loop_device *lo, i
>  }
>  
>  /**
> - * do_lo_send_aops - helper for writing data to a loop device
> - *
> - * This is the fast version for backing filesystems which implement the address
> - * space operations write_begin and write_end.
> - */
> -static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
> -		loff_t pos, struct page *unused)
> -{
> -	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
> -	struct address_space *mapping = file->f_mapping;
> -	pgoff_t index;
> -	unsigned offset, bv_offs;
> -	int len, ret;
> -
> -	mutex_lock(&mapping->host->i_mutex);
> -	index = pos >> PAGE_CACHE_SHIFT;
> -	offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
> -	bv_offs = bvec->bv_offset;
> -	len = bvec->bv_len;
> -	while (len > 0) {
> -		sector_t IV;
> -		unsigned size, copied;
> -		int transfer_result;
> -		struct page *page;
> -		void *fsdata;
> -
> -		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
> -		size = PAGE_CACHE_SIZE - offset;
> -		if (size > len)
> -			size = len;
> -
> -		ret = pagecache_write_begin(file, mapping, pos, size, 0,
> -							&page, &fsdata);
> -		if (ret)
> -			goto fail;
> -
> -		file_update_time(file);
> -
> -		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
> -				bvec->bv_page, bv_offs, size, IV);
> -		copied = size;
> -		if (unlikely(transfer_result))
> -			copied = 0;
> -
> -		ret = pagecache_write_end(file, mapping, pos, size, copied,
> -							page, fsdata);
> -		if (ret < 0 || ret != copied)
> -			goto fail;
> -
> -		if (unlikely(transfer_result))
> -			goto fail;
> -
> -		bv_offs += copied;
> -		len -= copied;
> -		offset = 0;
> -		index++;
> -		pos += copied;
> -	}
> -	ret = 0;
> -out:
> -	mutex_unlock(&mapping->host->i_mutex);
> -	return ret;
> -fail:
> -	ret = -1;
> -	goto out;
> -}
> -
> -/**
>   * __do_lo_send_write - helper for writing data to a loop device
>   *
>   * This helper just factors out common code between do_lo_send_direct_write()
> @@ -297,10 +229,8 @@ static int __do_lo_send_write(struct fil
>  /**
>   * do_lo_send_direct_write - helper for writing data to a loop device
>   *
> - * This is the fast, non-transforming version for backing filesystems which do
> - * not implement the address space operations write_begin and write_end.
> - * It uses the write file operation which should be present on all writeable
> - * filesystems.
> + * This is the fast, non-transforming version that does not need double
> + * buffering.
>   */
>  static int do_lo_send_direct_write(struct loop_device *lo,
>  		struct bio_vec *bvec, loff_t pos, struct page *page)
> @@ -316,15 +246,9 @@ static int do_lo_send_direct_write(struc
>  /**
>   * do_lo_send_write - helper for writing data to a loop device
>   *
> - * This is the slow, transforming version for filesystems which do not
> - * implement the address space operations write_begin and write_end.  It
> - * uses the write file operation which should be present on all writeable
> - * filesystems.
> - *
> - * Using fops->write is slower than using aops->{prepare,commit}_write in the
> - * transforming case because we need to double buffer the data as we cannot do
> - * the transformations in place as we do not have direct access to the
> - * destination pages of the backing file.
> + * This is the slow, transforming version that needs to double buffer the
> + * data as it cannot do the transformations in place without having direct
> + * access to the destination pages of the backing file.
>   */
>  static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
>  		loff_t pos, struct page *page)
> @@ -350,17 +274,16 @@ static int lo_send(struct loop_device *l
>  	struct page *page = NULL;
>  	int i, ret = 0;
>  
> -	do_lo_send = do_lo_send_aops;
> -	if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
> +	if (lo->transfer != transfer_none) {
> +		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
> +		if (unlikely(!page))
> +			goto fail;
> +		kmap(page);
> +		do_lo_send = do_lo_send_write;
> +	} else {
>  		do_lo_send = do_lo_send_direct_write;
> -		if (lo->transfer != transfer_none) {
> -			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
> -			if (unlikely(!page))
> -				goto fail;
> -			kmap(page);
> -			do_lo_send = do_lo_send_write;
> -		}
>  	}
> +
>  	bio_for_each_segment(bvec, bio, i) {
>  		ret = do_lo_send(lo, bvec, pos, page);
>  		if (ret < 0)
> @@ -849,35 +772,23 @@ static int loop_set_fd(struct loop_devic
>  	mapping = file->f_mapping;
>  	inode = mapping->host;
>  
> -	if (!(file->f_mode & FMODE_WRITE))
> -		lo_flags |= LO_FLAGS_READ_ONLY;
> -
>  	error = -EINVAL;
> -	if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
> -		const struct address_space_operations *aops = mapping->a_ops;
> -
> -		if (aops->write_begin)
> -			lo_flags |= LO_FLAGS_USE_AOPS;
> -		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
> -			lo_flags |= LO_FLAGS_READ_ONLY;
> +	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
> +		goto out_putf;
>  
> -		lo_blocksize = S_ISBLK(inode->i_mode) ?
> -			inode->i_bdev->bd_block_size : PAGE_SIZE;
> +	if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
> +	    !file->f_op->write)
> +		lo_flags |= LO_FLAGS_READ_ONLY;
>  
> -		error = 0;
> -	} else {
> -		goto out_putf;
> -	}
> +	lo_blocksize = S_ISBLK(inode->i_mode) ?
> +		inode->i_bdev->bd_block_size : PAGE_SIZE;
>  
> +	error = -EFBIG;
>  	size = get_loop_size(lo, file);
> -
> -	if ((loff_t)(sector_t)size != size) {
> -		error = -EFBIG;
> +	if ((loff_t)(sector_t)size != size)
>  		goto out_putf;
> -	}
>  
> -	if (!(mode & FMODE_WRITE))
> -		lo_flags |= LO_FLAGS_READ_ONLY;
> +	error = 0;
>  
>  	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
>  
> Index: linux-2.6/include/linux/loop.h
> ===================================================================
> --- linux-2.6.orig/include/linux/loop.h	2011-09-19 12:21:32.241231706 -0400
> +++ linux-2.6/include/linux/loop.h	2011-09-19 12:21:40.768232536 -0400
> @@ -73,7 +73,6 @@ struct loop_device {
>   */
>  enum {
>  	LO_FLAGS_READ_ONLY	= 1,
> -	LO_FLAGS_USE_AOPS	= 2,
>  	LO_FLAGS_AUTOCLEAR	= 4,
>  };
>  
---end quoted text---

  reply	other threads:[~2011-10-17  7:49 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-20 16:09 [PATCH] loop: remove the incorrect write_begin/write_end shortcut Christoph Hellwig
2011-10-17  7:49 ` Christoph Hellwig [this message]
2011-10-17 10:57   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111017074918.GA14337@infradead.org \
    --to=hch@infradead.org \
    --cc=axboe@kernel.dk \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox