All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@redhat.com>
To: Al Viro <viro@ZenIV.linux.org.uk>, "Yan, Zheng" <zyan@redhat.com>
Cc: Sage Weil <sage@redhat.com>, Ilya Dryomov <idryomov@gmail.com>,
	ceph-devel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, "Zhu,
	Caifeng" <zhucaifeng@unissoft-nj.com>
Subject: Re: [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
Date: Fri, 06 Jan 2017 15:36:58 -0500	[thread overview]
Message-ID: <1483735018.2733.5.camel@redhat.com> (raw)
In-Reply-To: <1483727016-343-1-git-send-email-jlayton@redhat.com>

On Fri, 2017-01-06 at 13:23 -0500, Jeff Layton wrote:
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice ane splice. Ceph then ends trying to
> access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it to
> pick up a wrong offset and get stuck in an infinite loop while trying to
> populate the page array.
> 
> To fix this, add a new iov_iter helper for to determine the offset into
> the page for the current segment and have ceph call that.
> 
> dio_get_pagev_size has similar problems, so fix that as well by adding a
> new helper function in lib/iov_iter.c that does what it does, but for
> all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to handle this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.
> 
> Link: http://tracker.ceph.com/issues/18130
> Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/ceph/file.c      | 28 ++------------------
>  include/linux/uio.h |  2 ++
>  lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index f633165f3fdc..0cd9fc68a04e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -35,29 +35,6 @@
>   */
>  
>  /*
> - * Calculate the length sum of direct io vectors that can
> - * be combined into one page vector.
> - */
> -static size_t dio_get_pagev_size(const struct iov_iter *it)
> -{
> -    const struct iovec *iov = it->iov;
> -    const struct iovec *iovend = iov + it->nr_segs;
> -    size_t size;
> -
> -    size = iov->iov_len - it->iov_offset;
> -    /*
> -     * An iov can be page vectored when both the current tail
> -     * and the next base are page aligned.
> -     */
> -    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
> -           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
> -        size += iov->iov_len;
> -    }
> -    dout("dio_get_pagevlen len = %zu\n", size);
> -    return size;
> -}
> -
> -/*
>   * Allocate a page vector based on (@it, @nbytes).
>   * The return value is the tuple describing a page vector,
>   * that is (@pages, @page_align, @num_pages).
> @@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
>  	struct page **pages;
>  	int ret = 0, idx, npages;
>  
> -	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
> -		(PAGE_SIZE - 1);
> +	align = iov_iter_single_seg_page_offset(it);
>  	npages = calc_pages_for(align, nbytes);
>  	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
>  	if (!pages) {
> @@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	while (iov_iter_count(iter) > 0) {
> -		u64 size = dio_get_pagev_size(iter);
> +		u64 size = iov_iter_pvec_size(iter);
>  		size_t start = 0;
>  		ssize_t len;
>  
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 6e22b544d039..46fdfff3d7d6 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
>  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
>  size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i);
>  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> @@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
>  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
>  unsigned long iov_iter_alignment(const struct iov_iter *i);
>  unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> +size_t iov_iter_pvec_size(const struct iov_iter *i);
>  void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
>  			unsigned long nr_segs, size_t count);
>  void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 6b415b5a100d..d22850bbb1e9 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_single_seg_count);
>  
> +/* Return offset into page for current iov_iter segment */
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
> +{
> +	size_t	base;
> +
> +	if (i->type & ITER_PIPE)
> +		base = i->pipe->bufs[i->idx].offset;
> +	else if (i->type & ITER_BVEC)
> +		base = i->bvec->bv_offset;
> +	else if (i->type & ITER_KVEC)
> +		base = (size_t)i->kvec->iov_base;
> +	else
> +		base = (size_t)i->iov->iov_base;
> +
> +	return (base + i->iov_offset) & (PAGE_SIZE - 1);
> +}
> +EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
> +
>  void iov_iter_kvec(struct iov_iter *i, int direction,
>  			const struct kvec *kvec, unsigned long nr_segs,
>  			size_t count)
> @@ -830,6 +848,61 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter to in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +
> +	pv_size -= i->iov_offset;
> +	return pv_size;
> +}
> +EXPORT_SYMBOL(iov_iter_pvec_size);
> +
>  static inline size_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
>  				struct page **pages,

Ugh, but this breaks generic/113...let me see what's going on there.
-- 
Jeff Layton <jlayton@redhat.com>

  reply	other threads:[~2017-01-06 20:37 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-06 18:23 [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths Jeff Layton
2017-01-06 20:36 ` Jeff Layton [this message]
2017-01-09 23:11 ` Jeff Layton
2017-01-10 12:57 ` [PATCH v2] " Jeff Layton
2017-01-11  2:42   ` Yan, Zheng
2017-01-12  7:59   ` Al Viro
2017-01-12 11:13     ` Ilya Dryomov
2017-01-12 11:37       ` Al Viro
2017-01-12 11:46         ` Ilya Dryomov
2017-01-12 11:53           ` Al Viro
2017-01-12 12:17             ` Ilya Dryomov
2017-01-12 11:57           ` Jeff Layton
2017-01-12 11:27     ` Jeff Layton
2017-01-12 11:37       ` Ilya Dryomov
2017-01-18 12:14         ` Jeff Layton
2017-01-24 15:00           ` Jeff Layton
2017-01-24 20:51             ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1483735018.2733.5.camel@redhat.com \
    --to=jlayton@redhat.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sage@redhat.com \
    --cc=viro@ZenIV.linux.org.uk \
    --cc=zhucaifeng@unissoft-nj.com \
    --cc=zyan@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.