linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@redhat.com>
To: Martin Brandenburg <martin@omnibond.com>,
	hubcap@omnibond.com, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 13/13] orangefs: implement write through the page cache
Date: Thu, 25 May 2017 12:09:49 -0400	[thread overview]
Message-ID: <1495728589.2928.5.camel@redhat.com> (raw)
In-Reply-To: <1495447141-12216-14-git-send-email-martin@omnibond.com>

On Mon, 2017-05-22 at 05:59 -0400, Martin Brandenburg wrote:
> With this and the last commit, OrangeFS is capable of writing through
> the page cache.  This should significantly increase performance of very
> small writes, since writeback will not be done for every write call.
> 
> However it is not appropriate for use with multiple clients due to the
> long writeback delay.
> 
> Signed-off-by: Martin Brandenburg <martin@omnibond.com>
> ---
>  fs/orangefs/file.c | 128 +++++++++++++++++++++++------------------------------
>  1 file changed, 56 insertions(+), 72 deletions(-)
> 
> diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
> index c03deea..3ab0a1f 100644
> --- a/fs/orangefs/file.c
> +++ b/fs/orangefs/file.c
> @@ -448,69 +448,11 @@ static ssize_t orangefs_file_read_iter(struct kiocb *iocb,
>  	return generic_file_read_iter(iocb, iter);
>  }
>  
> -static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
> +static ssize_t orangefs_file_write_iter(struct kiocb *iocb,
> +    struct iov_iter *iter)
>  {
> -	struct file *file = iocb->ki_filp;
> -	loff_t pos;
> -	ssize_t rc;
> -
> -	BUG_ON(iocb->private);
> -
> -	gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n");
> -
> -	inode_lock(file->f_mapping->host);
> -
> -	/* Make sure generic_write_checks sees an up to date inode size. */
> -	if (file->f_flags & O_APPEND) {
> -		rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1,
> -		    STATX_SIZE);
> -		if (rc == -ESTALE)
> -			rc = -EIO;
> -		if (rc) {
> -			gossip_err("%s: orangefs_inode_getattr failed, "
> -			    "rc:%zd:.\n", __func__, rc);
> -			goto out;
> -		}
> -	}
> -
> -	if (file->f_pos > i_size_read(file->f_mapping->host))
> -		orangefs_i_size_write(file->f_mapping->host, file->f_pos);
> -
> -	rc = generic_write_checks(iocb, iter);
> -
> -	if (rc <= 0) {
> -		gossip_err("%s: generic_write_checks failed, rc:%zd:.\n",
> -			   __func__, rc);
> -		goto out;
> -	}
> -
> -	/*
> -	 * if we are appending, generic_write_checks would have updated
> -	 * pos to the end of the file, so we will wait till now to set
> -	 * pos...
> -	 */
> -	pos = *(&iocb->ki_pos);
> -
> -	rc = do_readv_writev(ORANGEFS_IO_WRITE,
> -			     file,
> -			     &pos,
> -			     iter);
> -	if (rc < 0) {
> -		gossip_err("%s: do_readv_writev failed, rc:%zd:.\n",
> -			   __func__, rc);
> -		goto out;
> -	}
> -
> -	iocb->ki_pos = pos;
>  	orangefs_stats.writes++;
> -
> -	if (pos > i_size_read(file->f_mapping->host))
> -		orangefs_i_size_write(file->f_mapping->host, pos);
> -
> -out:
> -
> -	inode_unlock(file->f_mapping->host);
> -	return rc;
> +	return generic_file_write_iter(iocb, iter);
>  }
>  
>  /*
> @@ -606,9 +548,8 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
>  	orangefs_flush_inode(inode);
>  
>  	/*
> -	 * remove all associated inode pages from the page cache and
> -	 * readahead cache (if any); this forces an expensive refresh of
> -	 * data for the next caller of mmap (or 'get_block' accesses)
> +	 * remove all associated inode pages from the readahead cache
> +	 * (if any)
>  	 */
>  	if (file_inode(file) &&
>  	    file_inode(file)->i_mapping &&
> @@ -621,8 +562,6 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
>  			gossip_debug(GOSSIP_INODE_DEBUG,
>  			    "flush_racache finished\n");
>  		}
> -		truncate_inode_pages(file_inode(file)->i_mapping,
> -				     0);
>  	}
>  	return 0;
>  }
> @@ -741,6 +680,40 @@ const struct file_operations orangefs_file_operations = {
>  	.fsync		= orangefs_fsync,
>  };
>  
> +static int orangefs_writepage(struct page *page,
> +    struct writeback_control *wbc)
> +{
> +	struct inode *inode = page->mapping->host;
> +	struct iov_iter iter;
> +	struct iovec iov;
> +	loff_t off;
> +	size_t len;
> +	ssize_t r;
> +	void *map;
> +
> +	off = page_offset(page);
> +	len = i_size_read(inode);
> +	if (off + PAGE_SIZE > len)
> +		len = len - off;
> +	else
> +		len = PAGE_SIZE;
> +
> +	map = kmap_atomic(page);
> +	iov.iov_base = map;
> +	iov.iov_len = len;
> +	iov_iter_init(&iter, WRITE, &iov, 1, len);
> +
> +	set_page_writeback(page);
> +

wait_for_direct_io can sleep, so you can't use kmap_atomic here: sleeping
is not allowed between kmap_atomic and kunmap_atomic. Plain old kmap
should be OK, though.

Also, you probably really don't want to use iovecs here, as those are
expected to hold userland addresses, and the address returned by kmap is
a kernel address, not a userland one.

It may be cleaner to use a bio_vec there instead. You most likely
wouldn't need to kmap at all if you did that.

> +	r = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter,
> +	    len, 0);
> +	kunmap_atomic(map);
> +

When writeback fails for any reason, you'll also want to call
mapping_set_error to help ensure that the error gets reported (unless
you're tracking errors on your own somehow). I don't see that being done
from a cursory glance at these patches, but I could have missed it.

> +	end_page_writeback(page);
> +	unlock_page(page);
> +	return 0;
> +}
> +
> static int orangefs_readpage(struct file *file, struct page *page)
>  {
>  	int ret;
> @@ -786,6 +759,17 @@ static int orangefs_readpage(struct file *file, struct page *page)
>  	return ret;
>  }
>  
> +static int orangefs_write_end(struct file *file,
> +    struct address_space *mapping, loff_t pos, unsigned len,
> +    unsigned copied, struct page *page, void *fsdata)
> +{
> +	int r;
> +	r = simple_write_end(file, mapping, pos, len, copied, page,
> +	    fsdata);
> +	mark_inode_dirty_sync(file_inode(file));
> +	return r;
> +}
> +
>  static void orangefs_invalidatepage(struct page *page,
>  				 unsigned int offset,
>  				 unsigned int length)
> @@ -815,17 +799,17 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
>  {
>  	struct file *file = iocb->ki_filp;
>  	loff_t pos = *(&iocb->ki_pos);
> -	/*
> -	 * This cannot happen until write_iter becomes
> -	 * generic_file_write_iter.
> -	 */
> -	BUG_ON(iov_iter_rw(iter) != READ);
> -	return do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
> +	return do_readv_writev(iocb->ki_flags & IOCB_WRITE ?
> +	    ORANGEFS_IO_WRITE : ORANGEFS_IO_READ, file, &pos, iter);
>  }
>  
>  /** ORANGEFS2 implementation of address space operations */
>  const struct address_space_operations orangefs_address_operations = {
> +	.writepage = orangefs_writepage,
>  	.readpage = orangefs_readpage,
> +	.set_page_dirty = __set_page_dirty_nobuffers,
> +	.write_begin = simple_write_begin,
> +	.write_end = orangefs_write_end,
>  	.invalidatepage = orangefs_invalidatepage,
>  	.releasepage = orangefs_releasepage,
>  	.direct_IO = orangefs_direct_IO,

-- 
Jeff Layton <jlayton@redhat.com>

  reply	other threads:[~2017-05-25 16:09 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-22  9:58 [PATCH 00/13] [RFC] orangefs page cache Martin Brandenburg
2017-05-22  9:58 ` [PATCH 01/13] orangefs: move orangefs_address_operations to file.c Martin Brandenburg
2017-05-22  9:58 ` [PATCH 02/13] orangefs: remove orangefs_readpages Martin Brandenburg
2017-05-22  9:58 ` [PATCH 03/13] orangefs: make orangefs_inode_read static Martin Brandenburg
2017-05-22  9:58 ` [PATCH 04/13] orangefs: only set a_ops for regular files Martin Brandenburg
2017-05-22  9:58 ` [PATCH 05/13] orangefs: BUG_ON if i_mode invalid Martin Brandenburg
2017-05-22  9:58 ` [PATCH 06/13] orangefs: remove mapping_nrpages macro Martin Brandenburg
2017-05-22  9:58 ` [PATCH 07/13] orangefs: set up and use backing_dev_info Martin Brandenburg
2017-05-22  9:58 ` [PATCH 08/13] orangefs: initialize new inode size to zero Martin Brandenburg
2017-05-22  9:58 ` [PATCH 09/13] orangefs: inodes linger in cache Martin Brandenburg
2017-05-22  9:58 ` [PATCH 10/13] orangefs: implement direct_IO for the read case Martin Brandenburg
2017-05-22  9:58 ` [PATCH 11/13] orangefs: lock inode during fsync Martin Brandenburg
2017-05-25 15:58   ` Jeff Layton
2017-05-26 16:21     ` martin
2017-05-26 16:58       ` Jeff Layton
2017-05-22  9:59 ` [PATCH 12/13] orangefs: call generic_file_read_iter Martin Brandenburg
2017-05-22  9:59 ` [PATCH 13/13] orangefs: implement write through the page cache Martin Brandenburg
2017-05-25 16:09   ` Jeff Layton [this message]
2017-05-26 18:09     ` martin
2017-05-26 18:48       ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1495728589.2928.5.camel@redhat.com \
    --to=jlayton@redhat.com \
    --cc=hubcap@omnibond.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=martin@omnibond.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).