All of lore.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: Carlos Maiolino <cmaiolino@redhat.com>
Cc: xfs@oss.sgi.com
Subject: Re: [RFC PATCH] xfs: enables file data inlining in inodes
Date: Fri, 12 Oct 2012 11:31:50 -0400	[thread overview]
Message-ID: <507837E6.4040807@redhat.com> (raw)
In-Reply-To: <1349985158-9952-1-git-send-email-cmaiolino@redhat.com>

On 10/11/2012 03:52 PM, Carlos Maiolino wrote:
> Hi,
> 
> this is a first RFC patch of my work on data inlining, i.e. use the xfs inode's
> literal area to store user's data.
> 
> This first patch just cares about write and read new files into inode's literal
> area, it does not make any conversion from inline to extent or vice-versa.
> 
> The idea to send this patch to list is just to get comments about this first
> work and see if anybody has some ideas/suggestions about it, mainly related
> with page cache and journal handling, since it's the first time I deal with
> journal and page cache handling, I'm not pretty sure if I did things right
> or not.
> 
> Every comment is very appreciated.
> 
> Thanks
> ---
>  fs/xfs/xfs_aops.c  | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  fs/xfs/xfs_inode.c |  15 +-----
>  2 files changed, 130 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
> index e562dd4..3d30528 100644
> --- a/fs/xfs/xfs_aops.c
> +++ b/fs/xfs/xfs_aops.c
> @@ -31,6 +31,7 @@
>  #include "xfs_vnodeops.h"
>  #include "xfs_trace.h"
>  #include "xfs_bmap.h"
> +#include "xfs_inode_item.h"
>  #include <linux/gfp.h>
>  #include <linux/mpage.h>
>  #include <linux/pagevec.h>
> @@ -460,6 +461,95 @@ xfs_start_page_writeback(
>  		end_page_writeback(page);
>  }
>  
> +/* Write data inlined in inode */
> +
> +STATIC int
> +xfs_inline_write(
> +	struct xfs_inode	*ip,
> +	struct page		*page,
> +	struct buffer_head	*bh,
> +	__uint64_t		end_offset)
> +{
> +	char		*data;
> +	int		err = 0;
> +	xfs_trans_t	*tp;
> +	xfs_mount_t	*mp = ip->i_mount;
> +
> +	printk("%s: INLINING INODE %llu\n", __func__, ip->i_ino);
> +	xfs_start_page_writeback(page, 1, 1);
> +
> +	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
> +	err = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0); /* Is this the right logspace? */
> +
> +	if (err) {
> +		xfs_trans_cancel(tp, 0);

Do you need to undo xfs_start_page_writeback() here?

> +		return -1;
> +	}
> +
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +
> +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
> +
> +	if (!(ip->i_d.di_format & XFS_DINODE_FMT_LOCAL)) {
> +		ip->i_df.if_flags &= ~XFS_IFEXTENTS;
> +		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
> +		ip->i_df.if_flags |= XFS_IFINLINE;
> +	}
> +
> +	ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
> +
> +	if (end_offset > ip->i_df.if_bytes)
> +		xfs_idata_realloc(ip, end_offset - ip->i_df.if_bytes,
> +				  XFS_DATA_FORK);
> +
> +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DDATA);
> +	data = kmap(page);
> +	memcpy(ip->i_df.if_u1.if_data, data, end_offset);
> +	kunmap(page);
> +
> +	ip->i_d.di_size = end_offset;
> +
> +	err = xfs_trans_commit(tp, 0);
> +
> +	set_buffer_uptodate(bh);
> +	clear_buffer_dirty(bh);
> +	clear_buffer_delay(bh);
> +	SetPageUptodate(page);
> +
> +	end_page_writeback(page);
> +	return err;
> +}
> +
> +STATIC int
> +xfs_inline_read(
> +	struct xfs_inode	*ip,
> +	struct page		*page,
> +	struct address_space	*mapping)
> +{
> +	char *data;
> +	unsigned long bsize = 1 << ip->i_vnode.i_blkbits;
> +
> +	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
> +	ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
> +
> +	if (!page_has_buffers(page))
> +		create_empty_buffers(page, bsize, 0);
> +
> +	xfs_ilock(ip, XFS_ILOCK_SHARED);
> +
> +	data = kmap(page);
> +	memcpy(data, ip->i_df.if_u1.if_data, (int)ip->i_d.di_size);
> +	kunmap(page);
> +
> +	/* We should not leave garbage after user data into the page */
> +	memset(data + (int)ip->i_d.di_size, 0, PAGE_SIZE - ip->i_d.di_size);
> +
> +	SetPageUptodate(page);
> +	xfs_iunlock(ip, XFS_ILOCK_SHARED);
> +	unlock_page(page);
> +	return 0;
> +}
> +
>  static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
>  {
>  	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
> @@ -900,6 +990,7 @@ xfs_vm_writepage(
>  	struct writeback_control *wbc)
>  {
>  	struct inode		*inode = page->mapping->host;
> +	xfs_inode_t		*ip = XFS_I(inode);
>  	struct buffer_head	*bh, *head;
>  	struct xfs_bmbt_irec	imap;
>  	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
> @@ -908,7 +999,7 @@ xfs_vm_writepage(
>  	__uint64_t              end_offset;
>  	pgoff_t                 end_index, last_index;
>  	ssize_t			len;
> -	int			err, imap_valid = 0, uptodate = 1;
> +	int			err = 0, imap_valid = 0, uptodate = 1;
>  	int			count = 0;
>  	int			nonblocking = 0;
>  
> @@ -968,8 +1059,16 @@ xfs_vm_writepage(
>  			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
>  			offset);
>  	len = 1 << inode->i_blkbits;
> -
>  	bh = head = page_buffers(page);
> +
> +	if (end_offset + i_size_read(inode) <= XFS_IFORK_DSIZE(ip)) {

So end_offset looks like it is the size of the file if this page
contains EOF, or the next page-aligned offset of the file otherwise. If
I understand correctly, shouldn't we just check if end_offset falls
within the data fork size here? IOW, I don't understand why you add
end_offset and i_size_read(inode) here.

Brian

> +		err = xfs_inline_write(ip, page, bh, end_offset);
> +
> +		if (err)
> +			goto error;
> +		return 0;
> +	}
> +
>  	offset = page_offset(page);
>  	type = XFS_IO_OVERWRITE;
>  
> @@ -1624,20 +1723,43 @@ xfs_vm_bmap(
>  
>  STATIC int
>  xfs_vm_readpage(
> -	struct file		*unused,
> +	struct file		*filp,
>  	struct page		*page)
>  {
> -	return mpage_readpage(page, xfs_get_blocks);
> +	struct inode		*inode = filp->f_mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +
> +	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
> +		return xfs_inline_read(ip, page, page->mapping);
> +	else
> +		return mpage_readpage(page, xfs_get_blocks);
>  }
>  
>  STATIC int
>  xfs_vm_readpages(
> -	struct file		*unused,
> +	struct file		*filp,
>  	struct address_space	*mapping,
>  	struct list_head	*pages,
>  	unsigned		nr_pages)
>  {
> -	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
> +	struct inode		*inode = filp->f_mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +
> +	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
> +		struct page	*page = list_first_entry(pages, struct page, lru);
> +
> +		ASSERT(i_size_read(VFS_I(ip) <= PAGE_CACHE_SIZE));
> +
> +		list_del(&page->lru);
> +		if(!(add_to_page_cache_lru(page, mapping,
> +					    page->index, GFP_KERNEL)))
> +			return xfs_inline_read(ip, page, page->mapping);
> +
> +		page_cache_release(page);
> +		return 0;
> +	} else {
> +		return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
> +	}
>  }
>  
>  const struct address_space_operations xfs_address_space_operations = {
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 2778258..5e56e5c 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -287,18 +287,6 @@ xfs_iformat(
>  	case S_IFDIR:
>  		switch (dip->di_format) {
>  		case XFS_DINODE_FMT_LOCAL:
> -			/*
> -			 * no local regular files yet
> -			 */
> -			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
> -				xfs_warn(ip->i_mount,
> -			"corrupt inode %Lu (local format for regular file).",
> -					(unsigned long long) ip->i_ino);
> -				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
> -						     XFS_ERRLEVEL_LOW,
> -						     ip->i_mount, dip);
> -				return XFS_ERROR(EFSCORRUPTED);
> -			}
>  
>  			di_size = be64_to_cpu(dip->di_size);
>  			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
> @@ -2471,7 +2459,8 @@ xfs_iflush_int(
>  	if (S_ISREG(ip->i_d.di_mode)) {
>  		if (XFS_TEST_ERROR(
>  		    (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
> -		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
> +		    (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
> +		    (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
>  		    mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
>  			xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
>  				"%s: Bad regular inode %Lu, ptr 0x%p",
> 

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2012-10-12 15:31 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-11 19:52 [RFC PATCH] xfs: enables file data inlining in inodes Carlos Maiolino
2012-10-12 12:30 ` Raghavendra D Prabhu
2012-10-15 17:01   ` Carlos Maiolino
2012-10-12 15:31 ` Brian Foster [this message]
2012-10-15 17:19   ` Carlos Maiolino
2012-10-15 18:30     ` Brian Foster
2012-10-16 14:48       ` Carlos Maiolino
2012-10-16 15:18         ` Brian Foster

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=507837E6.4040807@redhat.com \
    --to=bfoster@redhat.com \
    --cc=cmaiolino@redhat.com \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.