linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@redhat.com>
To: Liu Bo <liubo2009@cn.fujitsu.com>
Cc: linux-btrfs@vger.kernel.org, dave@jikos.cz, chris.mason@oracle.com
Subject: Re: [PATCH 06/12 v3] Btrfs: improve log with sub transaction
Date: Tue, 21 Jun 2011 09:46:50 -0400	[thread overview]
Message-ID: <4E00A0CA.90108@redhat.com> (raw)
In-Reply-To: <1308646193-7086-7-git-send-email-liubo2009@cn.fujitsu.com>

On 06/21/2011 04:49 AM, Liu Bo wrote:
> When logging an inode _A_, current btrfs will
> a) clear all items belonged to _A_ in log,
> b) copy all items belonged to _A_ from fs/file tree to log tree,
> and this just wastes a lot of time, especially when logging big files.
>
> So we want to use a smarter approach, i.e. "find and merge".
> The amount of file extent items is the largest, so we focus on it.
> Thanks to sub transaction, now we can find those file extent items which
> are changed after last _transaction commit_ or last _log commit_, and
> then merge them with the existed ones in log tree.
>
> It will be great helpful on fsync performance, cause the common case is
> "make changes on a _part_ of inode".
>
> Signed-off-by: Liu Bo<liubo2009@cn.fujitsu.com>
> ---
>   fs/btrfs/tree-log.c |  179 ++++++++++++++++++++++++++++++++++++---------------
>   1 files changed, 127 insertions(+), 52 deletions(-)
>
> diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
> index b91fe8b..6193e27 100644
> --- a/fs/btrfs/tree-log.c
> +++ b/fs/btrfs/tree-log.c
> @@ -2581,61 +2581,106 @@ again:
>   }
>
>   /*
> - * a helper function to drop items from the log before we relog an
> - * inode.  max_key_type indicates the highest item type to remove.
> - * This cannot be run for file data extents because it does not
> - * free the extents they point to.
> + * a helper function to drop items from the log before we merge
> + * the uptodate items into the log tree.
>    */
> -static int drop_objectid_items(struct btrfs_trans_handle *trans,
> -				  struct btrfs_root *log,
> -				  struct btrfs_path *path,
> -				  u64 objectid, int max_key_type)
> +static  int prepare_for_merge_items(struct btrfs_trans_handle *trans,
> +				    struct inode *inode,
> +				    struct extent_buffer *eb,
> +				    int slot, int nr)
>   {
> -	int ret;
> -	struct btrfs_key key;
> +	struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
> +	struct btrfs_path *path;
>   	struct btrfs_key found_key;
> +	struct btrfs_key key;
> +	int i;
> +	int ret;
>
> -	key.objectid = objectid;
> -	key.type = max_key_type;
> -	key.offset = (u64)-1;
> +	/* There are no relative items of the inode in log. */
> +	if (BTRFS_I(inode)->logged_trans<  trans->transaction->transid)
> +		return 0;
>
> -	while (1) {
> +	path = btrfs_alloc_path();
> +	if (!path)
> +		return -ENOMEM;
> +
> +	for (i = 0; i<  nr; i++) {
> +		btrfs_item_key_to_cpu(eb,&key, i + slot);
> +

Just a nit, but it would be less of a pain to do

for (i = slot; i < slot+nr; i++)

that way you don't have a bunch of getter(eb, slot + i) things scattered 
around.

> +		if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
> +			struct btrfs_file_extent_item *fi;
> +			int found_type;
> +			u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
> +			u64 start = key.offset;
> +			u64 extent_end;
> +			u64 hint;
> +			unsigned long size;
> +
> +			fi = btrfs_item_ptr(eb, slot + i,
> +					    struct btrfs_file_extent_item);
> +			found_type = btrfs_file_extent_type(eb, fi);
> +
> +			if (found_type == BTRFS_FILE_EXTENT_REG ||
> +			    found_type == BTRFS_FILE_EXTENT_PREALLOC)
> +				extent_end = start +
> +					    btrfs_file_extent_num_bytes(eb, fi);
> +			else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
> +				size = btrfs_file_extent_inline_len(eb, fi);
> +				extent_end = (start + size + mask)&  ~mask;
> +			} else
> +				BUG_ON(1);
> +

Style screw up, if one chunk of an if/else block has braces, they all 
need it.

> +			/* drop any overlapping extents */
> +			ret = btrfs_drop_extents(trans, inode, start,
> +						 extent_end,&hint, 0, 1);

There are lot's of places where you've missed a space, like here where 
you have extent_end,&hint

> +			BUG_ON(ret);
> +
> +			continue;
> +		}
> +
> +		/* non file extent */
>   		ret = btrfs_search_slot(trans, log,&key, path, -1, 1);
> -		BUG_ON(ret == 0);
>   		if (ret<  0)
>   			break;
>
> -		if (path->slots[0] == 0)
> +		/* empty log! */
> +		if (ret>  0&&  path->slots[0] == 0)

Here too.

>   			break;
>
> -		path->slots[0]--;
> +		if (ret>  0) {

Here.

> +			btrfs_release_path(path);
> +			continue;
> +		}
> +
>   		btrfs_item_key_to_cpu(path->nodes[0],&found_key,
>   				      path->slots[0]);
>
> -		if (found_key.objectid != objectid)
> -			break;
> +		if (btrfs_comp_cpu_keys(&found_key,&key))
> +			BUG_ON(1);
>
>   		ret = btrfs_del_item(trans, log, path);
> -		if (ret)
> -			break;
> +		BUG_ON(ret);
>   		btrfs_release_path(path);
>   	}
>   	btrfs_release_path(path);
> -	return ret;
> +	btrfs_free_path(path);
> +
> +	return 0;
>   }
>
>   static noinline int copy_items(struct btrfs_trans_handle *trans,
> -			       struct btrfs_root *log,
> +			       struct inode *inode,
>   			       struct btrfs_path *dst_path,
>   			       struct extent_buffer *src,
>   			       int start_slot, int nr, int inode_only)
>   {
>   	unsigned long src_offset;
>   	unsigned long dst_offset;
> +	struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
>   	struct btrfs_file_extent_item *extent;
>   	struct btrfs_inode_item *inode_item;
> -	int ret;
>   	struct btrfs_key *ins_keys;
> +	int ret;
>   	u32 *ins_sizes;
>   	char *ins_data;
>   	int i;
> @@ -2643,6 +2688,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
>
>   	INIT_LIST_HEAD(&ordered_sums);
>
> +	ret = prepare_for_merge_items(trans, inode, src, start_slot, nr);
> +	if (ret)
> +		return ret;
> +
>   	ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
>   			   nr * sizeof(u32), GFP_NOFS);
>   	if (!ins_data)
> @@ -2749,6 +2798,34 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
>   	return ret;
>   }
>
> +/*
> + * a helper function to filter the old file extent items by checking their
> + * generation.
> + */
> +static inline int is_extent_uptodate(struct btrfs_path *path, u64 min_trans)
> +{
> +	struct btrfs_file_extent_item *fi;
> +	struct btrfs_key key;
> +	struct extent_buffer *eb;
> +	int slot;
> +	u64 gen;
> +
> +	eb = path->nodes[0];
> +	slot = path->slots[0];
> +
> +	btrfs_item_key_to_cpu(eb,&key, slot);
> +
> +	if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
> +		return 1;
> +
> +	fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
> +	gen = btrfs_file_extent_generation(eb, fi);
> +	if (gen<  min_trans)

Here.

> +		return 0;
> +
> +	return 1;
> +}
> +
>   /* log a single inode in the tree log.
>    * At least one parent directory for this inode must exist in the tree
>    * or be logged already.
> @@ -2779,6 +2856,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
>   	int ins_start_slot = 0;
>   	int ins_nr;
>   	u64 ino = btrfs_ino(inode);
> +	u64 transid;
> +
> +	/*
> +	* We use transid in btrfs_search_forward() as a filter, in order to
> +	* find the uptodate block (node or leaf).
> +	*/
> +	if (BTRFS_I(inode)->first_sub_trans>  trans->transaction->transid)

Here.

> +		transid = BTRFS_I(inode)->first_sub_trans;
> +	else
> +		transid = trans->transaction->transid;
>
>   	log = root->log_root;
>
> @@ -2816,29 +2903,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
>
>   	mutex_lock(&BTRFS_I(inode)->log_mutex);
>
> -	/*
> -	 * a brute force approach to making sure we get the most uptodate
> -	 * copies of everything.
> -	 */
> -	if (S_ISDIR(inode->i_mode)) {
> -		int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
> -
> -		if (inode_only == LOG_INODE_EXISTS)
> -			max_key_type = BTRFS_XATTR_ITEM_KEY;
> -		ret = drop_objectid_items(trans, log, path, ino, max_key_type);
> -	} else {
> -		ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
> -	}
> -	if (ret) {
> -		err = ret;
> -		goto out_unlock;
> -	}
>   	path->keep_locks = 1;
>
>   	while (1) {
>   		ins_nr = 0;
>   		ret = btrfs_search_forward(root,&min_key,&max_key,
> -					   path, 0, trans->transid);
> +					   path, 0, transid);
>   		if (ret != 0)
>   			break;
>   again:
> @@ -2849,6 +2919,9 @@ again:
>   			break;
>
>   		src = path->nodes[0];
> +		if (!is_extent_uptodate(path, transid))
> +			goto filter;
> +
>   		if (ins_nr&&  ins_start_slot + ins_nr == path->slots[0]) {
>   			ins_nr++;
>   			goto next_slot;
> @@ -2857,15 +2930,17 @@ again:
>   			ins_nr = 1;
>   			goto next_slot;
>   		}
> -
> -		ret = copy_items(trans, log, dst_path, src, ins_start_slot,
> -				 ins_nr, inode_only);
> -		if (ret) {
> -			err = ret;
> -			goto out_unlock;
> +filter:
> +		if (ins_nr) {
> +			ret = copy_items(trans, inode, dst_path, src,
> +					 ins_start_slot,
> +					 ins_nr, inode_only);
> +			if (ret) {
> +				err = ret;
> +				goto out_unlock;
> +			}
> +			ins_nr = 0;
>   		}
> -		ins_nr = 1;
> -		ins_start_slot = path->slots[0];
>   next_slot:
>
>   		nritems = btrfs_header_nritems(path->nodes[0]);
> @@ -2876,7 +2951,7 @@ next_slot:
>   			goto again;
>   		}
>   		if (ins_nr) {
> -			ret = copy_items(trans, log, dst_path, src,
> +			ret = copy_items(trans, inode, dst_path, src,
>   					 ins_start_slot,
>   					 ins_nr, inode_only);
>   			if (ret) {
> @@ -2897,7 +2972,7 @@ next_slot:
>   			break;
>   	}
>   	if (ins_nr) {
> -		ret = copy_items(trans, log, dst_path, src,
> +		ret = copy_items(trans, inode, dst_path, src,
>   				 ins_start_slot,
>   				 ins_nr, inode_only);
>   		if (ret) {


  reply	other threads:[~2011-06-21 13:46 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-21  8:49 [GIT PULL v3] Btrfs: improve write ahead log with sub transaction Liu Bo
2011-06-21  8:49 ` [PATCH 01/12 v3] Btrfs: introduce sub transaction stuff Liu Bo
2011-06-21  8:49 ` [PATCH 02/12 v3] Btrfs: update block generation if should_cow_block fails Liu Bo
2011-06-21  8:49 ` [PATCH 03/12 v3] Btrfs: modify btrfs_drop_extents API Liu Bo
2011-06-21  8:49 ` [PATCH 04/12 v3] Btrfs: introduce first sub trans Liu Bo
2011-06-21  8:49 ` [PATCH 05/12 v3] Btrfs: still update inode trans stuff when size remains unchanged Liu Bo
2011-06-21  8:49 ` [PATCH 06/12 v3] Btrfs: improve log with sub transaction Liu Bo
2011-06-21 13:46   ` Josef Bacik [this message]
2011-06-21  8:49 ` [PATCH 07/12 v3] Btrfs: add checksum check for log Liu Bo
2011-06-21 13:49   ` Josef Bacik
2011-06-21  8:49 ` [PATCH 08/12 v3] Btrfs: fix a bug of log check Liu Bo
2011-06-21  8:49 ` [PATCH 09/12 v3] Btrfs: kick off useless code Liu Bo
2011-06-21  8:49 ` [PATCH 10/12 v3] Btrfs: deal with EEXIST after iput Liu Bo
2011-06-21 14:00   ` Josef Bacik
2011-06-22  1:57     ` liubo
2011-06-21  8:49 ` [PATCH 11/12 v3] Btrfs: use the right generation number to read log_root_tree Liu Bo
2011-06-21  8:49 ` [PATCH 12/12 v3] Revert "Btrfs: do not flush csum items of unchanged file data during treelog" Liu Bo
2011-08-04 13:57 ` [GIT PULL v3] Btrfs: improve write ahead log with sub transaction Chris Mason
2011-08-06  3:44   ` liubo
2011-08-06  9:23     ` liubo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E00A0CA.90108@redhat.com \
    --to=josef@redhat.com \
    --cc=chris.mason@oracle.com \
    --cc=dave@jikos.cz \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=liubo2009@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).