linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: tytso@mit.edu, darrick.wong@oracle.com
Cc: linux-ext4@vger.kernel.org
Subject: [PATCH 25/37] libext2fs: when appending to a file, don't split an index block in equal halves
Date: Thu, 01 May 2014 16:15:05 -0700	[thread overview]
Message-ID: <20140501231505.31890.96782.stgit@birch.djwong.org> (raw)
In-Reply-To: <20140501231222.31890.82860.stgit@birch.djwong.org>

When we're appending an extent to the end of a file and the index
block is full, don't split the index block into two half-full index
blocks because this leaves us with under utilized index blocks, at
least in the fallocate case.  Instead, copy the last extent from the
full block into the new block.  This isn't perfect utilization, but
there's a lot of work involved in teaching extent.c to be able to goto
a nonexistent node in a newly allocated (and empty) extent block.

This patch does not fix the general problem of keeping the extent tree
balanced.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 lib/ext2fs/extent.c |   79 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 72 insertions(+), 7 deletions(-)


diff --git a/lib/ext2fs/extent.c b/lib/ext2fs/extent.c
index 30673b5..c0b34a7 100644
--- a/lib/ext2fs/extent.c
+++ b/lib/ext2fs/extent.c
@@ -29,6 +29,8 @@
 #include "ext2fsP.h"
 #include "e2image.h"
 
+#undef DEBUG
+
 /*
  * Definitions to be dropped in lib/ext2fs/ext2fs.h
  */
@@ -122,11 +124,39 @@ static void dbg_print_extent(char *desc, struct ext2fs_extent *extent)
 
 }
 
+static void dump_path(const char *tag, struct ext2_extent_handle *handle,
+		      struct extent_path *path)
+{
+	struct extent_path *ppp = path;
+	printf("%s: level=%d\n", tag, handle->level);
+
+	do {
+		printf("%s: path=%ld buf=%p entries=%d max_entries=%d left=%d "
+		       "visit_num=%d flags=0x%x end_blk=%llu curr=%p(%ld)\n",
+		       tag, (ppp - handle->path), ppp->buf, ppp->entries,
+		       ppp->max_entries, ppp->left, ppp->visit_num, ppp->flags,
+		       ppp->end_blk, ppp->curr, ppp->curr - (void *)ppp->buf);
+		printf("  ");
+		dbg_show_header((struct ext3_extent_header *)ppp->buf);
+		if (ppp->curr) {
+			printf("  ");
+			dbg_show_index(ppp->curr);
+			printf("  ");
+			dbg_show_extent(ppp->curr);
+		}
+		ppp--;
+	} while (ppp >= handle->path);
+	fflush(stdout);
+
+	return;
+}
+
 #else
 #define dbg_show_header(eh) do { } while (0)
 #define dbg_show_index(ix) do { } while (0)
 #define dbg_show_extent(ex) do { } while (0)
 #define dbg_print_extent(desc, ex) do { } while (0)
+#define dump_path(tag, handle, path) do { } while (0)
 #endif
 
 /*
@@ -837,12 +867,31 @@ errcode_t ext2fs_extent_replace(ext2_extent_handle_t handle,
 	return 0;
 }
 
+static int splitting_at_eof(struct ext2_extent_handle *handle,
+			    struct extent_path *path)
+{
+	struct extent_path *ppp = path;
+	dump_path(__func__, handle, path);
+
+	if (handle->level == 0)
+		return 0;
+
+	do {
+		if (ppp->left)
+			return 0;
+		ppp--;
+	} while (ppp >= handle->path);
+
+	return 1;
+}
+
 /*
  * allocate a new block, move half the current node to it, and update parent
  *
  * handle will be left pointing at original record.
  */
-errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
+static errcode_t extent_node_split(ext2_extent_handle_t handle,
+				   int expand_allowed)
 {
 	errcode_t			retval = 0;
 	blk64_t				new_node_pblk;
@@ -857,6 +906,7 @@ errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
 	int				tocopy;
 	int				new_root = 0;
 	struct ext2_extent_info		info;
+	int				no_balance;
 
 	/* basic sanity */
 	EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EXTENT_HANDLE);
@@ -897,7 +947,7 @@ errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
 			goto done;
 		goal_blk = extent.e_pblk;
 
-		retval = ext2fs_extent_node_split(handle);
+		retval = extent_node_split(handle, expand_allowed);
 		if (retval)
 			goto done;
 
@@ -912,6 +962,14 @@ errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
 	if (!path->curr)
 		return EXT2_ET_NO_CURRENT_NODE;
 
+	/*
+	 * Normally, we try to split a full node in half.  This doesn't turn
+	 * out so well if we're tacking extents on the end of the file because
+	 * then we're stuck with a tree of half-full extent blocks.  This of
+	 * course doesn't apply to the root level.
+	 */
+	no_balance = expand_allowed ? splitting_at_eof(handle, path) : 0;
+
 	/* extent header of the current node we'll split */
 	eh = (struct ext3_extent_header *)path->buf;
 
@@ -925,7 +983,10 @@ errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
 		if (retval)
 			goto done;
 	} else {
-		tocopy = ext2fs_le16_to_cpu(eh->eh_entries) / 2;
+		if (no_balance)
+			tocopy = 1;
+		else
+			tocopy = ext2fs_le16_to_cpu(eh->eh_entries) / 2;
 	}
 
 #ifdef DEBUG
@@ -934,7 +995,7 @@ errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
 				handle->level);
 #endif
 
-	if (!tocopy) {
+	if (!tocopy && !no_balance) {
 #ifdef DEBUG
 		printf("Nothing to copy to new block!\n");
 #endif
@@ -1059,8 +1120,7 @@ errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
 		goto done;
 
 	/* new node hooked in, so update inode block count (do this here?) */
-	handle->inode->i_blocks += (handle->fs->blocksize *
-				    EXT2FS_CLUSTER_RATIO(handle->fs)) / 512;
+	ext2fs_iblk_add_blocks(handle->fs, handle->inode, 1);
 	retval = ext2fs_write_inode(handle->fs, handle->ino,
 				    handle->inode);
 	if (retval)
@@ -1074,6 +1134,11 @@ done:
 	return retval;
 }
 
+errcode_t ext2fs_extent_node_split(ext2_extent_handle_t handle)
+{
+	return extent_node_split(handle, 0);
+}
+
 errcode_t ext2fs_extent_insert(ext2_extent_handle_t handle, int flags,
 				      struct ext2fs_extent *extent)
 {
@@ -1105,7 +1170,7 @@ errcode_t ext2fs_extent_insert(ext2_extent_handle_t handle, int flags,
 			printf("node full (level %d) - splitting\n",
 				   handle->level);
 #endif
-			retval = ext2fs_extent_node_split(handle);
+			retval = extent_node_split(handle, 1);
 			if (retval)
 				return retval;
 			path = handle->path + handle->level;


  parent reply	other threads:[~2014-05-01 23:15 UTC|newest]

Thread overview: 91+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-01 23:12 [PATCH 00/37] e2fsprogs patchbomb 5/14 Darrick J. Wong
2014-05-01 23:12 ` [PATCH 01/37] misc: create better-packaged static analysis reports Darrick J. Wong
2014-05-11 22:33   ` Theodore Ts'o
2014-05-01 23:12 ` [PATCH 02/37] misc: coverity fixes Darrick J. Wong
2014-05-02 11:17   ` Lukáš Czerner
2014-05-05 20:04     ` Darrick J. Wong
2014-05-11 22:40       ` Theodore Ts'o
2014-05-01 23:12 ` [PATCH 03/37] libext2fs: create sockets when populating filesystem Darrick J. Wong
2014-05-02 11:22   ` Lukáš Czerner
2014-05-05 20:08     ` Darrick J. Wong
2014-05-11 22:44       ` Theodore Ts'o
2014-05-01 23:12 ` [PATCH 04/37] mke2fs: always warn if 128-byte inode and inline_data Darrick J. Wong
2014-05-02 11:27   ` Lukáš Czerner
2014-05-05 20:10     ` Darrick J. Wong
2014-05-12  0:26       ` Theodore Ts'o
2014-05-01 23:12 ` [PATCH 05/37] debugfs: teach logdump to deal with 64bit revoke tables Darrick J. Wong
2014-05-02 11:38   ` Lukáš Czerner
2014-05-05 22:23     ` Darrick J. Wong
2014-05-06 11:35       ` Lukáš Czerner
2014-05-12  1:20         ` Theodore Ts'o
2014-05-01 23:13 ` [PATCH 06/37] debugfs: force logdump to display (old) journal contents Darrick J. Wong
2014-05-02 11:49   ` Lukáš Czerner
2014-05-06  0:24     ` Darrick J. Wong
2014-05-12  1:41       ` Theodore Ts'o
2014-05-12  3:31         ` Theodore Ts'o
2014-05-14  0:05         ` Darrick J. Wong
2014-05-01 23:13 ` [PATCH 07/37] resize2fs: fix check for collision between old GDT and superblock on sparse_super2 fs Darrick J. Wong
2014-05-12  3:35   ` Theodore Ts'o
2014-05-01 23:13 ` [PATCH 08/37] mke2fs: set gdt csum when creating packed fs Darrick J. Wong
2014-05-02 11:55   ` Lukáš Czerner
2014-05-12  4:22     ` Theodore Ts'o
2014-05-01 23:13 ` [PATCH 09/37] mke2fs: set error behavior at initialization time Darrick J. Wong
2014-05-02 12:13   ` Lukáš Czerner
2014-05-01 23:13 ` [PATCH 10/37] e2fsck: verify checksums after checking everything else Darrick J. Wong
2014-05-02 12:32   ` Lukáš Czerner
2014-05-05 22:56     ` Darrick J. Wong
2014-05-06 11:32       ` Lukáš Czerner
2014-05-08  0:05         ` Darrick J. Wong
2014-05-01 23:13 ` [PATCH 11/37] e2fsck: fix the extended attribute checksum error message Darrick J. Wong
2014-05-02 12:46   ` Lukáš Czerner
2014-05-05 23:08     ` Darrick J. Wong
2014-05-06 10:12       ` Lukáš Czerner
2014-05-01 23:13 ` [PATCH 12/37] e2fsck: insert a missing dirent tail for checksums if possible Darrick J. Wong
2014-05-02 12:54   ` Lukáš Czerner
2014-05-05 23:16     ` Darrick J. Wong
2014-05-01 23:13 ` [PATCH 13/37] e2fsck: write dir blocks after new inode when reconstructing root/lost+found Darrick J. Wong
2014-05-05 17:13   ` Lukáš Czerner
2014-05-01 23:13 ` [PATCH 14/37] dumpe2fs: add switch to disable checksum verification Darrick J. Wong
2014-05-05 17:20   ` Lukáš Czerner
2014-05-01 23:14 ` [PATCH 15/37] mke2fs: set block_validity as a default mount option Darrick J. Wong
2014-05-05 17:24   ` Lukáš Czerner
2014-05-01 23:14 ` [PATCH 16/37] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2014-05-06 15:45   ` Lukáš Czerner
2014-05-06 19:59     ` Darrick J. Wong
2014-05-07 10:02       ` Lukáš Czerner
2014-05-07 21:37         ` Darrick J. Wong
2014-05-08  0:13           ` [PATCH 1/2] libext2fs: support BLKZEROOUT/FALLOC_FL_ZERO_RANGE in ext2fs_zero_blocks Darrick J. Wong
2014-05-13 11:11             ` Lukáš Czerner
2014-05-08  0:14           ` [PATCH 2/2] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2014-05-27 16:28             ` Lukáš Czerner
2014-05-28 19:48               ` Darrick J. Wong
2014-05-01 23:14 ` [PATCH 17/37] libext2fs: file IO routines should handle uninit blocks Darrick J. Wong
2014-05-01 23:14 ` [PATCH 18/37] resize2fs: convert fs to and from 64bit mode Darrick J. Wong
2014-05-01 23:14 ` [PATCH 19/37] resize2fs: when toggling 64bit, don't free in-use bg data clusters Darrick J. Wong
2014-05-01 23:14 ` [PATCH 20/37] resize2fs: adjust reserved_gdt_blocks when changing group descriptor size Darrick J. Wong
2014-05-01 23:14 ` [PATCH 21/37] libext2fs: have UNIX IO manager use pread/pwrite Darrick J. Wong
2014-08-02 23:16   ` Theodore Ts'o
2014-05-01 23:14 ` [PATCH 22/37] ext2fs: add readahead method to improve scanning Darrick J. Wong
2014-05-01 23:14 ` [PATCH 23/37] e2fsck: provide routines to read-ahead metadata Darrick J. Wong
2014-05-01 23:14 ` [PATCH 24/37] e2fsck: read-ahead metadata during passes 1, 2, and 4 Darrick J. Wong
2014-07-28 22:25   ` Darrick J. Wong
2014-05-01 23:15 ` Darrick J. Wong [this message]
2014-08-02 23:43   ` [PATCH 25/37] libext2fs: when appending to a file, don't split an index block in equal halves Theodore Ts'o
2014-05-01 23:15 ` [PATCH 26/37] libext2fs: find inode goal when allocating blocks Darrick J. Wong
2014-05-01 23:15 ` [PATCH 27/37] libext2fs: find a range of empty blocks Darrick J. Wong
2014-05-01 23:15 ` [PATCH 28/37] libext2fs: provide a function to set inode size Darrick J. Wong
2014-07-26 18:37   ` Theodore Ts'o
2014-05-01 23:15 ` [PATCH 29/37] libext2fs: implement fallocate Darrick J. Wong
2014-05-01 23:15 ` [PATCH 31/37] fuse2fs: translate ACL structures Darrick J. Wong
2014-05-01 23:15 ` [PATCH 32/37] fuse2fs: handle 64-bit dates correctly Darrick J. Wong
2014-05-01 23:16 ` [PATCH 33/37] fuse2fs: implement fallocate Darrick J. Wong
2014-05-01 23:16 ` [PATCH 35/37] tests: enable using fuse2fs with metadata checksum test Darrick J. Wong
2014-05-01 23:16 ` [PATCH 36/37] tests: test date handling Darrick J. Wong
2014-05-01 23:16 ` [PATCH 37/37] ext5: define new subtype to add features and reduce testing complexity Darrick J. Wong
2014-05-02  9:45   ` Lukáš Czerner
2014-05-02 14:04     ` Theodore Ts'o
2014-05-06  1:59       ` Darrick J. Wong
2014-05-06  1:33     ` Darrick J. Wong
2014-05-06 12:50       ` Lukáš Czerner
2014-05-06 15:21         ` Theodore Ts'o
2014-05-06 15:30           ` Lukáš Czerner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140501231505.31890.96782.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).