linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: tytso@mit.edu, darrick.wong@oracle.com
Cc: linux-ext4@vger.kernel.org
Subject: [PATCH 29/54] undo-io: add new calls to and speed up the undo io manager
Date: Mon, 26 Jan 2015 23:38:43 -0800	[thread overview]
Message-ID: <20150127073843.13308.46981.stgit@birch.djwong.org> (raw)
In-Reply-To: <20150127073533.13308.44994.stgit@birch.djwong.org>

Implement pass-through calls for discard, zero-out, and readahead in
the IO manager so that we can take advantage of any underlying
support.

Furthermore, improve tdb write-out speed by disabling locking and only
fsyncing at the end -- we don't care about locking because having
multiple writers to the undo file will produce an undo database full
of garbage blocks; and we only need to fsync at the end because if we
fail before the end, our undo file will lack the necessary superblock
data that e2undo requires to do replay safely.  Without these changes,
we call fsync four times per tdb update(!).  With them, the overhead of
using undo_io while converting a 2TB FS to metadata_csum drops from 3+
hours to 55 minutes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 lib/ext2fs/tdb.c     |   10 ++++++
 lib/ext2fs/tdb.h     |    2 +
 lib/ext2fs/undo_io.c |   87 +++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 97 insertions(+), 2 deletions(-)


diff --git a/lib/ext2fs/tdb.c b/lib/ext2fs/tdb.c
index 1d97685..7317288 100644
--- a/lib/ext2fs/tdb.c
+++ b/lib/ext2fs/tdb.c
@@ -4142,3 +4142,13 @@ int tdb_reopen_all(int parent_longlived)
 
 	return 0;
 }
+
+/**
+ * fsync() tdb->fd and return the result; if fd is -1, do nothing, return 0.
+ **/
+int tdb_flush(struct tdb_context *tdb)
+{
+	if (tdb->fd != -1)
+		return fsync(tdb->fd);	/* fsync()'s own status is passed back */
+	return 0;
+}
diff --git a/lib/ext2fs/tdb.h b/lib/ext2fs/tdb.h
index 732ef0e..6a4086c 100644
--- a/lib/ext2fs/tdb.h
+++ b/lib/ext2fs/tdb.h
@@ -129,6 +129,7 @@ typedef struct TDB_DATA {
 #define tdb_lockall_nonblock ext2fs_tdb_lockall_nonblock
 #define tdb_lockall_read_nonblock ext2fs_tdb_lockall_read_nonblock
 #define tdb_lockall_unmark ext2fs_tdb_lockall_unmark
+#define tdb_flush ext2fs_tdb_flush
 
 /* this is the context structure that is returned from a db open */
 typedef struct tdb_context TDB_CONTEXT;
@@ -191,6 +192,7 @@ size_t tdb_map_size(struct tdb_context *tdb);
 int tdb_get_flags(struct tdb_context *tdb);
 void tdb_enable_seqnum(struct tdb_context *tdb);
 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb);
+int tdb_flush(struct tdb_context *tdb);
 
 /* Low level locking functions: use with care */
 int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
diff --git a/lib/ext2fs/undo_io.c b/lib/ext2fs/undo_io.c
index d6beb02..94317cb 100644
--- a/lib/ext2fs/undo_io.c
+++ b/lib/ext2fs/undo_io.c
@@ -37,6 +37,7 @@
 #if HAVE_SYS_RESOURCE_H
 #include <sys/resource.h>
 #endif
+#include <limits.h>
 
 #include "tdb.h"
 
@@ -354,8 +355,12 @@ static errcode_t undo_open(const char *name, int flags, io_channel *channel)
 		data->real = 0;
 	}
 
+	if (data->real)
+		io->flags = (io->flags & ~CHANNEL_FLAGS_DISCARD_ZEROES) |
+			    (data->real->flags & CHANNEL_FLAGS_DISCARD_ZEROES);
+
 	/* setup the tdb file */
-	data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST,
+	data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST | TDB_NOLOCK | TDB_NOSYNC,
 			     O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
 	if (!data->tdb) {
 		retval = errno;
@@ -399,8 +404,10 @@ static errcode_t undo_close(io_channel channel)
 		return retval;
 	if (data->real)
 		retval = io_channel_close(data->real);
-	if (data->tdb)
+	if (data->tdb) {
+		tdb_flush(data->tdb);
 		tdb_close(data->tdb);
+	}
 	ext2fs_free_mem(&channel->private_data);
 	if (channel->name)
 		ext2fs_free_mem(&channel->name);
@@ -510,6 +517,77 @@ static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
 	return retval;
 }
 
+static errcode_t undo_discard(io_channel channel, unsigned long long block,
+			      unsigned long long count)
+{
+	struct undo_private_data *data;
+	errcode_t	retval = 0;
+	int icount;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (count > INT_MAX)	/* count is narrowed to int (icount) below */
+		return EXT2_ET_UNIMPLEMENTED;
+	icount = count;
+
+	/*
+	 * First write the existing content into the database; bail out on error
+	 */
+	retval = undo_write_tdb(channel, block, icount);
+	if (retval)
+		return retval;	/* the discard is not issued in this case */
+	if (data->real)	/* forward the request only if data->real is set */
+		retval = io_channel_discard(data->real, block, count);
+
+	return retval;
+}
+
+static errcode_t undo_zeroout(io_channel channel, unsigned long long block,
+			      unsigned long long count)
+{
+	struct undo_private_data *data;
+	errcode_t	retval = 0;
+	int icount;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (count > INT_MAX)	/* count is narrowed to int (icount) below */
+		return EXT2_ET_UNIMPLEMENTED;
+	icount = count;
+
+	/*
+	 * First write the existing content into the database; bail out on error
+	 */
+	retval = undo_write_tdb(channel, block, icount);
+	if (retval)
+		return retval;	/* the zero-out is not issued in this case */
+	if (data->real)	/* forward the request only if data->real is set */
+		retval = io_channel_zeroout(data->real, block, count);
+
+	return retval;
+}
+
+static errcode_t undo_cache_readahead(io_channel channel,
+				      unsigned long long block,
+				      unsigned long long count)
+{
+	struct undo_private_data *data;
+	errcode_t	retval = 0;	/* returned as-is when data->real is unset */
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (data->real)	/* no undo_write_tdb() call here; just pass it through */
+		retval = io_channel_cache_readahead(data->real, block, count);
+
+	return retval;
+}
+
 /*
  * Flush data buffers to disk.
  */
@@ -522,6 +600,8 @@ static errcode_t undo_flush(io_channel channel)
 	data = (struct undo_private_data *) channel->private_data;
 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
 
+	if (data->tdb)
+		tdb_flush(data->tdb);
 	if (data->real)
 		retval = io_channel_flush(data->real);
 
@@ -601,6 +681,9 @@ static struct struct_io_manager struct_undo_manager = {
 	.get_stats	= undo_get_stats,
 	.read_blk64	= undo_read_blk64,
 	.write_blk64	= undo_write_blk64,
+	.discard	= undo_discard,
+	.zeroout	= undo_zeroout,
+	.cache_readahead	= undo_cache_readahead,
 };
 
 io_manager undo_io_manager = &struct_undo_manager;


  parent reply	other threads:[~2015-01-27  7:38 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-27  7:35 [PATCH 00/54] e2fsprogs January 2015 patchbomb Darrick J. Wong
2015-01-27  7:35 ` [PATCH 01/54] misc: fix minor testcase problems Darrick J. Wong
2015-01-27 15:55   ` Theodore Ts'o
2015-01-27  7:35 ` [PATCH 02/54] debugfs: document new commands Darrick J. Wong
2015-01-27 15:56   ` Theodore Ts'o
2015-01-27  7:35 ` [PATCH 03/54] debugfs: fix crash in ea_set argument handling Darrick J. Wong
2015-01-27 15:58   ` Theodore Ts'o
2015-01-27  7:35 ` [PATCH 04/54] libext2fs: initialize i_extra_isize when writing EAs Darrick J. Wong
2015-01-27 16:02   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 05/54] libext2fs: avoid pointless EA block allocation Darrick J. Wong
2015-01-27 16:07   ` Theodore Ts'o
2015-01-27 19:26     ` Darrick J. Wong
2015-01-27  7:36 ` [PATCH 06/54] libext2fs: strengthen i_extra_isize checks when reading/writing xattrs Darrick J. Wong
2015-01-27 16:08   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 07/54] libext2fs: fix tdb.c mmap leak Darrick J. Wong
2015-01-27 16:09   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 08/54] resize2fs: fix regression test to not depend on ext4.ko being loaded Darrick J. Wong
2015-01-27 16:10   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 09/54] tune2fs: disable csum verification before resizing inode Darrick J. Wong
2015-01-27 16:11   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 10/54] tune2fs: abort when trying to enable/disable metadata_csum on mounted fs Darrick J. Wong
2015-01-27 16:26   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 11/54] tune2fs: call out to resize2fs for 64bit conversion Darrick J. Wong
2015-01-27 16:31   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 12/54] e2fsck: clear i_block[] when there are too many bad mappings on a special inode Darrick J. Wong
2015-01-27 16:32   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 13/54] e2fsck: on read error, don't rewrite blocks past the end of the fs Darrick J. Wong
2015-01-27 17:35   ` Theodore Ts'o
2015-01-28 23:35     ` Darrick J. Wong
2015-01-27  7:37 ` [PATCH 14/54] e2fsck: fix the journal recreation message Darrick J. Wong
2015-01-27 18:02   ` Theodore Ts'o
2015-01-27 19:37     ` Darrick J. Wong
2015-01-27  7:37 ` [PATCH 15/54] e2fsck: handle multiple *ind block collisions with critical metadata Darrick J. Wong
2015-01-28 13:52   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 16/54] e2fsck: decrement bad count _after_ remapping a duplicate block Darrick J. Wong
2015-01-28 13:58   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 17/54] e2fsck: inspect inline dir data as two directory blocks Darrick J. Wong
2015-01-28 15:16   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 18/54] e2fsck: improve the inline directory detector Darrick J. Wong
2015-01-28 16:38   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 19/54] e2fsck: salvage under-sized dirents by removing them Darrick J. Wong
2015-02-16 15:40   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 20/54] e2fsck: add a 'yes to all' response in interactive mode Darrick J. Wong
2015-03-29  2:54   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 21/54] libext2fs: zero blocks via FALLOC_FL_ZERO_RANGE in ext2fs_zero_blocks Darrick J. Wong
2015-03-29  3:46   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 22/54] libext2fs: ext2fs_new_block2() should call alloc_block hook Darrick J. Wong
2015-03-29  3:08   ` Theodore Ts'o
2015-01-27  7:38 ` [PATCH 23/54] libext2fs: Support readonly filesystem images Darrick J. Wong
2015-03-19 21:32   ` [PATCH v2 " Darrick J. Wong
2015-03-29  3:42     ` Theodore Ts'o
2015-01-27  7:38 ` [PATCH 24/54] libext2fs/e2fsck: provide routines to read-ahead metadata Darrick J. Wong
2015-01-27  7:38 ` [PATCH 25/54] e2fsck: read-ahead metadata during passes 1, 2, and 4 Darrick J. Wong
2015-01-27  7:38 ` [PATCH 26/54] e2fsck: track directories to be rehashed with a bitmap Darrick J. Wong
2015-01-27  7:38 ` [PATCH 27/54] e2fsck: rebuild sparse extent trees/convert non-extent ext3 files Darrick J. Wong
2015-03-19 21:42   ` [PATCH v4 " Darrick J. Wong
2015-01-27  7:38 ` [PATCH 28/54] tests: verify proper rebuilding of sparse extent trees and block map file conversion Darrick J. Wong
2015-01-27  7:38 ` Darrick J. Wong [this message]
2015-01-27  7:38 ` [PATCH 30/54] undo-io: be more flexible about setting block size Darrick J. Wong
2015-01-27  7:38 ` [PATCH 31/54] undo-io: use a bitmap to track what we've already written Darrick J. Wong
2015-01-27  7:39 ` [PATCH 32/54] e2undo: fix memory leaks and tweak the error messages somewhat Darrick J. Wong
2015-01-27  7:39 ` [PATCH 33/54] e2undo: ditch tdb file, write everything to a flat file Darrick J. Wong
2015-01-27  7:39 ` [PATCH 34/54] libext2fs: support atexit cleanups Darrick J. Wong
2015-01-27  7:39 ` [PATCH 35/54] e2fsck: optionally create an undo file Darrick J. Wong
2015-01-27  7:39 ` [PATCH 36/54] resize2fs: optionally create " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 37/54] tune2fs: " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 38/54] mke2fs: " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 39/54] debugfs: " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 40/54] tests: test undo file creation in e2fsck/resize2fs/tune2fs/mke2fs Darrick J. Wong
2015-01-27  7:40 ` [PATCH 41/54] tests: test various features of the new e2undo format Darrick J. Wong
2015-01-27  7:40 ` [PATCH 42/54] copy-in: create hardlinks with the correct directory filetype Darrick J. Wong
2015-01-27  7:40 ` [PATCH 43/54] copy-in: for files, only iterate file blocks that are mapped Darrick J. Wong
2015-01-27  7:40 ` [PATCH 44/54] copyin: fix error handling Darrick J. Wong
2015-01-27  7:40 ` [PATCH 45/54] mke2fs: add simple tests and re-alphabetize mke2fs manpage options Darrick J. Wong
2015-01-27  7:40 ` [PATCH 46/54] contrib: script to create minified ext4 image from a directory Darrick J. Wong
2015-01-27  7:40 ` [PATCH 47/54] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2015-01-27  7:40 ` [PATCH 48/54] libext2fs: find/alloc a range of empty blocks Darrick J. Wong
2015-01-27  7:40 ` [PATCH 49/54] libext2fs: add new hooks to support large allocations Darrick J. Wong
2015-01-27  7:41 ` [PATCH 50/54] libext2fs: implement fallocate Darrick J. Wong
2015-01-27  7:41 ` [PATCH 51/54] libext2fs: use fallocate for creating journals and hugefiles Darrick J. Wong
2015-01-27  7:41 ` [PATCH 52/54] debugfs: implement fallocate Darrick J. Wong
2015-01-27  7:41 ` [PATCH 53/54] tests: test debugfs punch command Darrick J. Wong
2015-03-19 21:44 ` [PATCH 55/54] e2fsck: actually fix inline_data flags problems when user says to do so Darrick J. Wong
2015-03-29  4:05   ` Theodore Ts'o
2015-03-19 21:45 ` [PATCH 56/54] libext2fs: zero hash in ibody extended attributes Darrick J. Wong
2015-03-29  4:13   ` Theodore Ts'o
2015-03-19 21:47 ` [PATCH 57/54] e2fsck: convert block-mapped files to extents on bigalloc fs Darrick J. Wong
2015-03-19 23:54 ` [PATCH 58/54] e2fsck: turn inline data symlink into a fast symlink when possible Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150127073843.13308.46981.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).