linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: tytso@mit.edu, darrick.wong@oracle.com
Cc: linux-ext4@vger.kernel.org
Subject: [PATCH 12/31] undo-io: add new calls to and speed up the undo io manager
Date: Sat, 20 Dec 2014 13:18:04 -0800	[thread overview]
Message-ID: <20141220211804.25563.63523.stgit@birch.djwong.org> (raw)
In-Reply-To: <20141220211640.25563.80596.stgit@birch.djwong.org>

Implement pass-through calls for discard, zero-out, and readahead in
the IO manager so that we can take advantage of any underlying
support.

Furthermore, improve tdb write-out speed by disabling locking and only
fsyncing at the end -- we don't care about locking because having
multiple writers to the undo file will produce an undo database full
of garbage blocks; and we only need to fsync at the end because if we
fail before the end, our undo file will lack the necessary superblock
data that e2undo requires to do replay safely.  Without this, we call
fsync four times per tdb update(!).  This reduces the overhead of using
undo_io while converting a 2TB FS to metadata_csum from 3+ hours to 55
minutes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 lib/ext2fs/tdb.c     |   10 ++++++
 lib/ext2fs/tdb.h     |    2 +
 lib/ext2fs/undo_io.c |   87 +++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 97 insertions(+), 2 deletions(-)


diff --git a/lib/ext2fs/tdb.c b/lib/ext2fs/tdb.c
index 61e30ed..a916768 100644
--- a/lib/ext2fs/tdb.c
+++ b/lib/ext2fs/tdb.c
@@ -4138,3 +4138,13 @@ int tdb_reopen_all(int parent_longlived)
 
 	return 0;
 }
+
+/**
+ * fsync() the database's backing file; returns 0 if no file is open (fd == -1).
+ **/
+int tdb_flush(struct tdb_context *tdb)
+{
+	if (tdb->fd != -1)
+		return fsync(tdb->fd);
+	return 0;
+}
diff --git a/lib/ext2fs/tdb.h b/lib/ext2fs/tdb.h
index 732ef0e..6a4086c 100644
--- a/lib/ext2fs/tdb.h
+++ b/lib/ext2fs/tdb.h
@@ -129,6 +129,7 @@ typedef struct TDB_DATA {
 #define tdb_lockall_nonblock ext2fs_tdb_lockall_nonblock
 #define tdb_lockall_read_nonblock ext2fs_tdb_lockall_read_nonblock
 #define tdb_lockall_unmark ext2fs_tdb_lockall_unmark
+#define tdb_flush ext2fs_tdb_flush
 
 /* this is the context structure that is returned from a db open */
 typedef struct tdb_context TDB_CONTEXT;
@@ -191,6 +192,7 @@ size_t tdb_map_size(struct tdb_context *tdb);
 int tdb_get_flags(struct tdb_context *tdb);
 void tdb_enable_seqnum(struct tdb_context *tdb);
 void tdb_increment_seqnum_nonblock(struct tdb_context *tdb);
+int tdb_flush(struct tdb_context *tdb);
 
 /* Low level locking functions: use with care */
 int tdb_chainlock(struct tdb_context *tdb, TDB_DATA key);
diff --git a/lib/ext2fs/undo_io.c b/lib/ext2fs/undo_io.c
index d6beb02..94317cb 100644
--- a/lib/ext2fs/undo_io.c
+++ b/lib/ext2fs/undo_io.c
@@ -37,6 +37,7 @@
 #if HAVE_SYS_RESOURCE_H
 #include <sys/resource.h>
 #endif
+#include <limits.h>
 
 #include "tdb.h"
 
@@ -354,8 +355,12 @@ static errcode_t undo_open(const char *name, int flags, io_channel *channel)
 		data->real = 0;
 	}
 
+	if (data->real)
+		io->flags = (io->flags & ~CHANNEL_FLAGS_DISCARD_ZEROES) |
+			    (data->real->flags & CHANNEL_FLAGS_DISCARD_ZEROES);
+
 	/* setup the tdb file */
-	data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST,
+	data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST | TDB_NOLOCK | TDB_NOSYNC,
 			     O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
 	if (!data->tdb) {
 		retval = errno;
@@ -399,8 +404,10 @@ static errcode_t undo_close(io_channel channel)
 		return retval;
 	if (data->real)
 		retval = io_channel_close(data->real);
-	if (data->tdb)
+	if (data->tdb) {
+		tdb_flush(data->tdb);
 		tdb_close(data->tdb);
+	}
 	ext2fs_free_mem(&channel->private_data);
 	if (channel->name)
 		ext2fs_free_mem(&channel->name);
@@ -510,6 +517,77 @@ static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
 	return retval;
 }
 
+static errcode_t undo_discard(io_channel channel, unsigned long long block,
+			      unsigned long long count)
+{
+	struct undo_private_data *data;
+	errcode_t	retval = 0;
+	int icount;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (count > INT_MAX)
+		return EXT2_ET_UNIMPLEMENTED;	/* count must fit in the int below */
+	icount = count;
+
+	/*
+	 * First save the existing contents of the range into the undo database
+	 */
+	retval = undo_write_tdb(channel, block, icount);
+	if (retval)
+		return retval;
+	if (data->real)	/* then forward the discard to data->real */
+		retval = io_channel_discard(data->real, block, count);
+
+	return retval;	/* still 0 here if there is no data->real */
+}
+
+static errcode_t undo_zeroout(io_channel channel, unsigned long long block,
+			      unsigned long long count)
+{
+	struct undo_private_data *data;
+	errcode_t	retval = 0;
+	int icount;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (count > INT_MAX)
+		return EXT2_ET_UNIMPLEMENTED;	/* count must fit in the int below */
+	icount = count;
+
+	/*
+	 * First save the existing contents of the range into the undo database
+	 */
+	retval = undo_write_tdb(channel, block, icount);
+	if (retval)
+		return retval;
+	if (data->real)	/* then forward the zero-out to data->real */
+		retval = io_channel_zeroout(data->real, block, count);
+
+	return retval;	/* still 0 here if there is no data->real */
+}
+
+static errcode_t undo_cache_readahead(io_channel channel,
+				      unsigned long long block,
+				      unsigned long long count)
+{
+	struct undo_private_data *data;
+	errcode_t	retval = 0;	/* returned unchanged if there is no data->real */
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (data->real)	/* just forwarded; nothing is written to the tdb */
+		retval = io_channel_cache_readahead(data->real, block, count);
+
+	return retval;
+}
+
 /*
  * Flush data buffers to disk.
  */
@@ -522,6 +600,8 @@ static errcode_t undo_flush(io_channel channel)
 	data = (struct undo_private_data *) channel->private_data;
 	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
 
+	if (data->tdb)
+		tdb_flush(data->tdb);
 	if (data->real)
 		retval = io_channel_flush(data->real);
 
@@ -601,6 +681,9 @@ static struct struct_io_manager struct_undo_manager = {
 	.get_stats	= undo_get_stats,
 	.read_blk64	= undo_read_blk64,
 	.write_blk64	= undo_write_blk64,
+	.discard	= undo_discard,
+	.zeroout	= undo_zeroout,
+	.cache_readahead	= undo_cache_readahead,
 };
 
 io_manager undo_io_manager = &struct_undo_manager;


  parent reply	other threads:[~2014-12-20 21:18 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-20 21:16 [PATCH 00/31] e2fsprogs December 2014 patchbomb Darrick J. Wong
2014-12-20 21:16 ` [PATCH 01/31] misc: fix clang warnings and a resource leak Darrick J. Wong
2015-01-19 21:39   ` Theodore Ts'o
2014-12-20 21:16 ` [PATCH 02/31] debugfs: document new commands Darrick J. Wong
2014-12-20 21:17 ` [PATCH 03/31] libext2fs: zero blocks via FALLOC_FL_ZERO_RANGE in ext2fs_zero_blocks Darrick J. Wong
2014-12-20 21:17 ` [PATCH 04/31] libext2fs: ext2fs_new_block2() should call alloc_block hook Darrick J. Wong
2014-12-20 21:17 ` [PATCH 05/31] tune2fs: disable csum verification before resizing inode Darrick J. Wong
2014-12-20 21:17 ` [PATCH 06/31] e2fsck: clear i_block[] when there are too many bad mappings on a special inode Darrick J. Wong
2014-12-20 21:17 ` [PATCH 07/31] libext2fs/e2fsck: provide routines to read-ahead metadata Darrick J. Wong
2014-12-20 21:17 ` [PATCH 08/31] e2fsck: read-ahead metadata during passes 1, 2, and 4 Darrick J. Wong
2014-12-20 21:17 ` [PATCH 09/31] e2fsck: track directories to be rehashed with a bitmap Darrick J. Wong
2014-12-20 21:17 ` [PATCH 10/31] e2fsck: rebuild sparse extent trees/convert non-extent ext3 files Darrick J. Wong
2014-12-20 21:17 ` [PATCH 11/31] tests: verify proper rebuilding of sparse extent trees and block map file conversion Darrick J. Wong
2014-12-20 21:18 ` Darrick J. Wong [this message]
2014-12-20 21:18 ` [PATCH 13/31] undo-io: be more flexible about setting block size Darrick J. Wong
2014-12-20 21:18 ` [PATCH 14/31] undo-io: use a bitmap to track what we've already written Darrick J. Wong
2014-12-20 21:18 ` [PATCH 15/31] e2undo: fix memory leaks and tweak the error messages somewhat Darrick J. Wong
2014-12-20 21:18 ` [PATCH 16/31] e2undo: ditch tdb file, write everything to a flat file Darrick J. Wong
2015-01-08  1:36   ` Darrick J. Wong
2014-12-20 21:18 ` [PATCH 17/31] e2fsck: optionally create an undo file Darrick J. Wong
2014-12-20 21:18 ` [PATCH 18/31] resize2fs: optionally create " Darrick J. Wong
2014-12-20 21:18 ` [PATCH 19/31] tune2fs: " Darrick J. Wong
2014-12-20 21:19 ` [PATCH 20/31] mke2fs: " Darrick J. Wong
2014-12-20 21:19 ` [PATCH 21/31] debugfs: " Darrick J. Wong
2014-12-20 21:19 ` [PATCH 22/31] tests: test undo file creation in e2fsck/resize2fs/tune2fs/mke2fs Darrick J. Wong
2014-12-20 21:19 ` [PATCH 23/31] tests: test various features of the new e2undo format Darrick J. Wong
2014-12-20 21:19 ` [PATCH 24/31] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2014-12-20 21:19 ` [PATCH 25/31] libext2fs: find/alloc a range of empty blocks Darrick J. Wong
2014-12-20 21:19 ` [PATCH 26/31] libext2fs: add new hooks to support large allocations Darrick J. Wong
2014-12-20 21:19 ` [PATCH 27/31] libext2fs: implement fallocate Darrick J. Wong
2014-12-20 21:19 ` [PATCH 28/31] libext2fs: use fallocate for creating journals and hugefiles Darrick J. Wong
2014-12-20 21:20 ` [PATCH 29/31] debugfs: implement fallocate Darrick J. Wong
2014-12-20 21:20 ` [PATCH 30/31] tests: test debugfs punch command Darrick J. Wong
2014-12-22 18:53 ` [PATCH 32/31] libext2fs: initialize i_extra_isize when writing EAs Darrick J. Wong
2014-12-22 22:22   ` Andreas Dilger
2014-12-22 22:32     ` Darrick J. Wong
2014-12-22 22:55   ` [PATCH v2 " Darrick J. Wong
2014-12-22 18:55 ` [PATCH 33/31] e2fsck: on read error, don't rewrite blocks past the end of the fs Darrick J. Wong
2014-12-22 18:55 ` [PATCH 34/31] e2fsck: fix the journal recreation message Darrick J. Wong
2014-12-22 18:57 ` [PATCH 35/31] libext2fs: avoid pointless EA block allocation Darrick J. Wong
2014-12-22 18:57 ` [PATCH 36/31] libext2fs: strengthen i_extra_isize checks when reading/writing xattrs Darrick J. Wong
2014-12-22 18:57 ` [PATCH 37/31] libext2fs: fix tdb.c mmap leak Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141220211804.25563.63523.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).