* [PATCH 1/4] Add unix COW io manager
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
lib/ext2fs/Makefile.in | 6 +
lib/ext2fs/ext2_io.h | 3 +
lib/ext2fs/unix_cow_io.c | 864 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 873 insertions(+), 0 deletions(-)
create mode 100644 lib/ext2fs/unix_cow_io.c
diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index 799659e..938f2d3 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -64,6 +64,7 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) $(E2IMAGE_LIB_OBJS) \
rw_bitmaps.o \
swapfs.o \
unix_io.o \
+ unix_cow_io.o \
unlink.o \
valid_blk.o \
version.o
@@ -126,6 +127,7 @@ SRCS= ext2_err.c \
$(srcdir)/swapfs.c \
$(srcdir)/test_io.c \
$(srcdir)/unix_io.c \
+ $(srcdir)/unix_cow_io.c \
$(srcdir)/unlink.c \
$(srcdir)/valid_blk.c \
$(srcdir)/version.c \
@@ -512,6 +514,10 @@ unix_io.o: $(srcdir)/unix_io.c $(srcdir)/ext2_fs.h \
$(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
$(srcdir)/ext2_fs.h $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
$(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+unix_cow_io.o: $(srcdir)/unix_cow_io.c $(srcdir)/ext2_fs.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
+ $(srcdir)/ext2_fs.h $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
+ $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
unlink.o: $(srcdir)/unlink.c $(srcdir)/ext2_fs.h \
$(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
$(srcdir)/ext2_fs.h $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
diff --git a/lib/ext2fs/ext2_io.h b/lib/ext2fs/ext2_io.h
index eada278..cdfdd51 100644
--- a/lib/ext2fs/ext2_io.h
+++ b/lib/ext2fs/ext2_io.h
@@ -96,6 +96,9 @@ extern errcode_t io_channel_write_byte(io_channel channel,
/* unix_io.c */
extern io_manager unix_io_manager;
+/* unix_cow_io.c */
+extern io_manager unix_cow_io_manager;
+
/* test_io.c */
extern io_manager test_io_manager, test_io_backing_manager;
extern void (*test_io_cb_read_blk)
diff --git a/lib/ext2fs/unix_cow_io.c b/lib/ext2fs/unix_cow_io.c
new file mode 100644
index 0000000..aeebec8
--- /dev/null
+++ b/lib/ext2fs/unix_cow_io.c
@@ -0,0 +1,864 @@
+/*
+ * unix_cow_io.c --- This is the Unix (well, really POSIX) implementation
+ * of the COW I/O manager.
+ *
+ * Implements a one-block write-through cache.
+ *
+ * Includes support for Windows NT support under Cygwin.
+ *
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ * 2002 by Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#include <fcntl.h>
+#include <time.h>
+#ifdef __linux__
+#include <sys/utsname.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+
+#include <tdb.h>
+
+#include "ext2_fs.h"
+#include "ext2fs.h"
+
+/*
+ * For checking structure magic numbers...
+ */
+
+#define EXT2_CHECK_MAGIC(struct, code) \
+ if ((struct)->magic != (code)) return (code)
+
+struct unix_cache {
+ char *buf;
+ unsigned long block;
+ int access_time;
+ unsigned dirty:1;
+ unsigned in_use:1;
+};
+
+#define CACHE_SIZE 8
+#define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */
+#define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */
+
+struct unix_cow_private_data {
+ int magic;
+ int dev;
+ int flags;
+ int access_time;
+ ext2_loff_t offset;
+ TDB_CONTEXT *tdb;
+ char *tdb_file;
+ struct unix_cache cache[CACHE_SIZE];
+};
+
+static errcode_t unix_cow_open(const char *name, int flags, io_channel *channel);
+static errcode_t unix_cow_close(io_channel channel);
+static errcode_t unix_cow_set_blksize(io_channel channel, int blksize);
+static errcode_t unix_cow_read_blk(io_channel channel, unsigned long block,
+ int count, void *data);
+static errcode_t unix_cow_write_blk(io_channel channel, unsigned long block,
+ int count, const void *data);
+static errcode_t unix_cow_flush(io_channel channel);
+static errcode_t unix_cow_write_byte(io_channel channel, unsigned long offset,
+ int size, const void *data);
+static errcode_t unix_cow_set_option(io_channel channel, const char *option,
+ const char *arg);
+
+static void reuse_cache(io_channel channel, struct unix_cow_private_data *data,
+ struct unix_cache *cache, unsigned long block);
+
+/* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel
+ * does not know buffered block devices - everything is raw. */
+#if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#define NEED_BOUNCE_BUFFER
+#else
+#undef NEED_BOUNCE_BUFFER
+#endif
+
+static struct struct_io_manager struct_unix_cow_manager = {
+ EXT2_ET_MAGIC_IO_MANAGER,
+ "Unix COW_I/O Manager",
+ unix_cow_open,
+ unix_cow_close,
+ unix_cow_set_blksize,
+ unix_cow_read_blk,
+ unix_cow_write_blk,
+ unix_cow_flush,
+#ifdef NEED_BOUNCE_BUFFER
+ 0,
+#else
+ unix_cow_write_byte,
+#endif
+ unix_cow_set_option
+};
+
+io_manager unix_cow_io_manager = &struct_unix_cow_manager;
+
+/*
+ * Here are the raw I/O functions
+ */
+#ifndef NEED_BOUNCE_BUFFER
+static errcode_t raw_read_blk(io_channel channel,
+ struct unix_cow_private_data *data,
+ unsigned long block,
+ int count, void *buf)
+{
+ errcode_t retval;
+ ssize_t size;
+ ext2_loff_t location;
+ int actual = 0;
+
+ size = (count < 0) ? -count : count * channel->block_size;
+ location = ((ext2_loff_t) block * channel->block_size) + data->offset;
+ if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
+ retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
+ goto error_out;
+ }
+ actual = read(data->dev, buf, size);
+ if (actual != size) {
+ if (actual < 0)
+ actual = 0;
+ retval = EXT2_ET_SHORT_READ;
+ goto error_out;
+ }
+ return 0;
+
+error_out:
+ memset((char *) buf+actual, 0, size-actual);
+ if (channel->read_error)
+ retval = (channel->read_error)(channel, block, count, buf,
+ size, actual, retval);
+ return retval;
+}
+#else /* NEED_BOUNCE_BUFFER */
+/*
+ * Windows and FreeBSD block devices only allow sector alignment IO in offset and size
+ */
+static errcode_t raw_read_blk(io_channel channel,
+ struct unix_cow_private_data *data,
+ unsigned long block,
+ int count, void *buf)
+{
+ errcode_t retval;
+ size_t size, alignsize, fragment;
+ ext2_loff_t location;
+ int total = 0, actual;
+#define BLOCKALIGN 512
+ char sector[BLOCKALIGN];
+
+ size = (count < 0) ? -count : count * channel->block_size;
+ location = ((ext2_loff_t) block * channel->block_size) + data->offset;
+#ifdef DEBUG
+ printf("count=%d, size=%d, block=%lu, blk_size=%d, location=%llx\n",
+ count, size, block, channel->block_size, (long long)location);
+#endif
+ if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
+ retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
+ goto error_out;
+ }
+ fragment = size % BLOCKALIGN;
+ alignsize = size - fragment;
+ if (alignsize) {
+ actual = read(data->dev, buf, alignsize);
+ if (actual != alignsize)
+ goto short_read;
+ }
+ if (fragment) {
+ actual = read(data->dev, sector, BLOCKALIGN);
+ if (actual != BLOCKALIGN)
+ goto short_read;
+ memcpy(buf+alignsize, sector, fragment);
+ }
+ return 0;
+
+short_read:
+ if (actual>0)
+ total += actual;
+ retval = EXT2_ET_SHORT_READ;
+
+error_out:
+ memset((char *) buf+total, 0, size-actual);
+ if (channel->read_error)
+ retval = (channel->read_error)(channel, block, count, buf,
+ size, actual, retval);
+ return retval;
+}
+#endif
+
+static errcode_t unix_cow_write_tdb(struct unix_cow_private_data * data,
+ ext2_loff_t location, int size)
+
+{
+ TDB_DATA tdb_key, tdb_data;
+ errcode_t retval = 0;
+ int actual = 0, rsize = size;
+ ext2_loff_t cur_loc ;
+
+ if (data->tdb == NULL) {
+ /*
+ * Transaction database not initialized
+ */
+ return 0;
+ }
+
+ tdb_key.dptr = (unsigned char *)&location;
+ tdb_key.dsize = sizeof(location);
+
+ /* insert a new entry and refuse to overwrite an existing entry */
+ /* See if we have an entry already */
+ tdb_data = tdb_fetch(data->tdb, tdb_key);
+ if (tdb_data.dptr) {
+ if (tdb_data.dsize == size) {
+ /*
+ * already we wrote something at this
+ * location with the same size
+ */
+ return 0;
+ } else {
+ /*
+ * Write at the same location but with different size
+ */
+ rsize = size - tdb_data.dsize ;
+ if (rsize < 0) {
+ /*
+ * smaller chunk. return doing nothing
+ */
+ return 0;
+ }
+ location += tdb_data.dsize;
+ }
+ free(tdb_data.dptr);
+ }
+
+ retval = ext2fs_get_mem(rsize, &(tdb_data.dptr));
+ if (retval) {
+ return retval;
+ }
+
+ if ((cur_loc = ext2fs_llseek(data->dev, 0, SEEK_CUR)) == -1) {
+ retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
+ goto error_out_lseek;
+ }
+
+ if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
+ retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
+ goto error_out_lseek;
+ }
+
+ actual = read(data->dev, tdb_data.dptr, rsize);
+ if (actual != rsize) {
+ /*
+ * eventhough it is read we still are doing a write
+ * so return error that is related to write
+ */
+ retval = EXT2_ET_SHORT_WRITE;
+ goto error_out;
+ }
+
+ tdb_data.dsize = rsize;
+
+#ifdef DEBUG
+ printf("Printing with key %lld data %x and size %d\n", location, tdb_data.dptr, rsize);
+#endif
+
+ retval = tdb_store(data->tdb, tdb_key, tdb_data, TDB_INSERT);
+ if (retval == -1) {
+ /* FIXME!! Not sure what should be the error */
+ retval = EXT2_ET_SHORT_WRITE;
+ goto error_out;
+
+ }
+
+error_out:
+ /* Move the location back */
+ if (ext2fs_llseek(data->dev, cur_loc, SEEK_SET) != cur_loc) {
+ retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
+ goto error_out;
+ }
+
+error_out_lseek:
+ free(tdb_data.dptr);
+ return retval;
+
+}
+static errcode_t raw_write_blk(io_channel channel,
+ struct unix_cow_private_data *data,
+ unsigned long block,
+ int count, const void *buf)
+{
+ ssize_t size;
+ ext2_loff_t location;
+ int actual = 0;
+ errcode_t retval;
+ TDB_DATA tdb_key, tdb_data;
+
+ if (count == 1)
+ size = channel->block_size;
+ else {
+ if (count < 0)
+ size = -count;
+ else
+ size = count * channel->block_size;
+ }
+
+ location = ((ext2_loff_t) block * channel->block_size) + data->offset;
+ if (ext2fs_llseek(data->dev, location, SEEK_SET) != location) {
+ retval = errno ? errno : EXT2_ET_LLSEEK_FAILED;
+ goto error_out;
+ }
+
+
+ /*
+ * before writing to the location read the data from the location
+ * and put it in the database. This will help us to replay the
+ * database content in cae of crash later
+ */
+ retval = unix_cow_write_tdb(data, location, size);
+ if (retval) {
+ goto error_out;
+ }
+
+ actual = write(data->dev, buf, size);
+ if (actual != size) {
+ retval = EXT2_ET_SHORT_WRITE;
+ goto error_out;
+ }
+ return 0;
+
+error_out:
+ if (channel->write_error)
+ retval = (channel->write_error)(channel, block, count, buf,
+ size, actual, retval);
+ return retval;
+}
+
+
+/*
+ * Here we implement the cache functions
+ */
+
+/* Allocate the cache buffers */
+static errcode_t alloc_cache(io_channel channel,
+ struct unix_cow_private_data *data)
+{
+ errcode_t retval;
+ struct unix_cache *cache;
+ int i;
+
+ data->access_time = 0;
+ for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
+ cache->block = 0;
+ cache->access_time = 0;
+ cache->dirty = 0;
+ cache->in_use = 0;
+ if ((retval = ext2fs_get_mem(channel->block_size,
+ &cache->buf)))
+ return retval;
+ }
+ return 0;
+}
+
+/* Free the cache buffers */
+static void free_cache(struct unix_cow_private_data *data)
+{
+ struct unix_cache *cache;
+ int i;
+
+ data->access_time = 0;
+ for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
+ cache->block = 0;
+ cache->access_time = 0;
+ cache->dirty = 0;
+ cache->in_use = 0;
+ if (cache->buf)
+ ext2fs_free_mem(&cache->buf);
+ cache->buf = 0;
+ }
+}
+
+#ifndef NO_IO_CACHE
+/*
+ * Try to find a block in the cache. If the block is not found, and
+ * eldest is a non-zero pointer, then fill in eldest with the cache
+ * entry to that should be reused.
+ */
+static struct unix_cache *find_cached_block(struct unix_cow_private_data *data,
+ unsigned long block,
+ struct unix_cache **eldest)
+{
+ struct unix_cache *cache, *unused_cache, *oldest_cache;
+ int i;
+
+ unused_cache = oldest_cache = 0;
+ for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
+ if (!cache->in_use) {
+ if (!unused_cache)
+ unused_cache = cache;
+ continue;
+ }
+ if (cache->block == block) {
+ cache->access_time = ++data->access_time;
+ return cache;
+ }
+ if (!oldest_cache ||
+ (cache->access_time < oldest_cache->access_time))
+ oldest_cache = cache;
+ }
+ if (eldest)
+ *eldest = (unused_cache) ? unused_cache : oldest_cache;
+ return 0;
+}
+
+/*
+ * Reuse a particular cache entry for another block.
+ */
+static void reuse_cache(io_channel channel, struct unix_cow_private_data *data,
+ struct unix_cache *cache, unsigned long block)
+{
+ if (cache->dirty && cache->in_use)
+ raw_write_blk(channel, data, cache->block, 1, cache->buf);
+
+ cache->in_use = 1;
+ cache->dirty = 0;
+ cache->block = block;
+ cache->access_time = ++data->access_time;
+}
+
+/*
+ * Flush all of the blocks in the cache
+ */
+static errcode_t flush_cached_blocks(io_channel channel,
+ struct unix_cow_private_data *data,
+ int invalidate)
+
+{
+ struct unix_cache *cache;
+ errcode_t retval, retval2;
+ int i;
+
+ retval2 = 0;
+ for (i=0, cache = data->cache; i < CACHE_SIZE; i++, cache++) {
+ if (!cache->in_use)
+ continue;
+
+ if (invalidate)
+ cache->in_use = 0;
+
+ if (!cache->dirty)
+ continue;
+
+ retval = raw_write_blk(channel, data,
+ cache->block, 1, cache->buf);
+ if (retval)
+ retval2 = retval;
+ else
+ cache->dirty = 0;
+ }
+ return retval2;
+}
+#endif /* NO_IO_CACHE */
+
+static void unix_cow_setup_tdb(char *tdb_file,
+ struct unix_cow_private_data *data)
+{
+ errcode_t retval;
+
+ data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST,
+ O_RDWR | O_CREAT | O_TRUNC, 0600);
+ return;
+
+}
+static errcode_t unix_cow_open(const char *name, int flags, io_channel *channel)
+{
+ io_channel io = NULL;
+ struct unix_cow_private_data *data = NULL;
+ errcode_t retval;
+ int open_flags;
+ struct stat st;
+#ifdef __linux__
+ struct utsname ut;
+#endif
+
+ if (name == 0)
+ return EXT2_ET_BAD_DEVICE_NAME;
+ retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
+ if (retval)
+ return retval;
+ memset(io, 0, sizeof(struct struct_io_channel));
+ io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
+ retval = ext2fs_get_mem(sizeof(struct unix_cow_private_data), &data);
+ if (retval)
+ goto cleanup;
+
+ io->manager = unix_cow_io_manager;
+ retval = ext2fs_get_mem(strlen(name)+1, &io->name);
+ if (retval)
+ goto cleanup;
+
+ strcpy(io->name, name);
+ io->private_data = data;
+ io->block_size = 1024;
+ io->read_error = 0;
+ io->write_error = 0;
+ io->refcount = 1;
+
+ memset(data, 0, sizeof(struct unix_cow_private_data));
+ data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
+
+ if ((retval = alloc_cache(io, data)))
+ goto cleanup;
+
+ open_flags = (flags & IO_FLAG_RW) ? O_RDWR : O_RDONLY;
+ if (flags & IO_FLAG_EXCLUSIVE)
+ open_flags |= O_EXCL;
+#ifdef HAVE_OPEN64
+ data->dev = open64(io->name, open_flags);
+#else
+ data->dev = open(io->name, open_flags);
+#endif
+ if (data->dev < 0) {
+ retval = errno;
+ goto cleanup;
+ }
+
+#ifdef __linux__
+#undef RLIM_INFINITY
+#if (defined(__alpha__) || ((defined(__sparc__) || defined(__mips__)) && (SIZEOF_LONG == 4)))
+#define RLIM_INFINITY ((unsigned long)(~0UL>>1))
+#else
+#define RLIM_INFINITY (~0UL)
+#endif
+ /*
+ * Work around a bug in 2.4.10-2.4.18 kernels where writes to
+ * block devices are wrongly getting hit by the filesize
+ * limit. This workaround isn't perfect, since it won't work
+ * if glibc wasn't built against 2.2 header files. (Sigh.)
+ *
+ */
+ if ((flags & IO_FLAG_RW) &&
+ (uname(&ut) == 0) &&
+ ((ut.release[0] == '2') && (ut.release[1] == '.') &&
+ (ut.release[2] == '4') && (ut.release[3] == '.') &&
+ (ut.release[4] == '1') && (ut.release[5] >= '0') &&
+ (ut.release[5] < '8')) &&
+ (fstat(data->dev, &st) == 0) &&
+ (S_ISBLK(st.st_mode))) {
+ struct rlimit rlim;
+
+ rlim.rlim_cur = rlim.rlim_max = (unsigned long) RLIM_INFINITY;
+ setrlimit(RLIMIT_FSIZE, &rlim);
+ getrlimit(RLIMIT_FSIZE, &rlim);
+ if (((unsigned long) rlim.rlim_cur) <
+ ((unsigned long) rlim.rlim_max)) {
+ rlim.rlim_cur = rlim.rlim_max;
+ setrlimit(RLIMIT_FSIZE, &rlim);
+ }
+ }
+#endif
+
+ *channel = io;
+ return 0;
+
+cleanup:
+ if (data) {
+ free_cache(data);
+ ext2fs_free_mem(&data);
+ }
+ if (io)
+ ext2fs_free_mem(&io);
+ return retval;
+}
+
+static errcode_t unix_cow_close(io_channel channel)
+{
+ struct unix_cow_private_data *data;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+ if (--channel->refcount > 0)
+ return 0;
+
+#ifndef NO_IO_CACHE
+ retval = flush_cached_blocks(channel, data, 0);
+#endif
+ if (data->tdb)
+ tdb_close(data->tdb);
+
+ if (close(data->dev) < 0)
+ retval = errno;
+ free_cache(data);
+
+ ext2fs_free_mem(&channel->private_data);
+ if (channel->name)
+ ext2fs_free_mem(&channel->name);
+ ext2fs_free_mem(&channel);
+ return retval;
+}
+
+static errcode_t unix_cow_set_blksize(io_channel channel, int blksize)
+{
+ struct unix_cow_private_data *data;
+ errcode_t retval;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+ if (channel->block_size != blksize) {
+#ifndef NO_IO_CACHE
+ if ((retval = flush_cached_blocks(channel, data, 0)))
+ return retval;
+#endif
+
+ channel->block_size = blksize;
+ free_cache(data);
+ if ((retval = alloc_cache(channel, data)))
+ return retval;
+ }
+ return 0;
+}
+
+
+static errcode_t unix_cow_read_blk(io_channel channel, unsigned long block,
+ int count, void *buf)
+{
+ struct unix_cow_private_data *data;
+ struct unix_cache *cache, *reuse[READ_DIRECT_SIZE];
+ errcode_t retval;
+ char *cp;
+ int i, j;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+#ifdef NO_IO_CACHE
+ return raw_read_blk(channel, data, block, count, buf);
+#else
+ /*
+ * If we're doing an odd-sized read or a very large read,
+ * flush out the cache and then do a direct read.
+ */
+ if (count < 0 || count > WRITE_DIRECT_SIZE) {
+ if ((retval = flush_cached_blocks(channel, data, 0)))
+ return retval;
+ return raw_read_blk(channel, data, block, count, buf);
+ }
+
+ cp = buf;
+ while (count > 0) {
+ /* If it's in the cache, use it! */
+ if ((cache = find_cached_block(data, block, &reuse[0]))) {
+#ifdef DEBUG
+ printf("Using cached block %lu\n", block);
+#endif
+ memcpy(cp, cache->buf, channel->block_size);
+ count--;
+ block++;
+ cp += channel->block_size;
+ continue;
+ }
+ /*
+ * Find the number of uncached blocks so we can do a
+ * single read request
+ */
+ for (i=1; i < count; i++)
+ if (find_cached_block(data, block+i, &reuse[i]))
+ break;
+#ifdef DEBUG
+ printf("Reading %d blocks starting at %lu\n", i, block);
+#endif
+ if ((retval = raw_read_blk(channel, data, block, i, cp)))
+ return retval;
+
+ /* Save the results in the cache */
+ for (j=0; j < i; j++) {
+ count--;
+ cache = reuse[j];
+ reuse_cache(channel, data, cache, block++);
+ memcpy(cache->buf, cp, channel->block_size);
+ cp += channel->block_size;
+ }
+ }
+ return 0;
+#endif /* NO_IO_CACHE */
+}
+
+static errcode_t unix_cow_write_blk(io_channel channel, unsigned long block,
+ int count, const void *buf)
+{
+ struct unix_cow_private_data *data;
+ struct unix_cache *cache, *reuse;
+ errcode_t retval = 0;
+ const char *cp;
+ int writethrough;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+#ifdef NO_IO_CACHE
+ return raw_write_blk(channel, data, block, count, buf);
+#else
+ /*
+ * If we're doing an odd-sized write or a very large write,
+ * flush out the cache completely and then do a direct write.
+ */
+ if (count < 0 || count > WRITE_DIRECT_SIZE) {
+ if ((retval = flush_cached_blocks(channel, data, 1)))
+ return retval;
+ return raw_write_blk(channel, data, block, count, buf);
+ }
+
+ /*
+ * For a moderate-sized multi-block write, first force a write
+ * if we're in write-through cache mode, and then fill the
+ * cache with the blocks.
+ */
+ writethrough = channel->flags & CHANNEL_FLAGS_WRITETHROUGH;
+ if (writethrough)
+ retval = raw_write_blk(channel, data, block, count, buf);
+
+ cp = buf;
+ while (count > 0) {
+ cache = find_cached_block(data, block, &reuse);
+ if (!cache) {
+ cache = reuse;
+ reuse_cache(channel, data, cache, block);
+ }
+ memcpy(cache->buf, cp, channel->block_size);
+ cache->dirty = !writethrough;
+ count--;
+ block++;
+ cp += channel->block_size;
+ }
+ return retval;
+#endif /* NO_IO_CACHE */
+}
+
+
+static errcode_t unix_cow_write_byte(io_channel channel, unsigned long offset,
+ int size, const void *buf)
+{
+ struct unix_cow_private_data *data;
+ errcode_t retval = 0;
+ ssize_t actual;
+ ext2_loff_t location;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+#ifndef NO_IO_CACHE
+ /*
+ * Flush out the cache completely
+ */
+ if ((retval = flush_cached_blocks(channel, data, 1)))
+ return retval;
+#endif
+
+ if (lseek(data->dev, offset + data->offset, SEEK_SET) < 0)
+ return errno;
+
+ /*
+ * before writing to the location read the data from the location
+ * and put it in the database. This will help us to replay the
+ * database content in cae of crash later
+ */
+
+ location = offset + data->offset;
+ retval = unix_cow_write_tdb(data, location, size);
+ if (retval)
+ return retval;
+
+ actual = write(data->dev, buf, size);
+ if (actual != size)
+ return EXT2_ET_SHORT_WRITE;
+
+ return 0;
+}
+
+/*
+ * Flush data buffers to disk.
+ */
+static errcode_t unix_cow_flush(io_channel channel)
+{
+ struct unix_cow_private_data *data;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+#ifndef NO_IO_CACHE
+ retval = flush_cached_blocks(channel, data, 0);
+#endif
+ fsync(data->dev);
+ return retval;
+}
+
+static errcode_t unix_cow_set_option(io_channel channel, const char *option,
+ const char *arg)
+{
+ struct unix_cow_private_data *data;
+ unsigned long tmp;
+ char *end;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct unix_cow_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+ if (!strcmp(option, "offset")) {
+ if (!arg)
+ return EXT2_ET_INVALID_ARGUMENT;
+
+ tmp = strtoul(arg, &end, 0);
+ if (*end)
+ return EXT2_ET_INVALID_ARGUMENT;
+ data->offset = tmp;
+ return 0;
+ }
+ if (!strcmp(option, "tdbfilename")) {
+ if (!arg)
+ return EXT2_ET_INVALID_ARGUMENT;
+
+ data->tdb_file = strdup(arg);
+ /*
+ * Initialize the database context
+ * We need to do it in set_option because when
+ * unix_cow_open is called we don't have the
+ * tdb file name set.
+ */
+ unix_cow_setup_tdb(data->tdb_file, data);
+ return 0;
+ }
+ return EXT2_ET_INVALID_ARGUMENT;
+}
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 2/4] Add extent related functions
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
The code is derived out of the latest ext4 kernel source. I
have tried to keep the code as close as possible to the kernel
sources. This makes sure that any fixes for the tree building
code in kernel should be easily applied to ext4migrate. The
ext3_ext naming convention instead of ext4_ext found in kernel is to
make sure we are in sync with rest of e2fsprogs source
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
ext4migrate/extents.c | 763 +++++++++++++++++++++++++++++++++++++++++++++++++
ext4migrate/extents.h | 10 +
2 files changed, 773 insertions(+), 0 deletions(-)
create mode 100644 ext4migrate/extents.c
create mode 100644 ext4migrate/extents.h
diff --git a/ext4migrate/extents.c b/ext4migrate/extents.c
new file mode 100644
index 0000000..d25ce36
--- /dev/null
+++ b/ext4migrate/extents.c
@@ -0,0 +1,763 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "migrate.h"
+#include "extents.h"
+
+struct ext4_ext_path {
+ ext4_fsblk_t p_block;
+ __u16 p_depth;
+ struct ext3_extent *p_ext;
+ struct ext3_extent_idx *p_idx;
+ struct ext3_extent_header *p_hdr;
+};
+
+/*
+ * ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
+ext4_fsblk_t ext_pblock(struct ext3_extent *ex)
+{
+ ext4_fsblk_t block;
+
+ block = ex->ee_start;
+ block |= ((ext4_fsblk_t) ex->ee_start_hi << 31) << 1;
+ return block;
+}
+
+/*
+ * idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
+ext4_fsblk_t idx_pblock(struct ext3_extent_idx *ix)
+{
+ ext4_fsblk_t block;
+
+ block = ix->ei_leaf;
+ block |= ((ext4_fsblk_t) ix->ei_leaf_hi << 31) << 1;
+ return block;
+}
+
+/*
+ * ext3_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
+void ext3_ext_store_pblock(struct ext3_extent *ex, ext4_fsblk_t pb)
+{
+ ex->ee_start = (unsigned long) (pb & 0xffffffff);
+ ex->ee_start_hi = (unsigned long) ((pb >> 31) >> 1) & 0xffff;
+}
+
+/*
+ * ext3_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
+static void ext3_idx_store_pblock(struct ext3_extent_idx *ix, ext4_fsblk_t pb)
+{
+ ix->ei_leaf = (unsigned long) (pb & 0xffffffff);
+ ix->ei_leaf_hi = (unsigned long) ((pb >> 31) >> 1) & 0xffff;
+}
+
+
+static int __ext3_ext_space_block(ext2_filsys filesys)
+{
+ int size;
+
+ size = (filesys->blocksize - sizeof(struct ext3_extent_header))
+ / sizeof(struct ext3_extent);
+ return size;
+}
+static int ext3_ext_space_block()
+{
+ return __ext3_ext_space_block(current_fs);
+}
+
+static int __ext3_ext_space_block_idx(ext2_filsys filesys)
+{
+ int size;
+
+ size = (filesys->blocksize - sizeof(struct ext3_extent_header))
+ / sizeof(struct ext3_extent_idx);
+ return size;
+}
+static int ext3_ext_space_block_idx()
+{
+ return __ext3_ext_space_block_idx(current_fs);
+
+}
+
+int ext3_ext_space_root(void)
+{
+ int size;
+
+ size = EXT2_N_BLOCKS*sizeof(blk_t);
+ size -= sizeof(struct ext3_extent_header);
+ size /= sizeof(struct ext3_extent);
+ return size;
+}
+
+static int ext3_ext_space_root_idx(void)
+{
+ int size;
+
+ size = EXT2_N_BLOCKS*sizeof(blk_t);
+ size -= sizeof(struct ext3_extent_header);
+ size /= sizeof(struct ext3_extent_idx);
+ return size;
+}
+static void ext3_ext_binsearch_idx(struct ext4_ext_path *path,
+ blk_t logical_blk)
+{
+ struct ext3_extent_header *eh = path->p_hdr;
+ struct ext3_extent_idx *r, *l, *m;
+
+ l = EXT_FIRST_INDEX(eh) + 1;
+ r = EXT_FIRST_INDEX(eh) + eh->eh_entries - 1;
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ if (logical_blk < m->ei_block)
+ r = m - 1;
+ else
+ l = m + 1;
+ }
+ path->p_idx = l - 1;
+}
+
+static void ext3_ext_binsearch(struct ext4_ext_path *path, blk_t logical_blk)
+{
+ struct ext3_extent_header *eh = path->p_hdr;
+ struct ext3_extent *r, *l, *m;
+
+ if (eh->eh_entries == 0) {
+ /*
+ * this leaf is empty:
+ * we get such a leaf in split/add case
+ */
+ return;
+ }
+
+ l = EXT_FIRST_EXTENT(eh) + 1;
+ r = EXT_FIRST_EXTENT(eh) + eh->eh_entries - 1;
+
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ if (logical_blk < m->ee_block)
+ r = m - 1;
+ else
+ l = m + 1;
+ }
+
+ path->p_ext = l - 1;
+}
+
+/*
+ * ext3_ext_insert_index:
+ * insert new index [@logical;@ptr] into the block at @curp;
+ * check where to insert: before @curp or after @curp
+ */
+static int ext3_ext_insert_index(struct ext4_ext_path *curp,
+ int logical, ext4_fsblk_t ptr)
+{
+ struct ext3_extent_idx *ix;
+ int len, err = 0;
+
+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
+ if (logical > curp->p_idx->ei_block) {
+ /* insert after */
+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
+ len = (len - 1) * sizeof(struct ext3_extent_idx);
+ len = len < 0 ? 0 : len;
+ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
+ }
+ ix = curp->p_idx + 1;
+ } else {
+ /* insert before */
+ len = len * sizeof(struct ext3_extent_idx);
+ len = len < 0 ? 0 : len;
+ memmove(curp->p_idx + 1, curp->p_idx, len);
+ ix = curp->p_idx;
+ }
+
+ ix->ei_block = logical;
+ ext3_idx_store_pblock(ix, ptr);
+ curp->p_hdr->eh_entries = curp->p_hdr->eh_entries+1;
+
+ return err;
+}
+/*
+ * ext3_ext_split:
+ * inserts new subtree into the path, using free index entry
+ * at depth @at:
+ * - allocates all needed blocks (new leaf and all intermediate index blocks)
+ * - makes decision where to split
+ * - moves remaining extents and index entries (right to the split point)
+ * into the newly allocated blocks
+ * - initializes subtree
+ */
+static int ext3_ext_split(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path,
+ struct ext3_extent *newext, int at)
+{
+ int depth = eh->eh_depth;
+ struct ext3_extent_header *neh;
+ struct ext3_extent_idx *fidx;
+ struct ext3_extent *ex;
+ struct blk_cache *blkc;
+ int i = at, k, m, a;
+ ext4_fsblk_t newblock, oldblock;
+ int border;
+ ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
+ int err = 0;
+
+ /* make decision: where to split? */
+ /* FIXME: now decision is simplest: at current extent */
+
+ /* if current leaf will be split, then we should use
+ * border from split point */
+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
+ border = path[depth].p_ext[1].ee_block;
+ } else {
+ border = newext->ee_block;
+ }
+
+ /*
+ * If error occurs, then we break processing
+ * and mark filesystem read-only. index won't
+ * be inserted and tree will be in consistent
+ * state. Next mount will repair buffers too.
+ */
+
+ /*
+ * Get array to track all allocated blocks.
+ * We need this to handle errors and free blocks
+ * upon them.
+ */
+ ablocks = malloc(sizeof(ext4_fsblk_t) * depth);
+ if (!ablocks)
+ return -1;
+
+ memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth);
+
+ /* allocate all needed blocks */
+ for (a = 0; a < depth - at; a++) {
+ blkc = get_block(0);
+ if (!blkc) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto cleanup;
+ }
+ newblock = blkc->pblock;
+ if (newblock == 0)
+ goto cleanup;
+ ablocks[a] = newblock;
+ }
+
+ /* initialize new leaf */
+ newblock = ablocks[--a];
+
+ blkc = get_block(newblock);
+ if (!blkc) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto cleanup;
+ }
+
+ neh = (struct ext3_extent_header *)blkc->block_mem_addr;
+ neh->eh_entries = 0;
+ neh->eh_max = ext3_ext_space_block();
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ neh->eh_depth = 0;
+ ex = EXT_FIRST_EXTENT(neh);
+
+ /* move remainder of path[depth] to the new leaf */
+ /* start copy from next extent */
+ /* TODO: we could do it by single memmove */
+ m = 0;
+ path[depth].p_ext++;
+ while (path[depth].p_ext <=
+ EXT_MAX_EXTENT(path[depth].p_hdr)) {
+ /*memmove(ex++, path[depth].p_ext++,
+ sizeof(struct ext4_extent));
+ neh->eh_entries++;*/
+ path[depth].p_ext++;
+ m++;
+ }
+
+ /*
+ * move the part of the old extent to the new allocated one
+ */
+ if (m) {
+ memmove(ex, path[depth].p_ext-m, sizeof(struct ext3_extent)*m);
+ neh->eh_entries = neh->eh_entries+m;
+ }
+
+ /* correct old leaf */
+ if (m) {
+ path[depth].p_hdr->eh_entries = path[depth].p_hdr->eh_entries-m;
+
+ }
+
+ /* create intermediate indexes */
+ k = depth - at - 1;
+
+ /*create intermediate indices */
+ /* insert new index into current index block */
+ /* current depth stored in i var */
+ i = depth - 1;
+ while (k--) {
+ oldblock = newblock;
+ newblock = ablocks[--a];
+ blkc = get_block(newblock);
+ if (!blkc) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto cleanup;
+ }
+
+ neh = (struct ext3_extent_header *)blkc->block_mem_addr;
+
+ neh->eh_entries = 1;
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ neh->eh_max = ext3_ext_space_block_idx();
+ neh->eh_depth = depth - i;
+ fidx = EXT_FIRST_INDEX(neh);
+ fidx->ei_block = border;
+ ext3_idx_store_pblock(fidx, oldblock);
+
+ /* copy indexes */
+ m = 0;
+ path[i].p_idx++;
+
+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
+ /*memmove(++fidx, path[i].p_idx++,
+ sizeof(struct ext4_extent_idx));
+ neh->eh_entries++;
+ BUG_ON(neh->eh_entries > neh->eh_max);*/
+ path[i].p_idx++;
+ m++;
+ }
+ if (m) {
+ memmove(++fidx, path[i].p_idx - m,
+ sizeof(struct ext3_extent_idx) * m);
+ neh->eh_entries = neh->eh_entries + m;
+ }
+
+
+ /* correct old index */
+ if (m) {
+ path[i].p_hdr->eh_entries = path[i].p_hdr->eh_entries-m;
+ }
+
+ i--;
+ }
+
+ /* insert new index */
+ err = ext3_ext_insert_index(path + at, border, newblock);
+
+cleanup:
+
+ free(ablocks);
+
+ return err;
+}
+
+/*
+ * ext3_ext_grow_indepth:
+ * implements tree growing procedure:
+ * - allocates new block
+ * - moves top-level data (index block or leaf) into the new block
+ * - initializes new top-level, creating index that points to the
+ * just created block
+ */
+static int ext3_ext_grow_indepth(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path)
+{
+ struct ext4_ext_path *curp = path;
+ struct ext3_extent_header *neh;
+ struct blk_cache *blkc;
+ ext4_fsblk_t newblock;
+ void *addr;
+
+ blkc = get_block(0);
+ if (!blkc) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+
+ }
+ addr = blkc->block_mem_addr;
+ newblock = blkc->pblock;
+
+ /* move top-level index/leaf into new block */
+ memmove(addr, curp->p_hdr, EXT2_N_BLOCKS*sizeof(blk_t));
+
+ /* set size of new block */
+ neh = (struct ext3_extent_header *)addr;
+ /* old root could have indexes or leaves
+ * so calculate e_max right way */
+ if (eh->eh_depth) {
+ neh->eh_max = ext3_ext_space_block_idx();
+ } else {
+ neh->eh_max = ext3_ext_space_block();
+ }
+
+ neh->eh_magic = EXT4_EXT_MAGIC;
+
+ /* create index in new top-level index: num,max,pointer */
+
+ curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
+ curp->p_hdr->eh_max = ext3_ext_space_root_idx();
+ curp->p_hdr->eh_entries = 1;
+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
+ /* FIXME: it works, but actually path[0] can be index */
+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
+ ext3_idx_store_pblock(curp->p_idx, newblock);
+
+ neh = eh;
+ //fidx = EXT_FIRST_INDEX(neh);
+
+ neh->eh_depth = path->p_depth + 1;
+
+ return 0;
+}
+struct ext4_ext_path * ext3_ext_find_extent(struct ext3_extent_header *eh,
+ blk_t logical_blk,
+ struct ext4_ext_path *path)
+{
+
+ struct blk_cache *blkc;
+ short int depth, i, ppos = 0;
+ int alloc = 0;
+
+ i = depth = eh->eh_depth;
+
+ if (!path) {
+ /*
+ * We may be called to rebuild the path while growing the
+ * depth of the tree
+ */
+ path = malloc(sizeof(struct ext4_ext_path)* (depth+2));
+ if (!path)
+ return NULL;
+
+ alloc = 1;
+ memset(path, 0, sizeof(struct ext4_ext_path)* (depth+2));
+ }
+
+
+ path[0].p_hdr = eh;
+
+ while (i) {
+
+ ext3_ext_binsearch_idx(path+ppos, logical_blk);
+
+ path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_depth = i;
+ path[ppos].p_ext = NULL;
+
+ blkc = get_block(path[ppos].p_block);
+
+ if (!blkc) {
+ if (alloc)
+ free(path);
+ return NULL;
+ }
+
+ eh = blkc->block_mem_addr;
+ if (!eh) {
+ if (alloc)
+ free(path);
+ return NULL;
+ }
+ if (eh->eh_max == 0) {
+ if (alloc)
+ free(path);
+ return NULL;
+ }
+ ppos++;
+ path[ppos].p_hdr = eh;
+ i--;
+ }
+ path[ppos].p_depth = i;
+ path[ppos].p_hdr = eh;
+ path[ppos].p_ext = NULL;
+ path[ppos].p_idx = NULL;
+
+ ext3_ext_binsearch(path + ppos, logical_blk);
+
+ return path;
+}
+
+static int ext3_ext_create_new_leaf(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path,
+ struct ext3_extent *newext)
+{
+
+ struct ext4_ext_path *curp;
+ int depth, i, err = 0;
+
+repeat:
+ i = depth = eh->eh_depth;
+
+ /* walk up to the tree and look for free index entry */
+ curp = path + depth;
+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
+ i--;
+ curp--;
+ }
+
+ if (EXT_HAS_FREE_INDEX(curp)) {
+
+ /* if we found index with free entry, then use that
+ * entry: create all needed subtree and add new leaf */
+ err = ext3_ext_split(eh, path, newext, i);
+ if (err)
+ goto err_out;
+
+ /*
+ * Recalculate the path since grow inode would
+ * have changed it
+ */
+ path = ext3_ext_find_extent(eh, newext->ee_block, path);
+ if (!path) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+
+ } else {
+
+ /* tree is full, time to grow in depth */
+ err = ext3_ext_grow_indepth(eh, path);
+ if (err)
+ goto err_out;
+
+ /*
+ * recalculate the path since grow inode
+ * would have changed it i
+ */
+ path = ext3_ext_find_extent(eh, newext->ee_block, path);
+ if (!path) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+
+
+ /*
+ * only first (depth 0 -> 1) produces free space;
+ * in all other cases we have to split the grown tree
+ */
+ depth = eh->eh_depth;
+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
+ /* now we need to split */
+ goto repeat;
+ }
+
+ }
+
+err_out:
+
+ return err;
+}
+
+static int ext3_can_extents_be_merged(struct ext3_extent *ex1,
+ struct ext3_extent *ex2)
+{
+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block)
+ return 0;
+
+
+ if (ext_pblock(ex1) + ex1->ee_len == ext_pblock(ex2))
+ return 1;
+ return 0;
+}
+
+
+/*
+ * ext3_ext_next_leaf_block:
+ * returns first allocated block from next leaf or EXT_MAX_BLOCK
+ */
+static unsigned ext3_ext_next_leaf_block(struct ext4_ext_path *path)
+{
+ int depth;
+
+ depth = path->p_depth;
+
+ /* zero-tree has no leaf blocks at all */
+ if (depth == 0)
+ return EXT_MAX_BLOCK;
+
+ /* go to index block */
+ depth--;
+
+ while (depth >= 0) {
+ if (path[depth].p_idx != EXT_LAST_INDEX(path[depth].p_hdr)) {
+ return path[depth].p_idx[1].ei_block;
+ }
+ depth--;
+ }
+
+ return EXT_MAX_BLOCK;
+}
+
+/*
+ * ext3_ext_correct_indexes:
+ * if leaf gets modified and modified extent is first in the leaf,
+ * then we have to correct all indexes above.
+ * TODO: do we need to correct tree in all cases?
+ */
+static void ext3_ext_correct_indexes(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path)
+{
+ int depth = eh->eh_depth;
+ struct ext3_extent *ex;
+ int border;
+ int k;
+
+
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+
+ if (depth == 0) {
+ /* there is no tree at all */
+ return;
+ }
+
+ if (ex != EXT_FIRST_EXTENT(eh)) {
+ /* we correct tree if first leaf got modified only */
+ return;
+ }
+
+ /*
+ * TODO: we need correction if border is smaller than current one
+ */
+ k = depth - 1;
+ border = path[depth].p_ext->ee_block;
+ path[k].p_idx->ei_block = border;
+
+ while (k--) {
+ /* change all left-side indexes */
+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
+ break;
+ path[k].p_idx->ei_block = border;
+ }
+
+ return;
+}
+
+int ext3_ext_insert_extent(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path,
+ struct ext3_extent *newext)
+{
+
+ struct ext3_extent *ex, *fex;
+ struct ext3_extent *nearex; /* nearest extent */
+ struct ext4_ext_path *npath = NULL;
+ int depth, len, err = 0, next;
+ struct ext3_extent_header *save_eh = eh;
+
+ depth = eh->eh_depth;
+ ex = path[depth].p_ext;
+
+ /* try to insert block into found extent and return */
+ if (ex && ext3_can_extents_be_merged(ex, newext)) {
+ ex->ee_len = ex->ee_len + newext->ee_len;
+ eh = path[depth].p_hdr;
+ nearex = ex;
+ goto merge;
+ }
+
+repeat:
+ /* recalculate depth here */
+ depth=save_eh->eh_depth;
+
+ eh = path[depth].p_hdr;
+ if (eh->eh_entries < eh->eh_max)
+ goto has_space;
+
+ /* probably next leaf has space for us? */
+ fex = EXT_LAST_EXTENT(eh);
+ next = ext3_ext_next_leaf_block(path);
+
+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) {
+
+ npath = ext3_ext_find_extent(save_eh, next, NULL);
+ if (!npath) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+ }
+
+ eh = npath[depth].p_hdr;
+ if (eh->eh_entries < eh->eh_max) {
+ path = npath;
+ goto repeat;
+ }
+ }
+
+ /*
+ * There is no free space in the found leaf.
+ * We're gonna add a new leaf in the tree.
+ */
+ err = ext3_ext_create_new_leaf(save_eh, path, newext);
+ if (err)
+ goto cleanup;
+
+ /* recalculate depth here */
+ depth=save_eh->eh_depth;
+ eh = path[depth].p_hdr;
+
+has_space:
+
+ nearex = path[depth].p_ext;
+
+ if (!nearex) {
+ /* there is no extent in this leaf, create first one */
+ path[depth].p_ext = EXT_FIRST_EXTENT(eh);
+
+ } else if (newext->ee_block > nearex->ee_block) {
+
+ if (nearex != EXT_LAST_EXTENT(eh)) {
+ len = EXT_MAX_EXTENT(eh) - nearex;
+ len = (len - 1) * sizeof(struct ext3_extent);
+ len = len < 0 ? 0 : len;
+ memmove(nearex + 2, nearex + 1, len);
+ }
+ path[depth].p_ext = nearex + 1;
+ } else {
+ len = (EXT_MAX_EXTENT(eh) - nearex)*sizeof(struct ext3_extent);
+ len = len < 0 ? 0 : len;
+ memmove(nearex + 1, nearex, len);
+ path[depth].p_ext = nearex;
+ }
+
+ eh->eh_entries = eh->eh_entries+1;
+ nearex = path[depth].p_ext;
+ nearex->ee_block = newext->ee_block;
+ nearex->ee_start = newext->ee_start;
+ nearex->ee_start_hi = newext->ee_start_hi;
+ nearex->ee_len = newext->ee_len;
+
+merge:
+ /* try to merge extents to the right */
+ while (nearex < EXT_LAST_EXTENT(eh)) {
+ if (!ext3_can_extents_be_merged(nearex, nearex + 1))
+ break;
+ /* merge with next extent! */
+ nearex->ee_len = nearex->ee_len + nearex[1].ee_len;
+
+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
+ len = (EXT_LAST_EXTENT(eh) - nearex - 1)
+ * sizeof(struct ext3_extent);
+ memmove(nearex + 1, nearex + 2, len);
+ }
+ eh->eh_entries = eh->eh_entries-1;
+ }
+
+ /* try to merge extents to the left */
+
+ /* time to correct all indexes above */
+ ext3_ext_correct_indexes(save_eh, path);
+
+cleanup:
+ if (npath) {
+ free(npath);
+ }
+
+ return err;
+}
+
diff --git a/ext4migrate/extents.h b/ext4migrate/extents.h
new file mode 100644
index 0000000..38114dd
--- /dev/null
+++ b/ext4migrate/extents.h
@@ -0,0 +1,10 @@
+
+struct ext4_ext_path * ext3_ext_find_extent(struct ext3_extent_header *,
+ blk_t , struct ext4_ext_path *);
+struct blk_cache * get_block(ext4_fsblk_t);
+int ext3_ext_space_root(void);
+ext4_fsblk_t ext_pblock(struct ext3_extent *);
+ext4_fsblk_t idx_pblock(struct ext3_extent_idx *);
+void ext3_ext_store_pblock(struct ext3_extent *, ext4_fsblk_t );
+int ext3_ext_insert_extent(struct ext3_extent_header *,
+ struct ext4_ext_path *, struct ext3_extent *);
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 2/4] Add extent related functions
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
The code is derived out of the latest ext4 kernel source. I
have tried to keep the code as close as possible to the kernel
sources. This makes sure that any fixes for the tree building
code in kernel should be easily applied to ext4migrate. The
ext3_ext naming convention instead of ext4_ext found in kernel is to
make sure we are in sync with rest of e2fsprogs source
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
ext4migrate/extents.c | 763 +++++++++++++++++++++++++++++++++++++++++++++++++
ext4migrate/extents.h | 10 +
2 files changed, 773 insertions(+), 0 deletions(-)
create mode 100644 ext4migrate/extents.c
create mode 100644 ext4migrate/extents.h
diff --git a/ext4migrate/extents.c b/ext4migrate/extents.c
new file mode 100644
index 0000000..d25ce36
--- /dev/null
+++ b/ext4migrate/extents.c
@@ -0,0 +1,763 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "migrate.h"
+#include "extents.h"
+
+struct ext4_ext_path {
+ ext4_fsblk_t p_block;
+ __u16 p_depth;
+ struct ext3_extent *p_ext;
+ struct ext3_extent_idx *p_idx;
+ struct ext3_extent_header *p_hdr;
+};
+
+/*
+ * ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
+ext4_fsblk_t ext_pblock(struct ext3_extent *ex)
+{
+ ext4_fsblk_t block;
+
+ block = ex->ee_start;
+ block |= ((ext4_fsblk_t) ex->ee_start_hi << 31) << 1;
+ return block;
+}
+
+/*
+ * idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
+ext4_fsblk_t idx_pblock(struct ext3_extent_idx *ix)
+{
+ ext4_fsblk_t block;
+
+ block = ix->ei_leaf;
+ block |= ((ext4_fsblk_t) ix->ei_leaf_hi << 31) << 1;
+ return block;
+}
+
+/*
+ * ext3_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
+void ext3_ext_store_pblock(struct ext3_extent *ex, ext4_fsblk_t pb)
+{
+ ex->ee_start = (unsigned long) (pb & 0xffffffff);
+ ex->ee_start_hi = (unsigned long) ((pb >> 31) >> 1) & 0xffff;
+}
+
+/*
+ * ext3_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
+static void ext3_idx_store_pblock(struct ext3_extent_idx *ix, ext4_fsblk_t pb)
+{
+ ix->ei_leaf = (unsigned long) (pb & 0xffffffff);
+ ix->ei_leaf_hi = (unsigned long) ((pb >> 31) >> 1) & 0xffff;
+}
+
+
+static int __ext3_ext_space_block(ext2_filsys filesys)
+{
+ int size;
+
+ size = (filesys->blocksize - sizeof(struct ext3_extent_header))
+ / sizeof(struct ext3_extent);
+ return size;
+}
+static int ext3_ext_space_block()
+{
+ return __ext3_ext_space_block(current_fs);
+}
+
+static int __ext3_ext_space_block_idx(ext2_filsys filesys)
+{
+ int size;
+
+ size = (filesys->blocksize - sizeof(struct ext3_extent_header))
+ / sizeof(struct ext3_extent_idx);
+ return size;
+}
+static int ext3_ext_space_block_idx()
+{
+ return __ext3_ext_space_block_idx(current_fs);
+
+}
+
+int ext3_ext_space_root(void)
+{
+ int size;
+
+ size = EXT2_N_BLOCKS*sizeof(blk_t);
+ size -= sizeof(struct ext3_extent_header);
+ size /= sizeof(struct ext3_extent);
+ return size;
+}
+
+static int ext3_ext_space_root_idx(void)
+{
+ int size;
+
+ size = EXT2_N_BLOCKS*sizeof(blk_t);
+ size -= sizeof(struct ext3_extent_header);
+ size /= sizeof(struct ext3_extent_idx);
+ return size;
+}
+static void ext3_ext_binsearch_idx(struct ext4_ext_path *path,
+ blk_t logical_blk)
+{
+ struct ext3_extent_header *eh = path->p_hdr;
+ struct ext3_extent_idx *r, *l, *m;
+
+ l = EXT_FIRST_INDEX(eh) + 1;
+ r = EXT_FIRST_INDEX(eh) + eh->eh_entries - 1;
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ if (logical_blk < m->ei_block)
+ r = m - 1;
+ else
+ l = m + 1;
+ }
+ path->p_idx = l - 1;
+}
+
+static void ext3_ext_binsearch(struct ext4_ext_path *path, blk_t logical_blk)
+{
+ struct ext3_extent_header *eh = path->p_hdr;
+ struct ext3_extent *r, *l, *m;
+
+ if (eh->eh_entries == 0) {
+ /*
+ * this leaf is empty:
+ * we get such a leaf in split/add case
+ */
+ return;
+ }
+
+ l = EXT_FIRST_EXTENT(eh) + 1;
+ r = EXT_FIRST_EXTENT(eh) + eh->eh_entries - 1;
+
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ if (logical_blk < m->ee_block)
+ r = m - 1;
+ else
+ l = m + 1;
+ }
+
+ path->p_ext = l - 1;
+}
+
+/*
+ * ext3_ext_insert_index:
+ * insert new index [@logical;@ptr] into the block at @curp;
+ * check where to insert: before @curp or after @curp
+ */
+static int ext3_ext_insert_index(struct ext4_ext_path *curp,
+ int logical, ext4_fsblk_t ptr)
+{
+ struct ext3_extent_idx *ix;
+ int len, err = 0;
+
+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
+ if (logical > curp->p_idx->ei_block) {
+ /* insert after */
+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
+ len = (len - 1) * sizeof(struct ext3_extent_idx);
+ len = len < 0 ? 0 : len;
+ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
+ }
+ ix = curp->p_idx + 1;
+ } else {
+ /* insert before */
+ len = len * sizeof(struct ext3_extent_idx);
+ len = len < 0 ? 0 : len;
+ memmove(curp->p_idx + 1, curp->p_idx, len);
+ ix = curp->p_idx;
+ }
+
+ ix->ei_block = logical;
+ ext3_idx_store_pblock(ix, ptr);
+ curp->p_hdr->eh_entries = curp->p_hdr->eh_entries+1;
+
+ return err;
+}
+/*
+ * ext3_ext_split:
+ * inserts new subtree into the path, using free index entry
+ * at depth @at:
+ * - allocates all needed blocks (new leaf and all intermediate index blocks)
+ * - makes decision where to split
+ * - moves remaining extents and index entries (right to the split point)
+ * into the newly allocated blocks
+ * - initializes subtree
+ */
+static int ext3_ext_split(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path,
+ struct ext3_extent *newext, int at)
+{
+ int depth = eh->eh_depth;
+ struct ext3_extent_header *neh;
+ struct ext3_extent_idx *fidx;
+ struct ext3_extent *ex;
+ struct blk_cache *blkc;
+ int i = at, k, m, a;
+ ext4_fsblk_t newblock, oldblock;
+ int border;
+ ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
+ int err = 0;
+
+ /* make decision: where to split? */
+ /* FIXME: now decision is simplest: at current extent */
+
+ /* if current leaf will be split, then we should use
+ * border from split point */
+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
+ border = path[depth].p_ext[1].ee_block;
+ } else {
+ border = newext->ee_block;
+ }
+
+ /*
+ * If error occurs, then we break processing
+ * and mark filesystem read-only. index won't
+ * be inserted and tree will be in consistent
+ * state. Next mount will repair buffers too.
+ */
+
+ /*
+ * Get array to track all allocated blocks.
+ * We need this to handle errors and free blocks
+ * upon them.
+ */
+ ablocks = malloc(sizeof(ext4_fsblk_t) * depth);
+ if (!ablocks)
+ return -1;
+
+ memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth);
+
+ /* allocate all needed blocks */
+ for (a = 0; a < depth - at; a++) {
+ blkc = get_block(0);
+ if (!blkc) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto cleanup;
+ }
+ newblock = blkc->pblock;
+ if (newblock == 0)
+ goto cleanup;
+ ablocks[a] = newblock;
+ }
+
+ /* initialize new leaf */
+ newblock = ablocks[--a];
+
+ blkc = get_block(newblock);
+ if (!blkc) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto cleanup;
+ }
+
+ neh = (struct ext3_extent_header *)blkc->block_mem_addr;
+ neh->eh_entries = 0;
+ neh->eh_max = ext3_ext_space_block();
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ neh->eh_depth = 0;
+ ex = EXT_FIRST_EXTENT(neh);
+
+ /* move remainder of path[depth] to the new leaf */
+ /* start copy from next extent */
+ /* TODO: we could do it by single memmove */
+ m = 0;
+ path[depth].p_ext++;
+ while (path[depth].p_ext <=
+ EXT_MAX_EXTENT(path[depth].p_hdr)) {
+ /*memmove(ex++, path[depth].p_ext++,
+ sizeof(struct ext4_extent));
+ neh->eh_entries++;*/
+ path[depth].p_ext++;
+ m++;
+ }
+
+ /*
+ * move the part of the old extent to the new allocated one
+ */
+ if (m) {
+ memmove(ex, path[depth].p_ext-m, sizeof(struct ext3_extent)*m);
+ neh->eh_entries = neh->eh_entries+m;
+ }
+
+ /* correct old leaf */
+ if (m) {
+ path[depth].p_hdr->eh_entries = path[depth].p_hdr->eh_entries-m;
+
+ }
+
+ /* create intermediate indexes */
+ k = depth - at - 1;
+
+ /*create intermediate indices */
+ /* insert new index into current index block */
+ /* current depth stored in i var */
+ i = depth - 1;
+ while (k--) {
+ oldblock = newblock;
+ newblock = ablocks[--a];
+ blkc = get_block(newblock);
+ if (!blkc) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto cleanup;
+ }
+
+ neh = (struct ext3_extent_header *)blkc->block_mem_addr;
+
+ neh->eh_entries = 1;
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ neh->eh_max = ext3_ext_space_block_idx();
+ neh->eh_depth = depth - i;
+ fidx = EXT_FIRST_INDEX(neh);
+ fidx->ei_block = border;
+ ext3_idx_store_pblock(fidx, oldblock);
+
+ /* copy indexes */
+ m = 0;
+ path[i].p_idx++;
+
+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
+ /*memmove(++fidx, path[i].p_idx++,
+ sizeof(struct ext4_extent_idx));
+ neh->eh_entries++;
+ BUG_ON(neh->eh_entries > neh->eh_max);*/
+ path[i].p_idx++;
+ m++;
+ }
+ if (m) {
+ memmove(++fidx, path[i].p_idx - m,
+ sizeof(struct ext3_extent_idx) * m);
+ neh->eh_entries = neh->eh_entries + m;
+ }
+
+
+ /* correct old index */
+ if (m) {
+ path[i].p_hdr->eh_entries = path[i].p_hdr->eh_entries-m;
+ }
+
+ i--;
+ }
+
+ /* insert new index */
+ err = ext3_ext_insert_index(path + at, border, newblock);
+
+cleanup:
+
+ free(ablocks);
+
+ return err;
+}
+
+/*
+ * ext3_ext_grow_indepth:
+ * implements tree growing procedure:
+ * - allocates new block
+ * - moves top-level data (index block or leaf) into the new block
+ * - initializes new top-level, creating index that points to the
+ * just created block
+ */
+static int ext3_ext_grow_indepth(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path)
+{
+ struct ext4_ext_path *curp = path;
+ struct ext3_extent_header *neh;
+ struct blk_cache *blkc;
+ ext4_fsblk_t newblock;
+ void *addr;
+
+ blkc = get_block(0);
+ if (!blkc) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+
+ }
+ addr = blkc->block_mem_addr;
+ newblock = blkc->pblock;
+
+ /* move top-level index/leaf into new block */
+ memmove(addr, curp->p_hdr, EXT2_N_BLOCKS*sizeof(blk_t));
+
+ /* set size of new block */
+ neh = (struct ext3_extent_header *)addr;
+ /* old root could have indexes or leaves
+ * so calculate e_max right way */
+ if (eh->eh_depth) {
+ neh->eh_max = ext3_ext_space_block_idx();
+ } else {
+ neh->eh_max = ext3_ext_space_block();
+ }
+
+ neh->eh_magic = EXT4_EXT_MAGIC;
+
+ /* create index in new top-level index: num,max,pointer */
+
+ curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
+ curp->p_hdr->eh_max = ext3_ext_space_root_idx();
+ curp->p_hdr->eh_entries = 1;
+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
+ /* FIXME: it works, but actually path[0] can be index */
+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
+ ext3_idx_store_pblock(curp->p_idx, newblock);
+
+ neh = eh;
+ //fidx = EXT_FIRST_INDEX(neh);
+
+ neh->eh_depth = path->p_depth + 1;
+
+ return 0;
+}
+struct ext4_ext_path * ext3_ext_find_extent(struct ext3_extent_header *eh,
+ blk_t logical_blk,
+ struct ext4_ext_path *path)
+{
+
+ struct blk_cache *blkc;
+ short int depth, i, ppos = 0;
+ int alloc = 0;
+
+ i = depth = eh->eh_depth;
+
+ if (!path) {
+ /*
+ * We may be called to rebuild the path while growing the
+ * depth of the tree
+ */
+ path = malloc(sizeof(struct ext4_ext_path)* (depth+2));
+ if (!path)
+ return NULL;
+
+ alloc = 1;
+ memset(path, 0, sizeof(struct ext4_ext_path)* (depth+2));
+ }
+
+
+ path[0].p_hdr = eh;
+
+ while (i) {
+
+ ext3_ext_binsearch_idx(path+ppos, logical_blk);
+
+ path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_depth = i;
+ path[ppos].p_ext = NULL;
+
+ blkc = get_block(path[ppos].p_block);
+
+ if (!blkc) {
+ if (alloc)
+ free(path);
+ return NULL;
+ }
+
+ eh = blkc->block_mem_addr;
+ if (!eh) {
+ if (alloc)
+ free(path);
+ return NULL;
+ }
+ if (eh->eh_max == 0) {
+ if (alloc)
+ free(path);
+ return NULL;
+ }
+ ppos++;
+ path[ppos].p_hdr = eh;
+ i--;
+ }
+ path[ppos].p_depth = i;
+ path[ppos].p_hdr = eh;
+ path[ppos].p_ext = NULL;
+ path[ppos].p_idx = NULL;
+
+ ext3_ext_binsearch(path + ppos, logical_blk);
+
+ return path;
+}
+
+static int ext3_ext_create_new_leaf(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path,
+ struct ext3_extent *newext)
+{
+
+ struct ext4_ext_path *curp;
+ int depth, i, err = 0;
+
+repeat:
+ i = depth = eh->eh_depth;
+
+ /* walk up to the tree and look for free index entry */
+ curp = path + depth;
+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
+ i--;
+ curp--;
+ }
+
+ if (EXT_HAS_FREE_INDEX(curp)) {
+
+ /* if we found index with free entry, then use that
+ * entry: create all needed subtree and add new leaf */
+ err = ext3_ext_split(eh, path, newext, i);
+ if (err)
+ goto err_out;
+
+ /*
+ * Recalculate the path since grow inode would
+ * have changed it
+ */
+ path = ext3_ext_find_extent(eh, newext->ee_block, path);
+ if (!path) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+
+ } else {
+
+ /* tree is full, time to grow in depth */
+ err = ext3_ext_grow_indepth(eh, path);
+ if (err)
+ goto err_out;
+
+ /*
+ * recalculate the path since grow inode
+ * would have changed it i
+ */
+ path = ext3_ext_find_extent(eh, newext->ee_block, path);
+ if (!path) {
+ err = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+
+
+ /*
+ * only first (depth 0 -> 1) produces free space;
+ * in all other cases we have to split the grown tree
+ */
+ depth = eh->eh_depth;
+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
+ /* now we need to split */
+ goto repeat;
+ }
+
+ }
+
+err_out:
+
+ return err;
+}
+
+static int ext3_can_extents_be_merged(struct ext3_extent *ex1,
+ struct ext3_extent *ex2)
+{
+ if (ex1->ee_block + ex1->ee_len != ex2->ee_block)
+ return 0;
+
+
+ if (ext_pblock(ex1) + ex1->ee_len == ext_pblock(ex2))
+ return 1;
+ return 0;
+}
+
+
+/*
+ * ext3_ext_next_leaf_block:
+ * returns first allocated block from next leaf or EXT_MAX_BLOCK
+ */
+static unsigned ext3_ext_next_leaf_block(struct ext4_ext_path *path)
+{
+ int depth;
+
+ depth = path->p_depth;
+
+ /* zero-tree has no leaf blocks at all */
+ if (depth == 0)
+ return EXT_MAX_BLOCK;
+
+ /* go to index block */
+ depth--;
+
+ while (depth >= 0) {
+ if (path[depth].p_idx != EXT_LAST_INDEX(path[depth].p_hdr)) {
+ return path[depth].p_idx[1].ei_block;
+ }
+ depth--;
+ }
+
+ return EXT_MAX_BLOCK;
+}
+
+/*
+ * ext3_ext_correct_indexes:
+ * if leaf gets modified and modified extent is first in the leaf,
+ * then we have to correct all indexes above.
+ * TODO: do we need to correct tree in all cases?
+ */
+static void ext3_ext_correct_indexes(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path)
+{
+ int depth = eh->eh_depth;
+ struct ext3_extent *ex;
+ int border;
+ int k;
+
+
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+
+ if (depth == 0) {
+ /* there is no tree at all */
+ return;
+ }
+
+ if (ex != EXT_FIRST_EXTENT(eh)) {
+ /* we correct tree if first leaf got modified only */
+ return;
+ }
+
+ /*
+ * TODO: we need correction if border is smaller than current one
+ */
+ k = depth - 1;
+ border = path[depth].p_ext->ee_block;
+ path[k].p_idx->ei_block = border;
+
+ while (k--) {
+ /* change all left-side indexes */
+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
+ break;
+ path[k].p_idx->ei_block = border;
+ }
+
+ return;
+}
+
+int ext3_ext_insert_extent(struct ext3_extent_header *eh,
+ struct ext4_ext_path *path,
+ struct ext3_extent *newext)
+{
+
+ struct ext3_extent *ex, *fex;
+ struct ext3_extent *nearex; /* nearest extent */
+ struct ext4_ext_path *npath = NULL;
+ int depth, len, err = 0, next;
+ struct ext3_extent_header *save_eh = eh;
+
+ depth = eh->eh_depth;
+ ex = path[depth].p_ext;
+
+ /* try to insert block into found extent and return */
+ if (ex && ext3_can_extents_be_merged(ex, newext)) {
+ ex->ee_len = ex->ee_len + newext->ee_len;
+ eh = path[depth].p_hdr;
+ nearex = ex;
+ goto merge;
+ }
+
+repeat:
+ /* recalculate depth here */
+ depth=save_eh->eh_depth;
+
+ eh = path[depth].p_hdr;
+ if (eh->eh_entries < eh->eh_max)
+ goto has_space;
+
+ /* probably next leaf has space for us? */
+ fex = EXT_LAST_EXTENT(eh);
+ next = ext3_ext_next_leaf_block(path);
+
+ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) {
+
+ npath = ext3_ext_find_extent(save_eh, next, NULL);
+ if (!npath) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+ }
+
+ eh = npath[depth].p_hdr;
+ if (eh->eh_entries < eh->eh_max) {
+ path = npath;
+ goto repeat;
+ }
+ }
+
+ /*
+ * There is no free space in the found leaf.
+ * We're gonna add a new leaf in the tree.
+ */
+ err = ext3_ext_create_new_leaf(save_eh, path, newext);
+ if (err)
+ goto cleanup;
+
+ /* recalculate depth here */
+ depth=save_eh->eh_depth;
+ eh = path[depth].p_hdr;
+
+has_space:
+
+ nearex = path[depth].p_ext;
+
+ if (!nearex) {
+ /* there is no extent in this leaf, create first one */
+ path[depth].p_ext = EXT_FIRST_EXTENT(eh);
+
+ } else if (newext->ee_block > nearex->ee_block) {
+
+ if (nearex != EXT_LAST_EXTENT(eh)) {
+ len = EXT_MAX_EXTENT(eh) - nearex;
+ len = (len - 1) * sizeof(struct ext3_extent);
+ len = len < 0 ? 0 : len;
+ memmove(nearex + 2, nearex + 1, len);
+ }
+ path[depth].p_ext = nearex + 1;
+ } else {
+ len = (EXT_MAX_EXTENT(eh) - nearex)*sizeof(struct ext3_extent);
+ len = len < 0 ? 0 : len;
+ memmove(nearex + 1, nearex, len);
+ path[depth].p_ext = nearex;
+ }
+
+ eh->eh_entries = eh->eh_entries+1;
+ nearex = path[depth].p_ext;
+ nearex->ee_block = newext->ee_block;
+ nearex->ee_start = newext->ee_start;
+ nearex->ee_start_hi = newext->ee_start_hi;
+ nearex->ee_len = newext->ee_len;
+
+merge:
+ /* try to merge extents to the right */
+ while (nearex < EXT_LAST_EXTENT(eh)) {
+ if (!ext3_can_extents_be_merged(nearex, nearex + 1))
+ break;
+ /* merge with next extent! */
+ nearex->ee_len = nearex->ee_len + nearex[1].ee_len;
+
+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
+ len = (EXT_LAST_EXTENT(eh) - nearex - 1)
+ * sizeof(struct ext3_extent);
+ memmove(nearex + 1, nearex + 2, len);
+ }
+ eh->eh_entries = eh->eh_entries-1;
+ }
+
+ /* try to merge extents to the left */
+
+ /* time to correct all indexes above */
+ ext3_ext_correct_indexes(save_eh, path);
+
+cleanup:
+ if (npath) {
+ free(npath);
+ }
+
+ return err;
+}
+
diff --git a/ext4migrate/extents.h b/ext4migrate/extents.h
new file mode 100644
index 0000000..38114dd
--- /dev/null
+++ b/ext4migrate/extents.h
@@ -0,0 +1,10 @@
+
+struct ext4_ext_path * ext3_ext_find_extent(struct ext3_extent_header *,
+ blk_t , struct ext4_ext_path *);
+struct blk_cache * get_block(ext4_fsblk_t);
+int ext3_ext_space_root(void);
+ext4_fsblk_t ext_pblock(struct ext3_extent *);
+ext4_fsblk_t idx_pblock(struct ext3_extent_idx *);
+void ext3_ext_store_pblock(struct ext3_extent *, ext4_fsblk_t );
+int ext3_ext_insert_extent(struct ext3_extent_header *,
+ struct ext4_ext_path *, struct ext3_extent *);
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 3/4] e2fsprogs: Add ext4migrate
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Add ext4migrate utility that helps in migrating a ext3 block mapped
inode to ext4 extent mapped inode.
ext4migrate command takes the below syntax
ext4migrate --display | --migrate <image_name> [<filename>]
The --display option helps in displaying the block map details for an ext3 inode
and extent map details for an ext4 inode. The --migrate option convert a block mapped
ext3 inode to extent mapped ext4 inode.
This needs to be run on an unmounted file system (offline migration).
The inode modification is done only at the last stage. This is to make sure that if we
fail at any intermediate stage, we exit without touching the disk.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
Makefile.in | 3 +-
configure.in | 21 ++-
ext4migrate/Makefile.in | 66 ++++
ext4migrate/migrate.c | 791 +++++++++++++++++++++++++++++++++++++++++++++++
ext4migrate/migrate.h | 30 ++
5 files changed, 909 insertions(+), 2 deletions(-)
create mode 100644 ext4migrate/Makefile.in
create mode 100644 ext4migrate/migrate.c
create mode 100644 ext4migrate/migrate.h
diff --git a/Makefile.in b/Makefile.in
index 0d31caa..9d8d291 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -9,9 +9,10 @@ INSTALL = @INSTALL@
@RESIZER_CMT@RESIZE_DIR= resize
@DEBUGFS_CMT@DEBUGFS_DIR= debugfs
+@EXT4MIGRATE_CMT@EXT4MIGRATE_DIR= ext4migrate
LIB_SUBDIRS=lib/et lib/ss lib/e2p lib/ext2fs lib/uuid lib/blkid intl
-PROG_SUBDIRS=e2fsck $(DEBUGFS_DIR) misc $(RESIZE_DIR) tests/progs po
+PROG_SUBDIRS=e2fsck $(DEBUGFS_DIR) misc $(RESIZE_DIR) tests/progs po $(EXT4MIGRATE_DIR)
SUBDIRS=util $(LIB_SUBDIRS) $(PROG_SUBDIRS) tests
SUBS= lib/ext2fs/ext2_types.h lib/blkid/blkid_types.h lib/uuid/uuid_types.h
diff --git a/configure.in b/configure.in
index 44b718d..5ae5e0b 100644
--- a/configure.in
+++ b/configure.in
@@ -436,6 +436,24 @@ echo "Building e2fsck statically by default"
)
AC_SUBST(E2FSCK_TYPE)
dnl
+dnl handle --enable-ext4migrate
+dnl
+AC_ARG_ENABLE([ext4migrate],
+[ --disable-ext4migrate disable support of ext4migrate program],
+if test "$enableval" = "no"
+then
+ echo "Disabling ext4migrate support"
+ EXT4MIGRATE_CMT="#"
+else
+ EXT4MIGRATE_CMT=
+ echo "Enabling ext4migrate support"
+fi
+,
+echo "Enabling ext4migrate support by default"
+EXT4MIGRATE_CMT=
+)
+AC_SUBST(EXT4MIGRATE_CMT)
+dnl
dnl See whether to install the `fsck' wrapper program (that calls e2fsck)
dnl
AC_ARG_ENABLE([fsck],
@@ -862,7 +880,8 @@ for i in MCONFIG Makefile e2fsprogs.spec \
lib/e2p/e2p.pc lib/blkid/blkid.pc lib/ext2fs/ext2fs.pc \
misc/Makefile ext2ed/Makefile e2fsck/Makefile \
debugfs/Makefile tests/Makefile tests/progs/Makefile \
- resize/Makefile doc/Makefile intl/Makefile po/Makefile.in ; do
+ resize/Makefile doc/Makefile intl/Makefile po/Makefile.in \
+ ext4migrate/Makefile ; do
if test -d `dirname ${srcdir}/$i` ; then
outlist="$outlist $i"
fi
diff --git a/ext4migrate/Makefile.in b/ext4migrate/Makefile.in
new file mode 100644
index 0000000..b000ae7
--- /dev/null
+++ b/ext4migrate/Makefile.in
@@ -0,0 +1,66 @@
+#
+# Standard e2fsprogs prologue....
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+top_builddir = ..
+my_dir = util
+INSTALL = @INSTALL@
+@MCONFIG@
+
+SRCS = $(srcdir)/migrate.c
+
+LIBS= $(LIBEXT2FS) $(LIBCOM_ERR) -ltdb
+DEPLIBS= $(LIBEXT2FS) $(LIBCOM_ERR)
+
+.c.o:
+ @echo " CC $<"
+ @$(CC) -c $(ALL_CFLAGS) $< -o $@
+ @#cc -g -I../lib/ -Wunreachable-code -Wunused -Wunused-function
+ @#-Wunused-label -Wunused-parameter -Wunused-value -Wunused-variable -c migrate.c
+
+PROGS= ext4migrate
+
+all:: $(PROGS)
+
+ext4migrate: migrate.o extents.o $(DEPLIBS)
+ @echo " LD $@"
+ @$(CC) $(ALL_LDFLAGS) -o ext4migrate migrate.o extents.o $(LIBS)
+
+installdirs:
+ @echo " MKINSTALLDIRS $(root_sbindir)"
+ @$(MKINSTALLDIRS) $(DESTDIR)$(root_sbindir)
+
+install: $(PROGS) installdirs
+ @for i in $(PROGS); do \
+ echo " INSTALL $(root_sbindir)/$$i"; \
+ $(INSTALL_PROGRAM) $$i $(DESTDIR)$(root_sbindir)/$$i; \
+ done
+
+install-strip: install
+ @for i in $(PROGS); do \
+ echo " STRIP $(root_sbindir)/$$i"; \
+ $(STRIP) $(DESTDIR)$(root_sbindir)/$$i; \
+ done
+
+uninstall:
+ for i in $(PROGS); do \
+ $(RM) -f $(DESTDIR)$(root_sbindir)/$$i; \
+ done
+clean:
+ $(RM) -f $(PROGS) \#* *.s *.o *.a *~ core
+
+mostlyclean: clean
+
+distclean: clean
+ $(RM) -f .depend Makefile $(srcdir)/TAGS $(srcdir)/Makefile.in.old
+
+# +++ Dependency line eater +++
+#
+# Makefile dependencies follow. This must be the last section in
+# the Makefile.in file
+#
+migrate.o: $(srcdir)/migrate.c
+extents.o: $(srcdir)/extents.c
diff --git a/ext4migrate/migrate.c b/ext4migrate/migrate.c
new file mode 100644
index 0000000..d6348c8
--- /dev/null
+++ b/ext4migrate/migrate.c
@@ -0,0 +1,791 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "migrate.h"
+#include "extents.h"
+
+static struct list_head blk_cache_head;
+static struct list_head blk_stat_head;
+static blk_t i_block[EXT2_N_BLOCKS];
+static int migrate = 0;
+ext2_filsys current_fs = NULL;
+
+static void free_blk_cache(void)
+{
+ struct list_head *entry, *tmp;
+ struct blk_cache *blk;
+
+ list_for_each_safe(entry, tmp, &blk_cache_head) {
+ blk = list_entry(entry, struct blk_cache, head);
+ list_del(entry);
+ free(blk->block_mem_addr);
+ free(blk);
+ }
+
+ INIT_LIST_HEAD(&blk_cache_head);
+}
+
+static void free_blk_stat(void)
+{
+ struct list_head *entry, *tmp;
+ struct blk_stat *stat;
+
+ list_for_each_safe(entry, tmp, &blk_stat_head) {
+ stat = list_entry(entry, struct blk_stat, head);
+ list_del(entry);
+ free(stat);
+ }
+ INIT_LIST_HEAD(&blk_stat_head);
+}
+
+static void init_iblock()
+{
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)i_block;
+ eh->eh_magic = EXT4_EXT_MAGIC;
+ eh->eh_entries = 0;
+ eh->eh_depth = 0;
+ eh->eh_max = ext3_ext_space_root();
+}
+
+static void usage(char *prg_name)
+{
+ fprintf(stderr,
+ "%s --display | --migrate <image_name> [<filename>]\n",
+ prg_name);
+ exit(1);
+}
+
+static int build_extent(blk_t start_blk, blk_t end_blk,
+ e2_blkcnt_t start_pblk, e2_blkcnt_t end_pblk)
+{
+
+ int retval = 0;
+ struct ext3_extent newext;
+ struct ext4_ext_path *path;
+ /* start with a local allocated array without touching the inode */
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)i_block;
+ (void)end_pblk;
+
+ newext.ee_block = start_blk;
+ newext.ee_len = end_blk - start_blk +1;
+ ext3_ext_store_pblock(&newext, start_pblk);
+
+ path = ext3_ext_find_extent(eh, start_blk, NULL);
+
+ if (!path)
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+
+ retval = ext3_ext_insert_extent(eh, path, &newext);
+
+ free(path);
+ return retval;
+}
+
+static void finish_range(struct list_blocks_struct *lb)
+{
+ int retval;
+
+ if (lb->first_block == 0)
+ return;
+
+ printf("(%lld-%lld):%u-%u\n", lb->first_bcnt, lb->last_bcnt,
+ lb->first_block, lb->last_block);
+
+ if (migrate) {
+ retval = build_extent(lb->first_bcnt, lb->last_bcnt,
+ lb->first_block, lb->last_block);
+ if (retval) {
+ com_err("build_extent", retval,
+ ": Failed to build extent");
+ exit(1);
+ }
+ }
+
+ lb->first_block = 0;
+}
+/*
+ * We don't want to update the filesystem block bitmap
+ * directly. For indirect blocks and others which we need
+ * to mark as unused, we want to track them but not update
+ * the file systembitmap early. This is needed to make sure
+ * the subsequent request for free blocks are not satisfied
+ * by these indirect blocks. This makes sure that if
+ * we fail later writing the inode, we have consistent
+ * block bitmap.
+ */
+
+static int mark_block_bitmap(ext4_fsblk_t pblock, int update)
+{
+ struct blk_stat *stat;
+
+ stat = malloc(sizeof(struct blk_stat));
+ if (!stat) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+ }
+
+ stat->pblock = pblock;
+ stat->dirty = 1;
+ list_add(&(stat->head), &blk_stat_head);
+
+ if (update) {
+ ext2fs_mark_block_bitmap(current_fs->block_map, stat->pblock);
+ ext2fs_mark_bb_dirty(current_fs);
+ }
+
+ return 0;
+
+
+}
+static int unmark_block_bitmap(ext4_fsblk_t pblock, int update)
+{
+ struct blk_stat *stat;
+
+ stat = malloc(sizeof(struct blk_stat));
+ if (!stat) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+ }
+
+ stat->pblock = pblock;
+ stat->dirty = 0;
+ list_add(&(stat->head), &blk_stat_head);
+
+ if (update) {
+ ext2fs_unmark_block_bitmap(current_fs->block_map, stat->pblock);
+ ext2fs_mark_bb_dirty(current_fs);
+ }
+
+ return 0;
+}
+
+static int write_block_bitmap(struct ext2_inode *inode, int write_to_fs)
+{
+ struct blk_stat *stat;
+ struct list_head *entry;
+ int total_blocks_added = 0, group;
+
+ list_for_each(entry, &blk_stat_head) {
+
+ stat = list_entry(entry, struct blk_stat, head);
+ group = ext2fs_group_of_blk(current_fs, stat->pblock);
+ if (stat->dirty) {
+ total_blocks_added++;
+ current_fs->group_desc[group].bg_free_blocks_count--;
+ current_fs->super->s_free_blocks_count--;
+ ext2fs_mark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ } else {
+ total_blocks_added--;
+ current_fs->group_desc[group].bg_free_blocks_count++;
+ current_fs->super->s_free_blocks_count++;
+ ext2fs_unmark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ }
+ }
+
+ /*
+ * Update the inode block count
+ */
+ if (total_blocks_added) {
+
+ inode->i_blocks += total_blocks_added *
+ (current_fs->blocksize / 512);
+ }
+
+
+ if (write_to_fs) {
+ ext2fs_mark_bb_dirty(current_fs);
+ return ext2fs_write_block_bitmap(current_fs);
+ }
+ return 0;
+}
+
+static int undo_write_block_bitmap(struct ext2_inode *inode, int write_to_fs)
+{
+ struct blk_stat *stat;
+ struct list_head *entry;
+ int total_blocks_added = 0, group;
+
+ list_for_each(entry, &blk_stat_head) {
+
+ stat = list_entry(entry, struct blk_stat, head);
+ group = ext2fs_group_of_blk(current_fs, stat->pblock);
+ if (stat->dirty) {
+ total_blocks_added--;
+ current_fs->group_desc[group].bg_free_blocks_count++;
+ current_fs->super->s_free_blocks_count++;
+ ext2fs_unmark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ } else {
+ total_blocks_added++;
+ current_fs->group_desc[group].bg_free_blocks_count--;
+ current_fs->super->s_free_blocks_count--;
+ ext2fs_mark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ }
+ }
+
+ /*
+ * Update the inode block count
+ */
+ if (total_blocks_added) {
+
+ inode->i_blocks += total_blocks_added *
+ (current_fs->blocksize / 512);
+ }
+
+
+ if (write_to_fs) {
+ ext2fs_mark_bb_dirty(current_fs);
+ return ext2fs_write_block_bitmap(current_fs);
+ }
+ return 0;
+}
+
+
+static int list_blocks_proc(ext2_filsys fs EXT2FS_ATTR((unused)),
+ blk_t *blocknr, e2_blkcnt_t blockcnt,
+ blk_t ref_block EXT2FS_ATTR((unused)),
+ int ref_offset EXT2FS_ATTR((unused)),
+ void *private)
+{
+ struct list_blocks_struct *lb = (struct list_blocks_struct *) private;
+
+ if (blockcnt >= 0) {
+ /*
+ * See if we can add on to the existing range (if it exists)
+ */
+ if (lb->first_block &&
+ (lb->last_block+1 == *blocknr) &&
+ (lb->last_bcnt+1 == blockcnt)) {
+ lb->last_block = *blocknr;
+ lb->last_bcnt = blockcnt;
+ return 0;
+ }
+ /*
+ * Start a new range.
+ */
+ finish_range(lb);
+ lb->first_block = lb->last_block = *blocknr;
+ lb->first_bcnt = lb->last_bcnt = blockcnt;
+ } else {
+ /*
+ * special blocks such as indirect, double indrect and triple
+ * indirect blocks. Mark them as free if we are migrating
+ * BLOCK_COUNT_IND BLOCK_COUNT_DIND BLOCK_COUNT_TIND
+ */
+ if (migrate) {
+ /*
+ * We should not update the file system bitmap
+ * because we don't want the next request for
+ * free block to be satisfied by this block.
+ */
+ if (unmark_block_bitmap(*blocknr, 0))
+ return BLOCK_ABORT;
+ }
+ }
+
+ return 0;
+}
+
+static void iterate_blocks(ext2_filsys current_fs, ext2_ino_t inode_num)
+{
+ struct list_blocks_struct lb;
+ lb.first_block = 0;
+
+ ext2fs_block_iterate2(current_fs, inode_num, 0, NULL,
+ list_blocks_proc, (void *)&lb);
+
+ finish_range(&lb);
+}
+
+/*FIXME!! goal value passed as zero */
+
+/* FIXME!! 48 bit block
+ * Can we ask for block addressed by 48 bit ?
+ * We have actually mounted a ext3 file system via ext4dev If want want
+ * we will have to change the group descriptor also */
+/* Function is also called from extents.c */
+static struct blk_cache * __get_block(ext2_filsys current_fs,
+ ext4_fsblk_t pblock)
+{
+ struct list_head *entry;
+ struct blk_cache *blk;
+ void *addr;
+ int retval;
+ /* need to initialize to fix corruption due to (long) typecasting */
+ ext4_fsblk_t block = 0;
+
+
+ if (pblock == 0) {
+ /* allocate in disk and give back */
+ /* FIXME!! 48 bit block */
+ retval = ext2fs_new_block(current_fs, 0, NULL, (blk_t *)&block);
+ if (retval) {
+ /*
+ * In case we fail we don't write/sync previous update
+ * we don't want partial conversion
+ */
+ return NULL;
+ }
+ /*
+ * We need to update the file system bitmap also
+ * otherwise this block may be double allocated
+ */
+ if (mark_block_bitmap(block, 1))
+ return NULL;
+
+ addr = malloc(current_fs->blocksize);
+ if (!addr)
+ return NULL;
+ memset(addr, 0, current_fs->blocksize);
+
+ blk = malloc(sizeof(struct blk_cache));
+ if (!blk) {
+ free(addr);
+ return NULL;
+ }
+
+ blk->pblock = (ext4_fsblk_t)block;
+ blk->block_mem_addr = addr;
+ list_add(&(blk->head), &blk_cache_head);
+
+ return blk;
+ }
+
+
+ list_for_each(entry, &blk_cache_head) {
+
+ blk = list_entry(entry, struct blk_cache, head);
+
+ if (blk->pblock == pblock ) {
+ return blk;
+
+ }
+ }
+
+ /*
+ * No block present. Allocate a disk block and a memory equivalent
+ * add the same to the list and return the address
+ */
+
+ /* FIXME!! 48 bit block */
+ retval = ext2fs_new_block(current_fs, 0, NULL, (blk_t *)&block);
+ if (retval) {
+ /*
+ * In case we fail we don't write/sync previous update
+ * we don't want partial conversion
+ */
+ return NULL;
+ }
+ /*
+ * We need to update the file system bitmap also
+ * otherwise this block may be double allocated
+ */
+ if (mark_block_bitmap(block, 1))
+ return NULL;
+
+ addr = malloc(current_fs->blocksize);
+ if (!addr)
+ return NULL;
+ memset(addr, 0, current_fs->blocksize);
+
+ blk = malloc(sizeof(struct blk_cache));
+ if (!blk) {
+ free(addr);
+ return NULL;
+ }
+
+ blk->pblock = (ext4_fsblk_t)block;
+ blk->block_mem_addr = addr;
+ list_add(&(blk->head), &blk_cache_head);
+
+ return blk;
+
+}
+
+struct blk_cache * get_block(ext4_fsblk_t pblock)
+{
+
+ return __get_block(current_fs, pblock);
+}
+
+static int write_extent_block(ext2_filsys current_fs,
+ ext4_fsblk_t pblock, void *buf)
+{
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)buf;
+ struct ext3_extent_idx *ix;
+ ext4_fsblk_t tpblock;
+ struct blk_cache *blkc;
+ int i, retval;
+
+ if (eh->eh_depth == 0) {
+ retval = ext2fs_write_ext_block(current_fs, pblock, buf);
+ if (retval)
+ goto err_out;
+ } else {
+ ix = EXT_FIRST_INDEX(eh);
+ for (i = 0; i < eh->eh_entries; i++, ix++) {
+
+ tpblock = idx_pblock(ix);
+ blkc = get_block(tpblock);
+ if (!blkc) {
+ retval = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+ retval = write_extent_block(current_fs,
+ blkc->pblock,
+ blkc->block_mem_addr);
+ if (retval) {
+ goto err_out;
+ }
+ }
+ retval = ext2fs_write_ext_block(current_fs, pblock, buf);
+ if (retval)
+ goto err_out;
+
+ }
+
+err_out:
+
+ return retval;
+
+}
+
+
+static int write_extent_details(ext2_filsys current_fs, ext2_ino_t inode_num,
+ struct ext2_inode *inode, blk_t *i_block)
+{
+
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)i_block;
+ struct ext3_extent_idx *ix;
+ ext4_fsblk_t pblock;
+ struct blk_cache *blkc;
+ int i, retval;
+
+
+ /* Write the blocks to the disk */
+ if (eh->eh_depth != 0) {
+ /* need to write blocks corresponding to extent_idx */
+ ix = EXT_FIRST_INDEX(eh);
+ for (i = 0; i < eh->eh_entries; i++, ix++) {
+
+ pblock = idx_pblock(ix);
+ blkc = get_block(pblock);
+ if (!blkc) {
+ retval = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+ retval = write_extent_block(current_fs,
+ blkc->pblock,
+ blkc->block_mem_addr);
+ if (retval) {
+ goto err_out;
+ }
+ }
+ }
+
+ /*
+ * Don't write to the file system. If we have
+ * multiple files to migrate we batch the block
+ * bitmap update
+ */
+ retval = write_block_bitmap(inode, 0);
+ if (retval) {
+ goto err_out;
+ }
+
+ inode->i_flags |= EXT4_EXTENTS_FL;
+ memcpy(inode->i_block, i_block, sizeof(inode->i_block));
+ /* write inode */
+ retval = ext2fs_write_inode(current_fs, inode_num, inode);
+ if (retval) {
+ undo_write_block_bitmap(inode, 0);
+ goto err_out;
+ }
+
+
+
+err_out:
+ return retval;
+
+}
+
+static int do_migrate(ext2_filsys current_fs,
+ ext2_ino_t inode_num, char *file_name)
+{
+ int retval, saved_migrate;
+ struct ext2_inode inode;
+
+ /* reset the i_block array */
+ init_iblock();
+
+ saved_migrate = migrate;
+
+
+ printf("%s inode number %d\n", file_name, inode_num);
+
+ retval = ext2fs_read_inode(current_fs, inode_num, &inode);
+ if (retval) {
+ com_err(file_name, retval,": Unable to get file inode number");
+ goto err_out;
+ }
+
+ if (inode.i_flags & EXT4_EXTENTS_FL) {
+ printf("Extent already enabled\n");
+ migrate = 0 ;
+ }
+
+ iterate_blocks(current_fs, inode_num);
+
+ if (migrate) {
+
+ /*
+ * Only if we have successfully walked the
+ * block mapping write the extent details
+ */
+ retval = write_extent_details(current_fs, inode_num,
+ &inode, i_block);
+ if (retval) {
+ com_err(file_name, 0,
+ "Failed to write extent details");
+ goto err_out;
+ }
+
+ /* dump the migrated extent details */
+ migrate = 0;
+ printf("Extent details after migrate\n");
+ iterate_blocks(current_fs, inode_num);
+ }
+
+ /* reset the value of migrate */
+ migrate = saved_migrate;
+
+err_out:
+ free_blk_cache();
+ free_blk_stat();
+ return retval;
+
+
+}
+
+static int dir_migrate_proc(ext2_ino_t dir EXT2FS_ATTR((unused)),
+ int entry EXT2FS_ATTR((unused)),
+ struct ext2_dir_entry *dirent,
+ int offset EXT2FS_ATTR((unused)),
+ int blocksize EXT2FS_ATTR((unused)),
+ char *buf EXT2FS_ATTR((unused)),
+ void *private EXT2FS_ATTR((unused)))
+{
+ int retval;
+ struct ext2_inode inode;
+ char file_name[EXT2_NAME_LEN];
+
+ if (dirent->inode == 0)
+ return 0;
+ if (((dirent->name_len&0xFF) == 1) && (dirent->name[0] == '.'))
+ return 0;
+ if (((dirent->name_len&0xFF) == 2) && (dirent->name[0] == '.') &&
+ (dirent->name[1] == '.')) {
+ return 0;
+ }
+
+ retval = ext2fs_read_inode(current_fs, dirent->inode, &inode);
+ if (retval) {
+ goto err_out;
+ }
+ if (LINUX_S_ISDIR(inode.i_mode)) {
+ /*
+ * Directory iterate this again
+ */
+ retval = ext2fs_dir_iterate2(current_fs, dirent->inode, 0, 0,
+ dir_migrate_proc, 0);
+ if (retval) {
+ goto err_out;
+ }
+
+
+ } else if (LINUX_S_ISREG(inode.i_mode)) {
+
+ /*
+ * properly NULL terminate the name
+ */
+ int this_len = ((dirent->name_len & 0xFF) < EXT2_NAME_LEN) ?
+ (dirent->name_len & 0xFF) : EXT2_NAME_LEN;
+
+ strncpy(file_name, dirent->name, this_len);
+ file_name[this_len] = '\0';
+
+ retval = do_migrate(current_fs, dirent->inode, file_name);
+ if (retval) {
+ goto err_out;
+ }
+
+ }
+
+ return 0;
+
+err_out:
+ retval = DIRENT_ABORT;
+ return retval;
+
+
+}
+static void recursive_migrate(ext2_filsys current_fs)
+{
+
+ int retval;
+
+ retval = ext2fs_dir_iterate2(current_fs, EXT2_ROOT_INO, 0, 0,
+ dir_migrate_proc, 0);
+ if (retval)
+ goto err_out;
+
+ if (migrate && !EXT2_HAS_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS)) {
+
+ EXT2_SET_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS);
+
+ }
+
+ if (migrate) {
+
+ ext2fs_mark_bb_dirty(current_fs);
+ ext2fs_mark_super_dirty(current_fs);
+
+ /*
+ * Write the block bitmap
+ */
+ retval = ext2fs_write_block_bitmap(current_fs);
+ if (retval == 0)
+ current_fs->super->s_state |= EXT2_VALID_FS;
+ }
+err_out:
+ ext2fs_close(current_fs);
+ return ;
+}
+
+main(int argc, char *argv[])
+{
+ char *dev_name, *file_name;
+ int flags, superblock = 0;
+ unsigned int block_size = 0;
+ int retval, i, recursive = 0;
+ ext2_ino_t inode_num;
+
+ if (argc < 3) {
+ usage(argv[0]);
+ }
+
+ if (strcmp(argv[1], "--migrate") == 0) {
+ migrate = 1;
+ } else if (strcmp(argv[1], "--display") == 0) {
+ migrate = 0;
+ } else {
+ usage(argv[0]);
+ }
+
+ dev_name = argv[2];
+
+ if (argc == 3) {
+ recursive = 1;
+ }
+
+ if (migrate) {
+ flags = EXT2_FLAG_SOFTSUPP_FEATURES | EXT2_FLAG_RW;
+ retval = ext2fs_open2(dev_name,"tdbfilename=/tmp/test.tdb&",
+ flags, superblock, block_size,
+ unix_cow_io_manager, ¤t_fs);
+ } else {
+ flags = EXT2_FLAG_SOFTSUPP_FEATURES;
+ retval = ext2fs_open2(dev_name, 0,
+ flags, superblock, block_size,
+ unix_cow_io_manager, ¤t_fs);
+ }
+
+#ifdef DEBUG
+ retval = ext2fs_open2(dev_name, 0, flags, superblock,
+ block_size, unix_io_manager, ¤t_fs);
+#endif
+
+ if (retval) {
+ com_err(dev_name, retval,": Unable to open the file system");
+ return;
+ }
+
+ retval = ext2fs_read_inode_bitmap(current_fs);
+ if (retval) {
+ com_err(dev_name, retval,": Unable to read inode bitmap");
+ goto err_out;
+ }
+ retval = ext2fs_read_block_bitmap(current_fs);
+ if (retval) {
+ com_err(dev_name, retval,": Unable to read block bitmap");
+ goto err_out;
+ }
+
+ /*
+ * initialize the list head for block cache and bloc stat
+ */
+ INIT_LIST_HEAD(&blk_cache_head);
+ INIT_LIST_HEAD(&blk_stat_head);
+
+ /* Mark the file system as unclean
+ * This will force a fsck if something goes wrong later
+ */
+ if (migrate)
+ current_fs->super->s_state &= ~EXT2_VALID_FS;
+
+ if (recursive) {
+ /*
+ * Migrate all the ext3 inode in the file
+ * system to extent map
+ */
+ recursive_migrate(current_fs);
+ return ;
+ }
+
+ for (i = 3; i < argc; i++) {
+
+ file_name = argv[i];
+ retval = ext2fs_namei(current_fs, EXT2_ROOT_INO,
+ EXT2_ROOT_INO, file_name, &inode_num);
+ if (retval) {
+ com_err(file_name, retval,
+ ": Unable to get file inode number");
+ goto err_out;
+ }
+
+ retval = do_migrate(current_fs, inode_num, file_name);
+ if (retval) {
+ /*
+ * If we fail to migrate then exit
+ */
+ goto err_out;
+ }
+ }
+
+ if (migrate && !EXT2_HAS_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS)) {
+
+ EXT2_SET_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS);
+
+ }
+
+ if (migrate) {
+
+ ext2fs_mark_bb_dirty(current_fs);
+ ext2fs_mark_super_dirty(current_fs);
+
+ /*
+ * Write the block bitmap
+ */
+ retval = ext2fs_write_block_bitmap(current_fs);
+ if (retval == 0)
+ current_fs->super->s_state |= EXT2_VALID_FS;
+ }
+err_out:
+ ext2fs_close(current_fs);
+}
+
diff --git a/ext4migrate/migrate.h b/ext4migrate/migrate.h
new file mode 100644
index 0000000..7dbb93b
--- /dev/null
+++ b/ext4migrate/migrate.h
@@ -0,0 +1,30 @@
+
+#include "ext2fs/ext2_fs.h"
+#include "ext2fs/ext2fs.h"
+#include "blkid/list.h"
+
+typedef unsigned long long ext4_fsblk_t;
+
+extern ext2_filsys current_fs;
+
+#define EXT4_EXT_MAGIC 0xf30a
+#define EXT2_SET_INCOMPAT_FEATURE(sb,mask) \
+ ( EXT2_SB(sb)->s_feature_incompat |= (mask) )
+
+
+struct list_blocks_struct {
+ blk_t first_block, last_block;
+ e2_blkcnt_t first_bcnt, last_bcnt;
+};
+
+struct blk_stat {
+ struct list_head head;
+ ext4_fsblk_t pblock;
+ int dirty;
+};
+
+struct blk_cache {
+ struct list_head head;
+ ext4_fsblk_t pblock;
+ void * block_mem_addr;
+};
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 3/4] e2fsprogs: Add ext4migrate
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Add ext4migrate utility that helps in migrating a ext3 block mapped
inode to ext4 extent mapped inode.
ext4migrate command takes the below syntax
ext4migrate --display | --migrate <image_name> [<filename>]
The --display option helps in displaying the block map details for an ext3 inode
and extent map details for an ext4 inode. The --migrate option convert a block mapped
ext3 inode to extent mapped ext4 inode.
This needs to be run on an unmounted file system (offline migration).
The inode modification is done only at the last stage. This is to make sure that if we
fail at any intermediate stage, we exit without touching the disk.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
Makefile.in | 3 +-
configure.in | 21 ++-
ext4migrate/Makefile.in | 66 ++++
ext4migrate/migrate.c | 791 +++++++++++++++++++++++++++++++++++++++++++++++
ext4migrate/migrate.h | 30 ++
5 files changed, 909 insertions(+), 2 deletions(-)
create mode 100644 ext4migrate/Makefile.in
create mode 100644 ext4migrate/migrate.c
create mode 100644 ext4migrate/migrate.h
diff --git a/Makefile.in b/Makefile.in
index 0d31caa..9d8d291 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -9,9 +9,10 @@ INSTALL = @INSTALL@
@RESIZER_CMT@RESIZE_DIR= resize
@DEBUGFS_CMT@DEBUGFS_DIR= debugfs
+@EXT4MIGRATE_CMT@EXT4MIGRATE_DIR= ext4migrate
LIB_SUBDIRS=lib/et lib/ss lib/e2p lib/ext2fs lib/uuid lib/blkid intl
-PROG_SUBDIRS=e2fsck $(DEBUGFS_DIR) misc $(RESIZE_DIR) tests/progs po
+PROG_SUBDIRS=e2fsck $(DEBUGFS_DIR) misc $(RESIZE_DIR) tests/progs po $(EXT4MIGRATE_DIR)
SUBDIRS=util $(LIB_SUBDIRS) $(PROG_SUBDIRS) tests
SUBS= lib/ext2fs/ext2_types.h lib/blkid/blkid_types.h lib/uuid/uuid_types.h
diff --git a/configure.in b/configure.in
index 44b718d..5ae5e0b 100644
--- a/configure.in
+++ b/configure.in
@@ -436,6 +436,24 @@ echo "Building e2fsck statically by default"
)
AC_SUBST(E2FSCK_TYPE)
dnl
+dnl handle --enable-ext4migrate
+dnl
+AC_ARG_ENABLE([ext4migrate],
+[ --disable-ext4migrate disable support of ext4migrate program],
+if test "$enableval" = "no"
+then
+ echo "Disabling ext4migrate support"
+ EXT4MIGRATE_CMT="#"
+else
+ EXT4MIGRATE_CMT=
+ echo "Enabling ext4migrate support"
+fi
+,
+echo "Enabling ext4migrate support by default"
+EXT4MIGRATE_CMT=
+)
+AC_SUBST(EXT4MIGRATE_CMT)
+dnl
dnl See whether to install the `fsck' wrapper program (that calls e2fsck)
dnl
AC_ARG_ENABLE([fsck],
@@ -862,7 +880,8 @@ for i in MCONFIG Makefile e2fsprogs.spec \
lib/e2p/e2p.pc lib/blkid/blkid.pc lib/ext2fs/ext2fs.pc \
misc/Makefile ext2ed/Makefile e2fsck/Makefile \
debugfs/Makefile tests/Makefile tests/progs/Makefile \
- resize/Makefile doc/Makefile intl/Makefile po/Makefile.in ; do
+ resize/Makefile doc/Makefile intl/Makefile po/Makefile.in \
+ ext4migrate/Makefile ; do
if test -d `dirname ${srcdir}/$i` ; then
outlist="$outlist $i"
fi
diff --git a/ext4migrate/Makefile.in b/ext4migrate/Makefile.in
new file mode 100644
index 0000000..b000ae7
--- /dev/null
+++ b/ext4migrate/Makefile.in
@@ -0,0 +1,66 @@
+#
+# Standard e2fsprogs prologue....
+#
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+top_builddir = ..
+my_dir = util
+INSTALL = @INSTALL@
+@MCONFIG@
+
+SRCS = $(srcdir)/migrate.c
+
+LIBS= $(LIBEXT2FS) $(LIBCOM_ERR) -ltdb
+DEPLIBS= $(LIBEXT2FS) $(LIBCOM_ERR)
+
+.c.o:
+ @echo " CC $<"
+ @$(CC) -c $(ALL_CFLAGS) $< -o $@
+ @#cc -g -I../lib/ -Wunreachable-code -Wunused -Wunused-function
+ @#-Wunused-label -Wunused-parameter -Wunused-value -Wunused-variable -c migrate.c
+
+PROGS= ext4migrate
+
+all:: $(PROGS)
+
+ext4migrate: migrate.o extents.o $(DEPLIBS)
+ @echo " LD $@"
+ @$(CC) $(ALL_LDFLAGS) -o ext4migrate migrate.o extents.o $(LIBS)
+
+installdirs:
+ @echo " MKINSTALLDIRS $(root_sbindir)"
+ @$(MKINSTALLDIRS) $(DESTDIR)$(root_sbindir)
+
+install: $(PROGS) installdirs
+ @for i in $(PROGS); do \
+ echo " INSTALL $(root_sbindir)/$$i"; \
+ $(INSTALL_PROGRAM) $$i $(DESTDIR)$(root_sbindir)/$$i; \
+ done
+
+install-strip: install
+ @for i in $(PROGS); do \
+ echo " STRIP $(root_sbindir)/$$i"; \
+ $(STRIP) $(DESTDIR)$(root_sbindir)/$$i; \
+ done
+
+uninstall:
+ for i in $(PROGS); do \
+ $(RM) -f $(DESTDIR)$(root_sbindir)/$$i; \
+ done
+clean:
+ $(RM) -f $(PROGS) \#* *.s *.o *.a *~ core
+
+mostlyclean: clean
+
+distclean: clean
+ $(RM) -f .depend Makefile $(srcdir)/TAGS $(srcdir)/Makefile.in.old
+
+# +++ Dependency line eater +++
+#
+# Makefile dependencies follow. This must be the last section in
+# the Makefile.in file
+#
+migrate.o: $(srcdir)/migrate.c
+extents.o: $(srcdir)/extents.c
diff --git a/ext4migrate/migrate.c b/ext4migrate/migrate.c
new file mode 100644
index 0000000..d6348c8
--- /dev/null
+++ b/ext4migrate/migrate.c
@@ -0,0 +1,791 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "migrate.h"
+#include "extents.h"
+
+static struct list_head blk_cache_head;
+static struct list_head blk_stat_head;
+static blk_t i_block[EXT2_N_BLOCKS];
+static int migrate = 0;
+ext2_filsys current_fs = NULL;
+
+static void free_blk_cache(void)
+{
+ struct list_head *entry, *tmp;
+ struct blk_cache *blk;
+
+ list_for_each_safe(entry, tmp, &blk_cache_head) {
+ blk = list_entry(entry, struct blk_cache, head);
+ list_del(entry);
+ free(blk->block_mem_addr);
+ free(blk);
+ }
+
+ INIT_LIST_HEAD(&blk_cache_head);
+}
+
+static void free_blk_stat(void)
+{
+ struct list_head *entry, *tmp;
+ struct blk_stat *stat;
+
+ list_for_each_safe(entry, tmp, &blk_stat_head) {
+ stat = list_entry(entry, struct blk_stat, head);
+ list_del(entry);
+ free(stat);
+ }
+ INIT_LIST_HEAD(&blk_stat_head);
+}
+
+static void init_iblock()
+{
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)i_block;
+ eh->eh_magic = EXT4_EXT_MAGIC;
+ eh->eh_entries = 0;
+ eh->eh_depth = 0;
+ eh->eh_max = ext3_ext_space_root();
+}
+
+static void usage(char *prg_name)
+{
+ fprintf(stderr,
+ "%s --display | --migrate <image_name> [<filename>]\n",
+ prg_name);
+ exit(1);
+}
+
+static int build_extent(blk_t start_blk, blk_t end_blk,
+ e2_blkcnt_t start_pblk, e2_blkcnt_t end_pblk)
+{
+
+ int retval = 0;
+ struct ext3_extent newext;
+ struct ext4_ext_path *path;
+ /* start with a local allocated array without touching the inode */
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)i_block;
+ (void)end_pblk;
+
+ newext.ee_block = start_blk;
+ newext.ee_len = end_blk - start_blk +1;
+ ext3_ext_store_pblock(&newext, start_pblk);
+
+ path = ext3_ext_find_extent(eh, start_blk, NULL);
+
+ if (!path)
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+
+ retval = ext3_ext_insert_extent(eh, path, &newext);
+
+ free(path);
+ return retval;
+}
+
+static void finish_range(struct list_blocks_struct *lb)
+{
+ int retval;
+
+ if (lb->first_block == 0)
+ return;
+
+ printf("(%lld-%lld):%u-%u\n", lb->first_bcnt, lb->last_bcnt,
+ lb->first_block, lb->last_block);
+
+ if (migrate) {
+ retval = build_extent(lb->first_bcnt, lb->last_bcnt,
+ lb->first_block, lb->last_block);
+ if (retval) {
+ com_err("build_extent", retval,
+ ": Failed to build extent");
+ exit(1);
+ }
+ }
+
+ lb->first_block = 0;
+}
+/*
+ * We don't want to update the filesystem block bitmap
+ * directly. For indirect blocks and others which we need
+ * to mark as unused, we want to track them but not update
+ * the file systembitmap early. This is needed to make sure
+ * the subsequent request for free blocks are not satisfied
+ * by these indirect blocks. This makes sure that if
+ * we fail later writing the inode, we have consistent
+ * block bitmap.
+ */
+
+static int mark_block_bitmap(ext4_fsblk_t pblock, int update)
+{
+ struct blk_stat *stat;
+
+ stat = malloc(sizeof(struct blk_stat));
+ if (!stat) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+ }
+
+ stat->pblock = pblock;
+ stat->dirty = 1;
+ list_add(&(stat->head), &blk_stat_head);
+
+ if (update) {
+ ext2fs_mark_block_bitmap(current_fs->block_map, stat->pblock);
+ ext2fs_mark_bb_dirty(current_fs);
+ }
+
+ return 0;
+
+
+}
+static int unmark_block_bitmap(ext4_fsblk_t pblock, int update)
+{
+ struct blk_stat *stat;
+
+ stat = malloc(sizeof(struct blk_stat));
+ if (!stat) {
+ return EXT2_ET_BLOCK_ALLOC_FAIL;
+ }
+
+ stat->pblock = pblock;
+ stat->dirty = 0;
+ list_add(&(stat->head), &blk_stat_head);
+
+ if (update) {
+ ext2fs_unmark_block_bitmap(current_fs->block_map, stat->pblock);
+ ext2fs_mark_bb_dirty(current_fs);
+ }
+
+ return 0;
+}
+
+static int write_block_bitmap(struct ext2_inode *inode, int write_to_fs)
+{
+ struct blk_stat *stat;
+ struct list_head *entry;
+ int total_blocks_added = 0, group;
+
+ list_for_each(entry, &blk_stat_head) {
+
+ stat = list_entry(entry, struct blk_stat, head);
+ group = ext2fs_group_of_blk(current_fs, stat->pblock);
+ if (stat->dirty) {
+ total_blocks_added++;
+ current_fs->group_desc[group].bg_free_blocks_count--;
+ current_fs->super->s_free_blocks_count--;
+ ext2fs_mark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ } else {
+ total_blocks_added--;
+ current_fs->group_desc[group].bg_free_blocks_count++;
+ current_fs->super->s_free_blocks_count++;
+ ext2fs_unmark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ }
+ }
+
+ /*
+ * Update the inode block count
+ */
+ if (total_blocks_added) {
+
+ inode->i_blocks += total_blocks_added *
+ (current_fs->blocksize / 512);
+ }
+
+
+ if (write_to_fs) {
+ ext2fs_mark_bb_dirty(current_fs);
+ return ext2fs_write_block_bitmap(current_fs);
+ }
+ return 0;
+}
+
+static int undo_write_block_bitmap(struct ext2_inode *inode, int write_to_fs)
+{
+ struct blk_stat *stat;
+ struct list_head *entry;
+ int total_blocks_added = 0, group;
+
+ list_for_each(entry, &blk_stat_head) {
+
+ stat = list_entry(entry, struct blk_stat, head);
+ group = ext2fs_group_of_blk(current_fs, stat->pblock);
+ if (stat->dirty) {
+ total_blocks_added--;
+ current_fs->group_desc[group].bg_free_blocks_count++;
+ current_fs->super->s_free_blocks_count++;
+ ext2fs_unmark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ } else {
+ total_blocks_added++;
+ current_fs->group_desc[group].bg_free_blocks_count--;
+ current_fs->super->s_free_blocks_count--;
+ ext2fs_mark_block_bitmap(current_fs->block_map,
+ stat->pblock);
+ }
+ }
+
+ /*
+ * Update the inode block count
+ */
+ if (total_blocks_added) {
+
+ inode->i_blocks += total_blocks_added *
+ (current_fs->blocksize / 512);
+ }
+
+
+ if (write_to_fs) {
+ ext2fs_mark_bb_dirty(current_fs);
+ return ext2fs_write_block_bitmap(current_fs);
+ }
+ return 0;
+}
+
+
+static int list_blocks_proc(ext2_filsys fs EXT2FS_ATTR((unused)),
+ blk_t *blocknr, e2_blkcnt_t blockcnt,
+ blk_t ref_block EXT2FS_ATTR((unused)),
+ int ref_offset EXT2FS_ATTR((unused)),
+ void *private)
+{
+ struct list_blocks_struct *lb = (struct list_blocks_struct *) private;
+
+ if (blockcnt >= 0) {
+ /*
+ * See if we can add on to the existing range (if it exists)
+ */
+ if (lb->first_block &&
+ (lb->last_block+1 == *blocknr) &&
+ (lb->last_bcnt+1 == blockcnt)) {
+ lb->last_block = *blocknr;
+ lb->last_bcnt = blockcnt;
+ return 0;
+ }
+ /*
+ * Start a new range.
+ */
+ finish_range(lb);
+ lb->first_block = lb->last_block = *blocknr;
+ lb->first_bcnt = lb->last_bcnt = blockcnt;
+ } else {
+ /*
+ * special blocks such as indirect, double indrect and triple
+ * indirect blocks. Mark them as free if we are migrating
+ * BLOCK_COUNT_IND BLOCK_COUNT_DIND BLOCK_COUNT_TIND
+ */
+ if (migrate) {
+ /*
+ * We should not update the file system bitmap
+ * because we don't want the next request for
+ * free block to be satisfied by this block.
+ */
+ if (unmark_block_bitmap(*blocknr, 0))
+ return BLOCK_ABORT;
+ }
+ }
+
+ return 0;
+}
+
+static void iterate_blocks(ext2_filsys current_fs, ext2_ino_t inode_num)
+{
+ struct list_blocks_struct lb;
+ lb.first_block = 0;
+
+ ext2fs_block_iterate2(current_fs, inode_num, 0, NULL,
+ list_blocks_proc, (void *)&lb);
+
+ finish_range(&lb);
+}
+
+/*FIXME!! goal value passed as zero */
+
+/* FIXME!! 48 bit block
+ * Can we ask for block addressed by 48 bit ?
+ * We have actually mounted a ext3 file system via ext4dev If want want
+ * we will have to change the group descriptor also */
+/* Function is also called from extents.c */
+static struct blk_cache * __get_block(ext2_filsys current_fs,
+ ext4_fsblk_t pblock)
+{
+ struct list_head *entry;
+ struct blk_cache *blk;
+ void *addr;
+ int retval;
+ /* need to initialize to fix corruption due to (long) typecasting */
+ ext4_fsblk_t block = 0;
+
+
+ if (pblock == 0) {
+ /* allocate in disk and give back */
+ /* FIXME!! 48 bit block */
+ retval = ext2fs_new_block(current_fs, 0, NULL, (blk_t *)&block);
+ if (retval) {
+ /*
+ * In case we fail we don't write/sync previous update
+ * we don't want partial conversion
+ */
+ return NULL;
+ }
+ /*
+ * We need to update the file system bitmap also
+ * otherwise this block may be double allocated
+ */
+ if (mark_block_bitmap(block, 1))
+ return NULL;
+
+ addr = malloc(current_fs->blocksize);
+ if (!addr)
+ return NULL;
+ memset(addr, 0, current_fs->blocksize);
+
+ blk = malloc(sizeof(struct blk_cache));
+ if (!blk) {
+ free(addr);
+ return NULL;
+ }
+
+ blk->pblock = (ext4_fsblk_t)block;
+ blk->block_mem_addr = addr;
+ list_add(&(blk->head), &blk_cache_head);
+
+ return blk;
+ }
+
+
+ list_for_each(entry, &blk_cache_head) {
+
+ blk = list_entry(entry, struct blk_cache, head);
+
+ if (blk->pblock == pblock ) {
+ return blk;
+
+ }
+ }
+
+ /*
+ * No block present. Allocate a disk block and a memory equivalent
+ * add the same to the list and return the address
+ */
+
+ /* FIXME!! 48 bit block */
+ retval = ext2fs_new_block(current_fs, 0, NULL, (blk_t *)&block);
+ if (retval) {
+ /*
+ * In case we fail we don't write/sync previous update
+ * we don't want partial conversion
+ */
+ return NULL;
+ }
+ /*
+ * We need to update the file system bitmap also
+ * otherwise this block may be double allocated
+ */
+ if (mark_block_bitmap(block, 1))
+ return NULL;
+
+ addr = malloc(current_fs->blocksize);
+ if (!addr)
+ return NULL;
+ memset(addr, 0, current_fs->blocksize);
+
+ blk = malloc(sizeof(struct blk_cache));
+ if (!blk) {
+ free(addr);
+ return NULL;
+ }
+
+ blk->pblock = (ext4_fsblk_t)block;
+ blk->block_mem_addr = addr;
+ list_add(&(blk->head), &blk_cache_head);
+
+ return blk;
+
+}
+
+struct blk_cache * get_block(ext4_fsblk_t pblock)
+{
+
+ return __get_block(current_fs, pblock);
+}
+
+static int write_extent_block(ext2_filsys current_fs,
+ ext4_fsblk_t pblock, void *buf)
+{
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)buf;
+ struct ext3_extent_idx *ix;
+ ext4_fsblk_t tpblock;
+ struct blk_cache *blkc;
+ int i, retval;
+
+ if (eh->eh_depth == 0) {
+ retval = ext2fs_write_ext_block(current_fs, pblock, buf);
+ if (retval)
+ goto err_out;
+ } else {
+ ix = EXT_FIRST_INDEX(eh);
+ for (i = 0; i < eh->eh_entries; i++, ix++) {
+
+ tpblock = idx_pblock(ix);
+ blkc = get_block(tpblock);
+ if (!blkc) {
+ retval = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+ retval = write_extent_block(current_fs,
+ blkc->pblock,
+ blkc->block_mem_addr);
+ if (retval) {
+ goto err_out;
+ }
+ }
+ retval = ext2fs_write_ext_block(current_fs, pblock, buf);
+ if (retval)
+ goto err_out;
+
+ }
+
+err_out:
+
+ return retval;
+
+}
+
+
+static int write_extent_details(ext2_filsys current_fs, ext2_ino_t inode_num,
+ struct ext2_inode *inode, blk_t *i_block)
+{
+
+ struct ext3_extent_header *eh = (struct ext3_extent_header *)i_block;
+ struct ext3_extent_idx *ix;
+ ext4_fsblk_t pblock;
+ struct blk_cache *blkc;
+ int i, retval;
+
+
+ /* Write the blocks to the disk */
+ if (eh->eh_depth != 0) {
+ /* need to write blocks corresponding to extent_idx */
+ ix = EXT_FIRST_INDEX(eh);
+ for (i = 0; i < eh->eh_entries; i++, ix++) {
+
+ pblock = idx_pblock(ix);
+ blkc = get_block(pblock);
+ if (!blkc) {
+ retval = EXT2_ET_BLOCK_ALLOC_FAIL;
+ goto err_out;
+ }
+ retval = write_extent_block(current_fs,
+ blkc->pblock,
+ blkc->block_mem_addr);
+ if (retval) {
+ goto err_out;
+ }
+ }
+ }
+
+ /*
+ * Don't write to the file system. If we have
+ * multiple files to migrate we batch the block
+ * bitmap update
+ */
+ retval = write_block_bitmap(inode, 0);
+ if (retval) {
+ goto err_out;
+ }
+
+ inode->i_flags |= EXT4_EXTENTS_FL;
+ memcpy(inode->i_block, i_block, sizeof(inode->i_block));
+ /* write inode */
+ retval = ext2fs_write_inode(current_fs, inode_num, inode);
+ if (retval) {
+ undo_write_block_bitmap(inode, 0);
+ goto err_out;
+ }
+
+
+
+err_out:
+ return retval;
+
+}
+
+static int do_migrate(ext2_filsys current_fs,
+ ext2_ino_t inode_num, char *file_name)
+{
+ int retval, saved_migrate;
+ struct ext2_inode inode;
+
+ /* reset the i_block array */
+ init_iblock();
+
+ saved_migrate = migrate;
+
+
+ printf("%s inode number %d\n", file_name, inode_num);
+
+ retval = ext2fs_read_inode(current_fs, inode_num, &inode);
+ if (retval) {
+ com_err(file_name, retval,": Unable to get file inode number");
+ goto err_out;
+ }
+
+ if (inode.i_flags & EXT4_EXTENTS_FL) {
+ printf("Extent already enabled\n");
+ migrate = 0 ;
+ }
+
+ iterate_blocks(current_fs, inode_num);
+
+ if (migrate) {
+
+ /*
+ * Only if we have successfully walked the
+ * block mapping write the extent details
+ */
+ retval = write_extent_details(current_fs, inode_num,
+ &inode, i_block);
+ if (retval) {
+ com_err(file_name, 0,
+ "Failed to write extent details");
+ goto err_out;
+ }
+
+ /* dump the migrated extent details */
+ migrate = 0;
+ printf("Extent details after migrate\n");
+ iterate_blocks(current_fs, inode_num);
+ }
+
+ /* reset the value of migrate */
+ migrate = saved_migrate;
+
+err_out:
+ free_blk_cache();
+ free_blk_stat();
+ return retval;
+
+
+}
+
+static int dir_migrate_proc(ext2_ino_t dir EXT2FS_ATTR((unused)),
+ int entry EXT2FS_ATTR((unused)),
+ struct ext2_dir_entry *dirent,
+ int offset EXT2FS_ATTR((unused)),
+ int blocksize EXT2FS_ATTR((unused)),
+ char *buf EXT2FS_ATTR((unused)),
+ void *private EXT2FS_ATTR((unused)))
+{
+ int retval;
+ struct ext2_inode inode;
+ char file_name[EXT2_NAME_LEN];
+
+ if (dirent->inode == 0)
+ return 0;
+ if (((dirent->name_len&0xFF) == 1) && (dirent->name[0] == '.'))
+ return 0;
+ if (((dirent->name_len&0xFF) == 2) && (dirent->name[0] == '.') &&
+ (dirent->name[1] == '.')) {
+ return 0;
+ }
+
+ retval = ext2fs_read_inode(current_fs, dirent->inode, &inode);
+ if (retval) {
+ goto err_out;
+ }
+ if (LINUX_S_ISDIR(inode.i_mode)) {
+ /*
+ * Directory iterate this again
+ */
+ retval = ext2fs_dir_iterate2(current_fs, dirent->inode, 0, 0,
+ dir_migrate_proc, 0);
+ if (retval) {
+ goto err_out;
+ }
+
+
+ } else if (LINUX_S_ISREG(inode.i_mode)) {
+
+ /*
+ * properly NULL terminate the name
+ */
+ int this_len = ((dirent->name_len & 0xFF) < EXT2_NAME_LEN) ?
+ (dirent->name_len & 0xFF) : EXT2_NAME_LEN;
+
+ strncpy(file_name, dirent->name, this_len);
+ file_name[this_len] = '\0';
+
+ retval = do_migrate(current_fs, dirent->inode, file_name);
+ if (retval) {
+ goto err_out;
+ }
+
+ }
+
+ return 0;
+
+err_out:
+ retval = DIRENT_ABORT;
+ return retval;
+
+
+}
+static void recursive_migrate(ext2_filsys current_fs)
+{
+
+ int retval;
+
+ retval = ext2fs_dir_iterate2(current_fs, EXT2_ROOT_INO, 0, 0,
+ dir_migrate_proc, 0);
+ if (retval)
+ goto err_out;
+
+ if (migrate && !EXT2_HAS_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS)) {
+
+ EXT2_SET_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS);
+
+ }
+
+ if (migrate) {
+
+ ext2fs_mark_bb_dirty(current_fs);
+ ext2fs_mark_super_dirty(current_fs);
+
+ /*
+ * Write the block bitmap
+ */
+ retval = ext2fs_write_block_bitmap(current_fs);
+ if (retval == 0)
+ current_fs->super->s_state |= EXT2_VALID_FS;
+ }
+err_out:
+ ext2fs_close(current_fs);
+ return ;
+}
+
+main(int argc, char *argv[])
+{
+ char *dev_name, *file_name;
+ int flags, superblock = 0;
+ unsigned int block_size = 0;
+ int retval, i, recursive = 0;
+ ext2_ino_t inode_num;
+
+ if (argc < 3) {
+ usage(argv[0]);
+ }
+
+ if (strcmp(argv[1], "--migrate") == 0) {
+ migrate = 1;
+ } else if (strcmp(argv[1], "--display") == 0) {
+ migrate = 0;
+ } else {
+ usage(argv[0]);
+ }
+
+ dev_name = argv[2];
+
+ if (argc == 3) {
+ recursive = 1;
+ }
+
+ if (migrate) {
+ flags = EXT2_FLAG_SOFTSUPP_FEATURES | EXT2_FLAG_RW;
+ retval = ext2fs_open2(dev_name,"tdbfilename=/tmp/test.tdb&",
+ flags, superblock, block_size,
+ unix_cow_io_manager, ¤t_fs);
+ } else {
+ flags = EXT2_FLAG_SOFTSUPP_FEATURES;
+ retval = ext2fs_open2(dev_name, 0,
+ flags, superblock, block_size,
+ unix_cow_io_manager, ¤t_fs);
+ }
+
+#ifdef DEBUG
+ retval = ext2fs_open2(dev_name, 0, flags, superblock,
+ block_size, unix_io_manager, ¤t_fs);
+#endif
+
+ if (retval) {
+ com_err(dev_name, retval,": Unable to open the file system");
+ return;
+ }
+
+ retval = ext2fs_read_inode_bitmap(current_fs);
+ if (retval) {
+ com_err(dev_name, retval,": Unable to read inode bitmap");
+ goto err_out;
+ }
+ retval = ext2fs_read_block_bitmap(current_fs);
+ if (retval) {
+ com_err(dev_name, retval,": Unable to read block bitmap");
+ goto err_out;
+ }
+
+ /*
+ * initialize the list head for block cache and bloc stat
+ */
+ INIT_LIST_HEAD(&blk_cache_head);
+ INIT_LIST_HEAD(&blk_stat_head);
+
+ /* Mark the file system as unclean
+ * This will force a fsck if something goes wrong later
+ */
+ if (migrate)
+ current_fs->super->s_state &= ~EXT2_VALID_FS;
+
+ if (recursive) {
+ /*
+ * Migrate all the ext3 inode in the file
+ * system to extent map
+ */
+ recursive_migrate(current_fs);
+ return ;
+ }
+
+ for (i = 3; i < argc; i++) {
+
+ file_name = argv[i];
+ retval = ext2fs_namei(current_fs, EXT2_ROOT_INO,
+ EXT2_ROOT_INO, file_name, &inode_num);
+ if (retval) {
+ com_err(file_name, retval,
+ ": Unable to get file inode number");
+ goto err_out;
+ }
+
+ retval = do_migrate(current_fs, inode_num, file_name);
+ if (retval) {
+ /*
+ * If we fail to migrate then exit
+ */
+ goto err_out;
+ }
+ }
+
+ if (migrate && !EXT2_HAS_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS)) {
+
+ EXT2_SET_INCOMPAT_FEATURE(current_fs->super,
+ EXT3_FEATURE_INCOMPAT_EXTENTS);
+
+ }
+
+ if (migrate) {
+
+ ext2fs_mark_bb_dirty(current_fs);
+ ext2fs_mark_super_dirty(current_fs);
+
+ /*
+ * Write the block bitmap
+ */
+ retval = ext2fs_write_block_bitmap(current_fs);
+ if (retval == 0)
+ current_fs->super->s_state |= EXT2_VALID_FS;
+ }
+err_out:
+ ext2fs_close(current_fs);
+}
+
diff --git a/ext4migrate/migrate.h b/ext4migrate/migrate.h
new file mode 100644
index 0000000..7dbb93b
--- /dev/null
+++ b/ext4migrate/migrate.h
@@ -0,0 +1,30 @@
+
+#include "ext2fs/ext2_fs.h"
+#include "ext2fs/ext2fs.h"
+#include "blkid/list.h"
+
+typedef unsigned long long ext4_fsblk_t;
+
+extern ext2_filsys current_fs;
+
+#define EXT4_EXT_MAGIC 0xf30a
+#define EXT2_SET_INCOMPAT_FEATURE(sb,mask) \
+ ( EXT2_SB(sb)->s_feature_incompat |= (mask) )
+
+
+struct list_blocks_struct {
+ blk_t first_block, last_block;
+ e2_blkcnt_t first_bcnt, last_bcnt;
+};
+
+struct blk_stat {
+ struct list_head head;
+ ext4_fsblk_t pblock;
+ int dirty;
+};
+
+struct blk_cache {
+ struct list_head head;
+ ext4_fsblk_t pblock;
+ void * block_mem_addr;
+};
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 4/4] Add ext4replay tool.
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
This tool can be used to replay the transactions stored in the database
during ext4migrate.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
ext4migrate/Makefile.in | 7 ++++++-
ext4migrate/replay.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 1 deletions(-)
create mode 100644 ext4migrate/replay.c
diff --git a/ext4migrate/Makefile.in b/ext4migrate/Makefile.in
index b000ae7..8fe6291 100644
--- a/ext4migrate/Makefile.in
+++ b/ext4migrate/Makefile.in
@@ -21,7 +21,7 @@ DEPLIBS= $(LIBEXT2FS) $(LIBCOM_ERR)
@#cc -g -I../lib/ -Wunreachable-code -Wunused -Wunused-function
@#-Wunused-label -Wunused-parameter -Wunused-value -Wunused-variable -c migrate.c
-PROGS= ext4migrate
+PROGS= ext4migrate ext4replay
all:: $(PROGS)
@@ -29,6 +29,10 @@ ext4migrate: migrate.o extents.o $(DEPLIBS)
@echo " LD $@"
@$(CC) $(ALL_LDFLAGS) -o ext4migrate migrate.o extents.o $(LIBS)
+ext4replay: replay.o
+ @echo " LD $@"
+ @$(CC) $(ALL_LDFLAGS) -o ext4replay replay.o -ltdb
+
installdirs:
@echo " MKINSTALLDIRS $(root_sbindir)"
@$(MKINSTALLDIRS) $(DESTDIR)$(root_sbindir)
@@ -64,3 +68,4 @@ distclean: clean
#
migrate.o: $(srcdir)/migrate.c
extents.o: $(srcdir)/extents.c
+replay.o: $(srcdir)/replay.c
diff --git a/ext4migrate/replay.c b/ext4migrate/replay.c
new file mode 100644
index 0000000..8476416
--- /dev/null
+++ b/ext4migrate/replay.c
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <tdb.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+void usage(char *prg_name)
+{
+ fprintf(stderr, "Usage: %s <transaction file> <filesystem>\n", prg_name);
+ exit(1);
+
+}
+
+
+main(int argc, char *argv[])
+{
+ TDB_CONTEXT *tdb;
+ TDB_DATA key, data;
+ long long int location;
+ int fd;
+
+ if (argc != 3)
+ usage(argv[0]);
+
+ tdb = tdb_open(argv[1], 0, 0, O_RDONLY, 0600);
+
+ if (!tdb) {
+ printf("Failed tdb_open %s\n", strerror(errno));
+ exit(1);
+ }
+
+ fd = open(argv[2], O_WRONLY);
+
+ for (key = tdb_firstkey(tdb); key.dptr; key = tdb_nextkey(tdb, key)) {
+ data = tdb_fetch(tdb, key);
+ location = *(long long int *)key.dptr;
+ printf("Replayed transaction of size %d at location %lld\n", data.dsize, location);
+ lseek(fd, location, SEEK_SET);
+ write(fd, data.dptr, data.dsize);
+ }
+ close(fd);
+ tdb_close(tdb);
+
+}
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 4/4] Add ext4replay tool.
@ 2007-05-04 9:13 ` Aneesh Kumar K.V
0 siblings, 0 replies; 13+ messages in thread
From: Aneesh Kumar K.V @ 2007-05-04 9:13 UTC (permalink / raw)
To: linux-ext4; +Cc: aneesh.kumar
From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
This tool can be used to replay the transactions stored in the database
during ext4migrate.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
ext4migrate/Makefile.in | 7 ++++++-
ext4migrate/replay.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+), 1 deletions(-)
create mode 100644 ext4migrate/replay.c
diff --git a/ext4migrate/Makefile.in b/ext4migrate/Makefile.in
index b000ae7..8fe6291 100644
--- a/ext4migrate/Makefile.in
+++ b/ext4migrate/Makefile.in
@@ -21,7 +21,7 @@ DEPLIBS= $(LIBEXT2FS) $(LIBCOM_ERR)
@#cc -g -I../lib/ -Wunreachable-code -Wunused -Wunused-function
@#-Wunused-label -Wunused-parameter -Wunused-value -Wunused-variable -c migrate.c
-PROGS= ext4migrate
+PROGS= ext4migrate ext4replay
all:: $(PROGS)
@@ -29,6 +29,10 @@ ext4migrate: migrate.o extents.o $(DEPLIBS)
@echo " LD $@"
@$(CC) $(ALL_LDFLAGS) -o ext4migrate migrate.o extents.o $(LIBS)
+ext4replay: replay.o
+ @echo " LD $@"
+ @$(CC) $(ALL_LDFLAGS) -o ext4replay replay.o -ltdb
+
installdirs:
@echo " MKINSTALLDIRS $(root_sbindir)"
@$(MKINSTALLDIRS) $(DESTDIR)$(root_sbindir)
@@ -64,3 +68,4 @@ distclean: clean
#
migrate.o: $(srcdir)/migrate.c
extents.o: $(srcdir)/extents.c
+replay.o: $(srcdir)/replay.c
diff --git a/ext4migrate/replay.c b/ext4migrate/replay.c
new file mode 100644
index 0000000..8476416
--- /dev/null
+++ b/ext4migrate/replay.c
@@ -0,0 +1,45 @@
+#include <stdio.h>
+#include <tdb.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+void usage(char *prg_name)
+{
+ fprintf(stderr, "Usage: %s <transaction file> <filesystem>\n", prg_name);
+ exit(1);
+
+}
+
+
+main(int argc, char *argv[])
+{
+ TDB_CONTEXT *tdb;
+ TDB_DATA key, data;
+ long long int location;
+ int fd;
+
+ if (argc != 3)
+ usage(argv[0]);
+
+ tdb = tdb_open(argv[1], 0, 0, O_RDONLY, 0600);
+
+ if (!tdb) {
+ printf("Failed tdb_open %s\n", strerror(errno));
+ exit(1);
+ }
+
+ fd = open(argv[2], O_WRONLY);
+
+ for (key = tdb_firstkey(tdb); key.dptr; key = tdb_nextkey(tdb, key)) {
+ data = tdb_fetch(tdb, key);
+ location = *(long long int *)key.dptr;
+ printf("Replayed transaction of size %d at location %lld\n", data.dsize, location);
+ lseek(fd, location, SEEK_SET);
+ write(fd, data.dptr, data.dsize);
+ }
+ close(fd);
+ tdb_close(tdb);
+
+}
--
1.5.2.rc0.71.g4342-dirty
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH 1/4] Add unix COW io manager
@ 2007-05-07 13:01 ` Theodore Tso
0 siblings, 0 replies; 13+ messages in thread
From: Theodore Tso @ 2007-05-07 13:01 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: linux-ext4
On Fri, May 04, 2007 at 02:43:21PM +0530, Aneesh Kumar K.V wrote:
> + * unix_cow_io.c --- This is the Unix (well, really POSIX) implementation
> + * of the COW I/O manager.
I really wish you had based this not on unix_io.c, but rather on
test_io.c, for two reasons (a) this reduces code duplication, (b) it
means that we can chain/stack I/O managers. i.e., test_io ->
cow_manager -> unix_io.
The one change I would recommend is that instead of assigning to a
globally visible external variable (as I did in test_io, and which I
now regret as a not necessarily being portable --- different shared
library implementations have different restrictions on whether you can
access global variables defined in shared libraries from an
application or vice versa --- with AIX having some of the most peverse
shared library restritions, at least 7-8 years ago when I had the
misfortune of trying to make Kerberos v5 shared libraries work on AIX
:-). Instead, what I would recommend is creating a function which an
application could call to set the backing I/O manager for the
cow_manager, as well as the filenames to use for the tdb journal.
The second change I would recommend is to NOT call it "cow_manager"
but rather an "undo_manager", since that makes it clear that what is
being backed up is not the work that we are about to do, but rather
the original contents of the disk. I would also rename the
ext4_replay tool something like e2undofs, since "replay" has the
connotation that you are applying changes that were done in a log,
when in fact what you are really doing is backing out changes.
We want to make sure we avoid this confusion, since Xen and UML and
most other virtualization engines, for good reasons, do the exact
opposite --- the COW file contains the changes, and the base
file/device remains unchanged.
> + retval = tdb_store(data->tdb, tdb_key, tdb_data, TDB_INSERT);
> + if (retval == -1) {
> + /* FIXME!! Not sure what should be the error */
> + retval = EXT2_ET_SHORT_WRITE;
> + goto error_out;
> + }
You need to write the tdb_store() around a tdb_transaction_start() and
tdb_transaction_commit() pair, so that we can be sure the original
data is actually synced to disk. Otherwise, on a crash, we could have
a situation where the original data on disk has been overwritten, but
the backup of the original block was never forced to disk!
- Ted
^ permalink raw reply [flat|nested] 13+ messages in thread