linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <darrick.wong@oracle.com>
To: tytso@mit.edu, darrick.wong@oracle.com
Cc: linux-ext4@vger.kernel.org
Subject: [PATCH 43/54] copy-in: for files, only iterate file blocks that are mapped
Date: Mon, 26 Jan 2015 23:40:15 -0800	[thread overview]
Message-ID: <20150127074015.13308.22535.stgit@birch.djwong.org> (raw)
In-Reply-To: <20150127073533.13308.44994.stgit@birch.djwong.org>

Rewrite the file copy-in algorithm to detect smaller holes in the
files we're copying in.  Use SEEK_DATA/SEEK_HOLE/FIEMAP when available
to skip known empty parts.  This fixes the particular bug where zeroed
blocks on a system with 64k pages are needlessly copied into a
4k-block filesystem.  It also saves time by skipping parts we know to
be zeroed.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 misc/create_inode.c |  280 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 196 insertions(+), 84 deletions(-)


diff --git a/misc/create_inode.c b/misc/create_inode.c
index 3bc0515..8ab546e 100644
--- a/misc/create_inode.c
+++ b/misc/create_inode.c
@@ -9,12 +9,22 @@
  * %End-Header%
  */
 
+#define _FILE_OFFSET_BITS       64
+#define _LARGEFILE64_SOURCE     1
+#define _GNU_SOURCE		1
+
+#include "config.h"
 #include <time.h>
+#include <sys/types.h>
 #include <unistd.h>
 #include <limits.h> /* for PATH_MAX */
 #ifdef HAVE_ATTR_XATTR_H
 #include <attr/xattr.h>
 #endif
+#include <sys/ioctl.h>
+#include <ext2fs/ext2fs.h>
+#include <ext2fs/ext2_types.h>
+#include <ext2fs/fiemap.h>
 
 #include "create_inode.h"
 #include "nls-enable.h"
@@ -28,14 +38,7 @@
 #endif
 
 /* 64KiB is the minimium blksize to best minimize system call overhead. */
-#ifndef IO_BUFSIZE
-#define IO_BUFSIZE 64*1024
-#endif
-
-/* Block size for `st_blocks' */
-#ifndef S_BLKSIZE
-#define S_BLKSIZE 512
-#endif
+#define COPY_FILE_BUFLEN	65536
 
 static int ext2_file_type(unsigned int mode)
 {
@@ -139,10 +142,9 @@ static errcode_t set_inode_xattr(ext2_filsys fs, ext2_ino_t ino, const char *fil
 {
 #ifdef HAVE_LLISTXATTR
 	errcode_t			retval, close_retval;
-	struct ext2_inode		inode;
 	struct ext2_xattr_handle	*handle;
 	ssize_t				size, value_size;
-	char				*list;
+	char				*list = NULL;
 	int				i;
 
 	size = llistxattr(filename, NULL, 0);
@@ -382,82 +384,202 @@ try_again:
 	return retval;
 }
 
-static errcode_t copy_file(ext2_filsys fs, int fd, ext2_ino_t newfile,
-			   int bufsize, int make_holes)
+#if !defined HAVE_PREAD64 && !defined HAVE_PREAD
+static ssize_t my_pread(int fd, const void *buf, size_t count, off_t offset)
 {
-	ext2_file_t	e2_file;
-	errcode_t	retval, close_ret;
-	int		got;
-	unsigned int	written;
-	char		*buf;
-	char		*ptr;
-	char		*zero_buf;
-	int		cmp;
-
-	retval = ext2fs_file_open(fs, newfile,
-				  EXT2_FILE_WRITE, &e2_file);
-	if (retval)
-		return retval;
-
-	retval = ext2fs_get_mem(bufsize, &buf);
-	if (retval) {
-		com_err("copy_file", retval, "can't allocate buffer\n");
-		goto out_close;
-	}
+	if (lseek(fd, offset, SEEK_SET) < 0)
+		return 0;
 
-	/* This is used for checking whether the whole block is zero */
-	retval = ext2fs_get_memzero(bufsize, &zero_buf);
-	if (retval) {
-		com_err("copy_file", retval, "can't allocate zero buffer\n");
-		goto out_free_buf;
-	}
+	return read(fd, buf, count);
+}
+#endif /* !defined HAVE_PREAD64 && !defined HAVE_PREAD */
 
-	while (1) {
-		got = read(fd, buf, bufsize);
-		if (got == 0)
-			break;
+static errcode_t copy_file_range(ext2_filsys fs, int fd, ext2_file_t e2_file,
+				 off_t start, off_t end, char *buf,
+				 char *zerobuf)
+{
+	off_t off, bpos;
+	ssize_t got, blen;
+	unsigned int written;
+	char *ptr;
+	errcode_t err = 0;
+
+	for (off = start; off < end; off += COPY_FILE_BUFLEN) {
+#ifdef HAVE_PREAD64
+		got = pread64(fd, buf, COPY_FILE_BUFLEN, off);
+#elif HAVE_PREAD
+		got = pread(fd, buf, COPY_FILE_BUFLEN, off);
+#else
+		got = my_pread(fd, buf, COPY_FILE_BUFLEN, off);
+#endif
 		if (got < 0) {
-			retval = errno;
+			err = errno;
 			goto fail;
 		}
-		ptr = buf;
-
-		/* Sparse copy */
-		if (make_holes) {
-			/* Check whether all is zero */
-			cmp = memcmp(ptr, zero_buf, got);
-			if (cmp == 0) {
-				 /* The whole block is zero, make a hole */
-				retval = ext2fs_file_lseek(e2_file, got,
-							   EXT2_SEEK_CUR,
-							   NULL);
-				if (retval)
+		for (bpos = 0, ptr = buf; bpos < got; bpos += fs->blocksize) {
+			blen = fs->blocksize;
+			if (blen > got - bpos)
+				blen = got - bpos;
+			if (memcmp(ptr, zerobuf, blen) == 0) {
+				ptr += blen;
+				continue;
+			}
+			err = ext2fs_file_lseek(e2_file, off + bpos,
+						EXT2_SEEK_SET, NULL);
+			if (err)
+				goto fail;
+			while (blen > 0) {
+				err = ext2fs_file_write(e2_file, ptr, blen,
+							&written);
+				if (err)
+					goto fail;
+				if (written == 0) {
+					err = EIO;
 					goto fail;
-				got = 0;
+				}
+				blen -= written;
+				ptr += written;
 			}
 		}
+	}
+fail:
+	return err;
+}
 
-		/* Normal copy */
-		while (got > 0) {
-			retval = ext2fs_file_write(e2_file, ptr,
-						   got, &written);
-			if (retval)
-				goto fail;
-
-			got -= written;
-			ptr += written;
+static errcode_t try_lseek_copy(ext2_filsys fs, int fd, struct stat *statbuf,
+				ext2_file_t e2_file, char *buf, char *zerobuf)
+{
+#if defined(SEEK_DATA) && defined(SEEK_HOLE)
+	off_t data = 0, hole;
+	off_t data_blk, hole_blk;
+	errcode_t err;
+
+	/* Try to use SEEK_DATA and SEEK_HOLE */
+	while (data < statbuf->st_size) {
+		data = lseek(fd, data, SEEK_DATA);
+		if (data < 0) {
+			if (errno == ENXIO)
+				break;
+			return EXT2_ET_UNIMPLEMENTED;
 		}
+		hole = lseek(fd, data, SEEK_HOLE);
+		if (hole < 0)
+			return EXT2_ET_UNIMPLEMENTED;
+
+		data_blk = data & ~(fs->blocksize - 1);
+		hole_blk = (hole + (fs->blocksize - 1)) & ~(fs->blocksize - 1);
+		err = copy_file_range(fs, fd, e2_file, data_blk, hole_blk, buf,
+				      zerobuf);
+		if (err)
+			return err;
+
+		data = hole;
 	}
 
-fail:
-	ext2fs_free_mem(&zero_buf);
-out_free_buf:
+	return err;
+#else
+	return EXT2_ET_UNIMPLEMENTED;
+#endif /* SEEK_DATA and SEEK_HOLE */
+}
+
+static errcode_t try_fiemap_copy(ext2_filsys fs, int fd, ext2_file_t e2_file,
+				 char *buf, char *zerobuf)
+{
+#if defined(FS_IOC_FIEMAP)
+#define EXTENT_MAX_COUNT 512
+	struct fiemap *fiemap_buf;
+	struct fiemap_extent *ext_buf, *ext;
+	int ext_buf_size, fie_buf_size;
+	off_t pos = 0;
+	unsigned int i;
+	errcode_t err;
+
+	ext_buf_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);
+	fie_buf_size = sizeof(struct fiemap) + ext_buf_size;
+
+	err = ext2fs_get_memzero(fie_buf_size, &fiemap_buf);
+	if (err)
+		return err;
+
+	ext_buf = fiemap_buf->fm_extents;
+	memset(fiemap_buf, 0, fie_buf_size);
+	fiemap_buf->fm_length = FIEMAP_MAX_OFFSET;
+	fiemap_buf->fm_flags |= FIEMAP_FLAG_SYNC;
+	fiemap_buf->fm_extent_count = EXTENT_MAX_COUNT;
+
+	do {
+		fiemap_buf->fm_start = pos;
+		memset(ext_buf, 0, ext_buf_size);
+		err = ioctl(fd, FS_IOC_FIEMAP, fiemap_buf);
+		if (err < 0 && (errno == EOPNOTSUPP || errno == ENOTTY)) {
+			err = EXT2_ET_UNIMPLEMENTED;
+			goto out;
+		} else if (err < 0 || fiemap_buf->fm_mapped_extents == 0) {
+			err = errno;
+			goto out;
+		}
+		for (i = 0, ext = ext_buf; i < fiemap_buf->fm_mapped_extents;
+		     i++, ext++) {
+			err = copy_file_range(fs, fd, e2_file, ext->fe_logical,
+					      ext->fe_logical + ext->fe_length,
+					      buf, zerobuf);
+			if (err)
+				goto out;
+		}
+
+		ext--;
+		/* Record file's logical offset this time */
+		pos = ext->fe_logical + ext->fe_length;
+		/*
+		 * If fm_extents array has been filled and
+		 * there are extents left, continue to cycle.
+		 */
+	} while (fiemap_buf->fm_mapped_extents == EXTENT_MAX_COUNT &&
+		 !(ext->fe_flags & FIEMAP_EXTENT_LAST));
+out:
+	ext2fs_free_mem(&fiemap_buf);
+	return err;
+#else
+	return EXT2_ET_UNIMPLEMENTED;
+#endif /* FS_IOC_FIEMAP */
+}
+
+static errcode_t copy_file(ext2_filsys fs, int fd, struct stat *statbuf,
+			   ext2_ino_t ino)
+{
+	ext2_file_t e2_file;
+	char *buf = NULL, *zerobuf = NULL;
+	errcode_t err, close_err;
+
+	err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &e2_file);
+	if (err)
+		return err;
+
+	err = ext2fs_get_mem(COPY_FILE_BUFLEN, &buf);
+	if (err)
+		goto out;
+
+	err = ext2fs_get_memzero(fs->blocksize, &zerobuf);
+	if (err)
+		goto out;
+
+	err = try_lseek_copy(fs, fd, statbuf, e2_file, buf, zerobuf);
+	if (err != EXT2_ET_UNIMPLEMENTED)
+		goto out;
+
+	err = try_fiemap_copy(fs, fd, e2_file, buf, zerobuf);
+	if (err != EXT2_ET_UNIMPLEMENTED)
+		goto out;
+
+	err = copy_file_range(fs, fd, e2_file, 0, statbuf->st_size, buf,
+			      zerobuf);
+out:
+	ext2fs_free_mem(&zerobuf);
 	ext2fs_free_mem(&buf);
-out_close:
-	close_ret = ext2fs_file_close(e2_file);
-	if (retval == 0)
-		retval = close_ret;
-	return retval;
+	close_err = ext2fs_file_close(e2_file);
+	if (err == 0)
+		err = close_err;
+	return err;
 }
 
 static int is_hardlink(struct hdlinks_s *hdlinks, dev_t dev, ino_t ino)
@@ -481,8 +603,6 @@ errcode_t do_write_internal(ext2_filsys fs, ext2_ino_t cwd, const char *src,
 	ext2_ino_t	newfile;
 	errcode_t	retval;
 	struct ext2_inode inode;
-	int		bufsize = IO_BUFSIZE;
-	int		make_holes = 0;
 
 	fd = ext2fs_open_file(src, O_RDONLY, 0);
 	if (fd < 0) {
@@ -570,17 +690,9 @@ errcode_t do_write_internal(ext2_filsys fs, ext2_ino_t cwd, const char *src,
 		}
 	}
 	if (LINUX_S_ISREG(inode.i_mode)) {
-		if (statbuf.st_blocks < statbuf.st_size / S_BLKSIZE) {
-			make_holes = 1;
-			/*
-			 * Use I/O blocksize as buffer size when
-			 * copying sparse files.
-			 */
-			bufsize = statbuf.st_blksize;
-		}
-		retval = copy_file(fs, fd, newfile, bufsize, make_holes);
+		retval = copy_file(fs, fd, &statbuf, newfile);
 		if (retval)
-			com_err("copy_file", retval, 0);
+			com_err("copy_file", retval, _("while copying %s"), src);
 	}
 	close(fd);
 


  parent reply	other threads:[~2015-01-27  7:40 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-27  7:35 [PATCH 00/54] e2fsprogs January 2015 patchbomb Darrick J. Wong
2015-01-27  7:35 ` [PATCH 01/54] misc: fix minor testcase problems Darrick J. Wong
2015-01-27 15:55   ` Theodore Ts'o
2015-01-27  7:35 ` [PATCH 02/54] debugfs: document new commands Darrick J. Wong
2015-01-27 15:56   ` Theodore Ts'o
2015-01-27  7:35 ` [PATCH 03/54] debugfs: fix crash in ea_set argument handling Darrick J. Wong
2015-01-27 15:58   ` Theodore Ts'o
2015-01-27  7:35 ` [PATCH 04/54] libext2fs: initialize i_extra_isize when writing EAs Darrick J. Wong
2015-01-27 16:02   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 05/54] libext2fs: avoid pointless EA block allocation Darrick J. Wong
2015-01-27 16:07   ` Theodore Ts'o
2015-01-27 19:26     ` Darrick J. Wong
2015-01-27  7:36 ` [PATCH 06/54] libext2fs: strengthen i_extra_isize checks when reading/writing xattrs Darrick J. Wong
2015-01-27 16:08   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 07/54] libext2fs: fix tdb.c mmap leak Darrick J. Wong
2015-01-27 16:09   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 08/54] resize2fs: fix regression test to not depend on ext4.ko being loaded Darrick J. Wong
2015-01-27 16:10   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 09/54] tune2fs: disable csum verification before resizing inode Darrick J. Wong
2015-01-27 16:11   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 10/54] tune2fs: abort when trying to enable/disable metadata_csum on mounted fs Darrick J. Wong
2015-01-27 16:26   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 11/54] tune2fs: call out to resize2fs for 64bit conversion Darrick J. Wong
2015-01-27 16:31   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 12/54] e2fsck: clear i_block[] when there are too many bad mappings on a special inode Darrick J. Wong
2015-01-27 16:32   ` Theodore Ts'o
2015-01-27  7:36 ` [PATCH 13/54] e2fsck: on read error, don't rewrite blocks past the end of the fs Darrick J. Wong
2015-01-27 17:35   ` Theodore Ts'o
2015-01-28 23:35     ` Darrick J. Wong
2015-01-27  7:37 ` [PATCH 14/54] e2fsck: fix the journal recreation message Darrick J. Wong
2015-01-27 18:02   ` Theodore Ts'o
2015-01-27 19:37     ` Darrick J. Wong
2015-01-27  7:37 ` [PATCH 15/54] e2fsck: handle multiple *ind block collisions with critical metadata Darrick J. Wong
2015-01-28 13:52   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 16/54] e2fsck: decrement bad count _after_ remapping a duplicate block Darrick J. Wong
2015-01-28 13:58   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 17/54] e2fsck: inspect inline dir data as two directory blocks Darrick J. Wong
2015-01-28 15:16   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 18/54] e2fsck: improve the inline directory detector Darrick J. Wong
2015-01-28 16:38   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 19/54] e2fsck: salvage under-sized dirents by removing them Darrick J. Wong
2015-02-16 15:40   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 20/54] e2fsck: add a 'yes to all' response in interactive mode Darrick J. Wong
2015-03-29  2:54   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 21/54] libext2fs: zero blocks via FALLOC_FL_ZERO_RANGE in ext2fs_zero_blocks Darrick J. Wong
2015-03-29  3:46   ` Theodore Ts'o
2015-01-27  7:37 ` [PATCH 22/54] libext2fs: ext2fs_new_block2() should call alloc_block hook Darrick J. Wong
2015-03-29  3:08   ` Theodore Ts'o
2015-01-27  7:38 ` [PATCH 23/54] libext2fs: Support readonly filesystem images Darrick J. Wong
2015-03-19 21:32   ` [PATCH v2 " Darrick J. Wong
2015-03-29  3:42     ` Theodore Ts'o
2015-01-27  7:38 ` [PATCH 24/54] libext2fs/e2fsck: provide routines to read-ahead metadata Darrick J. Wong
2015-01-27  7:38 ` [PATCH 25/54] e2fsck: read-ahead metadata during passes 1, 2, and 4 Darrick J. Wong
2015-01-27  7:38 ` [PATCH 26/54] e2fsck: track directories to be rehashed with a bitmap Darrick J. Wong
2015-01-27  7:38 ` [PATCH 27/54] e2fsck: rebuild sparse extent trees/convert non-extent ext3 files Darrick J. Wong
2015-03-19 21:42   ` [PATCH v4 " Darrick J. Wong
2015-01-27  7:38 ` [PATCH 28/54] tests: verify proper rebuilding of sparse extent trees and block map file conversion Darrick J. Wong
2015-01-27  7:38 ` [PATCH 29/54] undo-io: add new calls to and speed up the undo io manager Darrick J. Wong
2015-01-27  7:38 ` [PATCH 30/54] undo-io: be more flexible about setting block size Darrick J. Wong
2015-01-27  7:38 ` [PATCH 31/54] undo-io: use a bitmap to track what we've already written Darrick J. Wong
2015-01-27  7:39 ` [PATCH 32/54] e2undo: fix memory leaks and tweak the error messages somewhat Darrick J. Wong
2015-01-27  7:39 ` [PATCH 33/54] e2undo: ditch tdb file, write everything to a flat file Darrick J. Wong
2015-01-27  7:39 ` [PATCH 34/54] libext2fs: support atexit cleanups Darrick J. Wong
2015-01-27  7:39 ` [PATCH 35/54] e2fsck: optionally create an undo file Darrick J. Wong
2015-01-27  7:39 ` [PATCH 36/54] resize2fs: optionally create " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 37/54] tune2fs: " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 38/54] mke2fs: " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 39/54] debugfs: " Darrick J. Wong
2015-01-27  7:39 ` [PATCH 40/54] tests: test undo file creation in e2fsck/resize2fs/tune2fs/mke2fs Darrick J. Wong
2015-01-27  7:40 ` [PATCH 41/54] tests: test various features of the new e2undo format Darrick J. Wong
2015-01-27  7:40 ` [PATCH 42/54] copy-in: create hardlinks with the correct directory filetype Darrick J. Wong
2015-01-27  7:40 ` Darrick J. Wong [this message]
2015-01-27  7:40 ` [PATCH 44/54] copyin: fix error handling Darrick J. Wong
2015-01-27  7:40 ` [PATCH 45/54] mke2fs: add simple tests and re-alphabetize mke2fs manpage options Darrick J. Wong
2015-01-27  7:40 ` [PATCH 46/54] contrib: script to create minified ext4 image from a directory Darrick J. Wong
2015-01-27  7:40 ` [PATCH 47/54] libext2fs: support allocating uninit blocks in bmap2() Darrick J. Wong
2015-01-27  7:40 ` [PATCH 48/54] libext2fs: find/alloc a range of empty blocks Darrick J. Wong
2015-01-27  7:40 ` [PATCH 49/54] libext2fs: add new hooks to support large allocations Darrick J. Wong
2015-01-27  7:41 ` [PATCH 50/54] libext2fs: implement fallocate Darrick J. Wong
2015-01-27  7:41 ` [PATCH 51/54] libext2fs: use fallocate for creating journals and hugefiles Darrick J. Wong
2015-01-27  7:41 ` [PATCH 52/54] debugfs: implement fallocate Darrick J. Wong
2015-01-27  7:41 ` [PATCH 53/54] tests: test debugfs punch command Darrick J. Wong
2015-03-19 21:44 ` [PATCH 55/54] e2fsck: actually fix inline_data flags problems when user says to do so Darrick J. Wong
2015-03-29  4:05   ` Theodore Ts'o
2015-03-19 21:45 ` [PATCH 56/54] libext2fs: zero hash in ibody extended attributes Darrick J. Wong
2015-03-29  4:13   ` Theodore Ts'o
2015-03-19 21:47 ` [PATCH 57/54] e2fsck: convert block-mapped files to extents on bigalloc fs Darrick J. Wong
2015-03-19 23:54 ` [PATCH 58/54] e2fsck: turn inline data symlink into a fast symlink when possible Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150127074015.13308.22535.stgit@birch.djwong.org \
    --to=darrick.wong@oracle.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for its NNTP newsgroup(s).