linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zheng Liu <gnehzuil.liu@gmail.com>
To: George Spelvin <linux@horizon.com>
Cc: linux-ext4@vger.kernel.org
Subject: [PATCH] debugfs: dump a sparse file as a new sparse file
Date: Thu, 15 Nov 2012 22:46:13 +0800	[thread overview]
Message-ID: <20121115144613.GA11706@gmail.com> (raw)
In-Reply-To: <20121114040757.16656.qmail@science.horizon.com>

On Tue, Nov 13, 2012 at 11:07:57PM -0500, George Spelvin wrote:
> When dumping a file with some messed-up block pointers, I discovered
> that I had a huge, and fully allocated destination file.  (ls -s on the
> dump displayed some very large number, even though the original file
> only had about 8 blocks allocated.)
> 
> It would probably make more sense to seek over the gaps to create a
> sparse file.

Hi George,

Would you like to try this patch?

Thanks,
                                                - Zheng

Subject: [PATCH] debugfs: dump a sparse file as a new sparse file

From: Zheng Liu <wenqing.lz@taobao.com>

To dump a sparse file, ext2fs_file_read2() is added to extend the
interface.  It returns the size of the hole (in blocks) so that the caller can
call lseek64(2) to skip over it.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 debugfs/dump.c      | 16 ++++++++++++++--
 lib/ext2fs/ext2fs.h |  3 +++
 lib/ext2fs/fileio.c | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/debugfs/dump.c b/debugfs/dump.c
index a15a0b7..013c0c8 100644
--- a/debugfs/dump.c
+++ b/debugfs/dump.c
@@ -105,10 +105,11 @@ static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
 {
 	errcode_t retval;
 	struct ext2_inode	inode;
-	char 		buf[8192];
+	char 		buf[current_fs->blocksize];
 	ext2_file_t	e2_file;
 	int		nbytes;
 	unsigned int	got;
+	ext2_off64_t	seek;
 
 	if (debugfs_read_inode(ino, &inode, cmdname))
 		return;
@@ -119,11 +120,22 @@ static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
 		return;
 	}
 	while (1) {
-		retval = ext2fs_file_read(e2_file, buf, sizeof(buf), &got);
+		if (fd == 1)
+			retval = ext2fs_file_read(e2_file, buf, sizeof(buf),
+						  &got);
+		else
+			retval = ext2fs_file_read2(e2_file, buf, sizeof(buf),
+						   &got, &seek);
 		if (retval)
 			com_err(cmdname, retval, "while reading ext2 file");
 		if (got == 0)
 			break;
+		if (fd != 1) {
+			nbytes = lseek64(fd, current_fs->blocksize * seek,
+					 SEEK_CUR);
+			if (nbytes < 0)
+				com_err(cmdname, errno, "while lseeking file");
+		}
 		nbytes = write(fd, buf, got);
 		if ((unsigned) nbytes != got)
 			com_err(cmdname, errno, "while writing file");
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 9148d4e..8576b1e 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1171,6 +1171,9 @@ extern errcode_t ext2fs_file_close(ext2_file_t file);
 extern errcode_t ext2fs_file_flush(ext2_file_t file);
 extern errcode_t ext2fs_file_read(ext2_file_t file, void *buf,
 				  unsigned int wanted, unsigned int *got);
+extern errcode_t ext2fs_file_read2(ext2_file_t file, void *buf,
+				  unsigned int wanted, unsigned int *got,
+				  ext2_off64_t *seek);
 extern errcode_t ext2fs_file_write(ext2_file_t file, const void *buf,
 				   unsigned int nbytes, unsigned int *written);
 extern errcode_t ext2fs_file_llseek(ext2_file_t file, __u64 offset,
diff --git a/lib/ext2fs/fileio.c b/lib/ext2fs/fileio.c
index 1f7002c..d25ebb4 100644
--- a/lib/ext2fs/fileio.c
+++ b/lib/ext2fs/fileio.c
@@ -176,23 +176,26 @@ static errcode_t sync_buffer_position(ext2_file_t file)
  * This function loads the file's block buffer with valid data from
  * the disk as necessary.
  *
- * If dontfill is true, then skip initializing the buffer since we're
+ * If flags is true, then skip initializing the buffer since we're
  * going to be replacing its entire contents anyway.  If set, then the
  * function basically only sets file->physblock and EXT2_FILE_BUF_VALID
  */
 #define DONTFILL 1
-static errcode_t load_buffer(ext2_file_t file, int dontfill)
+#define SEEK	 2
+static errcode_t load_buffer(ext2_file_t file, int flags)
 {
 	ext2_filsys	fs = file->fs;
 	errcode_t	retval;
+	int		ret_flags;
+	int		valid = 1;
 
 	if (!(file->flags & EXT2_FILE_BUF_VALID)) {
 		retval = ext2fs_bmap2(fs, file->ino, &file->inode,
-				     BMAP_BUFFER, 0, file->blockno, 0,
+				     BMAP_BUFFER, 0, file->blockno, &ret_flags,
 				     &file->physblock);
 		if (retval)
 			return retval;
-		if (!dontfill) {
+		if (flags != DONTFILL) {
 			if (file->physblock) {
 				retval = io_channel_read_blk(fs->io,
 							     file->physblock,
@@ -202,7 +205,11 @@ static errcode_t load_buffer(ext2_file_t file, int dontfill)
 			} else
 				memset(file->buf, 0, fs->blocksize);
 		}
-		file->flags |= EXT2_FILE_BUF_VALID;
+		if (flags == SEEK && ((ret_flags & BMAP_RET_UNINIT) ||
+		    file->physblock == 0))
+			valid = 0;
+		if (valid)
+			file->flags |= EXT2_FILE_BUF_VALID;
 	}
 	return 0;
 }
@@ -227,20 +234,33 @@ errcode_t ext2fs_file_close(ext2_file_t file)
 errcode_t ext2fs_file_read(ext2_file_t file, void *buf,
 			   unsigned int wanted, unsigned int *got)
 {
+	return ext2fs_file_read2(file, buf, wanted, got, 0);
+}
+
+
+errcode_t ext2fs_file_read2(ext2_file_t file, void *buf,
+			    unsigned int wanted, unsigned int *got,
+			    ext2_off64_t *seek)
+{
 	ext2_filsys	fs;
 	errcode_t	retval = 0;
 	unsigned int	start, c, count = 0;
 	__u64		left;
 	char		*ptr = (char *) buf;
+	int		seek_cnt = 0;
+	int		flags = 0;
 
 	EXT2_CHECK_MAGIC(file, EXT2_ET_MAGIC_EXT2_FILE);
 	fs = file->fs;
 
+	if (seek)
+		flags = SEEK;
+
 	while ((file->pos < EXT2_I_SIZE(&file->inode)) && (wanted > 0)) {
 		retval = sync_buffer_position(file);
 		if (retval)
 			goto fail;
-		retval = load_buffer(file, 0);
+		retval = load_buffer(file, flags);
 		if (retval)
 			goto fail;
 
@@ -248,20 +268,26 @@ errcode_t ext2fs_file_read(ext2_file_t file, void *buf,
 		c = fs->blocksize - start;
 		if (c > wanted)
 			c = wanted;
-		left = EXT2_I_SIZE(&file->inode) - file->pos ;
+		left = EXT2_I_SIZE(&file->inode) - file->pos;
 		if (c > left)
 			c = left;
 
-		memcpy(ptr, file->buf+start, c);
 		file->pos += c;
-		ptr += c;
-		count += c;
-		wanted -= c;
+		if (file->flags & EXT2_FILE_BUF_VALID) {
+			memcpy(ptr, file->buf+start, c);
+			ptr += c;
+			count += c;
+			wanted -= c;
+		} else {
+			seek_cnt++;
+		}
 	}
 
 fail:
 	if (got)
 		*got = count;
+	if (seek)
+		*seek = seek_cnt;
 	return retval;
 }
 
-- 
1.7.12.rc2.18.g61b472e


  reply	other threads:[~2012-11-15 14:33 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-14  4:07 debugfs feature request: dump sparse files as sparse George Spelvin
2012-11-15 14:46 ` Zheng Liu [this message]
2013-01-01  2:08   ` debugfs: dump a sparse file as a new sparse file Theodore Ts'o
2013-01-01 12:33     ` Zheng Liu
2013-01-01 20:10     ` George Spelvin
2013-01-01 20:57       ` Theodore Ts'o
2013-01-01 21:25         ` George Spelvin
2013-01-01 22:47           ` Theodore Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121115144613.GA11706@gmail.com \
    --to=gnehzuil.liu@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux@horizon.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).