linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Sandeen <sandeen@redhat.com>
To: ext4 development <linux-ext4@vger.kernel.org>, xfs-oss <xfs@oss.sgi.com>
Subject: sparsify - utility to punch out blocks of 0s in a file
Date: Sat, 04 Feb 2012 14:04:00 -0600	[thread overview]
Message-ID: <4F2D8F30.3090802@redhat.com> (raw)

Now that ext4, xfs, & ocfs2 can support punch hole, a tool to
"re-sparsify" a file by punching out ranges of 0s might be in order.

I whipped this up fast; it probably has bugs & off-by-ones, but I thought
I'd send it out.  It's not terribly efficient doing 4k reads by default,
I suppose.

I'll see if util-linux wants it after it gets beat into shape.
(or did a tool like this already exist and I missed it?)

(Another mode which does a file copy, possibly from stdin
might be good, like e2fsprogs/contrib/make-sparse.c ?  Although
that can be hacked up with cp already).

It works like this:

[root@inode sparsify]# ./sparsify  -h
Usage: sparsify [-m min hole size] [-o offset] [-l length] filename

[root@inode sparsify]# dd if=/dev/zero of=fsfile bs=1M count=512
[root@inode sparsify]# mkfs.xfs fsfile >/dev/null
[root@inode sparsify]# du -hc fsfile
512M	fsfile
512M	total
[root@inode sparsify]# ./sparsify fsfile
punching out holes of minimum size 4096 in range 0-536870912
[root@inode sparsify]# du -hc fsfile
129M	fsfile
129M	total
[root@inode sparsify]# xfs_repair fsfile
Phase 1 - find and verify superblock...
<snip>
Phase 7 - verify and correct link counts...
done
[root@inode sparsify]# echo $?
0
[root@inode sparsify]# 

/*
 * sparsify - utility to punch out blocks of 0s in a file
 *
 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
 * Written by Eric Sandeen <sandeen@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/* Feature-test macros must be defined before the first #include. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE		/* glibc: declares fallocate() and loff_t */
#endif
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64	/* 64-bit off_t on 32-bit hosts */
#endif

#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <linux/falloc.h>

#ifndef FALLOC_FL_PUNCH_HOLE
#define FALLOC_FL_PUNCH_HOLE    0x02 /* de-allocates range */
#endif

/* Write the command-line synopsis to stdout and terminate with a failure status. */
void usage(void)
{
	fputs("Usage: sparsify [-m min hole size] [-o offset] [-l length] filename\n",
	      stdout);
	exit(EXIT_FAILURE);
}

/*
 * Convert a count of binary units (2^60 ... 2^10 bytes) into bytes.
 * The argument is widened to long long before the shift; no overflow
 * or sign check is performed here.
 */
#define EXABYTES(x)     ((long long)(x) << 60)
#define PETABYTES(x)    ((long long)(x) << 50)
#define TERABYTES(x)    ((long long)(x) << 40)
#define GIGABYTES(x)    ((long long)(x) << 30)
#define MEGABYTES(x)    ((long long)(x) << 20)
#define KILOBYTES(x)    ((long long)(x) << 10)

/*
 * Bit-mask based rounding helpers.
 * __round_mask(x, y) is (y)-1 converted to the type of x, so these only
 * give a correct result when y is a power of two.
 * round_up(x, y) yields the smallest multiple of y that is >= x;
 * round_down(x, y) yields the largest multiple of y that is <= x.
 * Both evaluate x more than once; avoid arguments with side effects.
 */
#define __round_mask(x, y) ((__typeof__(x))((y)-1))
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
#define round_down(x, y) ((x) & ~__round_mask(x, y))

/* Verbosity level; main() increments it once for every -v option seen. */
int debug;

/*
 * cvtnum - parse a byte count with an optional binary-unit suffix
 *
 * @s: NUL-terminated string such as "4096", "0x1000", "4k" or "2G".
 *     The number is parsed by strtoll() with base 0 (decimal, octal with a
 *     leading 0, or hex with a leading 0x).  It may be followed by exactly
 *     one suffix character, case-insensitive: k, m, g, t, p or e, meaning
 *     2^10, 2^20, 2^30, 2^40, 2^50 or 2^60 bytes respectively.
 *
 * Returns the value in bytes.  Returns -1 when the string holds no number,
 * has trailing garbage or an unknown suffix, is out of range for long long,
 * or when applying the suffix would overflow or the suffixed number is
 * negative.  A negative number without a suffix is returned unchanged, so
 * callers must treat any negative result as invalid.
 */
long long
cvtnum(char *s)
{
	long long	i;
	char		*sp;
	int		shift;

	errno = 0;
	i = strtoll(s, &sp, 0);
	if (sp == s || errno == ERANGE)
		return -1LL;
	if (*sp == '\0')
		return i;
	if (sp[1] != '\0')
		return -1LL;

	/* <ctype.h> functions require a value representable as unsigned char */
	switch (tolower((unsigned char)*sp)) {
	case 'k':
		shift = 10;
		break;
	case 'm':
		shift = 20;
		break;
	case 'g':
		shift = 30;
		break;
	case 't':
		shift = 40;
		break;
	case 'p':
		shift = 50;
		break;
	case 'e':
		shift = 60;
		break;
	default:
		return -1LL;
	}

	/*
	 * Left-shifting a negative value, or shifting bits out of the top of
	 * a signed type, is undefined behavior: reject both up front.
	 */
	if (i < 0 || i > (LLONG_MAX >> shift))
		return -1LL;

	return i << shift;
}

/*
 * punch_hole - deallocate a byte range of a file, leaving its size unchanged
 *
 * @fd:     descriptor of the file, open for writing
 * @offset: first byte of the range to deallocate
 * @len:    number of bytes to deallocate
 *
 * Calls fallocate() with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; the
 * punch-hole flag has to be combined with the keep-size flag.  When the
 * global debug level is non-zero the range is printed first.
 *
 * Returns 0 on success.  On failure the error is reported with perror()
 * and the process exits with EXIT_FAILURE, so the function never returns
 * a non-zero value.
 */
int punch_hole(int fd, off_t offset, off_t len)
{
	if (debug)
		printf("punching at %lld len %lld\n",
		       (long long)offset, (long long)len);

	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      offset, len) < 0) {
		perror("punch failed");
		exit(EXIT_FAILURE);
	}

	return 0;
}

/*
 * Punch the pending run of zeros [offset, offset + len), provided one is
 * being tracked (offset >= 0) and it is at least min_hole bytes long.
 */
static void punch_pending(int fd, off_t offset, off_t len, off_t min_hole)
{
	if (offset < 0)
		return;

	if (len >= min_hole)
		punch_hole(fd, offset, len);
	else if (debug > 1)
		printf("skipping hole of insufficient size %lld\n",
		       (long long)len);
}

/*
 * Read [start, end) of fd in chunks of at most min_hole bytes and punch out
 * every run of consecutive all-zero chunks that is at least min_hole long.
 * Only bytes actually returned by read() are inspected and counted, and no
 * read ever extends past end.  Reaching EOF before end is not an error.
 *
 * Returns 0 on success, -1 after reporting an allocation, seek or read
 * failure with perror().
 */
static int scan_range(int fd, off_t start, off_t end, off_t min_hole)
{
	char	*readbuf, *zerobuf;
	off_t	cur_offset = start;
	off_t	punch_offset = -1;	/* -1: no run of zeros being tracked */
	off_t	punch_len = 0;
	ssize_t	ret = 0;
	int	error = -1;

	readbuf = malloc(min_hole);
	zerobuf = calloc(1, min_hole);
	if (!readbuf || !zerobuf) {
		perror("buffer allocation failed");
		goto out;
	}

	/* Move to the start of the requested range */
	if (lseek(fd, start, SEEK_SET) < 0) {
		perror("seek failed");
		goto out;
	}

	while (cur_offset < end) {
		size_t	want = min_hole;

		/* never look at (or later punch) bytes beyond the range */
		if (end - cur_offset < min_hole)
			want = end - cur_offset;

		ret = read(fd, readbuf, want);
		if (ret <= 0)
			break;

		/* compare only what was read; the buffer tail may be stale */
		if (!memcmp(readbuf, zerobuf, ret)) {
			/* Chunk of zeros, so extend the pending run */
			if (punch_offset < 0)
				punch_offset = cur_offset;
			punch_len += ret;
			if (debug > 1)
				printf("found zeros at %lld, hole len now %lld\n",
				       (long long)cur_offset,
				       (long long)punch_len);
		} else {
			/* Found nonzero byte; punch the run that just ended */
			punch_pending(fd, punch_offset, punch_len, min_hole);
			punch_offset = -1;
			punch_len = 0;
		}

		cur_offset += ret;
	}

	if (ret < 0) {
		perror("read failed");
		goto out;
	}

	/* punch a run that extends to the end of the range or to EOF */
	punch_pending(fd, punch_offset, punch_len, min_hole);
	error = 0;
out:
	free(readbuf);
	free(zerobuf);
	return error;
}

/*
 * sparsify [-m min hole size] [-o offset] [-l length] [-v] filename
 *
 * Opens the named file read-write, normalizes the requested range to the
 * filesystem block size (start rounded down, end rounded up, minimum hole
 * size rounded up and defaulting to one block), then scans the range and
 * punches out runs of zeros.  -v raises the debug level; -h prints usage.
 *
 * Returns 0 on success; exits with EXIT_FAILURE on invalid arguments or
 * any I/O error.
 */
int main(int argc, char **argv)
{
	int	fd;
	char	*fname;
	int	opt;
	off_t	min_hole = 0;
	off_t	punch_range_start = 0;
	off_t	punch_range_len = 0;
	off_t	punch_range_end;
	unsigned long blocksize;
	struct statvfs statvfsbuf;
	struct stat statbuf;

	while ((opt = getopt(argc, argv, "m:l:o:vh")) != -1) {
		switch (opt) {
		case 'm':
			min_hole = cvtnum(optarg);
			break;
		case 'o':
			punch_range_start = cvtnum(optarg);
			break;
		case 'l':
			punch_range_len = cvtnum(optarg);
			break;
		case 'v':
			debug++;
			break;
		case 'h':
		default:
			usage();
		}
	}

	/* cvtnum() reports every kind of bad input as a negative value */
	if (min_hole < 0) {
		printf("Error: invalid min hole value specified\n");
		usage();
	}

	if (punch_range_len < 0) {
		printf("Error: invalid length value specified\n");
		usage();
	}

	if (punch_range_start < 0) {
		printf("Error: invalid offset value specified\n");
		usage();
	}

	if (optind == argc) {
		printf("Error: no filename specified\n");
		usage();
	}

	fname = argv[optind++];

	fd = open(fname, O_RDWR);
	if (fd < 0) {
		perror("Error opening file");
		exit(EXIT_FAILURE);
	}

	if (fstat(fd, &statbuf) < 0) {
		perror("Error stat-ing file");
		exit(EXIT_FAILURE);
	}

	if (fstatvfs(fd, &statvfsbuf) < 0) {
		perror("Error stat-ing fs");
		exit(EXIT_FAILURE);
	}

	blocksize = statvfsbuf.f_bsize;
	if (debug)
		printf("blocksize is %lu\n", blocksize);

	/* round_up()/round_down() are only correct for power-of-two sizes */
	if (!blocksize || (blocksize & (blocksize - 1))) {
		printf("Error: unsupported filesystem block size\n");
		exit(EXIT_FAILURE);
	}

	/*
	 * Default range end is end of file.  With an explicit length, test
	 * against the file size by subtraction so start + len cannot overflow.
	 */
	if (!punch_range_len) {
		punch_range_end = statbuf.st_size;
	} else if (punch_range_start > statbuf.st_size ||
		   punch_range_len > statbuf.st_size - punch_range_start) {
		printf("Error: range extends past EOF\n");
		exit(EXIT_FAILURE);
	} else {
		punch_range_end = punch_range_start + punch_range_len;
	}

	if (debug)
		printf("orig start/end/min %lld/%lld/%lld\n",
		       (long long)punch_range_start,
		       (long long)punch_range_end, (long long)min_hole);

	/*
	 * Normalize to blocksize-aligned range:
	 * round start down, round end up - get all blocks including the
	 * range specified
	 */
	punch_range_start = round_down(punch_range_start, blocksize);
	punch_range_end = round_up(punch_range_end, blocksize);
	min_hole = round_up(min_hole, blocksize);
	if (!min_hole)
		min_hole = blocksize;

	if (debug)
		printf("new start/end/min %lld/%lld/%lld\n",
		       (long long)punch_range_start,
		       (long long)punch_range_end, (long long)min_hole);

	if (punch_range_end <= punch_range_start) {
		printf("Range too small, nothing to do\n");
		exit(0);
	}

	/* min_hole is also the I/O buffer size, so it must fit in a size_t */
	if ((off_t)(size_t)min_hole != min_hole) {
		printf("Error: min hole size too large\n");
		exit(EXIT_FAILURE);
	}

	printf("punching out holes of minimum size %lld in range %lld-%lld\n",
	       (long long)min_hole, (long long)punch_range_start,
	       (long long)punch_range_end);

	if (scan_range(fd, punch_range_start, punch_range_end, min_hole) < 0)
		exit(EXIT_FAILURE);

	close(fd);
	return 0;
}


             reply	other threads:[~2012-02-04 20:04 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-04 20:04 Eric Sandeen [this message]
2012-02-04 20:10 ` sparsify - utility to punch out blocks of 0s in a file Eric Sandeen
2012-02-04 20:17   ` Eric Sandeen
2012-02-05 15:05   ` Raghavendra D Prabhu
2012-02-05 23:44   ` Michael Tokarev
2012-02-05 23:55     ` Eric Sandeen
2012-02-05  9:33 ` Ron Yorston
2012-02-05 16:36   ` Eric Sandeen
2012-02-05 16:55     ` Andreas Dilger
2012-02-05 17:23       ` Matthias Schniedermeyer
2012-02-05 17:23       ` Eric Sandeen
2012-02-05 19:24         ` Andreas Dilger
2012-02-05 17:19     ` Ron Yorston
2012-02-05 17:21       ` Eric Sandeen
2012-02-06 18:40 ` Sunil Mushran
2012-02-06 21:41 ` Ted Ts'o
2012-02-06 21:47   ` Eric Sandeen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F2D8F30.3090802@redhat.com \
    --to=sandeen@redhat.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).