qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel][PATCH] qemu-fuse
@ 2008-08-27 15:50 Shahar Frank
  2008-08-27 16:28 ` Anthony Liguori
  2008-08-28 21:05 ` [Qemu-devel] Re: [PATCH] qemu-fuse Szabolcs Szakacsits
  0 siblings, 2 replies; 13+ messages in thread
From: Shahar Frank @ 2008-08-27 15:50 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 21042 bytes --]

Hi All,

	The attached is a small utility to mount qemu images as pseudo
partition files. It can be very useful to access (rw) images from the
host.
I would not use it for production, but it seems to be stable.

Limitations:

1. Only primary partitions on disks are recognized.
2. The qemu-fuse is forced to be single threaded.
3. The write behind option is enabled for the images (better to be
turned off).
4. No snapshot access support.
5. Other?

Signed-off-by: Shahar Frank <shaharf@qumranet.com>

Index: qemu-fuse.c
===================================================================
--- qemu-fuse.c	(revision 0)
+++ qemu-fuse.c	(revision 0)
@@ -0,0 +1,643 @@
+/*
+ * QEMU disk image fuse server
+ *
+ * Copyright (c) 2008 Shahar Frank
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a copy
+ * of this software and associated documentation files (the
"Software"), to deal
+ * in the Software without restriction, including without limitation
the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell
+ * copies of the Software, and to permit persons to whom the Software
is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include <assert.h>
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse/fuse_lowlevel.h>
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#define QDISK_MAX_PART          5
+typedef struct QDiskPart {
+	uint32_t start;
+	uint64_t count;
+	unsigned char type;
+	int boot;
+	char const *path;
+} QDiskPart;
+
+QDiskPart qparttbl[QDISK_MAX_PART];
+
+static const char *qemu_parttbl_path = "/parttbl";
+static char qemu_parttbl_str[8192];
+static int qemu_parttbl_str_len;
+
+static BlockDriverState *qemu_bs;
+static BlockDriver *qemu_bdrv;
+static char *qemu_img_path;
+static char *prog;
+static long qemu_dsize;
+static int open_flags = 0;
+
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+#if defined(_POSIX_C_SOURCE)
+	int ret;
+	void *ptr;
+	ret = posix_memalign(&ptr, alignment, size);
+	if (ret != 0)
+		return NULL;
+	return ptr;
+#elif defined(_BSD)
+	return valloc(size);
+#else
+	return memalign(alignment, size);
+#endif
+}
+
+static void __attribute__ ((noreturn)) error(const char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	fprintf(stderr, "qemu-fuse: ");
+	vfprintf(stderr, fmt, ap);
+	fprintf(stderr, "\n");
+	exit(1);
+	va_end(ap);
+}
+
+static void format_print(void *opaque, const char *name)
+{
+	printf(" %s", name);
+}
+
+void usage(void)
+{
+	printf("%s version " QEMU_VERSION
+	       ", Copyright (c) 2007 Qumranet, Shahar Frank\n"
+	       "usage: qemu-fuse [options] <image_path> <mount_point>
[fuse options]\n"
+	       "QEMU disk image file system utility\n" "\n" "Options:\n"
+	       "  -d            # debug mode (force also forground)\n"
+	       "  -f fmt        # force image format\n"
+	       "  -F            # keep program in forground\n"
+	       "  -h            # help (this text)\n" "\n" "Examples:\n"
+	       "\tmkdir -p /tmp/qemu /tmp/ext3 /tmp/ntfs\n"
+	       "\t%s /images/disk.vmdk /tmp/qemu\n"
+	       "\tmount -o loop /tmp/qemu/img2 /tmp/ext3\n"
+	       "\tmount -o loop -t ntfs-3g -o force /tmp/qemu/img1
/tmp/ntfs\n",
+	       prog, prog);
+	printf("\nSupported formats:");
+	bdrv_iterate_format(format_print, NULL);
+	printf("\n");
+	exit(1);
+}
+
+void help(void)
+{
+	usage();
+}
+
+#include <termios.h>
+
+static struct termios oldtty;
+
+static void term_exit(void)
+{
+	tcsetattr(0, TCSANOW, &oldtty);
+}
+
+static void term_init(void)
+{
+	struct termios tty;
+
+	tcgetattr(0, &tty);
+	oldtty = tty;
+
+	tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
+			 | INLCR | IGNCR | ICRNL | IXON);
+	tty.c_oflag |= OPOST;
+	tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+	tty.c_cflag &= ~(CSIZE | PARENB);
+	tty.c_cflag |= CS8;
+	tty.c_cc[VMIN] = 1;
+	tty.c_cc[VTIME] = 0;
+
+	tcsetattr(0, TCSANOW, &tty);
+
+	atexit(term_exit);
+}
+
+static int read_password(char *buf, int buf_size)
+{
+	uint8_t ch;
+	int i, ret;
+
+	printf("password: ");
+	fflush(stdout);
+	term_init();
+	i = 0;
+	for (;;) {
+		ret = read(0, &ch, 1);
+		if (ret == -1) {
+			if (errno == EAGAIN || errno == EINTR) {
+				continue;
+			} else {
+				ret = -1;
+				break;
+			}
+		} else if (ret == 0) {
+			ret = -1;
+			break;
+		} else {
+			if (ch == '\r') {
+				ret = 0;
+				break;
+			}
+			if (i < (buf_size - 1))
+				buf[i++] = ch;
+		}
+	}
+	term_exit();
+	buf[i] = '\0';
+	printf("\n");
+	return ret;
+}
+
+static BlockDriverState *bdrv_new_open(const char *filename,
+				       const char *fmt)
+{
+	BlockDriverState *bs;
+	BlockDriver *drv;
+	char password[256];
+
+	bs = bdrv_new("");
+	if (!bs)
+		error("Not enough memory");
+	if (fmt) {
+		drv = bdrv_find_format(fmt);
+		if (!drv)
+			error("Unknown file format '%s'", fmt);
+	} else {
+		drv = NULL;
+	}
+	if (bdrv_open2(bs, filename, open_flags, drv) < 0) {
+		error("Could not open '%s'", filename);
+	}
+	if (bdrv_is_encrypted(bs)) {
+		printf("Disk image '%s' is encrypted.\n", filename);
+		if (read_password(password, sizeof(password)) < 0)
+			error("No password given");
+		if (bdrv_set_key(bs, password) < 0)
+			error("invalid password");
+	}
+	qemu_bs = bs;
+	qemu_bdrv = drv;
+	return bs;
+}
+
+static int64_t get_allocated_file_size(const char *filename)
+{
+	struct stat st;
+	if (stat(filename, &st) < 0)
+		return -1;
+	return (int64_t) st.st_blocks * 512;
+}
+
+static void dump_snapshots(BlockDriverState * bs)
+{
+	QEMUSnapshotInfo *sn_tab, *sn;
+	int nb_sns, i;
+	char buf[256];
+
+	nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+	if (nb_sns <= 0)
+		return;
+	printf("Snapshot list:\n");
+	printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL));
+	for (i = 0; i < nb_sns; i++) {
+		sn = &sn_tab[i];
+		printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf),
sn));
+	}
+	qemu_free(sn_tab);
+}
+
+static int open_img(const char *filename, const char *fmt)
+{
+	BlockDriverState *bs;
+	char fmt_name[128], size_buf[128], dsize_buf[128];
+	uint64_t total_sectors;
+	int64_t allocated_size;
+	char backing_filename[1024];
+	char backing_filename2[1024];
+	BlockDriverInfo bdi;
+
+	if (!(bs = bdrv_new_open(filename, fmt))) {
+		error("Could not open '%s'", filename);
+	}
+	bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
+	bdrv_get_geometry(bs, &total_sectors);
+	get_human_readable_size(size_buf, sizeof(size_buf),
+				total_sectors * 512);
+	allocated_size = get_allocated_file_size(filename);
+	if (allocated_size < 0)
+		sprintf(dsize_buf, "unavailable");
+	else
+		get_human_readable_size(dsize_buf, sizeof(dsize_buf),
+					allocated_size);
+	printf("image: %s\n"
+	       "file format: %s\n"
+	       "virtual size: %s (%" PRId64 " bytes)\n"
+	       "disk size: %s\n",
+	       filename, fmt_name, size_buf,
+	       (total_sectors * 512), dsize_buf);
+	if (bdrv_is_encrypted(bs))
+		printf("encrypted: yes\n");
+	if (bdrv_get_info(bs, &bdi) >= 0) {
+		if (bdi.cluster_size != 0)
+			printf("cluster_size: %d\n", bdi.cluster_size);
+	}
+	bdrv_get_backing_filename(bs, backing_filename,
+				  sizeof(backing_filename));
+	if (backing_filename[0] != '\0') {
+		path_combine(backing_filename2,
sizeof(backing_filename2),
+			     filename, backing_filename);
+		printf("backing file: %s (actual path: %s)\n",
+		       backing_filename, backing_filename2);
+	}
+	dump_snapshots(bs);
+	qemu_img_path = strdup(filename);
+	qemu_dsize = total_sectors * 512;
+	return 0;
+}
+
+
+#define DOS_PARTTBL_OFFS        0x1be
+#define DOS_PARTTBL_SZ          66
+#define DOS_PARTTBL_ENT_SZ      16
+#define DOS_PARTTBL_MAGIC_LO    0x55
+#define DOS_PARTTBL_MAGIC_HI    0xaa
+#define DOS_PARTTBL_MAX_PART    4
+
+typedef struct PTableEntry {
+	unsigned char bootdisk;
+	unsigned char start_cylinder;
+	unsigned char start_head;
+	unsigned char start_sector;
+	unsigned char part_type;
+	unsigned char end_cylinder;
+	unsigned char end_head;
+	unsigned char end_sector;
+	unsigned char start_lba[4];
+	unsigned char sectors_count[4];
+} PTableEntry;			/*  use char only so
__attribute__((packed)) not required */
+
+static unsigned lt32_to_u32(char *lt32)
+{
+	uint32_t u = 0;
+	int i;
+	for (i = 0; i < 4; i++)
+		u |= ((uint32_t) lt32[i] & 0xff) << (i * 8);
+	return u;
+}
+
+static int fill_pent(QDiskPart * qtbl, char *buf, int n)
+{
+	return snprintf(buf, n,
+			"%s:\tstart %10u \tsectors %10llu \ttype %x
%s\n",
+			qtbl->path, qtbl->start,
+			(long long unsigned) qtbl->count, (int)
qtbl->type,
+			qtbl->boot ? "*" : "");
+}
+
+/* FIXME: parses only primary partition table */
+static char *build_partition_table(unsigned char *tbl, QDiskPart *
qtbl)
+{
+	PTableEntry *pent;
+	char buf[64] = "";
+	int i = 0, n = 0;
+
+	/* Init first entry as the whole disk */
+	qtbl[0].start = 0;
+	qtbl[0].count = qemu_dsize / 512;
+	qtbl[0].type = 0;
+	qtbl[0].boot = 0;
+	qtbl[0].path = "/img";
+	n += fill_pent(qtbl, qemu_parttbl_str + n,
+		       sizeof(qemu_parttbl_str) - n - 1);
+	if (n >= sizeof(qemu_parttbl_str) - 1)
+		return "parttbl string too large";
+	qemu_parttbl_str_len = n;
+
+	/*
+	 * disk part tbl should be the last 66 bytes of the first
sector,
+	 * last two bytes are magic.
+	 */
+	if (tbl[DOS_PARTTBL_SZ - 2] != DOS_PARTTBL_MAGIC_LO ||
+	    tbl[DOS_PARTTBL_SZ - 1] != DOS_PARTTBL_MAGIC_HI)
+		return NULL;	/* no valid partition table, only /img
file will be created */
+
+	for (i = 1; i < DOS_PARTTBL_MAX_PART;
+	     i++, tbl += DOS_PARTTBL_ENT_SZ) {
+		if (i >= QDISK_MAX_PART)
+			return "Internal: too much partitions";
+		pent = (void *) tbl;
+		qtbl[i].start = lt32_to_u32(pent->start_lba);
+		if (qtbl[i].start == 0)
+			continue;
+		qtbl[i].count = lt32_to_u32(pent->sectors_count);
+		qtbl[i].type = pent->part_type;
+		qtbl[i].boot = pent->bootdisk & 0x80;
+		snprintf(buf, sizeof(buf) - 1, "/img%d", i);
+		qtbl[i].path = strdup(buf);
+		n += fill_pent(qtbl + i, qemu_parttbl_str + n,
+			       sizeof(qemu_parttbl_str) - n - 1);
+		if (n >= sizeof(qemu_parttbl_str) - 1)
+			return "parttbl string too large";
+	}
+	qemu_parttbl_str_len = n;
+	return NULL;
+}
+
+static int has_path(const char *path)
+{
+	int i;
+
+	if (!strcmp(path, qemu_parttbl_path))
+		return 1;
+	for (i = 0; i < QDISK_MAX_PART; i++)
+		if (!strcmp(path, qparttbl[i].path))
+			return 1;
+	return 0;
+}
+
+static QDiskPart *find_part(const char *path)
+{
+	int i;
+
+	for (i = 0; i < QDISK_MAX_PART; i++)
+		if (!strcmp(path, qparttbl[i].path))
+			return qparttbl + i;
+	return NULL;
+}
+
+static int qemu_parttbl_read(char *buf, int sz, long offset)
+{
+	if (offset >= qemu_parttbl_str_len)
+		return 0;
+
+	if (sz > qemu_parttbl_str_len - offset)
+		sz = qemu_parttbl_str_len - offset;
+
+	memcpy(buf, qemu_parttbl_str + offset, sz);
+
+	return sz;
+}
+
+static int qemu_getattr(const char *path, struct stat *stbuf)
+{
+	int i;
+
+	memset(stbuf, 0, sizeof(struct stat));
+	if (strcmp(path, "/") == 0) {
+		stbuf->st_mode = S_IFDIR | 0755;
+		stbuf->st_nlink = 2;
+		return 0;
+	}
+
+	if (strcmp(path, qemu_parttbl_path) == 0) {
+		stbuf->st_mode = S_IFREG | 0444;
+		stbuf->st_nlink = 1;
+		stbuf->st_size = qemu_parttbl_str_len;
+		return 0;
+	}
+
+	for (i = 0; i < QDISK_MAX_PART; i++) {
+		if (!qparttbl[i].path
+		    || (strcmp(path, qparttbl[i].path) != 0))
+			continue;
+		stbuf->st_mode = S_IFREG | 0666;
+		stbuf->st_nlink = 1;
+		stbuf->st_size = (uint64_t) qparttbl[i].count * 512;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int qemu_readdir(const char *path, void *buf,
+			fuse_fill_dir_t filler, off_t offset,
+			struct fuse_file_info *fi)
+{
+	int i;
+
+	(void) offset;
+	(void) fi;
+
+	if (strcmp(path, "/") != 0)
+		return -ENOENT;
+
+	filler(buf, ".", NULL, 0);
+	filler(buf, "..", NULL, 0);
+	filler(buf, qemu_parttbl_path + 1, NULL, 0);
+	for (i = 0; i < QDISK_MAX_PART; i++)
+		if (qparttbl[i].path)
+			filler(buf, qparttbl[i].path + 1, NULL, 0);
+	return 0;
+}
+
+static int qemu_open(const char *path, struct fuse_file_info *fi)
+{
+	if (!has_path(path))
+		return -ENOENT;
+
+	return 0;
+}
+
+static int qemu_read(const char *path, char *buf, size_t size,
+		     off_t offset, struct fuse_file_info *fi)
+{
+	QDiskPart *qpart;
+
+	if (strcmp(path, qemu_parttbl_path) == 0)
+		return qemu_parttbl_read(buf, size, offset);
+	if (!(qpart = find_part(path)))
+		return -ENOENT;
+	return bdrv_pread(qemu_bs, offset + qpart->start * 512, buf,
size);
+}
+
+static int qemu_write(const char *path, const char *buf, size_t size,
+		      off_t offset, struct fuse_file_info *fi)
+{
+	QDiskPart *qpart;
+
+	fprintf(stderr, "W: path %s\n", path);
+	if (strcmp(path, qemu_parttbl_path) == 0)
+		return -EPERM;
+	if (!(qpart = find_part(path)))
+		return -ENOENT;
+	return bdrv_pwrite(qemu_bs, offset + qpart->start * 512, buf,
+			   size);
+}
+
+static int qemu_flush(const char *path, struct fuse_file_info *fi)
+{
+	bdrv_flush(qemu_bs);
+	return 0;
+}
+
+/** Rename a file */
+//static int qemu_rename (const char *, const char *);
+
+/** Create a hard link to a file */
+//int qemu_link (const char *, const char *){}
+
+
+/** Change the permission bits of a file */
+static int qemu_chmod(const char *s, mode_t m)
+{
+	return 0;
+}
+
+/** Change the owner and group of a file */
+static int qemu_chown(const char *s, uid_t u, gid_t g)
+{
+	return 0;
+}
+
+    /** Change the size of a file */
+static int qemu_truncate(const char *s, off_t t)
+{
+	return 0;
+}
+
+/** Change the access and/or modification times of a file
+ *
+ * Deprecated, use utimens() instead.
+ */
+static int qemu_utime(const char *s, struct utimbuf *u)
+{
+	return 0;
+}
+
+static struct fuse_operations qemu_oper = {
+	.getattr = qemu_getattr,
+	.readdir = qemu_readdir,
+	.open = qemu_open,
+	.read = qemu_read,
+	.write = qemu_write,
+	.flush = qemu_flush,
+	//    .rename = qemu_rename,
+	.chmod = qemu_chmod,
+	.chown = qemu_chown,
+	.truncate = qemu_truncate,
+	.utime = qemu_utime,
+};
+
+char *init_fs(void)
+{
+	unsigned char buf[DOS_PARTTBL_SZ];
+
+	if (bdrv_pread(qemu_bs, DOS_PARTTBL_OFFS, buf, DOS_PARTTBL_SZ)
!=
+	    DOS_PARTTBL_SZ)
+		error("can't read partion table, bad read size");
+
+	return build_partition_table(buf, qparttbl);
+}
+
+int main(int argc, char *argv[])
+{
+	char *err, *filename, *fmt = NULL, **newargs;
+	int forground = 0, debug = 0;
+	int c, r;
+
+	prog = strrchr(argv[0], '/');
+	if (!prog)
+		prog = argv[0];
+
+	for (;;) {
+		/*
+		   The + in the start of opsting is to force POSIX
parsing -
+		   i.e. stop parsing at first non option. This is
required to
+		   handle fuse options correctly. Without the +, the
getopt will
+		   permute the options to force them at start...
+		 */
+		c = getopt(argc, argv, "+f:hFd");
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'h':
+			help();
+			break;
+		case 'f':
+			fmt = optarg;
+			break;
+		case 'F':
+			forground = 1;
+			break;
+		case 'd':
+			debug = 1;
+			forground = 1;
+			break;
+		}
+	}
+	if (optind >= argc)
+		help();
+	filename = argv[optind++];
+
+	
+	if (!forground) {
+		if ((r = fork()) < 0)
+			error("can't fork");
+		if (r > 0)
+			exit(0);
+		/* chield */
+		if (daemon(0, 0) < 0) {
+			error("qemu-fuse: failed to daemonize
program\n");
+			return -1;
+		}
+	}
+	
+	bdrv_init();
+
+	open_img(filename, fmt);
+
+	if ((err = init_fs()))
+		error("init: %s", err);
+
+	// remove first arg and pass the rest to fuse_main
+	if (argc < optind)
+		error("Missing mount point");
+
+	argc -= optind - 1;
+
+	if (!(newargs = calloc(argc + 3, sizeof(char *))))
+		error("out of mem");
+	memcpy(newargs, argv + optind - 1, argc * sizeof(char *));
+	newargs[0] = argv[0];
+	newargs[argc] = "-s";	/* force single thread mode - qemu code
is not thread safe */
+	if (debug)
+		newargs[argc + 1] = "-d";	/* force debug and
forground mode */
+	else
+		newargs[argc + 1] = "-f";	/* force forground mode,
overcome signal masking problems */
+	newargs[argc + 2] = 0;
+
+	return fuse_main(argc + 2, newargs, &qemu_oper, NULL);
+}
Index: Makefile
===================================================================
--- Makefile	(revision 5089)
+++ Makefile	(working copy)
@@ -188,6 +188,14 @@
 qemu-img-%.o: %.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -c -o $@ $<
 
+ifdef CONFIG_FUSE
+qemu-fuse$(EXESUF): qemu-fuse.o qemu-img-block.o $(QEMU_IMG_BLOCK_OBJS)
+	$(CC) $(LDFLAGS)  -o $@ $^ $(FUSELIBS) -lz $(LIBS)
+
+qemu-fuse.o: qemu-fuse.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS)  -DQEMU_IMG
$(FUSEFLAGS) -g -c -o $@ $<
+endif
+
 %.o: %.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
 
@@ -342,6 +350,7 @@
         $(bindir)/qemu-cris \
         $(bindir)/qemu-img \
         $(bindir)/qemu-nbd \
+        $(bindir)/qemu-fuse \
 	$(datadir)/bios.bin \
 	$(datadir)/vgabios.bin \
 	$(datadir)/vgabios-cirrus.bin \
Index: configure
===================================================================
--- configure	(revision 5089)
+++ configure	(working copy)
@@ -110,6 +110,7 @@
 aio="yes"
 nptl="yes"
 mixemu="no"
+fuse="no"
 
 # OS specific
 targetos=`uname -s`
@@ -340,6 +341,8 @@
   ;;
   --disable-aio) aio="no"
   ;;
+  --enable-fuse) fuse="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -436,6 +439,7 @@
 echo "  --sparc_cpu=V            Build qemu for Sparc architecture v7,
v8, v8plus, v8plusa, v9"
 echo "  --disable-vde            disable support for vde network"
 echo "  --disable-aio            disable AIO support"
+echo "  --enable-fuse            enable fuse support"
 echo ""
 echo "NOTE: The object files are built at the place where configure is
launched"
 exit 1
@@ -479,7 +483,11 @@
 
 if [ "$bsd" = "yes" -o "$darwin" = "yes" -o "$mingw32" = "yes" ] ; then
     AIOLIBS=
+    FUSELIBS=
+    FUSEFLAGS=
 else
+    FUSELIBS="-lfuse"
+    FUSEFLAGS=-D_FILE_OFFSET_BITS=64
     # Some Linux architectures (e.g. s390) don't imply -lpthread
automatically.
     AIOLIBS="-lrt -lpthread"
 fi
@@ -889,6 +897,22 @@
   fi
 fi
 
+##########################################
+# Fuse probe
+if test "$fuse" = "yes" ; then
+  fuse=no
+  cat > $TMPC << EOF
+#define FUSE_USE_VERSION 26
+#include <fuse/fuse_lowlevel.h>
+#include <fuse.h>
+static struct fuse_operations qemu_oper = {};
+int main(int argc, char **argv) { return fuse_main(argc, argv,
&qemu_oper, NULL);}
+EOF
+  if $cc $ARCH_CFLAGS $FUSEFLAGS -o $TMPE $FUSELIBS $TMPC 2> /dev/null
; then
+    fuse=yes
+  fi
+fi
+
 # Check if tools are available to build documentation.
 if [ -x "`which texi2html 2>/dev/null`" ] && \
    [ -x "`which pod2man 2>/dev/null`" ]; then
@@ -961,6 +985,7 @@
 echo "NPTL support      $nptl"
 echo "vde support       $vde"
 echo "AIO support       $aio"
+echo "FUSE support      $fuse"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL
support"
@@ -1007,6 +1032,9 @@
 echo "LDFLAGS=$LDFLAGS" >> $config_mak
 echo "EXESUF=$EXESUF" >> $config_mak
 echo "AIOLIBS=$AIOLIBS" >> $config_mak
+echo "FUSELIBS=$FUSELIBS" >> $config_mak
+echo "FUSEFLAGS=$FUSEFLAGS" >> $config_mak
+
 case "$cpu" in
   i386)
     echo "ARCH=i386" >> $config_mak
@@ -1216,6 +1244,10 @@
 if test "$aio" = "yes" ; then
   echo "#define CONFIG_AIO 1" >> $config_h
 fi
+if test "$fuse" = "yes" ; then
+  echo "#define CONFIG_FUSE 1" >> $config_h
+  echo "CONFIG_FUSE=yes" >> $config_mak
+fi
 
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
@@ -1232,6 +1264,9 @@
   if [ "$linux" = "yes" ] ; then
       tools="qemu-nbd\$(EXESUF) $tools"
   fi
+  if [ "$fuse" = "yes" ];then
+      tools="$tools qemu-fuse\$(EXESUF)"
+  fi
 fi
 echo "TOOLS=$tools" >> $config_mak
  

[-- Attachment #2: qemu-fuse-5089.patch --]
[-- Type: application/octet-stream, Size: 19757 bytes --]

Signed-off-by: Shahar Frank <shaharf@qumranet.com>

Index: qemu-fuse.c
===================================================================
--- qemu-fuse.c	(revision 0)
+++ qemu-fuse.c	(revision 0)
@@ -0,0 +1,643 @@
+/*
+ * QEMU disk image fuse server
+ *
+ * Copyright (c) 2008 Shahar Frank
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block_int.h"
+#include <assert.h>
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse/fuse_lowlevel.h>
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#define QDISK_MAX_PART          5
+typedef struct QDiskPart {
+	uint32_t start;
+	uint64_t count;
+	unsigned char type;
+	int boot;
+	char const *path;
+} QDiskPart;
+
+QDiskPart qparttbl[QDISK_MAX_PART];
+
+static const char *qemu_parttbl_path = "/parttbl";
+static char qemu_parttbl_str[8192];
+static int qemu_parttbl_str_len;
+
+static BlockDriverState *qemu_bs;
+static BlockDriver *qemu_bdrv;
+static char *qemu_img_path;
+static char *prog;
+static long qemu_dsize;
+static int open_flags = 0;
+
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+#if defined(_POSIX_C_SOURCE)
+	int ret;
+	void *ptr;
+	ret = posix_memalign(&ptr, alignment, size);
+	if (ret != 0)
+		return NULL;
+	return ptr;
+#elif defined(_BSD)
+	return valloc(size);
+#else
+	return memalign(alignment, size);
+#endif
+}
+
+static void __attribute__ ((noreturn)) error(const char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	fprintf(stderr, "qemu-fuse: ");
+	vfprintf(stderr, fmt, ap);
+	fprintf(stderr, "\n");
+	exit(1);
+	va_end(ap);
+}
+
+static void format_print(void *opaque, const char *name)
+{
+	printf(" %s", name);
+}
+
+void usage(void)
+{
+	printf("%s version " QEMU_VERSION
+	       ", Copyright (c) 2007 Qumranet, Shahar Frank\n"
+	       "usage: qemu-fuse [options] <image_path> <mount_point> [fuse options]\n"
+	       "QEMU disk image file system utility\n" "\n" "Options:\n"
+	       "  -d            # debug mode (force also forground)\n"
+	       "  -f fmt        # force image format\n"
+	       "  -F            # keep program in forground\n"
+	       "  -h            # help (this text)\n" "\n" "Examples:\n"
+	       "\tmkdir -p /tmp/qemu /tmp/ext3 /tmp/ntfs\n"
+	       "\t%s /images/disk.vmdk /tmp/qemu\n"
+	       "\tmount -o loop /tmp/qemu/img2 /tmp/ext3\n"
+	       "\tmount -o loop -t ntfs-3g -o force /tmp/qemu/img1 /tmp/ntfs\n",
+	       prog, prog);
+	printf("\nSupported formats:");
+	bdrv_iterate_format(format_print, NULL);
+	printf("\n");
+	exit(1);
+}
+
+void help(void)
+{
+	usage();
+}
+
+#include <termios.h>
+
+static struct termios oldtty;
+
+static void term_exit(void)
+{
+	tcsetattr(0, TCSANOW, &oldtty);
+}
+
+static void term_init(void)
+{
+	struct termios tty;
+
+	tcgetattr(0, &tty);
+	oldtty = tty;
+
+	tty.c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
+			 | INLCR | IGNCR | ICRNL | IXON);
+	tty.c_oflag |= OPOST;
+	tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+	tty.c_cflag &= ~(CSIZE | PARENB);
+	tty.c_cflag |= CS8;
+	tty.c_cc[VMIN] = 1;
+	tty.c_cc[VTIME] = 0;
+
+	tcsetattr(0, TCSANOW, &tty);
+
+	atexit(term_exit);
+}
+
+static int read_password(char *buf, int buf_size)
+{
+	uint8_t ch;
+	int i, ret;
+
+	printf("password: ");
+	fflush(stdout);
+	term_init();
+	i = 0;
+	for (;;) {
+		ret = read(0, &ch, 1);
+		if (ret == -1) {
+			if (errno == EAGAIN || errno == EINTR) {
+				continue;
+			} else {
+				ret = -1;
+				break;
+			}
+		} else if (ret == 0) {
+			ret = -1;
+			break;
+		} else {
+			if (ch == '\r') {
+				ret = 0;
+				break;
+			}
+			if (i < (buf_size - 1))
+				buf[i++] = ch;
+		}
+	}
+	term_exit();
+	buf[i] = '\0';
+	printf("\n");
+	return ret;
+}
+
+static BlockDriverState *bdrv_new_open(const char *filename,
+				       const char *fmt)
+{
+	BlockDriverState *bs;
+	BlockDriver *drv;
+	char password[256];
+
+	bs = bdrv_new("");
+	if (!bs)
+		error("Not enough memory");
+	if (fmt) {
+		drv = bdrv_find_format(fmt);
+		if (!drv)
+			error("Unknown file format '%s'", fmt);
+	} else {
+		drv = NULL;
+	}
+	if (bdrv_open2(bs, filename, open_flags, drv) < 0) {
+		error("Could not open '%s'", filename);
+	}
+	if (bdrv_is_encrypted(bs)) {
+		printf("Disk image '%s' is encrypted.\n", filename);
+		if (read_password(password, sizeof(password)) < 0)
+			error("No password given");
+		if (bdrv_set_key(bs, password) < 0)
+			error("invalid password");
+	}
+	qemu_bs = bs;
+	qemu_bdrv = drv;
+	return bs;
+}
+
+static int64_t get_allocated_file_size(const char *filename)
+{
+	struct stat st;
+	if (stat(filename, &st) < 0)
+		return -1;
+	return (int64_t) st.st_blocks * 512;
+}
+
+static void dump_snapshots(BlockDriverState * bs)
+{
+	QEMUSnapshotInfo *sn_tab, *sn;
+	int nb_sns, i;
+	char buf[256];
+
+	nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+	if (nb_sns <= 0)
+		return;
+	printf("Snapshot list:\n");
+	printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL));
+	for (i = 0; i < nb_sns; i++) {
+		sn = &sn_tab[i];
+		printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), sn));
+	}
+	qemu_free(sn_tab);
+}
+
+static int open_img(const char *filename, const char *fmt)
+{
+	BlockDriverState *bs;
+	char fmt_name[128], size_buf[128], dsize_buf[128];
+	uint64_t total_sectors;
+	int64_t allocated_size;
+	char backing_filename[1024];
+	char backing_filename2[1024];
+	BlockDriverInfo bdi;
+
+	if (!(bs = bdrv_new_open(filename, fmt))) {
+		error("Could not open '%s'", filename);
+	}
+	bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
+	bdrv_get_geometry(bs, &total_sectors);
+	get_human_readable_size(size_buf, sizeof(size_buf),
+				total_sectors * 512);
+	allocated_size = get_allocated_file_size(filename);
+	if (allocated_size < 0)
+		sprintf(dsize_buf, "unavailable");
+	else
+		get_human_readable_size(dsize_buf, sizeof(dsize_buf),
+					allocated_size);
+	printf("image: %s\n"
+	       "file format: %s\n"
+	       "virtual size: %s (%" PRId64 " bytes)\n"
+	       "disk size: %s\n",
+	       filename, fmt_name, size_buf,
+	       (total_sectors * 512), dsize_buf);
+	if (bdrv_is_encrypted(bs))
+		printf("encrypted: yes\n");
+	if (bdrv_get_info(bs, &bdi) >= 0) {
+		if (bdi.cluster_size != 0)
+			printf("cluster_size: %d\n", bdi.cluster_size);
+	}
+	bdrv_get_backing_filename(bs, backing_filename,
+				  sizeof(backing_filename));
+	if (backing_filename[0] != '\0') {
+		path_combine(backing_filename2, sizeof(backing_filename2),
+			     filename, backing_filename);
+		printf("backing file: %s (actual path: %s)\n",
+		       backing_filename, backing_filename2);
+	}
+	dump_snapshots(bs);
+	qemu_img_path = strdup(filename);
+	qemu_dsize = total_sectors * 512;
+	return 0;
+}
+
+
+#define DOS_PARTTBL_OFFS        0x1be
+#define DOS_PARTTBL_SZ          66
+#define DOS_PARTTBL_ENT_SZ      16
+#define DOS_PARTTBL_MAGIC_LO    0x55
+#define DOS_PARTTBL_MAGIC_HI    0xaa
+#define DOS_PARTTBL_MAX_PART    4
+
+typedef struct PTableEntry {
+	unsigned char bootdisk;
+	unsigned char start_cylinder;
+	unsigned char start_head;
+	unsigned char start_sector;
+	unsigned char part_type;
+	unsigned char end_cylinder;
+	unsigned char end_head;
+	unsigned char end_sector;
+	unsigned char start_lba[4];
+	unsigned char sectors_count[4];
+} PTableEntry;			/*  use char only so __attribute__((packed)) not required */
+
+static unsigned lt32_to_u32(char *lt32)
+{
+	uint32_t u = 0;
+	int i;
+	for (i = 0; i < 4; i++)
+		u |= ((uint32_t) lt32[i] & 0xff) << (i * 8);
+	return u;
+}
+
+static int fill_pent(QDiskPart * qtbl, char *buf, int n)
+{
+	return snprintf(buf, n,
+			"%s:\tstart %10u \tsectors %10llu \ttype %x %s\n",
+			qtbl->path, qtbl->start,
+			(long long unsigned) qtbl->count, (int) qtbl->type,
+			qtbl->boot ? "*" : "");
+}
+
+/* FIXME: parses only primary partition table */
+static char *build_partition_table(unsigned char *tbl, QDiskPart * qtbl)
+{
+	PTableEntry *pent;
+	char buf[64] = "";
+	int i = 0, n = 0;
+
+	/* Init first entry as the whole disk */
+	qtbl[0].start = 0;
+	qtbl[0].count = qemu_dsize / 512;
+	qtbl[0].type = 0;
+	qtbl[0].boot = 0;
+	qtbl[0].path = "/img";
+	n += fill_pent(qtbl, qemu_parttbl_str + n,
+		       sizeof(qemu_parttbl_str) - n - 1);
+	if (n >= sizeof(qemu_parttbl_str) - 1)
+		return "parttbl string too large";
+	qemu_parttbl_str_len = n;
+
+	/*
+	 * disk part tbl should be the last 66 bytes of the first sector,
+	 * last two bytes are magic.
+	 */
+	if (tbl[DOS_PARTTBL_SZ - 2] != DOS_PARTTBL_MAGIC_LO ||
+	    tbl[DOS_PARTTBL_SZ - 1] != DOS_PARTTBL_MAGIC_HI)
+		return NULL;	/* no valid partition table, only /img file will be created */
+
+	for (i = 1; i < DOS_PARTTBL_MAX_PART;
+	     i++, tbl += DOS_PARTTBL_ENT_SZ) {
+		if (i >= QDISK_MAX_PART)
+			return "Internal: too much partitions";
+		pent = (void *) tbl;
+		qtbl[i].start = lt32_to_u32(pent->start_lba);
+		if (qtbl[i].start == 0)
+			continue;
+		qtbl[i].count = lt32_to_u32(pent->sectors_count);
+		qtbl[i].type = pent->part_type;
+		qtbl[i].boot = pent->bootdisk & 0x80;
+		snprintf(buf, sizeof(buf) - 1, "/img%d", i);
+		qtbl[i].path = strdup(buf);
+		n += fill_pent(qtbl + i, qemu_parttbl_str + n,
+			       sizeof(qemu_parttbl_str) - n - 1);
+		if (n >= sizeof(qemu_parttbl_str) - 1)
+			return "parttbl string too large";
+	}
+	qemu_parttbl_str_len = n;
+	return NULL;
+}
+
+static int has_path(const char *path)
+{
+	int i;
+
+	if (!strcmp(path, qemu_parttbl_path))
+		return 1;
+	for (i = 0; i < QDISK_MAX_PART; i++)
+		if (!strcmp(path, qparttbl[i].path))
+			return 1;
+	return 0;
+}
+
+static QDiskPart *find_part(const char *path)
+{
+	int i;
+
+	for (i = 0; i < QDISK_MAX_PART; i++)
+		if (!strcmp(path, qparttbl[i].path))
+			return qparttbl + i;
+	return NULL;
+}
+
+static int qemu_parttbl_read(char *buf, int sz, long offset)
+{
+	if (offset >= qemu_parttbl_str_len)
+		return 0;
+
+	if (sz > qemu_parttbl_str_len - offset)
+		sz = qemu_parttbl_str_len - offset;
+
+	memcpy(buf, qemu_parttbl_str + offset, sz);
+
+	return sz;
+}
+
+static int qemu_getattr(const char *path, struct stat *stbuf)
+{
+	int i;
+
+	memset(stbuf, 0, sizeof(struct stat));
+	if (strcmp(path, "/") == 0) {
+		stbuf->st_mode = S_IFDIR | 0755;
+		stbuf->st_nlink = 2;
+		return 0;
+	}
+
+	if (strcmp(path, qemu_parttbl_path) == 0) {
+		stbuf->st_mode = S_IFREG | 0444;
+		stbuf->st_nlink = 1;
+		stbuf->st_size = qemu_parttbl_str_len;
+		return 0;
+	}
+
+	for (i = 0; i < QDISK_MAX_PART; i++) {
+		if (!qparttbl[i].path
+		    || (strcmp(path, qparttbl[i].path) != 0))
+			continue;
+		stbuf->st_mode = S_IFREG | 0666;
+		stbuf->st_nlink = 1;
+		stbuf->st_size = (uint64_t) qparttbl[i].count * 512;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int qemu_readdir(const char *path, void *buf,
+			fuse_fill_dir_t filler, off_t offset,
+			struct fuse_file_info *fi)
+{
+	int i;
+
+	(void) offset;
+	(void) fi;
+
+	if (strcmp(path, "/") != 0)
+		return -ENOENT;
+
+	filler(buf, ".", NULL, 0);
+	filler(buf, "..", NULL, 0);
+	filler(buf, qemu_parttbl_path + 1, NULL, 0);
+	for (i = 0; i < QDISK_MAX_PART; i++)
+		if (qparttbl[i].path)
+			filler(buf, qparttbl[i].path + 1, NULL, 0);
+	return 0;
+}
+
+static int qemu_open(const char *path, struct fuse_file_info *fi)
+{
+	if (!has_path(path))
+		return -ENOENT;
+
+	return 0;
+}
+
+static int qemu_read(const char *path, char *buf, size_t size,
+		     off_t offset, struct fuse_file_info *fi)
+{
+	QDiskPart *qpart;
+
+	if (strcmp(path, qemu_parttbl_path) == 0)
+		return qemu_parttbl_read(buf, size, offset);
+	if (!(qpart = find_part(path)))
+		return -ENOENT;
+	return bdrv_pread(qemu_bs, offset + qpart->start * 512, buf, size);
+}
+
+static int qemu_write(const char *path, const char *buf, size_t size,
+		      off_t offset, struct fuse_file_info *fi)
+{
+	QDiskPart *qpart;
+
+	fprintf(stderr, "W: path %s\n", path);
+	if (strcmp(path, qemu_parttbl_path) == 0)
+		return -EPERM;
+	if (!(qpart = find_part(path)))
+		return -ENOENT;
+	return bdrv_pwrite(qemu_bs, offset + qpart->start * 512, buf,
+			   size);
+}
+
+static int qemu_flush(const char *path, struct fuse_file_info *fi)
+{
+	bdrv_flush(qemu_bs);
+	return 0;
+}
+
+/** Rename a file */
+//static int qemu_rename (const char *, const char *);
+
+/** Create a hard link to a file */
+//int qemu_link (const char *, const char *){}
+
+
+/** Change the permission bits of a file */
+static int qemu_chmod(const char *s, mode_t m)
+{
+	return 0;
+}
+
+/** Change the owner and group of a file */
+static int qemu_chown(const char *s, uid_t u, gid_t g)
+{
+	return 0;
+}
+
+    /** Change the size of a file */
+static int qemu_truncate(const char *s, off_t t)
+{
+	return 0;
+}
+
+/** Change the access and/or modification times of a file
+ *
+ * Deprecated, use utimens() instead.
+ */
+static int qemu_utime(const char *s, struct utimbuf *u)
+{
+	return 0;
+}
+
+static struct fuse_operations qemu_oper = {
+	.getattr = qemu_getattr,
+	.readdir = qemu_readdir,
+	.open = qemu_open,
+	.read = qemu_read,
+	.write = qemu_write,
+	.flush = qemu_flush,
+	//    .rename = qemu_rename,
+	.chmod = qemu_chmod,
+	.chown = qemu_chown,
+	.truncate = qemu_truncate,
+	.utime = qemu_utime,
+};
+
+char *init_fs(void)
+{
+	unsigned char buf[DOS_PARTTBL_SZ];
+
+	if (bdrv_pread(qemu_bs, DOS_PARTTBL_OFFS, buf, DOS_PARTTBL_SZ) !=
+	    DOS_PARTTBL_SZ)
+		error("can't read partion table, bad read size");
+
+	return build_partition_table(buf, qparttbl);
+}
+
+int main(int argc, char *argv[])
+{
+	char *err, *filename, *fmt = NULL, **newargs;
+	int forground = 0, debug = 0;
+	int c, r;
+
+	prog = strrchr(argv[0], '/');
+	if (!prog)
+		prog = argv[0];
+
+	for (;;) {
+		/*
+		   The + in the start of opsting is to force POSIX parsing -
+		   i.e. stop parsing at first non option. This is required to
+		   handle fuse options correctly. Without the +, the getopt will
+		   permute the options to force them at start...
+		 */
+		c = getopt(argc, argv, "+f:hFd");
+		if (c == -1)
+			break;
+		switch (c) {
+		case 'h':
+			help();
+			break;
+		case 'f':
+			fmt = optarg;
+			break;
+		case 'F':
+			forground = 1;
+			break;
+		case 'd':
+			debug = 1;
+			forground = 1;
+			break;
+		}
+	}
+	if (optind >= argc)
+		help();
+	filename = argv[optind++];
+
+	
+	if (!forground) {
+		if ((r = fork()) < 0)
+			error("can't fork");
+		if (r > 0)
+			exit(0);
+		/* chield */
+		if (daemon(0, 0) < 0) {
+			error("qemu-fuse: failed to daemonize program\n");
+			return -1;
+		}
+	}
+	
+	bdrv_init();
+
+	open_img(filename, fmt);
+
+	if ((err = init_fs()))
+		error("init: %s", err);
+
+	// remove first arg and pass the rest to fuse_main
+	if (argc < optind)
+		error("Missing mount point");
+
+	argc -= optind - 1;
+
+	if (!(newargs = calloc(argc + 3, sizeof(char *))))
+		error("out of mem");
+	memcpy(newargs, argv + optind - 1, argc * sizeof(char *));
+	newargs[0] = argv[0];
+	newargs[argc] = "-s";	/* force single thread mode - qemu code is not thread safe */
+	if (debug)
+		newargs[argc + 1] = "-d";	/* force debug and forground mode */
+	else
+		newargs[argc + 1] = "-f";	/* force forground mode, overcome signal masking problems */
+	newargs[argc + 2] = 0;
+
+	return fuse_main(argc + 2, newargs, &qemu_oper, NULL);
+}
Index: Makefile
===================================================================
--- Makefile	(revision 5089)
+++ Makefile	(working copy)
@@ -188,6 +188,14 @@
 qemu-img-%.o: %.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -c -o $@ $<
 
+ifdef CONFIG_FUSE
+qemu-fuse$(EXESUF): qemu-fuse.o qemu-img-block.o $(QEMU_IMG_BLOCK_OBJS)
+	$(CC) $(LDFLAGS)  -o $@ $^ $(FUSELIBS) -lz $(LIBS)
+
+qemu-fuse.o: qemu-fuse.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS)  -DQEMU_IMG $(FUSEFLAGS) -g -c -o $@ $<
+endif
+
 %.o: %.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $<
 
@@ -342,6 +350,7 @@
         $(bindir)/qemu-cris \
         $(bindir)/qemu-img \
         $(bindir)/qemu-nbd \
+        $(bindir)/qemu-fuse \
 	$(datadir)/bios.bin \
 	$(datadir)/vgabios.bin \
 	$(datadir)/vgabios-cirrus.bin \
Index: configure
===================================================================
--- configure	(revision 5089)
+++ configure	(working copy)
@@ -110,6 +110,7 @@
 aio="yes"
 nptl="yes"
 mixemu="no"
+fuse="no"
 
 # OS specific
 targetos=`uname -s`
@@ -340,6 +341,8 @@
   ;;
   --disable-aio) aio="no"
   ;;
+  --enable-fuse) fuse="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
@@ -436,6 +439,7 @@
 echo "  --sparc_cpu=V            Build qemu for Sparc architecture v7, v8, v8plus, v8plusa, v9"
 echo "  --disable-vde            disable support for vde network"
 echo "  --disable-aio            disable AIO support"
+echo "  --enable-fuse            enable fuse support"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -479,7 +483,11 @@
 
 if [ "$bsd" = "yes" -o "$darwin" = "yes" -o "$mingw32" = "yes" ] ; then
     AIOLIBS=
+    FUSELIBS=
+    FUSEFLAGS=
 else
+    FUSELIBS="-lfuse"
+    FUSEFLAGS=-D_FILE_OFFSET_BITS=64
     # Some Linux architectures (e.g. s390) don't imply -lpthread automatically.
     AIOLIBS="-lrt -lpthread"
 fi
@@ -889,6 +897,22 @@
   fi
 fi
 
+##########################################
+# Fuse probe
+if test "$fuse" = "yes" ; then
+  fuse=no
+  cat > $TMPC << EOF
+#define FUSE_USE_VERSION 26
+#include <fuse/fuse_lowlevel.h>
+#include <fuse.h>
+static struct fuse_operations qemu_oper = {};
+int main(int argc, char **argv) { return fuse_main(argc, argv, &qemu_oper, NULL);}
+EOF
+  if $cc $ARCH_CFLAGS $FUSEFLAGS -o $TMPE $FUSELIBS $TMPC 2> /dev/null ; then
+    fuse=yes
+  fi
+fi
+
 # Check if tools are available to build documentation.
 if [ -x "`which texi2html 2>/dev/null`" ] && \
    [ -x "`which pod2man 2>/dev/null`" ]; then
@@ -961,6 +985,7 @@
 echo "NPTL support      $nptl"
 echo "vde support       $vde"
 echo "AIO support       $aio"
+echo "FUSE support      $fuse"
 
 if test $sdl_too_old = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -1007,6 +1032,9 @@
 echo "LDFLAGS=$LDFLAGS" >> $config_mak
 echo "EXESUF=$EXESUF" >> $config_mak
 echo "AIOLIBS=$AIOLIBS" >> $config_mak
+echo "FUSELIBS=$FUSELIBS" >> $config_mak
+echo "FUSEFLAGS=$FUSEFLAGS" >> $config_mak
+
 case "$cpu" in
   i386)
     echo "ARCH=i386" >> $config_mak
@@ -1216,6 +1244,10 @@
 if test "$aio" = "yes" ; then
   echo "#define CONFIG_AIO 1" >> $config_h
 fi
+if test "$fuse" = "yes" ; then
+  echo "#define CONFIG_FUSE 1" >> $config_h
+  echo "CONFIG_FUSE=yes" >> $config_mak
+fi
 
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
@@ -1232,6 +1264,9 @@
   if [ "$linux" = "yes" ] ; then
       tools="qemu-nbd\$(EXESUF) $tools"
   fi
+  if [ "$fuse" = "yes" ];then
+      tools="$tools qemu-fuse\$(EXESUF)"
+  fi
 fi
 echo "TOOLS=$tools" >> $config_mak
 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 15:50 [Qemu-devel][PATCH] qemu-fuse Shahar Frank
@ 2008-08-27 16:28 ` Anthony Liguori
  2008-08-27 17:02   ` Luca Bigliardi
  2008-08-27 18:00   ` [Qemu-devel][PATCH] qemu-fuse Jamie Lokier
  2008-08-28 21:05 ` [Qemu-devel] Re: [PATCH] qemu-fuse Szabolcs Szakacsits
  1 sibling, 2 replies; 13+ messages in thread
From: Anthony Liguori @ 2008-08-27 16:28 UTC (permalink / raw)
  To: qemu-devel

Shahar Frank wrote:
> Hi All,
>
> 	The attached is a small utility to mount qemu images as pseudo
> partition files. It can be very useful to access (rw) images from the
> host.
> I would not use it for production, but it seems to be stable.
>   

Heh, quite similar to:

http://hg.codemonkey.ws/qemu-img-mount/file/9f11265882b3/qemu-img-mount.diff

So this was my first approach to solving this problem that I ended up 
abandoning in favor of qemu-nbd (which is now in the tree).  The main 
limitation I found with fuse is that by default (and it takes a lot to 
change this), fuse filesystems are unreadable by root.  This means that 
you cannot easily mount -oloop something in a fuse filesystem.

What I really wanted, was the ability to say something like:

qemu-img mount -p1 foo.img /mnt

and it would Just Work.  The only two options I found that could make 
this work is:

1) disable the root readable limitation in fuse, this has to be done 
globally and I suspect is unlikely to be done by distros.

2) add support to qemu-img-mount to use filesystems libraries like 
libe2fs to read and present the filesystem contents.  This has the 
advantage of working entirely as the user who issues the command but has 
the disadvantage of requiring a lot of effort without being able to 
support all filesystems that Linux supports.

So that's why I settled on qemu-nbd.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 16:28 ` Anthony Liguori
@ 2008-08-27 17:02   ` Luca Bigliardi
  2008-08-27 18:00     ` Anthony Liguori
  2008-08-27 18:00   ` [Qemu-devel][PATCH] qemu-fuse Jamie Lokier
  1 sibling, 1 reply; 13+ messages in thread
From: Luca Bigliardi @ 2008-08-27 17:02 UTC (permalink / raw)
  To: qemu-devel

On Wed, Aug 27, 2008 at 11:28 AM, Anthony Liguori wrote:

> 1) disable the root readable limitation in fuse, this has to be done  
> globally and I suspect is unlikely to be done by distros.

I'm sorry, why you don't like to pass '-o allow_root' to fuse_main() from
your application ?


Thank you,

luca

-- 
Beware of programmers who carry screwdrivers.
                        -- Leonard Brandwein

http://shammash.homelinux.org/ - http://www.artha.org/ - http://www.yue.it/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 17:02   ` Luca Bigliardi
@ 2008-08-27 18:00     ` Anthony Liguori
  2008-08-27 18:10       ` Daniel P. Berrange
  0 siblings, 1 reply; 13+ messages in thread
From: Anthony Liguori @ 2008-08-27 18:00 UTC (permalink / raw)
  To: qemu-devel

Luca Bigliardi wrote:
> On Wed, Aug 27, 2008 at 11:28 AM, Anthony Liguori wrote:
>
>   
>> 1) disable the root readable limitation in fuse, this has to be done  
>> globally and I suspect is unlikely to be done by distros.
>>     
>
> I'm sorry, why you don't like to pass '-o allow_root' to fuse_main() from
> your application ?
>   

It also requires user_allow_root to be specified in /etc/fuse.conf.

Why don't you like qemu-nbd?  FWIW, I'm open to fuse but I don't think 
we should have both an NBD server and a fuse filesystem.  I think we 
should try and have a single mechanism to access guest images.

Regards,

Anthony Liguori

> Thank you,
>
> luca
>
>   

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 16:28 ` Anthony Liguori
  2008-08-27 17:02   ` Luca Bigliardi
@ 2008-08-27 18:00   ` Jamie Lokier
  1 sibling, 0 replies; 13+ messages in thread
From: Jamie Lokier @ 2008-08-27 18:00 UTC (permalink / raw)
  To: qemu-devel

Anthony Liguori wrote:
> 1) disable the root readable limitation in fuse, this has to be done 
> globally and I suspect is unlikely to be done by distros.

Can't you read even if you do the whole fuse setup as root (including
real user-id etc.)?

If that's a real problem with fuse in some applications, I'm surprised
there isn't at least an option, available only to root, to relax it.

Or maybe they just don't know about this scenario, and a fix would be
welcome...

> So that's why I settled on qemu-nbd.

Yeah, it's a really nice idea, thank you :-)

A FUSE-style "userspace block device" which doesn't require any root
access would be nice in principle, but qemu-nbd does the job and
doesn't require any add-ons.

-- Jamie

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 18:00     ` Anthony Liguori
@ 2008-08-27 18:10       ` Daniel P. Berrange
  2008-08-27 18:59         ` Jamie Lokier
  0 siblings, 1 reply; 13+ messages in thread
From: Daniel P. Berrange @ 2008-08-27 18:10 UTC (permalink / raw)
  To: qemu-devel

On Wed, Aug 27, 2008 at 01:00:27PM -0500, Anthony Liguori wrote:
> Luca Bigliardi wrote:
> >On Wed, Aug 27, 2008 at 11:28 AM, Anthony Liguori wrote:
> >
> >  
> >>1) disable the root readable limitation in fuse, this has to be done  
> >>globally and I suspect is unlikely to be done by distros.
> >>    
> >
> >I'm sorry, why you don't like to pass '-o allow_root' to fuse_main() from
> >your application ?
> >  
> 
> It also requires user_allow_root to be specified in /etc/fuse.conf.
> 
> Why don't you like qemu-nbd?  FWIW, I'm open to fuse but I don't think 
> we should have both an NBD server and a fuse filesystem.  I think we 
> should try and have a single mechanism to access guest images.

Well qemu-nbd also allows sharing to remote machines, whereas fuse
only exposes it locally. qemu-nbd also allows access to things which
aren't filesystems. eg, you could have LVM / software RAID devices
inside your qcow file - qemu-fuse won't help you get access to those.

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 18:10       ` Daniel P. Berrange
@ 2008-08-27 18:59         ` Jamie Lokier
  2008-08-28  8:03           ` Shahar Frank
  0 siblings, 1 reply; 13+ messages in thread
From: Jamie Lokier @ 2008-08-27 18:59 UTC (permalink / raw)
  To: Daniel P. Berrange, qemu-devel

> > >I'm sorry, why you don't like to pass '-o allow_root' to fuse_main() from
> > >your application ?
> > >  
> > 
> > It also requires user_allow_root to be specified in /etc/fuse.conf.

You need root access to use qemu-nbd (to mount it), and if you mount
as root you don't need to modify /etc/fuse.conf, so they're equivalent
in this regard.

> Well qemu-nbd also allows sharing to remote machines, whereas fuse
> only exposes it locally. qemu-nbd also allows access to things which
> aren't filesystems. eg, you could have LVM / software RAID devices
> inside your qcow file - qemu-fuse won't help you get access to those.

Yes, qemu-nbd lets you use all the kernel filesystems, different
partition formats, RAID, encryption etc.

-- Jamie

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [Qemu-devel][PATCH] qemu-fuse
  2008-08-27 18:59         ` Jamie Lokier
@ 2008-08-28  8:03           ` Shahar Frank
  2008-08-28  9:27             ` [Qemu-devel][PATCH] block level testing/execersing utility Shahar Frank
  0 siblings, 1 reply; 13+ messages in thread
From: Shahar Frank @ 2008-08-28  8:03 UTC (permalink / raw)
  To: qemu-devel

> From: qemu-devel-bounces+shaharf=qumranet.com@nongnu.org [mailto:qemu-
> devel-bounces+shaharf=qumranet.com@nongnu.org] On Behalf Of Jamie
Lokier
> Sent: Wednesday, August 27, 2008 9:59 PM
> To: Daniel P. Berrange; qemu-devel@nongnu.org
> Subject: Re: [Qemu-devel][PATCH] qemu-fuse
> 
> 
> > Well qemu-nbd also allows sharing to remote machines, whereas fuse
> > only exposes it locally. qemu-nbd also allows access to things which
> > aren't filesystems. eg, you could have LVM / software RAID devices
> > inside your qcow file - qemu-fuse won't help you get access to
those.
> 
> Yes, qemu-nbd lets you use all the kernel filesystems, different
> partition formats, RAID, encryption etc.
> 
> -- Jamie
> 

If qemu-nbd is covering all useful use cases than qemu-fuse is obsolete.
In fact I wouldn't have developed it if I had qemu-nbd when I wrote it.
I sent it because it was useful for me and I enjoyed using fuse (my
first fuse server....).

I find qemu-ndb to be small, smart and elegant. In fact I once
considered to develop a remoting method based on RPC - to remote qemu
image level verbs. I wonder if qemu-ndb is good enough for remoting or
there is still use cases (or performance considerations) that would
justify RPC level (SUN's or other) remoting. What you do think?

Shahar

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Qemu-devel][PATCH] block level testing/execersing utility
  2008-08-28  8:03           ` Shahar Frank
@ 2008-08-28  9:27             ` Shahar Frank
  2008-08-28  9:47               ` Samuel Thibault
  2008-08-28 20:11               ` Anthony Liguori
  0 siblings, 2 replies; 13+ messages in thread
From: Shahar Frank @ 2008-08-28  9:27 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 28261 bytes --]

Hi All,

The attached is a small utility to test and exercise block level device.
I wrote it to test and benchmark Qemu image backends and to test Qemu
image related features. I think it may be useful for others too.

For the ones who ask why we need yet another tool, I can say that I
found that most existing tools are benchmarks oriented which make them
much less useful as a developing aid. I needed something that it is at
the level of dd but can do also random IO and multiple threading.

I added also sample scripts.

I hope it will be useful also for other people developing image related
features.

Shahar

Index: btest/btest.c
===================================================================
--- btest/btest.c	(revision 0)
+++ btest/btest.c	(revision 0)
@@ -0,0 +1,755 @@
+/*
+ * Block test/exerciser utility
+ *
+ * Copyright (c) 2008 Shahar Frank, Qumranet
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a copy
+ * of this software and associated documentation files (the
"Software"), to deal
+ * in the Software without restriction, including without limitation
the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell
+ * copies of the Software, and to permit persons to whom the Software
is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <malloc.h>
+
+#define BTEST_VERSION 1
+
+int secs = 60;
+int threads = 1;
+int def_blocksize = 4 * 1024;
+int openflags = O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC;
+int write_behind;
+
+char *prog;
+int debug;
+int dorandom;
+int doread;
+
+typedef struct IOStats {
+	char *title;
+	uint64_t duration;
+	uint64_t sduration;		/* sync duration */
+	uint64_t lat;
+	uint64_t slat;			/* sync latency */
+	uint64_t ops;
+	uint64_t bytes;
+	uint64_t errors;
+} IOStats;
+
+struct shared {
+	pthread_cond_t start_cond;
+	pthread_mutex_t lock;
+	int started;
+	int finished;
+	IOStats total;
+} shared = {
+	PTHREAD_COND_INITIALIZER,
+	PTHREAD_MUTEX_INITIALIZER,
+	};
+
+volatile int finished;
+
+/** printf style debugging MACRO, conmmon header includes name of
function */
+#define WARN(fmt, args...)	warn(__FUNCTION__, fmt, ## args)
+
+/** printf style abort MACRO, conmmon header includes name of function
*/
+#define PANIC(fmt, args...)	panic(__FUNCTION__, fmt, ## args)
+
+#define DEBUG(fmt, args...)	if (debug) warn(__FUNCTION__, fmt, ##
args)
+#define DEBUG2(fmt, args...)	if (debug > 1) warn(__FUNCTION__, fmt,
## args)
+#define DEBUG3(fmt, args...)	if (debug > 2) warn(__FUNCTION__, fmt,
## args)
+
+#ifndef BLKGETSIZE
+#define BLKGETSIZE _IO(0x12,96)
+#endif
+
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+#endif
+
+uint64_t
+timestamp(void)
+{
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+/**
+ * Show a message and abort the probram.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void panic(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "PANIC: [%d:%" PRId64 "] %s: %s%s%s\n",
getpid(), timestamp(), fn, buf, errno ? ": " : "", errno ?
strerror(errno) : "");
+
+	exit(-1);
+}
+
+/**
+ * Print a message to the stderr.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void warn(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "[%s:%d:%" PRId64 "]: %s: %s\n", "btest",
getpid(), timestamp(), fn, buf);
+}
+
+uint64_t parse_storage_size(char *arg)
+{
+	int l = strlen(arg);
+	uint64_t factor = 1;
+
+	arg = strdupa(arg);
+	switch (arg[l - 1]) {
+	case 'G':
+	case 'g':
+		factor = 1 << 30;
+		break;
+	case 'M':
+	case 'm':
+		factor = 1 << 20;
+		break;
+	case 'K':
+	case 'k':
+		factor = 1 << 10;
+		break;
+	case 'B':
+	case 'b':
+		factor = 512;
+		break;
+	default:
+		l++;
+	}
+	arg[l] = 0;
+	return strtoull(arg, 0, 0) * factor;
+}
+
+static int64_t
+blockdev_getsize(int fd)
+{
+	int64_t b;
+	long sz;
+	int err;
+
+	err = ioctl (fd, BLKGETSIZE, &sz);
+	if (err)
+		return err;
+
+	err = ioctl(fd, BLKGETSIZE64, &b);
+	if (err || b == 0 || b == sz)
+		b = sz << 9;
+	return b;
+} 
+
+static int64_t
+getsize(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0) {
+		WARN("fstat failed: %m");
+		return -1;
+	}
+	
+	if (S_ISBLK(st.st_mode))
+		return blockdev_getsize(fd);
+	
+	if (S_ISREG(st.st_mode))
+		return st.st_size;
+	
+	WARN("unsupported file type");
+	return -1;
+}
+
+typedef struct worker_arg {
+	int fd;
+	int blocksize;
+	char *file;
+	int64_t size;
+	loff_t offset;
+	loff_t startoffset;
+	loff_t endoffset;
+	int randomratio;
+	int readratio;
+	void *buf;
+	pid_t tid;
+	int (*io)(struct worker_arg *);
+	IOStats stats;
+	struct worker_arg *next;
+} worker_arg;
+
+worker_arg *workers;
+
+int
+do_seq_read(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file,
arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	if (read(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_seq_write(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file,
arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	if (write(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_rand_read(worker_arg *arg)
+{
+	arg->offset = random() * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset %
(arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file,
arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	if (read(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_rand_write(worker_arg *arg)
+{
+	arg->offset = random() * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset %
(arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file,
arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	//DEBUG3("write at offset %" PRIu64 " count %d", arg->offset,
arg->blocksize);
+	if (write(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_io(worker_arg *arg)
+{
+	int (*io)(struct worker_arg *);
+	int doread = 0, dorandom = 0;
+
+	if (arg->readratio == 100)
+		doread = 1;
+	else if (arg->readratio == 0)
+		doread = 0;
+	else
+		doread = (random() % 100) < arg->readratio;	
+		
+	if (arg->randomratio == 100)
+		dorandom = 1 << 1;
+	else if (arg->randomratio == 0)
+		dorandom = 0 << 1;
+	else
+		dorandom = ((random() % 100) < arg->randomratio) ? 1 <<
1 : 0 << 1;	
+
+	switch (doread | dorandom) {
+	case 0:
+		DEBUG3("%s %d: seq write", arg->file, arg->tid);
+		io = do_seq_write;
+		break;
+	case 1:
+		DEBUG3("%s %d: seq read", arg->file, arg->tid);
+		io = do_seq_read;
+		break;
+	case 2:
+		DEBUG3("%s %d: random write", arg->file, arg->tid);
+		io = do_rand_write;
+		break;
+	case 3:
+		DEBUG3("%s %d: random read", arg->file, arg->tid);
+		io = do_rand_read;
+		break;
+	}
+	
+	return io(arg);
+}
+
+void
+summary(char *title, IOStats *stats)
+{
+	printf("%s: %.3f seconds, %" PRIu64 " ops, avg latency %" PRIu64
" usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		title,
+		stats->duration * 1.0/ (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration /
1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+char *
+randomratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "S";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+char *
+readratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "W";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+int
+gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+void
+worker_summary(worker_arg *arg)
+{
+	IOStats *stats = &arg->stats;
+	
+	printf("%s %s %s %d %"PRIu64 " %" PRIu64 ": %.3f seconds, %"
PRIu64
+		" ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		arg->file,
+		randomratio_str(arg->randomratio, alloca(8)),
+		readratio_str(arg->readratio, alloca(8)),
+		arg->blocksize,
+		arg->startoffset, arg->endoffset,
+		stats->duration * 1.0 / (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration /
1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+void
+dostats(int sig)
+{
+	worker_arg *worker;
+	
+	pthread_mutex_lock(&shared.lock);
+	for (worker = workers; worker; worker = worker->next)
+		worker_summary(worker);
+	pthread_mutex_unlock(&shared.lock);	
+}
+
+/*char *
+parse_worker_arg(worker_arg *arg, char *line)
+{
+	char *s = line;
+	
+	while (*s && !isalpha(*s))
+		s++;
+	if (!*s || (toupper(*s) != 'R' && toupeer(*s) != 'S' &&
!isdigit(*s))
+		return "random ratio";
+	return 0;
+}
+*/
+		
+void *
+worker(worker_arg *arg)
+{
+	struct timespec t1, t2;
+	IOStats *stats = &arg->stats;
+
+	arg->tid = gettid();
+	DEBUG("%d: starting worker thread on '%s'", arg->tid,
arg->file);
+	
+	pthread_mutex_lock(&shared.lock);
+	shared.started++;
+	pthread_cond_wait(&shared.start_cond, &shared.lock);
+	pthread_mutex_unlock(&shared.lock);
+	
+	DEBUG("%d: !! worker thread on '%s'", arg->tid, arg->file);
+	while (!finished) {
+		clock_gettime(CLOCK_REALTIME, &t1);
+		if (do_io(arg) < 0) {
+			//if (debug)
+				WARN("%d: IO error on '%s': %m",
arg->tid, arg->file);
+			stats->errors++;
+		} else {
+			clock_gettime(CLOCK_REALTIME, &t2);
+			stats->duration += (t2.tv_sec - t1.tv_sec) *
1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+			stats->ops++;
+			stats->bytes += arg->blocksize;
+		}
+	}
+	stats->lat = stats->duration / stats->ops;
+	worker_summary(arg);
+
+	pthread_mutex_lock(&shared.lock);
+	shared.finished++;
+	shared.total.errors += stats->errors;
+	shared.total.ops += stats->ops;
+	shared.total.duration += stats->duration;
+	shared.total.bytes += stats->bytes;
+	shared.total.lat += stats->lat;
+	pthread_mutex_unlock(&shared.lock);
+	
+	return 0;
+}
+
+/**
+ * Create and initialize new worker thread.
+ * Returns the newly created thread ID.
+ */
+pthread_t
+new_worker(char *file, int blocksize, int randomratio, int readratio,
uint64_t start, uint64_t len)
+{
+	worker_arg *arg;
+	pthread_t thid;
+	int fd;
+	
+	openflags |= (readratio == 100) ? O_RDONLY : O_RDWR;
+
+	DEBUG("open flags: 0x%x", openflags);
+	if ((fd = open(file, openflags, 0600)) < 0)
+		PANIC("open '%s' failed", file);
+		
+	if (!(arg = calloc(1, sizeof *arg)))
+		PANIC("out of mem - alloc arg");
+
+	pthread_mutex_lock(&shared.lock);
+	arg->next = workers ;
+	workers = arg;
+	pthread_mutex_unlock(&shared.lock);
+
+	arg->randomratio = randomratio;
+	arg->readratio = readratio;
+	arg->fd = fd;
+	arg->file = strdup(file);
+	arg->blocksize = blocksize;
+	arg->startoffset = start;
+	
+	if ((arg->size = getsize(fd)) < 0)
+		PANIC("can't get size of '%s'", file);
+	
+	if (len == 0 && arg->size > arg->startoffset + blocksize)
+		len = arg->size - arg->startoffset;
+
+	arg->endoffset = arg->startoffset + len;
+	if (arg->size == 0)
+		arg->size = arg->endoffset;
+
+	DEBUG("'%s' size is %" PRId64 " using blocksize %d", file,
arg->size, arg->blocksize);
+	if (arg->endoffset - arg->startoffset < blocksize)
+		PANIC("file '%s' is too small, min size is one block
(%d)", file, blocksize);
+	if (arg->endoffset > arg->size)
+		PANIC("file '%s' offset %" PRId64 " is out of
file/device size range (%"PRId64")",
+			file, arg->endoffset, arg->size);
+	
+	if (!(arg->buf = valloc(blocksize)))
+		PANIC("can't alloc buf sized %d bytes", blocksize);
+	memset(arg->buf, 0, blocksize);
+
+	if (pthread_create(&thid, NULL, (void *(*)(void *))worker, arg))
+		PANIC("thread creation failed [file %s]", file);
+	
+	DEBUG("thread %d created", thid);
+	return thid;
+}
+
+int
+start(int n)
+{
+	time_t t;
+
+	pthread_mutex_lock(&shared.lock);
+	while (n > shared.started) {
+		DEBUG("wait: n %d started %d", n, shared.started);
+		pthread_mutex_unlock(&shared.lock);
+		sleep(1);
+		pthread_mutex_lock(&shared.lock);
+	}
+	pthread_mutex_unlock(&shared.lock);
+
+	time(&t);
+	printf("%d threads are ready, starting test at %s", n,
ctime(&t));
+	pthread_cond_broadcast(&shared.start_cond);
+	return 0;
+}
+	
+void
+flush(worker)
+{
+	worker_arg *w;
+	struct timespec t1, t2;
+	IOStats *stats;
+	
+	for (w = workers; w; w = w->next) {
+		stats = &w->stats;
+		clock_gettime(CLOCK_REALTIME, &t1);
+		fsync(w->fd);
+		close(w->fd);
+		clock_gettime(CLOCK_REALTIME, &t2);
+		stats->sduration = (t2.tv_sec - t1.tv_sec) * 1000000llu
+ (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+		shared.total.sduration += stats->sduration;
+	}
+}
+
+int
+finish(pthread_t *thread_list, int n)
+{
+	int i;
+	
+	finished = 1;
+	for (i = 0; i < n; i++) {
+		pthread_mutex_lock(&shared.lock);
+		DEBUG("wait: n %d finished %d", n, shared.finished);
+		if (shared.finished >= n)
+			break;	// shread lock is still locked, but we
are alone, so it is ok
+		pthread_mutex_unlock(&shared.lock);
+		
+		pthread_join(thread_list[i], NULL);
+	}
+	if (write_behind)
+		flush();
+	shared.total.duration /= n;
+	shared.total.lat /= n;
+	shared.total.slat /= n;
+	return 0;
+}
+
+void usage(void)
+{
+	printf("Usage: %s [-hdV -W -D -b <blocksize> -t <sec> -T
<threds_per_dev> -o <startoffset> -l <length> -S <seed>]
<S|R|random-ratio> <R|W|read-ratio> <dev/file> ...\n",
+	     prog);
+	printf("\n\tDefaults:\n");
+	printf("\t\tBlocksize %d\n", def_blocksize);
+	printf("\t\tDuration in seconds %d\n", secs);
+	printf("\t\tNumber of threads per file %d\n", threads);
+	printf("\t\tThe default start offset is 0\n");
+	printf("\t\tThe default length for IO is the size of the
file/device\n");
+	printf("\t\tThe default random seed is the current time\n");
+	printf("\t\tThe default open flags are:\n");
+	printf("\t\t\t O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC\n");
+	printf("\t\tWrite behind mode (-W): O_CREAT | O_LARGEFILE |
O_NOATIME \n");
+	printf("\t\tDirect IO mode (-D): O_CREAT | O_LARGEFILE |
O_NOATIME | O_DIRECT \n");
+
+	exit(1);
+}
+
+pthread_t *thread_list;
+
+void doexit(int sig)
+{
+	time_t t;
+	finish(thread_list, shared.started);
+	summary("Total", &shared.total);
+	if (write_behind) {
+		shared.total.duration += shared.total.sduration;
+		shared.total.lat = shared.total.duration /
shared.total.ops;
+		summary("Synced", &shared.total);
+	}
+	time(&t);
+	printf("Test is done at %s", ctime(&t));
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct timespec duration = {0}, remaining = {0};
+	int i, t, opt, nfiles, nthreads;
+	int blocksize = def_blocksize;
+	uint64_t len = 0, startoff = 0;
+	int seed = time(0);
+
+	prog = strchr(argv[0], '/');
+	if (!prog)
+		prog = argv[0];
+	else
+		prog++;
+
+	while ((opt = getopt(argc, argv, "+hVdt:T:b:s:o:l:S:DW")) != -1)
{
+		switch (opt) {
+		default:
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf("%s version %d\n", prog, BTEST_VERSION);
+			exit(0);
+		case 'd':
+			debug++;
+			break;
+		case 'b':
+			blocksize = parse_storage_size(optarg);
+			if (!blocksize)
+				PANIC("invalid blocksize parameter: -b
%s",
+				      optarg);
+			printf("IO Block size is %d\n", blocksize);
+			break;
+		case 'o':
+			startoff = parse_storage_size(optarg);
+			printf("File start offset is %" PRId64 "\n",
startoff);
+			break;
+		case 'l':
+			len = parse_storage_size(optarg);
+			if (!len)
+				PANIC("invalid len size parameter: -l
%s",
+				      optarg);
+			printf("Limit IO space to %s (%" PRId64 " bytes)
per file\n", optarg, len);
+			break;
+		case 'S':
+			seed = atoi(optarg);
+			printf("Use random seed %d\n", seed);
+			break;
+		case 't':
+			secs = atoi(optarg);
+			if (!secs)
+				PANIC("invalid seconds parameter: -t
%s",
+				      optarg);
+			break;
+		case 'T':
+			threads = atoi(optarg);
+			if (!threads)
+				PANIC("invalid threads parameter: -T
%s",
+				      optarg);
+			break;
+		case 'W':
+			printf("Allow write behind\n");
+			openflags &= ~(O_SYNC|O_DIRECT);
+			write_behind = 1;
+			break;
+		case 'D':
+			printf("Use direct IO\n");
+			openflags &= ~O_SYNC;
+			openflags |= O_DIRECT;
+			break;		
+		}
+	}
+	if (argc - optind < 3)
+		usage();
+		
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		dorandom = 100;
+		break;
+	case 'S':
+	case 's':
+		dorandom = 0;
+		break;
+	default:
+		dorandom = atoi(argv[optind]);
+		if (dorandom < 0 || dorandom > 100)
+			PANIC("bad random/sequencial parameter: should
be R|S|0-100");
+	}
+	optind++;
+	
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		doread = 100;
+		break;
+	case 'W':
+	case 'w':
+		doread = 0;
+		break;
+	default:
+		doread = atoi(argv[optind]);
+		if (doread < 0 || doread > 100)
+			PANIC("bad read/write parameter: should be
R|W|0-100");
+	}
+	optind++;
+	
+	DEBUG("using random seed %d", seed);
+	srandom(seed);
+	
+	nfiles = argc - optind;
+	nthreads = nfiles * threads;
+	if (!(thread_list = calloc(nthreads, sizeof(*thread_list))))
+		PANIC("no mem for thread list (threads %d)", threads *
nfiles);
+		
+	for (i = 0; i < nfiles; i++)
+		for (t = 0; t < threads; t++)
+			thread_list[i * threads + t] =
+				new_worker(argv[optind + i], blocksize,
dorandom, doread, startoff, len);
+
+	signal(SIGTERM, doexit);
+	signal(SIGINT, doexit);
+	signal(SIGUSR1, dostats);
+	start(nthreads);
+	
+	duration.tv_sec = secs;
+
+	while (nanosleep(&duration, &remaining) < 0)
+		duration = remaining;
+			
+	doexit(0);
+	
+	return 0;
+}
Index: btest/scripts/btest-test-sum.awk
===================================================================
--- btest/scripts/btest-test-sum.awk	(revision 0)
+++ btest/scripts/btest-test-sum.awk	(revision 0)
@@ -0,0 +1,25 @@
+#!/bin/awk -f
+
+$1 == "@@" { type = $2; types[ntypes++] = type; next }
+
+$1 == "##" { test = $0; if (testcount[test] > 0) next; tests[ntests++]
= test; next }
+
+$1 == "Total:" { totals[type, test] = $0; testcount[test]++; next }
+$1 == "Synced:" { synced[type, test] = $0; next }
+
+END {
+	for (t = 0; t < ntests; t++ ) {
+		test = tests[t]
+		if (testcount[test] == 0)
+			continue
+		print "Test: ", test
+		for (ty = 0; ty < ntypes; ty++) {
+			type = types[ty]
+			if ((type, test) in totals)
+				print type, totals[type, test]
+			if ((type, test) in synced)
+				print type, synced[type, test]
+		}
+	}
+}
+

Property changes on: btest/scripts/btest-test-sum.awk
___________________________________________________________________
Name: svn:executable
   + *

Index: btest/scripts/btest-test
===================================================================
--- btest/scripts/btest-test	(revision 0)
+++ btest/scripts/btest-test	(revision 0)
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+function panic() {
+	echo "Panic: $*" > /dev/stderr
+	exit 1
+}
+
+if [ -z "$1" -o -z "$2" ]; then
+	echo "Usage: `basename $0` <label> <testdir>"
+fi
+
+label=$1
+basedir=$2/btest$$-`date +%s`
+
+echo "@@ $label"
+
+if ! mkdir -p $basedir; then
+	panic "can't mkdir $basedir"
+fi
+
+rm -rf $basedir/*
+
+big=1G
+mid=512M
+small=100M
+
+echo "## Create 16 big ($big) files - 32k block"
+btest -b 32k -t 900 -l $big S W $basedir/fileX{1..16}
+
+echo "## Create 16 big ($big) files - 32k block - allow Write behind"
+btest -W -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 32k block - sync"
+btest -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 4k block - sync"
+btest -b 4k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks
- sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks
- sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks
- direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks
- direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks
- write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks
- write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 10% Random, 75% Read 4k blocks
- sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 5% Random, 75% Read 4k blocks -
sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Create 16 middle ($mid) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $mid S W $basedir/fileM{1..16}
+
+echo "## Create 16 small ($small) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $small S W $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized
($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5}
$basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized
($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5}
$basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized
($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5}
$basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized
($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5}
$basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized
($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - write
behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5}
$basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized
($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - write
behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5}
$basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k
blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
Index: btest/Makefile
===================================================================
--- btest/Makefile	(revision 0)
+++ btest/Makefile	(revision 0)
@@ -0,0 +1,5 @@
+btest: btest.c
+	$(CC) $(CFLAGS) -D _LARGEFILE64_SOURCE -Wall -o $@ $(LDFLAGS) -l
pthread -l rt $<
+	
+clean:
+	rm -f *.o btest

[-- Attachment #2: btest.patch --]
[-- Type: application/octet-stream, Size: 26559 bytes --]

Index: btest/btest.c
===================================================================
--- btest/btest.c	(revision 0)
+++ btest/btest.c	(revision 0)
@@ -0,0 +1,755 @@
+/*
+ * Block test/exerciser utility
+ *
+ * Copyright (c) 2008 Shahar Frank
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <malloc.h>
+
+#define BTEST_VERSION 1
+
+int secs = 60;
+int threads = 1;
+int def_blocksize = 4 * 1024;
+int openflags = O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC;
+int write_behind;
+
+char *prog;
+int debug;
+int dorandom;
+int doread;
+
+typedef struct IOStats {
+	char *title;
+	uint64_t duration;
+	uint64_t sduration;		/* sync duration */
+	uint64_t lat;
+	uint64_t slat;			/* sync duration */
+	uint64_t ops;
+	uint64_t bytes;
+	uint64_t errors;
+} IOStats;
+
+struct shared {
+	pthread_cond_t start_cond;
+	pthread_mutex_t lock;
+	int started;
+	int finished;
+	IOStats total;
+} shared = {
+	PTHREAD_COND_INITIALIZER,
+	PTHREAD_MUTEX_INITIALIZER,
+	};
+
+volatile int finished;
+
+/** printf style debugging MACRO, conmmon header includes name of function */
+#define WARN(fmt, args...)	warn(__FUNCTION__, fmt, ## args)
+
+/** printf style abort MACRO, conmmon header includes name of function */
+#define PANIC(fmt, args...)	panic(__FUNCTION__, fmt, ## args)
+
+#define DEBUG(fmt, args...)	if (debug) warn(__FUNCTION__, fmt, ## args)
+#define DEBUG2(fmt, args...)	if (debug > 1) warn(__FUNCTION__, fmt, ## args)
+#define DEBUG3(fmt, args...)	if (debug > 2) warn(__FUNCTION__, fmt, ## args)
+
+#ifndef BLKGETSIZE
+#define BLKGETSIZE _IO(0x12,96)
+#endif
+
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+#endif
+
+uint64_t
+timestamp(void)
+{
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+/**
+ * Show a message and abort the probram.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void panic(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "PANIC: [%d:%" PRId64 "] %s: %s%s%s\n", getpid(), timestamp(), fn, buf, errno ? ": " : "", errno ? strerror(errno) : "");
+
+	exit(-1);
+}
+
+/**
+ * Print a message to the stderr.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void warn(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "[%s:%d:%" PRId64 "]: %s: %s\n", "btest", getpid(), timestamp(), fn, buf);
+}
+
+uint64_t parse_storage_size(char *arg)
+{
+	int l = strlen(arg);
+	uint64_t factor = 1;
+
+	arg = strdupa(arg);
+	switch (arg[l - 1]) {
+	case 'G':
+	case 'g':
+		factor = 1 << 30;
+		break;
+	case 'M':
+	case 'm':
+		factor = 1 << 20;
+		break;
+	case 'K':
+	case 'k':
+		factor = 1 << 10;
+		break;
+	case 'B':
+	case 'b':
+		factor = 512;
+		break;
+	default:
+		l++;
+	}
+	arg[l] = 0;
+	return strtoull(arg, 0, 0) * factor;
+}
+
+static int64_t
+blockdev_getsize(int fd)
+{
+	int64_t b;
+	long sz;
+	int err;
+
+	err = ioctl (fd, BLKGETSIZE, &sz);
+	if (err)
+		return err;
+
+	err = ioctl(fd, BLKGETSIZE64, &b);
+	if (err || b == 0 || b == sz)
+		b = sz << 9;
+	return b;
+} 
+
+static int64_t
+getsize(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0) {
+		WARN("fstat failed: %m");
+		return -1;
+	}
+	
+	if (S_ISBLK(st.st_mode))
+		return blockdev_getsize(fd);
+	
+	if (S_ISREG(st.st_mode))
+		return st.st_size;
+	
+	WARN("unsupported file type");
+	return -1;
+}
+
+typedef struct worker_arg {
+	int fd;
+	int blocksize;
+	char *file;
+	int64_t size;
+	loff_t offset;
+	loff_t startoffset;
+	loff_t endoffset;
+	int randomratio;
+	int readratio;
+	void *buf;
+	pid_t tid;
+	int (*io)(struct worker_arg *);
+	IOStats stats;
+	struct worker_arg *next;
+} worker_arg;
+
+worker_arg *workers;
+
+int
+do_seq_read(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	if (read(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_seq_write(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	if (write(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_rand_read(worker_arg *arg)
+{
+	arg->offset = random() * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset % (arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	if (read(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_rand_write(worker_arg *arg)
+{
+	arg->offset = random() * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset % (arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (lseek64(arg->fd, arg->offset, SEEK_SET) < 0)
+		return -1;
+	//DEBUG3("write at offset %" PRIu64 " count %d", arg->offset, arg->blocksize);
+	if (write(arg->fd, arg->buf, arg->blocksize) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_io(worker_arg *arg)
+{
+	int (*io)(struct worker_arg *);
+	int doread = 0, dorandom = 0;
+
+	if (arg->readratio == 100)
+		doread = 1;
+	else if (arg->readratio == 0)
+		doread = 0;
+	else
+		doread = (random() % 100) < arg->readratio;	
+		
+	if (arg->randomratio == 100)
+		dorandom = 1 << 1;
+	else if (arg->randomratio == 0)
+		dorandom = 0 << 1;
+	else
+		dorandom = ((random() % 100) < arg->randomratio) ? 1 << 1 : 0 << 1;	
+
+	switch (doread | dorandom) {
+	case 0:
+		DEBUG3("%s %d: seq write", arg->file, arg->tid);
+		io = do_seq_write;
+		break;
+	case 1:
+		DEBUG3("%s %d: seq read", arg->file, arg->tid);
+		io = do_seq_read;
+		break;
+	case 2:
+		DEBUG3("%s %d: random write", arg->file, arg->tid);
+		io = do_rand_write;
+		break;
+	case 3:
+		DEBUG3("%s %d: random read", arg->file, arg->tid);
+		io = do_rand_read;
+		break;
+	}
+	
+	return io(arg);
+}
+
+void
+summary(char *title, IOStats *stats)
+{
+	printf("%s: %.3f seconds, %" PRIu64 " ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		title,
+		stats->duration * 1.0/ (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration / 1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+char *
+randomratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "S";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+char *
+readratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "W";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+int
+gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+void
+worker_summary(worker_arg *arg)
+{
+	IOStats *stats = &arg->stats;
+	
+	printf("%s %s %s %d %"PRIu64 " %" PRIu64 ": %.3f seconds, %" PRIu64
+		" ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		arg->file,
+		randomratio_str(arg->randomratio, alloca(8)),
+		readratio_str(arg->readratio, alloca(8)),
+		arg->blocksize,
+		arg->startoffset, arg->endoffset,
+		stats->duration * 1.0 / (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration / 1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+void
+dostats(int sig)
+{
+	worker_arg *worker;
+	
+	pthread_mutex_lock(&shared.lock);
+	for (worker = workers; worker; worker = worker->next)
+		worker_summary(worker);
+	pthread_mutex_unlock(&shared.lock);	
+}
+
+/*char *
+parse_worker_arg(worker_arg *arg, char *line)
+{
+	char *s = line;
+	
+	while (*s && !isalpha(*s))
+		s++;
+	if (!*s || (toupper(*s) != 'R' && toupeer(*s) != 'S' && !isdigit(*s))
+		return "random ratio";
+	return 0;
+}
+*/
+		
+void *
+worker(worker_arg *arg)
+{
+	struct timespec t1, t2;
+	IOStats *stats = &arg->stats;
+
+	arg->tid = gettid();
+	DEBUG("%d: starting worker thread on '%s'", arg->tid, arg->file);
+	
+	pthread_mutex_lock(&shared.lock);
+	shared.started++;
+	pthread_cond_wait(&shared.start_cond, &shared.lock);
+	pthread_mutex_unlock(&shared.lock);
+	
+	DEBUG("%d: !! worker thread on '%s'", arg->tid, arg->file);
+	while (!finished) {
+		clock_gettime(CLOCK_REALTIME, &t1);
+		if (do_io(arg) < 0) {
+			//if (debug)
+				WARN("%d: IO error on '%s': %m", arg->tid, arg->file);
+			stats->errors++;
+		} else {
+			clock_gettime(CLOCK_REALTIME, &t2);
+			stats->duration += (t2.tv_sec - t1.tv_sec) * 1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+			stats->ops++;
+			stats->bytes += arg->blocksize;
+		}
+	}
+	stats->lat = stats->duration / stats->ops;
+	worker_summary(arg);
+
+	pthread_mutex_lock(&shared.lock);
+	shared.finished++;
+	shared.total.errors += stats->errors;
+	shared.total.ops += stats->ops;
+	shared.total.duration += stats->duration;
+	shared.total.bytes += stats->bytes;
+	shared.total.lat += stats->lat;
+	pthread_mutex_unlock(&shared.lock);
+	
+	return 0;
+}
+
+/**
+ * Create and initialize new worker thread.
+ * Returns the newly created thread ID.
+ */
+pthread_t
+new_worker(char *file, int blocksize, int randomratio, int readratio, uint64_t start, uint64_t len)
+{
+	worker_arg *arg;
+	pthread_t thid;
+	int fd;
+	
+	openflags |= (readratio == 100) ? O_RDONLY : O_RDWR;
+
+	DEBUG("open flags: 0x%x", openflags);
+	if ((fd = open(file, openflags, 0600)) < 0)
+		PANIC("open '%s' failed", file);
+		
+	if (!(arg = calloc(1, sizeof *arg)))
+		PANIC("out of mem - alloc arg");
+
+	pthread_mutex_lock(&shared.lock);
+	arg->next = workers ;
+	workers = arg;
+	pthread_mutex_unlock(&shared.lock);
+
+	arg->randomratio = randomratio;
+	arg->readratio = readratio;
+	arg->fd = fd;
+	arg->file = strdup(file);
+	arg->blocksize = blocksize;
+	arg->startoffset = start;
+	
+	if ((arg->size = getsize(fd)) < 0)
+		PANIC("can't get size of '%s'", file);
+	
+	if (len == 0 && arg->size > arg->startoffset + blocksize)
+		len = arg->size - arg->startoffset;
+
+	arg->endoffset = arg->startoffset + len;
+	if (arg->size == 0)
+		arg->size = arg->endoffset;
+
+	DEBUG("'%s' size is %" PRId64 " using blocksize %d", file, arg->size, arg->blocksize);
+	if (arg->endoffset - arg->startoffset < blocksize)
+		PANIC("file '%s' is too small, min size is one block (%d)", file, blocksize);
+	if (arg->endoffset > arg->size)
+		PANIC("file '%s' offset %" PRId64 " is out of file/device size range (%"PRId64")",
+			file, arg->endoffset, arg->size);
+	
+	if (!(arg->buf = valloc(blocksize)))
+		PANIC("can't alloc buf sized %d bytes", blocksize);
+	memset(arg->buf, 0, blocksize);
+
+	if (pthread_create(&thid, NULL, (void *(*)(void *))worker, arg))
+		PANIC("thread creation failed [file %s]", file);
+	
+	DEBUG("thread %d created", thid);
+	return thid;
+}
+
+int
+start(int n)
+{
+	time_t t;
+
+	pthread_mutex_lock(&shared.lock);
+	while (n > shared.started) {
+		DEBUG("wait: n %d started %d", n, shared.started);
+		pthread_mutex_unlock(&shared.lock);
+		sleep(1);
+		pthread_mutex_lock(&shared.lock);
+	}
+	pthread_mutex_unlock(&shared.lock);
+
+	time(&t);
+	printf("%d threads are ready, starting test at %s", n, ctime(&t));
+	pthread_cond_broadcast(&shared.start_cond);
+	return 0;
+}
+	
+void
+flush(worker)
+{
+	worker_arg *w;
+	struct timespec t1, t2;
+	IOStats *stats;
+	
+	for (w = workers; w; w = w->next) {
+		stats = &w->stats;
+		clock_gettime(CLOCK_REALTIME, &t1);
+		fsync(w->fd);
+		close(w->fd);
+		clock_gettime(CLOCK_REALTIME, &t2);
+		stats->sduration = (t2.tv_sec - t1.tv_sec) * 1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+		shared.total.sduration += stats->sduration;
+	}
+}
+
+int
+finish(pthread_t *thread_list, int n)
+{
+	int i;
+	
+	finished = 1;
+	for (i = 0; i < n; i++) {
+		pthread_mutex_lock(&shared.lock);
+		DEBUG("wait: n %d finished %d", n, shared.finished);
+		if (shared.finished >= n)
+			break;	// shread lock is still locked, but we are alone, so it is ok
+		pthread_mutex_unlock(&shared.lock);
+		
+		pthread_join(thread_list[i], NULL);
+	}
+	if (write_behind)
+		flush();
+	shared.total.duration /= n;
+	shared.total.lat /= n;
+	shared.total.slat /= n;
+	return 0;
+}
+
+void usage(void)
+{
+	printf("Usage: %s [-hdV -W -D -b <blocksize> -t <sec> -T <threds_per_dev> -o <startoffset> -l <length> -S <seed>] <S|R|random-ratio> <R|W|read-ratio> <dev/file> ...\n",
+	     prog);
+	printf("\n\tDefaults:\n");
+	printf("\t\tBlocksize %d\n", def_blocksize);
+	printf("\t\tDuration in seconds %d\n", secs);
+	printf("\t\tNumber of threads per file %d\n", threads);
+	printf("\t\tThe default start offset is 0\n");
+	printf("\t\tThe default length for IO is the size of the file/device\n");
+	printf("\t\tThe default random seed is the current time\n");
+	printf("\t\tThe default open flags are:\n");
+	printf("\t\t\t O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC\n");
+	printf("\t\tWrite behind mode (-W): O_CREAT | O_LARGEFILE | O_NOATIME \n");
+	printf("\t\tDirect IO mode (-D): O_CREAT | O_LARGEFILE | O_NOATIME | O_DIRECT \n");
+
+	exit(1);
+}
+
+pthread_t *thread_list;
+
+void doexit(int sig)
+{
+	time_t t;
+	finish(thread_list, shared.started);
+	summary("Total", &shared.total);
+	if (write_behind) {
+		shared.total.duration += shared.total.sduration;
+		shared.total.lat = shared.total.duration / shared.total.ops;
+		summary("Synced", &shared.total);
+	}
+	time(&t);
+	printf("Test is done at %s", ctime(&t));
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct timespec duration = {0}, remaining = {0};
+	int i, t, opt, nfiles, nthreads;
+	int blocksize = def_blocksize;
+	uint64_t len = 0, startoff = 0;
+	int seed = time(0);
+
+	prog = strchr(argv[0], '/');
+	if (!prog)
+		prog = argv[0];
+	else
+		prog++;
+
+	while ((opt = getopt(argc, argv, "+hVdt:T:b:s:o:l:S:DW")) != -1) {
+		switch (opt) {
+		default:
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf("%s version %d\n", prog, BTEST_VERSION);
+			exit(0);
+		case 'd':
+			debug++;
+			break;
+		case 'b':
+			blocksize = parse_storage_size(optarg);
+			if (!blocksize)
+				PANIC("invalid blocksize parameter: -b %s",
+				      optarg);
+			printf("IO Block size is %d\n", blocksize);
+			break;
+		case 'o':
+			startoff = parse_storage_size(optarg);
+			printf("File start offset is %" PRId64 "\n", startoff);
+			break;
+		case 'l':
+			len = parse_storage_size(optarg);
+			if (!len)
+				PANIC("invalid len size parameter: -l %s",
+				      optarg);
+			printf("Limit IO space to %s (%" PRId64 " bytes) per file\n", optarg, len);
+			break;
+		case 'S':
+			seed = atoi(optarg);
+			printf("Use random seed %d\n", seed);
+			break;
+		case 't':
+			secs = atoi(optarg);
+			if (!secs)
+				PANIC("invalid seconds parameter: -t %s",
+				      optarg);
+			break;
+		case 'T':
+			threads = atoi(optarg);
+			if (!threads)
+				PANIC("invalid threads parameter: -T %s",
+				      optarg);
+			break;
+		case 'W':
+			printf("Allow write behind\n");
+			openflags &= ~(O_SYNC|O_DIRECT);
+			write_behind = 1;
+			break;
+		case 'D':
+			printf("Use direct IO\n");
+			openflags &= ~O_SYNC;
+			openflags |= O_DIRECT;
+			break;		
+		}
+	}
+	if (argc - optind < 3)
+		usage();
+		
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		dorandom = 100;
+		break;
+	case 'S':
+	case 's':
+		dorandom = 0;
+		break;
+	default:
+		dorandom = atoi(argv[optind]);
+		if (dorandom < 0 || dorandom > 100)
+			PANIC("bad random/sequencial parameter: should be R|S|0-100");
+	}
+	optind++;
+	
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		doread = 100;
+		break;
+	case 'W':
+	case 'w':
+		doread = 0;
+		break;
+	default:
+		doread = atoi(argv[optind]);
+		if (doread < 0 || doread > 100)
+			PANIC("bad read/write parameter: should be R|W|0-100");
+	}
+	optind++;
+	
+	DEBUG("using random seed %d", seed);
+	srandom(seed);
+	
+	nfiles = argc - optind;
+	nthreads = nfiles * threads;
+	if (!(thread_list = calloc(nthreads, sizeof(*thread_list))))
+		PANIC("no mem for thread list (threads %d)", threads * nfiles);
+		
+	for (i = 0; i < nfiles; i++)
+		for (t = 0; t < threads; t++)
+			thread_list[i * threads + t] =
+				new_worker(argv[optind + i], blocksize, dorandom, doread, startoff, len);
+
+	signal(SIGTERM, doexit);
+	signal(SIGINT, doexit);
+	signal(SIGUSR1, dostats);
+	start(nthreads);
+	
+	duration.tv_sec = secs;
+
+	while (nanosleep(&duration, &remaining) < 0)
+		duration = remaining;
+			
+	doexit(0);
+	
+	return 0;
+}
Index: btest/scripts/btest-test-sum.awk
===================================================================
--- btest/scripts/btest-test-sum.awk	(revision 0)
+++ btest/scripts/btest-test-sum.awk	(revision 0)
@@ -0,0 +1,25 @@
+#!/bin/awk -f
+
+$1 == "@@" { type = $2; types[ntypes++] = type; next }
+
+$1 == "##" { test = $0; if (testcount[test] > 0) next; tests[ntests++] = test; next }
+
+$1 == "Total:" { totals[type, test] = $0; testcount[test]++; next }
+$1 == "Synced:" { synced[type, test] = $0; next }
+
+END {
+	for (t = 0; t < ntests; t++ ) {
+		test = tests[t]
+		if (testcount[test] == 0)
+			continue
+		print "Test: ", test
+		for (ty = 0; ty < ntypes; ty++) {
+			type = types[ty]
+			if ((type, test) in totals)
+				print type, totals[type, test]
+			if ((type, test) in synced)
+				print type, synced[type, test]
+		}
+	}
+}
+

Property changes on: btest/scripts/btest-test-sum.awk
___________________________________________________________________
Name: svn:executable
   + *

Index: btest/scripts/btest-test
===================================================================
--- btest/scripts/btest-test	(revision 0)
+++ btest/scripts/btest-test	(revision 0)
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+function panic() {
+	echo "Panic: $*" > /dev/stderr
+	exit 1
+}
+
+if [ -z "$1" -o -z "$2" ]; then
+	echo "Usage: `basename $0` <label> <testdir>"
+fi
+
+label=$1
+basedir=$2/btest$$-`date +%s`
+
+echo "@@ $label"
+
+if ! mkdir -p $basedir; then
+	panic "can't mkdir $basedir"
+fi
+
+rm -rf $basedir/*
+
+big=1G
+mid=512M
+small=100M
+
+echo "## Create 16 big ($big) files - 32k block"
+btest -b 32k -t 900 -l $big S W $basedir/fileX{1..16}
+
+echo "## Create 16 big ($big) files - 32k block - allow Write behind"
+btest -W -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 32k block - sync"
+btest -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 4k block - sync"
+btest -b 4k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 10% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 5% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Create 16 middle ($mid) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $mid S W $basedir/fileM{1..16}
+
+echo "## Create 16 small ($small) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $small S W $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
Index: btest/Makefile
===================================================================
--- btest/Makefile	(revision 0)
+++ btest/Makefile	(revision 0)
@@ -0,0 +1,5 @@
+btest: btest.c
+	$(CC) $(CFLAGS) -D _LARGEFILE64_SOURCE -Wall -o $@ $(LDFLAGS) -l pthread -l rt $<
+	
+clean:
+	rm -f *.o btest

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] block level testing/execersing utility
  2008-08-28  9:27             ` [Qemu-devel][PATCH] block level testing/execersing utility Shahar Frank
@ 2008-08-28  9:47               ` Samuel Thibault
  2008-08-28 12:22                 ` Shahar Frank
  2008-08-28 20:11               ` Anthony Liguori
  1 sibling, 1 reply; 13+ messages in thread
From: Samuel Thibault @ 2008-08-28  9:47 UTC (permalink / raw)
  To: qemu-devel

Shahar Frank, le Thu 28 Aug 2008 02:27:13 -0700, a écrit :
> can do also multiple threading.
> 
> +	arg->offset = random() * arg->blocksize;

Take care: random() uses a central mutex for safety. Use rand_r or
rand48_r instead.

Samuel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [Qemu-devel][PATCH] block level testing/execersing utility
  2008-08-28  9:47               ` Samuel Thibault
@ 2008-08-28 12:22                 ` Shahar Frank
  0 siblings, 0 replies; 13+ messages in thread
From: Shahar Frank @ 2008-08-28 12:22 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 28107 bytes --]

> From: qemu-devel-bounces+shaharf=qumranet.com@nongnu.org [mailto:qemu-
> devel-bounces+shaharf=qumranet.com@nongnu.org] On Behalf Of Samuel
> Thibault
> Sent: Thursday, August 28, 2008 12:47 PM
> To: qemu-devel@nongnu.org
> Subject: Re: [Qemu-devel][PATCH] block level testing/execersing utility
> 
> Shahar Frank, le Thu 28 Aug 2008 02:27:13 -0700, a écrit :
> > can do also multiple threading.
> >
> > +	arg->offset = random() * arg->blocksize;
> 
> Take care: random() uses a central mutex for safety. Use rand_r or
> rand48_r instead.
> 
> Samuel
> 

Thanks. Attached is a fixed version. I also replaced seek + read/write with pread/pwrite.

Shahar

Index: btest/btest.c
===================================================================
--- btest/btest.c	(revision 0)
+++ btest/btest.c	(revision 0)
@@ -0,0 +1,759 @@
+/*
+ * Block test/exerciser utility
+ *
+ * Copyright (c) 2008 Shahar Frank
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <malloc.h>
+#include <stdlib.h>
+
+#define BTEST_VERSION 1
+
+int secs = 60;
+int threads = 1;
+int def_blocksize = 4 * 1024;
+int openflags = O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC;
+int write_behind;
+
+char *prog;
+int debug;
+int dorandom;
+int doread;
+int rseed;
+
+typedef struct IOStats {
+	char *title;
+	uint64_t duration;
+	uint64_t sduration;		/* sync duration */
+	uint64_t lat;
+	uint64_t ops;
+	uint64_t bytes;
+	uint64_t errors;
+} IOStats;
+
+struct shared {
+	pthread_cond_t start_cond;
+	pthread_mutex_t lock;
+	int started;
+	int finished;
+	IOStats total;
+} shared = {
+	PTHREAD_COND_INITIALIZER,
+	PTHREAD_MUTEX_INITIALIZER,
+	};
+
+volatile int finished;
+
+/** printf style debugging MACRO, conmmon header includes name of function */
+#define WARN(fmt, args...)	warn(__FUNCTION__, fmt, ## args)
+
+/** printf style abort MACRO, conmmon header includes name of function */
+#define PANIC(fmt, args...)	panic(__FUNCTION__, fmt, ## args)
+
+#define DEBUG(fmt, args...)	if (debug) warn(__FUNCTION__, fmt, ## args)
+#define DEBUG2(fmt, args...)	if (debug > 1) warn(__FUNCTION__, fmt, ## args)
+#define DEBUG3(fmt, args...)	if (debug > 2) warn(__FUNCTION__, fmt, ## args)
+
+#ifndef BLKGETSIZE
+#define BLKGETSIZE _IO(0x12,96)
+#endif
+
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+#endif
+
+uint64_t
+timestamp(void)
+{
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+/**
+ * Show a message and abort the probram.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void panic(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "PANIC: [%d:%" PRId64 "] %s: %s%s%s\n", getpid(), timestamp(), fn, buf, errno ? ": " : "", errno ? strerror(errno) : "");
+
+	exit(-1);
+}
+
+/**
+ * Print a message to the stderr.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void warn(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "[%s:%d:%" PRId64 "]: %s: %s\n", "btest", getpid(), timestamp(), fn, buf);
+}
+
+uint64_t parse_storage_size(char *arg)
+{
+	int l = strlen(arg);
+	uint64_t factor = 1;
+
+	arg = strdupa(arg);
+	switch (arg[l - 1]) {
+	case 'G':
+	case 'g':
+		factor = 1 << 30;
+		break;
+	case 'M':
+	case 'm':
+		factor = 1 << 20;
+		break;
+	case 'K':
+	case 'k':
+		factor = 1 << 10;
+		break;
+	case 'B':
+	case 'b':
+		factor = 512;
+		break;
+	default:
+		l++;
+	}
+	arg[l] = 0;
+	return strtoull(arg, 0, 0) * factor;
+}
+
+static int64_t
+blockdev_getsize(int fd)
+{
+	int64_t b;
+	long sz;
+	int err;
+
+	err = ioctl (fd, BLKGETSIZE, &sz);
+	if (err)
+		return err;
+
+	err = ioctl(fd, BLKGETSIZE64, &b);
+	if (err || b == 0 || b == sz)
+		b = sz << 9;
+	return b;
+} 
+
+static int64_t
+getsize(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0) {
+		WARN("fstat failed: %m");
+		return -1;
+	}
+	
+	if (S_ISBLK(st.st_mode))
+		return blockdev_getsize(fd);
+	
+	if (S_ISREG(st.st_mode))
+		return st.st_size;
+	
+	WARN("unsupported file type");
+	return -1;
+}
+
+typedef struct worker_arg {
+	int fd;
+	int blocksize;
+	char *file;
+	int64_t size;
+	loff_t offset;
+	loff_t startoffset;
+	loff_t endoffset;
+	int randomratio;
+	int readratio;
+	void *buf;
+	pid_t tid;
+	int (*io)(struct worker_arg *);
+	struct drand48_data rbuf;
+	IOStats stats;
+	struct worker_arg *next;
+} worker_arg;
+
+worker_arg *workers;
+
+int
+do_seq_read(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pread(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_seq_write(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pwrite(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+uint64_t
+saferandom(struct drand48_data *buffer)
+{
+	double d;
+
+	drand48_r(buffer, &d);
+
+	return (uint64_t)d;
+}
+
+int
+do_rand_read(worker_arg *arg)
+{
+	arg->offset = saferandom(&arg->rbuf) * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset % (arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pread(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_rand_write(worker_arg *arg)
+{
+	arg->offset = saferandom(&arg->rbuf) * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset % (arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pwrite(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_io(worker_arg *arg)
+{
+	int (*io)(struct worker_arg *);
+	int doread = 0, dorandom = 0;
+
+	if (arg->readratio == 100)
+		doread = 1;
+	else if (arg->readratio == 0)
+		doread = 0;
+	else
+		doread = (saferandom(&arg->rbuf) % 100) < arg->readratio;
+		
+	if (arg->randomratio == 100)
+		dorandom = 1 << 1;
+	else if (arg->randomratio == 0)
+		dorandom = 0 << 1;
+	else
+		dorandom = ((saferandom(&arg->rbuf) % 100) < arg->randomratio) ? 1 << 1 : 0 << 1;
+
+	switch (doread | dorandom) {
+	case 0:
+		DEBUG3("%s %d: seq write", arg->file, arg->tid);
+		io = do_seq_write;
+		break;
+	case 1:
+		DEBUG3("%s %d: seq read", arg->file, arg->tid);
+		io = do_seq_read;
+		break;
+	case 2:
+		DEBUG3("%s %d: random write", arg->file, arg->tid);
+		io = do_rand_write;
+		break;
+	case 3:
+		DEBUG3("%s %d: random read", arg->file, arg->tid);
+		io = do_rand_read;
+		break;
+	}
+	
+	return io(arg);
+}
+
+void
+summary(char *title, IOStats *stats)
+{
+	printf("%s: %.3f seconds, %" PRIu64 " ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		title,
+		stats->duration * 1.0/ (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration / 1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+char *
+randomratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "S";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+char *
+readratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "W";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+int
+gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+void
+worker_summary(worker_arg *arg)
+{
+	IOStats *stats = &arg->stats;
+	
+	printf("%s %s %s %d %"PRIu64 " %" PRIu64 ": %.3f seconds, %" PRIu64
+		" ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		arg->file,
+		randomratio_str(arg->randomratio, alloca(8)),
+		readratio_str(arg->readratio, alloca(8)),
+		arg->blocksize,
+		arg->startoffset, arg->endoffset,
+		stats->duration * 1.0 / (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration / 1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+void
+dostats(int sig)
+{
+	worker_arg *worker;
+	
+	pthread_mutex_lock(&shared.lock);
+	for (worker = workers; worker; worker = worker->next)
+		worker_summary(worker);
+	pthread_mutex_unlock(&shared.lock);	
+}
+
+/*char *
+parse_worker_arg(worker_arg *arg, char *line)
+{
+	char *s = line;
+	
+	while (*s && !isalpha(*s))
+		s++;
+	if (!*s || (toupper(*s) != 'R' && toupeer(*s) != 'S' && !isdigit(*s))
+		return "random ratio";
+	return 0;
+}
+*/
+		
+void *
+worker(worker_arg *arg)
+{
+	struct timespec t1, t2;
+	IOStats *stats = &arg->stats;
+
+	arg->tid = gettid();
+	srand48_r(rseed, &arg->rbuf);
+
+	DEBUG("%d: starting worker thread on '%s'", arg->tid, arg->file);
+	
+	pthread_mutex_lock(&shared.lock);
+	shared.started++;
+	pthread_cond_wait(&shared.start_cond, &shared.lock);
+	pthread_mutex_unlock(&shared.lock);
+	
+	DEBUG("%d: !! worker thread on '%s'", arg->tid, arg->file);
+	while (!finished) {
+		clock_gettime(CLOCK_REALTIME, &t1);
+		if (do_io(arg) < 0) {
+			//if (debug)
+				WARN("%d: IO error on '%s': %m", arg->tid, arg->file);
+			stats->errors++;
+		} else {
+			clock_gettime(CLOCK_REALTIME, &t2);
+			stats->duration += (t2.tv_sec - t1.tv_sec) * 1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+			stats->ops++;
+			stats->bytes += arg->blocksize;
+		}
+	}
+	stats->lat = stats->duration / stats->ops;
+	worker_summary(arg);
+
+	pthread_mutex_lock(&shared.lock);
+	shared.finished++;
+	shared.total.errors += stats->errors;
+	shared.total.ops += stats->ops;
+	shared.total.duration += stats->duration;
+	shared.total.bytes += stats->bytes;
+	shared.total.lat += stats->lat;
+	pthread_mutex_unlock(&shared.lock);
+	
+	return 0;
+}
+
+/**
+ * Create and initialize new worker thread.
+ * Returns the newly created thread ID.
+ */
+pthread_t
+new_worker(char *file, int blocksize, int randomratio, int readratio, uint64_t start, uint64_t len)
+{
+	worker_arg *arg;
+	pthread_t thid;
+	int fd;
+	
+	openflags |= (readratio == 100) ? O_RDONLY : O_RDWR;
+
+	DEBUG("open flags: 0x%x", openflags);
+	if ((fd = open(file, openflags, 0600)) < 0)
+		PANIC("open '%s' failed", file);
+		
+	if (!(arg = calloc(1, sizeof *arg)))
+		PANIC("out of mem - alloc arg");
+
+	pthread_mutex_lock(&shared.lock);
+	arg->next = workers ;
+	workers = arg;
+	pthread_mutex_unlock(&shared.lock);
+
+	arg->randomratio = randomratio;
+	arg->readratio = readratio;
+	arg->fd = fd;
+	arg->file = strdup(file);
+	arg->blocksize = blocksize;
+	arg->startoffset = start;
+	
+	if ((arg->size = getsize(fd)) < 0)
+		PANIC("can't get size of '%s'", file);
+	
+	if (len == 0 && arg->size > arg->startoffset + blocksize)
+		len = arg->size - arg->startoffset;
+
+	arg->endoffset = arg->startoffset + len;
+	if (arg->size == 0)
+		arg->size = arg->endoffset;
+
+	DEBUG("'%s' size is %" PRId64 " using blocksize %d", file, arg->size, arg->blocksize);
+	if (arg->endoffset - arg->startoffset < blocksize)
+		PANIC("file '%s' is too small, min size is one block (%d)", file, blocksize);
+	if (arg->endoffset > arg->size)
+		PANIC("file '%s' offset %" PRId64 " is out of file/device size range (%"PRId64")",
+			file, arg->endoffset, arg->size);
+	
+	if (!(arg->buf = valloc(blocksize)))
+		PANIC("can't alloc buf sized %d bytes", blocksize);
+	memset(arg->buf, 0, blocksize);
+
+	if (pthread_create(&thid, NULL, (void *(*)(void *))worker, arg))
+		PANIC("thread creation failed [file %s]", file);
+	
+	DEBUG("thread %d created", thid);
+	return thid;
+}
+
+int
+start(int n)
+{
+	time_t t;
+
+	pthread_mutex_lock(&shared.lock);
+	while (n > shared.started) {
+		DEBUG("wait: n %d started %d", n, shared.started);
+		pthread_mutex_unlock(&shared.lock);
+		sleep(1);
+		pthread_mutex_lock(&shared.lock);
+	}
+	pthread_mutex_unlock(&shared.lock);
+
+	time(&t);
+	printf("%d threads are ready, starting test at %s", n, ctime(&t));
+	pthread_cond_broadcast(&shared.start_cond);
+	return 0;
+}
+	
+void
+flush(worker)
+{
+	worker_arg *w;
+	struct timespec t1, t2;
+	IOStats *stats;
+	
+	for (w = workers; w; w = w->next) {
+		stats = &w->stats;
+		clock_gettime(CLOCK_REALTIME, &t1);
+		fsync(w->fd);
+		close(w->fd);
+		clock_gettime(CLOCK_REALTIME, &t2);
+		stats->sduration = (t2.tv_sec - t1.tv_sec) * 1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+		shared.total.sduration += stats->sduration;
+	}
+}
+
+int
+finish(pthread_t *thread_list, int n)
+{
+	int i;
+	
+	finished = 1;
+	for (i = 0; i < n; i++) {
+		pthread_mutex_lock(&shared.lock);
+		DEBUG("wait: n %d finished %d", n, shared.finished);
+		if (shared.finished >= n)
+			break;	// shread lock is still locked, but we are alone, so it is ok
+		pthread_mutex_unlock(&shared.lock);
+		
+		pthread_join(thread_list[i], NULL);
+	}
+	if (write_behind)
+		flush();
+	shared.total.duration /= n;
+	shared.total.lat /= n;
+	return 0;
+}
+
+void usage(void)
+{
+	printf("Usage: %s [-hdV -W -D -b <blocksize> -t <sec> -T <threds_per_dev> -o <startoffset> -l <length> -S <seed>] <S|R|random-ratio> <R|W|read-ratio> <dev/file> ...\n",
+	     prog);
+	printf("\n\tDefaults:\n");
+	printf("\t\tBlocksize %d\n", def_blocksize);
+	printf("\t\tDuration in seconds %d\n", secs);
+	printf("\t\tNumber of threads per file %d\n", threads);
+	printf("\t\tThe default start offset is 0\n");
+	printf("\t\tThe default length for IO is the size of the file/device\n");
+	printf("\t\tThe default random seed is the current time\n");
+	printf("\t\tThe default open flags are:\n");
+	printf("\t\t\t O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC\n");
+	printf("\t\tWrite behind mode (-W): O_CREAT | O_LARGEFILE | O_NOATIME \n");
+	printf("\t\tDirect IO mode (-D): O_CREAT | O_LARGEFILE | O_NOATIME | O_DIRECT \n");
+
+	exit(1);
+}
+
+pthread_t *thread_list;
+
+void doexit(int sig)
+{
+	time_t t;
+	finish(thread_list, shared.started);
+	summary("Total", &shared.total);
+	if (write_behind) {
+		shared.total.duration += shared.total.sduration;
+		shared.total.lat = shared.total.duration / shared.total.ops;
+		summary("Synced", &shared.total);
+	}
+	time(&t);
+	printf("Test is done at %s", ctime(&t));
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct timespec duration = {0}, remaining = {0};
+	int i, t, opt, nfiles, nthreads;
+	int blocksize = def_blocksize;
+	uint64_t len = 0, startoff = 0;
+
+	prog = strchr(argv[0], '/');
+	if (!prog)
+		prog = argv[0];
+	else
+		prog++;
+
+	rseed = time(0);
+
+	while ((opt = getopt(argc, argv, "+hVdt:T:b:s:o:l:S:DW")) != -1) {
+		switch (opt) {
+		default:
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf("%s version %d\n", prog, BTEST_VERSION);
+			exit(0);
+		case 'd':
+			debug++;
+			break;
+		case 'b':
+			blocksize = parse_storage_size(optarg);
+			if (!blocksize)
+				PANIC("invalid blocksize parameter: -b %s",
+				      optarg);
+			printf("IO Block size is %d\n", blocksize);
+			break;
+		case 'o':
+			startoff = parse_storage_size(optarg);
+			printf("File start offset is %" PRId64 "\n", startoff);
+			break;
+		case 'l':
+			len = parse_storage_size(optarg);
+			if (!len)
+				PANIC("invalid len size parameter: -l %s",
+				      optarg);
+			printf("Limit IO space to %s (%" PRId64 " bytes) per file\n", optarg, len);
+			break;
+		case 'S':
+			rseed = atoi(optarg);
+			printf("Use random seed %d\n", rseed);
+			break;
+		case 't':
+			secs = atoi(optarg);
+			if (!secs)
+				PANIC("invalid seconds parameter: -t %s",
+				      optarg);
+			break;
+		case 'T':
+			threads = atoi(optarg);
+			if (!threads)
+				PANIC("invalid threads parameter: -T %s",
+				      optarg);
+			break;
+		case 'W':
+			printf("Allow write behind\n");
+			openflags &= ~(O_SYNC|O_DIRECT);
+			write_behind = 1;
+			break;
+		case 'D':
+			printf("Use direct IO\n");
+			openflags &= ~O_SYNC;
+			openflags |= O_DIRECT;
+			break;		
+		}
+	}
+	if (argc - optind < 3)
+		usage();
+		
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		dorandom = 100;
+		break;
+	case 'S':
+	case 's':
+		dorandom = 0;
+		break;
+	default:
+		dorandom = atoi(argv[optind]);
+		if (dorandom < 0 || dorandom > 100)
+			PANIC("bad random/sequencial parameter: should be R|S|0-100");
+	}
+	optind++;
+	
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		doread = 100;
+		break;
+	case 'W':
+	case 'w':
+		doread = 0;
+		break;
+	default:
+		doread = atoi(argv[optind]);
+		if (doread < 0 || doread > 100)
+			PANIC("bad read/write parameter: should be R|W|0-100");
+	}
+	optind++;
+	
+	DEBUG("using random seed %d", rseed);
+	
+	nfiles = argc - optind;
+	nthreads = nfiles * threads;
+	if (!(thread_list = calloc(nthreads, sizeof(*thread_list))))
+		PANIC("no mem for thread list (threads %d)", threads * nfiles);
+		
+	for (i = 0; i < nfiles; i++)
+		for (t = 0; t < threads; t++)
+			thread_list[i * threads + t] =
+				new_worker(argv[optind + i], blocksize, dorandom, doread, startoff, len);
+
+	signal(SIGTERM, doexit);
+	signal(SIGINT, doexit);
+	signal(SIGUSR1, dostats);
+	start(nthreads);
+	
+	duration.tv_sec = secs;
+
+	while (nanosleep(&duration, &remaining) < 0)
+		duration = remaining;
+			
+	doexit(0);
+	
+	return 0;
+}
Index: btest/scripts/btest-test-sum.awk
===================================================================
--- btest/scripts/btest-test-sum.awk	(revision 0)
+++ btest/scripts/btest-test-sum.awk	(revision 0)
@@ -0,0 +1,25 @@
+#!/bin/awk -f
+
+$1 == "@@" { type = $2; types[ntypes++] = type; next }
+
+$1 == "##" { test = $0; if (testcount[test] > 0) next; tests[ntests++] = test; next }
+
+$1 == "Total:" { totals[type, test] = $0; testcount[test]++; next }
+$1 == "Synced:" { synced[type, test] = $0; next }
+
+END {
+	for (t = 0; t < ntests; t++ ) {
+		test = tests[t]
+		if (testcount[test] == 0)
+			continue
+		print "Test: ", test
+		for (ty = 0; ty < ntypes; ty++) {
+			type = types[ty]
+			if ((type, test) in totals)
+				print type, totals[type, test]
+			if ((type, test) in synced)
+				print type, synced[type, test]
+		}
+	}
+}
+

Property changes on: btest/scripts/btest-test-sum.awk
___________________________________________________________________
Name: svn:executable
   + *

Index: btest/scripts/btest-test
===================================================================
--- btest/scripts/btest-test	(revision 0)
+++ btest/scripts/btest-test	(revision 0)
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+function panic() {
+	echo "Panic: $*" > /dev/stderr
+	exit 1
+}
+
+if [ -z "$1" -o -z "$2" ]; then
+	echo "Usage: `basename $0` <label> <testdir>"
+fi
+
+label=$1
+basedir=$2/btest$$-`date +%s`
+
+echo "@@ $label"
+
+if ! mkdir -p $basedir; then
+	panic "can't mkdir $basedir"
+fi
+
+rm -rf $basedir/*
+
+big=1G
+mid=512M
+small=100M
+
+echo "## Create 16 big ($big) files - 32k block"
+btest -b 32k -t 900 -l $big S W $basedir/fileX{1..16}
+
+echo "## Create 16 big ($big) files - 32k block - allow Write behind"
+btest -W -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 32k block - sync"
+btest -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 4k block - sync"
+btest -b 4k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 10% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 5% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Create 16 middle ($mid) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $mid S W $basedir/fileM{1..16}
+
+echo "## Create 16 small ($small) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $small S W $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
Index: btest/Makefile
===================================================================
--- btest/Makefile	(revision 0)
+++ btest/Makefile	(revision 0)
@@ -0,0 +1,5 @@
+btest: btest.c
+	$(CC) $(CFLAGS) -D _LARGEFILE64_SOURCE -Wall -o $@ $(LDFLAGS) -l pthread -l rt $<
+	
+clean:
+	rm -f *.o btest

[-- Attachment #2: btest-2.patch --]
[-- Type: application/octet-stream, Size: 26465 bytes --]

Index: btest/btest.c
===================================================================
--- btest/btest.c	(revision 0)
+++ btest/btest.c	(revision 0)
@@ -0,0 +1,759 @@
+/*
+ * Block test/exerciser utility
+ *
+ * Copyright (c) 2008 Shahar Frank
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/syscall.h>   /* For SYS_xxx definitions */
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+#include <malloc.h>
+#include <stdlib.h>
+
+#define BTEST_VERSION 1
+
+int secs = 60;
+int threads = 1;
+int def_blocksize = 4 * 1024;
+int openflags = O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC;
+int write_behind;
+
+char *prog;
+int debug;
+int dorandom;
+int doread;
+int rseed;
+
+typedef struct IOStats {
+	char *title;
+	uint64_t duration;
+	uint64_t sduration;		/* sync duration */
+	uint64_t lat;
+	uint64_t ops;
+	uint64_t bytes;
+	uint64_t errors;
+} IOStats;
+
+struct shared {
+	pthread_cond_t start_cond;
+	pthread_mutex_t lock;
+	int started;
+	int finished;
+	IOStats total;
+} shared = {
+	PTHREAD_COND_INITIALIZER,
+	PTHREAD_MUTEX_INITIALIZER,
+	};
+
+volatile int finished;
+
+/** printf style debugging MACRO, conmmon header includes name of function */
+#define WARN(fmt, args...)	warn(__FUNCTION__, fmt, ## args)
+
+/** printf style abort MACRO, conmmon header includes name of function */
+#define PANIC(fmt, args...)	panic(__FUNCTION__, fmt, ## args)
+
+#define DEBUG(fmt, args...)	if (debug) warn(__FUNCTION__, fmt, ## args)
+#define DEBUG2(fmt, args...)	if (debug > 1) warn(__FUNCTION__, fmt, ## args)
+#define DEBUG3(fmt, args...)	if (debug > 2) warn(__FUNCTION__, fmt, ## args)
+
+#ifndef BLKGETSIZE
+#define BLKGETSIZE _IO(0x12,96)
+#endif
+
+#ifndef BLKGETSIZE64
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
+#endif
+
+uint64_t
+timestamp(void)
+{
+	struct timeval tv;
+	gettimeofday(&tv, NULL);
+	return tv.tv_sec * 1000000 + tv.tv_usec;
+}
+
+/**
+ * Show a message and abort the probram.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void panic(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "PANIC: [%d:%" PRId64 "] %s: %s%s%s\n", getpid(), timestamp(), fn, buf, errno ? ": " : "", errno ? strerror(errno) : "");
+
+	exit(-1);
+}
+
+/**
+ * Print a message to the stderr.
+ * @param fn the name of the calling function
+ * @param msg printf style message string
+ */
+void warn(const char *fn, char *msg, ...)
+{
+	char buf[512];
+	va_list va;
+	int n;
+
+	va_start(va, msg);
+	n = vsprintf(buf, msg, va);
+	va_end(va);
+	buf[n] = 0;
+
+	fprintf(stderr, "[%s:%d:%" PRId64 "]: %s: %s\n", "btest", getpid(), timestamp(), fn, buf);
+}
+
+uint64_t parse_storage_size(char *arg)
+{
+	int l = strlen(arg);
+	uint64_t factor = 1;
+
+	arg = strdupa(arg);
+	switch (arg[l - 1]) {
+	case 'G':
+	case 'g':
+		factor = 1 << 30;
+		break;
+	case 'M':
+	case 'm':
+		factor = 1 << 20;
+		break;
+	case 'K':
+	case 'k':
+		factor = 1 << 10;
+		break;
+	case 'B':
+	case 'b':
+		factor = 512;
+		break;
+	default:
+		l++;
+	}
+	arg[l] = 0;
+	return strtoull(arg, 0, 0) * factor;
+}
+
+static int64_t
+blockdev_getsize(int fd)
+{
+	int64_t b;
+	long sz;
+	int err;
+
+	err = ioctl (fd, BLKGETSIZE, &sz);
+	if (err)
+		return err;
+
+	err = ioctl(fd, BLKGETSIZE64, &b);
+	if (err || b == 0 || b == sz)
+		b = sz << 9;
+	return b;
+} 
+
+static int64_t
+getsize(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) < 0) {
+		WARN("fstat failed: %m");
+		return -1;
+	}
+	
+	if (S_ISBLK(st.st_mode))
+		return blockdev_getsize(fd);
+	
+	if (S_ISREG(st.st_mode))
+		return st.st_size;
+	
+	WARN("unsupported file type");
+	return -1;
+}
+
+typedef struct worker_arg {
+	int fd;
+	int blocksize;
+	char *file;
+	int64_t size;
+	loff_t offset;
+	loff_t startoffset;
+	loff_t endoffset;
+	int randomratio;
+	int readratio;
+	void *buf;
+	pid_t tid;
+	int (*io)(struct worker_arg *);
+	struct drand48_data rbuf;
+	IOStats stats;
+	struct worker_arg *next;
+} worker_arg;
+
+worker_arg *workers;
+
+int
+do_seq_read(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pread(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_seq_write(worker_arg *arg)
+{
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset;
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pwrite(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+uint64_t
+saferandom(struct drand48_data *buffer)
+{
+	double d;
+
+	drand48_r(buffer, &d);
+
+	return (uint64_t)d;
+}
+
+int
+do_rand_read(worker_arg *arg)
+{
+	arg->offset = saferandom(&arg->rbuf) * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset % (arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pread(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_rand_write(worker_arg *arg)
+{
+	arg->offset = saferandom(&arg->rbuf) * arg->blocksize;
+	if (arg->offset + arg->blocksize > arg->endoffset)
+		arg->offset = arg->startoffset + arg->offset % (arg->endoffset - arg->startoffset - arg->blocksize);
+	DEBUG3("file %s fd %d seek to offset %" PRIu64, arg->file, arg->fd, arg->offset);
+	if (pwrite(arg->fd, arg->buf, arg->blocksize, arg->offset) != arg->blocksize)
+		return -1;
+	arg->offset += arg->blocksize;
+	return 0;
+}
+
+int
+do_io(worker_arg *arg)
+{
+	int (*io)(struct worker_arg *);
+	int doread = 0, dorandom = 0;
+
+	if (arg->readratio == 100)
+		doread = 1;
+	else if (arg->readratio == 0)
+		doread = 0;
+	else
+		doread = (saferandom(&arg->rbuf) % 100) < arg->readratio;
+		
+	if (arg->randomratio == 100)
+		dorandom = 1 << 1;
+	else if (arg->randomratio == 0)
+		dorandom = 0 << 1;
+	else
+		dorandom = ((saferandom(&arg->rbuf) % 100) < arg->randomratio) ? 1 << 1 : 0 << 1;
+
+	switch (doread | dorandom) {
+	case 0:
+		DEBUG3("%s %d: seq write", arg->file, arg->tid);
+		io = do_seq_write;
+		break;
+	case 1:
+		DEBUG3("%s %d: seq read", arg->file, arg->tid);
+		io = do_seq_read;
+		break;
+	case 2:
+		DEBUG3("%s %d: random write", arg->file, arg->tid);
+		io = do_rand_write;
+		break;
+	case 3:
+		DEBUG3("%s %d: random read", arg->file, arg->tid);
+		io = do_rand_read;
+		break;
+	}
+	
+	return io(arg);
+}
+
+void
+summary(char *title, IOStats *stats)
+{
+	printf("%s: %.3f seconds, %" PRIu64 " ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		title,
+		stats->duration * 1.0/ (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration / 1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+char *
+randomratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "S";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+char *
+readratio_str(int ratio, char *buf)
+{
+	if (ratio == 0)
+		return "W";
+	if (ratio == 100)
+		return "R";
+	else
+		sprintf(buf, "%d", ratio);
+	return buf;
+}
+
+int
+gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+void
+worker_summary(worker_arg *arg)
+{
+	IOStats *stats = &arg->stats;
+	
+	printf("%s %s %s %d %"PRIu64 " %" PRIu64 ": %.3f seconds, %" PRIu64
+		" ops, avg latency %" PRIu64 " usec, bandwidth %" PRIu64
+	       " KB/s, errors %" PRIu64"\n",
+		arg->file,
+		randomratio_str(arg->randomratio, alloca(8)),
+		readratio_str(arg->readratio, alloca(8)),
+		arg->blocksize,
+		arg->startoffset, arg->endoffset,
+		stats->duration * 1.0 / (double)1000000.0,
+		stats->ops,
+		stats->lat,
+		(uint64_t)(stats->bytes * 1.0 / (stats->duration / 1000000.0)  / (1 << 10)),
+		stats->errors);
+}
+
+void
+dostats(int sig)
+{
+	worker_arg *worker;
+	
+	pthread_mutex_lock(&shared.lock);
+	for (worker = workers; worker; worker = worker->next)
+		worker_summary(worker);
+	pthread_mutex_unlock(&shared.lock);	
+}
+
+/*char *
+parse_worker_arg(worker_arg *arg, char *line)
+{
+	char *s = line;
+	
+	while (*s && !isalpha(*s))
+		s++;
+	if (!*s || (toupper(*s) != 'R' && toupeer(*s) != 'S' && !isdigit(*s))
+		return "random ratio";
+	return 0;
+}
+*/
+		
+void *
+worker(worker_arg *arg)
+{
+	struct timespec t1, t2;
+	IOStats *stats = &arg->stats;
+
+	arg->tid = gettid();
+	srand48_r(rseed, &arg->rbuf);
+
+	DEBUG("%d: starting worker thread on '%s'", arg->tid, arg->file);
+	
+	pthread_mutex_lock(&shared.lock);
+	shared.started++;
+	pthread_cond_wait(&shared.start_cond, &shared.lock);
+	pthread_mutex_unlock(&shared.lock);
+	
+	DEBUG("%d: !! worker thread on '%s'", arg->tid, arg->file);
+	while (!finished) {
+		clock_gettime(CLOCK_REALTIME, &t1);
+		if (do_io(arg) < 0) {
+			//if (debug)
+				WARN("%d: IO error on '%s': %m", arg->tid, arg->file);
+			stats->errors++;
+		} else {
+			clock_gettime(CLOCK_REALTIME, &t2);
+			stats->duration += (t2.tv_sec - t1.tv_sec) * 1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+			stats->ops++;
+			stats->bytes += arg->blocksize;
+		}
+	}
+	stats->lat = stats->duration / stats->ops;
+	worker_summary(arg);
+
+	pthread_mutex_lock(&shared.lock);
+	shared.finished++;
+	shared.total.errors += stats->errors;
+	shared.total.ops += stats->ops;
+	shared.total.duration += stats->duration;
+	shared.total.bytes += stats->bytes;
+	shared.total.lat += stats->lat;
+	pthread_mutex_unlock(&shared.lock);
+	
+	return 0;
+}
+
+/**
+ * Create and initialize new worker thread.
+ * Returns the newly created thread ID.
+ */
+pthread_t
+new_worker(char *file, int blocksize, int randomratio, int readratio, uint64_t start, uint64_t len)
+{
+	worker_arg *arg;
+	pthread_t thid;
+	int fd;
+	
+	openflags |= (readratio == 100) ? O_RDONLY : O_RDWR;
+
+	DEBUG("open flags: 0x%x", openflags);
+	if ((fd = open(file, openflags, 0600)) < 0)
+		PANIC("open '%s' failed", file);
+		
+	if (!(arg = calloc(1, sizeof *arg)))
+		PANIC("out of mem - alloc arg");
+
+	pthread_mutex_lock(&shared.lock);
+	arg->next = workers ;
+	workers = arg;
+	pthread_mutex_unlock(&shared.lock);
+
+	arg->randomratio = randomratio;
+	arg->readratio = readratio;
+	arg->fd = fd;
+	arg->file = strdup(file);
+	arg->blocksize = blocksize;
+	arg->startoffset = start;
+	
+	if ((arg->size = getsize(fd)) < 0)
+		PANIC("can't get size of '%s'", file);
+	
+	if (len == 0 && arg->size > arg->startoffset + blocksize)
+		len = arg->size - arg->startoffset;
+
+	arg->endoffset = arg->startoffset + len;
+	if (arg->size == 0)
+		arg->size = arg->endoffset;
+
+	DEBUG("'%s' size is %" PRId64 " using blocksize %d", file, arg->size, arg->blocksize);
+	if (arg->endoffset - arg->startoffset < blocksize)
+		PANIC("file '%s' is too small, min size is one block (%d)", file, blocksize);
+	if (arg->endoffset > arg->size)
+		PANIC("file '%s' offset %" PRId64 " is out of file/device size range (%"PRId64")",
+			file, arg->endoffset, arg->size);
+	
+	if (!(arg->buf = valloc(blocksize)))
+		PANIC("can't alloc buf sized %d bytes", blocksize);
+	memset(arg->buf, 0, blocksize);
+
+	if (pthread_create(&thid, NULL, (void *(*)(void *))worker, arg))
+		PANIC("thread creation failed [file %s]", file);
+	
+	DEBUG("thread %d created", thid);
+	return thid;
+}
+
+int
+start(int n)
+{
+	time_t t;
+
+	pthread_mutex_lock(&shared.lock);
+	while (n > shared.started) {
+		DEBUG("wait: n %d started %d", n, shared.started);
+		pthread_mutex_unlock(&shared.lock);
+		sleep(1);
+		pthread_mutex_lock(&shared.lock);
+	}
+	pthread_mutex_unlock(&shared.lock);
+
+	time(&t);
+	printf("%d threads are ready, starting test at %s", n, ctime(&t));
+	pthread_cond_broadcast(&shared.start_cond);
+	return 0;
+}
+	
+void
+flush(worker)
+{
+	worker_arg *w;
+	struct timespec t1, t2;
+	IOStats *stats;
+	
+	for (w = workers; w; w = w->next) {
+		stats = &w->stats;
+		clock_gettime(CLOCK_REALTIME, &t1);
+		fsync(w->fd);
+		close(w->fd);
+		clock_gettime(CLOCK_REALTIME, &t2);
+		stats->sduration = (t2.tv_sec - t1.tv_sec) * 1000000llu + (t2.tv_nsec - t1.tv_nsec) / 1000.0;
+		shared.total.sduration += stats->sduration;
+	}
+}
+
+int
+finish(pthread_t *thread_list, int n)
+{
+	int i;
+	
+	finished = 1;
+	for (i = 0; i < n; i++) {
+		pthread_mutex_lock(&shared.lock);
+		DEBUG("wait: n %d finished %d", n, shared.finished);
+		if (shared.finished >= n)
+			break;	// shread lock is still locked, but we are alone, so it is ok
+		pthread_mutex_unlock(&shared.lock);
+		
+		pthread_join(thread_list[i], NULL);
+	}
+	if (write_behind)
+		flush();
+	shared.total.duration /= n;
+	shared.total.lat /= n;
+	return 0;
+}
+
+void usage(void)
+{
+	printf("Usage: %s [-hdV -W -D -b <blocksize> -t <sec> -T <threds_per_dev> -o <startoffset> -l <length> -S <seed>] <S|R|random-ratio> <R|W|read-ratio> <dev/file> ...\n",
+	     prog);
+	printf("\n\tDefaults:\n");
+	printf("\t\tBlocksize %d\n", def_blocksize);
+	printf("\t\tDuration in seconds %d\n", secs);
+	printf("\t\tNumber of threads per file %d\n", threads);
+	printf("\t\tThe default start offset is 0\n");
+	printf("\t\tThe default length for IO is the size of the file/device\n");
+	printf("\t\tThe default random seed is the current time\n");
+	printf("\t\tThe default open flags are:\n");
+	printf("\t\t\t O_CREAT | O_LARGEFILE | O_NOATIME | O_SYNC\n");
+	printf("\t\tWrite behind mode (-W): O_CREAT | O_LARGEFILE | O_NOATIME \n");
+	printf("\t\tDirect IO mode (-D): O_CREAT | O_LARGEFILE | O_NOATIME | O_DIRECT \n");
+
+	exit(1);
+}
+
+pthread_t *thread_list;
+
+void doexit(int sig)
+{
+	time_t t;
+	finish(thread_list, shared.started);
+	summary("Total", &shared.total);
+	if (write_behind) {
+		shared.total.duration += shared.total.sduration;
+		shared.total.lat = shared.total.duration / shared.total.ops;
+		summary("Synced", &shared.total);
+	}
+	time(&t);
+	printf("Test is done at %s", ctime(&t));
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct timespec duration = {0}, remaining = {0};
+	int i, t, opt, nfiles, nthreads;
+	int blocksize = def_blocksize;
+	uint64_t len = 0, startoff = 0;
+
+	prog = strchr(argv[0], '/');
+	if (!prog)
+		prog = argv[0];
+	else
+		prog++;
+
+	rseed = time(0);
+
+	while ((opt = getopt(argc, argv, "+hVdt:T:b:s:o:l:S:DW")) != -1) {
+		switch (opt) {
+		default:
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf("%s version %d\n", prog, BTEST_VERSION);
+			exit(0);
+		case 'd':
+			debug++;
+			break;
+		case 'b':
+			blocksize = parse_storage_size(optarg);
+			if (!blocksize)
+				PANIC("invalid blocksize parameter: -b %s",
+				      optarg);
+			printf("IO Block size is %d\n", blocksize);
+			break;
+		case 'o':
+			startoff = parse_storage_size(optarg);
+			printf("File start offset is %" PRId64 "\n", startoff);
+			break;
+		case 'l':
+			len = parse_storage_size(optarg);
+			if (!len)
+				PANIC("invalid len size parameter: -l %s",
+				      optarg);
+			printf("Limit IO space to %s (%" PRId64 " bytes) per file\n", optarg, len);
+			break;
+		case 'S':
+			rseed = atoi(optarg);
+			printf("Use random seed %d\n", rseed);
+			break;
+		case 't':
+			secs = atoi(optarg);
+			if (!secs)
+				PANIC("invalid seconds parameter: -t %s",
+				      optarg);
+			break;
+		case 'T':
+			threads = atoi(optarg);
+			if (!threads)
+				PANIC("invalid threads parameter: -T %s",
+				      optarg);
+			break;
+		case 'W':
+			printf("Allow write behind\n");
+			openflags &= ~(O_SYNC|O_DIRECT);
+			write_behind = 1;
+			break;
+		case 'D':
+			printf("Use direct IO\n");
+			openflags &= ~O_SYNC;
+			openflags |= O_DIRECT;
+			break;		
+		}
+	}
+	if (argc - optind < 3)
+		usage();
+		
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		dorandom = 100;
+		break;
+	case 'S':
+	case 's':
+		dorandom = 0;
+		break;
+	default:
+		dorandom = atoi(argv[optind]);
+		if (dorandom < 0 || dorandom > 100)
+			PANIC("bad random/sequencial parameter: should be R|S|0-100");
+	}
+	optind++;
+	
+	switch (argv[optind][0]) {
+	case 'R':
+	case 'r':
+		doread = 100;
+		break;
+	case 'W':
+	case 'w':
+		doread = 0;
+		break;
+	default:
+		doread = atoi(argv[optind]);
+		if (doread < 0 || doread > 100)
+			PANIC("bad read/write parameter: should be R|W|0-100");
+	}
+	optind++;
+	
+	DEBUG("using random seed %d", rseed);
+	
+	nfiles = argc - optind;
+	nthreads = nfiles * threads;
+	if (!(thread_list = calloc(nthreads, sizeof(*thread_list))))
+		PANIC("no mem for thread list (threads %d)", threads * nfiles);
+		
+	for (i = 0; i < nfiles; i++)
+		for (t = 0; t < threads; t++)
+			thread_list[i * threads + t] =
+				new_worker(argv[optind + i], blocksize, dorandom, doread, startoff, len);
+
+	signal(SIGTERM, doexit);
+	signal(SIGINT, doexit);
+	signal(SIGUSR1, dostats);
+	start(nthreads);
+	
+	duration.tv_sec = secs;
+
+	while (nanosleep(&duration, &remaining) < 0)
+		duration = remaining;
+			
+	doexit(0);
+	
+	return 0;
+}
Index: btest/scripts/btest-test-sum.awk
===================================================================
--- btest/scripts/btest-test-sum.awk	(revision 0)
+++ btest/scripts/btest-test-sum.awk	(revision 0)
@@ -0,0 +1,25 @@
+#!/bin/awk -f
+
+$1 == "@@" { type = $2; types[ntypes++] = type; next }
+
+$1 == "##" { test = $0; if (testcount[test] > 0) next; tests[ntests++] = test; next }
+
+$1 == "Total:" { totals[type, test] = $0; testcount[test]++; next }
+$1 == "Synced:" { synced[type, test] = $0; next }
+
+END {
+	for (t = 0; t < ntests; t++ ) {
+		test = tests[t]
+		if (testcount[test] == 0)
+			continue
+		print "Test: ", test
+		for (ty = 0; ty < ntypes; ty++) {
+			type = types[ty]
+			if ((type, test) in totals)
+				print type, totals[type, test]
+			if ((type, test) in synced)
+				print type, synced[type, test]
+		}
+	}
+}
+

Property changes on: btest/scripts/btest-test-sum.awk
___________________________________________________________________
Name: svn:executable
   + *

Index: btest/scripts/btest-test
===================================================================
--- btest/scripts/btest-test	(revision 0)
+++ btest/scripts/btest-test	(revision 0)
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+function panic() {
+	echo "Panic: $*" > /dev/stderr
+	exit 1
+}
+
+if [ -z "$1" -o -z "$2" ]; then
+	echo "Usage: `basename $0` <label> <testdir>"
+fi
+
+label=$1
+basedir=$2/btest$$-`date +%s`
+
+echo "@@ $label"
+
+if ! mkdir -p $basedir; then
+	panic "can't mkdir $basedir"
+fi
+
+rm -rf $basedir/*
+
+big=1G
+mid=512M
+small=100M
+
+echo "## Create 16 big ($big) files - 32k block"
+btest -b 32k -t 900 -l $big S W $basedir/fileX{1..16}
+
+echo "## Create 16 big ($big) files - 32k block - allow Write behind"
+btest -W -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 32k block - sync"
+btest -b 32k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Rewrite 16 big ($big) files - 4k block - sync"
+btest -b 4k -t 900 -l $big S W $basedir/fileY{1..16}
+
+echo "## Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 32k blocks - write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 25% Random, 75% Read 4k blocks - write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 10% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 16 big ($files) - 5% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 10 75 $basedir/fileY{1..16}
+
+echo "## Create 16 middle ($mid) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $mid S W $basedir/fileM{1..16}
+
+echo "## Create 16 small ($small) sized files - 4k blocks, sync"
+btest -W -b 4k -t 500 -l $small S W $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, sync"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## random Read 16 middle ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileM{1..16}
+
+echo "## random Read 16 small ($mid) sized files - 4k blocks, direct"
+btest -b 4k -t 500 -l $mid R R $basedir/fileS{1..16}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - sync"
+btest -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 900 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - sync"
+btest -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - direct"
+btest -D -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - direct"
+btest -D -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 32k blocks - write behind"
+btest -W -b 32k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
+echo "## Stress IO on 15 mixed files, 5 big ($big), 5 middle sized ($mid) and 5 small ($small) - 25% Random, 75% Read 4k blocks - write behind"
+btest -W -b 4k -t 180 -l $big 25 75 $basedir/fileY{1..5} $basedir/fileM{1..5} $basedir/fileS{1..5}
+
+echo "## Flush cashe using - Seq read of 16 big ($big) files - 32k blocks"
+btest -b 32k -t 300 -l $big S R $basedir/fileX{1..16}
+
Index: btest/Makefile
===================================================================
--- btest/Makefile	(revision 0)
+++ btest/Makefile	(revision 0)
@@ -0,0 +1,5 @@
+btest: btest.c
+	$(CC) $(CFLAGS) -D _LARGEFILE64_SOURCE -Wall -o $@ $(LDFLAGS) -l pthread -l rt $<
+	
+clean:
+	rm -f *.o btest

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel][PATCH] block level testing/execersing utility
  2008-08-28  9:27             ` [Qemu-devel][PATCH] block level testing/execersing utility Shahar Frank
  2008-08-28  9:47               ` Samuel Thibault
@ 2008-08-28 20:11               ` Anthony Liguori
  1 sibling, 0 replies; 13+ messages in thread
From: Anthony Liguori @ 2008-08-28 20:11 UTC (permalink / raw)
  To: qemu-devel

Shahar Frank wrote:
> Hi All,
>
> The attached is a small utility to test and exercise block level device.
> I wrote it to test and benchmark Qemu image backends and to test Qemu
> image related features. I think it may be useful for others too.
>   

Sounds like it could be useful but I don't think the QEMU source tree is 
the best place for it to live.  You should probably just setup a simple 
sourceforge project.

Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Qemu-devel] Re: [PATCH] qemu-fuse
  2008-08-27 15:50 [Qemu-devel][PATCH] qemu-fuse Shahar Frank
  2008-08-27 16:28 ` Anthony Liguori
@ 2008-08-28 21:05 ` Szabolcs Szakacsits
  1 sibling, 0 replies; 13+ messages in thread
From: Szabolcs Szakacsits @ 2008-08-28 21:05 UTC (permalink / raw)
  To: qemu-devel

Shahar Frank <shaharf <at> qumranet.com> writes:

> +  "\tmount -o loop -t ntfs-3g -o force /tmp/qemu/img1 /tmp/ntfs\n",

Though 'mount -t ntfs-3g ...' is always the preferred way to 
ntfs-3g mount, the above -o loop case is strongly depreciated
because 'ntfs-3g -o force /tmp/qemu/img1 /tmp/ntfs' will always 
work reliable [unlike the loop case] and being faster [no unneeded 
loop layer].

     Szaka

--
NTFS-3G: http://ntfs-3g.org

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2008-08-28 21:10 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-08-27 15:50 [Qemu-devel][PATCH] qemu-fuse Shahar Frank
2008-08-27 16:28 ` Anthony Liguori
2008-08-27 17:02   ` Luca Bigliardi
2008-08-27 18:00     ` Anthony Liguori
2008-08-27 18:10       ` Daniel P. Berrange
2008-08-27 18:59         ` Jamie Lokier
2008-08-28  8:03           ` Shahar Frank
2008-08-28  9:27             ` [Qemu-devel][PATCH] block level testing/execersing utility Shahar Frank
2008-08-28  9:47               ` Samuel Thibault
2008-08-28 12:22                 ` Shahar Frank
2008-08-28 20:11               ` Anthony Liguori
2008-08-27 18:00   ` [Qemu-devel][PATCH] qemu-fuse Jamie Lokier
2008-08-28 21:05 ` [Qemu-devel] Re: [PATCH] qemu-fuse Szabolcs Szakacsits

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).