All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/4] Add libvdisk, and vdisk_tool
@ 2007-06-19 13:16 Ben Guthro
  2007-06-19 16:10 ` Mark McLoughlin
  0 siblings, 1 reply; 2+ messages in thread
From: Ben Guthro @ 2007-06-19 13:16 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 236 bytes --]

[PATCH 4/4] Add libvdisk, and vdisk_tool
vdisk-support.patch
provides libvdisk, and vdisk_tool, as described in [PATCH 0/4]
Signed-off-by: Boris Ostrovsky <bostrovsky@virtualiron.com>
Signed-off-by: Ben Guthro <bguthro@virtualiron.com>

[-- Attachment #2: vdisk-support.patch --]
[-- Type: text/x-patch, Size: 107435 bytes --]

diff -r 7cad1f06a7f6 tools/Makefile
--- a/tools/Makefile	Tue Jun 19 08:13:59 2007 -0400
+++ b/tools/Makefile	Tue Jun 19 08:13:59 2007 -0400
@@ -17,6 +17,7 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm
 SUBDIRS-$(VTPM_TOOLS) += vtpm
 SUBDIRS-y += xenstat
 SUBDIRS-y += libaio
+SUBDIRS-y += vdisk
 SUBDIRS-y += blktap
 SUBDIRS-y += libfsimage
 SUBDIRS-$(XENFB_TOOLS) += xenfb
diff -r 7cad1f06a7f6 tools/vdisk/Makefile
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/Makefile	Tue Jun 19 08:13:59 2007 -0400
@@ -0,0 +1,62 @@
+#
+# Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+#
+# Portions have been modified by Virtual Iron Software, Inc.
+# (c) 2007. This file and the modifications can be redistributed and/or
+# modified under the terms and conditions of the GNU General Public
+# License, version 2.1 and not any later version of the GPL, as published
+# by the Free Software Foundation.
+#
+XEN_ROOT = ../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+LIBVHD_SRC	= vhd.c vhd_utils.c
+LIBVDISK_SRC	= vdisk_utils.c vdisk_common.c
+TOOL_SRC 	= vdisk_tool.c
+
+LIBAIO_DIR   = ../libaio/src
+
+CFLAGS		= -O2 -fno-strict-aliasing -fPIC -Wall -Werror -rdynamic \
+		-D_FILE_OFFSET_BITS=64 \
+		-D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -I./ \
+		-I$(LIBAIO_DIR)
+
+LIB_LDFLAGS	= -dy -shared -L$(LIBAIO_DIR) -laio
+
+INSTALL		= /usr/bin/install
+
+all: default
+default: vdisk_tool libvdisk_vhd.so libvdisk.so
+
+
+%.o: %.c
+	$(CC) $(CFLAGS) -rdynamic  -c $< -o $@
+
+# Link the tool from its object file so that it is built with the same
+# $(CC) and $(CFLAGS) (notably -D_FILE_OFFSET_BITS=64) as the libraries
+# whose structures it shares.
+vdisk_tool: $(TOOL_SRC:%.c=%.o) libvdisk_vhd.so libvdisk.so
+	$(CC) $(CFLAGS) -g -o $@ $(TOOL_SRC:%.c=%.o) \
+		-L. -L$(LIBAIO_DIR) -lvdisk -ldl -laio
+
+libvdisk_vhd.so: $(LIBVHD_SRC:%.c=%.o) libvdisk.so
+	$(LD) $(LIB_LDFLAGS) -o $@ $^
+
+libvdisk.so: $(LIBVDISK_SRC:%.c=%.o)
+	$(LD) $(LIB_LDFLAGS) -o $@ $^
+
+install: all
+	$(INSTALL) -d $(DESTDIR)/usr/bin
+	$(INSTALL) -d $(DESTDIR)/usr/lib64
+	$(INSTALL) vdisk_tool $(DESTDIR)/usr/bin
+	$(INSTALL) libvdisk_vhd.so libvdisk.so $(DESTDIR)/usr/lib64
+	$(INSTALL) -d $(DESTDIR)/usr/include
+	for header in *.h; do $(INSTALL) $$header $(DESTDIR)/usr/include; done
+
+clean:
+	/bin/rm -f *.o libvdisk_vhd.so vdisk_tool libvdisk.so
+
+depend .depend dep:
+	$(CC) $(CFLAGS) -M $(LIBVDISK_SRC) $(LIBVHD_SRC) $(TOOL_SRC)> .depend
+
+ifeq (.depend,$(wildcard .depend))
+include .depend
+endif
diff -r 7cad1f06a7f6 tools/vdisk/list.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/list.h	Tue Jun 19 08:13:59 2007 -0400
@@ -0,0 +1,168 @@
+// Copy of /usr/include/linux/list.h that does not
+// depend on __KERNEL__ and _LVM_H_INCLUDE
+
+#ifndef _LIST_H
+#define _LIST_H
+
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+	struct list_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) do { \
+	(ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Link @new into the list between @prev and @next, which the caller
+ * already knows to be consecutive entries.
+ *
+ * Internal helper: use list_add() / list_add_tail() unless both
+ * neighbours are at hand.
+ */
+static __inline__ void __list_add(struct list_head *new,
+				  struct list_head *prev,
+				  struct list_head *next)
+{
+	next->prev = new;	/* successor points back at the new entry */
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;	/* predecessor points forward at the new entry */
+}
+
+/**
+ * list_add - insert an entry right behind the head
+ * @new: entry to insert
+ * @head: list head to insert it after
+ *
+ * The new entry becomes the first element of the list, which gives
+ * stack (LIFO) behaviour.
+ */
+static __inline__ void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+
+/**
+ * list_add_tail - insert an entry right in front of the head
+ * @new: entry to insert
+ * @head: list head to insert it before
+ *
+ * The new entry becomes the last element of the list, which gives
+ * queue (FIFO) behaviour.
+ */
+static __inline__ void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/*
+ * Unlink whatever sits between @prev and @next by making the two
+ * point at each other.
+ *
+ * Internal helper for callers that already hold both neighbours;
+ * the removed entry itself is not touched.
+ */
+static __inline__ void __list_del(struct list_head *prev,
+				  struct list_head *next)
+{
+	next->prev = prev;	/* close the gap from the far side ... */
+	prev->next = next;	/* ... and from the near side */
+}
+
+/**
+ * list_del - remove an entry from its list
+ * @entry: the element to remove
+ *
+ * The entry's own links are cleared afterwards, so list_empty() on it
+ * does not return true; use list_del_init() if it will be reused as a
+ * list head.
+ */
+static __inline__ void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+	entry->prev = 0;
+	entry->next = 0;
+}
+
+/**
+ * list_del_init - remove an entry from its list and make it an empty list
+ * @entry: the element to remove
+ */
+static __inline__ void list_del_init(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+
+	/* same as INIT_LIST_HEAD(entry): the entry now points at itself */
+	entry->next = entry;
+	entry->prev = entry;
+}
+
+/**
+ * list_empty - check whether a list has no entries
+ * @head: the list to test
+ *
+ * Returns non-zero when @head links back to itself.
+ */
+static __inline__ int list_empty(struct list_head *head)
+{
+	return (head == head->next) ? 1 : 0;
+}
+
+/**
+ * list_splice - insert all entries of one list into another
+ * @list: the list whose entries are moved
+ * @head: the entries are inserted right after this position
+ *
+ * @list itself is not modified, so it still points at the moved
+ * entries afterwards and must be reinitialised before it is reused.
+ */
+static __inline__ void list_splice(struct list_head *list, struct list_head *head)
+{
+	struct list_head *first = list->next;
+	struct list_head *last;
+	struct list_head *at;
+
+	/* Nothing to move when @list is empty */
+	if (first == list)
+		return;
+
+	last = list->prev;
+	at = head->next;
+
+	/* hook the front of @list directly behind @head ... */
+	first->prev = head;
+	head->next = first;
+
+	/* ... and its back in front of what used to follow @head */
+	last->next = at;
+	at->prev = last;
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:	the &struct list_head pointer.
+ * @type:	the type of the struct this is embedded in.
+ * @member:	the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each	-	iterate over a list
+ * @pos:	the &struct list_head to use as a loop counter.
+ * @head:	the head for your list.
+ */
+#define list_for_each(pos, head) \
+	for (pos = (head)->next; pos != (head); \
+        	pos = pos->next)
+        	
+/**
+ * list_for_each_safe	-	iterate over a list safe against removal of list entry
+ * @pos:	the &struct list_head to use as a loop counter.
+ * @n:		another &struct list_head to use as temporary storage
+ * @head:	the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+	for (pos = (head)->next, n = pos->next; pos != (head); \
+		pos = n, n = pos->next)
+
+
+
+#endif
diff -r 7cad1f06a7f6 tools/vdisk/vdisk.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk.h	Tue Jun 19 08:14:43 2007 -0400
@@ -0,0 +1,214 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VDISK_H
+#define __VDISK_H
+
+#include <sys/types.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/limits.h>
+#include <syslog.h>
+#include <libaio.h>
+#include "list.h"
+
+// vdisk_tool's operations
+#define VDISK_OP_CREATE    (1<<0)
+#define VDISK_OP_HEADERS   (1<<1)
+#define VDISK_OP_DUMP      (1<<2)
+#define VDISK_OP_MODIFY    (1<<3)
+
+// Return codes
+#define VID_BLOCK_MAPPED    (0)
+#define VID_BLOCK_NOTMAPPED (-1)
+#define VID_BLOCK_TOOBIG    (-2)
+#define VID_BLOCK_MAPERR    (-3)
+
+// IO operation codes
+#define VDISK_READ  (0)
+#define VDISK_WRITE (1)
+
+// Async IO macros
+#define VDISK_HASH_SZ        (2048)
+#define VDISK_HASH_IDX(x)    ((x) & (VDISK_HASH_SZ-1))
+#define VDISK_INVALID_HASH   (-1)
+#define REQUEST_ASYNC_FD     (1) // Should really be defined in kernel
+
+#define SECTOR_SIZE          (512)
+
+// vdisk device flags
+#define VDISK_SYNCIO_BUF     (1<<0)
+#define VDISK_RO             (1<<1)
+
+// vdisk file flags
+#define VDF_LEAF    (1<<0) // last COW child (writeable)
+
+// Statistics gathering
+#define	VDISK_STATS	     (0)
+#define VDISK_SYNCIO_STATS   (0)
+
+#if VDISK_STATS
+#define	DO_STATS(x)	x
+#else
+#define	DO_STATS(x)
+#endif
+
+
+
+
+// Datatype for addressing host memory 
+#if defined __x86_64__
+typedef uint64_t addr_t;
+#else
+typedef uint32_t addr_t;
+#endif
+
+typedef	int file_t;
+
+// Forward declaration
+struct vdisk_dev;
+
+// Stores info about a pending async IO
+typedef struct pending_aio {
+	uint32_t block;      // first block of the request; vdisk_xfer_cb() frees hash slots from here
+	uint32_t num_blocks; // number of 512-byte blocks in the request
+	void *arg;           // opaque value handed to the format's xfer_commit()
+	void *aiocb;         // NOTE(review): presumably the caller's AIO cookie; confirm in vdisk_utils.c
+	off_t off;           // file offset, used by vdisk_xfer_cb() for posix_fadvise()
+	file_t fd;           // descriptor that vdisk_xfer_cb() fsync()s and fadvise()s
+	int op;              // compared against VDISK_WRITE on completion
+	int res;             // IO result, passed to xfer_commit() as its err argument
+} pending_aio_t;
+
+// Hash that stores async IO data
+typedef struct vdisk_hash {
+	uint64_t key;
+	struct iocb io;
+	pending_aio_t pio;
+} vdisk_hash_t;
+
+// run data to allow coalescing of writes when doing posix_fadvise() sync/flush
+typedef struct vdisk_syncio {
+	int 	is_set;
+	off_t	io_start;
+	off_t	io_len;
+#if VDISK_SYNCIO_STATS
+	unsigned long	total_writes;
+	unsigned long	contig_writes;
+	unsigned long	flush_size_sub1MB;
+	unsigned long	flush_size_sub2MB;
+	unsigned long	flush_size_sub4MB;
+	unsigned long	flush_size_sub8MB;
+	unsigned long	flush_size_ovr8MB;
+	unsigned long	flush_size_force;
+	time_t		last_dbg_print;
+#endif
+} vdisk_syncio_t;
+
+// Per-file structure
+typedef struct vd_file {
+	struct list_head vdf_list;
+	char name[PATH_MAX];
+	file_t fd;
+	int flags;
+	int batch_sz;           // number of blocks that are mapped sequentially
+	void *vdf;              // format-specific data
+	vdisk_syncio_t *syncio;	// allows sync io to buffer in pagecache for 
+	                        //  better io performance
+} vd_file_t;
+
+// Data describing format's properties (ops etc.)
+typedef struct vdf_data {
+	char ftype[8];                    // File name extension
+
+	int (*open)(struct vdisk_dev *vdisk, char *filename);
+	void (*close)(struct vdisk_dev *vdisk);
+	int (*map_block)(vd_file_t *vf, uint32_t *blockno, int num_blocks, 
+			 int op, void **arg);
+	int (*xfer_commit)(void *arg, int err);
+	int (*print_header)(vd_file_t *vf);
+	int (*parse_args)(int argc, int operations, char *argv[], void **optp);
+	int (*create_vdisk)(char *filename, void *optp);
+	int (*modify_vdisk)(struct vdisk_dev *vdisk, void *optp);
+	struct list_head vdfd_list; // connects to global format list
+} vdf_data_t;
+
+// Top-level datastructure
+typedef struct vdisk_dev {
+
+	struct vdisk_geom {
+		int cyls;
+		int heads;
+		int secs;
+	} geom;
+
+        ssize_t sz;      // Device size (bytes)
+	
+	int flags;
+
+	// head of vdisk files (vd_file_t) list
+	struct list_head vdf_head;
+
+	vdf_data_t *vdfd;
+
+	// AIO data
+	vdisk_hash_t hash[VDISK_HASH_SZ];
+	struct iocb *aio_submit[VDISK_HASH_SZ];
+	struct io_event aio_events[VDISK_HASH_SZ];
+	io_context_t ioctx;
+	int use_aio;
+	int aio_fd;
+	int aio_cnt;
+
+	// Stats
+	uint64_t busyio;
+	uint64_t syncio;
+	uint64_t asyncio;
+	uint64_t tot_io;
+} vdisk_dev_t;
+
+struct program_props {
+	void *alloc_func;
+	void *free_func;
+	int out_target;
+};
+
+
+#define VDISK_OUT_STDERR (0)
+#define VDISK_OUT_SYSLOG (1)
+extern int vdisk_dbg_level;
+extern int vdisk_out_target;
+#define VIDDBG(n, fmt, args...) vdisk_log_error(n, __FILE__, __LINE__, fmt, ##args)
+
+#define ASSERT(expr)                                                    \
+	((expr) ? 0 :                                                   \
+	 ({								\
+		 VIDDBG(0, "Assertion failed: %s\n", __STRING(expr));	\
+		 abort();						\
+	 }));
+
+extern int vdisk_pagesz; //4K
+
+extern void vdisk_log_error(int level, char *file, int line, char *fmt, ...);
+extern int vdf_read_state(vdisk_dev_t *vdisk, char *filename);
+extern int vdf_print_headers(vdisk_dev_t *vdisk, char *filename);
+extern int vdisk_register (vdf_data_t *vdfd);
+extern void vdisk_unregister (vdf_data_t *vdfd);
+extern int vdf_init(vdisk_dev_t *vdisk, char *fname);
+extern int vdisk_common_init(vdisk_dev_t *vdisk);
+extern int vdf_find_vdfd(vdisk_dev_t *vdisk, char *ftype);
+extern int vdisk_xfer_cb(vdisk_dev_t *vdisk, struct pending_aio *pio);
+extern int vdisk_rw(void *hdl, int64_t sector_num, 
+		    uint8_t *buf, int nb_sectors, int write, void *aiocb);
+extern void vdisk_alloc_init(void *alloc_func, void *free_func);
+extern int vdisk_init(vdisk_dev_t *vdisk, char *filename,
+		      struct program_props *props, uint8_t flags);
+extern void vdisk_fini(vdisk_dev_t *vdisk);
+
+#endif /* __VDISK_H */
diff -r 7cad1f06a7f6 tools/vdisk/vdisk_common.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_common.c	Tue Jun 19 08:15:02 2007 -0400
@@ -0,0 +1,637 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dlfcn.h>
+
+#include "vdisk.h"
+#include "vdisk_utils.h"
+
+
+static int vdisk_initialized = 0;
+int vdisk_pagesz = 0;
+
+// Tear down a vdisk: release the per-file sync-IO run data, then let the
+// format close the device.  Does nothing if the device, its format data
+// or the format's close operation is missing.
+void vdisk_fini(vdisk_dev_t *vdisk)
+{
+	struct list_head *pos;
+
+	// We may have already closed the device
+	if (vdisk == NULL)
+		return;
+	if (vdisk->vdfd == NULL || vdisk->vdfd->close == NULL)
+		return;
+
+	for (pos = vdisk->vdf_head.next; pos != &vdisk->vdf_head;
+	     pos = pos->next) {
+		vd_file_t *file = list_entry(pos, vd_file_t, vdf_list);
+
+		free(file->syncio);
+		file->syncio = NULL;
+	}
+
+	vdisk->vdfd->close(vdisk);
+}
+
+// Open the virtual disk named by 'filename' and prepare 'vdisk' for IO.
+//
+// vdisk:    device structure to initialize
+// filename: image name; if it contains a ':' only the part after the
+//           first ':' is used as the file name
+// props:    optional allocator hooks and log target; with NULL, output
+//           goes to stderr and NULL hooks are given to vdisk_alloc_init()
+// flags:    VDISK_* device flags, stored in vdisk->flags
+//
+// Returns 0 on success, EINVAL for NULL arguments, or the non-zero error
+// from vdf_init().  Failing to set up async IO is not fatal: it only
+// clears vdisk->use_aio.
+int vdisk_init(vdisk_dev_t *vdisk, char *filename,
+	       struct program_props *props, uint8_t flags)
+{
+	int err;
+	int i;
+	char *fname;
+
+	if (vdisk == NULL || filename == NULL)
+		return (EINVAL);
+
+	vdisk_common_init(NULL/*XXX: ?? */);
+
+	if (props != NULL) {
+		// Set where output is directed
+		vdisk_out_target = props->out_target;
+		vdisk_alloc_init(props->alloc_func, props->free_func);
+	} else {
+		vdisk_out_target = VDISK_OUT_STDERR;
+		vdisk_alloc_init(NULL, NULL);
+	}
+
+	fname = strchr(filename, ':');
+	if (fname == NULL)
+		fname = filename;
+	else
+		fname++;
+
+	vdisk->flags = flags;
+
+	err = vdf_init(vdisk, fname);
+	if (err != 0) {
+		VIDDBG(0, "Can't initialize format's data for %s\n",
+			filename);
+		return (err);
+	}
+
+	if (vdisk->use_aio) {
+
+		// Initialize async IO data
+		for (i=0;i<VDISK_HASH_SZ;i++)
+			vdisk->hash[i].key = VDISK_INVALID_HASH;
+
+		vdisk->aio_cnt = 0;
+
+		vdisk->ioctx = (io_context_t) REQUEST_ASYNC_FD;
+		vdisk->aio_fd = io_setup(VDISK_HASH_SZ, &vdisk->ioctx);
+		if (vdisk->aio_fd < 0) {
+			// libaio reports failures as a negated error number
+			// in the return value and leaves errno alone.
+			VIDDBG(0, "io_setup can't get async poll ID (%s). "
+			       " Async IO will not be available\n",
+				strerror(-vdisk->aio_fd));
+			vdisk->use_aio = 0;
+		}
+	} else
+		vdisk->aio_fd = -1;
+
+	return (0);
+}
+
+// Select the format from the file name extension, read the vdisk's state
+// and, when VDISK_SYNCIO_BUF is set, allocate sync-IO run data for every
+// file of the vdisk.
+//
+// Returns 0 on success, EINVAL when 'fname' has no extension, the error
+// from vdf_find_vdfd(), or -1 when the state cannot be read.  A failed
+// run-data allocation is only logged.
+int
+vdf_init(vdisk_dev_t *vdisk, char *fname) 
+{
+	struct list_head *pos;
+	char *dot = strrchr(fname, '.');
+	int rc;
+
+	if (dot == NULL) {
+		VIDDBG(0, "Can't determine file type for %s\n", fname);
+		return (EINVAL);
+	}
+
+	// The extension starts right behind the '.'
+	rc = vdf_find_vdfd(vdisk, dot + 1);
+	if (rc) {
+		VIDDBG(0, "Can't find format's data\n");
+		return (rc);
+	}
+
+	if (vdf_read_state(vdisk, fname)) {
+		VIDDBG(0, "failed to read headers\n");
+		return (-1);
+	}
+
+	if (!(vdisk->flags & VDISK_SYNCIO_BUF))
+		return (0);
+
+	list_for_each(pos, &vdisk->vdf_head) {
+		vd_file_t *file = list_entry(pos, vd_file_t, vdf_list);
+
+		file->syncio = calloc( 1, sizeof(vdisk_syncio_t));
+		if (file->syncio == NULL) {
+			VIDDBG(0, "vdisk_alloc_syncio_run_data() "
+			       "failed '%s', thus no speed up\n",
+			       strerror(errno));
+		}
+	}
+
+	return (0);
+}
+
+// Walk the vdisk's file list and ask the format to map a single block.
+//
+// dev:     vdisk whose files are searched
+// blockno: IN/OUT block number, handed to the format's map_block()
+// op:      VDISK_READ or VDISK_WRITE
+// vf:      OUT, the file examined last (untouched if the list is empty)
+// arg:     passed through to map_block()
+//
+// Returns VID_BLOCK_MAPPED as soon as one file maps the block, otherwise
+// the result of the last map_block() call (VID_BLOCK_NOTMAPPED when the
+// file list is empty).
+int
+vdisk_map_block(struct vdisk_dev *dev, 
+		uint32_t *blockno,      /* IN/OUT */
+		int op,
+		vd_file_t **vf,
+		void **arg)
+{
+	struct list_head *ptr;
+	vd_file_t *vdf;
+	int res = VID_BLOCK_NOTMAPPED;
+
+	list_for_each(ptr, &dev->vdf_head) {
+
+		*vf = vdf = list_entry(ptr, vd_file_t, vdf_list);
+
+		res = dev->vdfd->map_block(vdf, blockno, 1, op, arg);
+		if (res == VID_BLOCK_MAPPED)
+			return (res);
+	}
+
+	// *blockno is unsigned 32-bit; "%d" would print large blocks as
+	// negative numbers.
+	if (op == VDISK_WRITE)
+		VIDDBG(0, "Couldn't map block %" PRIu32 "\n", *blockno);
+
+	return (res);
+}
+
+// Reset the vdisk's file list, set up the AIO queue when async IO is
+// requested, and let the format open 'filename'.
+//
+// A failing io_queue_init() only disables async IO.  Returns 0 on
+// success or the error from the format's open().
+int
+vdf_read_state(vdisk_dev_t *vdisk, char *filename)
+{
+	int rc;
+	int slot;
+
+	INIT_LIST_HEAD(&vdisk->vdf_head);
+
+	if (vdisk->use_aio) {
+		// Start with every hash slot free
+		slot = 0;
+		while (slot < VDISK_HASH_SZ) {
+			vdisk->hash[slot].key = VDISK_INVALID_HASH;
+			slot++;
+		}
+
+		memset(&vdisk->ioctx, 0, sizeof(io_context_t));
+		rc = io_queue_init(100, &vdisk->ioctx);
+		if (rc != 0) {
+			VIDDBG(0, "io_queue_init() failed: %s. "
+			       " Async IO will not be available\n", 
+			       strerror(-1*rc));
+			vdisk->use_aio = 0;
+		}
+	}
+
+	rc = vdisk->vdfd->open(vdisk, filename);
+	if (rc == 0)
+		return (0);
+
+	VIDDBG(0, "Problems opening vdisk %s (error %d)\n", 
+	       filename, rc);
+	return (rc);
+}
+
+// Read the vdisk's state from 'filename' and print the header of the
+// first file on the vdisk's file list.
+//
+// Returns 0 on success, the error from vdf_read_state(), or EINVAL when
+// there is no file, or no print_header operation, to print with.
+int
+vdf_print_headers(vdisk_dev_t *vdisk, char *filename)
+{
+	int err;
+	vd_file_t *vf;
+
+	err = vdf_read_state(vdisk, filename);
+	if (err) {
+		VIDDBG(0, "Failed to read state for %s\n", filename);
+		return (err);
+	}
+
+	// list_entry() on an empty list would turn the list head itself
+	// into a bogus vd_file_t.
+	if (list_empty(&vdisk->vdf_head)) {
+		VIDDBG(0, "No files found for %s\n", filename);
+		return (EINVAL);
+	}
+
+	if (vdisk->vdfd->print_header == NULL) {
+		VIDDBG(0, "Format can't print headers\n");
+		return (EINVAL);
+	}
+
+	vf = list_entry(vdisk->vdf_head.next, vd_file_t, vdf_list);
+	(void)vdisk->vdfd->print_header(vf);
+
+	return (0);
+}
+
+// Completion handler for one async IO described by 'pio'.
+//
+// Commits the transfer with the format, fsync()s the file after a write,
+// drops the affected pages from the page cache and frees the hash slots
+// of the blocks covered by the request.
+//
+// Returns 0 on success, otherwise the first error encountered (the
+// xfer_commit() result, or the error number from fsync() or
+// posix_fadvise()).  All steps are attempted even after a failure.
+int
+vdisk_xfer_cb(vdisk_dev_t *vdisk, struct pending_aio *pio)
+{
+	uint32_t blk;
+	int err;
+	int ret = 0;
+
+	ASSERT(pio != NULL);
+
+	err = vdisk->vdfd->xfer_commit(pio->arg, pio->res);
+	if (err) {
+		VIDDBG(0, "Failed to commit transfer (error %d)\n", err);
+		ret = err;
+	}
+
+	if (pio->op == VDISK_WRITE) {
+		if (fsync(pio->fd) != 0) {
+			err = errno;	// save before logging can clobber it
+			VIDDBG(0, "fsync: %s\n", strerror(err));
+			if (ret == 0)
+				ret = err;
+		}
+	}
+
+	/*
+	 * posix_fadvise() (or, rather, kernel's sys_fadvise64_64())
+	 * invalidates whole pages only.
+	 */
+	err = posix_fadvise(pio->fd, (pio->off & (~((off_t)vdisk_pagesz-1))),
+			    (ssize_t)(pio->num_blocks<<9) + (off_t)vdisk_pagesz,
+			    POSIX_FADV_DONTNEED);
+	if (err) {
+		// posix_fadvise() returns the error number; errno is not set
+		VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+		if (ret == 0)
+			ret = err;
+	}
+
+	for (blk=pio->block; blk < (pio->block + pio->num_blocks); blk++)
+		vdisk->hash[VDISK_HASH_IDX(blk)].key = VDISK_INVALID_HASH;
+
+	return (ret);
+}
+
+
+// Mark the hash slots of 'count' consecutive blocks starting at 'first'
+// as free.  With 'owned' set, first assert that every slot really holds
+// the block that is being released.
+static void
+vdisk_rw_release_hash(struct vdisk_dev *vdisk, uint32_t first, int count,
+		      int owned)
+{
+	int n;
+	int slot;
+
+	for (n = 0; n < count; n++) {
+		slot = VDISK_HASH_IDX(first + n);
+		if (owned)
+			ASSERT(vdisk->hash[slot].key == first + n);
+		vdisk->hash[slot].key = VDISK_INVALID_HASH;
+	}
+}
+
+// Read or write 'nb_blocks' 512-byte blocks starting at 'block'.
+//
+// hdl:       the vdisk (struct vdisk_dev *)
+// block:     first block of the request
+// buf:       data buffer; if it is not 512-byte aligned the transfer goes
+//            through a temporary aligned buffer and is done synchronously
+// nb_blocks: number of blocks to transfer
+// op:        VDISK_READ or VDISK_WRITE
+// aiocb:     passed on to vdisk_asyncio() for async requests
+//
+// Reads of blocks that no file maps return zeroes.
+//
+// Returns the number of bytes processed when the request was handled
+// synchronously, the number of zero-filled bytes when an async request
+// with an aiocb contained unmapped reads, and 0 when everything was
+// queued as async IO.  Errors: -ENOSPC (beyond the device size), -EBUSY
+// (hash slot taken by another block), -ENOMEM.
+// NOTE(review): mapping failures return *positive* values (EINVAL, or the
+// negated VID_BLOCK_* code), which callers cannot tell from a byte count;
+// kept as is because callers may depend on it.
+int vdisk_rw(void *hdl, int64_t block, 
+	     uint8_t *buf, int nb_blocks,
+	     int op, void *aiocb)
+{
+	off_t offset;
+	unsigned long bytes;
+	uint32_t real_block, blk;
+	vd_file_t *vdf = NULL;
+	void *arg = NULL;
+	struct vdisk_dev *vdisk = (struct vdisk_dev *)hdl;
+	int i;
+	struct list_head *ptr;
+	int res = 0;
+	char *b = (char *)buf;
+	char *pool = NULL;
+	int batch;
+	int use_aio = vdisk->use_aio;
+	int busy = 0;
+	int hash_index;
+	int zero_blocks = 0;
+
+	VIDDBG(50, "block=0x%" PRIx64 ", nb_blocks=%d\n", 
+	       block, nb_blocks);
+
+	if (((block + (nb_blocks-1)) << 9) >= vdisk->sz) {
+		return (-ENOSPC);
+	}
+
+	vdisk->tot_io++;
+
+	if (use_aio) {
+		// Check whether the hash has available slots and reserve them
+		// We reserve them as we go because we want to make sure that
+		// the request fits in the hash.
+		for (i=0, blk=block; i<nb_blocks; i++, blk++) {
+			hash_index = VDISK_HASH_IDX(blk);
+			VIDDBG(50, "block=0x%" PRIx64 ", nb_blocks=%d i=%d "
+			       "blk=0x%x, vdisk->hash.key[%d]=0x%" PRIx64 "\n", 
+			       block, nb_blocks, i,
+			       blk, hash_index, 
+			       vdisk->hash[hash_index].key);
+			if (vdisk->hash[hash_index].key != VDISK_INVALID_HASH) {
+				vdisk->busyio++;
+				if (vdisk->hash[hash_index].key != blk)
+					busy = 1;
+				use_aio = 0;
+				break;
+			}
+			vdisk->hash[hash_index].key = blk;
+			VIDDBG(50, "hash_index=%d, blk=%d\n", 
+			       hash_index, blk);
+		}
+
+		// We need to free hash entries that we've just reserved.
+		if (!use_aio) {
+			VIDDBG(50, "Freeing hash for block %" PRId64 "\n",
+			       block);
+			// Exactly the first 'i' blocks were reserved before
+			// the loop above gave up.  Counting up from 'block'
+			// avoids a descending unsigned counter, which can
+			// never drop below block 0.
+			vdisk_rw_release_hash(vdisk, (uint32_t)block, i, 0);
+			VIDDBG(50, "Done\n");
+			if (busy) {
+				VIDDBG(50, "Busy\n");
+				return (-EBUSY);
+			}
+			vdisk->syncio++;
+		}
+	}
+
+	// We can only transfer to/from an aligned buffer
+	if ((addr_t)buf & 511) {
+		// The bounce buffer forces synchronous IO, so no completion
+		// would ever free the hash slots reserved above.
+		if (use_aio) {
+			vdisk_rw_release_hash(vdisk, (uint32_t)block,
+					      nb_blocks, 0);
+			use_aio = 0;
+		}
+
+		b = pool = vdisk_malloc((nb_blocks+1) * 512);
+		if (pool == NULL) {
+			VIDDBG(0, "Can't create buffer\n");
+			return (-ENOMEM);
+		}
+		while ((addr_t)b & 511) b++;
+		VIDDBG(10, "Aligned buffer %p (pool %p, b %p)\n", buf, pool, b);
+	}
+
+	i = 0; // block in the buf[]
+	while (nb_blocks>0) {
+
+		// Find the largest contiguous set of blocks that
+		// we can access in a single IO.
+
+		batch = nb_blocks;
+	again:
+		arg = NULL;
+		list_for_each(ptr, &vdisk->vdf_head) {
+
+			vdf = list_entry(ptr, vd_file_t, vdf_list);
+
+			real_block = (uint32_t)block;
+
+			// Make batch fit into a single vdf->batch_sz
+			if ( ((block + batch - 1) & ~(vdf->batch_sz-1))
+			     != (block & ~(vdf->batch_sz-1)))
+				batch = ( (block + vdf->batch_sz) & 
+					  ~(vdf->batch_sz-1) )
+					- block;
+
+			// Map the requested block set to address in the file
+			res = vdisk->vdfd->map_block(vdf, &real_block, 
+						     batch, op, &arg);
+
+			if (res == VID_BLOCK_TOOBIG) {
+				// Some blocks are mapped and some are not.
+				// Need to try a smaller batch
+
+				batch >>= 1;
+				if (!batch) {
+					// Free the hash entries of the blocks
+					// not processed yet.  They are only
+					// reserved in AIO mode.
+					if (use_aio)
+						vdisk_rw_release_hash(vdisk,
+							(uint32_t)block,
+							nb_blocks, 1);
+					if (pool)
+						vdisk_free(pool);
+
+					VIDDBG(0, "Inconsistent mapping error\n");
+					return EINVAL;
+				}
+				goto again;
+			}
+
+			if ((res != VID_BLOCK_NOTMAPPED) ||
+			    ((vdf->flags & VDF_LEAF) && (op == VDISK_WRITE)))
+				break;
+		}
+
+		if (res != VID_BLOCK_MAPPED) {
+
+			// Unallocated blocks return zeroes for reads
+			if ((op == VDISK_READ) && (res == VID_BLOCK_NOTMAPPED)) {
+
+				// Free up hash entries
+				if (use_aio)
+					vdisk_rw_release_hash(vdisk,
+						(uint32_t)block, batch, 1);
+
+				memset(&buf[i*512], 0, batch*512);
+				i += batch;
+				b += batch * 512;
+				block += batch;
+				nb_blocks -= batch;
+				zero_blocks += batch;
+				VIDDBG(10, "Skipping %d blocks\n", batch);
+				continue;
+			}
+
+			VIDDBG(0, "Couldn't map block %" PRId64 " (%d)\n", 
+			       block, res);
+			// The rest of the request is abandoned: give its
+			// reserved hash slots back.
+			if (use_aio)
+				vdisk_rw_release_hash(vdisk, (uint32_t)block,
+						      nb_blocks, 0);
+			if (pool)
+				vdisk_free(pool);
+			return (-1*res);
+		}
+
+		VIDDBG(50, "mapped sector %" PRId64 " to block %d for read\n", 
+		       block, real_block);
+
+		// Offset in the file
+		offset = (uint64_t)real_block << 9;
+
+		if (use_aio)
+			vdisk->asyncio++;
+
+		// Perform IO
+		if (op == VDISK_WRITE) {
+			if (pool)
+				memcpy(b, &buf[i*512], batch * 512); 
+			if (!use_aio)
+				bytes = vdisk_syncio(vdf->fd, b, batch * 512, 
+						     offset, VDISK_WRITE, vdf->syncio);
+			else
+				bytes = vdisk_asyncio(vdisk, block, vdf->fd, 
+						      b, batch * 512, offset, 
+						      arg, aiocb, VDISK_WRITE);
+		} else /* VDISK_READ */ {
+			if (!use_aio) {
+				bytes = vdisk_syncio(vdf->fd, b, batch * 512, 
+						     offset, VDISK_READ, NULL);
+				if (pool)
+					memcpy(&buf[i*512], b, batch * 512);
+			} else {
+				bytes = vdisk_asyncio(vdisk, block, vdf->fd,
+						      b, batch * 512, offset, 
+						      arg, aiocb, VDISK_READ);
+			}
+		}
+
+		if (bytes != batch * 512) {
+			VIDDBG(0, "%s %ld bytes (block %d) instead of "
+			       "%d (%s)\n", (op==VDISK_WRITE)?"Wrote":"Read", 
+			       bytes, real_block, batch * 512, vdf->name);
+			if ((signed long)bytes == -1)
+				res = errno;
+		}
+
+		if (!use_aio)
+			if (vdisk->vdfd->xfer_commit(arg, res))
+				VIDDBG(0, "Couldn't commit transfer\n");
+
+		i += batch;
+		b += batch * 512;
+		block += batch;
+		nb_blocks -= batch;
+	}
+
+	if (pool)
+		vdisk_free(pool);
+
+	/*
+	 * Returning number of processed bytes to caller who requested AIO 
+	 * (vdisk->use_aio && aiocb) will tell him that there is no 
+	 * need to wait for AIO completion
+	 * There are two cases when this happens:
+	 *  - We couldn't perform any AIOs (use_aio == 0)
+	 *  - Some requests have been reads to unallocated blocks (and 
+	 *    thus are read as zeroes). Note that if *some* blocks have been
+	 *    sent as AIOs, the caller will need to wait for completions 
+	 *    (and we return zero).
+	 */
+	if (!use_aio)
+		return (i * 512); // 'i' is number of accessed sectors;
+	else if (vdisk->use_aio && aiocb && (zero_blocks != 0))
+		return (zero_blocks * 512); 
+	else
+		return (0);
+}
+
+LIST_HEAD(vdfd_head);
+
+// Add a file format to the global format list.
+// Returns 0 on success, -1 if this very vdf_data_t is already on the list.
+int
+vdisk_register(vdf_data_t *new_vdfd)
+{
+	struct list_head *pos = vdfd_head.next;
+
+	// Refuse to link the same descriptor twice
+	while (pos != &vdfd_head) {
+		if (list_entry(pos, vdf_data_t, vdfd_list) == new_vdfd)
+			return (-1);
+		pos = pos->next;
+	}
+
+	list_add(&new_vdfd->vdfd_list, &vdfd_head);
+	VIDDBG(10, "Registered \"%s\" format\n", new_vdfd->ftype);
+	return (0);
+}
+
+// Remove a file format from the global format list.
+// Does nothing if 'vdfd' is not on the list.
+void
+vdisk_unregister(vdf_data_t *vdfd)
+{
+	struct list_head *pos;
+
+	for (pos = vdfd_head.next; pos != &vdfd_head; pos = pos->next) {
+		if (list_entry(pos, vdf_data_t, vdfd_list) != vdfd)
+			continue;
+
+		list_del(&vdfd->vdfd_list);
+		return;
+	}
+}
+
+// Find format-specific library, load it and call its init routine.
+//
+// For format "<name>" this loads "libvdisk_<name>.so" and calls the
+// function "<name>_init" found in it.
+//
+// Returns 0 on success and -1 on any failure (missing or overlong name,
+// library that cannot be loaded, missing init routine).
+int
+vdisk_init_format(char *name)
+{
+	void *handle;
+	char libname[64];
+	char initfunc[32];
+	void (*init)(void);
+	char *err;
+	int len;
+
+	if (name == NULL) {
+		VIDDBG(0, "No format name given\n");
+		return (-1);
+	}
+
+	// 'name' can come straight from a file name extension, so it must
+	// not be trusted to fit into the fixed-size buffers.
+
+	// Construct library name
+	len = snprintf(libname, sizeof(libname), "libvdisk_%s.so", name);
+	if (len < 0 || (size_t)len >= sizeof(libname)) {
+		VIDDBG(0, "Format name %s is too long\n", name);
+		return (-1);
+	}
+
+	// Construct init function name
+	len = snprintf(initfunc, sizeof(initfunc), "%s_init", name);
+	if (len < 0 || (size_t)len >= sizeof(initfunc)) {
+		VIDDBG(0, "Format name %s is too long\n", name);
+		return (-1);
+	}
+
+	handle = dlopen (libname, RTLD_LAZY);
+	if (!handle) {
+		VIDDBG(0, "%s\n", dlerror());
+		return (-1);
+	}
+
+	dlerror();    // Clear any existing error
+
+	*(void **) (&init) = dlsym(handle, initfunc);
+	err = dlerror();
+	if ((err != NULL) || (init == NULL)) {
+		VIDDBG(0, "%s\n", err ? err : "init routine not found");
+		dlclose(handle);	// don't leak the library reference
+		return (-1);
+	}
+
+	// Call format-specific init routine.  The handle stays open on
+	// purpose: the library's code remains in use after this call.
+	(*init)();
+
+	return (0);
+}
+
+// Look up the registered format whose ftype equals 'ftype' and store it
+// in vdisk->vdfd.  If none is registered, try once to load the format
+// with vdisk_init_format() and search again.
+//
+// Returns 0 on success, the vdisk_init_format() error if loading fails,
+// or EINVAL if the format is still unknown after loading.
+int
+vdf_find_vdfd(vdisk_dev_t *vdisk, char *ftype)
+{
+	struct list_head *pos;
+	vdf_data_t *fmt;
+	int rc;
+	int pass;
+
+	for (pass = 0; pass < 2; pass++) {
+		list_for_each(pos, &vdfd_head) {
+			fmt = list_entry(pos, vdf_data_t, vdfd_list);
+			if (strcmp(fmt->ftype, ftype) == 0) {
+				vdisk->vdfd = fmt;
+				return (0);
+			}
+		}
+
+		// Second pass and still nothing: give up
+		if (pass != 0) {
+			VIDDBG(0, "Unknown format %s\n", ftype);
+			return (EINVAL);
+		}
+
+		// Didn't find vdfd for this extension, maybe we need
+		// to initialize it and try again.
+		rc = vdisk_init_format(ftype);
+		if (rc != 0) {
+			VIDDBG(0, "Can't initialize format %s\n", ftype);
+			return (rc);
+		}
+	}
+
+	/*NOTREACHED*/
+	return (EINVAL);
+}
+
+// One-time library setup: reset the global format list and record the
+// system page size.  Later calls are no-ops.  'vdisk' is not used.
+// Always returns 0.
+int
+vdisk_common_init(vdisk_dev_t *vdisk)
+{
+	if (!vdisk_initialized) {
+		INIT_LIST_HEAD(&vdfd_head);
+		vdisk_pagesz = getpagesize();
+		vdisk_initialized = 1;
+	}
+
+	return (0);
+}
diff -r 7cad1f06a7f6 tools/vdisk/vdisk_tool.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_tool.c	Tue Jun 19 08:15:25 2007 -0400
@@ -0,0 +1,338 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE  // for strndup()
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdint.h>
+#include <getopt.h>
+
+#include "vdisk.h"
+
+extern int vdisk_init_format(char *);
+static char *supported_formats[] = {"vhd", NULL};
+
+// Initialize the vdisk library, then load every format listed in
+// supported_formats[]. Returns 0, or the first error encountered.
+int
+init_tool()
+{
+	char **fmt;
+	int rc;
+
+	rc = vdisk_common_init(NULL/*XXX: ?? */);
+	if (rc != 0) {
+		VIDDBG(0, "Failed to initialize vdisk\n");
+		return (rc);
+	}
+
+	// The table is NULL-terminated
+	for (fmt = supported_formats; *fmt != NULL; fmt++) {
+		rc = vdisk_init_format(*fmt);
+		if (rc == 0)
+			continue;
+
+		VIDDBG(0, "Failed to initialize %s format\n", *fmt);
+		return (rc);
+	}
+	return (0);
+}
+
+// Print the command synopsis (with 'prog' as the program name), the
+// option summary and the names in supported_formats[] to stderr.
+static void
+print_usage(char *prog)
+{
+	char **fmt;
+
+	fprintf(stderr, "Usage: %s OPTIONS -# <format-specific options> "
+		"<filename>\n", prog);
+	// Fixed text: no conversions are needed, so fputs() is enough
+	fputs(" OPTIONS:\n"
+	      "          [-f <format>] [-C] [-H] [-M] "
+	      "[-D <block> [-b <num_blocks>] [-o outfile]]\n"
+	      "	-C		Create a vdisk\n"
+	      "	-H		Read vdisk headers from file\n"
+	      "	-M		Modify a vdisk\n"
+	      "	-D		Dump a vhd\n"
+	      "	  block 	  first block to read (required)\n"
+	      "	  num_blocks 	  number of blocks to read. If not\n"
+	      "			    specified, whole file will be read\n"
+	      "	  outfile 	  output file. If not specified,\n"
+	      "			    stdout is used\n"
+	      " Supported formats: ", stderr);
+
+	// One name per entry of the NULL-terminated table, then a newline
+	for (fmt = supported_formats; *fmt != NULL; fmt++)
+		fprintf(stderr, "%s ", *fmt);
+	fprintf(stderr, "\n");
+}
+
+// vdisk_tool entry point. The last argument names the vdisk file; options
+// select the format (-f) and the operations: create (-C), print headers
+// (-H), modify (-M), dump blocks (-D/-b/-o). "-#" passes parsing on to the
+// format's parse_args() hook. Returns 0 or an error; some paths exit(1).
+int
+main(int argc, char *argv[])
+{
+	char filename[PATH_MAX];
+	char *outfile = NULL;
+	char format[16] = "vhd";
+	int operations = 0;
+	int c = 0;	// int, not char: getopt() reports the end with -1
+	vdisk_dev_t vdisk;
+	int err;
+	void *optp = NULL; // Format-specific options
+	char *file_fmt;
+	int i;
+	int first_block = 0, num_blocks = -1;
+	struct program_props props;
+	uint8_t flags;
+
+	//init_tool();
+	/*
+	 * Read the filename argument first -- we may need it to determine
+	 * format. Room is reserved for the ".<format>" suffix -f may append,
+	 * so neither this copy nor that append can overrun filename[].
+	 */
+	if (argc < 2 ||
+	    strlen(argv[argc-1]) + sizeof(format) >= sizeof(filename)) {
+		print_usage(argv[0]);
+		return (EINVAL);
+	}
+	strcpy(filename, argv[argc-1]);
+	file_fmt = strrchr(filename, '.');
+
+	// See whether what we think is file's format is supported
+	if (file_fmt) {
+		file_fmt++; // Skip '.'
+		for (i=0; ;i++) {
+			if (supported_formats[i] == NULL) {
+				// Not a supported format, ignore suffix
+				file_fmt = NULL;
+				break;
+			}
+			if (!strcmp(file_fmt, supported_formats[i]))
+				break;	// Found it
+		}
+	}
+
+	vdisk.vdfd = NULL;
+
+	while (c != '#') {
+		c = getopt(argc, argv, "f:CHMD:b:o:#");
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'f':
+			if (strlen(optarg) >= sizeof(format)) {
+				VIDDBG(0, "Format name is too long\n");
+				return (EINVAL);
+			}
+			strcpy(format, optarg);
+
+			/*
+			 * Append a suffix if the filename gave no usable
+			 * format or -f names a different one. file_fmt then
+			 * aliases format[], so this happens at most once.
+			 */
+			if (!file_fmt || strcmp(format, file_fmt)) {
+				(void)strcat(filename, ".");
+				(void)strcat(filename, format);
+				file_fmt = format;
+			}
+			break;
+		case 'C':
+			operations |= VDISK_OP_CREATE;
+			break;
+		case 'H':	/* File to read headers from */
+			operations |= VDISK_OP_HEADERS;
+			break;
+		case 'M':	/* File to modify */
+			operations |= VDISK_OP_MODIFY;
+			break;
+		case 'D':
+			first_block = atol(optarg);
+			operations |= VDISK_OP_DUMP;
+			break;
+		case 'b':
+			num_blocks = atol(optarg);
+			if (num_blocks < 0) {
+				VIDDBG(0, "Number of blocks must be a "
+				       "non-negative number\n");
+				exit(1);
+			}
+			break;
+		case 'o':
+			// Don't confuse vdisk with output file
+			if (optarg == argv[argc-1]) {
+				print_usage(argv[0]);
+				exit(1);
+			}
+			outfile = strndup(optarg, strlen(optarg));
+			if (outfile == NULL) {
+				VIDDBG(0, "Out of memory\n");
+				exit(1);
+			}
+			break;	// was missing: -o used to fall into '#'
+		case '#':
+			// Format options follow: resolve the format first
+			if (file_fmt) {
+				err = vdf_find_vdfd(&vdisk, file_fmt);
+				if (err) {
+					VIDDBG(0, "Fail to initialize "
+						"format data for %s\n",
+						format);
+					return (err);
+				}
+			} else {
+				VIDDBG(0, "Unspecified or unsupported format\n");
+				print_usage(argv[0]);
+				return (EINVAL);
+			}
+			if (vdisk.vdfd->parse_args(argc, operations,
+						    argv, &optp) != 0) {
+				print_usage(argv[0]);
+				return (EINVAL);
+			}
+			break;
+		default:
+			print_usage(argv[0]);
+			return (EINVAL);
+		}
+	}
+
+	// At least one operation and a usable file format are required
+	if (!operations || !file_fmt) {
+		print_usage(argv[0]);
+		return (EINVAL);
+	}
+
+	// XXX: We probably should have initialized by now
+	if (vdisk.vdfd == NULL) {
+		err = vdf_find_vdfd(&vdisk, file_fmt);
+		if (err) {
+			VIDDBG(0, "Fail to initialize format data for %s\n",
+				format);
+			return (err);
+		}
+	}
+
+	// First create file, if requested
+	if (operations & VDISK_OP_CREATE) {
+		err = vdisk.vdfd->create_vdisk(filename, optp);
+		if (err) {
+			VIDDBG(0, "Can't create file\n");
+			return (err);
+		}
+	}
+
+	props.alloc_func = NULL;
+	props.free_func = NULL;
+	props.out_target = VDISK_OUT_STDERR;
+
+	// Open read-only unless the vdisk is being created or modified
+	if (!(operations & VDISK_OP_CREATE) &&
+	    !(operations & VDISK_OP_MODIFY))
+		flags = VDISK_RO;
+	else
+		flags = 0;
+
+	err = vdisk_init(&vdisk, filename, &props, flags);
+	if (err) {
+		// The message talks about the file, so print its name
+		VIDDBG(0, "Fail to initialize from file %s\n", filename);
+		return (err);
+	}
+
+	if (operations & VDISK_OP_HEADERS) {
+		err = vdf_print_headers(&vdisk, filename);
+		if (err) {
+			VIDDBG(0, "Can't read headers\n");
+			return (err);
+		}
+	}
+
+	if (operations & VDISK_OP_MODIFY) {
+		err = vdisk.vdfd->modify_vdisk(&vdisk, optp);
+		if (err) {
+			VIDDBG(0, "Can't modify headers\n");
+			return (err);
+		}
+	}
+
+	if (operations & VDISK_OP_DUMP) {
+		uint8_t *buf, *p;
+		int bytes, done;
+		int chunk_log = 21; // 2MB
+		int nblocks;
+		int fd;
+
+		// Open output file (use stdout if not specified)
+		if (outfile != NULL) {
+			fd = open(outfile, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR);
+			if (fd == -1) {
+				VIDDBG(0, "Can't open %s: %s\n",
+				       outfile, strerror(errno));
+				exit(1);
+			}
+		} else
+			fd = 1; // stdout
+
+		// Allocate 512b-aligned read buffer
+		p = malloc((1<<chunk_log) + 512);
+		while (p == NULL) { // Try smaller chunks if we fail
+			// Below one 512b block nblocks would be 0: give up
+			if (chunk_log == 9) {
+				VIDDBG(0, "Can't allocate buffer\n");
+				exit(1);
+			}
+			chunk_log--;
+			p = malloc((1<<chunk_log) + 512);
+		}
+		buf = p;
+		while ((addr_t)buf & 511) buf++;
+
+		nblocks = (1<<chunk_log) >> 9;	// blocks per transfer
+
+		// No block count given: dump the whole vdisk
+		if (num_blocks < 0)
+			num_blocks = vdisk.sz >> 9;
+
+		for (i=0; i<num_blocks; i+=nblocks) {
+			// This could happen on last iteration
+			if ((i+nblocks) > num_blocks)
+				nblocks = num_blocks - i;
+
+			bytes = vdisk_rw(&vdisk, first_block+i, buf, nblocks,
+					 VDISK_READ, NULL);
+			if (bytes != (nblocks << 9)) {
+				VIDDBG(0, "vdisk_rw() returned %d\n", bytes);
+				exit(1);
+			}
+			// write() may be short; loop until the chunk is out
+			for (done = 0; done < (nblocks << 9); done += bytes) {
+				bytes = write(fd, buf + done,
+					      (nblocks << 9) - done);
+				if (bytes <= 0) {
+					VIDDBG(0, "write: %s\n", strerror(errno));
+					exit(1);
+				}
+			}
+		}
+		free(p);
+	}
+	return 0;
+}
diff -r 7cad1f06a7f6 tools/vdisk/vdisk_utils.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_utils.c	Tue Jun 19 08:15:38 2007 -0400
@@ -0,0 +1,435 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for O_DIRECT
+#include <stdio.h>
+#include <stdlib.h> 
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdarg.h>
+#include <libaio.h>
+#include <time.h>
+#include <limits.h>
+
+#include "list.h"
+#include "vdisk.h"
+#include "vdisk_utils.h"
+
+#define VDISK_MAX_ERRORS       (100)
+#define VDISK_ERR_STRING_LEN   (512)
+
+static void *(*vdisk_alloc_func)(size_t sz) = malloc;
+static void (*vdisk_free_func)(void *ptr) = free;
+
+int vdisk_dbg_level = 1;
+int vdisk_out_target = VDISK_OUT_STDERR; // where to print messages
+
+// Don't want to keep this on stack
+static char argstring[VDISK_ERR_STRING_LEN];
+
+// State used by vdisk_log_error() to throttle level-0 messages
+struct vdisk_log_mgt {
+	int vdisk_err_cnt;	// level-0 messages counted since the last reset
+	int interval;		// throttling interval in seconds; 0 = not throttling
+	int restart;		// set when a message arrives before next_check
+	time_t last_error;	// time of the most recent level-0 message
+	time_t next_check;	// when the throttling state is next re-evaluated
+};
+static struct vdisk_log_mgt vdisk_log = {
+	.vdisk_err_cnt = 0,
+	.restart       = 0,
+	.interval      = 0,
+	.last_error    = (time_t)0,
+	.next_check    = (time_t)LONG_MAX,	// i.e. no check is pending
+};
+
+// Emit one already-formatted message on the destination selected by
+// vdisk_out_target: syslog at LOG_DEBUG, or stderr otherwise. A non-NULL
+// 'file' adds a "file:line: " prefix.
+static void
+vdisk_print_msg(char *file, int line, char *msg)
+{
+	int to_syslog = (vdisk_out_target == VDISK_OUT_SYSLOG);
+
+	if (file == NULL) {
+		if (to_syslog)
+			syslog(LOG_DEBUG, "%s", msg);
+		else
+			fprintf(stderr, "%s", msg);
+		return;
+	}
+
+	if (to_syslog)
+		syslog(LOG_DEBUG, "%s:%d: %s", file, line, msg);
+	else
+		fprintf(stderr, "%s:%d: %s", file, line, msg);
+}
+
+// Log a printf-style message unless 'level' exceeds vdisk_dbg_level.
+// Level-0 messages are rate limited through vdisk_log: once
+// VDISK_MAX_ERRORS of them have been printed, later ones are suppressed
+// until the counter is cleared again at next_check. Other levels are never
+// throttled. The text is formatted into the static 'argstring' buffer.
+void
+vdisk_log_error(int level, char *file, int line, char *fmt, ...)
+{
+	int print_msg;
+
+	if (level > vdisk_dbg_level)
+		return;
+
+	print_msg = 0;
+
+	// Decide whether to print the message.
+	// Only manage message reporting for level 0, which is
+	// usually reserved for errors. Other messages will be
+	// printed unconditionally.
+	if (level == 0) {			
+		time_t now;
+
+		if (time(&now) == (time_t)-1) {
+			// This should never happen ;-()
+			vdisk_print_msg(NULL, 0, "vdisk: Can't get time, "
+					"error reporting stopped\n");
+			return; // XXX: Or continue?
+		}
+
+		if (now >= vdisk_log.next_check) {
+			// A throttling window set up earlier has run out
+			if (now - vdisk_log.last_error > 
+			    (time_t)vdisk_log.interval) {
+				// reset message throttling
+				vdisk_log.restart = 0;
+				vdisk_log.interval = 0;
+				vdisk_log.vdisk_err_cnt = 0;
+				vdisk_log.next_check = LONG_MAX;
+				vdisk_print_msg(NULL, 0, "vdisk: Restoring "
+						"error reporting\n");
+			}
+
+			if (vdisk_log.restart) {				
+				// Double the interval, max at 128 seconds
+				vdisk_log.interval = (vdisk_log.interval > 64) ? 
+					vdisk_log.interval :
+					(vdisk_log.interval * 2);
+				vdisk_log.next_check += 
+					(time_t)vdisk_log.interval;
+				vdisk_log.restart = 0;
+			} 
+			vdisk_log.vdisk_err_cnt = 0;	
+
+		} else {
+			// Message received during throttling interval.
+			// We will need to double the interval later
+			vdisk_log.restart = 1;
+		}
+
+		if (vdisk_log.vdisk_err_cnt < VDISK_MAX_ERRORS) {
+			vdisk_log.vdisk_err_cnt++;
+			print_msg = 1;
+		}	       		
+		if (vdisk_log.vdisk_err_cnt == VDISK_MAX_ERRORS) {
+			vdisk_log.vdisk_err_cnt++; // step past the limit: run once
+			if (vdisk_log.interval == 0) {
+				// Start interval management
+				vdisk_print_msg(NULL, 0, "vdisk: Too many "
+						"errors, slowing down rate "
+						"of reporting\n");
+				vdisk_log.interval = 1;
+				vdisk_log.next_check = now + 
+					(time_t)vdisk_log.interval;
+			}
+		} 
+
+		vdisk_log.last_error = now;
+	} else
+		print_msg = 1;
+
+	if (print_msg) {
+		va_list args;
+		
+		// Roll arguments into a string
+		va_start(args, fmt);
+		(void)vsnprintf(argstring, VDISK_ERR_STRING_LEN,
+				fmt, args);
+		va_end(args);
+		vdisk_print_msg(file, line, argstring);
+	}
+}
+
+// Install custom allocator hooks; a NULL argument keeps the current one.
+void
+vdisk_alloc_init(void *alloc_func, void *free_func)
+{
+	if (alloc_func)
+		vdisk_alloc_func = alloc_func;
+	if (free_func)
+		vdisk_free_func = free_func;
+}
+
+// Allocate 'sz' bytes with the configured allocator and clear them.
+// Returns the zero-filled block, or NULL if the allocator returned NULL.
+void *
+vdisk_malloc(size_t sz)
+{
+	void *mem = vdisk_alloc_func(sz);
+
+	// memset() hands back its first argument, i.e. 'mem'
+	return (mem != NULL) ? memset(mem, 0, sz) : NULL;
+}
+
+// Release 'ptr' through the configured free routine.
+void
+vdisk_free(void *ptr)
+{
+	vdisk_free_func(ptr);	// caller must clear its own copy of the pointer
+}
+
+// Flush 'fp' to disk, ask the kernel to drop its cached pages, then close
+// it. fsync/fadvise failures are only logged; returns the close() result.
+int
+vdisk_close(int fp)
+{
+	int err;
+
+	if (fsync(fp) != 0)
+		VIDDBG(0, "fsync(): %s\n", strerror(errno));
+
+	// posix_fadvise() returns the error number and leaves errno alone,
+	// so report its return value rather than errno.
+	err = posix_fadvise(fp, 0, 0, POSIX_FADV_DONTNEED);
+	if (err)
+		VIDDBG(0, "posix_fadvise(): %s\n", strerror(err));
+	return (close(fp));
+}
+
+// Store the size of file 'f' (its end offset) in *sz and put the file
+// position back where it was. Returns 0, or the errno of a failed lseek().
+size_t
+vdisk_size(int f, size_t *sz)
+{
+	off_t cur, end;	// off_t, so the -1 error value compares reliably
+	int err;
+
+	/*
+	 * XXX: Obviously, we should use fstat(). Unfortunately, I couldn't
+	 * figure out how to make a dynamic library that calls fstat.
+	 * See glibc FAQ for description of *problem* (why couldn't they
+	 * provide a solution as well?)
+	 */
+	// Remember current position
+	cur = lseek(f, 0, SEEK_CUR);
+	if (cur == (off_t)-1) {
+		err = errno;
+		VIDDBG(0, "lseek: Can't seek to current: %s\n", strerror(err));
+		return (err);
+	}
+
+	end = lseek(f, 0, SEEK_END);
+	if (end == (off_t)-1) {
+		err = errno;
+		VIDDBG(0, "lseek: Can't seek to end: %s\n", strerror(err));
+		return (err);
+	}
+	*sz = end;
+
+	// Restore the saved position (this used to rewind to offset 0)
+	if (lseek(f, cur, SEEK_SET) == (off_t)-1) {
+		err = errno;
+		VIDDBG(0, "lseek: Can't seek to current: %s\n", strerror(err));
+		return (err);
+	}
+	return (0);
+}
+
+// Fill in the hash slot reserved for 'block' with an asynchronous read or
+// write of 'size' bytes at file offset 'off' and append its iocb to
+// vdisk->aio_submit[]. Nothing is submitted here. Returns 'size'.
+size_t
+vdisk_asyncio(vdisk_dev_t *vdisk, uint64_t block,
+	      int fp, char *buf,
+	      size_t size, off_t off,
+	      void *arg, void *aiocb,
+	      int op)
+{
+	int slot = VDISK_HASH_IDX(block);
+	struct pending_aio *pending;
+	struct iocb *cb;
+
+	// Room must remain in the queue and the slot must belong to 'block'
+	ASSERT(vdisk->aio_cnt < VDISK_HASH_SZ);
+	ASSERT(vdisk->hash[slot].key == block);
+
+	// Bookkeeping that travels with the request (via cb->data below)
+	pending = &vdisk->hash[slot].pio;
+	pending->arg = arg;
+	pending->block = block;
+	pending->aiocb = aiocb;
+	pending->num_blocks = size>>9;	// in 512-byte units
+	pending->off = off;
+	pending->fd = fp;
+	pending->op = op;
+
+	cb = &vdisk->hash[slot].io;
+	if (op != VDISK_WRITE)
+		io_prep_pread(cb, fp, buf, size, off);
+	else
+		io_prep_pwrite(cb, fp, buf, size, off);
+	cb->data = pending;
+
+	VIDDBG(50, "Using hash entry %d (block %d)\n",
+	       VDISK_HASH_IDX(pending->block), pending->block);
+	vdisk->aio_submit[vdisk->aio_cnt++] = cb;
+	return (size);
+}
+
+// fsync() 'fp', then ask the kernel to drop the cached pages of the byte
+// range [start, start+len). Failures are logged and otherwise ignored.
+static void
+vdisk_flush_range(int fp, off_t start, off_t len)
+{
+	int res;
+
+	res = fsync(fp);
+	if (res)
+		VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+	// posix_fadvise() returns the error number and does not set errno
+	res = posix_fadvise(fp, start, len, POSIX_FADV_DONTNEED);
+	if (res)
+		VIDDBG(0, "posix_fadvise: %s\n", strerror(res));
+}
+
+// Track the run of written bytes recorded in 'syncio' and decide when to
+// flush it. A write starting inside, or right at the end of, the pending
+// run makes the run end where the new write ends; the run is flushed once
+// it is longer than WRITE_RUN. A write elsewhere flushes the pending run
+// and becomes the new run, unless it is itself longer than WRITE_RUN.
+static void
+vdisk_manage_pcache(int fp, vdisk_syncio_t *syncio, off_t start, off_t len)
+{
+#define	WRITE_RUN	(1<<22)	//4MB
+	DO_STATS(time_t now);
+
+	DO_STATS(++(syncio->total_writes));
+
+	if (syncio->is_set) {
+		if (start >= syncio->io_start &&
+		    start <= syncio->io_start + syncio->io_len) {
+			// Cut the run back to 'start', then append the write
+			syncio->io_len -= (syncio->io_start + 
+					   syncio->io_len) - start;
+			syncio->io_len += len;
+			DO_STATS(++(syncio->contig_writes));
+			if (syncio->io_len > WRITE_RUN) {
+				DO_STATS(++(syncio->flush_size_force));
+
+				syncio->is_set = 0;
+				vdisk_flush_range(fp, syncio->io_start,
+						  syncio->io_len);
+			}
+			len = 0; // NOTE:len is consumed into previous
+		} else {
+#if VDISK_SYNCIO_STATS
+			if (syncio->io_len < (1<<20))
+				++(syncio->flush_size_sub1MB);
+			else if (syncio->io_len < (1<<21))
+				++(syncio->flush_size_sub2MB);
+			else if (syncio->io_len < (1<<22))
+				++(syncio->flush_size_sub4MB);
+			else if (syncio->io_len < (1<<23))
+				++(syncio->flush_size_sub8MB);
+			else
+				++(syncio->flush_size_ovr8MB);
+#endif /* VDISK_SYNCIO_STATS */
+			// Unrelated write: flush the pending run first
+			syncio->is_set = 0;
+			vdisk_flush_range(fp, syncio->io_start,
+					  syncio->io_len);
+		}
+	}
+	// Whatever is left starts a new run, or is flushed now if too big
+	if (len > 0) {
+		if (len <= WRITE_RUN) {
+			syncio->is_set = 1;
+			syncio->io_start = start;
+			syncio->io_len = len;
+		} else {
+			DO_STATS(++(syncio->flush_size_force));
+			vdisk_flush_range(fp, start, len);
+		}
+	}
+#if VDISK_SYNCIO_STATS
+	now = time(NULL);
+	if (now >= syncio->last_dbg_print + 60) {
+		VIDDBG(0, ":WRITE_PERF: [%lu] tWrts %lu | conWrts %lu | s1M %lu"
+		       " | s2M %lu | s4M %lu | s8M %lu | o8M %lu | f %lu\n",
+		       (unsigned long)(now - syncio->last_dbg_print),
+		       syncio->total_writes, syncio->contig_writes,
+		       syncio->flush_size_sub1MB, syncio->flush_size_sub2MB,
+		       syncio->flush_size_sub4MB, syncio->flush_size_sub8MB,
+		       syncio->flush_size_ovr8MB, syncio->flush_size_force);
+		syncio->last_dbg_print = now;
+	}
+#endif /* VDISK_SYNCIO_STATS */
+}
+
+// Synchronously read or write 'size' bytes at offset 'off' of 'fp', then
+// trim the page cache: writes go to vdisk_manage_pcache() when 'syncio' is
+// given, else are fsync'ed; reads and such writes drop their pages at once.
+// Returns the read()/write() result, or (size_t)-1 if the seek fails.
+size_t
+vdisk_syncio(int fp, char *buf, size_t size, off_t off, int op, 
+	     vdisk_syncio_t *syncio)
+{
+	size_t bytes;
+	off_t res, io_start, io_len;
+	int err;
+
+	ASSERT(!(size & 511));
+	ASSERT(!(off & 511));
+	ASSERT(!((addr_t)buf & 511));
+
+	res = vdisk_seek(fp, off, SEEK_SET);
+	if (res != off) {
+		VIDDBG(0, "lseek couldn't set offset to 0x%" PRIx64 ": %s\n",
+		       off, strerror(errno));
+		return (-1);
+	}
+
+	if (op == VDISK_WRITE)
+		bytes = write(fp, buf, size);
+	else
+		bytes = read(fp, buf, size);
+
+	if (bytes != size) {
+		VIDDBG(0, "%s %zd bytes instead of %zd: %s\n",
+		       (op == VDISK_WRITE)?"Wrote":"Read",
+		       bytes, size, strerror(errno));
+	}
+
+	// Page-aligned start; the extra page covers the unaligned remainder
+	io_start = (off & (~((off_t)vdisk_pagesz-1)));
+	io_len = (size + vdisk_pagesz);
+
+	if (op != VDISK_READ && syncio) {
+		vdisk_manage_pcache(fp, syncio, io_start, io_len);
+		return (bytes);
+	}
+	if (op != VDISK_READ && fsync(fp) != 0)
+		VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+	// posix_fadvise() returns its error number; it does not set errno
+	err = posix_fadvise(fp, io_start, io_len, POSIX_FADV_DONTNEED);
+	if (err)
+		VIDDBG(0, "posix_fadvise: %s\n", strerror(err));
+	return (bytes);	
+}
diff -r 7cad1f06a7f6 tools/vdisk/vdisk_utils.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vdisk_utils.h	Tue Jun 19 08:15:51 2007 -0400
@@ -0,0 +1,36 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VDISK_UTILS
+#define __VDISK_UTILS
+
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "vdisk.h"
+
+
+// File primitives used by the vdisk code: plain open() and lseek64()
+#define vdisk_open(cp, fl, mode) open((cp), (fl), (mode))
+#define vdisk_seek(fp, off, whence) lseek64((fp), (off), (whence))
+// Helpers defined in vdisk_utils.c
+extern void *vdisk_malloc(size_t sz);	// zero-filled allocation
+extern void vdisk_free(void *ptr);
+extern int vdisk_close(int fp);		// fsync, drop cached pages, close
+extern size_t vdisk_size(int f, size_t *sz);	// file size into *sz
+extern size_t vdisk_syncio(int fp, char *buf, size_t sz, loff_t off, 
+			   int op, vdisk_syncio_t *syncio);
+extern size_t vdisk_asyncio(vdisk_dev_t *, uint64_t, int, char *, size_t, 
+			    loff_t, void *, void *, int);
+
+
+#endif /* __VDISK_UTILS */
diff -r 7cad1f06a7f6 tools/vdisk/vhd.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd.c	Tue Jun 19 08:16:06 2007 -0400
@@ -0,0 +1,925 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#define _GNU_SOURCE // for O_DIRECT
+#include <stdio.h>
+#include <stdlib.h> 
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "list.h"
+#include "vdisk.h"
+#include "vdisk_utils.h"
+#include "vhd.h"
+#include "vhd_footer.h"
+
+char __vhd_zeroes[VHD_FTR_SZ+512];
+char *vhd_zeroes;
+
+#define BLOCK_MASK (~(((addr_t)1<<9)-1))
+
+
+// Placeholder metadata check: accepts every file (always returns 0).
+int
+vhd_verify_metadata(vd_file_t *vf)
+{
+	return 0;	// XXX: Something more robust, maybe?
+}
+
+// Read the last 512 bytes of vf->fd (the VHD footer) into a 512-byte aligned
+// buffer, allocated on first use. Returns 0, ENOMEM, or -1 on I/O failure; in
+// the -1 case the buffer is freed and ftr_mem/ftr are reset to NULL.
+int
+vhd_read_footer(vd_file_t *vf)
+{
+	off_t ftr_off, res;
+	vhd_file_t *vhd = vf->vdf;
+	size_t bytes;
+
+	if (vhd->ftr_mem == NULL) {
+		vhd->ftr_mem = vdisk_malloc(VHD_FTR_SZ+512);
+		if (vhd->ftr_mem == NULL) {
+			VIDDBG(0, "Couldn't allocate footer\n");
+			return (ENOMEM);
+		}
+		vhd->ftr = vhd->ftr_mem;
+		while ((addr_t)vhd->ftr & 511) vhd->ftr++; 
+	}
+
+	/* Find file size (seek to the end) */
+	res = vdisk_seek(vf->fd, 0, SEEK_END);
+	if (res == -1) {
+		VIDDBG(0, "lseek couldn't set offset to end of file\n");
+		goto fail;
+	}
+	ftr_off = res - 512;
+	res = vdisk_seek(vf->fd, ftr_off, SEEK_SET);
+	if (res != ftr_off) {
+		VIDDBG(0, "lseek couldn't set offset to 0x%" PRIx64 "\n",
+		       ftr_off);
+		goto fail;
+	}
+	if ((bytes = vdisk_syncio(vf->fd, vhd->ftr, 512, ftr_off, 
+				  VDISK_READ, NULL)) != 512) {
+		VIDDBG(0, "vdisk read from offset 0x%" PRIx64 " failed "
+		       "(read %zd insted of 512) %d\n", 
+		       ftr_off, bytes, errno);
+		goto fail;
+	}
+	return 0;
+fail:
+	vdisk_free(vhd->ftr_mem);
+	vhd->ftr = vhd->ftr_mem = NULL;	// so a caller's cleanup can't free it twice
+	return (-1);
+}
+
+// Load the dynamic disk header and the block allocation table (BAT) that
+// follow the footer copy at the start of the file. Both are read into
+// freshly allocated, 512-byte aligned buffers (vhd->dhdr_mem, vhd->bat_mem).
+// Returns 0, ENOMEM on allocation failure, or -1 on seek/read failure.
+// Nothing is freed here on failure; the buffers stay attached to 'vhd'.
+int
+vhd_read_dynhdr(vd_file_t *vf)
+{
+	vhd_file_t *vhd = vf->vdf;
+	size_t bat_len;
+
+	// 512 spare bytes leave room for aligning the working pointer
+	vhd->dhdr_mem = vdisk_malloc(VHD_DHDR_SZ+512);
+	if (vhd->dhdr_mem == NULL) {
+		VIDDBG(0, "Couldn't allocate dynamic header\n");
+		return (ENOMEM);
+	}
+
+	// Round the working pointer up to the next 512-byte boundary
+	vhd->dhdr = vhd->dhdr_mem;
+	while ((addr_t)vhd->dhdr & 511)
+		vhd->dhdr++;
+
+	// The dynamic header sits right behind the copy of the footer
+	if (vdisk_seek(vf->fd, VHD_FTR_SZ, SEEK_SET) != VHD_FTR_SZ) {
+		VIDDBG(0, "Couldn't skip copy of the footer\n");
+		return (-1);
+	}
+
+	if (vdisk_syncio(vf->fd, vhd->dhdr, VHD_DHDR_SZ, VHD_FTR_SZ,
+			 VDISK_READ, NULL) != VHD_DHDR_SZ) {
+		VIDDBG(0, "Failed to read dynamic header");
+		return (-1);
+	}
+
+	// BAT size in bytes: 4 bytes per table entry, padded up to a
+	// multiple of 512 because it is read in 512B units.
+	// XXX: May need to only keep a part of BAT due to memory size concerns
+	bat_len = vhd_get_dhdr_tbl_entries(vhd->dhdr) << 2;
+	if (bat_len & 511)
+		bat_len += (512-(bat_len & 511));
+
+	vhd->bat_mem = vdisk_malloc(bat_len+512);
+	if (vhd->bat_mem == NULL) {
+		VIDDBG(0, "Couldn't allocate BAT\n");
+		return (ENOMEM);
+	}
+	// Same alignment step as for the header buffer
+	vhd->bat = vhd->bat_mem;
+	while ((addr_t)vhd->bat & 511)
+		vhd->bat++;
+
+	// The BAT is read from the offset just past the dynamic header
+	if (vdisk_syncio(vf->fd, (char *)vhd->bat, bat_len,
+			 VHD_DHDR_SZ+VHD_FTR_SZ, VDISK_READ, NULL) != bat_len) {
+		VIDDBG(0, "Failed to read BAT");
+		return (-1);
+	}
+
+	return (0);
+}
+
+// Allocate the VHD state for 'vf' and load the on-disk metadata: the footer
+// and, unless the disk is fixed-size, the dynamic header, the BAT, a sector
+// bitmap buffer and the bitmap cache. vdisk->sz and vdisk->geom come from the
+// footer. Returns 0; on failure all of it is freed and vf->vdf is set to NULL.
+int
+vhd_read_metadata(vdisk_dev_t *vdisk, vd_file_t *vf)
+{
+	int err;
+	int type;
+	vhd_file_t *vhd = NULL;
+	int secs_per_block;
+	uint32_t geom;
+	int i;
+
+	vf->vdf = (vhd_file_t *)vdisk_malloc(sizeof(vhd_file_t));
+	if (vf->vdf == NULL) {
+		VIDDBG(0, "Couldn't allocate format-specific data\n");
+		err = ENOMEM;
+		goto fail;
+	}
+	// All pointers start out NULL so that 'fail' can tell what to free
+	vhd = vf->vdf;
+	memset(vhd, 0, sizeof(vhd_file_t));
+
+	err = vhd_read_footer(vf);
+	if (err) {
+		VIDDBG(0, "Couldn't read footer\n");
+		// vhd_read_footer() has already freed the footer buffer
+		// on its I/O error paths: don't free it again at 'fail'.
+		vhd->ftr_mem = NULL;
+		goto fail;
+	}
+
+	vdisk->sz = vhd_get_ftr_orig_sz(vhd->ftr);
+
+	type = vhd_get_ftr_type(vhd->ftr);
+	if ( (type != VHD_TYPE_FIXED) &&
+	     (type != VHD_TYPE_DYNAMIC) &&
+	     (type != VHD_TYPE_DIFF)){
+		// Return error for VHD_TYPE_NONE as well.
+		VIDDBG(0, "Unsupported VHD file type (%d)\n", type);
+		err = EIO; // XXX: Something else?
+		goto fail;
+	}
+
+	if (type != VHD_TYPE_FIXED) {
+		size_t sz;
+
+		// We should have a dynamic header
+		err = vhd_read_dynhdr(vf);
+		if (err) {
+			VIDDBG(0, "Couldn't read dynamic header\n");
+			goto fail;
+		}
+
+		// No fls() in userland, so we do log2 ourselves
+		vhd->sec_per_block_log = 0;
+		secs_per_block = vhd_get_dhdr_blksz(vhd->dhdr) >> 9;
+		while (secs_per_block >>= 1)
+			vhd->sec_per_block_log++;
+
+		if (type == VHD_TYPE_DYNAMIC) {
+			// How many sectors are mapped sequentially
+			vf->batch_sz = (1<<vhd->sec_per_block_log);
+		} else {
+			// XXX: Need to think about this.
+			vf->batch_sz = 1;	
+		}
+
+		// bytes for sectormap is ((sectors per block) / 8)
+		vhd->sectormap_sz = (vhd_get_dhdr_blksz(vhd->dhdr) >> 9) >> 3;
+		// Align on 512-byte boundary
+		if ((vhd->sectormap_sz == 0) || (vhd->sectormap_sz & 511)) 
+			vhd->sectormap_sz += 512 - (vhd->sectormap_sz & 511);
+		// First new block will be allocated where the footer
+		// currently is, which is at the end of the file
+		err = vdisk_size(vf->fd, &sz);
+		if (err) {
+			VIDDBG(0, "Couldn't get file size\n");
+			goto fail;
+		}
+		vhd->next_block_off = (sz-VHD_FTR_SZ) >> 9;
+		// Allocate sectormap buffer
+		vhd->sec_mem = vdisk_malloc(512*2);
+		if (vhd->sec_mem == NULL) {
+			VIDDBG(0, "Can't allocate sectormap\n");
+			err = ENOMEM;
+			goto fail;
+		}
+		vhd->secmap_chunk = vhd->sec_mem;
+		while ((addr_t)vhd->secmap_chunk & 511) vhd->secmap_chunk++;
+		// Allocate sectormap cache
+		for (i=0;i<VHD_CACHE_SZ;i++) {
+			vhd->cache[i].sec_mem = vdisk_malloc(512*2);
+			if (vhd->cache[i].sec_mem == NULL) {
+				VIDDBG(0, "Can't allocate sectormap\n");
+				err = ENOMEM;
+				goto fail;
+			}
+			vhd->cache[i].secmap_chunk = vhd->cache[i].sec_mem;
+			while ((addr_t)vhd->cache[i].secmap_chunk & 511) 
+				vhd->cache[i].secmap_chunk++;
+			// Point to sector 0 (or any other sector),
+			// but make the map empty
+			vhd->cache[i].first_sector = 0; //VHD_INVALID_SECTOR;
+			memset(vhd->cache[i].secmap_chunk, 0, 512);
+		}
+
+		if (VHD_CACHE_SZ > 0) {
+			vhd->cache_head = &vhd->cache[0];
+			vhd->cache[0].prev = NULL;
+			for (i=1;i<VHD_CACHE_SZ;i++) {
+				vhd->cache[i-1].next = &vhd->cache[i];
+				vhd->cache[i].prev = &vhd->cache[i-1];
+			}
+			vhd->cache_tail = &vhd->cache[VHD_CACHE_SZ-1];
+			vhd->cache[VHD_CACHE_SZ-1].next = NULL;
+		} //else
+		//vhd->cache_head == NULL;
+	} else
+		vf->batch_sz = (1<<30); // (signed) infinity
+
+	vf->flags = 0;
+
+	err = vhd_verify_metadata(vf);
+	if (err) {
+		VIDDBG(0, "File appears to be corrupted\n");
+		// XXX: It may be salvageable
+		// Buffers are freed once, at 'fail' (they used to be freed twice)
+		err = EIO;
+		goto fail;
+	}
+
+	// We are assuming here that all files of the
+	// vdisk have the same geometry.
+	geom = vhd_get_ftr_geom(vhd->ftr);
+	vdisk->geom.cyls = (geom >> 16) & 0xffff;
+	vdisk->geom.heads = (geom >> 8) & 0xff;
+	vdisk->geom.secs = geom & 0xff;
+
+	return (0);
+
+fail:
+	if (vhd) {
+		if (vhd->ftr_mem)
+			vdisk_free(vhd->ftr_mem);
+		if (vhd->dhdr_mem)
+			vdisk_free(vhd->dhdr_mem);
+		if (vhd->bat_mem)
+			vdisk_free(vhd->bat_mem);
+		if (vhd->sec_mem)
+			vdisk_free(vhd->sec_mem);
+		// The cache buffers were previously leaked on this path
+		for (i=0;i<VHD_CACHE_SZ;i++)
+			if (vhd->cache[i].sec_mem)
+				vdisk_free(vhd->cache[i].sec_mem);
+		vdisk_free(vhd);
+		vf->vdf = NULL;	// don't leave a dangling pointer behind
+	}
+	return (err);
+}
+
+// Allocate file space for data block 'blockno' at the current end of the
+// file: write the footer after the new block, zero the old footer
+// location, then point the BAT entry at the block and write back the
+// 512-byte BAT chunk containing it. Returns 0 or EIO.
+int
+vhd_alloc_block(vd_file_t *vf, uint32_t blockno)
+{
+	vhd_file_t *vhd = vf->vdf;
+	size_t blocksz;
+	off_t bat_chunk_off;
+	char *bat_chunk;
+
+	ASSERT(__arch__swab32(vhd->bat[blockno]) == VHD_BAT_INVALID_ENTRY);
+	ASSERT((vhd_get_dhdr_blksz(vhd->dhdr) & 511) == 0);
+	ASSERT((vhd->sectormap_sz & 511) == 0);
+
+	// Bytes one block takes in the file: its data plus its sector bitmap
+	blocksz = vhd_get_dhdr_blksz(vhd->dhdr) + vhd->sectormap_sz;
+
+	/*
+	 * First try to write footer at new position.
+	 * The hole should be filled with zeroes
+	 * XXX: Are we sure?
+	 */
+	if (vdisk_syncio(vf->fd, vhd->ftr, VHD_FTR_SZ,
+			 (vhd->next_block_off<<9) + blocksz,
+			 VDISK_WRITE, NULL) != VHD_FTR_SZ) {
+		VIDDBG(0, "Can't append footer\n");
+		return (EIO);
+	}
+
+	// Overwrite footer with zeroes
+	if (vdisk_syncio(vf->fd, vhd_zeroes, VHD_FTR_SZ,
+			 vhd->next_block_off<<9, VDISK_WRITE, NULL)
+	    != VHD_FTR_SZ) {
+		VIDDBG(0, "Can't overwrite footer\n");
+		return (EIO);
+	}
+
+	// Now update BAT in a 512-b chunk
+	vhd->bat[blockno] = __arch__swab32(vhd->next_block_off);
+	bat_chunk_off = (VHD_FTR_SZ + VHD_DHDR_SZ + (blockno<<2)) & BLOCK_MASK;
+	bat_chunk = (char *)(((addr_t)&vhd->bat[blockno]) & BLOCK_MASK);
+	if (vdisk_syncio(vf->fd, bat_chunk, 512, bat_chunk_off,
+			 VDISK_WRITE, NULL) != 512) {
+		VIDDBG(0, "Can't update BAT\n");
+		return (EIO);
+	}
+
+	vhd->next_block_off += (blocksz >> 9);
+	return(0);
+}
+
+/*
+ * Bits here are numbered from the most significant bit of each byte,
+ * whereas the x86 bit test/set instructions behind test_bit()/set_bit()
+ * count from the least significant one. Rather than remapping pos
+ * (pos=(pos&(~7))+7-(pos&7)) for those routines, the bit is picked out
+ * explicitly.
+ * Returns non-zero (the masked bit value, not 1) if bit 'pos' is set.
+ */
+inline int
+vhd_test_bit(int pos, char *buf)
+{
+	uint8_t mask = 1 << (7 - (pos & 7));
+	uint8_t byte = *(char *)((addr_t)buf + (pos>>3));
+
+	return (byte & mask);
+}
+
+// Returns 1 when all 'bits' bits starting at bit 'start' of 'buf' are set
+// (trivially so when bits <= 0), otherwise 0.
+inline int
+vhd_test_bitset(int start, int bits, char *buf)
+{
+	int left;
+	for (left = bits; left > 0; left--)
+		if (vhd_test_bit(start + (bits - left), buf) == 0)
+			return (0);
+	return (1);
+}
+
+// Set bit 'pos' of 'buf'; bit 0 is the most significant bit of byte 0.
+inline void
+vhd_set_bit(int pos, char *buf)
+{
+	char *target = (char *)((addr_t)buf + (pos>>3));
+	uint8_t mask = 1 << (7 - (pos & 7));
+
+	*target = (uint8_t)*target | mask;
+}
+
+// Set 'bits' consecutive bits of 'buf', beginning with bit 'start'.
+inline void
+vhd_set_bitset(int start, int bits, char *buf)
+{
+	int left;
+	for (left = bits; left > 0; left--)
+		vhd_set_bit(start + (bits - left), buf);
+}
+
+
+// Finish the transfer described by 'arg' (a vhd_xfer_t); 'err' is its outcome.
+// If err is 0, the 512-byte sector bitmap chunk is read, the transferred
+// sectors are marked, and the chunk is written back (and copied to the cache
+// entry, if any). vhdx->mem is always freed. Returns 0 or EIO; NULL arg -> 0.
+int
+vhd_xfer_commit(void *arg, int err)
+{
+	vhd_xfer_t *xfer = arg;
+	int rc = 0;
+
+	if (xfer == NULL)
+		return (0);
+
+	if (err != 0)	// failed transfer: the bitmap is left untouched
+		goto out;
+
+	// Fetch the 512b chunk of the sector map
+	if (vdisk_syncio(xfer->fd, xfer->secmap_chunk, 512,
+			 xfer->secmap_addr, VDISK_READ, NULL) != 512) {
+		VIDDBG(0, "Failed to read sector bitmap\n");
+		rc = EIO;
+		goto out;
+	}
+
+	// Mark the transferred sectors
+	vhd_set_bitset(xfer->sector_bit, xfer->num_secs, xfer->secmap_chunk);
+	// and store the chunk again
+	if (vdisk_syncio(xfer->fd, xfer->secmap_chunk, 512,
+			 xfer->secmap_addr, VDISK_WRITE, NULL) != 512) {
+		VIDDBG(0, "Can't commit access\n");
+		rc = EIO;
+		goto out;
+	}
+
+	if (xfer->cache && xfer->first_sector != VHD_INVALID_SECTOR) {
+		ASSERT(xfer->cache->first_sector == VHD_INVALID_SECTOR);
+		memcpy(xfer->cache->secmap_chunk, xfer->secmap_chunk, 512);
+		xfer->cache->first_sector = xfer->first_sector;
+	}
+out:
+	vdisk_free(xfer->mem);
+	return (rc);
+}
+
+// Maps *sectorno (a 512-byte "sector" in Microsoft's terms, a "block"
+// elsewhere) to its sector in the backing file. Returns VID_BLOCK_MAPPED,
+// _NOTMAPPED, _TOOBIG or an error code. This routine is *NOT* SMP-safe!
+int
+vhd_map_block(vd_file_t *vf, 
+	      uint32_t *sectorno,      /* IN/OUT: virtual -> backing-file sector */
+	      int num_secs,            /* run length; must stay inside one block */
+	      int op,                  /* VDISK_READ never allocates anything */
+	      void **arg)	       /* OUT: commit data for vhd_xfer_commit() */
+{
+	vhd_file_t *vhd = vf->vdf;
+	int type = vhd_get_ftr_type(vhd->ftr);
+	uint32_t blockno; // block of sectors in the file
+	int err;
+	size_t bytes;
+	int sector_bit; // bit offset into 512b chunk of sectormap
+	int sector_in_block;
+	off_t sectormap_addr;
+	uint32_t first_sector;
+	vhd_cache_t *cache = vhd->cache_head;
+
+	// For fixed disks *sectorno is returned unchanged
+	if (type == VHD_TYPE_FIXED)
+		return (VID_BLOCK_MAPPED);
+
+	vhd->stats.access++;
+	
+	blockno = *sectorno >> vhd->sec_per_block_log;
+	
+	// We can only map a sequence of sectors within the same block
+	ASSERT(((*sectorno+num_secs-1) >> vhd->sec_per_block_log)
+	       == blockno);
+
+	// First sector in the block (really, blockno<<vhd->sec_per_block_log)
+	first_sector = *sectorno & (~(((uint32_t)1<<vhd->sec_per_block_log)-1));
+
+	// This sector's offset in the block
+	sector_in_block = *sectorno & (((uint32_t)1<<vhd->sec_per_block_log)-1);
+	// NOTE(review): cache is keyed per block but holds one 4096-bit chunk; confirm blocks never exceed 4096 sectors
+	sector_bit = sector_in_block & ((512*8)-1); // 8 bits in a byte
+	while (cache != NULL) {
+		if (cache->first_sector == first_sector) {
+			// Sectormap is cached
+			if (vhd_test_bitset(sector_bit, num_secs, 
+					    cache->secmap_chunk)) {
+				
+				// sector is mapped
+				*sectorno = cache->phys_first_sector +
+					+ sector_in_block;
+				
+				vhd->stats.cache_hit++;
+				
+				// Move the entry to the head of the list (most recently used)
+				if (cache->prev) {
+					cache->prev->next = cache->next;
+					if (cache->next)
+						cache->next->prev =
+							cache->prev;
+					else
+						vhd->cache_tail = cache->prev;
+					
+					cache->next = vhd->cache_head;
+					cache->next->prev = cache;
+					cache->prev = NULL;
+					vhd->cache_head = cache;
+				}
+				
+				return (VID_BLOCK_MAPPED);
+			} else {
+				break; // `cache` keeps pointing at the matching entry
+			}
+		}
+		cache = cache->next;
+	}
+	
+	if (__arch__swab32(vhd->bat[blockno]) == VHD_BAT_INVALID_ENTRY) {
+		// The block has no entry in the BAT, i.e. it is not allocated yet
+		// For reads, the caller will assume that
+		// read returned zeroes
+		if (op == VDISK_READ)
+			return (VID_BLOCK_NOTMAPPED);
+		
+		err = vhd_alloc_block(vf, blockno);
+		vhd->stats.block_alloc++; // NOTE(review): counted (and logged below) even if the allocation failed
+		VIDDBG(100, "Allocated block %d\n", blockno);
+		if (err) {
+			VIDDBG(0, "Failed to allocate block\n");
+			return (err);
+		}
+	}
+	// If no cache entry matched, recycle the tail entry by moving it to the head
+	if (VHD_CACHE_SZ > 0) {
+		
+		if (vhd->cache_tail->first_sector != VHD_INVALID_SECTOR) {
+			if ((cache == NULL) && (vhd->cache_head != NULL)) {
+				vhd_cache_t *oldh = vhd->cache_head;
+				vhd_cache_t *oldt = vhd->cache_tail;
+				
+				vhd->cache_head = oldt;
+				vhd->cache_tail = oldt->prev;
+				
+				vhd->cache_head->next = oldh;
+				oldh->prev = oldt;
+				
+				vhd->cache_head->prev = NULL;
+				
+				vhd->cache_tail->next = NULL;
+				
+				cache = vhd->cache_head;
+			}
+			
+		} else {
+			// tail cache fill is in-flight. We assume that
+			// all others are in-flight as well.
+			// We will not be caching
+			// XXX: we should probably walk the list
+			//first_sector = VHD_INVALID_SECTOR;
+		}
+	}
+	
+	// Read the 512-byte chunk of the block's sector bitmap (BAT entries are in sectors, hence <<9)
+	sectormap_addr = 
+		((uint64_t)__arch__swab32(vhd->bat[blockno])<<9) +
+		((sector_in_block>>3) & BLOCK_MASK);
+	bytes = vdisk_syncio(vf->fd, vhd->secmap_chunk, 512, 
+			     sectormap_addr, VDISK_READ, NULL);
+	if (bytes != 512) {
+		VIDDBG(0, "Failed to read sector bitmap\n");
+		return (EIO);
+	}
+	
+	// See whether the sector is present
+	if (!vhd_test_bitset(sector_bit, num_secs, vhd->secmap_chunk)) {
+		vhd_xfer_t *vhdx;
+		int byteaddr, bitno;
+		char *ptr;
+		
+		// For reads, the caller will assume that
+		// read returned zeroes
+		if (op == VDISK_READ) {
+			int i;
+			int mapped = 0;
+			
+			for (i=0; i<num_secs; i++) {
+				if (vhd_test_bit(sector_bit+i, 
+						 vhd->secmap_chunk)) {
+					mapped = 1;
+					break;
+				}
+			}
+
+			if (!mapped) {
+				// None of blocks is mapped
+				return (VID_BLOCK_NOTMAPPED);
+			} else {
+				// Some blocks are mapped and some are not
+				return (VID_BLOCK_TOOBIG);
+			}
+		}
+		// Not a read: build the data vhd_xfer_commit() needs to set the bits later
+		byteaddr = sector_bit >> 3; // Find word in the map
+		bitno = sector_bit & 7;     // Bit in the word
+		ASSERT(byteaddr<512);
+		
+		// sectormap is the first member and will be aligned
+		vhdx = vdisk_malloc(sizeof(vhd_xfer_t)+512);
+		if (vhdx == NULL) {
+			VIDDBG(0, "Failed to allocate commit data\n");
+			return (EIO);
+		}
+		// Advance to the first 512-byte-aligned address inside the allocation
+		ptr = (char *)vhdx;
+		while ((addr_t)ptr & 511) ptr++;
+		
+		if (((addr_t)ptr - (addr_t)vhdx) >= 512)
+			VIDDBG(0, "vhdx=%p, ptr=%p\n", vhdx, ptr);
+		
+		ASSERT(((addr_t)ptr - (addr_t)vhdx) < 512);
+		// Keep the raw allocation in ->mem; vhd_xfer_commit() frees it
+		((vhd_xfer_t *)ptr)->mem = (void *)vhdx;
+		vhdx = (vhd_xfer_t *)ptr;
+		vhdx->fd = vf->fd;
+		vhdx->secmap_addr = sectormap_addr;
+		vhdx->sector_bit = sector_bit;
+		vhdx->num_secs = num_secs;
+		
+		if (VHD_CACHE_SZ > 0) {
+			//vhdx->cache = &vhd->cache[cache_index];
+			vhdx->cache = cache;
+			vhdx->first_sector = first_sector;
+			if (cache) // Flush old cache entry 
+				cache->first_sector = VHD_INVALID_SECTOR;
+		} else
+			vhdx->first_sector = VHD_INVALID_SECTOR;
+		
+		*arg = vhdx;
+		
+		vhd->stats.sec_alloc++;
+		
+	} else {
+		// cache the map
+		if (VHD_CACHE_SZ > 0) {
+			if (cache && 
+			    (cache->first_sector != VHD_INVALID_SECTOR)) {
+				memcpy(cache->secmap_chunk, 
+				       vhd->secmap_chunk, 512);
+				cache->first_sector = first_sector;
+			}
+		}
+	}	
+	// The block's data starts right after its sectormap (sectormap_sz bytes)
+	if (cache)
+		cache->phys_first_sector = __arch__swab32(vhd->bat[blockno]) +
+			(vhd->sectormap_sz >> 9);
+	
+	// Sector in the backing file
+	*sectorno = (__arch__swab32(vhd->bat[blockno])) + sector_in_block 
+		+ (vhd->sectormap_sz >> 9);
+	
+
+	return (VID_BLOCK_MAPPED);
+}
+
+void
+vhd_close(struct vdisk_dev *vdisk)
+{
+	struct list_head *pos, *nxt;
+	vd_file_t *file;
+	vhd_file_t *meta;
+
+	// Releases every file attached to vdisk: logs the VHD statistics,
+	// frees the metadata buffers, closes the descriptor, frees the entry.
+	if (vdisk == NULL) {
+		VIDDBG(0, "Invalid vdisk pointer\n");
+		return;
+	}
+
+	list_for_each_safe(pos, nxt, &vdisk->vdf_head) {
+		file = list_entry(pos, vd_file_t, vdf_list);
+		if (file == NULL) {
+			VIDDBG(0, "Invalid vdisk file pointer\n");
+			return;
+		}
+
+		meta = file->vdf;
+		if (meta != NULL) {
+			VIDDBG(10, "VHD Stats for %s: \n"
+			       "\t accesses:\t%" PRId64 "\n"
+			       "\t cache_hit:\t%" PRId64 "\n"
+			       "\t block_alloc:\t%" PRId64 "\n"
+			       "\t sec_alloc:\t%" PRId64 "\n"
+			       "\t total IOs:\t%" PRId64 "\n"
+			       "\t busy:\t%" PRId64 "\n"
+			       "\t sync:\t%" PRId64 "\n"
+			       "\t async:\t%" PRId64 "\n",
+			       file->name,
+			       meta->stats.access,
+			       meta->stats.cache_hit,
+			       meta->stats.block_alloc,
+			       meta->stats.sec_alloc,
+			       vdisk->tot_io,
+			       vdisk->busyio,
+			       vdisk->syncio,
+			       vdisk->asyncio);
+
+			// Only the buffers that were actually set are released
+			if (meta->ftr_mem != NULL)
+				vdisk_free(meta->ftr_mem);
+			if (meta->dhdr_mem != NULL)
+				vdisk_free(meta->dhdr_mem);
+			if (meta->bat_mem != NULL)
+				vdisk_free(meta->bat_mem);
+			if (meta->sec_mem != NULL)
+				vdisk_free(meta->sec_mem);
+			vdisk_free(meta);
+		}
+
+		// Unlink first, then close the descriptor and drop the entry
+		list_del(&file->vdf_list);
+		if (vdisk_close(file->fd))
+			VIDDBG(0, "close(%s): %s\n", file->name, strerror(errno));
+		vdisk_free(file);
+
+		// Nothing left on the list: stop iterating
+		if (list_empty(&vdisk->vdf_head))
+			break;
+	}
+}
+
+
+int vhd_open(struct vdisk_dev *vdisk, char *filename)
+{
+	// Opens filename and, as long as the file just opened is a differencing
+	// disk, the parent named by its locator entries. One vd_file_t per file
+	// is appended to vdisk->vdf_head, leaf first; only the leaf may be
+	// opened read-write. Returns 0 or an errno-style code. On failure all
+	// files opened so far are released with vhd_close(); vdisk itself
+	// belongs to the caller and is never freed here.
+	int ret = 0;
+	int err, i;
+	vd_file_t *vf, *child_vf = NULL;
+	char *f = filename;
+	char *parent = NULL; // name from vhd_get_parent_name(), owned here
+	vhd_file_t *vhd;
+	int rw = (vdisk->flags & VDISK_RO) ? O_RDONLY : O_RDWR;
+
+	while (f != NULL) { // Read all files associated with this VD file
+		vf = (vd_file_t *)vdisk_malloc(sizeof(vd_file_t));
+		if (vf == NULL) {
+			VIDDBG(0, "Couldn't allocate vd_file structure\n");
+			ret = ENOMEM;
+			goto out;
+		}
+		memset(vf, 0, sizeof(vd_file_t));
+
+		// name[] was zeroed above, so copying size-1 bytes keeps it terminated
+		strncpy(vf->name, f, sizeof(vf->name) - 1);
+
+		vf->fd = vdisk_open(f, rw, 0);
+		if (vf->fd < 0) {
+			ret = errno ? errno : EIO; // saved before it is clobbered
+			VIDDBG(0, "Failed to open %s\n", f);
+			vdisk_free(vf);
+			goto out;
+		}
+		err = vhd_read_metadata(vdisk, vf);
+		if (err) {
+			VIDDBG(0, "Couldn't read metadata for %s\n", f);
+			(void)vdisk_close(vf->fd); // vf is not on the list yet
+			vdisk_free(vf);
+			ret = err;
+			goto out;
+		}
+
+		if (child_vf == NULL) {
+			vf->flags |= VDF_LEAF;
+			rw = O_RDONLY; // for next iteration
+		}
+
+		// TODO: if this is a parent, verify paternity (vhd_isfamily)
+		list_add_tail(&vf->vdf_list, &vdisk->vdf_head);
+
+		// vf->name holds its own copy, so a heap-allocated name can go
+		if (parent != NULL) {
+			vdisk_free(parent);
+			parent = NULL;
+		}
+		f = NULL;
+
+		vhd = (vhd_file_t *)(vf->vdf);
+		if (vhd_get_ftr_type(vhd->ftr) != VHD_TYPE_DIFF)
+			break;
+
+		// Differencing disk: use the first locator naming an openable file
+		child_vf = vf;
+		for (i = 0; i < 8 && f == NULL; i++) {
+			ple_t ple;
+			int fd;
+
+			vhd_get_dhdr_ple(vhd->dhdr, &ple, i);
+			if (ple.code != VHD_DYN_PLE_ABS &&
+			    ple.code != VHD_DYN_PLE_REL)
+				continue;
+
+			parent = vhd_get_parent_name(vf, &ple);
+			if (parent == NULL) {
+				VIDDBG(0, "Can't locate parent "
+				       "info for %s\n", vf->name);
+				ret = EINVAL;
+				goto out;
+			}
+			// stat would be better
+			fd = open(parent, O_RDONLY);
+			if (fd >= 0) {
+				(void)close(fd);
+				f = parent; // ends the locator loop
+			} else if (errno == ENOENT || errno == ELOOP ||
+				   errno == ENOTDIR || errno == ENODEV ||
+				   errno == EFAULT) {
+				vdisk_free(parent); // try the next locator
+				parent = NULL;
+			} else {
+				ret = errno; // saved before VIDDBG can clobber it
+				VIDDBG(0, "stat(%s): %s\n",
+				       parent, strerror(ret));
+				goto out;
+			}
+		}
+		// No locator worked: stop instead of reopening the same file forever
+		if (f == NULL) {
+			VIDDBG(0, "No usable parent for %s\n", vf->name);
+			ret = ENOENT;
+			goto out;
+		}
+	}
+out:
+	if (parent != NULL)
+		vdisk_free(parent);
+	if (ret)
+		vhd_close(vdisk);
+	return ret;
+}
+
+uint64_t
+vhd_size(void *hdl)
+{
+	// hdl is the struct vdisk_dev of the disk; report its sz member.
+	return (((struct vdisk_dev *)hdl)->sz);
+}
+
+int
+vhd_get_geometry(void *hdl, int *cyls, int *heads, int *secs)
+{
+	struct vdisk_dev *dev = hdl;
+	struct list_head *pos;
+	vd_file_t *base = NULL;
+	vhd_file_t *meta;
+	uint32_t chs;
+
+	// Reports the CHS geometry stored in the footer of the last file on
+	// the list, which is assumed to be the base image holding all the info.
+	list_for_each(pos, &dev->vdf_head) {
+		base = list_entry(pos, vd_file_t, vdf_list);
+	}
+	if (base == NULL) {
+		VIDDBG(0, "Can't find base file\n");
+		return (-1);
+	}
+
+	meta = (vhd_file_t *)base->vdf;
+	if (meta == NULL) {
+		VIDDBG(0, "Can't find VHD data\n");
+		return (-1);
+	}
+
+	// Packed as cylinders (16 bits), heads (8 bits), sectors (8 bits)
+	chs = vhd_get_ftr_geom(meta->ftr);
+	*cyls = (chs >> 16) & 0xffff;
+	*heads = (chs >> 8) & 0xff;
+	*secs = chs & 0xff;
+
+	VIDDBG(10, "geom = 0x%x (0x%x 0x%x 0x%x)\n", chs, *cyls, *heads, *secs);
+	return (0);
+}
+
+vdf_data_t vdfd_vhd = {	// VHD format descriptor, passed to vdisk_register() by vhd_init()
+	VHD_EXTENSION,		// "vhd"
+	vhd_open,		// open a VHD file and its parents
+	vhd_close,		// release every file of a vdisk
+	vhd_map_block,		// virtual sector -> backing-file sector
+	vhd_xfer_commit,	// consumes the commit data produced by vhd_map_block
+	vhd_print_header,	// dump footer / dynamic header
+	vhd_parse_args,		// VHD-specific command line options
+	vhd_create_vdisk,
+	vhd_modify_vdisk,
+	{NULL,NULL},		// NOTE(review): presumably the list linkage used by vdisk_register - confirm
+};
+
+void
+vhd_init()
+{
+	// Align vhd_zeroes to 512 bytes inside __vhd_zeroes, register, then clear
+	for (vhd_zeroes = __vhd_zeroes; (addr_t)vhd_zeroes & 511; vhd_zeroes++)
+		;
+	vdisk_register(&vdfd_vhd);
+	memset(vhd_zeroes, 0, VHD_FTR_SZ);
+}
+
+void
+vhd_exit()
+{
+	vdisk_unregister(&vdfd_vhd); // undoes the vdisk_register() done in vhd_init()
+}
diff -r 7cad1f06a7f6 tools/vdisk/vhd.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd.h	Tue Jun 19 08:16:16 2007 -0400
@@ -0,0 +1,107 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VHD_H
+#define __VHD_H
+
+#define VHD_EXTENSION "vhd"
+
+#define VHD_FTR_SZ   (512)
+#define VHD_DHDR_SZ  (1024)
+
+#define VHD_BAT_INVALID_ENTRY (0xffffffff)
+
+#define VHD_CACHE_SZ       (16)
+#define VHD_INVALID_SECTOR (0xffffffff)
+
+// Parent locator entry, as decoded from the dynamic header by vhd_get_dhdr_ple()
+typedef struct ple {
+	uint32_t code;       // platform code, e.g. VHD_DYN_PLE_ABS or VHD_DYN_PLE_REL
+	uint32_t data_space; // number of bytes read from the file for the locator
+	uint32_t data_len;   // length in bytes of the locator string itself
+	uint32_t reserved; // XXX: do we care?
+	uint64_t data_off;   // file offset at which the locator data starts
+} ple_t;
+
+typedef struct vhd_cache {  // one cached chunk of a block's sectormap
+	char *secmap_chunk;         // 512b chunk of block's sectormap 
+	char *sec_mem;              // NOTE(review): presumably the raw allocation behind secmap_chunk - confirm
+	uint32_t first_sector;      // First sector of the sectormap
+	uint32_t phys_first_sector; // Sector in the file that first_sector
+	                            // maps to
+	struct vhd_cache *next, *prev; // list links; hits are moved to the head
+} vhd_cache_t;
+
+typedef struct vhd_stat {   // counters logged by vhd_close()
+	uint64_t access;      // vhd_map_block() calls on non-fixed disks
+	uint64_t cache_hit;   // lookups answered from the sectormap cache
+	uint64_t block_alloc; // vhd_alloc_block() calls made by vhd_map_block()
+	uint64_t sec_alloc;   // requests that needed sectormap bits to be set
+} vhd_stat_t;
+
+typedef struct vhd_xfer {
+	// sectormap *must* be first member!
+	char secmap_chunk[512];   // 512b chunk of sectormap. 
+	off_t secmap_addr;        // Address of the chunk
+	int sector_bit;           // bit to be set in sectormap chunk
+	int num_secs;             // number of consecutive bits to set
+	vhd_cache_t *cache;       // cache entry to refill on commit, or NULL
+	uint32_t first_sector;    // same type as vhd_cache_t.first_sector / VHD_INVALID_SECTOR
+	file_t fd;                // file the sectormap chunk belongs to
+	void *mem;             // memory for vhd_xfer
+} vhd_xfer_t;
+
+typedef struct vhd_file {
+	char *secmap_chunk;    // 512B-aligned block of sectormap. 
+	char *sec_mem;         // memory for sectormap section
+	char *ftr;             // 512B-aligned footer
+	char *ftr_mem;         // memory for footer
+	char *dhdr;            // 512B-aligned dynamic header
+	char *dhdr_mem;        // memory for dynamic header
+	uint32_t *bat;         // 512B-aligned Block Allocation Table
+	uint32_t *bat_mem;     // memory for BAT
+	vhd_cache_t cache[VHD_CACHE_SZ]; // NOTE(review): presumably the entries linked between cache_head and cache_tail - confirm
+	vhd_cache_t *cache_head; // head of the sectormap cache list (most recent hit)
+	vhd_cache_t *cache_tail; // tail of the list; recycled on a cache miss
+	vhd_stat_t stats;      // counters logged by vhd_close()
+	int sec_per_block_log; // shift converting a sector number to a block number
+	int sectormap_sz;      // size of a block's sectormap in bytes
+	off_t next_block_off;  // NOTE(review): presumably file offset of the next block to allocate - confirm
+} vhd_file_t;
+
+
+#define VHD_ARG_SZ        (1<<0)
+#define VHD_ARG_TYPE      (1<<1)
+#define VHD_ARG_BLOCKSZ   (1<<2)
+#define VHD_ARG_UUID      (1<<3)
+#define VHD_ARG_TIME      (1<<4)
+#define VHD_ARG_PARENT    (1<<5)
+
+
+typedef struct vhd_args {   // result of vhd_parse_args()
+	size_t vhd_sz;      // -S: disk size in bytes (option value is in MB)
+	uint8_t type;       // -f / -d: VHD_TYPE_FIXED or VHD_TYPE_DYNAMIC
+	size_t blocksz;     // -B: block size in bytes, multiple of 512 (default 2MB)
+	uint8_t uuid[16];   // -u: UUID decoded from 32 hex characters
+	char *parent;       // -p: malloc'ed copy of the parent file name
+	uint64_t args_mask; // VHD_ARG_* bits of the options that were given
+} vhd_args_t;
+
+
+extern vdf_data_t vdfd_vhd;
+extern char *vhd_zeroes; // Just a bunch of zeroes
+
+extern int vhd_print_header(vd_file_t *vf);
+extern int vhd_parse_args(int argc, int operations, char *argv[], void **args);
+extern int vhd_create_vdisk(char *filename, void *args);
+extern int vhd_modify_vdisk(struct vdisk_dev *vdisk, void *args);
+extern char *vhd_get_parent_name(vd_file_t *vf, ple_t *ple);
+extern void vhd_init(void);
+extern void vhd_exit(void);
+
+#endif /* __VHD_H */
diff -r 7cad1f06a7f6 tools/vdisk/vhd_footer.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd_footer.h	Tue Jun 19 08:16:30 2007 -0400
@@ -0,0 +1,316 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#ifndef __VHD_FOOTER_H
+#define __VHD_FOOTER_H
+
+#include <string.h>
+#include <linux/types.h>
+#include <linux/byteorder/swab.h>
+
+#define VHD_COOKIE (uint64_t) (   (uint64_t)'c'		\
+			       | ((uint64_t)'o'<<(8*1))	\
+			       | ((uint64_t)'n'<<(8*2))	\
+			       | ((uint64_t)'e'<<(8*3))	\
+			       | ((uint64_t)'c'<<(8*4))	\
+			       | ((uint64_t)'t'<<(8*5))	\
+			       | ((uint64_t)'i'<<(8*6))	\
+			       | ((uint64_t)'x'<<(8*7)))
+
+#define VHD_FEATURES_NONE (0)
+#define VHD_FEATURES_TEMP (1)
+#define VHD_FEATURES_RSVD (2) 
+
+#define VHD_FORMAT_VER_1 (0x00010000)
+
+/* data offset for fixed disks */
+#define VHD_FIXED_OFFSET ((uint64_t)-1)
+
+#define VHD_CREATOR_APP ((uint32_t)'v' \
+			 | ((uint32_t)'i'<<8) \
+			 | ((uint32_t)'t'<<16) \
+			 | ((uint32_t)'l'<<24))
+#define VHD_CREATOR_VER_1 (0x00010000)
+
+#define VHD_CREATOR_HOST_OS ((uint32_t)'L' \
+			     | ((uint32_t)'i'<<8) \
+			     | ((uint32_t)'n'<<16) \
+			     | ((uint32_t)'x'<<24))
+
+#define VHD_TYPE_NONE       (0)
+#define VHD_TYPE_FIXED      (2)
+#define VHD_TYPE_DYNAMIC    (3)
+#define VHD_TYPE_DIFF       (4)
+
+
+
+#define VHD_GEOM(c,h,s) ({ /* GNU statement expression: value is the packed CHS */ \
+		ASSERT(((c)<=0xffff) && ((h)<=0xff) && ((s)<=0xff)) ;	\
+		((s) | ((h)<<8) | ((c)<<16)); })
+
+
+static inline uint64_t vhd_get_ftr_cookie(char *ftr) {
+	// Footer cookie: 8 bytes at offset 0, returned without byte swapping
+	return *(uint64_t *)ftr;
+}
+static inline void vhd_set_ftr_cookie(char *ftr, uint64_t val) {
+	// Store the 8 cookie bytes at offset 0 exactly as given
+	*(uint64_t *)ftr = val;
+}
+
+static inline uint32_t vhd_get_ftr_features(char *ftr) {
+	// Features: 32-bit field at footer offset 8, byte-swapped on load
+	return __arch__swab32(*(uint32_t *)(ftr + 8));
+}
+static inline void vhd_set_ftr_features(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 8
+	*(uint32_t *)(ftr + 8) = __arch__swab32(val);
+}
+
+static inline uint32_t vhd_get_ftr_fformat(char *ftr) {
+	// File format version: 32-bit field at footer offset 12, byte-swapped
+	return __arch__swab32(*(uint32_t *)(ftr + 12));
+}
+static inline void vhd_set_ftr_fformat(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 12
+	*(uint32_t *)(ftr + 12) = __arch__swab32(val);
+}
+
+static inline uint64_t vhd_get_ftr_dataoff(char *ftr) {
+	// Data offset: 64-bit field at footer offset 16, byte-swapped on load
+	return __arch__swab64(*(uint64_t *)(ftr + 16));
+}
+static inline void vhd_set_ftr_dataoff(char *ftr, uint64_t val) {
+	// Byte-swap val and store it at footer offset 16
+	*(uint64_t *)(ftr + 16) = __arch__swab64(val);
+}
+
+static inline uint32_t vhd_get_ftr_timestamp(char *ftr) {
+	// Timestamp: 32-bit field at footer offset 24, byte-swapped on load
+	return __arch__swab32(*(uint32_t *)(ftr + 24));
+}
+static inline void vhd_set_ftr_timestamp(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 24
+	*(uint32_t *)(ftr + 24) = __arch__swab32(val);
+}
+
+static inline uint32_t vhd_get_ftr_cr_app(char *ftr) {
+	// Creator application: 4 bytes at footer offset 28, not byte-swapped
+	return *(uint32_t *)(ftr + 28);
+}
+static inline void vhd_set_ftr_cr_app(char *ftr, uint32_t val) {
+	// Store val unchanged at footer offset 28
+	*(uint32_t *)(ftr + 28) = val;
+}
+
+static inline uint32_t vhd_get_ftr_cr_ver(char *ftr) {
+	// Creator version: 32-bit field at footer offset 32, byte-swapped
+	return __arch__swab32(*(uint32_t *)(ftr + 32));
+}
+static inline void vhd_set_ftr_cr_ver(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 32
+	*(uint32_t *)(ftr + 32) = __arch__swab32(val);
+}
+
+static inline uint32_t vhd_get_ftr_cr_hostos(char *ftr) {
+	// Creator host OS: 4 bytes at footer offset 36, not byte-swapped
+	return *(uint32_t *)(ftr + 36);
+}
+static inline void vhd_set_ftr_cr_hostos(char *ftr, uint32_t val) {
+	// Store val unchanged at footer offset 36
+	*(uint32_t *)(ftr + 36) = val;
+}
+
+static inline uint64_t vhd_get_ftr_orig_sz(char *ftr) {
+	// Original size: 64-bit field at footer offset 40, byte-swapped
+	return __arch__swab64(*(uint64_t *)(ftr + 40));
+}
+static inline void vhd_set_ftr_orig_sz(char *ftr, uint64_t val) {
+	// Byte-swap val and store it at footer offset 40
+	*(uint64_t *)(ftr + 40) = __arch__swab64(val);
+}
+
+static inline uint64_t vhd_get_ftr_cur_sz(char *ftr) {
+	// Current size: 64-bit field at footer offset 48, byte-swapped
+	return __arch__swab64(*(uint64_t *)(ftr + 48));
+}
+static inline void vhd_set_ftr_cur_sz(char *ftr, uint64_t val) {
+	// Byte-swap val and store it at footer offset 48
+	*(uint64_t *)(ftr + 48) = __arch__swab64(val);
+}
+
+static inline uint32_t vhd_get_ftr_geom(char *ftr) {
+	// Geometry: 32-bit field at footer offset 56, byte-swapped on load
+	return __arch__swab32(*(uint32_t *)(ftr + 56));
+}
+static inline void vhd_set_ftr_geom(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 56
+	*(uint32_t *)(ftr + 56) = __arch__swab32(val);
+}
+
+static inline uint32_t vhd_get_ftr_type(char *ftr) {
+	// Disk type (VHD_TYPE_*): 32-bit field at footer offset 60, byte-swapped
+	return __arch__swab32(*(uint32_t *)(ftr + 60));
+}
+static inline void vhd_set_ftr_type(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 60
+	*(uint32_t *)(ftr + 60) = __arch__swab32(val);
+}
+
+#define VHD_FTR_CHKSUM_OFF (64)
+static inline uint32_t vhd_get_ftr_chksum(char *ftr) {
+	// Checksum: 32-bit field at footer offset 64, byte-swapped on load
+	return __arch__swab32(*(uint32_t *)(ftr + 64));
+}
+static inline void vhd_set_ftr_chksum(char *ftr, uint32_t val) {
+	// Byte-swap val and store it at footer offset 64
+	*(uint32_t *)(ftr + 64) = __arch__swab32(val);
+}
+
+static inline uint8_t *vhd_get_ftr_uid(char *ftr) {
+	return (uint8_t *)(ftr + 68); // unique ID starts at footer offset 68
+}
+static inline void vhd_set_ftr_uid(char *ftr, uint8_t *val) {
+	memcpy(ftr + 68, val, 16); // the ID is 16 bytes long
+}
+
+static inline uint8_t vhd_get_ftr_saved_state(char *ftr) {
+	// Saved state: single byte at footer offset 84
+	return *(uint8_t *)(ftr + 84);
+}
+static inline void vhd_set_ftr_saved_state(char *ftr, uint8_t val) {
+	// Store the byte at footer offset 84
+	*(uint8_t *)(ftr + 84) = val;
+}
+
+
+
+#define VHD_DYN_COOKIE (uint64_t) (   (uint64_t)'c'		\
+				      | ((uint64_t)'x'<<(8*1))	\
+				      | ((uint64_t)'s'<<(8*2))	\
+				      | ((uint64_t)'p'<<(8*3))	\
+				      | ((uint64_t)'a'<<(8*4))	\
+				      | ((uint64_t)'r'<<(8*5))	\
+				      | ((uint64_t)'s'<<(8*6))	\
+				      | ((uint64_t)'e'<<(8*7)))
+
+#define VHD_DYN_OFFSET      ((uint64_t)-1)
+#define VHD_DYN_HDR_VER_1   (0x00010000)
+
+// Parent locator codes (our own)
+#define VHD_DYN_PLE_ABS   (   (uint64_t)'u'		\
+			      | ((uint64_t)'x'<<(8*1))	\
+			      | ((uint64_t)'n'<<(8*2))	\
+			      | ((uint64_t)'L'<<(8*3)))
+#define VHD_DYN_PLE_REL   (   (uint64_t)'k'		\
+			      | ((uint64_t)'x'<<(8*1))	\
+			      | ((uint64_t)'n'<<(8*2))	\
+			      | ((uint64_t)'L'<<(8*3)))
+
+
+static inline uint64_t vhd_get_dhdr_cookie(char *hdr) {
+	// Dynamic header cookie: 8 bytes at offset 0, not byte-swapped
+	return *(uint64_t *)hdr;
+}
+static inline void vhd_set_dhdr_cookie(char *hdr, uint64_t val) {
+	// Store the 8 cookie bytes at offset 0 exactly as given
+	*(uint64_t *)hdr = val;
+}
+
+static inline uint64_t vhd_get_dhdr_dataoff(char *hdr) {
+	// Data offset: 64-bit field at header offset 8, byte-swapped on load
+	return __arch__swab64(*(uint64_t *)(hdr + 8));
+}
+static inline void vhd_set_dhdr_dataoff(char *hdr, uint64_t val) {
+	// Byte-swap val and store it at header offset 8
+	*(uint64_t *)(hdr + 8) = __arch__swab64(val);
+}
+
+static inline uint64_t vhd_get_dhdr_tbloff(char *hdr) {
+	// Table offset: 64-bit field at header offset 16, byte-swapped
+	return __arch__swab64(*(uint64_t *)(hdr + 16));
+}
+static inline void vhd_set_dhdr_tbloff(char *hdr, uint64_t val) {
+	// Byte-swap val and store it at header offset 16
+	*(uint64_t *)(hdr + 16) = __arch__swab64(val);
+}
+
+static inline uint32_t vhd_get_dhdr_hdrver(char *hdr) {
+	// Header version: 32-bit field at header offset 24, byte-swapped
+	return __arch__swab32(*(uint32_t *)(hdr + 24));
+}
+static inline void vhd_set_dhdr_hdrver(char *hdr, uint32_t val) {
+	// Byte-swap val and store it at header offset 24
+	*(uint32_t *)(hdr + 24) = __arch__swab32(val);
+}
+
+static inline uint32_t vhd_get_dhdr_tbl_entries(char *hdr) {
+	// Max table entries: 32-bit field at header offset 28, byte-swapped
+	return __arch__swab32(*(uint32_t *)(hdr + 28));
+}
+static inline void vhd_set_dhdr_tbl_entries(char *hdr, uint32_t val) {
+	// Byte-swap val and store it at header offset 28
+	*(uint32_t *)(hdr + 28) = __arch__swab32(val);
+}
+
+static inline uint32_t vhd_get_dhdr_blksz(char *hdr) {
+	// Block size: 32-bit field at header offset 32, byte-swapped on load
+	return __arch__swab32(*(uint32_t *)(hdr + 32));
+}
+static inline void vhd_set_dhdr_blksz(char *hdr, uint32_t val) {
+	// Byte-swap val and store it at header offset 32
+	*(uint32_t *)(hdr + 32) = __arch__swab32(val);
+}
+
+#define VHD_DHDR_CHKSUM_OFF (36)
+static inline uint32_t vhd_get_dhdr_chksum(char *hdr) {
+	// Checksum: 32-bit field at header offset 36, byte-swapped on load
+	return __arch__swab32(*(uint32_t *)(hdr + 36));
+}
+static inline void vhd_set_dhdr_chksum(char *hdr, uint32_t val) {
+	// Byte-swap val and store it at header offset 36
+	*(uint32_t *)(hdr + 36) = __arch__swab32(val);
+}
+
+static inline uint8_t *vhd_get_dhdr_puid(char *hdr) {
+	return (uint8_t *)(hdr + 40); // parent unique ID starts at header offset 40
+}
+static inline void vhd_set_dhdr_puid(char *hdr,  uint8_t *val) {
+	memcpy(hdr + 40, val, 16); // the ID is 16 bytes long
+}
+
+static inline uint32_t vhd_get_dhdr_ptimestamp(char *hdr) {
+	// Parent timestamp: 32-bit field at header offset 56, byte-swapped
+	return __arch__swab32(*(uint32_t *)(hdr + 56));
+}
+static inline void vhd_set_dhdr_ptimestamp(char *hdr, uint32_t val) {
+	// Byte-swap val and store it at header offset 56
+	*(uint32_t *)(hdr + 56) = __arch__swab32(val);
+}
+
+static inline void vhd_get_dhdr_ple(char *hdr, ple_t *ple, int idx) {
+	char *tmp = &hdr[576+24*idx]; // locator idx: 24-byte record, records start at offset 576
+	ple->code = __arch__swab32(*(uint32_t *)tmp);
+	ple->data_space = __arch__swab32(*(uint32_t *)(tmp+4));
+	ple->data_len = __arch__swab32(*(uint32_t *)(tmp+8));
+	ple->reserved = __arch__swab32(*(uint32_t *)(tmp+12)); // no member of *ple is left unset
+	ple->data_off = __arch__swab64(*(uint64_t *)(tmp+16));
+}
+
+static inline void vhd_set_dhdr_ple(char *hdr, ple_t *ple, int idx) {
+	char *tmp = &hdr[576+24*idx]; // locator idx: 24-byte record, records start at offset 576
+	// Fields are stored byte-swapped; the 4 bytes at tmp+12 (reserved) are not written
+	*(uint32_t *)(tmp) = __arch__swab32(ple->code);
+	*(uint32_t *)(tmp+4) = __arch__swab32(ple->data_space);
+	*(uint32_t *)(tmp+8) = __arch__swab32(ple->data_len);
+	*(uint64_t *)(tmp+16) = __arch__swab64(ple->data_off);
+}
+
+
+
+#endif /* __VHD_FOOTER_H */
diff -r 7cad1f06a7f6 tools/vdisk/vhd_utils.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/vdisk/vhd_utils.c	Tue Jun 19 08:13:59 2007 -0400
@@ -0,0 +1,964 @@
+// Copyright (c) 2003-2007, Virtual Iron Software, Inc.
+//
+// Portions have been modified by Virtual Iron Software, Inc.
+// (c) 2007. This file and the modifications can be redistributed and/or
+// modified under the terms and conditions of the GNU General Public
+// License, version 2.1 and not any later version of the GPL, as published
+// by the Free Software Foundation.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/stddef.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include "vdisk.h"
+#include "vdisk_utils.h"
+#include "vhd.h"
+#include "vhd_footer.h"
+
+
+// Taken from Microsoft's VHD spec (hence notations...)
+static uint32_t
+vhd_chs(ssize_t sz)
+{
+	// Packed CHS geometry for a disk of sz bytes (assumed whole sectors).
+	// The sector count is clamped in 64 bits so huge sizes saturate, not wrap.
+	uint64_t allSectors = (uint64_t)sz >> 9;
+	uint32_t totalSectors = 65535 * 16 * 255;
+	int sectorsPerTrack, heads, cylinderTimesHeads, cylinders;
+
+	if (allSectors < totalSectors)
+		totalSectors = (uint32_t)allSectors;
+
+	if (totalSectors >= 65535 * 16 * 63) {
+		sectorsPerTrack = 255;
+		heads = 16;
+		cylinderTimesHeads = totalSectors / sectorsPerTrack;
+	} else {
+		sectorsPerTrack = 17; 
+		cylinderTimesHeads = totalSectors / sectorsPerTrack;
+		heads = (cylinderTimesHeads + 1023) / 1024;
+		if (heads < 4)
+			heads = 4;
+
+		if (cylinderTimesHeads >= (heads * 1024) || heads > 16) {
+			sectorsPerTrack = 31;
+			heads = 16;
+			cylinderTimesHeads = totalSectors / sectorsPerTrack;	
+		}
+
+		if (cylinderTimesHeads >= (heads * 1024)) {
+			sectorsPerTrack = 63;
+			heads = 16;
+			cylinderTimesHeads = totalSectors / sectorsPerTrack;
+		}
+	}
+	cylinders = cylinderTimesHeads / heads;
+
+	return (VHD_GEOM(cylinders, heads, sectorsPerTrack));
+}
+
+uint32_t
+vhd_chksum(char *ptr, size_t sz, char *excl)
+{
+	// Returns the one's complement of the sum of the sz bytes at ptr, minus
+	// the 4 bytes at excl when excl is non-NULL; 0 when ptr is NULL.
+	uint32_t chksum = 0;
+	size_t i;
+
+	if (ptr == NULL)
+		return (0);
+
+	for (i = 0; i < sz; i++)
+		chksum += (uint8_t)ptr[i];
+
+	// Subtract as unsigned bytes; signed chars >= 0x80 were sign-extended
+	for (i = 0; excl != NULL && i < 4; i++)
+		chksum -= (uint8_t)excl[i];
+	return (~chksum);
+}
+
+
+static char *
+vhd_time(uint32_t *file_time, uint32_t *cur_time, int f2c)
+{
+	// Converts between a VHD timestamp (*file_time, seconds since
+	// 2000-01-01 00:00:00 UTC) and Unix time (*cur_time).
+	//   f2c != 0: *file_time -> *cur_time (cur_time may be NULL)
+	//   f2c == 0: *cur_time -> *file_time (file_time may be NULL)
+	// Returns ctime()'s static buffer for the converted value with the
+	// trailing newline removed, or NULL on failure. If the input pointer
+	// is NULL, the output (when given) is set to 0.
+	//
+	// The distance between the two epochs is a constant. It used to be
+	// derived from two mktime() calls, but mktime() interprets its argument
+	// as local time, so the result was off wherever the zone's UTC offset
+	// differed between 1970 and 2000.
+	const time_t epoch_delta = 946684800; // 2000-01-01T00:00:00Z, Unix time
+	uint32_t *in = f2c ? file_time : cur_time;
+	uint32_t *out = f2c ? cur_time : file_time;
+	time_t tm;
+	char *timestr;
+
+	if (in == NULL) {
+		VIDDBG(0, "Invalid time\n");
+		if (out != NULL)
+			*out = 0;
+		return NULL;
+	}
+
+	// Shift the value from one epoch to the other
+	if (f2c)
+		tm = *in + epoch_delta;
+	else
+		tm = *in - epoch_delta;
+
+	timestr = ctime(&tm);
+	if (timestr == NULL) {
+		VIDDBG(0, "Couldn't convert time (0x%x)\n", *in);
+		return NULL;
+	}
+
+	// Hand the converted value back if the caller asked for it
+	if (out != NULL)
+		*out = tm;
+
+	// ctime() ends its string with '\n'; drop it
+	timestr[strlen(timestr)-1] = '\0';
+
+	return timestr;
+}
+
+
+char *
+vhd_get_parent_name(vd_file_t *vf, ple_t *ple)
+{
+	// Reads the parent locator described by ple from vf. Returns a
+	// NUL-terminated vdisk_malloc() string owned by the caller, or NULL.
+	char *cp, *buf, *pool;
+	ssize_t bytes;
+
+	// data_len bytes are copied out of the data_space bytes read below
+	if ((ple == NULL) || (ple->data_len == 0) ||
+	    (ple->data_len > ple->data_space)) {
+		VIDDBG(0, "Invalid data\n");
+		return (NULL);
+	}
+
+	// The file is opened with O_DIRECT, so we need to
+	// align buffer on 512-byte boundary
+	pool = buf = vdisk_malloc((size_t)ple->data_space+512);
+	if (buf == NULL) {
+		perror("malloc");
+		return (NULL);
+	}
+	while((addr_t)buf & 511) buf++;
+	if (lseek(vf->fd, ple->data_off, SEEK_SET) != ple->data_off) {
+		perror("lseek");
+		vdisk_free(pool);
+		return NULL;
+	}
+	bytes = read(vf->fd, buf, (size_t)ple->data_space);
+	if ((bytes < 0) || ((size_t)bytes != ple->data_space)) {
+		perror("read");
+		vdisk_free(pool);
+		VIDDBG(0, "fd = %d\n", vf->fd);
+		return NULL;
+	}
+	cp = vdisk_malloc(ple->data_len+1);
+	if (cp == NULL) {
+		perror("malloc");
+		vdisk_free(pool);
+		return (NULL);
+	}
+	strncpy(cp, buf, ple->data_len);
+	cp[ple->data_len] = 0; // terminate the copy that is returned, not buf
+	vdisk_free(pool);
+
+	//XXX: for codes W2Ru and W2ku we need to convert from UTF-16 to ASCII
+	return cp;
+}
+
+int
+vhd_print_header(vd_file_t *vf)
+{
+	char *cp;
+	uint64_t v64;
+	uint32_t v32;
+	vhd_file_t *vhd = (vhd_file_t *)(vf->vdf);
+	size_t sz, max_sz;
+	int i;
+	int err;
+	// Prints the footer of vf and, for dynamic and differencing disks, the
+	// dynamic header (with parent locators) to stdout. Returns 0, or the
+	// vdisk_size() error. First, figure out the max file size.
+	err = vdisk_size(vf->fd, &sz);
+	if (err) {
+		VIDDBG(0, "Couldn't get file size\n");
+		return (err);
+	}
+
+	if (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_FIXED)
+		max_sz = sz;
+	else {
+		uint64_t unmapped_blocks = 0;
+		size_t new_bytes;
+
+		// Count blocks that haven't been allocated
+		for (i=0; i< vhd_get_dhdr_tbl_entries(vhd->dhdr); i++)
+			if (__arch__swab32(vhd->bat[i]) == 
+			    VHD_BAT_INVALID_ENTRY)
+				unmapped_blocks++;
+
+		// XXX: Assume that block size is in 512-byte chunks
+		new_bytes = unmapped_blocks * (vhd->sectormap_sz + 
+					       vhd_get_dhdr_blksz(vhd->dhdr));
+		max_sz = sz + new_bytes;
+	}
+
+	printf("FILE %s:\n", vf->name);
+	printf("\tMaximum file size:\t0x%016zx\n\n", max_sz);
+
+
+	v64 = vhd_get_ftr_cookie(vhd->ftr);
+	cp = (char *)&v64;
+	printf("\tCookie:\t\t\t0x%016" PRIx64 " (\"%c%c%c%c%c%c%c%c\")\n", v64,
+	       cp[0], cp[1], cp[2], cp[3], cp[4], cp[5], cp[6], cp[7]);
+	
+	printf("\tFeatures:\t\t0x%08x\n", vhd_get_ftr_features(vhd->ftr));
+	printf("\tFile format version:\t0x%08x\n", vhd_get_ftr_fformat(vhd->ftr));
+	printf("\tData Offset:\t\t0x%016" PRIx64 "\n", 
+	       vhd_get_ftr_dataoff(vhd->ftr));
+	v32 = vhd_get_ftr_timestamp(vhd->ftr);
+	cp = vhd_time(&v32, NULL, 1); // NULL if the time can't be converted
+	printf("\ttimestamp:\t\t0x%08x (%s)\n", v32, cp ? cp : "invalid");
+	
+	printf("\tCreator App:\t\t0x%08x\n", vhd_get_ftr_cr_app(vhd->ftr));
+	printf("\tCreator Ver:\t\t0x%08x\n", vhd_get_ftr_cr_ver(vhd->ftr));
+	printf("\tCreator Host OS:\t0x%08x\n", vhd_get_ftr_cr_hostos(vhd->ftr));
+	printf("\tOriginal size:\t\t0x%016" PRIx64 "\n",
+	       vhd_get_ftr_orig_sz(vhd->ftr));
+	printf("\tCurrent size:\t\t0x%016" PRIx64 "\n", 
+	       vhd_get_ftr_cur_sz(vhd->ftr));
+	printf("\tGeometry:\t\t0x%08x\n", vhd_get_ftr_geom(vhd->ftr));
+	printf("\tType:\t\t\t0x%08x\n", vhd_get_ftr_type(vhd->ftr));
+	printf("\tChecksum:\t\t0x%08x\n", vhd_get_ftr_chksum(vhd->ftr));
+
+	printf("\tUnique ID:\t\t");
+	cp = (char *)vhd_get_ftr_uid(vhd->ftr);
+	for (i=0;i<16;i++)
+		printf("%02x", (*cp++) & 0xff);
+
+	printf("\n\tSaved state:\t\t0x%08x\n", vhd_get_ftr_saved_state(vhd->ftr));
+	if ((vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DYNAMIC ) ||
+	    (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DIFF )) {
+		
+		printf(" Dynamic Header:\n");
+		
+		v64 = vhd_get_dhdr_cookie(vhd->dhdr);
+		cp = (char *)&v64;
+		printf("\t Cookie:\t\t0x%016" PRIx64 " (\"%c%c%c%c%c%c%c%c\")\n",
+		       v64, cp[0], cp[1], cp[2], cp[3], cp[4], cp[5], cp[6], cp[7]);
+		printf("\t Data Offset:\t\t0x%016" PRIx64 "\n",
+		       vhd_get_dhdr_dataoff(vhd->dhdr));
+		printf("\t Table Offset:\t\t0x%016" PRIx64 "\n",
+		       vhd_get_dhdr_tbloff(vhd->dhdr));
+		printf("\t Max Table Entries:\t0x%08x\n", 
+		       vhd_get_dhdr_tbl_entries(vhd->dhdr));
+		printf("\t Block Size:\t\t0x%08x\n", 
+		       vhd_get_dhdr_blksz(vhd->dhdr));
+		printf("\t Checksum:\t\t0x%08x\n", 
+		       vhd_get_dhdr_chksum(vhd->dhdr));
+	}
+
+	if (vhd_get_ftr_type(vhd->ftr) == VHD_TYPE_DIFF ) {
+		
+		printf("\t Parent Unique ID:\t"); 
+		cp = (char *)vhd_get_dhdr_puid(vhd->dhdr);
+		for (i=0;i<16;i++)
+			printf("%02x", (*cp++) & 0xff);
+		v32 = vhd_get_dhdr_ptimestamp(vhd->dhdr);
+		cp = vhd_time(&v32, NULL, 1); // NULL if the time can't be converted
+		printf("\n\t Parent Timestamp:\t0x%08x (%s)\n", v32, cp ? cp : "invalid");
+
+		for (i=0;i<8;i++) {
+			ple_t ple;
+
+			vhd_get_dhdr_ple(vhd->dhdr, &ple, i);
+			if (ple.code != 0) {
+				printf("\t Parent Locator Entry %d:\n", i);
+				
+				cp = (char *)&ple.code;
+				printf("\t\tPlatform Code:\t0x%08x "
+				       "(\"%c%c%c%c\")\n", 
+				       ple.code, cp[3], cp[2], cp[1], cp[0]);
+				printf("\t\tData Space:\t0x%08x\n",
+				       ple.data_space);
+				printf("\t\tData Length:\t0x%08x\n",
+				       ple.data_len);
+				printf("\t\tData Offset:\t0x%016" PRIx64" \n",
+				       ple.data_off);
+				cp = vhd_get_parent_name(vf, &ple);
+				if (cp == NULL) {
+					VIDDBG(0, "Can't locate parent info "
+					       "in file\n");
+					continue;
+				}
+				printf("\t\tParent Locator:\t%s\n", cp);
+				vdisk_free(cp); // allocated with vdisk_malloc()
+			}
+		}
+	}
+
+	return (0);
+}
+
+// Print the VHD-specific option synopsis on stderr.
+static void
+vhd_parse_args_usage(void)
+{
+	fprintf(stderr, "VHD-specific options: "
+		"-S <size(MB)> [-f|-d [-p <parent>]] [-B <size(B)>]"
+		" [-u UUID] [-t]\n");
+}
+
+// Parse the VHD-specific command line options.
+//
+// argc/argv are handed to getopt() with the option string "S:fdstB:u:p:".
+// operations is a mask of VDISK_OP_* bits; VDISK_OP_CREATE and
+// VDISK_OP_MODIFY enable additional consistency checks.
+//
+// On success a malloc'ed vhd_args_t (and, if -p was given, a malloc'ed copy
+// of the parent name in ->parent) is stored in *args and 0 is returned.
+// On any failure everything allocated here is freed, the usage line is
+// printed and -1 is returned; *args is left untouched.
+int
+vhd_parse_args(int argc, int operations, char *argv[], void **args)
+{
+	// getopt() returns int; storing it in a plain char makes the
+	// "== -1" test always false where char is unsigned
+	int c;
+	int i;
+	long blocksz;
+	size_t len;
+	extern char *optarg;
+	extern int optind, opterr, optopt;
+	vhd_args_t *vhd_args;
+
+	vhd_args = calloc(1, sizeof(*vhd_args));
+	if (vhd_args == NULL) {
+		VIDDBG(0, "Can't allocate arguments\n");
+		return (-1);
+	}
+
+	vhd_args->type = VHD_TYPE_NONE;
+	vhd_args->blocksz = 0x200000; // 2MB
+
+	while ((c = getopt(argc, argv, "S:fdstB:u:p:")) != -1) {
+
+		switch (c) {
+		case 'f':
+			vhd_args->type = VHD_TYPE_FIXED;
+			vhd_args->args_mask |= VHD_ARG_TYPE;
+			break;
+		case 's': // 's' for "sparse"
+			VIDDBG(0, "'-s' option is obsolete. Use '-d' instead\n");
+			/* fallthrough */
+		case 'd':
+			vhd_args->type = VHD_TYPE_DYNAMIC;
+			vhd_args->args_mask |= VHD_ARG_TYPE;
+			break;
+		case 'S':
+			vhd_args->vhd_sz = atol(optarg) * 1024 * 1024;
+			vhd_args->args_mask |= VHD_ARG_SZ;
+			break;
+		case 't':
+			vhd_args->args_mask |= VHD_ARG_TIME;
+			break;
+		case 'p':
+			vhd_args->args_mask |= VHD_ARG_PARENT;
+			// Drop the copy made by an earlier -p, if any
+			free(vhd_args->parent);
+			len = strlen(optarg) + 1;
+			vhd_args->parent = malloc(len);
+			if (vhd_args->parent == NULL) {
+				VIDDBG(0, "Out of memory\n");
+				goto fail;
+			}
+			memcpy(vhd_args->parent, optarg, len);
+			break;
+		case 'B':
+			blocksz = atol(optarg);
+			// Must be in 512 byte chunks. Zero is rejected as
+			// well: it is used as a divisor further down.
+			if ((blocksz <= 0) || (blocksz & 511)) {
+				VIDDBG(0, "block size must be a non-zero "
+				       "multiple of 512\n");
+				goto fail;
+			}
+			vhd_args->blocksz = blocksz;
+			vhd_args->args_mask |= VHD_ARG_BLOCKSZ;
+			break;
+		case 'u':
+			if ((optarg == NULL) || (strlen(optarg) != 32)) {
+				VIDDBG(0, "UUID is a 16-byte (32-character)"
+				       " string\n");
+				goto fail;
+			}
+
+			// Convert UUID characters to hex
+			for (i = 0; i < 32; i++) {
+				uint8_t val;
+
+				val = optarg[i];
+				if (!isxdigit(val)) {
+					VIDDBG(0, "Invalid character in UUID "
+					       "string ('%c')\n", optarg[i]);
+					goto fail;
+				}
+				if (isalpha(val)) {
+					val = tolower(val);
+					val -= ('a' - 0xa);
+				} else
+					val -= '0';
+
+				// Two hex digits per byte, high nibble first.
+				// The high nibble assigns (rather than ORs)
+				// so a repeated -u starts from a clean byte.
+				if ((i & 1) == 0)
+					vhd_args->uuid[i >> 1] = (val << 4);
+				else
+					vhd_args->uuid[i >> 1] |= val;
+			}
+			vhd_args->args_mask |= VHD_ARG_UUID;
+			break;
+		default:
+			goto fail;
+		}
+	}
+
+	if ((vhd_args->parent != NULL) && (vhd_args->type == VHD_TYPE_FIXED)) {
+		VIDDBG(0, "Fixed VHD cannot have a parent\n");
+		goto fail;
+	}
+
+	if (operations & VDISK_OP_CREATE) {
+		// Without a parent to inherit from, size and type are required
+		if ((vhd_args->parent == NULL) &&
+		    ((vhd_args->vhd_sz == 0) ||
+		     (vhd_args->type == VHD_TYPE_NONE))) {
+			VIDDBG(0, "VHD size and type must be specified\n");
+			goto fail;
+		}
+	}
+
+	// blocksz is never zero here: it is either the default or a value
+	// validated by the -B handler
+	if (vhd_args->vhd_sz % vhd_args->blocksz) {
+		VIDDBG(0, "File size must be multiple of block size\n");
+		goto fail;
+	}
+
+	if (operations & VDISK_OP_MODIFY) {
+		if (vhd_args->args_mask & (VHD_ARG_SZ | VHD_ARG_BLOCKSZ)) {
+			VIDDBG(0, "Can't modify VHD's size or block size\n");
+			goto fail;
+		}
+	}
+
+	if (vhd_args->parent != NULL) {
+		vhd_args->type = VHD_TYPE_DIFF;
+		if (vhd_args->args_mask & (VHD_ARG_SZ | VHD_ARG_BLOCKSZ)) {
+			VIDDBG(0, "Differencing VHD's size and block size "
+			       "are inherited from parent\n");
+			goto fail;
+		}
+	}
+
+	*args = vhd_args;
+	return (0);
+
+fail:
+	free(vhd_args->parent);
+	free(vhd_args);
+	vhd_parse_args_usage();
+	return (-1);
+}
+
+// Store differencing file's parent information.
+//
+// Copies the parent's UUID and timestamp (taken from pvhd's footer) into
+// vhd's dynamic header, zeroes all eight parent locator entries, fills in
+// entry 0 for parentname (absolute if it starts with '/', relative
+// otherwise) and recomputes the header checksum.  The header is then written
+// at offset VHD_FTR_SZ of vfd and the NUL-terminated parent name at the
+// locator's data offset.
+//
+// If data is not NULL, *data receives the file offset just past the space
+// reserved for the parent name.
+//
+// Returns 0 on success or an errno value on failure.
+static int
+vhd_store_parent(int vfd, vhd_file_t *vhd, vhd_file_t *pvhd, 
+		 char *parentname, loff_t *data)
+{
+	uint32_t bat_sz;
+	ple_t ple;
+	int i;
+	int err;
+	ssize_t bytes;
+	size_t name_sz = strlen(parentname) + 1; // includes the NUL
+
+	vhd_set_dhdr_puid(vhd->dhdr, vhd_get_ftr_uid(pvhd->ftr));
+	vhd_set_dhdr_ptimestamp(vhd->dhdr,
+				vhd_get_ftr_timestamp(pvhd->ftr));
+
+	memset(&ple, 0, sizeof(ple_t));
+	for (i = 0; i < 8; i++)
+		vhd_set_dhdr_ple(vhd->dhdr, &ple, i);
+
+	if (parentname[0] == '/')
+		ple.code = VHD_DYN_PLE_ABS;
+	else
+		ple.code = VHD_DYN_PLE_REL;
+
+	// XXX: The spec says this is number of 512b sectors,
+	// but file created by MS's Virtual PC tool seems to
+	// think this is number of bytes, aligned at 512b
+	ple.data_space = (name_sz + 512) & (~511);
+	ple.data_len = name_sz;
+
+	// Each BAT entry is 4 bytes
+	bat_sz = vhd_get_dhdr_tbl_entries(vhd->dhdr) << 2;
+
+	// Parent name goes after footer copy, dynamic header, the BAT
+	// (padded to a sector boundary) and one more sector
+	ple.data_off = VHD_DHDR_SZ + VHD_FTR_SZ +
+		bat_sz +
+		((bat_sz & 511) ? (512 - (bat_sz & 511)) : 0) +
+		512; // XXX: see comment in vhd_create_vdisk()
+	vhd_set_dhdr_ple(vhd->dhdr, &ple, 0);
+
+	// Recalculate checksum
+	vhd_set_dhdr_chksum(vhd->dhdr,
+			    vhd_chksum(vhd->dhdr, VHD_DHDR_SZ,
+				       &vhd->dhdr[VHD_DHDR_CHKSUM_OFF]));
+
+	if (lseek(vfd, VHD_FTR_SZ, SEEK_SET) != VHD_FTR_SZ) {
+		err = errno;
+		VIDDBG(0, "lseek: %s\n", strerror(err));
+		return (err);
+	}
+
+	// Write the dynamic header.  A short write does not set errno,
+	// so report it as EIO instead of returning a stale (possibly 0) value.
+	bytes = write(vfd, vhd->dhdr, VHD_DHDR_SZ);
+	if (bytes != VHD_DHDR_SZ) {
+		err = (bytes < 0) ? errno : EIO;
+		VIDDBG(0, "write: %s\n", strerror(err));
+		return (err);
+	}
+
+	// Write PLE
+	if (lseek(vfd, ple.data_off, SEEK_SET) != (off_t)ple.data_off) {
+		err = errno;
+		VIDDBG(0, "lseek: %s\n", strerror(err));
+		return (err);
+	}
+	bytes = write(vfd, parentname, name_sz);
+	if ((bytes < 0) || ((size_t)bytes != name_sz)) {
+		err = (bytes < 0) ? errno : EIO;
+		VIDDBG(0, "write: %s\n", strerror(err));
+		return (err);
+	}
+
+	if (data != NULL)
+		*data = (loff_t)ple.data_off + (loff_t)ple.data_space;
+
+	return (0);
+}
+
+
+// Apply the modifications requested in args (a vhd_args_t) to a vdisk.
+//
+// Walks the vdisk's file list; for each file the descriptor is closed and
+// reopened O_RDWR, then, depending on args_mask:
+//   VHD_ARG_UUID   - the footer UUID is replaced,
+//   VHD_ARG_PARENT - the parent information in the dynamic header and the
+//                    parent locator data are rewritten,
+//   VHD_ARG_TIME   - the footer timestamp is set to the current time.
+// The footer checksum is recomputed and, if anything changed, the footer is
+// written at the end of the file (and at offset 0 for non-fixed disks).
+// The walk stops after the first file once any modification was requested.
+//
+// Returns 0 on success, -1 or an errno value on failure.
+int
+vhd_modify_vdisk(struct vdisk_dev *vdisk, void *args)
+{
+	vhd_args_t *vhd_args = args;
+	vd_file_t *vf = NULL;
+	vhd_file_t *vhd;
+	size_t sz;
+	ssize_t bytes;
+	off_t ftr_off;
+	int err;
+	int store_footer = 0;
+	struct list_head *ptr;
+	int stop = 0;
+
+
+	// XXX: We always make a single pass
+	list_for_each(ptr, &vdisk->vdf_head) {
+
+		vf = list_entry(ptr, vd_file_t, vdf_list);
+		if ((vf == NULL) || (vf->vdf == NULL)) {
+			VIDDBG(0, "Can't access vdisk's structures\n");
+			return (-1);
+		}
+		vhd = (vhd_file_t *)vf->vdf;
+
+		// Close and reopen file (it may have been open O_DIRECT)
+		err = vdisk_close(vf->fd);
+		if (err) {
+			VIDDBG(0, "Can't close %s:%d\n", vf->name, err);
+			return (err);
+		}
+
+		vf->fd = open(vf->name, O_RDWR, 0644);
+		if (vf->fd == -1) {
+			err = errno;
+			// strerror() yields a string: print it with %s
+			VIDDBG(0, "Can't open %s:%s\n", vf->name,
+			       strerror(err));
+			return (err);
+		}
+
+		// Update UUID
+		if (vhd_args->args_mask & VHD_ARG_UUID) {
+
+			vhd_set_ftr_uid(vhd->ftr, vhd_args->uuid);
+
+			store_footer = 1;
+			stop = 1;
+		}
+
+		// Change parent name
+		if (vhd_args->args_mask & VHD_ARG_PARENT) {
+			vhd_file_t *pvhd;
+			struct vdisk_dev parent;
+			vd_file_t *pvf;
+
+			// Open parent file
+			err = vdisk_init(&parent, vhd_args->parent, NULL, 0);
+			if (err) {
+				VIDDBG(0, "Failed to initialize state for "
+				       "parent %s\n", vhd_args->parent);
+				return (err);
+			}
+			pvf = list_entry(parent.vdf_head.next, vd_file_t, vdf_list);
+			pvhd = (vhd_file_t *)pvf->vdf;
+
+			// Update dynamic header and parent data
+			err = vhd_store_parent(vf->fd, vhd, pvhd,
+					       vhd_args->parent, NULL);
+			vdisk_fini(&parent);
+			if (err) {
+				VIDDBG(0, "Failed to store parent name (%s)\n",
+				       vhd_args->parent);
+				return (err);
+			}
+
+			store_footer = 1;
+			stop = 1;
+		}
+
+		// Update timestamp
+		if (vhd_args->args_mask & VHD_ARG_TIME) {
+			uint32_t curtime, ftime;
+			time_t now;
+
+			now = time(NULL);
+			if (now == (time_t)-1) {
+				err = errno;
+				perror("time");
+				return (err);
+			}
+			curtime = (uint32_t)now;
+			(void)vhd_time(&ftime, &curtime, 0);
+			vhd_set_ftr_timestamp(vhd->ftr, ftime);
+
+			// The timestamp lives in the footer, so the footer
+			// has to reach the disk for the change to stick
+			store_footer = 1;
+			stop = 1;
+		}
+
+		// Recompute footer's checksum
+		vhd_set_ftr_chksum(vhd->ftr,
+				   vhd_chksum(vhd->ftr, VHD_FTR_SZ,
+					      &vhd->ftr[VHD_FTR_CHKSUM_OFF]));
+
+		// Write the footer back if needed
+		if (store_footer) {
+
+			err = vdisk_size(vf->fd, &sz);
+			if (err != 0) {
+				VIDDBG(0, "Can't determine vdisk's size\n");
+				return (-1);
+			}
+
+			// Signed offset: a file shorter than a footer makes
+			// lseek() fail instead of seeking to a wrapped value
+			ftr_off = (off_t)sz - VHD_FTR_SZ;
+			if (lseek(vf->fd, ftr_off, SEEK_SET) != ftr_off) {
+				err = errno;
+				perror("lseek");
+				return (err);
+			}
+			bytes = write(vf->fd, vhd->ftr, VHD_FTR_SZ);
+			if (bytes != VHD_FTR_SZ) {
+				// errno is only meaningful when write() failed
+				err = (bytes < 0) ? errno : EIO;
+				errno = err;
+				perror("write");
+				return (err);
+			}
+
+			// For non-fixed disks write footer at front as well
+			if (vhd_get_ftr_type(vhd->ftr) != VHD_TYPE_FIXED) {
+				if (lseek(vf->fd, 0, SEEK_SET) != 0) {
+					err = errno;
+					perror("lseek");
+					return (err);
+				}
+				bytes = write(vf->fd, vhd->ftr, VHD_FTR_SZ);
+				if (bytes != VHD_FTR_SZ) {
+					err = (bytes < 0) ? errno : EIO;
+					errno = err;
+					perror("write");
+					return (err);
+				}
+			}
+		}
+
+		if (stop)
+			break;
+	}
+
+	// vf stays NULL when the file list is empty
+	if ((vf != NULL) && fsync(vf->fd))
+		VIDDBG(0, "fsync: %s\n", strerror(errno));
+
+	return (0);
+}
+
+// Write exactly len bytes from buf to fd.
+// Returns 0 on success; otherwise reports the failure with perror() and
+// returns an errno value (EIO for a short write, which leaves errno unset).
+static int
+vhd_create_write(int fd, const void *buf, size_t len)
+{
+	ssize_t bytes;
+	int err;
+
+	bytes = write(fd, buf, len);
+	if ((bytes >= 0) && ((size_t)bytes == len))
+		return (0);
+
+	err = (bytes < 0) ? errno : EIO;
+	errno = err;
+	perror("write");
+	return (err);
+}
+
+// Create a VHD file described by args (a vhd_args_t).
+//
+// filename is created exclusively.  If it already exists it is only accepted
+// when a fixed VHD is requested: the file is then reopened, truncated to the
+// requested size if it is larger, and a footer is appended.
+// For dynamic and differencing disks the layout written is: footer copy,
+// dynamic header, BAT filled with VHD_BAT_INVALID_ENTRY and padded to a
+// sector boundary, one extra zeroed sector, then (differencing only) the
+// parent locator data, then the footer.  A differencing disk inherits its
+// sizes and geometry from the parent named in args.
+//
+// Returns 0 on success or an errno value on failure.  Every path taken
+// once the file is open releases the descriptor, the header/footer
+// buffers and the parent state.
+int
+vhd_create_vdisk(char *filename, void *args)
+{
+	static const char zeros[512]; // zero-filled sector used for padding
+	vhd_args_t *vhd_args = args;
+	vhd_file_t vhd;
+	uint32_t curtime, ftime;
+	time_t now;
+	int vfd = -1;
+	uint32_t i;
+	int err = 0;
+	char *hdr_pool = NULL, *ftr_pool = NULL;
+	struct vdisk_dev parent;
+	int parent_open = 0; // set once vdisk_init(&parent) has succeeded
+
+	memset((char *)&vhd, 0, sizeof(vhd));
+
+	vfd = open(filename, O_CREAT|O_EXCL|O_RDWR, 0644);
+	if (vfd == -1) {
+		size_t sz;
+
+		if (errno != EEXIST) {
+			err = errno;
+			VIDDBG(0, "vfd open(%s, O_CREAT|O_EXCL|O_RDWR) "
+			       "failed: %s\n", filename, strerror(err));
+			return (err);
+		}
+
+		// File already exists
+		if (vhd_args->type != VHD_TYPE_FIXED) {
+			VIDDBG(0, "Raw files can only be converted to "
+			       "fixed VHD format\n");
+			return (EINVAL);
+		}
+
+		vfd = open(filename, O_RDWR, 0644);
+		if (vfd == -1) {
+			err = errno;
+			VIDDBG(0, "vfd open(%s, O_RDWR) failed: %s\n",
+			       filename, strerror(err));
+			return (err);
+		}
+
+		err = vdisk_size(vfd, &sz);
+		if (err) {
+			VIDDBG(0, "vdisk_size(%s) failed: %s\n",
+			       filename, strerror(err));
+			goto out;
+		}
+
+		if (vhd_args->vhd_sz < sz) {
+			VIDDBG(0, "WARNING: Truncating %s (%ld bytes) "
+			       "to %ld bytes\n",
+			       filename, sz, vhd_args->vhd_sz);
+
+			if (ftruncate(vfd, vhd_args->vhd_sz) == -1) {
+				err = errno;
+				VIDDBG(0, "ftruncate(%s, %ld): %s\n",
+				       filename, vhd_args->vhd_sz,
+				       strerror(err));
+				goto out;
+			}
+		}
+	}
+
+	// Over-allocate so the footer can be aligned on 512 bytes;
+	// ftr_pool keeps the pointer that must be handed to vdisk_free()
+	ftr_pool = vhd.ftr = vdisk_malloc(VHD_FTR_SZ+512);
+	if (vhd.ftr == NULL) {
+		VIDDBG(0, "Couldn't allocate VHD footer\n");
+		err = ENOMEM;
+		goto out;
+	}
+	while ((addr_t)vhd.ftr & 511) vhd.ftr++;
+
+	vhd_set_ftr_cookie(vhd.ftr, VHD_COOKIE);
+	vhd_set_ftr_features(vhd.ftr, VHD_FEATURES_RSVD);
+	vhd_set_ftr_fformat(vhd.ftr, VHD_FORMAT_VER_1);
+	vhd_set_ftr_type(vhd.ftr, vhd_args->type);
+
+	now = time(NULL);
+	if (now == (time_t)-1) {
+		err = errno;
+		perror("time");
+		goto out;
+	}
+	curtime = (uint32_t)now;
+	(void)vhd_time(&ftime, &curtime, 0);
+	vhd_set_ftr_timestamp(vhd.ftr, ftime);
+
+	vhd_set_ftr_cr_app(vhd.ftr, VHD_CREATOR_APP);
+	vhd_set_ftr_cr_ver(vhd.ftr, VHD_CREATOR_VER_1);
+	vhd_set_ftr_cr_hostos(vhd.ftr, VHD_CREATOR_HOST_OS);
+	vhd_set_ftr_orig_sz(vhd.ftr, vhd_args->vhd_sz);
+	vhd_set_ftr_cur_sz(vhd.ftr, vhd_args->vhd_sz);
+	vhd_set_ftr_geom(vhd.ftr, vhd_chs(vhd_args->vhd_sz));
+
+	vhd_set_ftr_uid(vhd.ftr, vhd_args->uuid);
+
+	if (vhd_args->type == VHD_TYPE_FIXED)
+		vhd_set_ftr_dataoff(vhd.ftr, VHD_FIXED_OFFSET);
+	else if ((vhd_args->type == VHD_TYPE_DYNAMIC) ||
+		 (vhd_args->type == VHD_TYPE_DIFF))
+		vhd_set_ftr_dataoff(vhd.ftr, VHD_FTR_SZ);
+	else
+		ASSERT(0);
+
+	vhd_set_ftr_chksum(vhd.ftr, vhd_chksum(vhd.ftr, VHD_FTR_SZ,
+					       &vhd.ftr[VHD_FTR_CHKSUM_OFF]));
+
+	// Create dynamic header
+	if ((vhd_args->type == VHD_TYPE_DYNAMIC) ||
+	    (vhd_args->type == VHD_TYPE_DIFF)) {
+
+		uint32_t bat_entry, bat_entries, bat_sz;
+		loff_t data;
+		vhd_file_t *pvhd = NULL;
+		vd_file_t *pvf;
+
+		if (vhd_args->type == VHD_TYPE_DIFF) {
+			// Read parent data
+			err = vdisk_init(&parent, vhd_args->parent, NULL, 0);
+			if (err) {
+				VIDDBG(0, "Failed to initialize state for "
+				       "parent %s\n", vhd_args->parent);
+				goto out;
+			}
+			parent_open = 1;
+			pvf = list_entry(parent.vdf_head.next,
+					 vd_file_t, vdf_list);
+			pvhd = (vhd_file_t *)pvf->vdf;
+
+			// Update footer fields inherited from parent
+			vhd_set_ftr_orig_sz(vhd.ftr,
+					    vhd_get_ftr_orig_sz(pvhd->ftr));
+			vhd_set_ftr_cur_sz(vhd.ftr,
+					   vhd_get_ftr_cur_sz(pvhd->ftr));
+			vhd_set_ftr_geom(vhd.ftr,
+					 vhd_get_ftr_geom(pvhd->ftr));
+
+			vhd_args->vhd_sz = vhd_get_ftr_cur_sz(vhd.ftr);
+
+			// The footer changed after its checksum was
+			// computed above: bring the checksum up to date
+			vhd_set_ftr_chksum(vhd.ftr,
+				vhd_chksum(vhd.ftr, VHD_FTR_SZ,
+					   &vhd.ftr[VHD_FTR_CHKSUM_OFF]));
+		}
+
+		// blocksz is a divisor below
+		if (vhd_args->blocksz == 0) {
+			VIDDBG(0, "Block size can't be zero\n");
+			err = EINVAL;
+			goto out;
+		}
+
+		hdr_pool = vhd.dhdr = vdisk_malloc(VHD_DHDR_SZ+512);
+		if (vhd.dhdr == NULL) {
+			VIDDBG(0, "Couldn't allocate dynamic header\n");
+			err = ENOMEM;
+			goto out;
+		}
+		while ((addr_t)vhd.dhdr & 511) vhd.dhdr++;
+
+		vhd_set_dhdr_cookie(vhd.dhdr, VHD_DYN_COOKIE);
+		vhd_set_dhdr_dataoff(vhd.dhdr, VHD_DYN_OFFSET);
+		vhd_set_dhdr_tbloff(vhd.dhdr, VHD_FTR_SZ+VHD_DHDR_SZ);
+		vhd_set_dhdr_hdrver(vhd.dhdr, VHD_DYN_HDR_VER_1);
+		vhd_set_dhdr_tbl_entries(vhd.dhdr,
+					 vhd_args->vhd_sz/vhd_args->blocksz);
+		vhd_set_dhdr_blksz(vhd.dhdr, vhd_args->blocksz);
+
+		vhd_set_dhdr_chksum(vhd.dhdr,
+				    vhd_chksum(vhd.dhdr, VHD_DHDR_SZ,
+					       &vhd.dhdr[VHD_DHDR_CHKSUM_OFF]));
+
+		// Write the copy of the footer first
+		err = vhd_create_write(vfd, vhd.ftr, VHD_FTR_SZ);
+		if (err)
+			goto out;
+
+		// Write the dynamic header
+		err = vhd_create_write(vfd, vhd.dhdr, VHD_DHDR_SZ);
+		if (err)
+			goto out;
+
+		// Initialize BAT
+		// XXX: Make it faster perhaps?
+		bat_entry = VHD_BAT_INVALID_ENTRY;
+		bat_entries = vhd_get_dhdr_tbl_entries(vhd.dhdr);
+		for (i = 0; i < bat_entries; i++) {
+			err = vhd_create_write(vfd, &bat_entry, 4);
+			if (err)
+				goto out;
+		}
+
+		// BAT must end on sector boundary (512 bytes)
+		bat_sz = bat_entries << 2;
+		if (bat_sz & 511) {
+			err = vhd_create_write(vfd, zeros,
+					       512 - (bat_sz & 511));
+			if (err)
+				goto out;
+		}
+
+		// XXX: It appears that there is a 512B block
+		// at the end of BAT, which is not mentioned in the spec
+		err = vhd_create_write(vfd, zeros, sizeof(zeros));
+		if (err)
+			goto out;
+
+		if (vhd_args->type == VHD_TYPE_DIFF) {
+			// This will store dynamic header again, but that's OK
+			err = vhd_store_parent(vfd, &vhd, pvhd,
+					       vhd_args->parent, &data);
+			if (err) {
+				VIDDBG(0, "Failed to store parent name (%s)\n",
+				       vhd_args->parent);
+				goto out;
+			}
+
+			// Position just past the parent locator data
+			if (lseek(vfd, data, SEEK_SET) != data) {
+				err = errno;
+				perror("lseek");
+				goto out;
+			}
+		}
+	} else {
+		// for fixed disk, seek to the end of the file
+		if (lseek(vfd, vhd_args->vhd_sz, SEEK_SET) !=
+		    vhd_args->vhd_sz) {
+			err = errno;
+			perror("lseek");
+			goto out;
+		}
+	}
+
+	// Write footer. For fixed disks allocate whole filesize
+	err = vhd_create_write(vfd, vhd.ftr, VHD_FTR_SZ);
+
+out:
+	if (parent_open)
+		vdisk_fini(&parent);
+
+	// Free the pool pointers, not the aligned vhd.ftr/vhd.dhdr
+	if (ftr_pool)
+		vdisk_free(ftr_pool);
+	if (hdr_pool)
+		vdisk_free(hdr_pool);
+	if (vfd != -1) {
+		if (fsync(vfd))
+			VIDDBG(0, "fsync: %s\n", strerror(errno));
+		close(vfd);
+	}
+
+	return (err);
+}


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2007-06-19 16:10 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-19 13:16 [PATCH 4/4] Add libvdisk, and vdisk_tool Ben Guthro
2007-06-19 16:10 ` Mark McLoughlin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.