xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support
@ 2013-07-15 11:55 Thanos Makatos
  2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
  2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
  0 siblings, 2 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
  To: xen-devel; +Cc: thanos.makatos

This patch series introduces full VHD support, plus some minor code
improvements.

Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1 of 2 RESEND] blktap3/vhd: Import VHD support
  2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
  2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
  1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
  To: xen-devel; +Cc: thanos.makatos

This patch imports the VHD driver from blktap2, with all changes coming from
blktap2.5.

Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>

diff --git a/tools/blktap3/drivers/Makefile b/tools/blktap3/drivers/Makefile
--- a/tools/blktap3/drivers/Makefile
+++ b/tools/blktap3/drivers/Makefile
@@ -95,6 +95,7 @@ LIBSRING := sring/libsring.a
 #MISC-OBJS-y := atomicio.o
 
 BLK-OBJS-y  := block-aio.o
+BLK-OBJS-y  += block-vhd.o
 # FIXME The following exist in blktap2 but not in blktap2.5.
 #BLK-OBJS-y += aes.o
 #BLK-OBJS-y += md5.o
diff --git a/tools/blktap2/drivers/block-vhd.c b/tools/blktap3/drivers/block-vhd.c
copy from tools/blktap2/drivers/block-vhd.c
copy to tools/blktap3/drivers/block-vhd.c
--- a/tools/blktap2/drivers/block-vhd.c
+++ b/tools/blktap3/drivers/block-vhd.c
@@ -1,5 +1,8 @@
-/* 
- * Copyright (c) 2008, XenSource Inc.
+/*
+ *
+ * Copyright (c) 2007, XenSource Inc.
+ * Copyright (c) 2010, Citrix Systems, Inc.
+ *
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,6 +27,10 @@
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * block-vhd.c: asynchronous vhd implementation.
  *
  * A note on write transactions:
  * Writes that require updating the BAT or bitmaps cannot be signaled
@@ -50,6 +57,8 @@
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
+#include <uuid/uuid.h> /* For whatever reason, Linux packages this in */
+                       /* e2fsprogs-devel.                            */
 #include <string.h>    /* for memset.                                 */
 #include <libaio.h>
 #include <sys/mman.h>
@@ -59,6 +68,7 @@
 #include "tapdisk-driver.h"
 #include "tapdisk-interface.h"
 #include "tapdisk-disktype.h"
+#include "tapdisk-storage.h"
 
 unsigned int SPB;
 
@@ -72,7 +82,7 @@ unsigned int SPB;
 	do {								\
 		DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %"	\
 		    PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: "	\
-		    "%lu, BBLK: 0x%04x\n",				\
+		    "%u, BBLK: 0x%04x\n",				\
 		    s->vhd.file, s->queued, s->completed, s->returned,	\
 		    VHD_REQS_DATA - s->vreq_free_count,			\
 		    s->bat.pbw_blk);					\
@@ -84,21 +94,20 @@ unsigned int SPB;
 			__FILE__, __LINE__, #_p);			\
 		DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n",	\
 		    __FILE__, __LINE__, #_p);				\
-		tlog_flush();						\
-		*(int*)0 = 0;						\
+		td_panic();						\
 	}
 
 #if (DEBUGGING == 1)
   #define DBG(level, _f, _a...)      DPRINTF(_f, ##_a)
-  #define ERR(err, _f, _a...)        DPRINTF("ERROR: %d: " _f, err, ##_a)
+  #define ERR(_s, err, _f, _a...)    DPRINTF("ERROR: %d: " _f, err, ##_a)
   #define TRACE(s)                   ((void)0)
 #elif (DEBUGGING == 2)
   #define DBG(level, _f, _a...)      tlog_write(level, _f, ##_a)
-  #define ERR(err, _f, _a...)	     tlog_error(err, _f, ##_a)
+  #define ERR(_s, _err, _f, _a...)   tlog_drv_error((_s)->driver, _err, _f, ##_a)
   #define TRACE(s)                   __TRACE(s)
 #else
   #define DBG(level, _f, _a...)      ((void)0)
-  #define ERR(err, _f, _a...)        ((void)0)
+  #define ERR(_s, err, _f, _a...)    ((void)0)
   #define TRACE(s)                   ((void)0)
 #endif
 
@@ -121,6 +130,7 @@ unsigned int SPB;
 #define VHD_OP_BITMAP_READ           3
 #define VHD_OP_BITMAP_WRITE          4
 #define VHD_OP_ZERO_BM_WRITE         5
+#define VHD_OP_REDUNDANT_BM_WRITE    6
 
 #define VHD_BM_BAT_LOCKED            0
 #define VHD_BM_BAT_CLEAR             1
@@ -194,8 +204,8 @@ struct vhd_bat_state {
 };
 
 struct vhd_bitmap {
-	u32                       blk;
-	u64                       seqno;       /* lru sequence number */
+	uint32_t                  blk;
+	uint64_t                  seqno;       /* lru sequence number */
 	vhd_flag_t                status;
 
 	char                     *map;         /* map should only be modified
@@ -220,15 +230,16 @@ struct vhd_state {
 
         /* VHD stuff */
 	vhd_context_t             vhd;
-	u32                       spp;         /* sectors per page */
-        u32                       spb;         /* sectors per block */
-        u64                       next_db;     /* pointer to the next 
+	uint32_t                  spp;         /* sectors per page */
+	uint32_t                  spb;         /* sectors per block */
+	uint64_t                  first_db;    /* pointer to datablock 0 */
+	uint64_t                  next_db;     /* pointer to the next 
 						* (unallocated) datablock */
 
 	struct vhd_bat_state      bat;
 
-	u64                       bm_lru;      /* lru sequence number */
-	u32                       bm_secs;     /* size of bitmap, in sectors */
+	uint64_t                  bm_lru;      /* lru sequence number */
+	uint32_t                  bm_secs;     /* size of bitmap, in sectors */
 	struct vhd_bitmap        *bitmap[VHD_CACHE_SIZE];
 
 	int                       bm_free_count;
@@ -239,6 +250,12 @@ struct vhd_state {
 	struct vhd_request       *vreq_free[VHD_REQS_DATA];
 	struct vhd_request        vreq_list[VHD_REQS_DATA];
 
+	/* for redundant bitmap writes */
+	int                       padbm_size;
+	char                     *padbm_buf;
+	long int                  debug_skipped_redundant_writes;
+	long int                  debug_done_redundant_writes;
+
 	td_driver_t              *driver;
 
 	uint64_t                  queued;
@@ -274,7 +291,7 @@ vhd_initialize(struct vhd_state *s)
 		_vhd_zsize += VHD_BLOCK_SIZE;
 
 	_vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
-			  MAP_SHARED | MAP_ANON, -1, 0);
+			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (_vhd_zeros == MAP_FAILED) {
 		EPRINTF("vhd_initialize failed: %d\n", -errno);
 		_vhd_zeros = NULL;
@@ -292,6 +309,7 @@ vhd_free(struct vhd_state *s)
 	if (_vhd_master != s || !_vhd_zeros)
 		return;
 
+	free(s->padbm_buf);
 	munmap(_vhd_zeros, _vhd_zsize);
 	_vhd_zsize  = 0;
 	_vhd_zeros  = NULL;
@@ -333,23 +351,23 @@ static int
 vhd_kill_footer(struct vhd_state *s)
 {
 	int err;
-	off_t end;
-	char *zeros;
+	off64_t end;
+	void *zeros;
 
 	if (s->vhd.footer.type == HD_TYPE_FIXED)
 		return 0;
 
-	err = posix_memalign((void **)&zeros, 512, 512);
+	err = posix_memalign(&zeros, 512, 512);
 	if (err)
 		return -err;
 
 	err = 1;
 	memset(zeros, 0xc7c7c7c7, 512);
 
-	if ((end = lseek(s->vhd.fd, 0, SEEK_END)) == -1)
+	if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1)
 		goto fail;
 
-	if (lseek(s->vhd.fd, (end - 512), SEEK_SET) == -1)
+	if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1)
 		goto fail;
 
 	if (write(s->vhd.fd, zeros, 512) != 512)
@@ -368,7 +386,7 @@ static inline int
 find_next_free_block(struct vhd_state *s)
 {
 	int err;
-	off_t eom;
+	off64_t eom;
 	uint32_t i, entry;
 
 	err = vhd_end_of_headers(&s->vhd, &eom);
@@ -376,6 +394,9 @@ find_next_free_block(struct vhd_state *s
 		return err;
 
 	s->next_db = secs_round_up(eom);
+	s->first_db = s->next_db;
+	if ((s->first_db + s->bm_secs) % s->spp)
+		s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp));
 
 	for (i = 0; i < s->bat.bat.entries; i++) {
 		entry = bat_entry(s, i);
@@ -398,12 +419,11 @@ vhd_free_bat(struct vhd_state *s)
 static int
 vhd_initialize_bat(struct vhd_state *s)
 {
-	int err, psize, batmap_required, i;
+	int err, batmap_required, i;
+	void *buf;
 
 	memset(&s->bat, 0, sizeof(struct vhd_bat));
 
-	psize = getpagesize();
-
 	err = vhd_read_bat(&s->vhd, &s->bat.bat);
 	if (err) {
 		EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
@@ -436,12 +456,11 @@ vhd_initialize_bat(struct vhd_state *s)
 					s->vhd.file);
 	}
 
-	err = posix_memalign((void **)&s->bat.bat_buf,
-			     VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
-	if (err) {
-		s->bat.bat_buf = NULL;
+	err = posix_memalign(&buf, VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
+	if (err)
 		goto fail;
-	}
+
+	s->bat.bat_buf = buf;
 
 	return 0;
 
@@ -471,6 +490,7 @@ vhd_initialize_bitmap_cache(struct vhd_s
 {
 	int i, err, map_size;
 	struct vhd_bitmap *bm;
+	void *map, *shadow;
 
 	memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
 
@@ -481,17 +501,17 @@ vhd_initialize_bitmap_cache(struct vhd_s
 	for (i = 0; i < VHD_CACHE_SIZE; i++) {
 		bm = s->bitmap_list + i;
 
-		err = posix_memalign((void **)&bm->map, 512, map_size);
-		if (err) {
-			bm->map = NULL;
+		err = posix_memalign(&map, 512, map_size);
+		if (err)
 			goto fail;
-		}
-
-		err = posix_memalign((void **)&bm->shadow, 512, map_size);
-		if (err) {
-			bm->shadow = NULL;
+
+		bm->map = map;
+
+		err = posix_memalign(&shadow, 512, map_size);
+		if (err)
 			goto fail;
-		}
+
+		bm->shadow = shadow;
 
 		memset(bm->map, 0, map_size);
 		memset(bm->shadow, 0, map_size);
@@ -508,6 +528,8 @@ fail:
 static int
 vhd_initialize_dynamic_disk(struct vhd_state *s)
 {
+	uint32_t bm_size;
+	void *buf;
 	int err;
 
 	err = vhd_get_header(&s->vhd);
@@ -527,6 +549,21 @@ vhd_initialize_dynamic_disk(struct vhd_s
 	s->spb     = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
 	s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
 
+	s->padbm_size = (s->bm_secs / getpagesize()) * getpagesize();
+	if (s->bm_secs % getpagesize())
+		s->padbm_size += getpagesize();
+
+	err = posix_memalign(&buf, 512, s->padbm_size);
+	if (err)
+		return -err;
+
+	s->padbm_buf = buf;
+	bm_size = s->bm_secs << VHD_SECTOR_SHIFT;
+	memset(s->padbm_buf, 0, s->padbm_size - bm_size);
+	memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
+	s->debug_skipped_redundant_writes = 0;
+	s->debug_done_redundant_writes = 0;
+
 	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
 		return 0;
 
@@ -616,6 +653,9 @@ static int
 	o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ? 
 		   VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
 
+	if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
+		set_vhd_flag(o_flags, VHD_OPEN_STRICT);
+
 	err = vhd_open(&s->vhd, name, o_flags);
 	if (err) {
 		libvhd_set_log_level(1);
@@ -650,8 +690,7 @@ static int
 	driver->info.sector_size = VHD_SECTOR_SIZE;
 	driver->info.info        = 0;
 
-        DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%"PRIu64
-            ", inf:%u)\n",
+        DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
 	    driver->info.size, driver->info.sector_size, driver->info.info);
 
 	if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) && 
@@ -692,6 +731,8 @@ static int
 			      VHD_FLAG_OPEN_NO_CACHE);
 
 	/* pre-allocate for all but NFS and LVM storage */
+	driver->storage = tapdisk_storage_type(name);
+
 	if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
 	    driver->storage != TAPDISK_STORAGE_TYPE_LVM)
 		vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
@@ -726,11 +767,14 @@ static int
 {
 	int err;
 	struct vhd_state *s;
-	struct vhd_bitmap *bm;
 	
 	DBG(TLOG_WARN, "vhd_close\n");
 	s = (struct vhd_state *)driver->data;
 
+	DPRINTF("gaps written/skipped: %ld/%ld\n", 
+			s->debug_done_redundant_writes,
+			s->debug_skipped_redundant_writes);
+
 	/* don't write footer if tapdisk is read-only */
 	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
 		goto free;
@@ -770,10 +814,9 @@ static int
 
 int
 vhd_validate_parent(td_driver_t *child_driver,
-		    td_driver_t *parent_driver, td_flag_t flags)
+		    td_driver_t *parent_driver,
+            __attribute__((unused)) td_flag_t flags)
 {
-	uint32_t status;
-	struct stat stats;
 	struct vhd_state *child  = (struct vhd_state *)child_driver->data;
 	struct vhd_state *parent;
 
@@ -807,7 +850,7 @@ vhd_validate_parent(td_driver_t *child_d
 	}
 	*/
 
-	if (vhd_uuid_compare(&child->vhd.header.prt_uuid, &parent->vhd.footer.uuid)) {
+	if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
 		DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
 			"snapshot.  Child image no longer valid.\n",
 			__func__, child->vhd.file, parent->vhd.file);
@@ -838,12 +881,10 @@ vhd_get_parent_id(td_driver_t *driver, t
 	if (err)
 		return err;
 
-	id->name       = parent;
-	id->drivertype = DISK_TYPE_VHD;
-	if (vhd_parent_raw(&s->vhd)) {
-		DPRINTF("VHD: parent is raw\n");
-		id->drivertype = DISK_TYPE_AIO;
-	}
+	id->name   = parent;
+	id->type   = vhd_parent_raw(&s->vhd) ? DISK_TYPE_AIO : DISK_TYPE_VHD;
+	id->flags |= TD_OPEN_SHAREABLE|TD_OPEN_RDONLY;
+
 	return 0;
 }
 
@@ -1034,7 +1075,7 @@ static struct vhd_bitmap *
 remove_lru_bitmap(struct vhd_state *s)
 {
 	int i, idx = 0;
-	u64 seq = s->bm_lru;
+	uint64_t seq = s->bm_lru;
 	struct vhd_bitmap *bm, *lru = NULL;
 
 	for (i = 0; i < VHD_CACHE_SIZE; i++) {
@@ -1138,7 +1179,7 @@ free_vhd_bitmap(struct vhd_state *s, str
 static int
 read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
 {
-	u32 blk, sec;
+	uint32_t blk, sec;
 	struct vhd_bitmap *bm;
 
 	/* in fixed disks, every block is present */
@@ -1186,7 +1227,7 @@ read_bitmap_cache_span(struct vhd_state 
 		       uint64_t sector, int nr_secs, int value)
 {
 	int ret;
-	u32 blk, sec;
+	uint32_t blk, sec;
 	struct vhd_bitmap *bm;
 
 	/* in fixed disks, every block is present */
@@ -1285,9 +1326,9 @@ static int
 schedule_bat_write(struct vhd_state *s)
 {
 	int i;
-	u32 blk;
+	uint32_t blk;
 	char *buf;
-	u64 offset;
+	uint64_t offset;
 	struct vhd_request *req;
 
 	ASSERT(bat_locked(s));
@@ -1299,10 +1340,10 @@ schedule_bat_write(struct vhd_state *s)
 	init_vhd_request(s, req);
 	memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512);
 
-	((u32 *)buf)[blk % 128] = s->bat.pbw_offset;
+	((uint32_t *)buf)[blk % 128] = s->bat.pbw_offset;
 
 	for (i = 0; i < 128; i++)
-		BE32_OUT(&((u32 *)buf)[i]);
+		BE32_OUT(&((uint32_t *)buf)[i]);
 
 	offset         = s->vhd.header.table_offset + (blk - (blk % 128)) * 4;
 	req->treq.secs = 1;
@@ -1343,6 +1384,55 @@ schedule_zero_bm_write(struct vhd_state 
 	aio_write(s, req, offset);
 }
 
+/* This is a performance optimization. When writing sequentially into full 
+ * blocks, skipping (up-to-date) bitmaps causes an approx. 25% reduction in 
+ * throughput. To prevent skipping, we issue redundant writes into the (padded) 
+ * bitmap area just to make all writes sequential. This will help VHDs on raw 
+ * block devices, while the FS-based VHDs shouldn't suffer much.
+ *
+ * Note that it only makes sense to perform this reduntant bitmap write if the 
+ * block is completely full (i.e. the batmap entry is set). If the block is not 
+ * completely full then one of the following two things will be true:
+ *  1. we'll either be allocating new sectors in this block and writing its
+ *     bitmap transactionally, which will be slow anyways; or
+ *  2. the IO will be skipping over the unallocated sectors again, so the
+ *     pattern will not be sequential anyways
+ * In either case a redundant bitmap write becomes pointless. This fact 
+ * simplifies the implementation of redundant writes: since we know the bitmap 
+ * cannot be updated by anyone else, we don't have to worry about transactions 
+ * or potential write conflicts.
+ * */
+static void
+schedule_redundant_bm_write(struct vhd_state *s, uint32_t blk)
+{
+	uint64_t offset;
+	struct vhd_request *req;
+
+	ASSERT(s->vhd.footer.type != HD_TYPE_FIXED);
+	ASSERT(test_batmap(s, blk));
+
+	req = alloc_vhd_request(s);
+	if (!req) 
+		return;
+
+	req->treq.buf = s->padbm_buf;
+
+	offset = bat_entry(s, blk);
+	ASSERT(offset != DD_BLK_UNUSED);
+	offset <<= VHD_SECTOR_SHIFT;
+	offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT);
+
+	req->op        = VHD_OP_REDUNDANT_BM_WRITE;
+	req->treq.sec  = blk * s->spb;
+	req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT;
+	req->next      = NULL;
+
+	DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n",
+	    blk, offset);
+
+	aio_write(s, req, offset);
+}
+
 static int
 update_bat(struct vhd_state *s, uint32_t blk)
 {
@@ -1380,10 +1470,10 @@ update_bat(struct vhd_state *s, uint32_t
 static int
 allocate_block(struct vhd_state *s, uint32_t blk)
 {
-	char *zeros;
 	int err, gap;
 	uint64_t offset, size;
 	struct vhd_bitmap *bm;
+	ssize_t count;
 
 	ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
 
@@ -1410,15 +1500,16 @@ allocate_block(struct vhd_state *s, uint
 	    blk, s->bat.pbw_offset);
 
 	if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
-		ERR(errno, "lseek failed\n");
+		ERR(s, -errno, "lseek failed\n");
 		return -errno;
 	}
 
-	size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
-	err  = write(s->vhd.fd, vhd_zeros(size), size);
-	if (err != size) {
-		err = (err == -1 ? -errno : -EIO);
-		ERR(err, "write failed");
+	size  = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
+	count = write(s->vhd.fd, vhd_zeros(size), size);
+	if (count != size) {
+		err = count < 0 ? -errno : -ENOSPC;
+		ERR(s, -errno,
+		    "write failed (%zd, offset %"PRIu64")\n", count, offset);
 		return err;
 	}
 
@@ -1445,8 +1536,8 @@ allocate_block(struct vhd_state *s, uint
 static int 
 schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
 {
-	u64 offset;
-	u32 blk = 0, sec = 0;
+	uint64_t offset;
+	uint32_t blk = 0, sec = 0;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req;
 
@@ -1490,8 +1581,8 @@ static int
 schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
 {
 	int err;
-	u64 offset;
-	u32 blk = 0, sec = 0;
+	uint64_t offset;
+	uint32_t blk = 0, sec = 0;
 	struct vhd_bitmap  *bm = NULL;
 	struct vhd_request *req;
 
@@ -1539,7 +1630,11 @@ schedule_data_write(struct vhd_state *s,
 			set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
 		} else
 			add_to_transaction(&bm->tx, req);
-	}
+	} else if (sec == 0 && 	/* first sector inside data block */
+		   s->vhd.footer.type != HD_TYPE_FIXED && 
+		   bat_entry(s, blk) != s->first_db &&
+		   test_batmap(s, blk))
+		schedule_redundant_bm_write(s, blk);
 
 	aio_write(s, req, offset);
 
@@ -1554,7 +1649,7 @@ static int
 schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
 {
 	int err;
-	u64 offset;
+	uint64_t offset;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req = NULL;
 
@@ -1596,7 +1691,7 @@ schedule_bitmap_read(struct vhd_state *s
 static void
 schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
 {
-	u64 offset;
+	uint64_t offset;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req;
 
@@ -1641,7 +1736,7 @@ schedule_bitmap_write(struct vhd_state *
 static int
 __vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req;
 
@@ -1767,7 +1862,6 @@ vhd_queue_write(td_driver_t *driver, td_
 
 		case VHD_BM_BAT_LOCKED:
 			err = -EBUSY;
-			clone.blocked = 1;
 			goto fail;
 
 		case VHD_BM_BAT_CLEAR:
@@ -1860,9 +1954,9 @@ signal_completion(struct vhd_request *li
 static void
 start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
 {
-	int i, error = 0;
 	struct vhd_transaction *tx;
 	struct vhd_request *r, *next;
+	int i;
 
 	if (!bm->queue.head)
 		return;
@@ -1885,7 +1979,7 @@ start_new_bitmap_transaction(struct vhd_
 		if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
 			tx->finished++;
 			if (!r->error) {
-				u32 sec = r->treq.sec % s->spb;
+				uint32_t sec = r->treq.sec % s->spb;
 				for (i = 0; i < r->treq.secs; i++)
 					vhd_bitmap_set(&s->vhd,
 						       bm->shadow, sec + i);
@@ -2027,7 +2121,7 @@ finish_bat_write(struct vhd_request *req
 static void
 finish_zero_bm_write(struct vhd_request *req)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap *bm;
 	struct vhd_transaction *tx = req->tx;
 	struct vhd_state *s = req->state;
@@ -2058,10 +2152,30 @@ finish_zero_bm_write(struct vhd_request 
 		finish_data_transaction(s, bm);
 }
 
+static int
+finish_redundant_bm_write(struct vhd_request *req)
+{
+	/* uint32_t blk; */
+	struct vhd_state *s = (struct vhd_state *) req->state;
+
+	s->returned++;
+	TRACE(s);	
+	/* blk = req->treq.sec / s->spb;
+	   DBG(TLOG_DBG, "blk: %u\n", blk); */
+
+	if (req->error) {
+		ERR(s, req->error, "lsec: 0x%08"PRIx64, req->treq.sec);
+	}
+	free_vhd_request(s, req);
+	s->debug_done_redundant_writes++;
+	return 0;
+}
+
+
 static void
 finish_bitmap_read(struct vhd_request *req)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *r, *next;
 	struct vhd_state   *s = req->state;
@@ -2113,7 +2227,7 @@ finish_bitmap_read(struct vhd_request *r
 static void
 finish_bitmap_write(struct vhd_request *req)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap  *bm;
 	struct vhd_transaction *tx;
 	struct vhd_state *s = req->state;
@@ -2156,7 +2270,7 @@ finish_data_write(struct vhd_request *re
 	set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);
 
 	if (tx) {
-		u32 blk, sec;
+		uint32_t blk, sec;
 		struct vhd_bitmap *bm;
 
 		blk = req->treq.sec / s->spb;
@@ -2199,7 +2313,7 @@ vhd_complete(void *arg, struct tiocb *ti
 	req->error = err;
 
 	if (req->error)
-		ERR(req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
+		ERR(s, req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
 		    "nbytes: %lu, blk: %"PRIu64", blk_offset: %u",
 		    s->vhd.file, req->op, req->treq.sec, req->treq.secs,
 		    io->u.c.nbytes, req->treq.sec / s->spb,
@@ -2226,6 +2340,10 @@ vhd_complete(void *arg, struct tiocb *ti
 		finish_zero_bm_write(req);
 		break;
 
+	case VHD_OP_REDUNDANT_BM_WRITE:
+		finish_redundant_bm_write(req);
+		break;
+
 	case VHD_OP_BAT_WRITE:
 		finish_bat_write(req);
 		break;
@@ -2250,14 +2368,15 @@ vhd_debug(td_driver_t *driver)
 	DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n",
 	    s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0));
 
-	DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%lu total)\n", VHD_REQS_DATA);
+	DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%u total)\n", VHD_REQS_DATA);
 	for (i = 0; i < VHD_REQS_DATA; i++) {
 		struct vhd_request *r = &s->vreq_list[i];
 		td_request_t *t       = &r->treq;
+		const char *vname     = t->vreq ? t->vreq->name: NULL;
 		if (t->secs)
-			DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
+			DBG(TLOG_WARN, "%d: vreq: %s.%d, err: %d, op: %d,"
 			    " lsec: 0x%08"PRIx64", flags: %d, this: %p, "
-			    "next: %p, tx: %p\n", i, t->id, r->error, r->op,
+			    "next: %p, tx: %p\n", i, vname, t->sidx, r->error, r->op,
 			    t->sec, r->flags, r, r->next, r->tx);
 	}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements
  2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
  2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
  1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
  To: xen-devel; +Cc: thanos.makatos

This patch introduces a few extra checks in the code path that tell whether a
particular Virtual Disk Image driver is available.

Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>

diff --git a/tools/blktap3/drivers/tapdisk-control.c b/tools/blktap3/drivers/tapdisk-control.c
--- a/tools/blktap3/drivers/tapdisk-control.c
+++ b/tools/blktap3/drivers/tapdisk-control.c
@@ -586,7 +586,7 @@ tapdisk_control_open_image(
         tapdisk_message_t *request, tapdisk_message_t * const response)
 {
 	int err;
-	td_vbd_t *vbd;
+	td_vbd_t *vbd = NULL;
 	td_flag_t flags;
     td_disk_info_t info;
     int prt_path_len;
@@ -663,7 +663,9 @@ out:
         response->u.image.sector_size = info.sector_size;
         response->u.image.info = info.info;
         response->type = TAPDISK_MESSAGE_OPEN_RSP;
-    }
+    } else
+        if (vbd)
+            tapdisk_server_remove_vbd(vbd);
 
 	return err;
 
diff --git a/tools/blktap3/drivers/tapdisk-disktype.c b/tools/blktap3/drivers/tapdisk-disktype.c
--- a/tools/blktap3/drivers/tapdisk-disktype.c
+++ b/tools/blktap3/drivers/tapdisk-disktype.c
@@ -148,7 +148,7 @@ const disk_info_t *tapdisk_disk_types[] 
 extern struct tap_disk tapdisk_aio;
 
 /*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
  */
 #if 0
 extern struct tap_disk tapdisk_sync;
@@ -160,7 +160,7 @@ extern struct tap_disk tapdisk_vhd;
 extern struct tap_disk tapdisk_ram;
 
 /*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
  */
 #if 0
 extern struct tap_disk tapdisk_qcow;
@@ -169,39 +169,74 @@ extern struct tap_disk tapdisk_qcow;
 extern struct tap_disk tapdisk_vhd_index;
 
 /*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
  */
 #if 0
 extern struct tap_disk tapdisk_log;
 #endif
 
-const struct tap_disk *tapdisk_disk_drivers[] = {
-	[DISK_TYPE_AIO]         = &tapdisk_aio,
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt)
+{
+    static const struct tap_disk *tapdisk_disk_drivers[] = {
+        [DISK_TYPE_AIO]         = &tapdisk_aio,
 
-/*
- * TODO Why commented out?
- */
+        /*
+         * XXX Commented out in blktap2.5.
+         */
 #if 0
-	[DISK_TYPE_SYNC]        = &tapdisk_sync,
-	[DISK_TYPE_VMDK]        = &tapdisk_vmdk,
-    [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+        [DISK_TYPE_SYNC]        = &tapdisk_sync,
+        [DISK_TYPE_VMDK]        = &tapdisk_vmdk,
+        [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+#endif
+        [DISK_TYPE_VHD]         = &tapdisk_vhd,
+
+        /*
+         * TODO Commeneted out to simplify the upstreaming process.
+         */
+#if 0
+        [DISK_TYPE_RAM]         = &tapdisk_ram,
 #endif
 
-/*
- * TODO Why commented out?
- */
+        /*
+         * XXX Commented out in blktap2.5.
+         */
 #if 0
-	[DISK_TYPE_QCOW]        = &tapdisk_qcow,
+        [DISK_TYPE_QCOW]        = &tapdisk_qcow,
 #endif
 
-/*
- * TODO Why commented out?
- */
+        /*
+         * TODO Commeneted out to simplify the upstreaming process.
+         */
 #if 0
-	[DISK_TYPE_LOG]         = &tapdisk_log,
+        [DISK_TYPE_BLOCK_CACHE] = &tapdisk_block_cache,
+	    [DISK_TYPE_VINDEX]      = &tapdisk_vhd_index,
 #endif
-	0,
-};
+
+        /*
+         * XXX Commented out in blktap2.5.
+         */
+#if 0
+        [DISK_TYPE_LOG]         = &tapdisk_log,
+#endif
+
+        /*
+         * TODO Commeneted out to simplify the upstreaming process.
+         */
+#if 0
+        [DISK_TYPE_LCACHE]      = &tapdisk_lcache,
+        [DISK_TYPE_LLPCACHE]    = &tapdisk_llpcache,
+        [DISK_TYPE_LLECACHE]    = &tapdisk_llecache,
+        [DISK_TYPE_VALVE]       = &tapdisk_valve,
+        [DISK_TYPE_NBD]         = &tapdisk_nbd,
+#endif
+    };
+
+    if (dt < 0 || dt > ARRAY_SIZE(tapdisk_disk_drivers))
+        return NULL;
+    else
+        return tapdisk_disk_drivers[dt];
+}
 
 int
 tapdisk_disktype_find(const char *name)
@@ -217,7 +252,7 @@ tapdisk_disktype_find(const char *name)
 		if (strcmp(name, info->name))
 			continue;
 
-		if (!tapdisk_disk_drivers[i])
+		if (!tapdisk_disk_driver_get(i))
 			return -ENOSYS;
 
 		return i;
diff --git a/tools/blktap3/drivers/tapdisk-disktype.h b/tools/blktap3/drivers/tapdisk-disktype.h
--- a/tools/blktap3/drivers/tapdisk-disktype.h
+++ b/tools/blktap3/drivers/tapdisk-disktype.h
@@ -29,32 +29,34 @@
 #ifndef __DISKTYPES_H__
 #define __DISKTYPES_H__
 
-#define DISK_TYPE_AIO         0
-#define DISK_TYPE_SYNC        1
-#define DISK_TYPE_VMDK        2
-#define DISK_TYPE_VHDSYNC     3
-#define DISK_TYPE_VHD         4
-#define DISK_TYPE_RAM         5
-#define DISK_TYPE_QCOW        6
-#define DISK_TYPE_BLOCK_CACHE 7
-#define DISK_TYPE_VINDEX      8
-#define DISK_TYPE_LOG         9
-#define DISK_TYPE_REMUS       10
-#define DISK_TYPE_LCACHE      11
-#define DISK_TYPE_LLECACHE    12
-#define DISK_TYPE_LLPCACHE    13
-#define DISK_TYPE_VALVE       14
+enum disk_type {
+    DISK_TYPE_AIO,
+    DISK_TYPE_SYNC,
+    DISK_TYPE_VMDK,
+    DISK_TYPE_VHDSYNC,
+    DISK_TYPE_VHD,
+    DISK_TYPE_RAM,
+    DISK_TYPE_QCOW,
+    DISK_TYPE_BLOCK_CACHE,
+    DISK_TYPE_VINDEX,
+    DISK_TYPE_LOG,
+    DISK_TYPE_REMUS,
+    DISK_TYPE_LCACHE,
+    DISK_TYPE_LLECACHE,
+    DISK_TYPE_LLPCACHE,
+    DISK_TYPE_VALVE};
 
 #define DISK_TYPE_NAME_MAX    32
 
 typedef struct disk_info {
 	const char     *name; /* driver name, e.g. 'aio' */
 	char           *desc;  /* e.g. "raw image" */
-	unsigned int    flags; 
+	unsigned int    flags;
 } disk_info_t;
 
 extern const disk_info_t     *tapdisk_disk_types[];
-extern const struct tap_disk *tapdisk_disk_drivers[];
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt);
 
 /* one single controller for all instances of disk type */
 #define DISK_TYPE_SINGLE_CONTROLLER (1<<0)
diff --git a/tools/blktap3/drivers/tapdisk-driver.c b/tools/blktap3/drivers/tapdisk-driver.c
--- a/tools/blktap3/drivers/tapdisk-driver.c
+++ b/tools/blktap3/drivers/tapdisk-driver.c
@@ -73,7 +73,7 @@ tapdisk_driver_allocate(int type, const 
 	td_driver_t *driver;
 	const struct tap_disk *ops;
 
-	ops = tapdisk_disk_drivers[type];
+	ops = tapdisk_disk_driver_get(type);
 	if (!ops)
 		return NULL;
 
diff --git a/tools/blktap3/drivers/tapdisk-interface.c b/tools/blktap3/drivers/tapdisk-interface.c
--- a/tools/blktap3/drivers/tapdisk-interface.c
+++ b/tools/blktap3/drivers/tapdisk-interface.c
@@ -67,6 +67,8 @@ int
 	int err;
 	td_driver_t *driver;
 
+    assert(image);
+
 	driver = image->driver;
 	if (!driver) {
 		driver = tapdisk_driver_allocate(image->type,
@@ -80,6 +82,8 @@ int
 	}
 
 	if (!td_flag_test(driver->state, TD_DRIVER_OPEN)) {
+        assert(driver->ops);
+        assert(driver->ops->td_open);
 		err = driver->ops->td_open(driver, image->name, image->flags);
 		if (err) {
 			if (!image->driver)
diff --git a/tools/blktap3/drivers/tapdisk-server.c b/tools/blktap3/drivers/tapdisk-server.c
--- a/tools/blktap3/drivers/tapdisk-server.c
+++ b/tools/blktap3/drivers/tapdisk-server.c
@@ -90,8 +90,11 @@ tapdisk_server_get_vbd(const char *param
     assert(params);
 
 	tapdisk_server_for_each_vbd(vbd, tmp)
-		if (!strcmp(vbd->name, params))
-			return vbd;
+        if (vbd->name) {
+            /* TODO VBDs without name? Should this be treated as a bug? */
+    		if (!strcmp(vbd->name, params))
+	    		return vbd;
+        }
 
 	return NULL;
 }

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-07-15 11:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).