All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support
@ 2013-07-15 11:55 Thanos Makatos
  2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
  2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
  0 siblings, 2 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
  To: xen-devel; +Cc: thanos.makatos

This patch series introduces full VHD support, plus some minor code
improvements.

Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1 of 2 RESEND] blktap3/vhd: Import VHD support
  2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
  2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
  1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
  To: xen-devel; +Cc: thanos.makatos

This patch imports the VHD driver from blktap2, with all changes coming from
blktap2.5.

Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>

diff --git a/tools/blktap3/drivers/Makefile b/tools/blktap3/drivers/Makefile
--- a/tools/blktap3/drivers/Makefile
+++ b/tools/blktap3/drivers/Makefile
@@ -95,6 +95,7 @@ LIBSRING := sring/libsring.a
 #MISC-OBJS-y := atomicio.o
 
 BLK-OBJS-y  := block-aio.o
+BLK-OBJS-y  += block-vhd.o
 # FIXME The following exist in blktap2 but not in blktap2.5.
 #BLK-OBJS-y += aes.o
 #BLK-OBJS-y += md5.o
diff --git a/tools/blktap2/drivers/block-vhd.c b/tools/blktap3/drivers/block-vhd.c
copy from tools/blktap2/drivers/block-vhd.c
copy to tools/blktap3/drivers/block-vhd.c
--- a/tools/blktap2/drivers/block-vhd.c
+++ b/tools/blktap3/drivers/block-vhd.c
@@ -1,5 +1,8 @@
-/* 
- * Copyright (c) 2008, XenSource Inc.
+/*
+ *
+ * Copyright (c) 2007, XenSource Inc.
+ * Copyright (c) 2010, Citrix Systems, Inc.
+ *
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,6 +27,10 @@
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * block-vhd.c: asynchronous vhd implementation.
  *
  * A note on write transactions:
  * Writes that require updating the BAT or bitmaps cannot be signaled
@@ -50,6 +57,8 @@
 #include <unistd.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
+#include <uuid/uuid.h> /* For whatever reason, Linux packages this in */
+                       /* e2fsprogs-devel.                            */
 #include <string.h>    /* for memset.                                 */
 #include <libaio.h>
 #include <sys/mman.h>
@@ -59,6 +68,7 @@
 #include "tapdisk-driver.h"
 #include "tapdisk-interface.h"
 #include "tapdisk-disktype.h"
+#include "tapdisk-storage.h"
 
 unsigned int SPB;
 
@@ -72,7 +82,7 @@ unsigned int SPB;
 	do {								\
 		DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %"	\
 		    PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: "	\
-		    "%lu, BBLK: 0x%04x\n",				\
+		    "%u, BBLK: 0x%04x\n",				\
 		    s->vhd.file, s->queued, s->completed, s->returned,	\
 		    VHD_REQS_DATA - s->vreq_free_count,			\
 		    s->bat.pbw_blk);					\
@@ -84,21 +94,20 @@ unsigned int SPB;
 			__FILE__, __LINE__, #_p);			\
 		DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n",	\
 		    __FILE__, __LINE__, #_p);				\
-		tlog_flush();						\
-		*(int*)0 = 0;						\
+		td_panic();						\
 	}
 
 #if (DEBUGGING == 1)
   #define DBG(level, _f, _a...)      DPRINTF(_f, ##_a)
-  #define ERR(err, _f, _a...)        DPRINTF("ERROR: %d: " _f, err, ##_a)
+  #define ERR(_s, err, _f, _a...)    DPRINTF("ERROR: %d: " _f, err, ##_a)
   #define TRACE(s)                   ((void)0)
 #elif (DEBUGGING == 2)
   #define DBG(level, _f, _a...)      tlog_write(level, _f, ##_a)
-  #define ERR(err, _f, _a...)	     tlog_error(err, _f, ##_a)
+  #define ERR(_s, _err, _f, _a...)   tlog_drv_error((_s)->driver, _err, _f, ##_a)
   #define TRACE(s)                   __TRACE(s)
 #else
   #define DBG(level, _f, _a...)      ((void)0)
-  #define ERR(err, _f, _a...)        ((void)0)
+  #define ERR(_s, err, _f, _a...)    ((void)0)
   #define TRACE(s)                   ((void)0)
 #endif
 
@@ -121,6 +130,7 @@ unsigned int SPB;
 #define VHD_OP_BITMAP_READ           3
 #define VHD_OP_BITMAP_WRITE          4
 #define VHD_OP_ZERO_BM_WRITE         5
+#define VHD_OP_REDUNDANT_BM_WRITE    6
 
 #define VHD_BM_BAT_LOCKED            0
 #define VHD_BM_BAT_CLEAR             1
@@ -194,8 +204,8 @@ struct vhd_bat_state {
 };
 
 struct vhd_bitmap {
-	u32                       blk;
-	u64                       seqno;       /* lru sequence number */
+	uint32_t                  blk;
+	uint64_t                  seqno;       /* lru sequence number */
 	vhd_flag_t                status;
 
 	char                     *map;         /* map should only be modified
@@ -220,15 +230,16 @@ struct vhd_state {
 
         /* VHD stuff */
 	vhd_context_t             vhd;
-	u32                       spp;         /* sectors per page */
-        u32                       spb;         /* sectors per block */
-        u64                       next_db;     /* pointer to the next 
+	uint32_t                  spp;         /* sectors per page */
+	uint32_t                  spb;         /* sectors per block */
+	uint64_t                  first_db;    /* pointer to datablock 0 */
+	uint64_t                  next_db;     /* pointer to the next 
 						* (unallocated) datablock */
 
 	struct vhd_bat_state      bat;
 
-	u64                       bm_lru;      /* lru sequence number */
-	u32                       bm_secs;     /* size of bitmap, in sectors */
+	uint64_t                  bm_lru;      /* lru sequence number */
+	uint32_t                  bm_secs;     /* size of bitmap, in sectors */
 	struct vhd_bitmap        *bitmap[VHD_CACHE_SIZE];
 
 	int                       bm_free_count;
@@ -239,6 +250,12 @@ struct vhd_state {
 	struct vhd_request       *vreq_free[VHD_REQS_DATA];
 	struct vhd_request        vreq_list[VHD_REQS_DATA];
 
+	/* for redundant bitmap writes */
+	int                       padbm_size;
+	char                     *padbm_buf;
+	long int                  debug_skipped_redundant_writes;
+	long int                  debug_done_redundant_writes;
+
 	td_driver_t              *driver;
 
 	uint64_t                  queued;
@@ -274,7 +291,7 @@ vhd_initialize(struct vhd_state *s)
 		_vhd_zsize += VHD_BLOCK_SIZE;
 
 	_vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
-			  MAP_SHARED | MAP_ANON, -1, 0);
+			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (_vhd_zeros == MAP_FAILED) {
 		EPRINTF("vhd_initialize failed: %d\n", -errno);
 		_vhd_zeros = NULL;
@@ -292,6 +309,7 @@ vhd_free(struct vhd_state *s)
 	if (_vhd_master != s || !_vhd_zeros)
 		return;
 
+	free(s->padbm_buf);
 	munmap(_vhd_zeros, _vhd_zsize);
 	_vhd_zsize  = 0;
 	_vhd_zeros  = NULL;
@@ -333,23 +351,23 @@ static int
 vhd_kill_footer(struct vhd_state *s)
 {
 	int err;
-	off_t end;
-	char *zeros;
+	off64_t end;
+	void *zeros;
 
 	if (s->vhd.footer.type == HD_TYPE_FIXED)
 		return 0;
 
-	err = posix_memalign((void **)&zeros, 512, 512);
+	err = posix_memalign(&zeros, 512, 512);
 	if (err)
 		return -err;
 
 	err = 1;
 	memset(zeros, 0xc7c7c7c7, 512);
 
-	if ((end = lseek(s->vhd.fd, 0, SEEK_END)) == -1)
+	if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1)
 		goto fail;
 
-	if (lseek(s->vhd.fd, (end - 512), SEEK_SET) == -1)
+	if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1)
 		goto fail;
 
 	if (write(s->vhd.fd, zeros, 512) != 512)
@@ -368,7 +386,7 @@ static inline int
 find_next_free_block(struct vhd_state *s)
 {
 	int err;
-	off_t eom;
+	off64_t eom;
 	uint32_t i, entry;
 
 	err = vhd_end_of_headers(&s->vhd, &eom);
@@ -376,6 +394,9 @@ find_next_free_block(struct vhd_state *s
 		return err;
 
 	s->next_db = secs_round_up(eom);
+	s->first_db = s->next_db;
+	if ((s->first_db + s->bm_secs) % s->spp)
+		s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp));
 
 	for (i = 0; i < s->bat.bat.entries; i++) {
 		entry = bat_entry(s, i);
@@ -398,12 +419,11 @@ vhd_free_bat(struct vhd_state *s)
 static int
 vhd_initialize_bat(struct vhd_state *s)
 {
-	int err, psize, batmap_required, i;
+	int err, batmap_required, i;
+	void *buf;
 
 	memset(&s->bat, 0, sizeof(struct vhd_bat));
 
-	psize = getpagesize();
-
 	err = vhd_read_bat(&s->vhd, &s->bat.bat);
 	if (err) {
 		EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
@@ -436,12 +456,11 @@ vhd_initialize_bat(struct vhd_state *s)
 					s->vhd.file);
 	}
 
-	err = posix_memalign((void **)&s->bat.bat_buf,
-			     VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
-	if (err) {
-		s->bat.bat_buf = NULL;
+	err = posix_memalign(&buf, VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
+	if (err)
 		goto fail;
-	}
+
+	s->bat.bat_buf = buf;
 
 	return 0;
 
@@ -471,6 +490,7 @@ vhd_initialize_bitmap_cache(struct vhd_s
 {
 	int i, err, map_size;
 	struct vhd_bitmap *bm;
+	void *map, *shadow;
 
 	memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
 
@@ -481,17 +501,17 @@ vhd_initialize_bitmap_cache(struct vhd_s
 	for (i = 0; i < VHD_CACHE_SIZE; i++) {
 		bm = s->bitmap_list + i;
 
-		err = posix_memalign((void **)&bm->map, 512, map_size);
-		if (err) {
-			bm->map = NULL;
+		err = posix_memalign(&map, 512, map_size);
+		if (err)
 			goto fail;
-		}
-
-		err = posix_memalign((void **)&bm->shadow, 512, map_size);
-		if (err) {
-			bm->shadow = NULL;
+
+		bm->map = map;
+
+		err = posix_memalign(&shadow, 512, map_size);
+		if (err)
 			goto fail;
-		}
+
+		bm->shadow = shadow;
 
 		memset(bm->map, 0, map_size);
 		memset(bm->shadow, 0, map_size);
@@ -508,6 +528,8 @@ fail:
 static int
 vhd_initialize_dynamic_disk(struct vhd_state *s)
 {
+	uint32_t bm_size;
+	void *buf;
 	int err;
 
 	err = vhd_get_header(&s->vhd);
@@ -527,6 +549,21 @@ vhd_initialize_dynamic_disk(struct vhd_s
 	s->spb     = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
 	s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
 
+	s->padbm_size = (s->bm_secs / getpagesize()) * getpagesize();
+	if (s->bm_secs % getpagesize())
+		s->padbm_size += getpagesize();
+
+	err = posix_memalign(&buf, 512, s->padbm_size);
+	if (err)
+		return -err;
+
+	s->padbm_buf = buf;
+	bm_size = s->bm_secs << VHD_SECTOR_SHIFT;
+	memset(s->padbm_buf, 0, s->padbm_size - bm_size);
+	memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
+	s->debug_skipped_redundant_writes = 0;
+	s->debug_done_redundant_writes = 0;
+
 	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
 		return 0;
 
@@ -616,6 +653,9 @@ static int
 	o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ? 
 		   VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
 
+	if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
+		set_vhd_flag(o_flags, VHD_OPEN_STRICT);
+
 	err = vhd_open(&s->vhd, name, o_flags);
 	if (err) {
 		libvhd_set_log_level(1);
@@ -650,8 +690,7 @@ static int
 	driver->info.sector_size = VHD_SECTOR_SIZE;
 	driver->info.info        = 0;
 
-        DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%"PRIu64
-            ", inf:%u)\n",
+        DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
 	    driver->info.size, driver->info.sector_size, driver->info.info);
 
 	if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) && 
@@ -692,6 +731,8 @@ static int
 			      VHD_FLAG_OPEN_NO_CACHE);
 
 	/* pre-allocate for all but NFS and LVM storage */
+	driver->storage = tapdisk_storage_type(name);
+
 	if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
 	    driver->storage != TAPDISK_STORAGE_TYPE_LVM)
 		vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
@@ -726,11 +767,14 @@ static int
 {
 	int err;
 	struct vhd_state *s;
-	struct vhd_bitmap *bm;
 	
 	DBG(TLOG_WARN, "vhd_close\n");
 	s = (struct vhd_state *)driver->data;
 
+	DPRINTF("gaps written/skipped: %ld/%ld\n", 
+			s->debug_done_redundant_writes,
+			s->debug_skipped_redundant_writes);
+
 	/* don't write footer if tapdisk is read-only */
 	if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
 		goto free;
@@ -770,10 +814,9 @@ static int
 
 int
 vhd_validate_parent(td_driver_t *child_driver,
-		    td_driver_t *parent_driver, td_flag_t flags)
+		    td_driver_t *parent_driver,
+            __attribute__((unused)) td_flag_t flags)
 {
-	uint32_t status;
-	struct stat stats;
 	struct vhd_state *child  = (struct vhd_state *)child_driver->data;
 	struct vhd_state *parent;
 
@@ -807,7 +850,7 @@ vhd_validate_parent(td_driver_t *child_d
 	}
 	*/
 
-	if (vhd_uuid_compare(&child->vhd.header.prt_uuid, &parent->vhd.footer.uuid)) {
+	if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
 		DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
 			"snapshot.  Child image no longer valid.\n",
 			__func__, child->vhd.file, parent->vhd.file);
@@ -838,12 +881,10 @@ vhd_get_parent_id(td_driver_t *driver, t
 	if (err)
 		return err;
 
-	id->name       = parent;
-	id->drivertype = DISK_TYPE_VHD;
-	if (vhd_parent_raw(&s->vhd)) {
-		DPRINTF("VHD: parent is raw\n");
-		id->drivertype = DISK_TYPE_AIO;
-	}
+	id->name   = parent;
+	id->type   = vhd_parent_raw(&s->vhd) ? DISK_TYPE_AIO : DISK_TYPE_VHD;
+	id->flags |= TD_OPEN_SHAREABLE|TD_OPEN_RDONLY;
+
 	return 0;
 }
 
@@ -1034,7 +1075,7 @@ static struct vhd_bitmap *
 remove_lru_bitmap(struct vhd_state *s)
 {
 	int i, idx = 0;
-	u64 seq = s->bm_lru;
+	uint64_t seq = s->bm_lru;
 	struct vhd_bitmap *bm, *lru = NULL;
 
 	for (i = 0; i < VHD_CACHE_SIZE; i++) {
@@ -1138,7 +1179,7 @@ free_vhd_bitmap(struct vhd_state *s, str
 static int
 read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
 {
-	u32 blk, sec;
+	uint32_t blk, sec;
 	struct vhd_bitmap *bm;
 
 	/* in fixed disks, every block is present */
@@ -1186,7 +1227,7 @@ read_bitmap_cache_span(struct vhd_state 
 		       uint64_t sector, int nr_secs, int value)
 {
 	int ret;
-	u32 blk, sec;
+	uint32_t blk, sec;
 	struct vhd_bitmap *bm;
 
 	/* in fixed disks, every block is present */
@@ -1285,9 +1326,9 @@ static int
 schedule_bat_write(struct vhd_state *s)
 {
 	int i;
-	u32 blk;
+	uint32_t blk;
 	char *buf;
-	u64 offset;
+	uint64_t offset;
 	struct vhd_request *req;
 
 	ASSERT(bat_locked(s));
@@ -1299,10 +1340,10 @@ schedule_bat_write(struct vhd_state *s)
 	init_vhd_request(s, req);
 	memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512);
 
-	((u32 *)buf)[blk % 128] = s->bat.pbw_offset;
+	((uint32_t *)buf)[blk % 128] = s->bat.pbw_offset;
 
 	for (i = 0; i < 128; i++)
-		BE32_OUT(&((u32 *)buf)[i]);
+		BE32_OUT(&((uint32_t *)buf)[i]);
 
 	offset         = s->vhd.header.table_offset + (blk - (blk % 128)) * 4;
 	req->treq.secs = 1;
@@ -1343,6 +1384,55 @@ schedule_zero_bm_write(struct vhd_state 
 	aio_write(s, req, offset);
 }
 
+/* This is a performance optimization. When writing sequentially into full 
+ * blocks, skipping (up-to-date) bitmaps causes an approx. 25% reduction in 
+ * throughput. To prevent skipping, we issue redundant writes into the (padded) 
+ * bitmap area just to make all writes sequential. This will help VHDs on raw 
+ * block devices, while the FS-based VHDs shouldn't suffer much.
+ *
+ * Note that it only makes sense to perform this reduntant bitmap write if the 
+ * block is completely full (i.e. the batmap entry is set). If the block is not 
+ * completely full then one of the following two things will be true:
+ *  1. we'll either be allocating new sectors in this block and writing its
+ *     bitmap transactionally, which will be slow anyways; or
+ *  2. the IO will be skipping over the unallocated sectors again, so the
+ *     pattern will not be sequential anyways
+ * In either case a redundant bitmap write becomes pointless. This fact 
+ * simplifies the implementation of redundant writes: since we know the bitmap 
+ * cannot be updated by anyone else, we don't have to worry about transactions 
+ * or potential write conflicts.
+ * */
+static void
+schedule_redundant_bm_write(struct vhd_state *s, uint32_t blk)
+{
+	uint64_t offset;
+	struct vhd_request *req;
+
+	ASSERT(s->vhd.footer.type != HD_TYPE_FIXED);
+	ASSERT(test_batmap(s, blk));
+
+	req = alloc_vhd_request(s);
+	if (!req) 
+		return;
+
+	req->treq.buf = s->padbm_buf;
+
+	offset = bat_entry(s, blk);
+	ASSERT(offset != DD_BLK_UNUSED);
+	offset <<= VHD_SECTOR_SHIFT;
+	offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT);
+
+	req->op        = VHD_OP_REDUNDANT_BM_WRITE;
+	req->treq.sec  = blk * s->spb;
+	req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT;
+	req->next      = NULL;
+
+	DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n",
+	    blk, offset);
+
+	aio_write(s, req, offset);
+}
+
 static int
 update_bat(struct vhd_state *s, uint32_t blk)
 {
@@ -1380,10 +1470,10 @@ update_bat(struct vhd_state *s, uint32_t
 static int
 allocate_block(struct vhd_state *s, uint32_t blk)
 {
-	char *zeros;
 	int err, gap;
 	uint64_t offset, size;
 	struct vhd_bitmap *bm;
+	ssize_t count;
 
 	ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
 
@@ -1410,15 +1500,16 @@ allocate_block(struct vhd_state *s, uint
 	    blk, s->bat.pbw_offset);
 
 	if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
-		ERR(errno, "lseek failed\n");
+		ERR(s, -errno, "lseek failed\n");
 		return -errno;
 	}
 
-	size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
-	err  = write(s->vhd.fd, vhd_zeros(size), size);
-	if (err != size) {
-		err = (err == -1 ? -errno : -EIO);
-		ERR(err, "write failed");
+	size  = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
+	count = write(s->vhd.fd, vhd_zeros(size), size);
+	if (count != size) {
+		err = count < 0 ? -errno : -ENOSPC;
+		ERR(s, -errno,
+		    "write failed (%zd, offset %"PRIu64")\n", count, offset);
 		return err;
 	}
 
@@ -1445,8 +1536,8 @@ allocate_block(struct vhd_state *s, uint
 static int 
 schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
 {
-	u64 offset;
-	u32 blk = 0, sec = 0;
+	uint64_t offset;
+	uint32_t blk = 0, sec = 0;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req;
 
@@ -1490,8 +1581,8 @@ static int
 schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
 {
 	int err;
-	u64 offset;
-	u32 blk = 0, sec = 0;
+	uint64_t offset;
+	uint32_t blk = 0, sec = 0;
 	struct vhd_bitmap  *bm = NULL;
 	struct vhd_request *req;
 
@@ -1539,7 +1630,11 @@ schedule_data_write(struct vhd_state *s,
 			set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
 		} else
 			add_to_transaction(&bm->tx, req);
-	}
+	} else if (sec == 0 && 	/* first sector inside data block */
+		   s->vhd.footer.type != HD_TYPE_FIXED && 
+		   bat_entry(s, blk) != s->first_db &&
+		   test_batmap(s, blk))
+		schedule_redundant_bm_write(s, blk);
 
 	aio_write(s, req, offset);
 
@@ -1554,7 +1649,7 @@ static int
 schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
 {
 	int err;
-	u64 offset;
+	uint64_t offset;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req = NULL;
 
@@ -1596,7 +1691,7 @@ schedule_bitmap_read(struct vhd_state *s
 static void
 schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
 {
-	u64 offset;
+	uint64_t offset;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req;
 
@@ -1641,7 +1736,7 @@ schedule_bitmap_write(struct vhd_state *
 static int
 __vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *req;
 
@@ -1767,7 +1862,6 @@ vhd_queue_write(td_driver_t *driver, td_
 
 		case VHD_BM_BAT_LOCKED:
 			err = -EBUSY;
-			clone.blocked = 1;
 			goto fail;
 
 		case VHD_BM_BAT_CLEAR:
@@ -1860,9 +1954,9 @@ signal_completion(struct vhd_request *li
 static void
 start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
 {
-	int i, error = 0;
 	struct vhd_transaction *tx;
 	struct vhd_request *r, *next;
+	int i;
 
 	if (!bm->queue.head)
 		return;
@@ -1885,7 +1979,7 @@ start_new_bitmap_transaction(struct vhd_
 		if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
 			tx->finished++;
 			if (!r->error) {
-				u32 sec = r->treq.sec % s->spb;
+				uint32_t sec = r->treq.sec % s->spb;
 				for (i = 0; i < r->treq.secs; i++)
 					vhd_bitmap_set(&s->vhd,
 						       bm->shadow, sec + i);
@@ -2027,7 +2121,7 @@ finish_bat_write(struct vhd_request *req
 static void
 finish_zero_bm_write(struct vhd_request *req)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap *bm;
 	struct vhd_transaction *tx = req->tx;
 	struct vhd_state *s = req->state;
@@ -2058,10 +2152,30 @@ finish_zero_bm_write(struct vhd_request 
 		finish_data_transaction(s, bm);
 }
 
+static int
+finish_redundant_bm_write(struct vhd_request *req)
+{
+	/* uint32_t blk; */
+	struct vhd_state *s = (struct vhd_state *) req->state;
+
+	s->returned++;
+	TRACE(s);	
+	/* blk = req->treq.sec / s->spb;
+	   DBG(TLOG_DBG, "blk: %u\n", blk); */
+
+	if (req->error) {
+		ERR(s, req->error, "lsec: 0x%08"PRIx64, req->treq.sec);
+	}
+	free_vhd_request(s, req);
+	s->debug_done_redundant_writes++;
+	return 0;
+}
+
+
 static void
 finish_bitmap_read(struct vhd_request *req)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap  *bm;
 	struct vhd_request *r, *next;
 	struct vhd_state   *s = req->state;
@@ -2113,7 +2227,7 @@ finish_bitmap_read(struct vhd_request *r
 static void
 finish_bitmap_write(struct vhd_request *req)
 {
-	u32 blk;
+	uint32_t blk;
 	struct vhd_bitmap  *bm;
 	struct vhd_transaction *tx;
 	struct vhd_state *s = req->state;
@@ -2156,7 +2270,7 @@ finish_data_write(struct vhd_request *re
 	set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);
 
 	if (tx) {
-		u32 blk, sec;
+		uint32_t blk, sec;
 		struct vhd_bitmap *bm;
 
 		blk = req->treq.sec / s->spb;
@@ -2199,7 +2313,7 @@ vhd_complete(void *arg, struct tiocb *ti
 	req->error = err;
 
 	if (req->error)
-		ERR(req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
+		ERR(s, req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
 		    "nbytes: %lu, blk: %"PRIu64", blk_offset: %u",
 		    s->vhd.file, req->op, req->treq.sec, req->treq.secs,
 		    io->u.c.nbytes, req->treq.sec / s->spb,
@@ -2226,6 +2340,10 @@ vhd_complete(void *arg, struct tiocb *ti
 		finish_zero_bm_write(req);
 		break;
 
+	case VHD_OP_REDUNDANT_BM_WRITE:
+		finish_redundant_bm_write(req);
+		break;
+
 	case VHD_OP_BAT_WRITE:
 		finish_bat_write(req);
 		break;
@@ -2250,14 +2368,15 @@ vhd_debug(td_driver_t *driver)
 	DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n",
 	    s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0));
 
-	DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%lu total)\n", VHD_REQS_DATA);
+	DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%u total)\n", VHD_REQS_DATA);
 	for (i = 0; i < VHD_REQS_DATA; i++) {
 		struct vhd_request *r = &s->vreq_list[i];
 		td_request_t *t       = &r->treq;
+		const char *vname     = t->vreq ? t->vreq->name: NULL;
 		if (t->secs)
-			DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
+			DBG(TLOG_WARN, "%d: vreq: %s.%d, err: %d, op: %d,"
 			    " lsec: 0x%08"PRIx64", flags: %d, this: %p, "
-			    "next: %p, tx: %p\n", i, t->id, r->error, r->op,
+			    "next: %p, tx: %p\n", i, vname, t->sidx, r->error, r->op,
 			    t->sec, r->flags, r, r->next, r->tx);
 	}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements
  2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
  2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
  1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
  To: xen-devel; +Cc: thanos.makatos

This patch introduces a few extra checks in the code path that tell whether a
particular Virtual Disk Image driver is available.

Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>

diff --git a/tools/blktap3/drivers/tapdisk-control.c b/tools/blktap3/drivers/tapdisk-control.c
--- a/tools/blktap3/drivers/tapdisk-control.c
+++ b/tools/blktap3/drivers/tapdisk-control.c
@@ -586,7 +586,7 @@ tapdisk_control_open_image(
         tapdisk_message_t *request, tapdisk_message_t * const response)
 {
 	int err;
-	td_vbd_t *vbd;
+	td_vbd_t *vbd = NULL;
 	td_flag_t flags;
     td_disk_info_t info;
     int prt_path_len;
@@ -663,7 +663,9 @@ out:
         response->u.image.sector_size = info.sector_size;
         response->u.image.info = info.info;
         response->type = TAPDISK_MESSAGE_OPEN_RSP;
-    }
+    } else
+        if (vbd)
+            tapdisk_server_remove_vbd(vbd);
 
 	return err;
 
diff --git a/tools/blktap3/drivers/tapdisk-disktype.c b/tools/blktap3/drivers/tapdisk-disktype.c
--- a/tools/blktap3/drivers/tapdisk-disktype.c
+++ b/tools/blktap3/drivers/tapdisk-disktype.c
@@ -148,7 +148,7 @@ const disk_info_t *tapdisk_disk_types[] 
 extern struct tap_disk tapdisk_aio;
 
 /*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
  */
 #if 0
 extern struct tap_disk tapdisk_sync;
@@ -160,7 +160,7 @@ extern struct tap_disk tapdisk_vhd;
 extern struct tap_disk tapdisk_ram;
 
 /*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
  */
 #if 0
 extern struct tap_disk tapdisk_qcow;
@@ -169,39 +169,74 @@ extern struct tap_disk tapdisk_qcow;
 extern struct tap_disk tapdisk_vhd_index;
 
 /*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
  */
 #if 0
 extern struct tap_disk tapdisk_log;
 #endif
 
-const struct tap_disk *tapdisk_disk_drivers[] = {
-	[DISK_TYPE_AIO]         = &tapdisk_aio,
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt)
+{
+    static const struct tap_disk *tapdisk_disk_drivers[] = {
+        [DISK_TYPE_AIO]         = &tapdisk_aio,
 
-/*
- * TODO Why commented out?
- */
+        /*
+         * XXX Commented out in blktap2.5.
+         */
 #if 0
-	[DISK_TYPE_SYNC]        = &tapdisk_sync,
-	[DISK_TYPE_VMDK]        = &tapdisk_vmdk,
-    [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+        [DISK_TYPE_SYNC]        = &tapdisk_sync,
+        [DISK_TYPE_VMDK]        = &tapdisk_vmdk,
+        [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+#endif
+        [DISK_TYPE_VHD]         = &tapdisk_vhd,
+
+        /*
+         * TODO Commeneted out to simplify the upstreaming process.
+         */
+#if 0
+        [DISK_TYPE_RAM]         = &tapdisk_ram,
 #endif
 
-/*
- * TODO Why commented out?
- */
+        /*
+         * XXX Commented out in blktap2.5.
+         */
 #if 0
-	[DISK_TYPE_QCOW]        = &tapdisk_qcow,
+        [DISK_TYPE_QCOW]        = &tapdisk_qcow,
 #endif
 
-/*
- * TODO Why commented out?
- */
+        /*
+         * TODO Commeneted out to simplify the upstreaming process.
+         */
 #if 0
-	[DISK_TYPE_LOG]         = &tapdisk_log,
+        [DISK_TYPE_BLOCK_CACHE] = &tapdisk_block_cache,
+	    [DISK_TYPE_VINDEX]      = &tapdisk_vhd_index,
 #endif
-	0,
-};
+
+        /*
+         * XXX Commented out in blktap2.5.
+         */
+#if 0
+        [DISK_TYPE_LOG]         = &tapdisk_log,
+#endif
+
+        /*
+         * TODO Commeneted out to simplify the upstreaming process.
+         */
+#if 0
+        [DISK_TYPE_LCACHE]      = &tapdisk_lcache,
+        [DISK_TYPE_LLPCACHE]    = &tapdisk_llpcache,
+        [DISK_TYPE_LLECACHE]    = &tapdisk_llecache,
+        [DISK_TYPE_VALVE]       = &tapdisk_valve,
+        [DISK_TYPE_NBD]         = &tapdisk_nbd,
+#endif
+    };
+
+    if (dt < 0 || dt > ARRAY_SIZE(tapdisk_disk_drivers))
+        return NULL;
+    else
+        return tapdisk_disk_drivers[dt];
+}
 
 int
 tapdisk_disktype_find(const char *name)
@@ -217,7 +252,7 @@ tapdisk_disktype_find(const char *name)
 		if (strcmp(name, info->name))
 			continue;
 
-		if (!tapdisk_disk_drivers[i])
+		if (!tapdisk_disk_driver_get(i))
 			return -ENOSYS;
 
 		return i;
diff --git a/tools/blktap3/drivers/tapdisk-disktype.h b/tools/blktap3/drivers/tapdisk-disktype.h
--- a/tools/blktap3/drivers/tapdisk-disktype.h
+++ b/tools/blktap3/drivers/tapdisk-disktype.h
@@ -29,32 +29,34 @@
 #ifndef __DISKTYPES_H__
 #define __DISKTYPES_H__
 
-#define DISK_TYPE_AIO         0
-#define DISK_TYPE_SYNC        1
-#define DISK_TYPE_VMDK        2
-#define DISK_TYPE_VHDSYNC     3
-#define DISK_TYPE_VHD         4
-#define DISK_TYPE_RAM         5
-#define DISK_TYPE_QCOW        6
-#define DISK_TYPE_BLOCK_CACHE 7
-#define DISK_TYPE_VINDEX      8
-#define DISK_TYPE_LOG         9
-#define DISK_TYPE_REMUS       10
-#define DISK_TYPE_LCACHE      11
-#define DISK_TYPE_LLECACHE    12
-#define DISK_TYPE_LLPCACHE    13
-#define DISK_TYPE_VALVE       14
+enum disk_type {
+    DISK_TYPE_AIO,
+    DISK_TYPE_SYNC,
+    DISK_TYPE_VMDK,
+    DISK_TYPE_VHDSYNC,
+    DISK_TYPE_VHD,
+    DISK_TYPE_RAM,
+    DISK_TYPE_QCOW,
+    DISK_TYPE_BLOCK_CACHE,
+    DISK_TYPE_VINDEX,
+    DISK_TYPE_LOG,
+    DISK_TYPE_REMUS,
+    DISK_TYPE_LCACHE,
+    DISK_TYPE_LLECACHE,
+    DISK_TYPE_LLPCACHE,
+    DISK_TYPE_VALVE};
 
 #define DISK_TYPE_NAME_MAX    32
 
 typedef struct disk_info {
 	const char     *name; /* driver name, e.g. 'aio' */
 	char           *desc;  /* e.g. "raw image" */
-	unsigned int    flags; 
+	unsigned int    flags;
 } disk_info_t;
 
 extern const disk_info_t     *tapdisk_disk_types[];
-extern const struct tap_disk *tapdisk_disk_drivers[];
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt);
 
 /* one single controller for all instances of disk type */
 #define DISK_TYPE_SINGLE_CONTROLLER (1<<0)
diff --git a/tools/blktap3/drivers/tapdisk-driver.c b/tools/blktap3/drivers/tapdisk-driver.c
--- a/tools/blktap3/drivers/tapdisk-driver.c
+++ b/tools/blktap3/drivers/tapdisk-driver.c
@@ -73,7 +73,7 @@ tapdisk_driver_allocate(int type, const 
 	td_driver_t *driver;
 	const struct tap_disk *ops;
 
-	ops = tapdisk_disk_drivers[type];
+	ops = tapdisk_disk_driver_get(type);
 	if (!ops)
 		return NULL;
 
diff --git a/tools/blktap3/drivers/tapdisk-interface.c b/tools/blktap3/drivers/tapdisk-interface.c
--- a/tools/blktap3/drivers/tapdisk-interface.c
+++ b/tools/blktap3/drivers/tapdisk-interface.c
@@ -67,6 +67,8 @@ int
 	int err;
 	td_driver_t *driver;
 
+    assert(image);
+
 	driver = image->driver;
 	if (!driver) {
 		driver = tapdisk_driver_allocate(image->type,
@@ -80,6 +82,8 @@ int
 	}
 
 	if (!td_flag_test(driver->state, TD_DRIVER_OPEN)) {
+        assert(driver->ops);
+        assert(driver->ops->td_open);
 		err = driver->ops->td_open(driver, image->name, image->flags);
 		if (err) {
 			if (!image->driver)
diff --git a/tools/blktap3/drivers/tapdisk-server.c b/tools/blktap3/drivers/tapdisk-server.c
--- a/tools/blktap3/drivers/tapdisk-server.c
+++ b/tools/blktap3/drivers/tapdisk-server.c
@@ -90,8 +90,11 @@ tapdisk_server_get_vbd(const char *param
     assert(params);
 
 	tapdisk_server_for_each_vbd(vbd, tmp)
-		if (!strcmp(vbd->name, params))
-			return vbd;
+        if (vbd->name) {
+            /* TODO VBDs without name? Should this be treated as a bug? */
+    		if (!strcmp(vbd->name, params))
+	    		return vbd;
+        }
 
 	return NULL;
 }

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-07-15 11:55 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.