* [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support
@ 2013-07-15 11:55 Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
0 siblings, 2 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
To: xen-devel; +Cc: thanos.makatos
This patch series introduces full VHD support, plus some minor code
improvements.
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 1 of 2 RESEND] blktap3/vhd: Import VHD support
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
To: xen-devel; +Cc: thanos.makatos
This patch imports the VHD driver from blktap2, with all changes coming from
blktap2.5.
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
diff --git a/tools/blktap3/drivers/Makefile b/tools/blktap3/drivers/Makefile
--- a/tools/blktap3/drivers/Makefile
+++ b/tools/blktap3/drivers/Makefile
@@ -95,6 +95,7 @@ LIBSRING := sring/libsring.a
#MISC-OBJS-y := atomicio.o
BLK-OBJS-y := block-aio.o
+BLK-OBJS-y += block-vhd.o
# FIXME The following exist in blktap2 but not in blktap2.5.
#BLK-OBJS-y += aes.o
#BLK-OBJS-y += md5.o
diff --git a/tools/blktap2/drivers/block-vhd.c b/tools/blktap3/drivers/block-vhd.c
copy from tools/blktap2/drivers/block-vhd.c
copy to tools/blktap3/drivers/block-vhd.c
--- a/tools/blktap2/drivers/block-vhd.c
+++ b/tools/blktap3/drivers/block-vhd.c
@@ -1,5 +1,8 @@
-/*
- * Copyright (c) 2008, XenSource Inc.
+/*
+ *
+ * Copyright (c) 2007, XenSource Inc.
+ * Copyright (c) 2010, Citrix Systems, Inc.
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,6 +27,10 @@
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * block-vhd.c: asynchronous vhd implementation.
*
* A note on write transactions:
* Writes that require updating the BAT or bitmaps cannot be signaled
@@ -50,6 +57,8 @@
#include <unistd.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
+#include <uuid/uuid.h> /* For whatever reason, Linux packages this in */
+ /* e2fsprogs-devel. */
#include <string.h> /* for memset. */
#include <libaio.h>
#include <sys/mman.h>
@@ -59,6 +68,7 @@
#include "tapdisk-driver.h"
#include "tapdisk-interface.h"
#include "tapdisk-disktype.h"
+#include "tapdisk-storage.h"
unsigned int SPB;
@@ -72,7 +82,7 @@ unsigned int SPB;
do { \
DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %" \
PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: " \
- "%lu, BBLK: 0x%04x\n", \
+ "%u, BBLK: 0x%04x\n", \
s->vhd.file, s->queued, s->completed, s->returned, \
VHD_REQS_DATA - s->vreq_free_count, \
s->bat.pbw_blk); \
@@ -84,21 +94,20 @@ unsigned int SPB;
__FILE__, __LINE__, #_p); \
DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n", \
__FILE__, __LINE__, #_p); \
- tlog_flush(); \
- *(int*)0 = 0; \
+ td_panic(); \
}
#if (DEBUGGING == 1)
#define DBG(level, _f, _a...) DPRINTF(_f, ##_a)
- #define ERR(err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a)
+ #define ERR(_s, err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a)
#define TRACE(s) ((void)0)
#elif (DEBUGGING == 2)
#define DBG(level, _f, _a...) tlog_write(level, _f, ##_a)
- #define ERR(err, _f, _a...) tlog_error(err, _f, ##_a)
+ #define ERR(_s, _err, _f, _a...) tlog_drv_error((_s)->driver, _err, _f, ##_a)
#define TRACE(s) __TRACE(s)
#else
#define DBG(level, _f, _a...) ((void)0)
- #define ERR(err, _f, _a...) ((void)0)
+ #define ERR(_s, err, _f, _a...) ((void)0)
#define TRACE(s) ((void)0)
#endif
@@ -121,6 +130,7 @@ unsigned int SPB;
#define VHD_OP_BITMAP_READ 3
#define VHD_OP_BITMAP_WRITE 4
#define VHD_OP_ZERO_BM_WRITE 5
+#define VHD_OP_REDUNDANT_BM_WRITE 6
#define VHD_BM_BAT_LOCKED 0
#define VHD_BM_BAT_CLEAR 1
@@ -194,8 +204,8 @@ struct vhd_bat_state {
};
struct vhd_bitmap {
- u32 blk;
- u64 seqno; /* lru sequence number */
+ uint32_t blk;
+ uint64_t seqno; /* lru sequence number */
vhd_flag_t status;
char *map; /* map should only be modified
@@ -220,15 +230,16 @@ struct vhd_state {
/* VHD stuff */
vhd_context_t vhd;
- u32 spp; /* sectors per page */
- u32 spb; /* sectors per block */
- u64 next_db; /* pointer to the next
+ uint32_t spp; /* sectors per page */
+ uint32_t spb; /* sectors per block */
+ uint64_t first_db; /* pointer to datablock 0 */
+ uint64_t next_db; /* pointer to the next
* (unallocated) datablock */
struct vhd_bat_state bat;
- u64 bm_lru; /* lru sequence number */
- u32 bm_secs; /* size of bitmap, in sectors */
+ uint64_t bm_lru; /* lru sequence number */
+ uint32_t bm_secs; /* size of bitmap, in sectors */
struct vhd_bitmap *bitmap[VHD_CACHE_SIZE];
int bm_free_count;
@@ -239,6 +250,12 @@ struct vhd_state {
struct vhd_request *vreq_free[VHD_REQS_DATA];
struct vhd_request vreq_list[VHD_REQS_DATA];
+ /* for redundant bitmap writes */
+ int padbm_size;
+ char *padbm_buf;
+ long int debug_skipped_redundant_writes;
+ long int debug_done_redundant_writes;
+
td_driver_t *driver;
uint64_t queued;
@@ -274,7 +291,7 @@ vhd_initialize(struct vhd_state *s)
_vhd_zsize += VHD_BLOCK_SIZE;
_vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
- MAP_SHARED | MAP_ANON, -1, 0);
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (_vhd_zeros == MAP_FAILED) {
EPRINTF("vhd_initialize failed: %d\n", -errno);
_vhd_zeros = NULL;
@@ -292,6 +309,7 @@ vhd_free(struct vhd_state *s)
if (_vhd_master != s || !_vhd_zeros)
return;
+ free(s->padbm_buf);
munmap(_vhd_zeros, _vhd_zsize);
_vhd_zsize = 0;
_vhd_zeros = NULL;
@@ -333,23 +351,23 @@ static int
vhd_kill_footer(struct vhd_state *s)
{
int err;
- off_t end;
- char *zeros;
+ off64_t end;
+ void *zeros;
if (s->vhd.footer.type == HD_TYPE_FIXED)
return 0;
- err = posix_memalign((void **)&zeros, 512, 512);
+ err = posix_memalign(&zeros, 512, 512);
if (err)
return -err;
err = 1;
memset(zeros, 0xc7c7c7c7, 512);
- if ((end = lseek(s->vhd.fd, 0, SEEK_END)) == -1)
+ if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1)
goto fail;
- if (lseek(s->vhd.fd, (end - 512), SEEK_SET) == -1)
+ if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1)
goto fail;
if (write(s->vhd.fd, zeros, 512) != 512)
@@ -368,7 +386,7 @@ static inline int
find_next_free_block(struct vhd_state *s)
{
int err;
- off_t eom;
+ off64_t eom;
uint32_t i, entry;
err = vhd_end_of_headers(&s->vhd, &eom);
@@ -376,6 +394,9 @@ find_next_free_block(struct vhd_state *s
return err;
s->next_db = secs_round_up(eom);
+ s->first_db = s->next_db;
+ if ((s->first_db + s->bm_secs) % s->spp)
+ s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp));
for (i = 0; i < s->bat.bat.entries; i++) {
entry = bat_entry(s, i);
@@ -398,12 +419,11 @@ vhd_free_bat(struct vhd_state *s)
static int
vhd_initialize_bat(struct vhd_state *s)
{
- int err, psize, batmap_required, i;
+ int err, batmap_required, i;
+ void *buf;
memset(&s->bat, 0, sizeof(struct vhd_bat));
- psize = getpagesize();
-
err = vhd_read_bat(&s->vhd, &s->bat.bat);
if (err) {
EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
@@ -436,12 +456,11 @@ vhd_initialize_bat(struct vhd_state *s)
s->vhd.file);
}
- err = posix_memalign((void **)&s->bat.bat_buf,
- VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
- if (err) {
- s->bat.bat_buf = NULL;
+ err = posix_memalign(&buf, VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
+ if (err)
goto fail;
- }
+
+ s->bat.bat_buf = buf;
return 0;
@@ -471,6 +490,7 @@ vhd_initialize_bitmap_cache(struct vhd_s
{
int i, err, map_size;
struct vhd_bitmap *bm;
+ void *map, *shadow;
memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
@@ -481,17 +501,17 @@ vhd_initialize_bitmap_cache(struct vhd_s
for (i = 0; i < VHD_CACHE_SIZE; i++) {
bm = s->bitmap_list + i;
- err = posix_memalign((void **)&bm->map, 512, map_size);
- if (err) {
- bm->map = NULL;
+ err = posix_memalign(&map, 512, map_size);
+ if (err)
goto fail;
- }
-
- err = posix_memalign((void **)&bm->shadow, 512, map_size);
- if (err) {
- bm->shadow = NULL;
+
+ bm->map = map;
+
+ err = posix_memalign(&shadow, 512, map_size);
+ if (err)
goto fail;
- }
+
+ bm->shadow = shadow;
memset(bm->map, 0, map_size);
memset(bm->shadow, 0, map_size);
@@ -508,6 +528,8 @@ fail:
static int
vhd_initialize_dynamic_disk(struct vhd_state *s)
{
+ uint32_t bm_size;
+ void *buf;
int err;
err = vhd_get_header(&s->vhd);
@@ -527,6 +549,21 @@ vhd_initialize_dynamic_disk(struct vhd_s
s->spb = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
+ s->padbm_size = (s->bm_secs / getpagesize()) * getpagesize();
+ if (s->bm_secs % getpagesize())
+ s->padbm_size += getpagesize();
+
+ err = posix_memalign(&buf, 512, s->padbm_size);
+ if (err)
+ return -err;
+
+ s->padbm_buf = buf;
+ bm_size = s->bm_secs << VHD_SECTOR_SHIFT;
+ memset(s->padbm_buf, 0, s->padbm_size - bm_size);
+ memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
+ s->debug_skipped_redundant_writes = 0;
+ s->debug_done_redundant_writes = 0;
+
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
return 0;
@@ -616,6 +653,9 @@ static int
o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ?
VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
+ if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
+ set_vhd_flag(o_flags, VHD_OPEN_STRICT);
+
err = vhd_open(&s->vhd, name, o_flags);
if (err) {
libvhd_set_log_level(1);
@@ -650,8 +690,7 @@ static int
driver->info.sector_size = VHD_SECTOR_SIZE;
driver->info.info = 0;
- DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%"PRIu64
- ", inf:%u)\n",
+ DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
driver->info.size, driver->info.sector_size, driver->info.info);
if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) &&
@@ -692,6 +731,8 @@ static int
VHD_FLAG_OPEN_NO_CACHE);
/* pre-allocate for all but NFS and LVM storage */
+ driver->storage = tapdisk_storage_type(name);
+
if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
driver->storage != TAPDISK_STORAGE_TYPE_LVM)
vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
@@ -726,11 +767,14 @@ static int
{
int err;
struct vhd_state *s;
- struct vhd_bitmap *bm;
DBG(TLOG_WARN, "vhd_close\n");
s = (struct vhd_state *)driver->data;
+ DPRINTF("gaps written/skipped: %ld/%ld\n",
+ s->debug_done_redundant_writes,
+ s->debug_skipped_redundant_writes);
+
/* don't write footer if tapdisk is read-only */
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
goto free;
@@ -770,10 +814,9 @@ static int
int
vhd_validate_parent(td_driver_t *child_driver,
- td_driver_t *parent_driver, td_flag_t flags)
+ td_driver_t *parent_driver,
+ __attribute__((unused)) td_flag_t flags)
{
- uint32_t status;
- struct stat stats;
struct vhd_state *child = (struct vhd_state *)child_driver->data;
struct vhd_state *parent;
@@ -807,7 +850,7 @@ vhd_validate_parent(td_driver_t *child_d
}
*/
- if (vhd_uuid_compare(&child->vhd.header.prt_uuid, &parent->vhd.footer.uuid)) {
+ if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
"snapshot. Child image no longer valid.\n",
__func__, child->vhd.file, parent->vhd.file);
@@ -838,12 +881,10 @@ vhd_get_parent_id(td_driver_t *driver, t
if (err)
return err;
- id->name = parent;
- id->drivertype = DISK_TYPE_VHD;
- if (vhd_parent_raw(&s->vhd)) {
- DPRINTF("VHD: parent is raw\n");
- id->drivertype = DISK_TYPE_AIO;
- }
+ id->name = parent;
+ id->type = vhd_parent_raw(&s->vhd) ? DISK_TYPE_AIO : DISK_TYPE_VHD;
+ id->flags |= TD_OPEN_SHAREABLE|TD_OPEN_RDONLY;
+
return 0;
}
@@ -1034,7 +1075,7 @@ static struct vhd_bitmap *
remove_lru_bitmap(struct vhd_state *s)
{
int i, idx = 0;
- u64 seq = s->bm_lru;
+ uint64_t seq = s->bm_lru;
struct vhd_bitmap *bm, *lru = NULL;
for (i = 0; i < VHD_CACHE_SIZE; i++) {
@@ -1138,7 +1179,7 @@ free_vhd_bitmap(struct vhd_state *s, str
static int
read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
{
- u32 blk, sec;
+ uint32_t blk, sec;
struct vhd_bitmap *bm;
/* in fixed disks, every block is present */
@@ -1186,7 +1227,7 @@ read_bitmap_cache_span(struct vhd_state
uint64_t sector, int nr_secs, int value)
{
int ret;
- u32 blk, sec;
+ uint32_t blk, sec;
struct vhd_bitmap *bm;
/* in fixed disks, every block is present */
@@ -1285,9 +1326,9 @@ static int
schedule_bat_write(struct vhd_state *s)
{
int i;
- u32 blk;
+ uint32_t blk;
char *buf;
- u64 offset;
+ uint64_t offset;
struct vhd_request *req;
ASSERT(bat_locked(s));
@@ -1299,10 +1340,10 @@ schedule_bat_write(struct vhd_state *s)
init_vhd_request(s, req);
memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512);
- ((u32 *)buf)[blk % 128] = s->bat.pbw_offset;
+ ((uint32_t *)buf)[blk % 128] = s->bat.pbw_offset;
for (i = 0; i < 128; i++)
- BE32_OUT(&((u32 *)buf)[i]);
+ BE32_OUT(&((uint32_t *)buf)[i]);
offset = s->vhd.header.table_offset + (blk - (blk % 128)) * 4;
req->treq.secs = 1;
@@ -1343,6 +1384,55 @@ schedule_zero_bm_write(struct vhd_state
aio_write(s, req, offset);
}
+/* This is a performance optimization. When writing sequentially into full
+ * blocks, skipping (up-to-date) bitmaps causes an approx. 25% reduction in
+ * throughput. To prevent skipping, we issue redundant writes into the (padded)
+ * bitmap area just to make all writes sequential. This will help VHDs on raw
+ * block devices, while the FS-based VHDs shouldn't suffer much.
+ *
+ * Note that it only makes sense to perform this reduntant bitmap write if the
+ * block is completely full (i.e. the batmap entry is set). If the block is not
+ * completely full then one of the following two things will be true:
+ * 1. we'll either be allocating new sectors in this block and writing its
+ * bitmap transactionally, which will be slow anyways; or
+ * 2. the IO will be skipping over the unallocated sectors again, so the
+ * pattern will not be sequential anyways
+ * In either case a redundant bitmap write becomes pointless. This fact
+ * simplifies the implementation of redundant writes: since we know the bitmap
+ * cannot be updated by anyone else, we don't have to worry about transactions
+ * or potential write conflicts.
+ * */
+static void
+schedule_redundant_bm_write(struct vhd_state *s, uint32_t blk)
+{
+ uint64_t offset;
+ struct vhd_request *req;
+
+ ASSERT(s->vhd.footer.type != HD_TYPE_FIXED);
+ ASSERT(test_batmap(s, blk));
+
+ req = alloc_vhd_request(s);
+ if (!req)
+ return;
+
+ req->treq.buf = s->padbm_buf;
+
+ offset = bat_entry(s, blk);
+ ASSERT(offset != DD_BLK_UNUSED);
+ offset <<= VHD_SECTOR_SHIFT;
+ offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT);
+
+ req->op = VHD_OP_REDUNDANT_BM_WRITE;
+ req->treq.sec = blk * s->spb;
+ req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT;
+ req->next = NULL;
+
+ DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n",
+ blk, offset);
+
+ aio_write(s, req, offset);
+}
+
static int
update_bat(struct vhd_state *s, uint32_t blk)
{
@@ -1380,10 +1470,10 @@ update_bat(struct vhd_state *s, uint32_t
static int
allocate_block(struct vhd_state *s, uint32_t blk)
{
- char *zeros;
int err, gap;
uint64_t offset, size;
struct vhd_bitmap *bm;
+ ssize_t count;
ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
@@ -1410,15 +1500,16 @@ allocate_block(struct vhd_state *s, uint
blk, s->bat.pbw_offset);
if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
- ERR(errno, "lseek failed\n");
+ ERR(s, -errno, "lseek failed\n");
return -errno;
}
- size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
- err = write(s->vhd.fd, vhd_zeros(size), size);
- if (err != size) {
- err = (err == -1 ? -errno : -EIO);
- ERR(err, "write failed");
+ size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
+ count = write(s->vhd.fd, vhd_zeros(size), size);
+ if (count != size) {
+ err = count < 0 ? -errno : -ENOSPC;
+ ERR(s, -errno,
+ "write failed (%zd, offset %"PRIu64")\n", count, offset);
return err;
}
@@ -1445,8 +1536,8 @@ allocate_block(struct vhd_state *s, uint
static int
schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
{
- u64 offset;
- u32 blk = 0, sec = 0;
+ uint64_t offset;
+ uint32_t blk = 0, sec = 0;
struct vhd_bitmap *bm;
struct vhd_request *req;
@@ -1490,8 +1581,8 @@ static int
schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
{
int err;
- u64 offset;
- u32 blk = 0, sec = 0;
+ uint64_t offset;
+ uint32_t blk = 0, sec = 0;
struct vhd_bitmap *bm = NULL;
struct vhd_request *req;
@@ -1539,7 +1630,11 @@ schedule_data_write(struct vhd_state *s,
set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
} else
add_to_transaction(&bm->tx, req);
- }
+ } else if (sec == 0 && /* first sector inside data block */
+ s->vhd.footer.type != HD_TYPE_FIXED &&
+ bat_entry(s, blk) != s->first_db &&
+ test_batmap(s, blk))
+ schedule_redundant_bm_write(s, blk);
aio_write(s, req, offset);
@@ -1554,7 +1649,7 @@ static int
schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
{
int err;
- u64 offset;
+ uint64_t offset;
struct vhd_bitmap *bm;
struct vhd_request *req = NULL;
@@ -1596,7 +1691,7 @@ schedule_bitmap_read(struct vhd_state *s
static void
schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
{
- u64 offset;
+ uint64_t offset;
struct vhd_bitmap *bm;
struct vhd_request *req;
@@ -1641,7 +1736,7 @@ schedule_bitmap_write(struct vhd_state *
static int
__vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_request *req;
@@ -1767,7 +1862,6 @@ vhd_queue_write(td_driver_t *driver, td_
case VHD_BM_BAT_LOCKED:
err = -EBUSY;
- clone.blocked = 1;
goto fail;
case VHD_BM_BAT_CLEAR:
@@ -1860,9 +1954,9 @@ signal_completion(struct vhd_request *li
static void
start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
{
- int i, error = 0;
struct vhd_transaction *tx;
struct vhd_request *r, *next;
+ int i;
if (!bm->queue.head)
return;
@@ -1885,7 +1979,7 @@ start_new_bitmap_transaction(struct vhd_
if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
tx->finished++;
if (!r->error) {
- u32 sec = r->treq.sec % s->spb;
+ uint32_t sec = r->treq.sec % s->spb;
for (i = 0; i < r->treq.secs; i++)
vhd_bitmap_set(&s->vhd,
bm->shadow, sec + i);
@@ -2027,7 +2121,7 @@ finish_bat_write(struct vhd_request *req
static void
finish_zero_bm_write(struct vhd_request *req)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_transaction *tx = req->tx;
struct vhd_state *s = req->state;
@@ -2058,10 +2152,30 @@ finish_zero_bm_write(struct vhd_request
finish_data_transaction(s, bm);
}
+static int
+finish_redundant_bm_write(struct vhd_request *req)
+{
+ /* uint32_t blk; */
+ struct vhd_state *s = (struct vhd_state *) req->state;
+
+ s->returned++;
+ TRACE(s);
+ /* blk = req->treq.sec / s->spb;
+ DBG(TLOG_DBG, "blk: %u\n", blk); */
+
+ if (req->error) {
+ ERR(s, req->error, "lsec: 0x%08"PRIx64, req->treq.sec);
+ }
+ free_vhd_request(s, req);
+ s->debug_done_redundant_writes++;
+ return 0;
+}
+
+
static void
finish_bitmap_read(struct vhd_request *req)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_request *r, *next;
struct vhd_state *s = req->state;
@@ -2113,7 +2227,7 @@ finish_bitmap_read(struct vhd_request *r
static void
finish_bitmap_write(struct vhd_request *req)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_transaction *tx;
struct vhd_state *s = req->state;
@@ -2156,7 +2270,7 @@ finish_data_write(struct vhd_request *re
set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);
if (tx) {
- u32 blk, sec;
+ uint32_t blk, sec;
struct vhd_bitmap *bm;
blk = req->treq.sec / s->spb;
@@ -2199,7 +2313,7 @@ vhd_complete(void *arg, struct tiocb *ti
req->error = err;
if (req->error)
- ERR(req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
+ ERR(s, req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
"nbytes: %lu, blk: %"PRIu64", blk_offset: %u",
s->vhd.file, req->op, req->treq.sec, req->treq.secs,
io->u.c.nbytes, req->treq.sec / s->spb,
@@ -2226,6 +2340,10 @@ vhd_complete(void *arg, struct tiocb *ti
finish_zero_bm_write(req);
break;
+ case VHD_OP_REDUNDANT_BM_WRITE:
+ finish_redundant_bm_write(req);
+ break;
+
case VHD_OP_BAT_WRITE:
finish_bat_write(req);
break;
@@ -2250,14 +2368,15 @@ vhd_debug(td_driver_t *driver)
DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n",
s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0));
- DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%lu total)\n", VHD_REQS_DATA);
+ DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%u total)\n", VHD_REQS_DATA);
for (i = 0; i < VHD_REQS_DATA; i++) {
struct vhd_request *r = &s->vreq_list[i];
td_request_t *t = &r->treq;
+ const char *vname = t->vreq ? t->vreq->name: NULL;
if (t->secs)
- DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
+ DBG(TLOG_WARN, "%d: vreq: %s.%d, err: %d, op: %d,"
" lsec: 0x%08"PRIx64", flags: %d, this: %p, "
- "next: %p, tx: %p\n", i, t->id, r->error, r->op,
+ "next: %p, tx: %p\n", i, vname, t->sidx, r->error, r->op,
t->sec, r->flags, r, r->next, r->tx);
}
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
To: xen-devel; +Cc: thanos.makatos
This patch introduces a few extra checks in the code path that tell whether a
particular Virtual Disk Image driver is available.
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
diff --git a/tools/blktap3/drivers/tapdisk-control.c b/tools/blktap3/drivers/tapdisk-control.c
--- a/tools/blktap3/drivers/tapdisk-control.c
+++ b/tools/blktap3/drivers/tapdisk-control.c
@@ -586,7 +586,7 @@ tapdisk_control_open_image(
tapdisk_message_t *request, tapdisk_message_t * const response)
{
int err;
- td_vbd_t *vbd;
+ td_vbd_t *vbd = NULL;
td_flag_t flags;
td_disk_info_t info;
int prt_path_len;
@@ -663,7 +663,9 @@ out:
response->u.image.sector_size = info.sector_size;
response->u.image.info = info.info;
response->type = TAPDISK_MESSAGE_OPEN_RSP;
- }
+ } else
+ if (vbd)
+ tapdisk_server_remove_vbd(vbd);
return err;
diff --git a/tools/blktap3/drivers/tapdisk-disktype.c b/tools/blktap3/drivers/tapdisk-disktype.c
--- a/tools/blktap3/drivers/tapdisk-disktype.c
+++ b/tools/blktap3/drivers/tapdisk-disktype.c
@@ -148,7 +148,7 @@ const disk_info_t *tapdisk_disk_types[]
extern struct tap_disk tapdisk_aio;
/*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
*/
#if 0
extern struct tap_disk tapdisk_sync;
@@ -160,7 +160,7 @@ extern struct tap_disk tapdisk_vhd;
extern struct tap_disk tapdisk_ram;
/*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
*/
#if 0
extern struct tap_disk tapdisk_qcow;
@@ -169,39 +169,74 @@ extern struct tap_disk tapdisk_qcow;
extern struct tap_disk tapdisk_vhd_index;
/*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
*/
#if 0
extern struct tap_disk tapdisk_log;
#endif
-const struct tap_disk *tapdisk_disk_drivers[] = {
- [DISK_TYPE_AIO] = &tapdisk_aio,
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt)
+{
+ static const struct tap_disk *tapdisk_disk_drivers[] = {
+ [DISK_TYPE_AIO] = &tapdisk_aio,
-/*
- * TODO Why commented out?
- */
+ /*
+ * XXX Commented out in blktap2.5.
+ */
#if 0
- [DISK_TYPE_SYNC] = &tapdisk_sync,
- [DISK_TYPE_VMDK] = &tapdisk_vmdk,
- [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+ [DISK_TYPE_SYNC] = &tapdisk_sync,
+ [DISK_TYPE_VMDK] = &tapdisk_vmdk,
+ [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+#endif
+ [DISK_TYPE_VHD] = &tapdisk_vhd,
+
+ /*
+ * TODO Commeneted out to simplify the upstreaming process.
+ */
+#if 0
+ [DISK_TYPE_RAM] = &tapdisk_ram,
#endif
-/*
- * TODO Why commented out?
- */
+ /*
+ * XXX Commented out in blktap2.5.
+ */
#if 0
- [DISK_TYPE_QCOW] = &tapdisk_qcow,
+ [DISK_TYPE_QCOW] = &tapdisk_qcow,
#endif
-/*
- * TODO Why commented out?
- */
+ /*
+ * TODO Commeneted out to simplify the upstreaming process.
+ */
#if 0
- [DISK_TYPE_LOG] = &tapdisk_log,
+ [DISK_TYPE_BLOCK_CACHE] = &tapdisk_block_cache,
+ [DISK_TYPE_VINDEX] = &tapdisk_vhd_index,
#endif
- 0,
-};
+
+ /*
+ * XXX Commented out in blktap2.5.
+ */
+#if 0
+ [DISK_TYPE_LOG] = &tapdisk_log,
+#endif
+
+ /*
+ * TODO Commeneted out to simplify the upstreaming process.
+ */
+#if 0
+ [DISK_TYPE_LCACHE] = &tapdisk_lcache,
+ [DISK_TYPE_LLPCACHE] = &tapdisk_llpcache,
+ [DISK_TYPE_LLECACHE] = &tapdisk_llecache,
+ [DISK_TYPE_VALVE] = &tapdisk_valve,
+ [DISK_TYPE_NBD] = &tapdisk_nbd,
+#endif
+ };
+
+ if (dt < 0 || dt > ARRAY_SIZE(tapdisk_disk_drivers))
+ return NULL;
+ else
+ return tapdisk_disk_drivers[dt];
+}
int
tapdisk_disktype_find(const char *name)
@@ -217,7 +252,7 @@ tapdisk_disktype_find(const char *name)
if (strcmp(name, info->name))
continue;
- if (!tapdisk_disk_drivers[i])
+ if (!tapdisk_disk_driver_get(i))
return -ENOSYS;
return i;
diff --git a/tools/blktap3/drivers/tapdisk-disktype.h b/tools/blktap3/drivers/tapdisk-disktype.h
--- a/tools/blktap3/drivers/tapdisk-disktype.h
+++ b/tools/blktap3/drivers/tapdisk-disktype.h
@@ -29,32 +29,34 @@
#ifndef __DISKTYPES_H__
#define __DISKTYPES_H__
-#define DISK_TYPE_AIO 0
-#define DISK_TYPE_SYNC 1
-#define DISK_TYPE_VMDK 2
-#define DISK_TYPE_VHDSYNC 3
-#define DISK_TYPE_VHD 4
-#define DISK_TYPE_RAM 5
-#define DISK_TYPE_QCOW 6
-#define DISK_TYPE_BLOCK_CACHE 7
-#define DISK_TYPE_VINDEX 8
-#define DISK_TYPE_LOG 9
-#define DISK_TYPE_REMUS 10
-#define DISK_TYPE_LCACHE 11
-#define DISK_TYPE_LLECACHE 12
-#define DISK_TYPE_LLPCACHE 13
-#define DISK_TYPE_VALVE 14
+enum disk_type {
+ DISK_TYPE_AIO,
+ DISK_TYPE_SYNC,
+ DISK_TYPE_VMDK,
+ DISK_TYPE_VHDSYNC,
+ DISK_TYPE_VHD,
+ DISK_TYPE_RAM,
+ DISK_TYPE_QCOW,
+ DISK_TYPE_BLOCK_CACHE,
+ DISK_TYPE_VINDEX,
+ DISK_TYPE_LOG,
+ DISK_TYPE_REMUS,
+ DISK_TYPE_LCACHE,
+ DISK_TYPE_LLECACHE,
+ DISK_TYPE_LLPCACHE,
+ DISK_TYPE_VALVE};
#define DISK_TYPE_NAME_MAX 32
typedef struct disk_info {
const char *name; /* driver name, e.g. 'aio' */
char *desc; /* e.g. "raw image" */
- unsigned int flags;
+ unsigned int flags;
} disk_info_t;
extern const disk_info_t *tapdisk_disk_types[];
-extern const struct tap_disk *tapdisk_disk_drivers[];
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt);
/* one single controller for all instances of disk type */
#define DISK_TYPE_SINGLE_CONTROLLER (1<<0)
diff --git a/tools/blktap3/drivers/tapdisk-driver.c b/tools/blktap3/drivers/tapdisk-driver.c
--- a/tools/blktap3/drivers/tapdisk-driver.c
+++ b/tools/blktap3/drivers/tapdisk-driver.c
@@ -73,7 +73,7 @@ tapdisk_driver_allocate(int type, const
td_driver_t *driver;
const struct tap_disk *ops;
- ops = tapdisk_disk_drivers[type];
+ ops = tapdisk_disk_driver_get(type);
if (!ops)
return NULL;
diff --git a/tools/blktap3/drivers/tapdisk-interface.c b/tools/blktap3/drivers/tapdisk-interface.c
--- a/tools/blktap3/drivers/tapdisk-interface.c
+++ b/tools/blktap3/drivers/tapdisk-interface.c
@@ -67,6 +67,8 @@ int
int err;
td_driver_t *driver;
+ assert(image);
+
driver = image->driver;
if (!driver) {
driver = tapdisk_driver_allocate(image->type,
@@ -80,6 +82,8 @@ int
}
if (!td_flag_test(driver->state, TD_DRIVER_OPEN)) {
+ assert(driver->ops);
+ assert(driver->ops->td_open);
err = driver->ops->td_open(driver, image->name, image->flags);
if (err) {
if (!image->driver)
diff --git a/tools/blktap3/drivers/tapdisk-server.c b/tools/blktap3/drivers/tapdisk-server.c
--- a/tools/blktap3/drivers/tapdisk-server.c
+++ b/tools/blktap3/drivers/tapdisk-server.c
@@ -90,8 +90,11 @@ tapdisk_server_get_vbd(const char *param
assert(params);
tapdisk_server_for_each_vbd(vbd, tmp)
- if (!strcmp(vbd->name, params))
- return vbd;
+ if (vbd->name) {
+ /* TODO VBDs without name? Should this be treated as a bug? */
+ if (!strcmp(vbd->name, params))
+ return vbd;
+ }
return NULL;
}
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-07-15 11:55 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).