* [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support
@ 2013-07-15 11:55 Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
0 siblings, 2 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
To: xen-devel; +Cc: thanos.makatos
This patch series introduces full VHD support, plus some minor code
improvements.
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 1 of 2 RESEND] blktap3/vhd: Import VHD support
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
To: xen-devel; +Cc: thanos.makatos
This patch imports the VHD driver from blktap2, with all changes coming from
blktap2.5.
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
diff --git a/tools/blktap3/drivers/Makefile b/tools/blktap3/drivers/Makefile
--- a/tools/blktap3/drivers/Makefile
+++ b/tools/blktap3/drivers/Makefile
@@ -95,6 +95,7 @@ LIBSRING := sring/libsring.a
#MISC-OBJS-y := atomicio.o
BLK-OBJS-y := block-aio.o
+BLK-OBJS-y += block-vhd.o
# FIXME The following exist in blktap2 but not in blktap2.5.
#BLK-OBJS-y += aes.o
#BLK-OBJS-y += md5.o
diff --git a/tools/blktap2/drivers/block-vhd.c b/tools/blktap3/drivers/block-vhd.c
copy from tools/blktap2/drivers/block-vhd.c
copy to tools/blktap3/drivers/block-vhd.c
--- a/tools/blktap2/drivers/block-vhd.c
+++ b/tools/blktap3/drivers/block-vhd.c
@@ -1,5 +1,8 @@
-/*
- * Copyright (c) 2008, XenSource Inc.
+/*
+ *
+ * Copyright (c) 2007, XenSource Inc.
+ * Copyright (c) 2010, Citrix Systems, Inc.
+ *
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -24,6 +27,10 @@
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * block-vhd.c: asynchronous vhd implementation.
*
* A note on write transactions:
* Writes that require updating the BAT or bitmaps cannot be signaled
@@ -50,6 +57,8 @@
#include <unistd.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
+#include <uuid/uuid.h> /* For whatever reason, Linux packages this in */
+ /* e2fsprogs-devel. */
#include <string.h> /* for memset. */
#include <libaio.h>
#include <sys/mman.h>
@@ -59,6 +68,7 @@
#include "tapdisk-driver.h"
#include "tapdisk-interface.h"
#include "tapdisk-disktype.h"
+#include "tapdisk-storage.h"
unsigned int SPB;
@@ -72,7 +82,7 @@ unsigned int SPB;
do { \
DBG(TLOG_DBG, "%s: QUEUED: %" PRIu64 ", COMPLETED: %" \
PRIu64", RETURNED: %" PRIu64 ", DATA_ALLOCATED: " \
- "%lu, BBLK: 0x%04x\n", \
+ "%u, BBLK: 0x%04x\n", \
s->vhd.file, s->queued, s->completed, s->returned, \
VHD_REQS_DATA - s->vreq_free_count, \
s->bat.pbw_blk); \
@@ -84,21 +94,20 @@ unsigned int SPB;
__FILE__, __LINE__, #_p); \
DBG(TLOG_WARN, "%s:%d: FAILED ASSERTION: '%s'\n", \
__FILE__, __LINE__, #_p); \
- tlog_flush(); \
- *(int*)0 = 0; \
+ td_panic(); \
}
#if (DEBUGGING == 1)
#define DBG(level, _f, _a...) DPRINTF(_f, ##_a)
- #define ERR(err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a)
+ #define ERR(_s, err, _f, _a...) DPRINTF("ERROR: %d: " _f, err, ##_a)
#define TRACE(s) ((void)0)
#elif (DEBUGGING == 2)
#define DBG(level, _f, _a...) tlog_write(level, _f, ##_a)
- #define ERR(err, _f, _a...) tlog_error(err, _f, ##_a)
+ #define ERR(_s, _err, _f, _a...) tlog_drv_error((_s)->driver, _err, _f, ##_a)
#define TRACE(s) __TRACE(s)
#else
#define DBG(level, _f, _a...) ((void)0)
- #define ERR(err, _f, _a...) ((void)0)
+ #define ERR(_s, err, _f, _a...) ((void)0)
#define TRACE(s) ((void)0)
#endif
@@ -121,6 +130,7 @@ unsigned int SPB;
#define VHD_OP_BITMAP_READ 3
#define VHD_OP_BITMAP_WRITE 4
#define VHD_OP_ZERO_BM_WRITE 5
+#define VHD_OP_REDUNDANT_BM_WRITE 6
#define VHD_BM_BAT_LOCKED 0
#define VHD_BM_BAT_CLEAR 1
@@ -194,8 +204,8 @@ struct vhd_bat_state {
};
struct vhd_bitmap {
- u32 blk;
- u64 seqno; /* lru sequence number */
+ uint32_t blk;
+ uint64_t seqno; /* lru sequence number */
vhd_flag_t status;
char *map; /* map should only be modified
@@ -220,15 +230,16 @@ struct vhd_state {
/* VHD stuff */
vhd_context_t vhd;
- u32 spp; /* sectors per page */
- u32 spb; /* sectors per block */
- u64 next_db; /* pointer to the next
+ uint32_t spp; /* sectors per page */
+ uint32_t spb; /* sectors per block */
+ uint64_t first_db; /* pointer to datablock 0 */
+ uint64_t next_db; /* pointer to the next
* (unallocated) datablock */
struct vhd_bat_state bat;
- u64 bm_lru; /* lru sequence number */
- u32 bm_secs; /* size of bitmap, in sectors */
+ uint64_t bm_lru; /* lru sequence number */
+ uint32_t bm_secs; /* size of bitmap, in sectors */
struct vhd_bitmap *bitmap[VHD_CACHE_SIZE];
int bm_free_count;
@@ -239,6 +250,12 @@ struct vhd_state {
struct vhd_request *vreq_free[VHD_REQS_DATA];
struct vhd_request vreq_list[VHD_REQS_DATA];
+ /* for redundant bitmap writes */
+ int padbm_size;
+ char *padbm_buf;
+ long int debug_skipped_redundant_writes;
+ long int debug_done_redundant_writes;
+
td_driver_t *driver;
uint64_t queued;
@@ -274,7 +291,7 @@ vhd_initialize(struct vhd_state *s)
_vhd_zsize += VHD_BLOCK_SIZE;
_vhd_zeros = mmap(0, _vhd_zsize, PROT_READ,
- MAP_SHARED | MAP_ANON, -1, 0);
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (_vhd_zeros == MAP_FAILED) {
EPRINTF("vhd_initialize failed: %d\n", -errno);
_vhd_zeros = NULL;
@@ -292,6 +309,7 @@ vhd_free(struct vhd_state *s)
if (_vhd_master != s || !_vhd_zeros)
return;
+ free(s->padbm_buf);
munmap(_vhd_zeros, _vhd_zsize);
_vhd_zsize = 0;
_vhd_zeros = NULL;
@@ -333,23 +351,23 @@ static int
vhd_kill_footer(struct vhd_state *s)
{
int err;
- off_t end;
- char *zeros;
+ off64_t end;
+ void *zeros;
if (s->vhd.footer.type == HD_TYPE_FIXED)
return 0;
- err = posix_memalign((void **)&zeros, 512, 512);
+ err = posix_memalign(&zeros, 512, 512);
if (err)
return -err;
err = 1;
memset(zeros, 0xc7c7c7c7, 512);
- if ((end = lseek(s->vhd.fd, 0, SEEK_END)) == -1)
+ if ((end = lseek64(s->vhd.fd, 0, SEEK_END)) == -1)
goto fail;
- if (lseek(s->vhd.fd, (end - 512), SEEK_SET) == -1)
+ if (lseek64(s->vhd.fd, (end - 512), SEEK_SET) == -1)
goto fail;
if (write(s->vhd.fd, zeros, 512) != 512)
@@ -368,7 +386,7 @@ static inline int
find_next_free_block(struct vhd_state *s)
{
int err;
- off_t eom;
+ off64_t eom;
uint32_t i, entry;
err = vhd_end_of_headers(&s->vhd, &eom);
@@ -376,6 +394,9 @@ find_next_free_block(struct vhd_state *s
return err;
s->next_db = secs_round_up(eom);
+ s->first_db = s->next_db;
+ if ((s->first_db + s->bm_secs) % s->spp)
+ s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp));
for (i = 0; i < s->bat.bat.entries; i++) {
entry = bat_entry(s, i);
@@ -398,12 +419,11 @@ vhd_free_bat(struct vhd_state *s)
static int
vhd_initialize_bat(struct vhd_state *s)
{
- int err, psize, batmap_required, i;
+ int err, batmap_required, i;
+ void *buf;
memset(&s->bat, 0, sizeof(struct vhd_bat));
- psize = getpagesize();
-
err = vhd_read_bat(&s->vhd, &s->bat.bat);
if (err) {
EPRINTF("%s: reading bat: %d\n", s->vhd.file, err);
@@ -436,12 +456,11 @@ vhd_initialize_bat(struct vhd_state *s)
s->vhd.file);
}
- err = posix_memalign((void **)&s->bat.bat_buf,
- VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
- if (err) {
- s->bat.bat_buf = NULL;
+ err = posix_memalign(&buf, VHD_SECTOR_SIZE, VHD_SECTOR_SIZE);
+ if (err)
goto fail;
- }
+
+ s->bat.bat_buf = buf;
return 0;
@@ -471,6 +490,7 @@ vhd_initialize_bitmap_cache(struct vhd_s
{
int i, err, map_size;
struct vhd_bitmap *bm;
+ void *map, *shadow;
memset(s->bitmap_list, 0, sizeof(struct vhd_bitmap) * VHD_CACHE_SIZE);
@@ -481,17 +501,17 @@ vhd_initialize_bitmap_cache(struct vhd_s
for (i = 0; i < VHD_CACHE_SIZE; i++) {
bm = s->bitmap_list + i;
- err = posix_memalign((void **)&bm->map, 512, map_size);
- if (err) {
- bm->map = NULL;
+ err = posix_memalign(&map, 512, map_size);
+ if (err)
goto fail;
- }
-
- err = posix_memalign((void **)&bm->shadow, 512, map_size);
- if (err) {
- bm->shadow = NULL;
+
+ bm->map = map;
+
+ err = posix_memalign(&shadow, 512, map_size);
+ if (err)
goto fail;
- }
+
+ bm->shadow = shadow;
memset(bm->map, 0, map_size);
memset(bm->shadow, 0, map_size);
@@ -508,6 +528,8 @@ fail:
static int
vhd_initialize_dynamic_disk(struct vhd_state *s)
{
+ uint32_t bm_size;
+ void *buf;
int err;
err = vhd_get_header(&s->vhd);
@@ -527,6 +549,21 @@ vhd_initialize_dynamic_disk(struct vhd_s
s->spb = s->vhd.header.block_size >> VHD_SECTOR_SHIFT;
s->bm_secs = secs_round_up_no_zero(s->spb >> 3);
+ s->padbm_size = (s->bm_secs / getpagesize()) * getpagesize();
+ if (s->bm_secs % getpagesize())
+ s->padbm_size += getpagesize();
+
+ err = posix_memalign(&buf, 512, s->padbm_size);
+ if (err)
+ return -err;
+
+ s->padbm_buf = buf;
+ bm_size = s->bm_secs << VHD_SECTOR_SHIFT;
+ memset(s->padbm_buf, 0, s->padbm_size - bm_size);
+ memset(s->padbm_buf + (s->padbm_size - bm_size), ~0, bm_size);
+ s->debug_skipped_redundant_writes = 0;
+ s->debug_done_redundant_writes = 0;
+
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_NO_CACHE))
return 0;
@@ -616,6 +653,9 @@ static int
o_flags = ((test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) ?
VHD_OPEN_RDONLY : VHD_OPEN_RDWR);
+ if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT))
+ set_vhd_flag(o_flags, VHD_OPEN_STRICT);
+
err = vhd_open(&s->vhd, name, o_flags);
if (err) {
libvhd_set_log_level(1);
@@ -650,8 +690,7 @@ static int
driver->info.sector_size = VHD_SECTOR_SIZE;
driver->info.info = 0;
- DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%"PRIu64
- ", inf:%u)\n",
+ DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n",
driver->info.size, driver->info.sector_size, driver->info.info);
if (test_vhd_flag(flags, VHD_FLAG_OPEN_STRICT) &&
@@ -692,6 +731,8 @@ static int
VHD_FLAG_OPEN_NO_CACHE);
/* pre-allocate for all but NFS and LVM storage */
+ driver->storage = tapdisk_storage_type(name);
+
if (driver->storage != TAPDISK_STORAGE_TYPE_NFS &&
driver->storage != TAPDISK_STORAGE_TYPE_LVM)
vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE;
@@ -726,11 +767,14 @@ static int
{
int err;
struct vhd_state *s;
- struct vhd_bitmap *bm;
DBG(TLOG_WARN, "vhd_close\n");
s = (struct vhd_state *)driver->data;
+ DPRINTF("gaps written/skipped: %ld/%ld\n",
+ s->debug_done_redundant_writes,
+ s->debug_skipped_redundant_writes);
+
/* don't write footer if tapdisk is read-only */
if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_RDONLY))
goto free;
@@ -770,10 +814,9 @@ static int
int
vhd_validate_parent(td_driver_t *child_driver,
- td_driver_t *parent_driver, td_flag_t flags)
+ td_driver_t *parent_driver,
+ __attribute__((unused)) td_flag_t flags)
{
- uint32_t status;
- struct stat stats;
struct vhd_state *child = (struct vhd_state *)child_driver->data;
struct vhd_state *parent;
@@ -807,7 +850,7 @@ vhd_validate_parent(td_driver_t *child_d
}
*/
- if (vhd_uuid_compare(&child->vhd.header.prt_uuid, &parent->vhd.footer.uuid)) {
+ if (uuid_compare(child->vhd.header.prt_uuid, parent->vhd.footer.uuid)) {
DPRINTF("ERROR: %s: %s, %s: parent uuid has changed since "
"snapshot. Child image no longer valid.\n",
__func__, child->vhd.file, parent->vhd.file);
@@ -838,12 +881,10 @@ vhd_get_parent_id(td_driver_t *driver, t
if (err)
return err;
- id->name = parent;
- id->drivertype = DISK_TYPE_VHD;
- if (vhd_parent_raw(&s->vhd)) {
- DPRINTF("VHD: parent is raw\n");
- id->drivertype = DISK_TYPE_AIO;
- }
+ id->name = parent;
+ id->type = vhd_parent_raw(&s->vhd) ? DISK_TYPE_AIO : DISK_TYPE_VHD;
+ id->flags |= TD_OPEN_SHAREABLE|TD_OPEN_RDONLY;
+
return 0;
}
@@ -1034,7 +1075,7 @@ static struct vhd_bitmap *
remove_lru_bitmap(struct vhd_state *s)
{
int i, idx = 0;
- u64 seq = s->bm_lru;
+ uint64_t seq = s->bm_lru;
struct vhd_bitmap *bm, *lru = NULL;
for (i = 0; i < VHD_CACHE_SIZE; i++) {
@@ -1138,7 +1179,7 @@ free_vhd_bitmap(struct vhd_state *s, str
static int
read_bitmap_cache(struct vhd_state *s, uint64_t sector, uint8_t op)
{
- u32 blk, sec;
+ uint32_t blk, sec;
struct vhd_bitmap *bm;
/* in fixed disks, every block is present */
@@ -1186,7 +1227,7 @@ read_bitmap_cache_span(struct vhd_state
uint64_t sector, int nr_secs, int value)
{
int ret;
- u32 blk, sec;
+ uint32_t blk, sec;
struct vhd_bitmap *bm;
/* in fixed disks, every block is present */
@@ -1285,9 +1326,9 @@ static int
schedule_bat_write(struct vhd_state *s)
{
int i;
- u32 blk;
+ uint32_t blk;
char *buf;
- u64 offset;
+ uint64_t offset;
struct vhd_request *req;
ASSERT(bat_locked(s));
@@ -1299,10 +1340,10 @@ schedule_bat_write(struct vhd_state *s)
init_vhd_request(s, req);
memcpy(buf, &bat_entry(s, blk - (blk % 128)), 512);
- ((u32 *)buf)[blk % 128] = s->bat.pbw_offset;
+ ((uint32_t *)buf)[blk % 128] = s->bat.pbw_offset;
for (i = 0; i < 128; i++)
- BE32_OUT(&((u32 *)buf)[i]);
+ BE32_OUT(&((uint32_t *)buf)[i]);
offset = s->vhd.header.table_offset + (blk - (blk % 128)) * 4;
req->treq.secs = 1;
@@ -1343,6 +1384,55 @@ schedule_zero_bm_write(struct vhd_state
aio_write(s, req, offset);
}
+/* This is a performance optimization. When writing sequentially into full
+ * blocks, skipping (up-to-date) bitmaps causes an approx. 25% reduction in
+ * throughput. To prevent skipping, we issue redundant writes into the (padded)
+ * bitmap area just to make all writes sequential. This will help VHDs on raw
+ * block devices, while the FS-based VHDs shouldn't suffer much.
+ *
+ * Note that it only makes sense to perform this reduntant bitmap write if the
+ * block is completely full (i.e. the batmap entry is set). If the block is not
+ * completely full then one of the following two things will be true:
+ * 1. we'll either be allocating new sectors in this block and writing its
+ * bitmap transactionally, which will be slow anyways; or
+ * 2. the IO will be skipping over the unallocated sectors again, so the
+ * pattern will not be sequential anyways
+ * In either case a redundant bitmap write becomes pointless. This fact
+ * simplifies the implementation of redundant writes: since we know the bitmap
+ * cannot be updated by anyone else, we don't have to worry about transactions
+ * or potential write conflicts.
+ * */
+static void
+schedule_redundant_bm_write(struct vhd_state *s, uint32_t blk)
+{
+ uint64_t offset;
+ struct vhd_request *req;
+
+ ASSERT(s->vhd.footer.type != HD_TYPE_FIXED);
+ ASSERT(test_batmap(s, blk));
+
+ req = alloc_vhd_request(s);
+ if (!req)
+ return;
+
+ req->treq.buf = s->padbm_buf;
+
+ offset = bat_entry(s, blk);
+ ASSERT(offset != DD_BLK_UNUSED);
+ offset <<= VHD_SECTOR_SHIFT;
+ offset -= s->padbm_size - (s->bm_secs << VHD_SECTOR_SHIFT);
+
+ req->op = VHD_OP_REDUNDANT_BM_WRITE;
+ req->treq.sec = blk * s->spb;
+ req->treq.secs = s->padbm_size >> VHD_SECTOR_SHIFT;
+ req->next = NULL;
+
+ DBG(TLOG_DBG, "blk: %u, writing redundant bitmap at %" PRIu64 "\n",
+ blk, offset);
+
+ aio_write(s, req, offset);
+}
+
static int
update_bat(struct vhd_state *s, uint32_t blk)
{
@@ -1380,10 +1470,10 @@ update_bat(struct vhd_state *s, uint32_t
static int
allocate_block(struct vhd_state *s, uint32_t blk)
{
- char *zeros;
int err, gap;
uint64_t offset, size;
struct vhd_bitmap *bm;
+ ssize_t count;
ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED);
@@ -1410,15 +1500,16 @@ allocate_block(struct vhd_state *s, uint
blk, s->bat.pbw_offset);
if (lseek(s->vhd.fd, offset, SEEK_SET) == (off_t)-1) {
- ERR(errno, "lseek failed\n");
+ ERR(s, -errno, "lseek failed\n");
return -errno;
}
- size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
- err = write(s->vhd.fd, vhd_zeros(size), size);
- if (err != size) {
- err = (err == -1 ? -errno : -EIO);
- ERR(err, "write failed");
+ size = vhd_sectors_to_bytes(s->spb + s->bm_secs + gap);
+ count = write(s->vhd.fd, vhd_zeros(size), size);
+ if (count != size) {
+ err = count < 0 ? -errno : -ENOSPC;
+ ERR(s, -errno,
+ "write failed (%zd, offset %"PRIu64")\n", count, offset);
return err;
}
@@ -1445,8 +1536,8 @@ allocate_block(struct vhd_state *s, uint
static int
schedule_data_read(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
{
- u64 offset;
- u32 blk = 0, sec = 0;
+ uint64_t offset;
+ uint32_t blk = 0, sec = 0;
struct vhd_bitmap *bm;
struct vhd_request *req;
@@ -1490,8 +1581,8 @@ static int
schedule_data_write(struct vhd_state *s, td_request_t treq, vhd_flag_t flags)
{
int err;
- u64 offset;
- u32 blk = 0, sec = 0;
+ uint64_t offset;
+ uint32_t blk = 0, sec = 0;
struct vhd_bitmap *bm = NULL;
struct vhd_request *req;
@@ -1539,7 +1630,11 @@ schedule_data_write(struct vhd_state *s,
set_vhd_flag(req->flags, VHD_FLAG_REQ_QUEUED);
} else
add_to_transaction(&bm->tx, req);
- }
+ } else if (sec == 0 && /* first sector inside data block */
+ s->vhd.footer.type != HD_TYPE_FIXED &&
+ bat_entry(s, blk) != s->first_db &&
+ test_batmap(s, blk))
+ schedule_redundant_bm_write(s, blk);
aio_write(s, req, offset);
@@ -1554,7 +1649,7 @@ static int
schedule_bitmap_read(struct vhd_state *s, uint32_t blk)
{
int err;
- u64 offset;
+ uint64_t offset;
struct vhd_bitmap *bm;
struct vhd_request *req = NULL;
@@ -1596,7 +1691,7 @@ schedule_bitmap_read(struct vhd_state *s
static void
schedule_bitmap_write(struct vhd_state *s, uint32_t blk)
{
- u64 offset;
+ uint64_t offset;
struct vhd_bitmap *bm;
struct vhd_request *req;
@@ -1641,7 +1736,7 @@ schedule_bitmap_write(struct vhd_state *
static int
__vhd_queue_request(struct vhd_state *s, uint8_t op, td_request_t treq)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_request *req;
@@ -1767,7 +1862,6 @@ vhd_queue_write(td_driver_t *driver, td_
case VHD_BM_BAT_LOCKED:
err = -EBUSY;
- clone.blocked = 1;
goto fail;
case VHD_BM_BAT_CLEAR:
@@ -1860,9 +1954,9 @@ signal_completion(struct vhd_request *li
static void
start_new_bitmap_transaction(struct vhd_state *s, struct vhd_bitmap *bm)
{
- int i, error = 0;
struct vhd_transaction *tx;
struct vhd_request *r, *next;
+ int i;
if (!bm->queue.head)
return;
@@ -1885,7 +1979,7 @@ start_new_bitmap_transaction(struct vhd_
if (test_vhd_flag(r->flags, VHD_FLAG_REQ_FINISHED)) {
tx->finished++;
if (!r->error) {
- u32 sec = r->treq.sec % s->spb;
+ uint32_t sec = r->treq.sec % s->spb;
for (i = 0; i < r->treq.secs; i++)
vhd_bitmap_set(&s->vhd,
bm->shadow, sec + i);
@@ -2027,7 +2121,7 @@ finish_bat_write(struct vhd_request *req
static void
finish_zero_bm_write(struct vhd_request *req)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_transaction *tx = req->tx;
struct vhd_state *s = req->state;
@@ -2058,10 +2152,30 @@ finish_zero_bm_write(struct vhd_request
finish_data_transaction(s, bm);
}
+static int
+finish_redundant_bm_write(struct vhd_request *req)
+{
+ /* uint32_t blk; */
+ struct vhd_state *s = (struct vhd_state *) req->state;
+
+ s->returned++;
+ TRACE(s);
+ /* blk = req->treq.sec / s->spb;
+ DBG(TLOG_DBG, "blk: %u\n", blk); */
+
+ if (req->error) {
+ ERR(s, req->error, "lsec: 0x%08"PRIx64, req->treq.sec);
+ }
+ free_vhd_request(s, req);
+ s->debug_done_redundant_writes++;
+ return 0;
+}
+
+
static void
finish_bitmap_read(struct vhd_request *req)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_request *r, *next;
struct vhd_state *s = req->state;
@@ -2113,7 +2227,7 @@ finish_bitmap_read(struct vhd_request *r
static void
finish_bitmap_write(struct vhd_request *req)
{
- u32 blk;
+ uint32_t blk;
struct vhd_bitmap *bm;
struct vhd_transaction *tx;
struct vhd_state *s = req->state;
@@ -2156,7 +2270,7 @@ finish_data_write(struct vhd_request *re
set_vhd_flag(req->flags, VHD_FLAG_REQ_FINISHED);
if (tx) {
- u32 blk, sec;
+ uint32_t blk, sec;
struct vhd_bitmap *bm;
blk = req->treq.sec / s->spb;
@@ -2199,7 +2313,7 @@ vhd_complete(void *arg, struct tiocb *ti
req->error = err;
if (req->error)
- ERR(req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
+ ERR(s, req->error, "%s: op: %u, lsec: %"PRIu64", secs: %u, "
"nbytes: %lu, blk: %"PRIu64", blk_offset: %u",
s->vhd.file, req->op, req->treq.sec, req->treq.secs,
io->u.c.nbytes, req->treq.sec / s->spb,
@@ -2226,6 +2340,10 @@ vhd_complete(void *arg, struct tiocb *ti
finish_zero_bm_write(req);
break;
+ case VHD_OP_REDUNDANT_BM_WRITE:
+ finish_redundant_bm_write(req);
+ break;
+
case VHD_OP_BAT_WRITE:
finish_bat_write(req);
break;
@@ -2250,14 +2368,15 @@ vhd_debug(td_driver_t *driver)
DBG(TLOG_WARN, "READS: 0x%08"PRIx64", AVG_READ_SIZE: %f\n",
s->reads, (s->reads ? ((float)s->read_size / s->reads) : 0.0));
- DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%lu total)\n", VHD_REQS_DATA);
+ DBG(TLOG_WARN, "ALLOCATED REQUESTS: (%u total)\n", VHD_REQS_DATA);
for (i = 0; i < VHD_REQS_DATA; i++) {
struct vhd_request *r = &s->vreq_list[i];
td_request_t *t = &r->treq;
+ const char *vname = t->vreq ? t->vreq->name: NULL;
if (t->secs)
- DBG(TLOG_WARN, "%d: id: 0x%04"PRIx64", err: %d, op: %d,"
+ DBG(TLOG_WARN, "%d: vreq: %s.%d, err: %d, op: %d,"
" lsec: 0x%08"PRIx64", flags: %d, this: %p, "
- "next: %p, tx: %p\n", i, t->id, r->error, r->op,
+ "next: %p, tx: %p\n", i, vname, t->sidx, r->error, r->op,
t->sec, r->flags, r, r->next, r->tx);
}
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
@ 2013-07-15 11:55 ` Thanos Makatos
1 sibling, 0 replies; 3+ messages in thread
From: Thanos Makatos @ 2013-07-15 11:55 UTC (permalink / raw)
To: xen-devel; +Cc: thanos.makatos
This patch introduces a few extra checks in the code path that tell whether a
particular Virtual Disk Image driver is available.
Signed-off-by: Thanos Makatos <thanos.makatos@citrix.com>
diff --git a/tools/blktap3/drivers/tapdisk-control.c b/tools/blktap3/drivers/tapdisk-control.c
--- a/tools/blktap3/drivers/tapdisk-control.c
+++ b/tools/blktap3/drivers/tapdisk-control.c
@@ -586,7 +586,7 @@ tapdisk_control_open_image(
tapdisk_message_t *request, tapdisk_message_t * const response)
{
int err;
- td_vbd_t *vbd;
+ td_vbd_t *vbd = NULL;
td_flag_t flags;
td_disk_info_t info;
int prt_path_len;
@@ -663,7 +663,9 @@ out:
response->u.image.sector_size = info.sector_size;
response->u.image.info = info.info;
response->type = TAPDISK_MESSAGE_OPEN_RSP;
- }
+ } else
+ if (vbd)
+ tapdisk_server_remove_vbd(vbd);
return err;
diff --git a/tools/blktap3/drivers/tapdisk-disktype.c b/tools/blktap3/drivers/tapdisk-disktype.c
--- a/tools/blktap3/drivers/tapdisk-disktype.c
+++ b/tools/blktap3/drivers/tapdisk-disktype.c
@@ -148,7 +148,7 @@ const disk_info_t *tapdisk_disk_types[]
extern struct tap_disk tapdisk_aio;
/*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
*/
#if 0
extern struct tap_disk tapdisk_sync;
@@ -160,7 +160,7 @@ extern struct tap_disk tapdisk_vhd;
extern struct tap_disk tapdisk_ram;
/*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
*/
#if 0
extern struct tap_disk tapdisk_qcow;
@@ -169,39 +169,74 @@ extern struct tap_disk tapdisk_qcow;
extern struct tap_disk tapdisk_vhd_index;
/*
- * TODO Why commented out?
+ * XXX Commented out in blktap2.5.
*/
#if 0
extern struct tap_disk tapdisk_log;
#endif
-const struct tap_disk *tapdisk_disk_drivers[] = {
- [DISK_TYPE_AIO] = &tapdisk_aio,
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt)
+{
+ static const struct tap_disk *tapdisk_disk_drivers[] = {
+ [DISK_TYPE_AIO] = &tapdisk_aio,
-/*
- * TODO Why commented out?
- */
+ /*
+ * XXX Commented out in blktap2.5.
+ */
#if 0
- [DISK_TYPE_SYNC] = &tapdisk_sync,
- [DISK_TYPE_VMDK] = &tapdisk_vmdk,
- [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+ [DISK_TYPE_SYNC] = &tapdisk_sync,
+ [DISK_TYPE_VMDK] = &tapdisk_vmdk,
+ [DISK_TYPE_VHDSYNC] = &tapdisk_vhdsync_disk
+#endif
+ [DISK_TYPE_VHD] = &tapdisk_vhd,
+
+ /*
+ * TODO Commeneted out to simplify the upstreaming process.
+ */
+#if 0
+ [DISK_TYPE_RAM] = &tapdisk_ram,
#endif
-/*
- * TODO Why commented out?
- */
+ /*
+ * XXX Commented out in blktap2.5.
+ */
#if 0
- [DISK_TYPE_QCOW] = &tapdisk_qcow,
+ [DISK_TYPE_QCOW] = &tapdisk_qcow,
#endif
-/*
- * TODO Why commented out?
- */
+ /*
+ * TODO Commeneted out to simplify the upstreaming process.
+ */
#if 0
- [DISK_TYPE_LOG] = &tapdisk_log,
+ [DISK_TYPE_BLOCK_CACHE] = &tapdisk_block_cache,
+ [DISK_TYPE_VINDEX] = &tapdisk_vhd_index,
#endif
- 0,
-};
+
+ /*
+ * XXX Commented out in blktap2.5.
+ */
+#if 0
+ [DISK_TYPE_LOG] = &tapdisk_log,
+#endif
+
+ /*
+ * TODO Commeneted out to simplify the upstreaming process.
+ */
+#if 0
+ [DISK_TYPE_LCACHE] = &tapdisk_lcache,
+ [DISK_TYPE_LLPCACHE] = &tapdisk_llpcache,
+ [DISK_TYPE_LLECACHE] = &tapdisk_llecache,
+ [DISK_TYPE_VALVE] = &tapdisk_valve,
+ [DISK_TYPE_NBD] = &tapdisk_nbd,
+#endif
+ };
+
+ if (dt < 0 || dt > ARRAY_SIZE(tapdisk_disk_drivers))
+ return NULL;
+ else
+ return tapdisk_disk_drivers[dt];
+}
int
tapdisk_disktype_find(const char *name)
@@ -217,7 +252,7 @@ tapdisk_disktype_find(const char *name)
if (strcmp(name, info->name))
continue;
- if (!tapdisk_disk_drivers[i])
+ if (!tapdisk_disk_driver_get(i))
return -ENOSYS;
return i;
diff --git a/tools/blktap3/drivers/tapdisk-disktype.h b/tools/blktap3/drivers/tapdisk-disktype.h
--- a/tools/blktap3/drivers/tapdisk-disktype.h
+++ b/tools/blktap3/drivers/tapdisk-disktype.h
@@ -29,32 +29,34 @@
#ifndef __DISKTYPES_H__
#define __DISKTYPES_H__
-#define DISK_TYPE_AIO 0
-#define DISK_TYPE_SYNC 1
-#define DISK_TYPE_VMDK 2
-#define DISK_TYPE_VHDSYNC 3
-#define DISK_TYPE_VHD 4
-#define DISK_TYPE_RAM 5
-#define DISK_TYPE_QCOW 6
-#define DISK_TYPE_BLOCK_CACHE 7
-#define DISK_TYPE_VINDEX 8
-#define DISK_TYPE_LOG 9
-#define DISK_TYPE_REMUS 10
-#define DISK_TYPE_LCACHE 11
-#define DISK_TYPE_LLECACHE 12
-#define DISK_TYPE_LLPCACHE 13
-#define DISK_TYPE_VALVE 14
+enum disk_type {
+ DISK_TYPE_AIO,
+ DISK_TYPE_SYNC,
+ DISK_TYPE_VMDK,
+ DISK_TYPE_VHDSYNC,
+ DISK_TYPE_VHD,
+ DISK_TYPE_RAM,
+ DISK_TYPE_QCOW,
+ DISK_TYPE_BLOCK_CACHE,
+ DISK_TYPE_VINDEX,
+ DISK_TYPE_LOG,
+ DISK_TYPE_REMUS,
+ DISK_TYPE_LCACHE,
+ DISK_TYPE_LLECACHE,
+ DISK_TYPE_LLPCACHE,
+ DISK_TYPE_VALVE};
#define DISK_TYPE_NAME_MAX 32
typedef struct disk_info {
const char *name; /* driver name, e.g. 'aio' */
char *desc; /* e.g. "raw image" */
- unsigned int flags;
+ unsigned int flags;
} disk_info_t;
extern const disk_info_t *tapdisk_disk_types[];
-extern const struct tap_disk *tapdisk_disk_drivers[];
+const struct tap_disk *
+tapdisk_disk_driver_get(const enum disk_type dt);
/* one single controller for all instances of disk type */
#define DISK_TYPE_SINGLE_CONTROLLER (1<<0)
diff --git a/tools/blktap3/drivers/tapdisk-driver.c b/tools/blktap3/drivers/tapdisk-driver.c
--- a/tools/blktap3/drivers/tapdisk-driver.c
+++ b/tools/blktap3/drivers/tapdisk-driver.c
@@ -73,7 +73,7 @@ tapdisk_driver_allocate(int type, const
td_driver_t *driver;
const struct tap_disk *ops;
- ops = tapdisk_disk_drivers[type];
+ ops = tapdisk_disk_driver_get(type);
if (!ops)
return NULL;
diff --git a/tools/blktap3/drivers/tapdisk-interface.c b/tools/blktap3/drivers/tapdisk-interface.c
--- a/tools/blktap3/drivers/tapdisk-interface.c
+++ b/tools/blktap3/drivers/tapdisk-interface.c
@@ -67,6 +67,8 @@ int
int err;
td_driver_t *driver;
+ assert(image);
+
driver = image->driver;
if (!driver) {
driver = tapdisk_driver_allocate(image->type,
@@ -80,6 +82,8 @@ int
}
if (!td_flag_test(driver->state, TD_DRIVER_OPEN)) {
+ assert(driver->ops);
+ assert(driver->ops->td_open);
err = driver->ops->td_open(driver, image->name, image->flags);
if (err) {
if (!image->driver)
diff --git a/tools/blktap3/drivers/tapdisk-server.c b/tools/blktap3/drivers/tapdisk-server.c
--- a/tools/blktap3/drivers/tapdisk-server.c
+++ b/tools/blktap3/drivers/tapdisk-server.c
@@ -90,8 +90,11 @@ tapdisk_server_get_vbd(const char *param
assert(params);
tapdisk_server_for_each_vbd(vbd, tmp)
- if (!strcmp(vbd->name, params))
- return vbd;
+ if (vbd->name) {
+ /* TODO VBDs without name? Should this be treated as a bug? */
+ if (!strcmp(vbd->name, params))
+ return vbd;
+ }
return NULL;
}
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-07-15 11:55 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-07-15 11:55 [PATCH 0 of 2 RESEND] blktap3/vhd: Full VHD support Thanos Makatos
2013-07-15 11:55 ` [PATCH 1 of 2 RESEND] blktap3/vhd: Import " Thanos Makatos
2013-07-15 11:55 ` [PATCH 2 of 2 RESEND] blktap3/vhd: Assorted improvements Thanos Makatos
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.