* [PATCH] stubdom: add asynchronous disk flush support
@ 2008-04-05 12:48 Samuel Thibault
0 siblings, 0 replies; only message in thread
From: Samuel Thibault @ 2008-04-05 12:48 UTC (permalink / raw)
To: xen-devel
stubdom: add asynchronous disk flush support
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
diff -r 4558664bea4a extras/mini-os/blkfront.c
--- a/extras/mini-os/blkfront.c Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/blkfront.c Sat Apr 05 13:25:43 2008 +0100
@@ -48,11 +48,7 @@ struct blkfront_dev {
char *nodename;
char *backend;
- unsigned sector_size;
- unsigned sectors;
- int mode;
- int barrier;
- int flush;
+ struct blkfront_info info;
#ifdef HAVE_LIBC
int fd;
@@ -70,7 +66,7 @@ void blkfront_handler(evtchn_port_t port
wake_up(&blkfront_queue);
}
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info)
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info)
{
xenbus_transaction_t xbt;
char* err;
@@ -163,9 +159,9 @@ done:
return NULL;
}
if (*c == 'w')
- *mode = dev->mode = O_RDWR;
+ dev->info.mode = O_RDWR;
else
- *mode = dev->mode = O_RDONLY;
+ dev->info.mode = O_RDONLY;
free(c);
snprintf(path, sizeof(path), "%s/state", dev->backend);
@@ -177,24 +173,26 @@ done:
xenbus_unwatch_path(XBT_NIL, path);
snprintf(path, sizeof(path), "%s/info", dev->backend);
- *info = xenbus_read_integer(path);
+ dev->info.info = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/sectors", dev->backend);
// FIXME: read_integer returns an int, so disk size limited to 1TB for now
- *sectors = dev->sectors = xenbus_read_integer(path);
+ dev->info.sectors = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/sector-size", dev->backend);
- *sector_size = dev->sector_size = xenbus_read_integer(path);
+ dev->info.sector_size = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/feature-barrier", dev->backend);
- dev->barrier = xenbus_read_integer(path);
+ dev->info.barrier = xenbus_read_integer(path);
snprintf(path, sizeof(path), "%s/feature-flush-cache", dev->backend);
- dev->flush = xenbus_read_integer(path);
+ dev->info.flush = xenbus_read_integer(path);
+
+ *info = dev->info;
}
unmask_evtchn(dev->evtchn);
- printk("%u sectors of %u bytes\n", dev->sectors, dev->sector_size);
+ printk("%u sectors of %u bytes\n", dev->info.sectors, dev->info.sector_size);
printk("**************************\n");
return dev;
@@ -258,11 +256,11 @@ void blkfront_aio(struct blkfront_aiocb
uintptr_t start, end;
// Can't io at non-sector-aligned location
- ASSERT(!(aiocbp->aio_offset & (dev->sector_size-1)));
+ ASSERT(!(aiocbp->aio_offset & (dev->info.sector_size-1)));
// Can't io non-sector-sized amounts
- ASSERT(!(aiocbp->aio_nbytes & (dev->sector_size-1)));
+ ASSERT(!(aiocbp->aio_nbytes & (dev->info.sector_size-1)));
// Can't io non-sector-aligned buffer
- ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->sector_size-1)));
+ ASSERT(!((uintptr_t) aiocbp->aio_buf & (dev->info.sector_size-1)));
start = (uintptr_t)aiocbp->aio_buf & PAGE_MASK;
end = ((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes + PAGE_SIZE - 1) & PAGE_MASK;
@@ -280,7 +278,7 @@ void blkfront_aio(struct blkfront_aiocb
req->nr_segments = n;
req->handle = dev->handle;
req->id = (uintptr_t) aiocbp;
- req->sector_number = aiocbp->aio_offset / dev->sector_size;
+ req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
for (j = 0; j < n; j++) {
uintptr_t data = start + j * PAGE_SIZE;
@@ -292,10 +290,10 @@ void blkfront_aio(struct blkfront_aiocb
aiocbp->gref[j] = req->seg[j].gref =
gnttab_grant_access(dev->dom, virtual_to_mfn(data), write);
req->seg[j].first_sect = 0;
- req->seg[j].last_sect = PAGE_SIZE / dev->sector_size - 1;
+ req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
}
- req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->sector_size;
- req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->sector_size;
+ req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
+ req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
dev->ring.req_prod_pvt = i + 1;
@@ -313,6 +311,62 @@ void blkfront_aio_read(struct blkfront_a
void blkfront_aio_read(struct blkfront_aiocb *aiocbp)
{
blkfront_aio(aiocbp, 0);
+}
+
+static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op, uint64_t id)
+{
+ int i;
+ struct blkif_request *req;
+ int notify;
+
+ blkfront_wait_slot(dev);
+ i = dev->ring.req_prod_pvt;
+ req = RING_GET_REQUEST(&dev->ring, i);
+ req->operation = op;
+ req->nr_segments = 0;
+ req->handle = dev->handle;
+ req->id = id;
+ /* Not needed anyway, but the backend will check it */
+ req->sector_number = 0;
+ dev->ring.req_prod_pvt = i + 1;
+ wmb();
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
+ if (notify) notify_remote_via_evtchn(dev->evtchn);
+}
+
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op)
+{
+ struct blkfront_dev *dev = aiocbp->aio_dev;
+ blkfront_push_operation(dev, op, (uintptr_t) aiocbp);
+}
+
+void blkfront_sync(struct blkfront_dev *dev)
+{
+ unsigned long flags;
+
+ if (dev->info.mode == O_RDWR) {
+ if (dev->info.barrier == 1)
+ blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER, 0);
+
+ if (dev->info.flush == 1)
+ blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE, 0);
+ }
+
+ /* Note: This won't finish if another thread enqueues requests. */
+ local_irq_save(flags);
+ DEFINE_WAIT(w);
+ while (1) {
+ blkfront_aio_poll(dev);
+ if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
+ break;
+
+ add_waiter(w, blkfront_queue);
+ local_irq_restore(flags);
+ schedule();
+ local_irq_save(flags);
+ }
+ remove_waiter(w);
+ local_irq_restore(flags);
}
int blkfront_aio_poll(struct blkfront_dev *dev)
@@ -337,93 +391,45 @@ moretodo:
rsp = RING_GET_RESPONSE(&dev->ring, cons);
nr_consumed++;
- if (rsp->status != BLKIF_RSP_OKAY)
- printk("block error %d for op %d\n", rsp->status, rsp->operation);
+ struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
+ int status = rsp->status;
+
+ if (status != BLKIF_RSP_OKAY)
+ printk("block error %d for op %d\n", status, rsp->operation);
switch (rsp->operation) {
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
{
- struct blkfront_aiocb *aiocbp = (void*) (uintptr_t) rsp->id;
- int status = rsp->status;
int j;
for (j = 0; j < aiocbp->n; j++)
gnttab_end_access(aiocbp->gref[j]);
- dev->ring.rsp_cons = ++cons;
- /* Nota: callback frees aiocbp itself */
- aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
- if (dev->ring.rsp_cons != cons)
- /* We reentered, we must not continue here */
- goto out;
break;
}
+
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ break;
+
default:
printk("unrecognized block operation %d response\n", rsp->operation);
- case BLKIF_OP_WRITE_BARRIER:
- case BLKIF_OP_FLUSH_DISKCACHE:
- dev->ring.rsp_cons = ++cons;
+ }
+
+ dev->ring.rsp_cons = ++cons;
+ /* Nota: callback frees aiocbp itself */
+ if (aiocbp && aiocbp->aio_cb)
+ aiocbp->aio_cb(aiocbp, status ? -EIO : 0);
+ if (dev->ring.rsp_cons != cons)
+ /* We reentered, we must not continue here */
break;
- }
}
-out:
RING_FINAL_CHECK_FOR_RESPONSES(&dev->ring, more);
if (more) goto moretodo;
return nr_consumed;
-}
-
-static void blkfront_push_operation(struct blkfront_dev *dev, uint8_t op)
-{
- int i;
- struct blkif_request *req;
- int notify;
-
- blkfront_wait_slot(dev);
- i = dev->ring.req_prod_pvt;
- req = RING_GET_REQUEST(&dev->ring, i);
- req->operation = op;
- req->nr_segments = 0;
- req->handle = dev->handle;
- /* Not used */
- req->id = 0;
- /* Not needed anyway, but the backend will check it */
- req->sector_number = 0;
- dev->ring.req_prod_pvt = i + 1;
- wmb();
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&dev->ring, notify);
- if (notify) notify_remote_via_evtchn(dev->evtchn);
-}
-
-void blkfront_sync(struct blkfront_dev *dev)
-{
- unsigned long flags;
-
- if (dev->mode == O_RDWR) {
- if (dev->barrier == 1)
- blkfront_push_operation(dev, BLKIF_OP_WRITE_BARRIER);
-
- if (dev->flush == 1)
- blkfront_push_operation(dev, BLKIF_OP_FLUSH_DISKCACHE);
- }
-
- /* Note: This won't finish if another thread enqueues requests. */
- local_irq_save(flags);
- DEFINE_WAIT(w);
- while (1) {
- blkfront_aio_poll(dev);
- if (RING_FREE_REQUESTS(&dev->ring) == RING_SIZE(&dev->ring))
- break;
-
- add_waiter(w, blkfront_queue);
- local_irq_restore(flags);
- schedule();
- local_irq_save(flags);
- }
- remove_waiter(w);
- local_irq_restore(flags);
}
#ifdef HAVE_LIBC
diff -r 4558664bea4a extras/mini-os/include/blkfront.h
--- a/extras/mini-os/include/blkfront.h Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/include/blkfront.h Sat Apr 05 13:25:43 2008 +0100
@@ -15,13 +15,23 @@ struct blkfront_aiocb
void (*aio_cb)(struct blkfront_aiocb *aiocb, int ret);
};
-struct blkfront_dev *init_blkfront(char *nodename, uint64_t *sectors, unsigned *sector_size, int *mode, int *info);
+struct blkfront_info
+{
+ uint64_t sectors;
+ unsigned sector_size;
+ int mode;
+ int info;
+ int barrier;
+ int flush;
+};
+struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info);
#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev);
#endif
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
void blkfront_aio_read(struct blkfront_aiocb *aiocbp);
void blkfront_aio_write(struct blkfront_aiocb *aiocbp);
+void blkfront_aio_push_operation(struct blkfront_aiocb *aiocbp, uint8_t op);
int blkfront_aio_poll(struct blkfront_dev *dev);
void blkfront_sync(struct blkfront_dev *dev);
void shutdown_blkfront(struct blkfront_dev *dev);
diff -r 4558664bea4a extras/mini-os/kernel.c
--- a/extras/mini-os/kernel.c Fri Apr 04 16:07:44 2008 +0100
+++ b/extras/mini-os/kernel.c Sat Apr 05 13:25:43 2008 +0100
@@ -91,9 +91,7 @@ static void netfront_thread(void *p)
}
static struct blkfront_dev *blk_dev;
-static uint64_t blk_sectors;
-static unsigned blk_sector_size;
-static int blk_mode;
+static struct blkfront_info blk_info;
static uint64_t blk_size_read;
static uint64_t blk_size_write;
@@ -111,9 +109,9 @@ static struct blk_req *blk_alloc_req(uin
{
struct blk_req *req = xmalloc(struct blk_req);
req->aiocb.aio_dev = blk_dev;
- req->aiocb.aio_buf = _xmalloc(blk_sector_size, blk_sector_size);
- req->aiocb.aio_nbytes = blk_sector_size;
- req->aiocb.aio_offset = sector * blk_sector_size;
+ req->aiocb.aio_buf = _xmalloc(blk_info.sector_size, blk_info.sector_size);
+ req->aiocb.aio_nbytes = blk_info.sector_size;
+ req->aiocb.aio_offset = sector * blk_info.sector_size;
req->aiocb.data = req;
req->next = NULL;
return req;
@@ -125,7 +123,7 @@ static void blk_read_completed(struct bl
if (ret)
printk("got error code %d when reading at offset %ld\n", ret, aiocb->aio_offset);
else
- blk_size_read += blk_sector_size;
+ blk_size_read += blk_info.sector_size;
free(aiocb->aio_buf);
free(req);
}
@@ -154,10 +152,10 @@ static void blk_write_read_completed(str
free(req);
return;
}
- blk_size_read += blk_sector_size;
+ blk_size_read += blk_info.sector_size;
buf = (int*) aiocb->aio_buf;
rand_value = req->rand_value;
- for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+ for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
if (buf[i] != rand_value) {
printk("bogus data at offset %ld\n", aiocb->aio_offset + i);
break;
@@ -177,7 +175,7 @@ static void blk_write_completed(struct b
free(req);
return;
}
- blk_size_write += blk_sector_size;
+ blk_size_write += blk_info.sector_size;
/* Push write check */
req->next = blk_to_read;
blk_to_read = req;
@@ -195,7 +193,7 @@ static void blk_write_sector(uint64_t se
req->rand_value = rand_value = rand();
buf = (int*) req->aiocb.aio_buf;
- for (i = 0; i < blk_sector_size / sizeof(int); i++) {
+ for (i = 0; i < blk_info.sector_size / sizeof(int); i++) {
buf[i] = rand_value;
rand_value *= RAND_MIX;
}
@@ -207,35 +205,34 @@ static void blkfront_thread(void *p)
static void blkfront_thread(void *p)
{
time_t lasttime = 0;
- int blk_info;
- blk_dev = init_blkfront(NULL, &blk_sectors, &blk_sector_size, &blk_mode, &blk_info);
+ blk_dev = init_blkfront(NULL, &blk_info);
if (!blk_dev)
return;
- if (blk_info & VDISK_CDROM)
+ if (blk_info.info & VDISK_CDROM)
printk("Block device is a CDROM\n");
- if (blk_info & VDISK_REMOVABLE)
+ if (blk_info.info & VDISK_REMOVABLE)
printk("Block device is removable\n");
- if (blk_info & VDISK_READONLY)
+ if (blk_info.info & VDISK_READONLY)
printk("Block device is read-only\n");
#ifdef BLKTEST_WRITE
- if (blk_mode == O_RDWR) {
+ if (blk_info.mode == O_RDWR) {
blk_write_sector(0);
- blk_write_sector(blk_sectors-1);
+ blk_write_sector(blk_info.sectors-1);
} else
#endif
{
blk_read_sector(0);
- blk_read_sector(blk_sectors-1);
+ blk_read_sector(blk_info.sectors-1);
}
while (1) {
- uint64_t sector = rand() % blk_sectors;
+ uint64_t sector = rand() % blk_info.sectors;
struct timeval tv;
#ifdef BLKTEST_WRITE
- if (blk_mode == O_RDWR)
+ if (blk_info.mode == O_RDWR)
blk_write_sector(sector);
else
#endif
diff -r 4558664bea4a tools/ioemu/block-vbd.c
--- a/tools/ioemu/block-vbd.c Fri Apr 04 16:07:44 2008 +0100
+++ b/tools/ioemu/block-vbd.c Sat Apr 05 13:25:43 2008 +0100
@@ -49,11 +49,7 @@ typedef struct BDRVVbdState {
typedef struct BDRVVbdState {
struct blkfront_dev *dev;
int fd;
- int type;
- int mode;
- int info;
- uint64_t sectors;
- unsigned sector_size;
+ struct blkfront_info info;
QEMU_LIST_ENTRY(BDRVVbdState) list;
} BDRVVbdState;
@@ -81,13 +77,13 @@ static int vbd_open(BlockDriverState *bs
//handy to test posix access
//return -EIO;
- s->dev = init_blkfront((char *) filename, &s->sectors, &s->sector_size, &s->mode, &s->info);
+ s->dev = init_blkfront((char *) filename, &s->info);
if (!s->dev)
return -EIO;
- if (SECTOR_SIZE % s->sector_size) {
- printf("sector size is %d, we only support sector sizes that divide %d\n", s->sector_size, SECTOR_SIZE);
+ if (SECTOR_SIZE % s->info.sector_size) {
+ printf("sector size is %d, we only support sector sizes that divide %d\n", s->info.sector_size, SECTOR_SIZE);
return -EIO;
}
@@ -267,6 +263,32 @@ static void vbd_aio_cancel(BlockDriverAI
// Try to cancel. If can't, wait for it, drop the callback and call qemu_aio_release(acb)
}
+static void vbd_nop_cb(void *opaque, int ret)
+{
+}
+
+static BlockDriverAIOCB *vbd_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVVbdState *s = bs->opaque;
+ VbdAIOCB *acb = NULL;
+
+ if (s->info.barrier == 1) {
+ acb = vbd_aio_setup(bs, 0, NULL, 0,
+ s->info.flush == 1 ? vbd_nop_cb : cb, opaque);
+ if (!acb)
+ return NULL;
+ blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_WRITE_BARRIER);
+ }
+ if (s->info.flush == 1) {
+ acb = vbd_aio_setup(bs, 0, NULL, 0, cb, opaque);
+ if (!acb)
+ return NULL;
+ blkfront_aio_push_operation(&acb->aiocb, BLKIF_OP_FLUSH_DISKCACHE);
+ }
+ return &acb->common;
+}
+
static void vbd_close(BlockDriverState *bs)
{
BDRVVbdState *s = bs->opaque;
@@ -282,13 +304,14 @@ static int64_t vbd_getlength(BlockDrive
static int64_t vbd_getlength(BlockDriverState *bs)
{
BDRVVbdState *s = bs->opaque;
- return s->sectors * s->sector_size;
+ return s->info.sectors * s->info.sector_size;
}
-static void vbd_flush(BlockDriverState *bs)
+static int vbd_flush(BlockDriverState *bs)
{
BDRVVbdState *s = bs->opaque;
blkfront_sync(s->dev);
+ return 0;
}
/***********************************************/
@@ -333,6 +356,7 @@ BlockDriver bdrv_vbd = {
.bdrv_aio_read = vbd_aio_read,
.bdrv_aio_write = vbd_aio_write,
.bdrv_aio_cancel = vbd_aio_cancel,
+ .bdrv_aio_flush = vbd_aio_flush,
.aiocb_size = sizeof(VbdAIOCB),
.bdrv_read = vbd_read,
.bdrv_write = vbd_write,
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2008-04-05 12:48 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-05 12:48 [PATCH] stubdom: add asynchronous disk flush support Samuel Thibault
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.