* [Qemu-devel] [PATCH 2/6] change vectored block I/O API to plain iovecs
2009-03-14 19:27 [Qemu-devel] [PATCH 0/6] add real vectored block I/O support Christoph Hellwig
2009-03-14 19:27 ` [Qemu-devel] [PATCH 1/6] more BlockDriver C99 initializers Christoph Hellwig
@ 2009-03-14 19:28 ` Christoph Hellwig
2009-03-15 12:42 ` Avi Kivity
2009-03-14 19:28 ` [Qemu-devel] [PATCH 3/6] virtio-blk: use generic vectored I/O APIs Christoph Hellwig
` (3 subsequent siblings)
5 siblings, 1 reply; 20+ messages in thread
From: Christoph Hellwig @ 2009-03-14 19:28 UTC (permalink / raw)
To: qemu-devel
QEMUIOVector is a useful helper for the dma-helpers.c internals but for a generic
block API it's more of a hindrance. Some top-level consumers like virtio-blk
already have the plain iovec and segment number at hand and can pass it down
directly, and for those that just have a single element and need to fake up
a vector the plain iovec also is a lot easier.
Last but not least, we want to push down vectored I/O to the lowest level,
and if posix-aio-compat.c wants to stay somewhat true to its goal of
being like an enhanced POSIX AIO API it should stick to POSIX types.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: qemu/block.c
===================================================================
--- qemu.orig/block.c 2009-03-14 14:01:44.000000000 +0100
+++ qemu/block.c 2009-03-14 14:32:55.000000000 +0100
@@ -1253,7 +1253,9 @@ char *bdrv_snapshot_dump(char *buf, int
/* async I/Os */
typedef struct VectorTranslationState {
- QEMUIOVector *iov;
+ struct iovec *iov;
+ int nr_iov;
+ int size;
uint8_t *bounce;
int is_write;
BlockDriverAIOCB *aiocb;
@@ -1265,7 +1267,7 @@ static void bdrv_aio_rw_vector_cb(void *
VectorTranslationState *s = opaque;
if (!s->is_write) {
- qemu_iovec_from_buffer(s->iov, s->bounce, s->iov->size);
+ iovec_from_buffer(s->iov, s->nr_iov, s->bounce, s->size);
}
qemu_vfree(s->bounce);
s->this_aiocb->cb(s->this_aiocb->opaque, ret);
@@ -1274,7 +1276,8 @@ static void bdrv_aio_rw_vector_cb(void *
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
- QEMUIOVector *iov,
+ struct iovec *iov,
+ int nr_iov,
int nb_sectors,
BlockDriverCompletionFunc *cb,
void *opaque,
@@ -1286,10 +1289,12 @@ static BlockDriverAIOCB *bdrv_aio_rw_vec
s->this_aiocb = aiocb;
s->iov = iov;
- s->bounce = qemu_memalign(512, nb_sectors * 512);
+ s->nr_iov = nr_iov;
+ s->size = nb_sectors * 512;
+ s->bounce = qemu_memalign(512, s->size);
s->is_write = is_write;
if (is_write) {
- qemu_iovec_to_buffer(s->iov, s->bounce);
+ iovec_to_buffer(s->iov, s->nr_iov, s->bounce);
s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
bdrv_aio_rw_vector_cb, s);
} else {
@@ -1300,24 +1305,24 @@ static BlockDriverAIOCB *bdrv_aio_rw_vec
}
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
+ struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
- return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
+ return bdrv_aio_rw_vector(bs, sector_num, iov, nr_iov, nb_sectors,
cb, opaque, 0);
}
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
+ struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
- return bdrv_aio_rw_vector(bs, sector_num, iov, nb_sectors,
+ return bdrv_aio_rw_vector(bs, sector_num, iov, nr_iov, nb_sectors,
cb, opaque, 1);
}
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h 2009-03-14 14:00:54.000000000 +0100
+++ qemu/block.h 2009-03-14 14:32:55.000000000 +0100
@@ -87,10 +87,10 @@ typedef struct BlockDriverAIOCB BlockDri
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
+ struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
+ struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
Index: qemu/cutils.c
===================================================================
--- qemu.orig/cutils.c 2009-03-14 14:00:54.000000000 +0100
+++ qemu/cutils.c 2009-03-14 14:32:55.000000000 +0100
@@ -135,28 +135,29 @@ void qemu_iovec_reset(QEMUIOVector *qiov
qiov->size = 0;
}
-void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf)
+void iovec_to_buffer(struct iovec *iov, int nr_iov, void *buf)
{
uint8_t *p = (uint8_t *)buf;
int i;
- for (i = 0; i < qiov->niov; ++i) {
- memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len);
- p += qiov->iov[i].iov_len;
+ for (i = 0; i < nr_iov; ++i) {
+ memcpy(p, iov[i].iov_base, iov[i].iov_len);
+ p += iov[i].iov_len;
}
}
-void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count)
+void iovec_from_buffer(struct iovec *iov, int nr_iov,
+ const void *buf, size_t count)
{
const uint8_t *p = (const uint8_t *)buf;
size_t copy;
int i;
- for (i = 0; i < qiov->niov && count; ++i) {
+ for (i = 0; i < nr_iov && count; ++i) {
copy = count;
- if (copy > qiov->iov[i].iov_len)
- copy = qiov->iov[i].iov_len;
- memcpy(qiov->iov[i].iov_base, p, copy);
+ if (copy > iov[i].iov_len)
+ copy = iov[i].iov_len;
+ memcpy(iov[i].iov_base, p, copy);
p += copy;
count -= copy;
}
Index: qemu/dma-helpers.c
===================================================================
--- qemu.orig/dma-helpers.c 2009-03-14 14:00:54.000000000 +0100
+++ qemu/dma-helpers.c 2009-03-14 14:32:55.000000000 +0100
@@ -110,10 +110,10 @@ static void dma_bdrv_cb(void *opaque, in
}
if (dbs->is_write) {
- bdrv_aio_writev(dbs->bs, dbs->sector_num, &dbs->iov,
+ bdrv_aio_writev(dbs->bs, dbs->sector_num, dbs->iov.iov, dbs->iov.niov,
dbs->iov.size / 512, dma_bdrv_cb, dbs);
} else {
- bdrv_aio_readv(dbs->bs, dbs->sector_num, &dbs->iov,
+ bdrv_aio_readv(dbs->bs, dbs->sector_num, dbs->iov.iov, dbs->iov.niov,
dbs->iov.size / 512, dma_bdrv_cb, dbs);
}
}
Index: qemu/qemu-common.h
===================================================================
--- qemu.orig/qemu-common.h 2009-03-14 14:00:54.000000000 +0100
+++ qemu/qemu-common.h 2009-03-14 14:32:55.000000000 +0100
@@ -197,8 +197,10 @@ void qemu_iovec_init(QEMUIOVector *qiov,
void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
void qemu_iovec_destroy(QEMUIOVector *qiov);
void qemu_iovec_reset(QEMUIOVector *qiov);
-void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf);
-void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count);
+
+void iovec_to_buffer(struct iovec *iov, int nr_iov, void *buf);
+void iovec_from_buffer(struct iovec *iov, int nr_iov,
+ const void *buf, size_t count);
struct Monitor;
typedef struct Monitor Monitor;
^ permalink raw reply [flat|nested] 20+ messages in thread
* [Qemu-devel] [PATCH 4/6] remove bdrv_aio_read/bdrv_aio_write
2009-03-14 19:27 [Qemu-devel] [PATCH 0/6] add real vectored block I/O support Christoph Hellwig
` (2 preceding siblings ...)
2009-03-14 19:28 ` [Qemu-devel] [PATCH 3/6] virtio-blk: use generic vectored I/O APIs Christoph Hellwig
@ 2009-03-14 19:30 ` Christoph Hellwig
2009-03-14 19:30 ` [Qemu-devel] [PATCH 5/6] push down vector linearization to posix-aio-compat.c Christoph Hellwig
2009-03-14 19:31 ` [Qemu-devel] [PATCH 6/6] experimental native preadv/pwritev support for Linux Christoph Hellwig
5 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2009-03-14 19:30 UTC (permalink / raw)
To: qemu-devel
Always use the vectored APIs to reduce code churn once we switch the BlockDriver
API to be vectored.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: qemu/hw/ide.c
===================================================================
--- qemu.orig/hw/ide.c 2009-03-14 13:40:54.000000000 +0100
+++ qemu/hw/ide.c 2009-03-14 14:37:07.000000000 +0100
@@ -496,6 +496,7 @@ typedef struct BMDMAState {
IDEState *ide_if;
BlockDriverCompletionFunc *dma_cb;
BlockDriverAIOCB *aiocb;
+ struct iovec iov;
int64_t sector_num;
uint32_t nsector;
} BMDMAState;
@@ -1467,9 +1468,10 @@ static void ide_atapi_cmd_read_dma_cb(vo
#ifdef DEBUG_AIO
printf("aio_read_cd: lba=%u n=%d\n", s->lba, n);
#endif
- bm->aiocb = bdrv_aio_read(s->bs, (int64_t)s->lba << 2,
- s->io_buffer + data_offset, n * 4,
- ide_atapi_cmd_read_dma_cb, bm);
+ bm->iov.iov_base = s->io_buffer + data_offset;
+ bm->iov.iov_len = n * 4 * 512;
+ bm->aiocb = bdrv_aio_readv(s->bs, (int64_t)s->lba << 2, &bm->iov, 1,
+ n * 4, ide_atapi_cmd_read_dma_cb, bm);
if (!bm->aiocb) {
/* Note: media not present is the most likely case */
ide_atapi_cmd_error(s, SENSE_NOT_READY,
Index: qemu/hw/scsi-disk.c
===================================================================
--- qemu.orig/hw/scsi-disk.c 2009-03-14 13:40:22.000000000 +0100
+++ qemu/hw/scsi-disk.c 2009-03-14 14:37:07.000000000 +0100
@@ -52,9 +52,7 @@ typedef struct SCSIRequest {
/* Both sector and sector_count are in terms of qemu 512 byte blocks. */
uint64_t sector;
uint32_t sector_count;
- /* The amounnt of data in the buffer. */
- int buf_len;
- uint8_t *dma_buf;
+ struct iovec iov;
BlockDriverAIOCB *aiocb;
struct SCSIRequest *next;
uint32_t status;
@@ -89,12 +87,12 @@ static SCSIRequest *scsi_new_request(SCS
free_requests = r->next;
} else {
r = qemu_malloc(sizeof(SCSIRequest));
- r->dma_buf = qemu_memalign(512, SCSI_DMA_BUF_SIZE);
+ r->iov.iov_base = qemu_memalign(512, SCSI_DMA_BUF_SIZE);
}
r->dev = s;
r->tag = tag;
r->sector_count = 0;
- r->buf_len = 0;
+ r->iov.iov_len = 0;
r->aiocb = NULL;
r->status = 0;
@@ -173,9 +171,9 @@ static void scsi_read_complete(void * op
scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_NO_SENSE);
return;
}
- DPRINTF("Data ready tag=0x%x len=%d\n", r->tag, r->buf_len);
+ DPRINTF("Data ready tag=0x%x len=%d\n", r->tag, r->iov.iov_len);
- s->completion(s->opaque, SCSI_REASON_DATA, r->tag, r->buf_len);
+ s->completion(s->opaque, SCSI_REASON_DATA, r->tag, r->iov.iov_len);
}
/* Read more data from scsi device into buffer. */
@@ -193,9 +191,9 @@ static void scsi_read_data(SCSIDevice *d
return;
}
if (r->sector_count == (uint32_t)-1) {
- DPRINTF("Read buf_len=%d\n", r->buf_len);
+ DPRINTF("Read buf_len=%d\n", r->iov.iov_len);
r->sector_count = 0;
- s->completion(s->opaque, SCSI_REASON_DATA, r->tag, r->buf_len);
+ s->completion(s->opaque, SCSI_REASON_DATA, r->tag, r->iov.iov_len);
return;
}
DPRINTF("Read sector_count=%d\n", r->sector_count);
@@ -208,9 +206,9 @@ static void scsi_read_data(SCSIDevice *d
if (n > SCSI_DMA_BUF_SIZE / 512)
n = SCSI_DMA_BUF_SIZE / 512;
- r->buf_len = n * 512;
- r->aiocb = bdrv_aio_read(s->bdrv, r->sector, r->dma_buf, n,
- scsi_read_complete, r);
+ r->iov.iov_len = n * 512;
+ r->aiocb = bdrv_aio_readv(s->bdrv, r->sector, &r->iov, 1, n,
+ scsi_read_complete, r);
if (r->aiocb == NULL)
scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_HARDWARE_ERROR);
r->sector += n;
@@ -250,7 +248,7 @@ static void scsi_write_complete(void * o
return;
}
- n = r->buf_len / 512;
+ n = r->iov.iov_len / 512;
r->sector += n;
r->sector_count -= n;
if (r->sector_count == 0) {
@@ -260,7 +258,7 @@ static void scsi_write_complete(void * o
if (len > SCSI_DMA_BUF_SIZE) {
len = SCSI_DMA_BUF_SIZE;
}
- r->buf_len = len;
+ r->iov.iov_len = len;
DPRINTF("Write complete tag=0x%x more=%d\n", r->tag, len);
s->completion(s->opaque, SCSI_REASON_DATA, r->tag, len);
}
@@ -271,10 +269,10 @@ static void scsi_write_request(SCSIReque
SCSIDeviceState *s = r->dev;
uint32_t n;
- n = r->buf_len / 512;
+ n = r->iov.iov_len / 512;
if (n) {
- r->aiocb = bdrv_aio_write(s->bdrv, r->sector, r->dma_buf, n,
- scsi_write_complete, r);
+ r->aiocb = bdrv_aio_writev(s->bdrv, r->sector, &r->iov, 1, n,
+ scsi_write_complete, r);
if (r->aiocb == NULL)
scsi_command_complete(r, STATUS_CHECK_CONDITION,
SENSE_HARDWARE_ERROR);
@@ -334,7 +332,7 @@ static uint8_t *scsi_get_buf(SCSIDevice
BADF("Bad buffer tag 0x%x\n", tag);
return NULL;
}
- return r->dma_buf;
+ return r->iov.iov_base;
}
/* Execute a scsi command. Returns the length of the data expected by the
@@ -364,7 +362,7 @@ static int32_t scsi_send_command(SCSIDev
/* ??? Tags are not unique for different luns. We only implement a
single lun, so this should not matter. */
r = scsi_new_request(s, tag);
- outbuf = r->dma_buf;
+ outbuf = r->iov.iov_base;
is_write = 0;
DPRINTF("Command: lun=%d tag=0x%x data=0x%02x", lun, tag, buf[0]);
switch (command >> 5) {
@@ -426,7 +424,7 @@ static int32_t scsi_send_command(SCSIDev
outbuf[0] = 0xf0;
outbuf[1] = 0;
outbuf[2] = s->sense;
- r->buf_len = 4;
+ r->iov.iov_len = 4;
break;
case 0x12:
DPRINTF("Inquiry (len %d)\n", len);
@@ -451,20 +449,20 @@ static int32_t scsi_send_command(SCSIDev
DPRINTF("Inquiry EVPD[Supported pages] "
"buffer size %d\n", len);
- r->buf_len = 0;
+ r->iov.iov_len = 0;
if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
- outbuf[r->buf_len++] = 5;
+ outbuf[r->iov.iov_len++] = 5;
} else {
- outbuf[r->buf_len++] = 0;
+ outbuf[r->iov.iov_len++] = 0;
}
- outbuf[r->buf_len++] = 0x00; // this page
- outbuf[r->buf_len++] = 0x00;
- outbuf[r->buf_len++] = 3; // number of pages
- outbuf[r->buf_len++] = 0x00; // list of supported pages (this page)
- outbuf[r->buf_len++] = 0x80; // unit serial number
- outbuf[r->buf_len++] = 0x83; // device identification
+ outbuf[r->iov.iov_len++] = 0x00; // this page
+ outbuf[r->iov.iov_len++] = 0x00;
+ outbuf[r->iov.iov_len++] = 3; // number of pages
+ outbuf[r->iov.iov_len++] = 0x00; // list of supported pages (this page)
+ outbuf[r->iov.iov_len++] = 0x80; // unit serial number
+ outbuf[r->iov.iov_len++] = 0x83; // device identification
}
break;
case 0x80:
@@ -481,20 +479,20 @@ static int32_t scsi_send_command(SCSIDev
DPRINTF("Inquiry EVPD[Serial number] buffer size %d\n", len);
l = MIN(len, strlen(s->drive_serial_str));
- r->buf_len = 0;
+ r->iov.iov_len = 0;
/* Supported page codes */
if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
- outbuf[r->buf_len++] = 5;
+ outbuf[r->iov.iov_len++] = 5;
} else {
- outbuf[r->buf_len++] = 0;
+ outbuf[r->iov.iov_len++] = 0;
}
- outbuf[r->buf_len++] = 0x80; // this page
- outbuf[r->buf_len++] = 0x00;
- outbuf[r->buf_len++] = l;
- memcpy(&outbuf[r->buf_len], s->drive_serial_str, l);
- r->buf_len += l;
+ outbuf[r->iov.iov_len++] = 0x80; // this page
+ outbuf[r->iov.iov_len++] = 0x00;
+ outbuf[r->iov.iov_len++] = l;
+ memcpy(&outbuf[r->iov.iov_len], s->drive_serial_str, l);
+ r->iov.iov_len += l;
}
break;
@@ -508,25 +506,25 @@ static int32_t scsi_send_command(SCSIDev
DPRINTF("Inquiry EVPD[Device identification] "
"buffer size %d\n", len);
- r->buf_len = 0;
+ r->iov.iov_len = 0;
if (bdrv_get_type_hint(s->bdrv) == BDRV_TYPE_CDROM) {
- outbuf[r->buf_len++] = 5;
+ outbuf[r->iov.iov_len++] = 5;
} else {
- outbuf[r->buf_len++] = 0;
+ outbuf[r->iov.iov_len++] = 0;
}
- outbuf[r->buf_len++] = 0x83; // this page
- outbuf[r->buf_len++] = 0x00;
- outbuf[r->buf_len++] = 3 + id_len;
-
- outbuf[r->buf_len++] = 0x2; // ASCII
- outbuf[r->buf_len++] = 0; // not officially assigned
- outbuf[r->buf_len++] = 0; // reserved
- outbuf[r->buf_len++] = id_len; // length of data following
+ outbuf[r->iov.iov_len++] = 0x83; // this page
+ outbuf[r->iov.iov_len++] = 0x00;
+ outbuf[r->iov.iov_len++] = 3 + id_len;
+
+ outbuf[r->iov.iov_len++] = 0x2; // ASCII
+ outbuf[r->iov.iov_len++] = 0; // not officially assigned
+ outbuf[r->iov.iov_len++] = 0; // reserved
+ outbuf[r->iov.iov_len++] = id_len; // length of data following
- memcpy(&outbuf[r->buf_len],
+ memcpy(&outbuf[r->iov.iov_len],
bdrv_get_device_name(s->bdrv), id_len);
- r->buf_len += id_len;
+ r->iov.iov_len += id_len;
}
break;
default:
@@ -582,7 +580,7 @@ static int32_t scsi_send_command(SCSIDev
outbuf[4] = len - 5; /* Additional Length = (Len - 1) - 4 */
/* Sync data transfer and TCQ. */
outbuf[7] = 0x10 | (s->tcq ? 0x02 : 0);
- r->buf_len = len;
+ r->iov.iov_len = len;
break;
case 0x16:
DPRINTF("Reserve(6)\n");
@@ -717,10 +715,10 @@ static int32_t scsi_send_command(SCSIDev
p[21] = (16 * 176) & 0xff;
p += 22;
}
- r->buf_len = p - outbuf;
- outbuf[0] = r->buf_len - 4;
- if (r->buf_len > len)
- r->buf_len = len;
+ r->iov.iov_len = p - outbuf;
+ outbuf[0] = r->iov.iov_len - 4;
+ if (r->iov.iov_len > len)
+ r->iov.iov_len = len;
}
break;
case 0x1b:
@@ -752,7 +750,7 @@ static int32_t scsi_send_command(SCSIDev
outbuf[5] = 0;
outbuf[6] = s->cluster_size * 2;
outbuf[7] = 0;
- r->buf_len = 8;
+ r->iov.iov_len = 8;
} else {
scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_NOT_READY);
return 0;
@@ -811,7 +809,7 @@ static int32_t scsi_send_command(SCSIDev
if (toclen > 0) {
if (len > toclen)
len = toclen;
- r->buf_len = len;
+ r->iov.iov_len = len;
break;
}
error_cmd:
@@ -824,7 +822,7 @@ static int32_t scsi_send_command(SCSIDev
/* ??? This should probably return much more information. For now
just return the basic header indicating the CD-ROM profile. */
outbuf[7] = 8; // CD-ROM
- r->buf_len = 8;
+ r->iov.iov_len = 8;
break;
case 0x56:
DPRINTF("Reserve(10)\n");
@@ -861,7 +859,7 @@ static int32_t scsi_send_command(SCSIDev
outbuf[10] = s->cluster_size * 2;
outbuf[11] = 0;
/* Protection, exponent and lowest lba field left blank. */
- r->buf_len = len;
+ r->iov.iov_len = len;
} else {
scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_NOT_READY);
return 0;
@@ -876,7 +874,7 @@ static int32_t scsi_send_command(SCSIDev
goto fail;
memset(outbuf, 0, 16);
outbuf[3] = 8;
- r->buf_len = 16;
+ r->iov.iov_len = 16;
break;
case 0x2f:
DPRINTF("Verify (sector %d, count %d)\n", lba, len);
@@ -890,10 +888,10 @@ static int32_t scsi_send_command(SCSIDev
scsi_command_complete(r, STATUS_CHECK_CONDITION, SENSE_HARDWARE_ERROR);
return 0;
}
- if (r->sector_count == 0 && r->buf_len == 0) {
+ if (r->sector_count == 0 && r->iov.iov_len == 0) {
scsi_command_complete(r, STATUS_GOOD, SENSE_NO_SENSE);
}
- len = r->sector_count * 512 + r->buf_len;
+ len = r->sector_count * 512 + r->iov.iov_len;
if (is_write) {
return -len;
} else {
Index: qemu/block-qcow.c
===================================================================
--- qemu.orig/block-qcow.c 2009-03-14 13:40:54.000000000 +0100
+++ qemu/block-qcow.c 2009-03-14 14:37:07.000000000 +0100
@@ -530,6 +530,7 @@ typedef struct QCowAIOCB {
int n;
uint64_t cluster_offset;
uint8_t *cluster_data;
+ struct iovec hd_iov;
BlockDriverAIOCB *hd_aiocb;
} QCowAIOCB;
@@ -584,8 +585,10 @@ static void qcow_aio_read_cb(void *opaqu
if (!acb->cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
- acb->hd_aiocb = bdrv_aio_read(bs->backing_hd,
- acb->sector_num, acb->buf, acb->n, qcow_aio_read_cb, acb);
+ acb->hd_iov.iov_base = acb->buf;
+ acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
+ &acb->hd_iov, 1, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
} else {
@@ -605,9 +608,11 @@ static void qcow_aio_read_cb(void *opaqu
ret = -EIO;
goto fail;
}
- acb->hd_aiocb = bdrv_aio_read(s->hd,
+ acb->hd_iov.iov_base = acb->buf;
+ acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_aiocb = bdrv_aio_readv(s->hd,
(acb->cluster_offset >> 9) + index_in_cluster,
- acb->buf, acb->n, qcow_aio_read_cb, acb);
+ &acb->hd_iov, 1, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
}
@@ -687,10 +692,13 @@ static void qcow_aio_write_cb(void *opaq
} else {
src_buf = acb->buf;
}
- acb->hd_aiocb = bdrv_aio_write(s->hd,
- (cluster_offset >> 9) + index_in_cluster,
- src_buf, acb->n,
- qcow_aio_write_cb, acb);
+
+ acb->hd_iov.iov_base = (void *)src_buf;
+ acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_aiocb = bdrv_aio_writev(s->hd,
+ (cluster_offset >> 9) + index_in_cluster,
+ &acb->hd_iov, 1, acb->n,
+ qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
}
Index: qemu/block-qcow2.c
===================================================================
--- qemu.orig/block-qcow2.c 2009-03-14 13:40:54.000000000 +0100
+++ qemu/block-qcow2.c 2009-03-14 14:37:07.000000000 +0100
@@ -1175,6 +1175,7 @@ typedef struct QCowAIOCB {
uint64_t cluster_offset;
uint8_t *cluster_data;
BlockDriverAIOCB *hd_aiocb;
+ struct iovec hd_iov;
QEMUBH *bh;
QCowL2Meta l2meta;
} QCowAIOCB;
@@ -1252,8 +1253,11 @@ fail:
n1 = backing_read1(bs->backing_hd, acb->sector_num,
acb->buf, acb->n);
if (n1 > 0) {
- acb->hd_aiocb = bdrv_aio_read(bs->backing_hd, acb->sector_num,
- acb->buf, acb->n, qcow_aio_read_cb, acb);
+ acb->hd_iov.iov_base = acb->buf;
+ acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
+ &acb->hd_iov, 1, acb->n,
+ qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
} else {
@@ -1282,9 +1286,12 @@ fail:
ret = -EIO;
goto fail;
}
- acb->hd_aiocb = bdrv_aio_read(s->hd,
+
+ acb->hd_iov.iov_base = acb->buf;
+ acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_aiocb = bdrv_aio_readv(s->hd,
(acb->cluster_offset >> 9) + index_in_cluster,
- acb->buf, acb->n, qcow_aio_read_cb, acb);
+ &acb->hd_iov, 1, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
}
@@ -1381,10 +1388,12 @@ static void qcow_aio_write_cb(void *opaq
} else {
src_buf = acb->buf;
}
- acb->hd_aiocb = bdrv_aio_write(s->hd,
- (acb->cluster_offset >> 9) + index_in_cluster,
- src_buf, acb->n,
- qcow_aio_write_cb, acb);
+ acb->hd_iov.iov_base = (void *)src_buf;
+ acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_aiocb = bdrv_aio_writev(s->hd,
+ (acb->cluster_offset >> 9) + index_in_cluster,
+ &acb->hd_iov, 1, acb->n,
+ qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
goto fail;
}
Index: qemu/block.c
===================================================================
--- qemu.orig/block.c 2009-03-14 14:36:32.000000000 +0100
+++ qemu/block.c 2009-03-14 14:37:07.000000000 +0100
@@ -53,6 +53,12 @@ typedef struct BlockDriverAIOCBSync {
int ret;
} BlockDriverAIOCBSync;
+static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs,
+ int64_t sector_num, uint8_t *buf, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs,
+ int64_t sector_num, const uint8_t *buf, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
int64_t sector_num, uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
@@ -1318,7 +1324,7 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockD
cb, opaque, 1);
}
-BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
+static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
@@ -1341,7 +1347,7 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDri
return ret;
}
-BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
+static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
Index: qemu/block.h
===================================================================
--- qemu.orig/block.h 2009-03-14 14:36:32.000000000 +0100
+++ qemu/block.h 2009-03-14 14:37:07.000000000 +0100
@@ -92,13 +92,6 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDr
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
-
-BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
void bdrv_aio_cancel(BlockDriverAIOCB *acb);
/* sg packet commands */
^ permalink raw reply [flat|nested] 20+ messages in thread
* [Qemu-devel] [PATCH 5/6] push down vector linearization to posix-aio-compat.c
2009-03-14 19:27 [Qemu-devel] [PATCH 0/6] add real vectored block I/O support Christoph Hellwig
` (3 preceding siblings ...)
2009-03-14 19:30 ` [Qemu-devel] [PATCH 4/6] remove bdrv_aio_read/bdrv_aio_write Christoph Hellwig
@ 2009-03-14 19:30 ` Christoph Hellwig
2009-03-14 19:31 ` [Qemu-devel] [PATCH 6/6] experimental native preadv/pwritev support for Linux Christoph Hellwig
5 siblings, 0 replies; 20+ messages in thread
From: Christoph Hellwig @ 2009-03-14 19:30 UTC (permalink / raw)
To: qemu-devel
Make all AIO requests vectored and defer linearization until the actual
I/O thread. This prepares for using native preadv/pwritev.
Also enables asynchronous direct I/O by handling that case in the I/O thread.
For now this disables support for scsi-generic requests until I figure out a
way to fit it into the new world order.
Win32 support is untested and uncompiled. Then again I can't even find
a knob to turn on win32 aio support.
Qcow and qcow2 probably want to be adapted to directly deal with multi-segment
requests, but that can be implemented later.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: qemu/block.c
===================================================================
--- qemu.orig/block.c 2009-03-14 18:08:40.000000000 +0100
+++ qemu/block.c 2009-03-14 18:08:41.000000000 +0100
@@ -51,19 +51,19 @@ typedef struct BlockDriverAIOCBSync {
BlockDriverAIOCB common;
QEMUBH *bh;
int ret;
+ /* vector translation state */
+ struct iovec *iov;
+ int nr_iov;
+ int size;
+ uint8_t *bounce;
+ int is_write;
} BlockDriverAIOCBSync;
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque);
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, int64_t sector_num,
+ struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
@@ -142,10 +142,10 @@ void path_combine(char *dest, int dest_s
static void bdrv_register(BlockDriver *bdrv)
{
- if (!bdrv->bdrv_aio_read) {
+ if (!bdrv->bdrv_aio_readv) {
/* add AIO emulation layer */
- bdrv->bdrv_aio_read = bdrv_aio_read_em;
- bdrv->bdrv_aio_write = bdrv_aio_write_em;
+ bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
} else if (!bdrv->bdrv_read) {
@@ -1250,84 +1250,10 @@ char *bdrv_snapshot_dump(char *buf, int
/**************************************************************/
/* async I/Os */
-typedef struct VectorTranslationState {
- struct iovec *iov;
- int nr_iov;
- int size;
- uint8_t *bounce;
- int is_write;
- BlockDriverAIOCB *aiocb;
- BlockDriverAIOCB *this_aiocb;
-} VectorTranslationState;
-
-static void bdrv_aio_rw_vector_cb(void *opaque, int ret)
-{
- VectorTranslationState *s = opaque;
-
- if (!s->is_write) {
- iovec_from_buffer(s->iov, s->nr_iov, s->bounce, s->size);
- }
- qemu_vfree(s->bounce);
- s->this_aiocb->cb(s->this_aiocb->opaque, ret);
- qemu_aio_release(s->this_aiocb);
-}
-
-static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- struct iovec *iov,
- int nr_iov,
- int nb_sectors,
- BlockDriverCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- VectorTranslationState *s = qemu_mallocz(sizeof(*s));
- BlockDriverAIOCB *aiocb = qemu_aio_get(bs, cb, opaque);
-
- s->this_aiocb = aiocb;
- s->iov = iov;
- s->nr_iov = nr_iov;
- s->size = nb_sectors * 512;
- s->bounce = qemu_memalign(512, s->size);
- s->is_write = is_write;
- if (is_write) {
- iovec_to_buffer(s->iov, s->nr_iov, s->bounce);
- s->aiocb = bdrv_aio_write(bs, sector_num, s->bounce, nb_sectors,
- bdrv_aio_rw_vector_cb, s);
- } else {
- s->aiocb = bdrv_aio_read(bs, sector_num, s->bounce, nb_sectors,
- bdrv_aio_rw_vector_cb, s);
- }
- return aiocb;
-}
-
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- if (bdrv_check_request(bs, sector_num, nb_sectors))
- return NULL;
-
- return bdrv_aio_rw_vector(bs, sector_num, iov, nr_iov, nb_sectors,
- cb, opaque, 0);
-}
-
-BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- struct iovec *iov, int nr_iov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
- if (bdrv_check_request(bs, sector_num, nb_sectors))
- return NULL;
-
- return bdrv_aio_rw_vector(bs, sector_num, iov, nr_iov, nb_sectors,
- cb, opaque, 1);
-}
-
-static BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
-{
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
@@ -1336,7 +1262,8 @@ static BlockDriverAIOCB *bdrv_aio_read(B
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
- ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
+ ret = drv->bdrv_aio_readv(bs, sector_num, iov, nr_iov, nb_sectors,
+ cb, opaque);
if (ret) {
/* Update stats even though technically transfer has not happened. */
@@ -1347,9 +1274,9 @@ static BlockDriverAIOCB *bdrv_aio_read(B
return ret;
}
-static BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ struct iovec *iov, int nr_iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
{
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
@@ -1361,7 +1288,8 @@ static BlockDriverAIOCB *bdrv_aio_write(
if (bdrv_check_request(bs, sector_num, nb_sectors))
return NULL;
- ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
+ ret = drv->bdrv_aio_writev(bs, sector_num, iov, nr_iov, nb_sectors,
+ cb, opaque);
if (ret) {
/* Update stats even though technically transfer has not happened. */
@@ -1376,11 +1304,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *a
{
BlockDriver *drv = acb->bs->drv;
- if (acb->cb == bdrv_aio_rw_vector_cb) {
- VectorTranslationState *s = acb->opaque;
- acb = s->aiocb;
- }
-
drv->bdrv_aio_cancel(acb);
}
@@ -1391,42 +1314,66 @@ void bdrv_aio_cancel(BlockDriverAIOCB *a
static void bdrv_aio_bh_cb(void *opaque)
{
BlockDriverAIOCBSync *acb = opaque;
+
+ /* Scatter read data out of the bounce buffer before it is freed. */
+ if (!acb->is_write)
+ iovec_from_buffer(acb->iov, acb->nr_iov, acb->bounce, acb->size);
+ qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, acb->ret);
+
qemu_aio_release(acb);
}
-static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ struct iovec *iov,
+ int nr_iov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ int is_write)
+
{
- BlockDriverAIOCBSync *acb;
- int ret;
+ BlockDriverAIOCBSync *acb = qemu_aio_get(bs, cb, opaque);
+ /* Stage the whole request in one 512-byte aligned bounce buffer. */
+ acb->is_write = is_write;
+ acb->iov = iov;
+ acb->nr_iov = nr_iov;
+ acb->size = nb_sectors * 512;
+ acb->bounce = qemu_memalign(512, acb->size);
- acb = qemu_aio_get(bs, cb, opaque);
if (!acb->bh)
acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- acb->ret = ret;
+ /* The transfer runs right here; its result is reported from the BH. */
+ if (is_write) {
+ iovec_to_buffer(acb->iov, acb->nr_iov, acb->bounce);
+ acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+ } else {
+ acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+ }
+
qemu_bh_schedule(acb->bh);
+
return &acb->common;
}
-static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- BlockDriverAIOCBSync *acb;
- int ret;
+ return bdrv_aio_rw_vector(bs, sector_num, iov, nr_iov, nb_sectors,
+ cb, opaque, 0);
+}
- acb = qemu_aio_get(bs, cb, opaque);
- if (!acb->bh)
- acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
- ret = bdrv_write(bs, sector_num, buf, nb_sectors);
- acb->ret = ret;
- qemu_bh_schedule(acb->bh);
- return &acb->common;
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, int64_t sector_num,
+ struct iovec *iov, int nr_iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_rw_vector(bs, sector_num, iov, nr_iov, nb_sectors,
+ cb, opaque, 1);
}
+
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
@@ -1449,10 +1396,13 @@ static int bdrv_read_em(BlockDriverState
{
int async_ret;
BlockDriverAIOCB *acb;
+ struct iovec iov;
async_ret = NOT_DONE;
- acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
- bdrv_rw_em_cb, &async_ret);
+ iov.iov_base = buf;
+ iov.iov_len = nb_sectors * 512;
+ acb = bdrv_aio_readv(bs, sector_num, &iov, 1, nb_sectors,
+ bdrv_rw_em_cb, &async_ret);
if (acb == NULL)
return -1;
@@ -1468,10 +1418,13 @@ static int bdrv_write_em(BlockDriverStat
{
int async_ret;
BlockDriverAIOCB *acb;
+ struct iovec iov;
async_ret = NOT_DONE;
- acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
- bdrv_rw_em_cb, &async_ret);
+ iov.iov_base = (void *)buf;
+ iov.iov_len = nb_sectors * 512;
+ acb = bdrv_aio_writev(bs, sector_num, &iov, 1, nb_sectors,
+ bdrv_rw_em_cb, &async_ret);
if (acb == NULL)
return -1;
while (async_ret == NOT_DONE) {
Index: qemu/block-qcow.c
===================================================================
--- qemu.orig/block-qcow.c 2009-03-14 18:08:40.000000000 +0100
+++ qemu/block-qcow.c 2009-03-14 18:08:41.000000000 +0100
@@ -525,6 +525,8 @@ static int qcow_write(BlockDriverState *
typedef struct QCowAIOCB {
BlockDriverAIOCB common;
int64_t sector_num;
+ struct iovec *iov;
+ int nr_iov;
uint8_t *buf;
int nb_sectors;
int n;
@@ -542,12 +544,8 @@ static void qcow_aio_read_cb(void *opaqu
int index_in_cluster;
acb->hd_aiocb = NULL;
- if (ret < 0) {
- fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
redo:
/* post process the read buffer */
@@ -569,9 +567,8 @@ static void qcow_aio_read_cb(void *opaqu
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
/* prepare next AIO request */
@@ -590,7 +587,7 @@ static void qcow_aio_read_cb(void *opaqu
acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
&acb->hd_iov, 1, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->n);
@@ -599,14 +596,14 @@ static void qcow_aio_read_cb(void *opaqu
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(s, acb->cluster_offset) < 0)
- goto fail;
+ goto done;
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
goto redo;
} else {
if ((acb->cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
acb->hd_iov.iov_base = acb->buf;
acb->hd_iov.iov_len = acb->n * 512;
@@ -614,12 +611,23 @@ static void qcow_aio_read_cb(void *opaqu
(acb->cluster_offset >> 9) + index_in_cluster,
&acb->hd_iov, 1, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+ }
+
+ return;
+
+done:
+ if (acb->nr_iov > 1) {
+ iovec_from_buffer(acb->iov, acb->nr_iov, acb->buf,
+ acb->nb_sectors * 512);
+ qemu_vfree(acb->buf);
}
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
QCowAIOCB *acb;
@@ -629,7 +637,12 @@ static BlockDriverAIOCB *qcow_aio_read(B
return NULL;
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
- acb->buf = buf;
+ acb->iov = iov;
+ acb->nr_iov = nr_iov;
+ if (nr_iov > 1)
+ acb->buf = qemu_memalign(512, nb_sectors * 512);
+ else
+ acb->buf = iov->iov_base;
acb->nb_sectors = nb_sectors;
acb->n = 0;
acb->cluster_offset = 0;
@@ -649,12 +662,8 @@ static void qcow_aio_write_cb(void *opaq
acb->hd_aiocb = NULL;
- if (ret < 0) {
- fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
acb->nb_sectors -= acb->n;
acb->sector_num += acb->n;
@@ -662,9 +671,8 @@ static void qcow_aio_write_cb(void *opaq
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -676,14 +684,14 @@ static void qcow_aio_write_cb(void *opaq
index_in_cluster + acb->n);
if (!cluster_offset || (cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
if (s->crypt_method) {
if (!acb->cluster_data) {
acb->cluster_data = qemu_mallocz(s->cluster_size);
if (!acb->cluster_data) {
ret = -ENOMEM;
- goto fail;
+ goto done;
}
}
encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
@@ -700,11 +708,18 @@ static void qcow_aio_write_cb(void *opaq
&acb->hd_iov, 1, acb->n,
qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+ return;
+
+done:
+ if (acb->nr_iov > 1)
+ qemu_vfree(acb->buf);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVQcowState *s = bs->opaque;
@@ -717,7 +732,13 @@ static BlockDriverAIOCB *qcow_aio_write(
return NULL;
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
- acb->buf = (uint8_t *)buf;
+ acb->iov = iov;
+ acb->nr_iov = nr_iov;
+ if (nr_iov > 1) {
+ acb->buf = qemu_memalign(512, nb_sectors * 512);
+ iovec_to_buffer(iov, nr_iov, acb->buf);
+ } else
+ acb->buf = iov->iov_base;
acb->nb_sectors = nb_sectors;
acb->n = 0;
@@ -905,8 +926,8 @@ BlockDriver bdrv_qcow = {
.bdrv_is_allocated = qcow_is_allocated,
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
- .bdrv_aio_read = qcow_aio_read,
- .bdrv_aio_write = qcow_aio_write,
+ .bdrv_aio_readv = qcow_aio_readv,
+ .bdrv_aio_writev = qcow_aio_writev,
.bdrv_aio_cancel = qcow_aio_cancel,
.aiocb_size = sizeof(QCowAIOCB),
.bdrv_write_compressed = qcow_write_compressed,
Index: qemu/block-qcow2.c
===================================================================
--- qemu.orig/block-qcow2.c 2009-03-14 18:08:40.000000000 +0100
+++ qemu/block-qcow2.c 2009-03-14 18:08:41.000000000 +0100
@@ -1169,6 +1169,8 @@ static int qcow_write(BlockDriverState *
typedef struct QCowAIOCB {
BlockDriverAIOCB common;
int64_t sector_num;
+ struct iovec *iov;
+ int nr_iov;
uint8_t *buf;
int nb_sectors;
int n;
@@ -1211,12 +1213,8 @@ static void qcow_aio_read_cb(void *opaqu
int index_in_cluster, n1;
acb->hd_aiocb = NULL;
- if (ret < 0) {
-fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
/* post process the read buffer */
if (!acb->cluster_offset) {
@@ -1237,9 +1235,8 @@ fail:
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
/* prepare next AIO request */
@@ -1259,32 +1256,32 @@ fail:
&acb->hd_iov, 1, acb->n,
qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
} else {
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
- goto fail;
+ goto done;
}
} else {
/* Note: in this case, no need to wait */
memset(acb->buf, 0, 512 * acb->n);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
- goto fail;
+ goto done;
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
if (decompress_cluster(s, acb->cluster_offset) < 0)
- goto fail;
+ goto done;
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
- goto fail;
+ goto done;
} else {
if ((acb->cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
acb->hd_iov.iov_base = acb->buf;
@@ -1293,13 +1290,23 @@ fail:
(acb->cluster_offset >> 9) + index_in_cluster,
&acb->hd_iov, 1, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+ }
+
+ return;
+done:
+ if (acb->nr_iov > 1) {
+ iovec_from_buffer(acb->iov, acb->nr_iov, acb->buf,
+ acb->nb_sectors * 512);
+ qemu_vfree(acb->buf);
}
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
QCowAIOCB *acb;
@@ -1308,7 +1315,14 @@ static QCowAIOCB *qcow_aio_setup(BlockDr
return NULL;
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
- acb->buf = buf;
+ acb->iov = iov;
+ acb->nr_iov = nr_iov;
+ if (nr_iov > 1) {
+ acb->buf = qemu_memalign(512, nb_sectors * 512);
+ if (is_write)
+ iovec_to_buffer(iov, nr_iov, acb->buf);
+ } else
+ acb->buf = iov->iov_base;
acb->nb_sectors = nb_sectors;
acb->n = 0;
acb->cluster_offset = 0;
@@ -1316,13 +1330,14 @@ static QCowAIOCB *qcow_aio_setup(BlockDr
return acb;
}
-static BlockDriverAIOCB *qcow_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
QCowAIOCB *acb;
- acb = qcow_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+ acb = qcow_aio_setup(bs, sector_num, iov, nr_iov, nb_sectors,
+ cb, opaque, 0);
if (!acb)
return NULL;
@@ -1341,16 +1356,12 @@ static void qcow_aio_write_cb(void *opaq
acb->hd_aiocb = NULL;
- if (ret < 0) {
- fail:
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- return;
- }
+ if (ret < 0)
+ goto done;
if (alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
- goto fail;
+ goto done;
}
acb->nb_sectors -= acb->n;
@@ -1359,9 +1370,8 @@ static void qcow_aio_write_cb(void *opaq
if (acb->nb_sectors == 0) {
/* request completed */
- acb->common.cb(acb->common.opaque, 0);
- qemu_aio_release(acb);
- return;
+ ret = 0;
+ goto done;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
@@ -1375,7 +1385,7 @@ static void qcow_aio_write_cb(void *opaq
n_end, &acb->n, &acb->l2meta);
if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
ret = -EIO;
- goto fail;
+ goto done;
}
if (s->crypt_method) {
if (!acb->cluster_data) {
@@ -1395,11 +1405,19 @@ static void qcow_aio_write_cb(void *opaq
&acb->hd_iov, 1, acb->n,
qcow_aio_write_cb, acb);
if (acb->hd_aiocb == NULL)
- goto fail;
+ goto done;
+
+ return;
+
+done:
+ if (acb->nr_iov > 1)
+ qemu_vfree(acb->buf);
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
}
-static BlockDriverAIOCB *qcow_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVQcowState *s = bs->opaque;
@@ -1407,7 +1425,8 @@ static BlockDriverAIOCB *qcow_aio_write(
s->cluster_cache_offset = -1; /* disable compressed cache */
- acb = qcow_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+ acb = qcow_aio_setup(bs, sector_num, iov, nr_iov, nb_sectors,
+ cb, opaque, 1);
if (!acb)
return NULL;
@@ -2611,8 +2630,8 @@ BlockDriver bdrv_qcow2 = {
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
- .bdrv_aio_read = qcow_aio_read,
- .bdrv_aio_write = qcow_aio_write,
+ .bdrv_aio_readv = qcow_aio_readv,
+ .bdrv_aio_writev = qcow_aio_writev,
.bdrv_aio_cancel = qcow_aio_cancel,
.aiocb_size = sizeof(QCowAIOCB),
.bdrv_write_compressed = qcow_write_compressed,
Index: qemu/block-raw-posix.c
===================================================================
--- qemu.orig/block-raw-posix.c 2009-03-14 18:03:55.000000000 +0100
+++ qemu/block-raw-posix.c 2009-03-14 18:08:41.000000000 +0100
@@ -555,8 +555,8 @@ static int posix_aio_init(void)
return 0;
}
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
+ struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -570,24 +570,25 @@ static RawAIOCB *raw_aio_setup(BlockDriv
return NULL;
acb->aiocb.aio_fildes = s->fd;
acb->aiocb.ev_signo = SIGUSR2;
- acb->aiocb.aio_buf = buf;
- if (nb_sectors < 0)
- acb->aiocb.aio_nbytes = -nb_sectors;
- else
- acb->aiocb.aio_nbytes = nb_sectors * 512;
+ acb->aiocb.aio_iov = iov;
+ acb->aiocb.aio_niov = nr_iov;
+ acb->aiocb.aio_nbytes = nb_sectors * 512;
acb->aiocb.aio_offset = sector_num * 512;
+ acb->aiocb.aio_flags = 0;
+
+ /*
+ * If O_DIRECT is used the buffer needs to be aligned on a sector
+ * boundary. Tell the low level code to ensure that in case it's
+ * not done yet.
+ */
+ if (s->aligned_buf)
+ acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
+
acb->next = posix_aio_state->first_aio;
posix_aio_state->first_aio = acb;
return acb;
}
-static void raw_aio_em_cb(void* opaque)
-{
- RawAIOCB *acb = opaque;
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_aio_release(acb);
-}
-
static void raw_aio_remove(RawAIOCB *acb)
{
RawAIOCB **pacb;
@@ -607,28 +608,13 @@ static void raw_aio_remove(RawAIOCB *acb
}
}
-static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
- /*
- * If O_DIRECT is used and the buffer is not aligned fall back
- * to synchronous IO.
- */
- BDRVRawState *s = bs->opaque;
-
- if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
- QEMUBH *bh;
- acb = qemu_aio_get(bs, cb, opaque);
- acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
- bh = qemu_bh_new(raw_aio_em_cb, acb);
- qemu_bh_schedule(bh);
- return &acb->common;
- }
-
- acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+ acb = raw_aio_setup(bs, sector_num, iov, nr_iov, nb_sectors, cb, opaque);
if (!acb)
return NULL;
if (qemu_paio_read(&acb->aiocb) < 0) {
@@ -638,28 +624,13 @@ static BlockDriverAIOCB *raw_aio_read(Bl
return &acb->common;
}
-static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
- /*
- * If O_DIRECT is used and the buffer is not aligned fall back
- * to synchronous IO.
- */
- BDRVRawState *s = bs->opaque;
-
- if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
- QEMUBH *bh;
- acb = qemu_aio_get(bs, cb, opaque);
- acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
- bh = qemu_bh_new(raw_aio_em_cb, acb);
- qemu_bh_schedule(bh);
- return &acb->common;
- }
-
- acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
+ acb = raw_aio_setup(bs, sector_num, iov, nr_iov, nb_sectors, cb, opaque);
if (!acb)
return NULL;
if (qemu_paio_write(&acb->aiocb) < 0) {
@@ -821,8 +792,8 @@ BlockDriver bdrv_raw = {
.bdrv_flush = raw_flush,
#ifdef CONFIG_AIO
- .bdrv_aio_read = raw_aio_read,
- .bdrv_aio_write = raw_aio_write,
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
@@ -1173,6 +1144,7 @@ static int raw_sg_recv_response(BlockDri
return raw_pread(bs, -1, buf, count);
}
+#if 0
static BlockDriverAIOCB *raw_sg_aio_read(BlockDriverState *bs,
void *buf, int count,
BlockDriverCompletionFunc *cb,
@@ -1188,6 +1160,7 @@ static BlockDriverAIOCB *raw_sg_aio_writ
{
return raw_aio_write(bs, 0, buf, -(int64_t)count, cb, opaque);
}
+#endif
BlockDriver bdrv_host_device = {
.format_name = "host_device",
@@ -1197,8 +1170,8 @@ BlockDriver bdrv_host_device = {
.bdrv_flush = raw_flush,
#ifdef CONFIG_AIO
- .bdrv_aio_read = raw_aio_read,
- .bdrv_aio_write = raw_aio_write,
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB),
#endif
@@ -1216,6 +1189,8 @@ BlockDriver bdrv_host_device = {
.bdrv_ioctl = raw_ioctl,
.bdrv_sg_send_command = raw_sg_send_command,
.bdrv_sg_recv_response = raw_sg_recv_response,
+#if 0
.bdrv_sg_aio_read = raw_sg_aio_read,
.bdrv_sg_aio_write = raw_sg_aio_write,
+#endif
};
Index: qemu/block-raw-win32.c
===================================================================
--- qemu.orig/block-raw-win32.c 2009-03-14 18:03:55.000000000 +0100
+++ qemu/block-raw-win32.c 2009-03-14 18:08:41.000000000 +0100
@@ -44,6 +44,10 @@ typedef struct RawAIOCB {
BlockDriverAIOCB common;
HANDLE hEvent;
OVERLAPPED ov;
+ struct iovec *iov;
+ int nr_iov;
+ char *buf;
+ int is_write;
int count;
} RawAIOCB;
@@ -188,6 +192,13 @@ static void raw_aio_cb(void *opaque)
int ret;
ret = GetOverlappedResult(s->hfile, &acb->ov, &ret_count, TRUE);
+
+ if (acb->nr_iov > 1) {
+ if (!acb->is_write) /* a read must be scattered back to the caller */
+ iovec_from_buffer(acb->iov, acb->nr_iov, acb->buf, acb->count);
+ qemu_vfree(acb->buf);
+ }
+
if (!ret || ret_count != acb->count) {
acb->common.cb(acb->common.opaque, -EIO);
} else {
@@ -196,7 +207,7 @@ static void raw_aio_cb(void *opaque)
}
static RawAIOCB *raw_aio_setup(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+ int64_t sector_num, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
RawAIOCB *acb;
@@ -220,44 +231,63 @@ static RawAIOCB *raw_aio_setup(BlockDriv
return acb;
}
-static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
RawAIOCB *acb;
int ret;
- acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+ acb = raw_aio_setup(bs, sector_num, nb_sectors, cb, opaque);
if (!acb)
return NULL;
- ret = ReadFile(s->hfile, buf, acb->count, NULL, &acb->ov);
+ /* ReadFile fills one linear buffer: bounce when there are several segments. */
+ acb->is_write = 0;
+ acb->iov = iov;
+ acb->nr_iov = nr_iov;
+ if (nr_iov > 1)
+ acb->buf = qemu_memalign(512, acb->count);
+ else
+ acb->buf = iov->iov_base;
+
+ ret = ReadFile(s->hfile, acb->buf, acb->count, NULL, &acb->ov);
if (!ret) {
qemu_aio_release(acb);
return NULL;
}
qemu_aio_release(acb);
- return (BlockDriverAIOCB *)acb;
+ return &acb->common;
}
-static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
RawAIOCB *acb;
int ret;
- acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
+ acb = raw_aio_setup(bs, sector_num, nb_sectors, cb, opaque);
if (!acb)
return NULL;
+ /* WriteFile takes one linear buffer: gather several segments into a bounce. */
+ acb->is_write = 1;
+ acb->iov = iov;
+ acb->nr_iov = nr_iov;
+ if (nr_iov > 1) {
+ acb->buf = qemu_memalign(512, acb->count);
+ iovec_to_buffer(iov, nr_iov, acb->buf);
+ } else
+ acb->buf = iov->iov_base;
+
-ret = WriteFile(s->hfile, buf, acb->count, NULL, &acb->ov);
+ ret = WriteFile(s->hfile, acb->buf, acb->count, NULL, &acb->ov);
if (!ret) {
qemu_aio_release(acb);
return NULL;
}
qemu_aio_release(acb);
- return (BlockDriverAIOCB *)acb;
+ return &acb->common;
}
static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
@@ -359,8 +389,8 @@ BlockDriver bdrv_raw = {
.bdrv_flush = raw_flush,
#ifdef WIN32_AIO
- .bdrv_aio_read = raw_aio_read,
- .bdrv_aio_write = raw_aio_write,
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB);
#endif
@@ -508,8 +538,8 @@ BlockDriver bdrv_host_device = {
.bdrv_flush = raw_flush,
#ifdef WIN32_AIO
- .bdrv_aio_read = raw_aio_read,
- .bdrv_aio_write = raw_aio_write,
+ .bdrv_aio_readv = raw_aio_readv,
+ .bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_cancel = raw_aio_cancel,
.aiocb_size = sizeof(RawAIOCB);
#endif
Index: qemu/block_int.h
===================================================================
--- qemu.orig/block_int.h 2009-03-14 18:03:55.000000000 +0100
+++ qemu/block_int.h 2009-03-14 18:08:41.000000000 +0100
@@ -48,11 +48,11 @@ struct BlockDriver {
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
int (*bdrv_make_empty)(BlockDriverState *bs);
/* aio */
- BlockDriverAIOCB *(*bdrv_aio_read)(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors,
+ BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
- BlockDriverAIOCB *(*bdrv_aio_write)(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors,
+ BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, struct iovec *iov, int nr_iov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque);
void (*bdrv_aio_cancel)(BlockDriverAIOCB *acb);
int aiocb_size;
Index: qemu/posix-aio-compat.c
===================================================================
--- qemu.orig/posix-aio-compat.c 2009-03-14 18:03:55.000000000 +0100
+++ qemu/posix-aio-compat.c 2009-03-14 18:11:25.000000000 +0100
@@ -21,6 +21,7 @@
#include "osdep.h"
#include "posix-aio-compat.h"
+#include "qemu-common.h"
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
@@ -75,6 +76,85 @@ static void thread_create(pthread_t *thr
if (ret) die2(ret, "pthread_create");
}
+/*
+ * Return 1 if QEMU_AIO_SECTOR_ALIGNED is set and the base address of
+ * any segment is not a multiple of 512, i.e. a bounce copy is needed.
+ */
+static int aiocb_needs_copy(struct qemu_paiocb *aiocb)
+{
+ if (aiocb->aio_flags & QEMU_AIO_SECTOR_ALIGNED) {
+ int i;
+
+ for (i = 0; i < aiocb->aio_niov; i++)
+ if ((uintptr_t) aiocb->aio_iov[i].iov_base % 512)
+ return 1;
+ }
+
+ return 0;
+}
+
+static size_t handle_aiocb_linear(struct qemu_paiocb *aiocb, char *buf)
+{
+ size_t offset = 0;
+ ssize_t len; /* pread/pwrite return ssize_t; -1 signals an error */
+ /* Transfer until done; retry on EINTR, stop at EOF, return -errno on error. */
+ while (offset < aiocb->aio_nbytes) {
+ if (aiocb->is_write)
+ len = pwrite(aiocb->aio_fildes,
+ (const char *)buf + offset,
+ aiocb->aio_nbytes - offset,
+ aiocb->aio_offset + offset);
+ else
+ len = pread(aiocb->aio_fildes,
+ buf + offset,
+ aiocb->aio_nbytes - offset,
+ aiocb->aio_offset + offset);
+
+ if (len == -1 && errno == EINTR)
+ continue;
+ else if (len == -1) {
+ offset = -errno;
+ break;
+ } else if (len == 0)
+ break;
+
+ offset += len;
+ }
+
+ return offset;
+}
+
+static size_t handle_aiocb(struct qemu_paiocb *aiocb)
+{
+ size_t nbytes;
+ char *buf;
+
+ if (!aiocb_needs_copy(aiocb) && aiocb->aio_niov == 1) {
+ /*
+ * A single segment that needs no alignment copy can be handed
+ * to pread/pwrite directly, without a bounce buffer.
+ */
+ return handle_aiocb_linear(aiocb, aiocb->aio_iov->iov_base);
+ }
+
+ /*
+ * Otherwise bounce through one 512-byte aligned buffer: gather the
+ * segments before a write, scatter them back after a read.
+ */
+ buf = qemu_memalign(512, aiocb->aio_nbytes);
+ if (aiocb->is_write)
+ iovec_to_buffer(aiocb->aio_iov, aiocb->aio_niov, buf);
+
+ nbytes = handle_aiocb_linear(aiocb, buf);
+ if (!aiocb->is_write) {
+ iovec_from_buffer(aiocb->aio_iov, aiocb->aio_niov,
+ buf, aiocb->aio_nbytes);
+ }
+ qemu_vfree(buf);
+
+ return nbytes;
+}
+
static void *aio_thread(void *unused)
{
pid_t pid;
@@ -88,7 +168,7 @@ static void *aio_thread(void *unused)
while (1) {
struct qemu_paiocb *aiocb;
- size_t offset;
+ size_t nbytes;
int ret = 0;
qemu_timeval tv;
struct timespec ts;
@@ -110,39 +190,14 @@ static void *aio_thread(void *unused)
aiocb = TAILQ_FIRST(&request_list);
TAILQ_REMOVE(&request_list, aiocb, node);
- offset = 0;
aiocb->active = 1;
-
idle_threads--;
- mutex_unlock(&lock);
-
- while (offset < aiocb->aio_nbytes) {
- ssize_t len;
-
- if (aiocb->is_write)
- len = pwrite(aiocb->aio_fildes,
- (const char *)aiocb->aio_buf + offset,
- aiocb->aio_nbytes - offset,
- aiocb->aio_offset + offset);
- else
- len = pread(aiocb->aio_fildes,
- (char *)aiocb->aio_buf + offset,
- aiocb->aio_nbytes - offset,
- aiocb->aio_offset + offset);
-
- if (len == -1 && errno == EINTR)
- continue;
- else if (len == -1) {
- offset = -errno;
- break;
- } else if (len == 0)
- break;
-
- offset += len;
- }
+ mutex_unlock(&lock);
+ nbytes = handle_aiocb(aiocb);
mutex_lock(&lock);
- aiocb->ret = offset;
+
+ aiocb->ret = nbytes;
idle_threads++;
mutex_unlock(&lock);
Index: qemu/posix-aio-compat.h
===================================================================
--- qemu.orig/posix-aio-compat.h 2009-03-14 18:03:55.000000000 +0100
+++ qemu/posix-aio-compat.h 2009-03-14 18:08:41.000000000 +0100
@@ -24,13 +24,18 @@
#define QEMU_PAIO_NOTCANCELED 0x02
#define QEMU_PAIO_ALLDONE 0x03
+
struct qemu_paiocb
{
int aio_fildes;
- void *aio_buf;
+ struct iovec *aio_iov;
+ int aio_niov;
size_t aio_nbytes;
int ev_signo;
off_t aio_offset;
+ unsigned aio_flags;
+/* 512 byte alignment required for buffer, offset and length */
+#define QEMU_AIO_SECTOR_ALIGNED 0x01
/* private */
TAILQ_ENTRY(qemu_paiocb) node;
^ permalink raw reply [flat|nested] 20+ messages in thread