From: Kevin Wolf <kwolf@redhat.com>
To: anthony@codemonkey.ws
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 16/20] qcow2: Avoid bounce buffers for AIO read requests
Date: Tue, 21 Sep 2010 17:21:58 +0200
Message-ID: <1285082522-24407-17-git-send-email-kwolf@redhat.com>
In-Reply-To: <1285082522-24407-1-git-send-email-kwolf@redhat.com>
qcow2 used to use bounce buffers for all AIO requests. This not only implies
unnecessary copying, but also unbounded allocations, which should be avoided.

This patch removes bounce buffers from the normal AIO read path and constrains
them to a constant size for encrypted images.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2-cluster.c | 8 ++++-
block/qcow2.c | 86 +++++++++++++++++++++++++++++++++---------------
block/qcow2.h | 4 +-
3 files changed, 68 insertions(+), 30 deletions(-)
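
The core idea is to hand the caller's scatter/gather list straight to the lower
layer instead of reading into a temporary linear buffer and copying it back. A
minimal standalone sketch of that pattern, using plain POSIX preadv() rather
than QEMU's qemu_iovec helpers (the file path and buffer sizes below are made
up for illustration):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/uio.h>

int main(void)
{
    int fd = open("/tmp/example.img", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    char a[512], b[1024];
    struct iovec iov[2] = {
        { .iov_base = a, .iov_len = sizeof(a) },
        { .iov_base = b, .iov_len = sizeof(b) },
    };

    /* Bounce-buffer style: an extra allocation and an extra copy. */
    size_t total = sizeof(a) + sizeof(b);
    char *bounce = malloc(total);
    if (bounce && pread(fd, bounce, total, 0) == (ssize_t)total) {
        memcpy(a, bounce, sizeof(a));
        memcpy(b, bounce + sizeof(a), sizeof(b));
    }
    free(bounce);

    /* Vectored style: the kernel fills the caller's buffers directly. */
    if (preadv(fd, iov, 2, 0) < 0) {
        perror("preadv");
    }

    close(fd);
    return 0;
}

In the qcow2 read path the same effect is achieved by building a QEMUIOVector
over the relevant slice of the caller's qiov with qemu_iovec_copy(); a bounce
buffer survives only for encrypted images, where it is capped at
QCOW_MAX_CRYPT_CLUSTERS clusters so the allocation stays bounded.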
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cb2e33f..fb4224a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -350,6 +350,8 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
BDRVQcowState *s = bs->opaque;
int ret, index_in_cluster, n, n1;
uint64_t cluster_offset;
+ struct iovec iov;
+ QEMUIOVector qiov;
while (nb_sectors > 0) {
n = nb_sectors;
@@ -364,7 +366,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
if (!cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
+ iov.iov_base = buf;
+ iov.iov_len = n * 512;
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ n1 = qcow2_backing_read1(bs->backing_hd, &qiov, sector_num, n);
if (n1 > 0) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
diff --git a/block/qcow2.c b/block/qcow2.c
index a53014d..4a688b4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,8 +311,8 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
}
/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors)
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors)
{
int n1;
if ((sector_num + nb_sectors) <= bs->total_sectors)
@@ -321,7 +321,9 @@ int qcow2_backing_read1(BlockDriverState *bs,
n1 = 0;
else
n1 = bs->total_sectors - sector_num;
- memset(buf + n1 * 512, 0, 512 * (nb_sectors - n1));
+
+ qemu_iovec_memset(qiov, 0, 512 * (nb_sectors - n1));
+
return n1;
}
@@ -333,6 +335,7 @@ typedef struct QCowAIOCB {
void *orig_buf;
int remaining_sectors;
int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done;
uint64_t cluster_offset;
uint8_t *cluster_data;
BlockDriverAIOCB *hd_aiocb;
@@ -397,15 +400,19 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* nothing to do */
} else {
if (s->crypt_method) {
- qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
- acb->cur_nr_sectors, 0,
- &s->aes_decrypt_key);
+ qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data,
+ acb->cluster_data, acb->cur_nr_sectors, 0, &s->aes_decrypt_key);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+ acb->cur_nr_sectors * 512);
+ qemu_iovec_from_buffer(&acb->hd_qiov, acb->cluster_data,
+ 512 * acb->cur_nr_sectors);
}
}
acb->remaining_sectors -= acb->cur_nr_sectors;
acb->sector_num += acb->cur_nr_sectors;
- acb->buf += acb->cur_nr_sectors * 512;
+ acb->bytes_done += acb->cur_nr_sectors * 512;
if (acb->remaining_sectors == 0) {
/* request completed */
@@ -415,6 +422,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* prepare next AIO request */
acb->cur_nr_sectors = acb->remaining_sectors;
+ if (s->crypt_method) {
+ acb->cur_nr_sectors = MIN(acb->cur_nr_sectors,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ }
+
ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
&acb->cur_nr_sectors, &acb->cluster_offset);
if (ret < 0) {
@@ -423,15 +435,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_copy(&acb->hd_qiov, acb->qiov, acb->bytes_done,
+ acb->cur_nr_sectors * 512);
+
if (!acb->cluster_offset) {
+
if (bs->backing_hd) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
- acb->buf, acb->cur_nr_sectors);
+ n1 = qcow2_backing_read1(bs->backing_hd, &acb->hd_qiov,
+ acb->sector_num, acb->cur_nr_sectors);
if (n1 > 0) {
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
&acb->hd_qiov, acb->cur_nr_sectors,
@@ -445,7 +459,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
}
} else {
/* Note: in this case, no need to wait */
- memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
+ qemu_iovec_memset(&acb->hd_qiov, 0, 512 * acb->cur_nr_sectors);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@@ -454,8 +468,11 @@ static void qcow_aio_read_cb(void *opaque, int ret)
/* add AIO support for compressed blocks ? */
if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
goto done;
- memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
- 512 * acb->cur_nr_sectors);
+
+ qemu_iovec_from_buffer(&acb->hd_qiov,
+ s->cluster_cache + index_in_cluster * 512,
+ 512 * acb->cur_nr_sectors);
+
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@@ -465,9 +482,23 @@ static void qcow_aio_read_cb(void *opaque, int ret)
goto done;
}
- acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
- qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+ if (s->crypt_method) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ */
+ if (!acb->cluster_data) {
+ acb->cluster_data =
+ qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
+
+ assert(acb->cur_nr_sectors <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ qemu_iovec_reset(&acb->hd_qiov);
+ qemu_iovec_add(&acb->hd_qiov, acb->cluster_data,
+ 512 * acb->cur_nr_sectors);
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
@@ -481,11 +512,8 @@ static void qcow_aio_read_cb(void *opaque, int ret)
return;
done:
- if (acb->qiov->niov > 1) {
- qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
- qemu_vfree(acb->orig_buf);
- }
acb->common.cb(acb->common.opaque, ret);
+ qemu_iovec_destroy(&acb->hd_qiov);
qemu_aio_release(acb);
}
@@ -501,13 +529,17 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
acb->hd_aiocb = NULL;
acb->sector_num = sector_num;
acb->qiov = qiov;
- if (qiov->niov > 1) {
- acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
- if (is_write)
- qemu_iovec_to_buffer(qiov, acb->buf);
- } else {
+
+ if (!is_write) {
+ qemu_iovec_init(&acb->hd_qiov, qiov->niov);
+ } else if (qiov->niov == 1) {
acb->buf = (uint8_t *)qiov->iov->iov_base;
+ } else {
+ acb->buf = acb->orig_buf = qemu_blockalign(bs, qiov->size);
+ qemu_iovec_to_buffer(qiov, acb->buf);
}
+
+ acb->bytes_done = 0;
acb->remaining_sectors = nb_sectors;
acb->cur_nr_sectors = 0;
acb->cluster_offset = 0;
diff --git a/block/qcow2.h b/block/qcow2.h
index 3ff162e..356a34a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -166,8 +166,8 @@ static inline int64_t align_offset(int64_t offset, int n)
// FIXME Need qcow2_ prefix to global functions
/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors);
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors);
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);
--
1.7.2.2