[Qemu-devel] Re: [PATCH v2 4/7] qed: Add QEMU Enhanced Disk image format

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Kevin Wolf <kwolf@redhat.com>
To: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Cc: Anthony Liguori <aliguori@us.ibm.com>,
	Avi Kivity <avi@redhat.com>,
	qemu-devel@nongnu.org, Christoph Hellwig <hch@lst.de>
Subject: [Qemu-devel] Re: [PATCH v2 4/7] qed: Add QEMU Enhanced Disk image format
Date: Mon, 11 Oct 2010 17:16:39 +0200	[thread overview]
Message-ID: <4CB32A57.6000405@redhat.com> (raw)
In-Reply-To: <1286552914-27014-5-git-send-email-stefanha@linux.vnet.ibm.com>

Am 08.10.2010 17:48, schrieb Stefan Hajnoczi:
> This patch introduces the qed on-disk layout and implements image
> creation.  Later patches add read/write and other functionality.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  Makefile.objs |    1 +
>  block/qed.c   |  530 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/qed.h   |  148 ++++++++++++++++
>  3 files changed, 679 insertions(+), 0 deletions(-)
>  create mode 100644 block/qed.c
>  create mode 100644 block/qed.h
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 816194a..ff15795 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -14,6 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>  
>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
> +block-nested-y += qed.o
>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>  block-nested-$(CONFIG_WIN32) += raw-win32.o
>  block-nested-$(CONFIG_POSIX) += raw-posix.o
> diff --git a/block/qed.c b/block/qed.c
> new file mode 100644
> index 0000000..ea03798
> --- /dev/null
> +++ b/block/qed.c
> @@ -0,0 +1,530 @@
> +/*
> + * QEMU Enhanced Disk Format
> + *
> + * Copyright IBM, Corp. 2010
> + *
> + * Authors:
> + *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
> + *  Anthony Liguori   <aliguori@us.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qed.h"
> +
> +static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
> +                          const char *filename)
> +{
> +    const QEDHeader *header = (const void *)buf;

Too lazy to type the real type name? ;-) Casting to (const QEDHeader *) instead of (const void *) would be clearer.

> +
> +    if (buf_size < sizeof(*header)) {
> +        return 0;
> +    }
> +    if (le32_to_cpu(header->magic) != QED_MAGIC) {
> +        return 0;
> +    }
> +    return 100;
> +}
> +
> +static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
> +{
> +    cpu->magic = le32_to_cpu(le->magic);
> +    cpu->cluster_size = le32_to_cpu(le->cluster_size);
> +    cpu->table_size = le32_to_cpu(le->table_size);
> +    cpu->header_size = le32_to_cpu(le->header_size);
> +    cpu->features = le64_to_cpu(le->features);
> +    cpu->compat_features = le64_to_cpu(le->compat_features);
> +    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
> +    cpu->image_size = le64_to_cpu(le->image_size);
> +    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
> +    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
> +    cpu->backing_fmt_offset = le32_to_cpu(le->backing_fmt_offset);
> +    cpu->backing_fmt_size = le32_to_cpu(le->backing_fmt_size);
> +}
> +
> +static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
> +{
> +    le->magic = cpu_to_le32(cpu->magic);
> +    le->cluster_size = cpu_to_le32(cpu->cluster_size);
> +    le->table_size = cpu_to_le32(cpu->table_size);
> +    le->header_size = cpu_to_le32(cpu->header_size);
> +    le->features = cpu_to_le64(cpu->features);
> +    le->compat_features = cpu_to_le64(cpu->compat_features);
> +    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
> +    le->image_size = cpu_to_le64(cpu->image_size);
> +    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
> +    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
> +    le->backing_fmt_offset = cpu_to_le32(cpu->backing_fmt_offset);
> +    le->backing_fmt_size = cpu_to_le32(cpu->backing_fmt_size);
> +}
> +

Please add a comment for this function, e.g.:
/** Returns the maximum virtual disk size in bytes */

> +static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
> +{
> +    uint64_t table_entries;
> +    uint64_t l2_size;
> +
> +    table_entries = (table_size * cluster_size) / sizeof(uint64_t);
> +    l2_size = table_entries * cluster_size;
> +
> +    return l2_size * table_entries;
> +}
> +
> +static bool qed_is_cluster_size_valid(uint32_t cluster_size)
> +{
> +    if (cluster_size < QED_MIN_CLUSTER_SIZE ||
> +        cluster_size > QED_MAX_CLUSTER_SIZE) {
> +        return false;
> +    }
> +    if (cluster_size & (cluster_size - 1)) {
> +        return false; /* not power of 2 */
> +    }
> +    return true;
> +}
> +
> +static bool qed_is_table_size_valid(uint32_t table_size)
> +{
> +    if (table_size < QED_MIN_TABLE_SIZE ||
> +        table_size > QED_MAX_TABLE_SIZE) {
> +        return false;
> +    }
> +    if (table_size & (table_size - 1)) {
> +        return false; /* not power of 2 */
> +    }
> +    return true;
> +}
> +
> +static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
> +                                    uint32_t table_size)
> +{
> +    if (image_size == 0) {
> +        /* Supporting zero size images makes life harder because even the L1
> +         * table is not needed.  Make life simple and forbid zero size images.
> +         */
> +        return false;
> +    }

Should the spec be updated to forbid zero size images?

> +    if (image_size & (cluster_size - 1)) {
> +        return false; /* not multiple of cluster size */
> +    }
> +    if (image_size > qed_max_image_size(cluster_size, table_size)) {
> +        return false; /* image is too large */
> +    }
> +    return true;
> +}
> +
> +/**
> + * Read a string of known length from the image file
> + *
> + * @file:       Image file
> + * @offset:     File offset to start of string, in bytes
> + * @n:          String length in bytes
> + * @buf:        Destination buffer
> + * @buflen:     Destination buffer length in bytes
> + *
> + * The string is NUL-terminated.

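The return value is not documented here. Please describe it (it looks like 0 on success and a negative errno value on failure).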

> + */
> +static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
> +                           char *buf, size_t buflen)
> +{
> +    int ret;
> +    if (n >= buflen) {
> +        return -EINVAL;
> +    }
> +    ret = bdrv_pread(file, offset, buf, n);
> +    if (ret != n) {
> +        return ret;
> +    }
> +    buf[n] = '\0';
> +    return 0;
> +}
> +
> +static int bdrv_qed_open(BlockDriverState *bs, int flags)
> +{
> +    BDRVQEDState *s = bs->opaque;
> +    QEDHeader le_header;
> +    int64_t file_size;
> +    int ret;
> +
> +    s->bs = bs;
> +
> +    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
> +    if (ret != sizeof(le_header)) {
> +        return ret;
> +    }

While this is correct because bdrv_pread never returns short reads, I
think "if (ret < 0)" would be easier to read.

> +    qed_header_le_to_cpu(&le_header, &s->header);
> +
> +    if (s->header.magic != QED_MAGIC) {
> +        return -ENOENT;
> +    }
> +    if (s->header.features & ~QED_FEATURE_MASK) {
> +        return -ENOTSUP; /* image uses unsupported feature bits */
> +    }
> +    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
> +        return -EINVAL;
> +    }
> +
> +    /* Round up file size to the next cluster */
> +    file_size = bdrv_getlength(bs->file);
> +    if (file_size < 0) {
> +        return file_size;
> +    }
> +    s->file_size = qed_start_of_cluster(s, file_size);

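Aren't you rounding down despite the comment? qed_start_of_cluster() masks off the low bits of the offset, so the file size is rounded down to a cluster boundary, not up.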

> +
> +    if (!qed_is_table_size_valid(s->header.table_size)) {
> +        return -EINVAL;
> +    }
> +    if (!qed_is_image_size_valid(s->header.image_size,
> +                                 s->header.cluster_size,
> +                                 s->header.table_size)) {
> +        return -EINVAL;
> +    }
> +    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
> +        return -EINVAL;
> +    }
> +
> +    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
> +                      sizeof(uint64_t);
> +    s->l2_shift = get_bits_from_size(s->header.cluster_size);
> +    s->l2_mask = s->table_nelems - 1;
> +    s->l1_shift = s->l2_shift + get_bits_from_size(s->l2_mask + 1);
> +
> +    if ((s->header.features & QED_F_BACKING_FILE)) {
> +        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
> +                              s->header.backing_filename_size, bs->backing_file,
> +                              sizeof(bs->backing_file));
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        if ((s->header.compat_features & QED_CF_BACKING_FORMAT)) {
> +            ret = qed_read_string(bs->file, s->header.backing_fmt_offset,
> +                                  s->header.backing_fmt_size,
> +                                  bs->backing_format,
> +                                  sizeof(bs->backing_format));
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +    }
> +    return ret;

I think this should be a return 0 instead of returning a random
non-negative number.

> +}
> +
> +static void bdrv_qed_close(BlockDriverState *bs)
> +{
> +}
> +
> +static void bdrv_qed_flush(BlockDriverState *bs)
> +{
> +    bdrv_flush(bs->file);
> +}
> +
> +static int qed_create(const char *filename, uint32_t cluster_size,
> +                      uint64_t image_size, uint32_t table_size,
> +                      const char *backing_file, const char *backing_fmt)
> +{
> +    QEDHeader header = {
> +        .magic = QED_MAGIC,
> +        .cluster_size = cluster_size,
> +        .table_size = table_size,
> +        .header_size = 1,
> +        .features = 0,
> +        .compat_features = 0,
> +        .l1_table_offset = cluster_size,
> +        .image_size = image_size,
> +    };
> +    QEDHeader le_header;
> +    uint8_t *l1_table = NULL;
> +    size_t l1_size = header.cluster_size * header.table_size;
> +    int ret = 0;
> +    int fd;
> +
> +    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);

Please use block.c functions (bdrv_create_file etc.) instead of POSIX
functions.

> +    if (fd < 0) {
> +        return -errno;
> +    }
> +
> +    if (backing_file) {
> +        header.features |= QED_F_BACKING_FILE;
> +        header.backing_filename_offset = sizeof(le_header);
> +        header.backing_filename_size = strlen(backing_file);
> +        if (backing_fmt) {
> +            header.compat_features |= QED_CF_BACKING_FORMAT;
> +            header.backing_fmt_offset = header.backing_filename_offset +
> +                                        header.backing_filename_size;
> +            header.backing_fmt_size = strlen(backing_fmt);
> +        }
> +    }
> +
> +    qed_header_cpu_to_le(&header, &le_header);
> +    if (qemu_write_full(fd, &le_header, sizeof(le_header)) != sizeof(le_header)) {
> +        ret = -errno;
> +        goto out;
> +    }
> +    if (qemu_write_full(fd, backing_file, header.backing_filename_size) != header.backing_filename_size) {
> +        ret = -errno;
> +        goto out;
> +    }
> +    if (qemu_write_full(fd, backing_fmt, header.backing_fmt_size) != header.backing_fmt_size) {
> +        ret = -errno;
> +        goto out;
> +    }
> +
> +    l1_table = qemu_mallocz(l1_size);
> +    lseek(fd, header.l1_table_offset, SEEK_SET);
> +    if (qemu_write_full(fd, l1_table, l1_size) != l1_size) {
> +        ret = -errno;
> +        goto out;
> +    }
> +
> +out:
> +    qemu_free(l1_table);
> +    close(fd);
> +    return ret;
> +}
> +
> +static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
> +{
> +    uint64_t image_size = 0;
> +    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
> +    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
> +    const char *backing_file = NULL;
> +    const char *backing_fmt = NULL;
> +
> +    while (options && options->name) {
> +        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
> +            image_size = options->value.n;
> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
> +            backing_file = options->value.s;
> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
> +            backing_fmt = options->value.s;
> +        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
> +            if (options->value.n) {
> +                cluster_size = options->value.n;
> +            }
> +        } else if (!strcmp(options->name, "table_size")) {
> +            if (options->value.n) {
> +                table_size = options->value.n;
> +            }
> +        }
> +        options++;
> +    }
> +
> +    if (!qed_is_cluster_size_valid(cluster_size)) {
> +        fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
> +                QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
> +        return -EINVAL;
> +    }
> +    if (!qed_is_table_size_valid(table_size)) {
> +        fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
> +                QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
> +        return -EINVAL;
> +    }
> +    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
> +        char buffer[64];
> +
> +        bytes_to_str(buffer, sizeof(buffer),
> +                     qed_max_image_size(cluster_size, table_size));
> +
> +        fprintf(stderr,
> +                "QED image size must be a non-zero multiple of cluster size and less than %s\n",
> +                buffer);
> +        return -EINVAL;
> +    }
> +
> +    return qed_create(filename, cluster_size, image_size, table_size,
> +                      backing_file, backing_fmt);
> +}
> +
> +static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
> +                                  int nb_sectors, int *pnum)
> +{
> +    return -ENOTSUP;
> +}
> +
> +static int bdrv_qed_make_empty(BlockDriverState *bs)
> +{
> +    return -ENOTSUP;
> +}
> +
> +static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
> +                                            int64_t sector_num,
> +                                            QEMUIOVector *qiov, int nb_sectors,
> +                                            BlockDriverCompletionFunc *cb,
> +                                            void *opaque)
> +{
> +    return NULL;
> +}
> +
> +static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
> +                                             int64_t sector_num,
> +                                             QEMUIOVector *qiov, int nb_sectors,
> +                                             BlockDriverCompletionFunc *cb,
> +                                             void *opaque)
> +{
> +    return NULL;
> +}
> +
> +static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
> +                                            BlockDriverCompletionFunc *cb,
> +                                            void *opaque)
> +{
> +    return bdrv_aio_flush(bs->file, cb, opaque);
> +}
> +
> +static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
> +{
> +    return -ENOTSUP;
> +}
> +
> +static int64_t bdrv_qed_getlength(BlockDriverState *bs)
> +{
> +    BDRVQEDState *s = bs->opaque;
> +    return s->header.image_size;
> +}
> +
> +static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
> +{
> +    BDRVQEDState *s = bs->opaque;
> +
> +    memset(bdi, 0, sizeof(*bdi));
> +    bdi->cluster_size = s->header.cluster_size;
> +    return 0;
> +}
> +
> +static int bdrv_qed_change_backing_file(BlockDriverState *bs,
> +                                        const char *backing_file,
> +                                        const char *backing_fmt)
> +{
> +    BDRVQEDState *s = bs->opaque;
> +    QEDHeader new_header, le_header;
> +    void *buffer;
> +    size_t buffer_len, backing_file_len, backing_fmt_len;
> +    int ret;
> +
> +    /* Refuse to set backing filename if unknown compat feature bits are
> +     * active.  If the image uses an unknown compat feature then we may not
> +     * know the layout of data following the header structure and cannot safely
> +     * add a new string.
> +     */
> +    if (backing_file && (s->header.compat_features &
> +                         ~QED_COMPAT_FEATURE_MASK)) {
> +        return -ENOTSUP;
> +    }
> +
> +    memcpy(&new_header, &s->header, sizeof(new_header));
> +
> +    new_header.features &= ~QED_F_BACKING_FILE;
> +    new_header.compat_features &= ~QED_CF_BACKING_FORMAT;
> +
> +    /* Adjust feature flags */
> +    if (backing_file) {
> +        new_header.features |= QED_F_BACKING_FILE;
> +        if (backing_fmt) {
> +            new_header.compat_features |= QED_CF_BACKING_FORMAT;
> +        }
> +    }
> +
> +    /* Calculate new header size */
> +    backing_file_len = backing_fmt_len = 0;
> +
> +    if (backing_file) {
> +        backing_file_len = strlen(backing_file);
> +        if (backing_fmt) {
> +            backing_fmt_len = strlen(backing_fmt);
> +        }
> +    }
> +
> +    buffer_len = sizeof(new_header);
> +    new_header.backing_filename_offset = buffer_len;
> +    new_header.backing_filename_size = backing_file_len;
> +    buffer_len += backing_file_len;
> +    new_header.backing_fmt_offset = buffer_len;
> +    new_header.backing_fmt_size = backing_fmt_len;
> +    buffer_len += backing_fmt_len;
> +
> +    /* Make sure we can rewrite header without failing */
> +    if (buffer_len > new_header.header_size * new_header.cluster_size) {
> +        return -ENOSPC;
> +    }
> +
> +    /* Prepare new header */
> +    buffer = qemu_malloc(buffer_len);
> +
> +    qed_header_cpu_to_le(&new_header, &le_header);
> +    memcpy(buffer, &le_header, sizeof(le_header));
> +    buffer_len = sizeof(le_header);
> +
> +    memcpy(buffer + buffer_len, backing_file, backing_file_len);
> +    buffer_len += backing_file_len;
> +
> +    memcpy(buffer + buffer_len, backing_fmt, backing_fmt_len);
> +    buffer_len += backing_fmt_len;
> +
> +    /* Write new header */
> +    ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
> +    qemu_free(buffer);
> +    if (ret == 0) {
> +        memcpy(&s->header, &new_header, sizeof(new_header));
> +    }
> +    return ret;
> +}
> +
> +static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result)
> +{
> +    return -ENOTSUP;
> +}
> +
> +static QEMUOptionParameter qed_create_options[] = {
> +    {
> +        .name = BLOCK_OPT_SIZE,
> +        .type = OPT_SIZE,
> +        .help = "Virtual disk size (in bytes)"
> +    }, {
> +        .name = BLOCK_OPT_BACKING_FILE,
> +        .type = OPT_STRING,
> +        .help = "File name of a base image"
> +    }, {
> +        .name = BLOCK_OPT_BACKING_FMT,
> +        .type = OPT_STRING,
> +        .help = "Image format of the base image"
> +    }, {
> +        .name = BLOCK_OPT_CLUSTER_SIZE,
> +        .type = OPT_SIZE,
> +        .help = "Cluster size (in bytes)"
> +    }, {
> +        .name = "table_size",

What about introducing a constant for this?

> +        .type = OPT_SIZE,
> +        .help = "L1/L2 table size (in clusters)"
> +    },
> +    { /* end of list */ }
> +};
> +
> +static BlockDriver bdrv_qed = {
> +    .format_name = "qed",
> +    .instance_size = sizeof(BDRVQEDState),
> +    .create_options = qed_create_options,
> +
> +    .bdrv_probe = bdrv_qed_probe,
> +    .bdrv_open = bdrv_qed_open,
> +    .bdrv_close = bdrv_qed_close,
> +    .bdrv_create = bdrv_qed_create,
> +    .bdrv_flush = bdrv_qed_flush,
> +    .bdrv_is_allocated = bdrv_qed_is_allocated,
> +    .bdrv_make_empty = bdrv_qed_make_empty,
> +    .bdrv_aio_readv = bdrv_qed_aio_readv,
> +    .bdrv_aio_writev = bdrv_qed_aio_writev,
> +    .bdrv_aio_flush = bdrv_qed_aio_flush,
> +    .bdrv_truncate = bdrv_qed_truncate,
> +    .bdrv_getlength = bdrv_qed_getlength,
> +    .bdrv_get_info = bdrv_qed_get_info,
> +    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
> +    .bdrv_check = bdrv_qed_check,

Please align the = of all definitions vertically.

> +};
> +
> +static void bdrv_qed_init(void)
> +{
> +    bdrv_register(&bdrv_qed);
> +}
> +
> +block_init(bdrv_qed_init);
> diff --git a/block/qed.h b/block/qed.h
> new file mode 100644
> index 0000000..7ce95a7
> --- /dev/null
> +++ b/block/qed.h
> @@ -0,0 +1,148 @@
> +/*
> + * QEMU Enhanced Disk Format
> + *
> + * Copyright IBM, Corp. 2010
> + *
> + * Authors:
> + *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
> + *  Anthony Liguori   <aliguori@us.ibm.com>
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef BLOCK_QED_H
> +#define BLOCK_QED_H
> +
> +#include "block_int.h"
> +
> +/* The layout of a QED file is as follows:
> + *
> + * +--------+----------+----------+----------+-----+
> + * | header | L1 table | cluster0 | cluster1 | ... |
> + * +--------+----------+----------+----------+-----+
> + *
> + * There is a 2-level pagetable for cluster allocation:
> + *
> + *                     +----------+
> + *                     | L1 table |
> + *                     +----------+
> + *                ,------'  |  '------.
> + *           +----------+   |    +----------+
> + *           | L2 table |  ...   | L2 table |
> + *           +----------+        +----------+
> + *       ,------'  |  '------.
> + *  +----------+   |    +----------+
> + *  |   Data   |  ...   |   Data   |
> + *  +----------+        +----------+
> + *
> + * The L1 table is fixed size and always present.  L2 tables are allocated on
> + * demand.  The L1 table size determines the maximum possible image size; it
> + * can be influenced using the cluster_size and table_size values.
> + *
> + * All fields are little-endian on disk.
> + */
> +
> +enum {
> +    QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
> +
> +    /* The image supports a backing file */
> +    QED_F_BACKING_FILE = 0x01,
> +
> +    /* The image has the backing file format */
> +    QED_CF_BACKING_FORMAT = 0x01,
> +
> +    /* Feature bits must be used when the on-disk format changes */
> +    QED_FEATURE_MASK = QED_F_BACKING_FILE,            /* supported feature bits */
> +    QED_COMPAT_FEATURE_MASK = QED_CF_BACKING_FORMAT,  /* supported compat feature bits */
> +
> +    /* Data is stored in groups of sectors called clusters.  Cluster size must
> +     * be large to avoid keeping too much metadata.  I/O requests that have
> +     * sub-cluster size will require read-modify-write.
> +     */
> +    QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
> +    QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
> +    QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
> +
> +    /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
> +     * a multiple of clusters so large maximum image sizes can be supported
> +     * without jacking up the cluster size too much.
> +     */
> +    QED_MIN_TABLE_SIZE = 1,        /* in clusters */
> +    QED_MAX_TABLE_SIZE = 16,
> +    QED_DEFAULT_TABLE_SIZE = 4,
> +};
> +
> +typedef struct {
> +    uint32_t magic;                 /* QED\0 */
> +
> +    uint32_t cluster_size;          /* in bytes */
> +    uint32_t table_size;            /* for L1 and L2 tables, in clusters */
> +    uint32_t header_size;           /* in clusters */
> +
> +    uint64_t features;              /* format feature bits */
> +    uint64_t compat_features;       /* compatible feature bits */
> +    uint64_t l1_table_offset;       /* in bytes */
> +    uint64_t image_size;            /* total logical image size, in bytes */
> +
> +    /* if (features & QED_F_BACKING_FILE) */
> +    uint32_t backing_filename_offset; /* in bytes from start of header */
> +    uint32_t backing_filename_size;   /* in bytes */
> +
> +    /* if (compat_features & QED_CF_BACKING_FORMAT) */
> +    uint32_t backing_fmt_offset;    /* in bytes from start of header */
> +    uint32_t backing_fmt_size;      /* in bytes */
> +} QEDHeader;
> +
> +typedef struct {
> +    BlockDriverState *bs;           /* device */
> +    uint64_t file_size;             /* length of image file, in bytes */
> +
> +    QEDHeader header;               /* always cpu-endian */
> +    uint32_t table_nelems;
> +    uint32_t l1_shift;
> +    uint32_t l2_shift;
> +    uint32_t l2_mask;
> +} BDRVQEDState;
> +
> +/**
> + * Utility functions

Sure that this is the right description for this function? ;-) "Utility functions" reads like a section heading, not a description of qed_start_of_cluster().

> + */
> +static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
> +{
> +    return offset & ~(uint64_t)(s->header.cluster_size - 1);
> +}
> +
> +/**
> + * Test if a cluster offset is valid
> + */
> +static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
> +{
> +    uint64_t header_size = (uint64_t)s->header.header_size *
> +                           s->header.cluster_size;
> +
> +    if (offset & (s->header.cluster_size - 1)) {
> +        return false;
> +    }
> +    return offset >= header_size && offset < s->file_size;
> +}
> +
> +/**
> + * Test if a table offset is valid
> + */
> +static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
> +{
> +    uint64_t end_offset = offset + (s->header.table_size - 1) *
> +                          s->header.cluster_size;
> +
> +    /* Overflow check */
> +    if (end_offset <= offset) {
> +        return false;
> +    }
> +
> +    return qed_check_cluster_offset(s, offset) &&
> +           qed_check_cluster_offset(s, end_offset);
> +}
> +
> +#endif /* BLOCK_QED_H */

Kevin

next prev parent reply	other threads:[~2010-10-11 15:28 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-08 15:48 [Qemu-devel] [PATCH v2 0/7] qed: Add QEMU Enhanced Disk format Stefan Hajnoczi
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 1/7] qcow2: Make get_bits_from_size() common Stefan Hajnoczi
2010-10-08 18:01   ` [Qemu-devel] " Anthony Liguori
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 2/7] cutils: Add bytes_to_str() to format byte values Stefan Hajnoczi
2010-10-11 11:09   ` [Qemu-devel] " Kevin Wolf
2010-10-13  9:15   ` [Qemu-devel] " Markus Armbruster
2010-10-13  9:28     ` Kevin Wolf
2010-10-13 10:58       ` Stefan Hajnoczi
2010-10-13 10:25   ` [Qemu-devel] " Avi Kivity
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 3/7] docs: Add QED image format specification Stefan Hajnoczi
2010-10-10  9:20   ` [Qemu-devel] " Avi Kivity
2010-10-11 10:09     ` Stefan Hajnoczi
2010-10-11 13:04       ` Avi Kivity
2010-10-11 13:42         ` Stefan Hajnoczi
2010-10-11 13:44           ` Avi Kivity
2010-10-11 14:06             ` Stefan Hajnoczi
2010-10-11 14:12               ` Avi Kivity
2010-10-11 15:02             ` Anthony Liguori
2010-10-11 15:24               ` Avi Kivity
2010-10-11 15:41                 ` Anthony Liguori
2010-10-11 15:47                   ` Avi Kivity
2010-10-11 14:54         ` Anthony Liguori
2010-10-11 14:58           ` Avi Kivity
2010-10-11 15:49             ` Anthony Liguori
2010-10-11 16:02               ` Avi Kivity
2010-10-11 16:10                 ` Anthony Liguori
2010-10-12 10:25                   ` Avi Kivity
2010-10-11 13:58   ` Kevin Wolf
2010-10-11 15:30     ` Stefan Hajnoczi
2010-10-11 15:39       ` Avi Kivity
2010-10-11 15:46         ` Stefan Hajnoczi
2010-10-11 16:18           ` Anthony Liguori
2010-10-11 17:14             ` Anthony Liguori
2010-10-12  8:07               ` Kevin Wolf
2010-10-12 13:16                 ` Stefan Hajnoczi
2010-10-12 13:32                   ` Anthony Liguori
2010-10-11 15:50       ` Kevin Wolf
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 4/7] qed: Add QEMU Enhanced Disk image format Stefan Hajnoczi
2010-10-11 15:16   ` Kevin Wolf [this message]
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 5/7] qed: Table, L2 cache, and cluster functions Stefan Hajnoczi
2010-10-12 14:44   ` [Qemu-devel] " Kevin Wolf
2010-10-13 13:41     ` Stefan Hajnoczi
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 6/7] qed: Read/write support Stefan Hajnoczi
2010-10-10  9:10   ` [Qemu-devel] " Avi Kivity
2010-10-11 10:37     ` Stefan Hajnoczi
2010-10-11 13:10       ` Avi Kivity
2010-10-11 13:55         ` Stefan Hajnoczi
2010-10-11 14:57         ` Anthony Liguori
2010-10-12 15:08   ` Kevin Wolf
2010-10-12 15:22     ` Anthony Liguori
2010-10-12 15:39       ` Kevin Wolf
2010-10-12 15:59         ` Stefan Hajnoczi
2010-10-12 16:16           ` Anthony Liguori
2010-10-12 16:21             ` Avi Kivity
2010-10-13 12:13             ` Stefan Hajnoczi
2010-10-13 13:07               ` Kevin Wolf
2010-10-13 13:24                 ` Anthony Liguori
2010-10-13 13:50                   ` Avi Kivity
2010-10-13 14:07                     ` Stefan Hajnoczi
2010-10-13 14:08                       ` Anthony Liguori
2010-10-13 14:10                       ` Avi Kivity
2010-10-13 14:11                         ` Anthony Liguori
2010-10-13 14:16                           ` Avi Kivity
2010-10-13 14:53                             ` Anthony Liguori
2010-10-13 15:08                               ` Avi Kivity
2010-10-13 15:42                                 ` Anthony Liguori
2010-10-14 11:06                         ` Stefan Hajnoczi
2010-10-13 14:10                     ` Anthony Liguori
2010-10-08 15:48 ` [Qemu-devel] [PATCH v2 7/7] qed: Consistency check support Stefan Hajnoczi
2010-10-11 13:21 ` [Qemu-devel] Re: [PATCH v2 0/7] qed: Add QEMU Enhanced Disk format Kevin Wolf
2010-10-11 15:37   ` Stefan Hajnoczi
2010-10-16  7:51 ` [Qemu-devel] " Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CB32A57.6000405@redhat.com \
    --to=kwolf@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=avi@redhat.com \
    --cc=hch@lst.de \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.