qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Peter Wu <peter@lekensteyn.nl>
To: John Snow <jsnow@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>,
	qemu-devel@nongnu.org, Stefan Hajnoczi <stefanha@redhat.com>
Subject: Re: [Qemu-devel] [PATCH 02/10] block/dmg: extract mish block decoding functionality
Date: Sat, 03 Jan 2015 12:05:02 +0100	[thread overview]
Message-ID: <1428333.LlTQDMCNAY@al> (raw)
In-Reply-To: <54A730EA.9010502@redhat.com>

On Friday 02 January 2015 18:59:38 John Snow wrote:
> 
> On 12/27/2014 10:01 AM, Peter Wu wrote:
> > Extract the mish block decoder such that this can be used for other
> > formats in the future. A new DmgHeaderState struct is introduced to
> > share state while decoding.
> >
> > The code is kept unchanged as much as possible, a "fail" label is added
> > for example where a simple return would probably do.
> >
> > Signed-off-by: Peter Wu <peter@lekensteyn.nl>
> > ---
> >   block/dmg.c | 216 +++++++++++++++++++++++++++++++++++-------------------------
> >   1 file changed, 125 insertions(+), 91 deletions(-)
> >
> > diff --git a/block/dmg.c b/block/dmg.c
> > index df274f9..6dc6dbb 100644
> > --- a/block/dmg.c
> > +++ b/block/dmg.c
> > @@ -164,19 +164,137 @@ static int64_t dmg_find_koly_offset(BlockDriverState *file_bs)
> >       return -EINVAL;
> >   }
> >
> > +/* used when building the sector table */
> > +typedef struct DmgHeaderState {
> > +    /* used internally by dmg_read_mish_block to remember offsets of blocks
> > +     * across calls */
> > +    uint64_t last_in_offset;
> > +    uint64_t last_out_offset;
> > +    /* exported for dmg_open */
> > +    uint32_t max_compressed_size;
> > +    uint32_t max_sectors_per_chunk;
> > +} DmgHeaderState;
> > +
> > +static int dmg_read_mish_block(BlockDriverState *bs, DmgHeaderState *ds,
> > +                               int64_t offset, uint32_t count)
> > +{
> > +    BDRVDMGState *s = bs->opaque;
> > +    uint32_t type, i;
> > +    int ret;
> > +    size_t new_size;
> > +    uint32_t chunk_count;
> > +
> > +    ret = read_uint32(bs, offset, &type);
> > +    if (ret < 0) {
> > +        goto fail;
> > +    }
> > +
> > +    /* skip data that is not a valid MISH block (invalid magic or too small) */
> > +    if (type != 0x6d697368 || count < 244) {
> > +        /* assume success for now */
> > +        return 0;
> > +    }
> > +
> > +    offset += 4;
> > +    offset += 200;
> > +
> > +    chunk_count = (count - 204) / 40;
> > +    new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
> > +    s->types = g_realloc(s->types, new_size / 2);
> > +    s->offsets = g_realloc(s->offsets, new_size);
> > +    s->lengths = g_realloc(s->lengths, new_size);
> > +    s->sectors = g_realloc(s->sectors, new_size);
> > +    s->sectorcounts = g_realloc(s->sectorcounts, new_size);
> > +
> > +    for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
> > +        ret = read_uint32(bs, offset, &s->types[i]);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> > +        offset += 4;
> > +        if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
> > +            s->types[i] != 2) {
> > +            if (s->types[i] == 0xffffffff && i > 0) {
> > +                ds->last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
> > +                ds->last_out_offset = s->sectors[i - 1] +
> > +                                      s->sectorcounts[i - 1];
> > +            }
> > +            chunk_count--;
> > +            i--;
> > +            offset += 36;
> > +            continue;
> > +        }
> > +        offset += 4;
> > +
> > +        ret = read_uint64(bs, offset, &s->sectors[i]);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> > +        s->sectors[i] += ds->last_out_offset;
> > +        offset += 8;
> > +
> > +        ret = read_uint64(bs, offset, &s->sectorcounts[i]);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> > +        offset += 8;
> > +
> > +        if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
> > +            error_report("sector count %" PRIu64 " for chunk %" PRIu32
> > +                         " is larger than max (%u)",
> > +                         s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
> > +            ret = -EINVAL;
> > +            goto fail;
> > +        }
> > +
> > +        ret = read_uint64(bs, offset, &s->offsets[i]);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> > +        s->offsets[i] += ds->last_in_offset;
> > +        offset += 8;
> > +
> > +        ret = read_uint64(bs, offset, &s->lengths[i]);
> > +        if (ret < 0) {
> > +            goto fail;
> > +        }
> > +        offset += 8;
> > +
> > +        if (s->lengths[i] > DMG_LENGTHS_MAX) {
> > +            error_report("length %" PRIu64 " for chunk %" PRIu32
> > +                         " is larger than max (%u)",
> > +                         s->lengths[i], i, DMG_LENGTHS_MAX);
> > +            ret = -EINVAL;
> > +            goto fail;
> > +        }
> > +
> > +        update_max_chunk_size(s, i, &ds->max_compressed_size,
> > +                              &ds->max_sectors_per_chunk);
> > +    }
> > +    s->n_chunks += chunk_count;
> > +    return 0;
> > +
> > +fail:
> > +    return ret;
> > +}
> > +
> >   static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
> >                       Error **errp)
> >   {
> >       BDRVDMGState *s = bs->opaque;
> > -    uint64_t info_begin, info_end, last_in_offset, last_out_offset;
> > +    DmgHeaderState ds;
> > +    uint64_t info_begin, info_end;
> >       uint32_t count, tmp;
> > -    uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
> >       int64_t offset;
> >       int ret;
> >
> >       bs->read_only = 1;
> >       s->n_chunks = 0;
> >       s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
> > +    ds.last_in_offset = 0;
> > +    ds.last_out_offset = 0;
> > +    ds.max_compressed_size = 1;
> > +    ds.max_sectors_per_chunk = 1;
> 
> It might be nice to have a tiny initializer macro/function so that 
> future users of functions that use this structure don't need to know the 
> magic numbers.

This state is internal to the dmg_read_mish_block functionality for
which the dmg_open function is the only caller. I consider it similar to
the initialization for BDRVDMGState in the lines above so an additional
indirection to initialize DmgHeaderState does not seem warranted. In the
v2 patch a minor comment is added before this block.

The magic number 1 existed before and seem to be in-place to avoid
allocating a 0 size (for max_sectors_per_chunk). Why max_compressed_size
is 1 is beyond me though, but I copied this value from the previous
version.

> >       /* locate the UDIF trailer */
> >       offset = dmg_find_koly_offset(bs->file);
> > @@ -210,13 +328,11 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
> >       }
> >       info_end = info_begin + count;
> >
> > +    /* begin of mish block */
> >       offset = info_begin + 0x100;
> >
> >       /* read offsets */
> > -    last_in_offset = last_out_offset = 0;
> >       while (offset < info_end) {
> > -        uint32_t type;
> > -
> >           ret = read_uint32(bs, offset, &count);
> >           if (ret < 0) {
> >               goto fail;
> > @@ -226,100 +342,18 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
> >           }
> >           offset += 4;
> >
> > -        ret = read_uint32(bs, offset, &type);
> > +        ret = dmg_read_mish_block(bs, &ds, offset, count);
> >           if (ret < 0) {
> >               goto fail;
> >           }
> > -
> > -        if (type == 0x6d697368 && count >= 244) {
> > -            size_t new_size;
> > -            uint32_t chunk_count;
> > -
> > -            offset += 4;
> > -            offset += 200;
> > -
> > -            chunk_count = (count - 204) / 40;
> > -            new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
> > -            s->types = g_realloc(s->types, new_size / 2);
> > -            s->offsets = g_realloc(s->offsets, new_size);
> > -            s->lengths = g_realloc(s->lengths, new_size);
> > -            s->sectors = g_realloc(s->sectors, new_size);
> > -            s->sectorcounts = g_realloc(s->sectorcounts, new_size);
> > -
> > -            for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
> > -                ret = read_uint32(bs, offset, &s->types[i]);
> > -                if (ret < 0) {
> > -                    goto fail;
> > -                }
> > -                offset += 4;
> > -                if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
> > -                    s->types[i] != 2) {
> > -                    if (s->types[i] == 0xffffffff && i > 0) {
> > -                        last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
> > -                        last_out_offset = s->sectors[i - 1] +
> > -                                          s->sectorcounts[i - 1];
> > -                    }
> > -                    chunk_count--;
> > -                    i--;
> > -                    offset += 36;
> > -                    continue;
> > -                }
> > -                offset += 4;
> > -
> > -                ret = read_uint64(bs, offset, &s->sectors[i]);
> > -                if (ret < 0) {
> > -                    goto fail;
> > -                }
> > -                s->sectors[i] += last_out_offset;
> > -                offset += 8;
> > -
> > -                ret = read_uint64(bs, offset, &s->sectorcounts[i]);
> > -                if (ret < 0) {
> > -                    goto fail;
> > -                }
> > -                offset += 8;
> > -
> > -                if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
> > -                    error_report("sector count %" PRIu64 " for chunk %" PRIu32
> > -                                 " is larger than max (%u)",
> > -                                 s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
> > -                    ret = -EINVAL;
> > -                    goto fail;
> > -                }
> > -
> > -                ret = read_uint64(bs, offset, &s->offsets[i]);
> > -                if (ret < 0) {
> > -                    goto fail;
> > -                }
> > -                s->offsets[i] += last_in_offset;
> > -                offset += 8;
> > -
> > -                ret = read_uint64(bs, offset, &s->lengths[i]);
> > -                if (ret < 0) {
> > -                    goto fail;
> > -                }
> > -                offset += 8;
> > -
> > -                if (s->lengths[i] > DMG_LENGTHS_MAX) {
> > -                    error_report("length %" PRIu64 " for chunk %" PRIu32
> > -                                 " is larger than max (%u)",
> > -                                 s->lengths[i], i, DMG_LENGTHS_MAX);
> > -                    ret = -EINVAL;
> > -                    goto fail;
> > -                }
> > -
> > -                update_max_chunk_size(s, i, &max_compressed_size,
> > -                                      &max_sectors_per_chunk);
> > -            }
> > -            s->n_chunks += chunk_count;
> > -        }
> > +        offset += count;
> 
> A little justification here might be nice: I assume the "count" always 
> includes the full byte count of the mish block, and the old code just 
> incremented the offset up to this amount. This is the only functional 
> difference in this refactor that I can see.

This code got me confused again and after looking at this document[1], I
have added some explanatory comments for the next version.

The "count" in the loop refers to the contents of a single resource.
Basically we have this:

Resource header
 - 4 bytes rsrc data offset (assume 0x100)
 - 4 bytes rsrc map offset (ignored)
 - 4 bytes rsrc data length
 - 4 bytes rsrc map length (ignored)

The block of length "rsrc data length" at offset 0x100 consists of one
or more resources, each described by:
 - 4 bytes resource length
 - "resource length" number of bytes. Its contents is a "mish" block for
   our purposes.

Indeed, in this patch I avoided the need to copy from the mish block
parser as it is not really necessary given that the resource length is
known.

 [1]: https://developer.apple.com/legacy/library/documentation/mac/pdf/MoreMacintoshToolbox.pdf#page=151

> >       }
> >
> >       /* initialize zlib engine */
> >       s->compressed_chunk = qemu_try_blockalign(bs->file,
> > -                                              max_compressed_size + 1);
> > +                                              ds.max_compressed_size + 1);
> >       s->uncompressed_chunk = qemu_try_blockalign(bs->file,
> > -                                                512 * max_sectors_per_chunk);
> > +                                                512 * ds.max_sectors_per_chunk);
> >       if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
> >           ret = -ENOMEM;
> >           goto fail;
> >
> 
> Thank you for splitting this monolithic function up into nicer pieces. 
> With or without addressing my comments above:
> 
> Reviewed-by: John Snow <jsnow@redhat.com>

Since I have only added some comments and renamed a variable, I'll carry
this R-b to the v2 patch.
-- 
Kind regards,
Peter
https://lekensteyn.nl

  reply	other threads:[~2015-01-03 11:05 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-27 15:01 [Qemu-devel] [PATCH 00/10] block/dmg: (compatibility) fixes and bzip2 support Peter Wu
2014-12-27 15:01 ` [Qemu-devel] [PATCH 01/10] block/dmg: properly detect the UDIF trailer Peter Wu
2015-01-02 23:58   ` John Snow
2015-01-03  9:39     ` Peter Wu
2015-01-06 13:35   ` Stefan Hajnoczi
2014-12-27 15:01 ` [Qemu-devel] [PATCH 02/10] block/dmg: extract mish block decoding functionality Peter Wu
2015-01-02 23:59   ` John Snow
2015-01-03 11:05     ` Peter Wu [this message]
2015-01-06 13:42   ` Stefan Hajnoczi
2014-12-27 15:01 ` [Qemu-devel] [PATCH 03/10] block/dmg: extract processing of resource forks Peter Wu
2015-01-03  0:01   ` John Snow
2015-01-03 11:24     ` Peter Wu
2014-12-27 15:01 ` [Qemu-devel] [PATCH 04/10] block/dmg: process a buffer instead of reading ints Peter Wu
2015-01-03  0:01   ` John Snow
2014-12-27 15:01 ` [Qemu-devel] [PATCH 05/10] block/dmg: validate chunk size to avoid overflow Peter Wu
2015-01-03  0:02   ` John Snow
2014-12-27 15:01 ` [Qemu-devel] [PATCH 06/10] block/dmg: process XML plists Peter Wu
2015-01-03  0:04   ` John Snow
2015-01-03 11:54     ` Peter Wu
2015-01-05 16:46       ` John Snow
2015-01-05 16:54   ` John Snow
2014-12-27 15:01 ` [Qemu-devel] [PATCH 07/10] block/dmg: set virtual size to a non-zero value Peter Wu
2015-01-03  0:04   ` John Snow
2014-12-27 15:01 ` [Qemu-devel] [PATCH 08/10] block/dmg: fix sector data offset calculation Peter Wu
2015-01-03  0:05   ` John Snow
2015-01-03 12:47     ` Peter Wu
2014-12-27 15:01 ` [Qemu-devel] [PATCH 09/10] block/dmg: support bzip2 block entry types Peter Wu
2015-01-05 19:32   ` John Snow
2015-01-07 10:29     ` Paolo Bonzini
2015-01-07 10:31       ` Peter Wu
2015-01-07 10:53         ` Paolo Bonzini
2014-12-27 15:01 ` [Qemu-devel] [PATCH 10/10] block/dmg: improve zeroes handling Peter Wu
2015-01-05 19:48   ` John Snow
2015-01-06  0:21     ` Peter Wu
2015-01-02 14:14 ` [Qemu-devel] [PATCH 00/10] block/dmg: (compatibility) fixes and bzip2 support Stefan Hajnoczi
2015-01-02 16:31   ` John Snow
2015-01-02 18:46     ` Peter Wu
2015-01-02 18:58       ` John Snow
2015-01-02 21:49         ` Peter Wu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1428333.LlTQDMCNAY@al \
    --to=peter@lekensteyn.nl \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).