Re: [Qemu-devel] [PATCH v3 6/6] vmdk: Update metadata for multiple clusters

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Fam Zheng <famz@redhat.com>
To: Ashijeet Acharya <ashijeetacharya@gmail.com>
Cc: kwolf@redhat.com, jsnow@redhat.com, mreitz@redhat.com,
	stefanha@gmail.com, qemu-devel@nongnu.org, qemu-block@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v3 6/6] vmdk: Update metadata for multiple clusters
Date: Fri, 21 Apr 2017 16:15:04 +0800	[thread overview]
Message-ID: <20170421081504.GA8342@lemon.lan> (raw)
In-Reply-To: <1491057878-27868-7-git-send-email-ashijeetacharya@gmail.com>

On Sat, 04/01 20:14, Ashijeet Acharya wrote:
> Include a next pointer in VmdkMetaData struct to point to the previous
> allocated L2 table. Modify vmdk_L2update to start updating metadata for
> allocation of multiple clusters at once.
> 
> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com>

This is the metadata part of the coalesed allocation. I think patch 3 is
functionally incomplete without these changes, and is perhaps broken because
metadata is not handled correctly.

Such an "intermediate functional regression" is not good in a series, which we
need to avoid.

> ---
>  block/vmdk.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 111 insertions(+), 25 deletions(-)
> 
> diff --git a/block/vmdk.c b/block/vmdk.c
> index 9456ddd..c7675db 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -137,6 +137,8 @@ typedef struct VmdkMetaData {
>      int valid;
>      uint32_t *l2_cache_entry;
>      uint32_t nb_clusters;
> +    uint32_t offset;
> +    struct VmdkMetaData *next;
>  } VmdkMetaData;
>  
>  typedef struct VmdkGrainMarker {
> @@ -263,6 +265,12 @@ static inline uint64_t size_to_clusters(VmdkExtent *extent, uint64_t size)
>      return (DIV_ROUND_UP(size + round_off_size, BDRV_SECTOR_SIZE * 128) - 1);
>  }
>  
> +static inline int64_t vmdk_align_offset(int64_t offset, int n)
> +{
> +    offset = (offset + n - 1) & ~(n - 1);
> +    return offset;
> +}
> +
>  static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
>  {
>      char *desc;
> @@ -1037,29 +1045,88 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
>      }
>  }
>  
> -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
> -                         uint32_t offset)
> +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent,
> +                                      VmdkMetaData *m_data, bool zeroed)
>  {
> -    offset = cpu_to_le32(offset);
> +    int i;
> +    uint32_t offset, temp_offset;
> +    int *l2_table_array;
> +    int l2_array_size;
> +
> +    if (zeroed) {
> +        temp_offset = VMDK_GTE_ZEROED;
> +    } else {
> +        temp_offset = m_data->offset;
> +    }
> +
> +    temp_offset = cpu_to_le32(temp_offset);
> +
> +    l2_array_size = sizeof(uint32_t) * m_data->nb_clusters;
> +    l2_table_array = qemu_try_blockalign(extent->file->bs,
> +                                    vmdk_align_offset(l2_array_size, 512));

Indentation is off.

Use QEMU_ALIGN_UP, instead of vmdk_align_offset.

512 is a magic number, use BDRV_SECTOR_SIZE.

> +    if (l2_table_array == NULL) {
> +        return VMDK_ERROR;
> +    }
> +    memset(l2_table_array, 0, vmdk_align_offset(l2_array_size, 512));
> +
>      /* update L2 table */
> +    offset = temp_offset;
> +    for (i = 0; i < m_data->nb_clusters; i++) {
> +        l2_table_array[i] = offset;
> +        if (!zeroed) {
> +            offset += 128;

Something is going wrong here with endianness on BE host, I believe.

> +        }
> +    }
> +
>      if (bdrv_pwrite_sync(extent->file,
> -                ((int64_t)m_data->l2_offset * 512)
> -                    + (m_data->l2_index * sizeof(offset)),
> -                &offset, sizeof(offset)) < 0) {
> +            ((int64_t)m_data->l2_offset * 512)
> +                + ((m_data->l2_index) * sizeof(offset)),
> +                         l2_table_array, l2_array_size) < 0) {

You can fix the indentation while changing these lines. If not, don't change it,
or at least don't make it uglier.

>          return VMDK_ERROR;
>      }
> +
>      /* update backup L2 table */
>      if (extent->l1_backup_table_offset != 0) {
>          m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
>          if (bdrv_pwrite_sync(extent->file,
>                      ((int64_t)m_data->l2_offset * 512)
> -                        + (m_data->l2_index * sizeof(offset)),
> -                    &offset, sizeof(offset)) < 0) {
> +                        + ((m_data->l2_index) * sizeof(offset)),
> +                                l2_table_array, l2_array_size) < 0) {

Same here.

>              return VMDK_ERROR;
>          }
>      }
> +
> +    offset = temp_offset;
>      if (m_data->l2_cache_entry) {
> -        *m_data->l2_cache_entry = offset;
> +        for (i = 0; i < m_data->nb_clusters; i++) {
> +            *m_data->l2_cache_entry = offset;
> +            m_data->l2_cache_entry++;
> +
> +            if (!zeroed) {
> +                offset += 128;
> +            }
> +        }
> +    }
> +
> +    qemu_vfree(l2_table_array);
> +    return VMDK_OK;
> +}
> +
> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
> +                         bool zeroed)
> +{
> +    int ret;
> +
> +    while (m_data->next != NULL) {
> +        VmdkMetaData *next;
> +
> +        ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        next = m_data->next;
> +        m_data = next;

Why not simply "m_data = m_data->next" and drop "next" variable?

>      }
>  
>      return VMDK_OK;
> @@ -1271,7 +1338,7 @@ exit:
>   */
>  static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent,
>                          uint64_t offset, uint64_t *cluster_offset,
> -                        int64_t *bytes, VmdkMetaData *m_data,
> +                        int64_t *bytes, VmdkMetaData **m_data,
>                          bool allocate, uint32_t *total_alloc_clusters)
>  {
>      int l1_index, l2_offset, l2_index;
> @@ -1280,6 +1347,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent,
>      uint32_t nb_clusters;
>      bool zeroed = false;
>      uint64_t skip_start_bytes, skip_end_bytes;
> +    VmdkMetaData *old_m_data;
>      int ret;
>  
>      ret = get_cluster_table(extent, offset, &l1_index, &l2_offset,
> @@ -1330,13 +1398,21 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent,
>          if (ret < 0) {
>              return ret;
>          }
> -        if (m_data) {
> -            m_data->valid = 1;
> -            m_data->l1_index = l1_index;
> -            m_data->l2_index = l2_index;
> -            m_data->l2_offset = l2_offset;
> -            m_data->l2_cache_entry = &l2_table[l2_index];
> -            m_data->nb_clusters = nb_clusters;
> +
> +        if (*m_data) {
> +            old_m_data = *m_data;
> +            *m_data = g_malloc0(sizeof(**m_data));
> +
> +            **m_data = (VmdkMetaData) {
> +                .valid            =    1,
> +                .l1_index         =    l1_index,
> +                .l2_index         =    l2_index,
> +                .l2_offset        =    l2_offset,
> +                .l2_cache_entry   =    &l2_table[l2_index],
> +                .nb_clusters      =    nb_clusters,
> +                .offset           =    cluster_sector,
> +                .next             =    old_m_data,
> +            };

I think if the new m_data can be merged into the old, there is no need to
allocate a new one.

>          }
>      }
>      *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
> @@ -1365,7 +1441,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent,
>   */
>  static int vmdk_alloc_cluster_offset(BlockDriverState *bs,
>                                       VmdkExtent *extent,
> -                                     VmdkMetaData *m_data, uint64_t offset,
> +                                     VmdkMetaData **m_data, uint64_t offset,
>                                       bool allocate, uint64_t *cluster_offset,
>                                       int64_t bytes,
>                                       uint32_t *total_alloc_clusters)
> @@ -1385,8 +1461,8 @@ static int vmdk_alloc_cluster_offset(BlockDriverState *bs,
>      new_cluster_offset = 0;
>      *cluster_offset = 0;
>      n_bytes = 0;
> -    if (m_data) {
> -        m_data->valid = 0;
> +    if (*m_data) {
> +        (*m_data)->valid = 0;
>      }
>  
>      /* due to L2 table margins all bytes may not get allocated at once */
> @@ -1768,9 +1844,11 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
>      uint64_t cluster_offset;
>      uint64_t bytes_done = 0;
>      uint64_t extent_size;
> -    VmdkMetaData m_data;
> +    VmdkMetaData *m_data;
>      uint32_t total_alloc_clusters = 0;
>  
> +    m_data = g_malloc0(sizeof(*m_data));
> +
>      if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
>          error_report("Wrong offset: offset=0x%" PRIx64
>                       " total_sectors=0x%" PRIx64,
> @@ -1779,6 +1857,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
>      }
>  
>      while (bytes > 0) {
> +        m_data->next = NULL;
>          extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
>          if (!extent) {
>              return -EIO;
> @@ -1825,7 +1904,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
>                                          total_alloc_clusters;
>                  if (!zero_dry_run) {
>                      /* update L2 tables */
> -                    if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
> +                    if (vmdk_L2update(extent, m_data, zeroed)
>                              != VMDK_OK) {
>                          return -EIO;
>                      }
> @@ -1839,10 +1918,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
>              if (ret) {
>                  return ret;
>              }
> -            if (m_data.valid) {
> +            if (m_data->valid) {
>                  /* update L2 tables */
> -                if (vmdk_L2update(extent, &m_data,
> -                                  cluster_offset >> BDRV_SECTOR_BITS)
> +                if (vmdk_L2update(extent, m_data, zeroed)
>                          != VMDK_OK) {
>                      return -EIO;
>                  }
> @@ -1852,6 +1930,13 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
>          offset += n_bytes;
>          bytes_done += n_bytes;
>  
> +        while (m_data->next != NULL) {
> +            VmdkMetaData *next;
> +            next = m_data->next;
> +            g_free(m_data);
> +            m_data = next;
> +        }
> +
>          /* update CID on the first write every time the virtual disk is
>           * opened */
>          if (!s->cid_updated) {
> @@ -1862,6 +1947,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
>              s->cid_updated = true;
>          }
>      }
> +    g_free(m_data);

This is weird, you free all but the last m_data with a while loop, a few lines
above, and this one with a separate g_free().

Please use one loop:

    for (p = m_data; p; p = next) {
        next = p->next;
        g_free(p);
    }

>      return 0;
>  }
>  
> -- 
> 2.6.2
>

next prev parent reply	other threads:[~2017-04-21  8:15 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-01 14:44 [Qemu-devel] [PATCH v3 0/6] Optiomize VMDK I/O by allocating multiple clusters Ashijeet Acharya
2017-04-01 14:44 ` [Qemu-devel] [PATCH v3 1/6] vmdk: Move vmdk_find_offset_in_cluster() to the top Ashijeet Acharya
2017-04-10 13:04   ` Ashijeet Acharya
2017-04-19 12:14     ` Fam Zheng
2017-04-01 14:44 ` [Qemu-devel] [PATCH v3 2/6] vmdk: Rename get_whole_cluster() to vmdk_perform_cow() Ashijeet Acharya
2017-04-19 12:14   ` Fam Zheng
2017-04-01 14:44 ` [Qemu-devel] [PATCH v3 3/6] vmdk: New functions to assist allocating multiple clusters Ashijeet Acharya
2017-04-19 12:56   ` Fam Zheng
2017-04-19 15:13     ` Ashijeet Acharya
2017-04-20  0:47       ` Fam Zheng
2017-04-01 14:44 ` [Qemu-devel] [PATCH v3 4/6] vmdk: Rename get_cluster_offset() to vmdk_get_cluster_offset() Ashijeet Acharya
2017-04-19 12:57   ` Fam Zheng
2017-04-19 15:21     ` Ashijeet Acharya
2017-04-20  0:45       ` Fam Zheng
2017-04-01 14:44 ` [Qemu-devel] [PATCH v3 5/6] vmdk: Set maximum bytes allocated in one cycle Ashijeet Acharya
2017-04-19 13:00   ` Fam Zheng
2017-04-21 14:53     ` Ashijeet Acharya
2017-04-22  4:27       ` Ashijeet Acharya
2017-04-01 14:44 ` [Qemu-devel] [PATCH v3 6/6] vmdk: Update metadata for multiple clusters Ashijeet Acharya
2017-04-21  8:15   ` Fam Zheng [this message]
2017-04-22  4:13     ` Ashijeet Acharya

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170421081504.GA8342@lemon.lan \
    --to=famz@redhat.com \
    --cc=ashijeetacharya@gmail.com \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.