From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
dietmar@proxmox.com, Stefan Hajnoczi <stefanha@redhat.com>,
Markus Armbruster <armbru@redhat.com>
Subject: [Qemu-devel] [RFC 6/8] Add VMA backup archive writer Python module
Date: Sat, 9 Mar 2013 23:22:26 +0100 [thread overview]
Message-ID: <1362867748-30528-7-git-send-email-stefanha@redhat.com> (raw)
In-Reply-To: <1362867748-30528-1-git-send-email-stefanha@redhat.com>
The vma module provides an interface for writing VMA backup archives:
writer = vma.Writer(open('test.vma', 'wb'))
writer.add_config('guest.xml', '<guest></guest>')
stream_id = writer.add_stream('foo', # name
65536) # size
writer.write(stream_id, 0, '\0' * 32768)
writer.write(stream_id, 32768, '\1' * 32768)
writer.close()
The Writer handles sequential writes that are not cluster-aligned. This
is typically only the vmstate. Disk writes are 64 KB aligned in
practice.
VMA supports zero regions within a 64 KB cluster. The vma module does
not implement this; the full cluster is always written.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
vma.py | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 236 insertions(+)
create mode 100644 vma.py
diff --git a/vma.py b/vma.py
new file mode 100644
index 0000000..236ba14
--- /dev/null
+++ b/vma.py
@@ -0,0 +1,236 @@
+# VMA writer module
+#
+# Copyright 2013 Red Hat, Inc. and/or its affiliates
+#
+# Authors:
+# Stefan Hajnoczi <stefanha@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+import array
+import struct
+import hashlib
+import uuid
+import time
+
+__all__ = ['Writer']  # only the Writer class is exported
+
+VMA_MAGIC = 0x564d4100  # bytes 'V', 'M', 'A', 0x00; first field of the archive header
+VMA_VERSION = 1  # second field of the archive header
+VMA_MAX_CONFIGS = 256  # number of slots in each config pointer table (names and data)
+VMA_CLUSTER_SIZE = 65536  # 64 KB; stream data is written in units of this size
+VMA_BLOCKS_PER_EXTENT = 59  # number of cluster (blockinfo) slots in one extent
+VMA_EXTENT_MAGIC = 0x564d4145  # bytes 'V', 'M', 'A', 'E'; first field of each extent header
+
+header_struct = struct.Struct('>II16cQ16cIII')  # big-endian: magic, version, uuid[16], creation time, md5[16], blob buffer offset, blob buffer size, header size
+dev_info_struct = struct.Struct('>IIQQQ')  # big-endian: name blob offset, 0, stream size, 0, 0 (as packed by build_dev_info)
+extent_struct = struct.Struct('>I2xH16c16c')  # big-endian: magic, 2 pad bytes, block count, uuid[16], md5[16]
+le16_struct = struct.Struct('<H')  # little-endian 16-bit length prefix stored before each blob
+be32_struct = struct.Struct('>I')  # big-endian 32-bit blob offset used in the config tables
+be64_struct = struct.Struct('>Q')  # big-endian 64-bit blockinfo entry
+
+class Writer(object):
+ def __init__(self, fobj):
+ self.fobj = fobj
+ self.uuid = uuid.uuid4().bytes
+ self.streams = []
+ self.blobs = ['\0']
+ self.blob_offset = 1
+ self.config_names = []
+ self.config_data = []
+ self.header_written = False
+ self.align_bufs = {}
+ self.extent = []
+
+ def alloc_blob(self, blob):
+ '''Return allocated blob buffer offset'''
+ offset = self.blob_offset
+ self.blobs.append(le16_struct.pack(len(blob)))
+ self.blobs.append(blob)
+ self.blob_offset += le16_struct.size + len(blob)
+ return offset
+
+ def alloc_blob_str(self, s):
+ '''Return allocated blob buffer offset for string'''
+ return self.alloc_blob(s + '\0')
+
+ def build_dev_info(self):
+ '''Return a buffer with device infos'''
+ bufs = ['\0' * dev_info_struct.size]
+ for name, size in self.streams:
+ name_ptr = self.alloc_blob_str(name)
+ buf = dev_info_struct.pack(name_ptr, 0, size, 0, 0)
+ bufs.append(buf)
+ padding = (255 - len(self.streams)) * dev_info_struct.size
+ bufs.append('\0' * padding)
+ return ''.join(bufs)
+
+ def build_blob_buffer(self):
+ '''Return a buffer with blob data'''
+ return ''.join(self.blobs)
+
+ def build_config(self):
+ '''Return a buffer with config names and data'''
+ bufs = []
+
+ for ptr in self.config_names:
+ bufs.append(be32_struct.pack(ptr))
+ padding = (VMA_MAX_CONFIGS - len(self.config_names)) * be32_struct.size
+ bufs.append('\0' * padding)
+
+ for ptr in self.config_data:
+ bufs.append(be32_struct.pack(ptr))
+ padding = (VMA_MAX_CONFIGS - len(self.config_data)) * be32_struct.size
+ bufs.append('\0' * padding)
+
+ return ''.join(bufs)
+
+ def write_header(self):
+ # Build header pieces
+ config = self.build_config()
+ dev_info = self.build_dev_info()
+ blob_buffer = self.build_blob_buffer()
+
+ # Size the header
+ blob_buffer_offset = header_struct.size + 1984 + \
+ len(config) + 4 + len(dev_info)
+ header_size = blob_buffer_offset + len(blob_buffer)
+
+ # Build header without checksum
+ fields = (VMA_MAGIC,
+ VMA_VERSION) + \
+ tuple(self.uuid) + \
+ (int(time.mktime(time.gmtime())),) + \
+ tuple('\0' * 16) + \
+ (blob_buffer_offset,
+ len(blob_buffer),
+ header_size)
+ header = header_struct.pack(*fields)
+
+ # Checksum header
+ buf = ''.join([header,
+ '\0' * 1984,
+ config,
+ '\0' * 4, # VMAHeader.dev_info is unaligned (vma.h bug)
+ dev_info,
+ blob_buffer])
+ digest = hashlib.md5(buf).digest()
+ buf = array.array('c', buf) # string does not support assignment
+ buf[32:32 + 16] = array.array('c', digest)
+
+ self.fobj.write(buf)
+
+ def add_config(self, name, data):
+ name_ptr = self.alloc_blob_str(name)
+ data_ptr = self.alloc_blob(data)
+ self.config_names.append(name_ptr)
+ self.config_data.append(name_ptr)
+
+ def add_stream(self, name, size):
+ self.streams.append((name, size))
+ return len(self.streams)
+
+ def build_blockinfo(self):
+ '''Return a blockinfo buffer for the current extent'''
+ bufs = []
+ for stream_id, offset, _ in self.extent:
+ buf = be64_struct.pack(0xffff000000000000 | \
+ (stream_id << 32) | \
+ offset // VMA_CLUSTER_SIZE)
+ bufs.append(buf)
+ padding = (VMA_BLOCKS_PER_EXTENT - len(self.extent)) * be64_struct.size
+ bufs.append('\0' * padding)
+ return ''.join(bufs)
+
+ def write_extent(self):
+ blockinfo = self.build_blockinfo()
+ block_count = len(self.extent) * (VMA_CLUSTER_SIZE // 4096)
+
+ # Build header without checksum
+ fields = (VMA_EXTENT_MAGIC,
+ block_count) + \
+ tuple(self.uuid) + \
+ tuple('\0' * 16)
+ header = extent_struct.pack(*fields)
+
+ # Checksum header
+ buf = ''.join([header, blockinfo])
+ digest = hashlib.md5(buf).digest()
+ buf = array.array('c', buf) # string does not support assignment
+ buf[24:24 + 16] = array.array('c', digest)
+
+ self.fobj.write(buf)
+ for _, _, data in self.extent:
+ self.fobj.write(data)
+
+ self.extent = []
+
+ def append_cluster(self, stream_id, offset, data):
+ '''Append one cluster to the current extent'''
+ self.extent.append((stream_id, offset, data))
+ if len(self.extent) == VMA_BLOCKS_PER_EXTENT:
+ self.write_extent()
+
+ def align_write(self, stream_id, offset, data):
+ '''Buffer writes whose length is not cluster-aligned (vmstate)'''
+ # Fast path for aligned writes
+ mod = len(data) % VMA_CLUSTER_SIZE
+ if stream_id not in self.align_bufs and mod == 0:
+ return False, offset, data
+
+ # Add data to buffer
+ bufs, start, total = self.align_bufs.get(stream_id, ([], offset, 0))
+ assert start + total == offset # must be sequential
+ bufs.append(data)
+ total += len(data)
+ self.align_bufs[stream_id] = (bufs, start, total)
+
+ # Stop if we don't have a cluster yet
+ if total < VMA_CLUSTER_SIZE:
+ return True, None, None
+
+ # Take as many clusters as possible
+ end = (total // VMA_CLUSTER_SIZE) * VMA_CLUSTER_SIZE
+ aligned = []
+ nbytes = 0
+ while nbytes < end:
+ buf = bufs.pop(0)
+ aligned.append(buf)
+ nbytes += len(buf)
+ if nbytes > end:
+ buf = aligned[-1]
+ keep = end - (nbytes - len(buf))
+ left, right = buf[:keep], buf[keep:]
+ aligned[-1] = left
+ bufs.insert(0, right)
+ self.align_bufs[stream_id] = (bufs, start + end, total - end)
+ return False, start, ''.join(aligned)
+
+ def write(self, stream_id, offset, data):
+ if not self.header_written:
+ self.write_header()
+ self.header_written = True
+
+ need_more, offset, data = self.align_write(stream_id, offset, data)
+ if need_more:
+ return
+
+ for i in range(len(data) // VMA_CLUSTER_SIZE):
+ self.append_cluster(stream_id, offset, data[:VMA_CLUSTER_SIZE])
+ data = data[VMA_CLUSTER_SIZE:]
+ offset += VMA_CLUSTER_SIZE
+
+ def close(self):
+ # Flush unaligned data
+ for stream_id in self.align_bufs.keys():
+ bufs, start, total = self.align_bufs[stream_id]
+ assert total < VMA_CLUSTER_SIZE
+ padding = VMA_CLUSTER_SIZE - total
+ bufs.append('\0' * padding)
+ self.append_cluster(stream_id, start, ''.join(bufs))
+ self.align_bufs = {}
+
+ # Write final extent, if necessary
+ if self.extent:
+ self.write_extent()
--
1.8.1.4
next prev parent reply other threads:[~2013-03-09 22:23 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-09 22:22 [Qemu-devel] [RFC 0/8] block: Live backup prototype Stefan Hajnoczi
2013-03-09 22:22 ` [Qemu-devel] [RFC 1/8] block: add virtual_size to query-block QMP output Stefan Hajnoczi
2013-03-11 17:35 ` Eric Blake
2013-03-09 22:22 ` [Qemu-devel] [RFC 2/8] add basic backup support to block driver Stefan Hajnoczi
2013-03-09 22:22 ` [Qemu-devel] [RFC 3/8] backup: write to BlockDriverState instead of BackupDumpFunc Stefan Hajnoczi
2013-03-10 10:05 ` Dietmar Maurer
2013-03-10 11:13 ` Stefan Hajnoczi
2013-03-09 22:22 ` [Qemu-devel] [RFC 4/8] block: add block_backup QMP command Stefan Hajnoczi
2013-03-14 21:46 ` Eric Blake
2013-03-14 21:52 ` Eric Blake
2013-03-15 8:38 ` Stefan Hajnoczi
2013-04-11 12:32 ` Paolo Bonzini
2013-03-09 22:22 ` [Qemu-devel] [RFC 5/8] Add nbd server Python module Stefan Hajnoczi
2013-03-09 22:22 ` Stefan Hajnoczi [this message]
2013-03-09 22:22 ` [Qemu-devel] [RFC 7/8] Add vma-writer.py tool Stefan Hajnoczi
2013-03-09 22:22 ` [Qemu-devel] [RFC 8/8] Add backup.py tool Stefan Hajnoczi
2013-03-10 9:14 ` [Qemu-devel] [RFC 0/8] block: Live backup prototype Dietmar Maurer
2013-03-10 10:19 ` Stefan Hajnoczi
2013-03-10 10:38 ` Dietmar Maurer
2013-03-10 11:09 ` Stefan Hajnoczi
2013-03-10 10:50 ` Dietmar Maurer
2013-03-10 11:10 ` Stefan Hajnoczi
2013-03-11 8:58 ` Dietmar Maurer
2013-03-11 9:26 ` Dietmar Maurer
2013-03-11 14:27 ` Stefan Hajnoczi
2013-03-11 15:00 ` Dietmar Maurer
2013-03-11 17:11 ` Stefan Hajnoczi
2013-03-10 9:57 ` Dietmar Maurer
2013-03-10 10:41 ` Stefan Hajnoczi
2013-03-12 9:18 ` Kevin Wolf
2013-03-12 10:50 ` Stefan Hajnoczi
2013-03-12 11:15 ` Dietmar Maurer
2013-03-12 12:18 ` Stefan Hajnoczi
2013-03-12 11:22 ` Kevin Wolf
2013-03-12 11:31 ` Dietmar Maurer
2013-03-12 11:37 ` Dietmar Maurer
2013-03-12 12:17 ` Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1362867748-30528-7-git-send-email-stefanha@redhat.com \
--to=stefanha@redhat.com \
--cc=armbru@redhat.com \
--cc=dietmar@proxmox.com \
--cc=kwolf@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).