From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1FY9Hw-0001wI-8x for qemu-devel@nongnu.org; Mon, 24 Apr 2006 18:12:12 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1FY9Ht-0001w1-Pq for qemu-devel@nongnu.org; Mon, 24 Apr 2006 18:12:11 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1FY9Ht-0001vu-Mf for qemu-devel@nongnu.org; Mon, 24 Apr 2006 18:12:09 -0400 Received: from [209.204.185.216] (helo=mail.bitblocks.com) by monty-python.gnu.org with esmtp (Exim 4.52) id 1FY9KD-0002Oo-FK for qemu-devel@nongnu.org; Mon, 24 Apr 2006 18:14:33 -0400 Received: from bitblocks.com (localhost [127.0.0.1]) by mail.bitblocks.com (Postfix) with ESMTP id 9BFF42946A for ; Mon, 24 Apr 2006 15:12:06 -0700 (PDT) MIME-Version: 1.0 From: Bakul Shah Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0" Content-ID: <26775.1145916455.0@bitblocks.com> Date: Mon, 24 Apr 2006 15:12:06 -0700 Sender: bakul@bitblocks.com Message-Id: <20060424221206.9BFF42946A@mail.bitblocks.com> Subject: [Qemu-devel] patch to avoid space allocation for zero blocks in the qcow format Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org ------- =_aaaaaaaaaa0 Content-Type: text/plain; charset="us-ascii" Content-ID: <26775.1145916455.1@bitblocks.com> The below patch avoids allocating space in the qcow image format when a block of zeroes is being written. No attempt is made to free up space if a previously written block of nonzero data is being overwritten with zeroes. This patch makes a big difference in space use for cases where the s/w wants to clear large swaths of a disk (such as Plan 9 with fossil+venti). 
It has a negligible effect on performance for writing nonzero data and gives a significant performance improvement for zero data as the much bigger overhead of allocating and writing is avoided. As an added benefit you can simulate extremely large disks and create huge files of zeroes to test whether your guest OS can handle a 2^63-byte disk. -- bakul ------- =_aaaaaaaaaa0 Content-Type: text/plain; charset="us-ascii" Content-ID: <26775.1145916455.2@bitblocks.com> --- cvs/block-qcow.c Fri Apr 21 12:46:44 2006 +++ my/block-qcow.c Sun Apr 23 17:37:54 2006 @@ -256,12 +256,18 @@ * 'compressed_size'. 'compressed_size' must be > 0 and < * cluster_size * + * 'z' is + * + * 1 if data to be written is all zeroes. + * set it to 0 if the cluster was previously allocated + * else leave it as 1 and don't allocate + * * return 0 if not allocated. */ static uint64_t get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate, int compressed_size, - int n_start, int n_end) + int n_start, int n_end, int* z) { BDRVQcowState *s = bs->opaque; int min_index, i, j, l1_index, l2_index; @@ -273,7 +279,7 @@ l2_offset = s->l1_table[l1_index]; new_l2_table = 0; if (!l2_offset) { - if (!allocate) + if (!allocate || *z) return 0; /* allocate a new l2 entry */ l2_offset = lseek(s->fd, 0, SEEK_END); @@ -296,10 +302,13 @@ } } l2_table = s->l2_cache + (i << s->l2_bits); + if (z) *z = 0; goto found; } } /* not found: load a new entry in the least used one */ + if (z && *z) /* no allocation if we are writing a zero buf */ + return 0; min_index = 0; min_count = 0xffffffff; for(i = 0; i < L2_CACHE_SIZE; i++) { @@ -393,7 +402,7 @@ int index_in_cluster, n; uint64_t cluster_offset; - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, 0); index_in_cluster = sector_num & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) @@ -459,7 +468,7 @@ uint64_t cluster_offset; while 
(nb_sectors > 0) { - cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0); + cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0, 0); index_in_cluster = sector_num & (s->cluster_sectors - 1); n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) @@ -487,11 +496,13 @@ return 0; } +static uint8_t zerobuf[0x10000] = {0}; // XXX depends on cluster_sectors + static int qcow_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { BDRVQcowState *s = bs->opaque; - int ret, index_in_cluster, n; + int ret, index_in_cluster, n, z; uint64_t cluster_offset; while (nb_sectors > 0) { @@ -499,9 +510,11 @@ n = s->cluster_sectors - index_in_cluster; if (n > nb_sectors) n = nb_sectors; + z = !buf[0] && !buf[n*512-1] && memcmp(buf, zerobuf, n*512) == 0; cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0, index_in_cluster, - index_in_cluster + n); + index_in_cluster + n, &z); + if (!z) { if (!cluster_offset) return -1; lseek(s->fd, cluster_offset + index_in_cluster * 512, SEEK_SET); @@ -514,6 +527,7 @@ } if (ret != n * 512) return -1; + } nb_sectors -= n; sector_num += n; buf += n * 512; @@ -679,8 +693,9 @@ /* could not compress: write normal cluster */ qcow_write(bs, sector_num, buf, s->cluster_sectors); } else { + int z = 0; cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, - out_len, 0, 0); + out_len, 0, 0, &z); cluster_offset &= s->cluster_offset_mask; lseek(s->fd, cluster_offset, SEEK_SET); if (write(s->fd, out_buf, out_len) != out_len) { ------- =_aaaaaaaaaa0--