* [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count
@ 2016-12-03 18:19 Eric Blake
2016-12-06 21:01 ` Max Reitz
0 siblings, 1 reply; 5+ messages in thread
From: Eric Blake @ 2016-12-03 18:19 UTC (permalink / raw)
To: qemu-devel; +Cc: Kevin Wolf, Max Reitz, open list:qcow2
Passing a byte offset, but sector count, when we ultimately
want to operate on cluster granularity, is madness. Clean up
the interfaces to take byte offset and count. Rename
qcow2_discard_clusters() and qcow2_zero_clusters() to the
shorter qcow2_discard() and qcow2_zero() to make sure backports
don't get confused by changed units.
Signed-off-by: Eric Blake <eblake@redhat.com>
---
2.9 material, depends on 'Don't strand clusters near 2G intervals
during commit'
block/qcow2.h | 8 ++++----
block/qcow2-cluster.c | 20 +++++++++++---------
block/qcow2-snapshot.c | 7 +++----
block/qcow2.c | 22 ++++++++++------------
4 files changed, 28 insertions(+), 29 deletions(-)
diff --git a/block/qcow2.h b/block/qcow2.h
index 1823414..a0d169b 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -543,10 +543,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
int compressed_size);
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors, enum qcow2_discard_type type, bool full_discard);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
- int flags);
+int qcow2_discard(BlockDriverState *bs, uint64_t offset, uint64_t count,
+ enum qcow2_discard_type type, bool full_discard);
+int qcow2_zero(BlockDriverState *bs, uint64_t offset, uint64_t count,
+ int flags);
int qcow2_expand_zero_clusters(BlockDriverState *bs,
BlockDriverAmendStatusCB *status_cb,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 928c1e2..3ee0815 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1511,19 +1511,17 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
return nb_clusters;
}
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors, enum qcow2_discard_type type, bool full_discard)
+int qcow2_discard(BlockDriverState *bs, uint64_t offset, uint64_t count,
+ enum qcow2_discard_type type, bool full_discard)
{
BDRVQcow2State *s = bs->opaque;
uint64_t end_offset;
uint64_t nb_clusters;
int ret;
- end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
- /* Round start up and end down */
+ /* Round start up and end down to cluster boundary */
+ end_offset = start_of_cluster(s, offset + count);
offset = align_offset(offset, s->cluster_size);
- end_offset = start_of_cluster(s, end_offset);
if (offset > end_offset) {
return 0;
@@ -1595,20 +1593,24 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
return nb_clusters;
}
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
- int flags)
+int qcow2_zero(BlockDriverState *bs, uint64_t offset, uint64_t count,
+ int flags)
{
BDRVQcow2State *s = bs->opaque;
uint64_t nb_clusters;
int ret;
+ /* Block layer guarantees cluster alignment */
+ assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+ assert(QEMU_IS_ALIGNED(count, s->cluster_size));
+
/* The zero flag is only supported by version 3 and newer */
if (s->qcow_version < 3) {
return -ENOTSUP;
}
/* Each L2 table is handled by its own loop iteration */
- nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+ nb_clusters = size_to_clusters(s, count);
s->cache_discards = true;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 0324243..fd088e5 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -440,10 +440,9 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
/* The VM state isn't needed any more in the active L1 table; in fact, it
* hurts by causing expensive COW for the next snapshot. */
- qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
- align_offset(sn->vm_state_size, s->cluster_size)
- >> BDRV_SECTOR_BITS,
- QCOW2_DISCARD_NEVER, false);
+ qcow2_discard(bs, qcow2_vm_state_offset(s),
+ align_offset(sn->vm_state_size, s->cluster_size),
+ QCOW2_DISCARD_NEVER, false);
#ifdef DEBUG_ALLOC
{
diff --git a/block/qcow2.c b/block/qcow2.c
index 369b542..601831f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2487,7 +2487,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
/* Whatever is left can use real zero clusters */
- ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags);
+ ret = qcow2_zero(bs, offset, count, flags);
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -2505,8 +2505,7 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
}
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
- QCOW2_DISCARD_REQUEST, false);
+ ret = qcow2_discard(bs, offset, count, QCOW2_DISCARD_REQUEST, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -2807,9 +2806,8 @@ fail:
static int qcow2_make_empty(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
- uint64_t start_sector;
- int sector_step = QEMU_ALIGN_DOWN(INT_MAX / BDRV_SECTOR_SIZE,
- s->cluster_size);
+ uint64_t offset;
+ int step = QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size);
int l1_clusters, ret = 0;
l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
@@ -2826,18 +2824,18 @@ static int qcow2_make_empty(BlockDriverState *bs)
/* This fallback code simply discards every active cluster; this is slow,
* but works in all cases */
- for (start_sector = 0; start_sector < bs->total_sectors;
- start_sector += sector_step)
+ for (offset = 0; offset < bs->total_sectors * BDRV_SECTOR_SIZE;
+ offset += step)
{
/* As this function is generally used after committing an external
* snapshot, QCOW2_DISCARD_SNAPSHOT seems appropriate. Also, the
* default action for this kind of discard is to pass the discard,
* which will ideally result in an actually smaller image file, as
* is probably desired. */
- ret = qcow2_discard_clusters(bs, start_sector * BDRV_SECTOR_SIZE,
- MIN(sector_step,
- bs->total_sectors - start_sector),
- QCOW2_DISCARD_SNAPSHOT, true);
+ ret = qcow2_discard(bs, offset,
+ MIN(step,
+ bs->total_sectors * BDRV_SECTOR_SIZE - offset),
+ QCOW2_DISCARD_SNAPSHOT, true);
if (ret < 0) {
break;
}
--
2.9.3
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count
2016-12-03 18:19 [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count Eric Blake
@ 2016-12-06 21:01 ` Max Reitz
2016-12-06 21:26 ` Eric Blake
0 siblings, 1 reply; 5+ messages in thread
From: Max Reitz @ 2016-12-06 21:01 UTC (permalink / raw)
To: Eric Blake, qemu-devel; +Cc: Kevin Wolf, open list:qcow2
[-- Attachment #1: Type: text/plain, Size: 4797 bytes --]
On 03.12.2016 19:19, Eric Blake wrote:
> Passing a byte offset, but sector count, when we ultimately
> want to operate on cluster granularity, is madness. Clean up
> the interfaces to take byte offset and count. Rename
> qcow2_discard_clusters() and qcow2_zero_clusters() to the
> shorter qcow2_discard() and qcow2_zero() to make sure backports
> don't get confused by changed units.
>
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>
> 2.9 material, depends on 'Don't strand clusters near 2G intervals
> during commit'
>
> block/qcow2.h | 8 ++++----
> block/qcow2-cluster.c | 20 +++++++++++---------
> block/qcow2-snapshot.c | 7 +++----
> block/qcow2.c | 22 ++++++++++------------
> 4 files changed, 28 insertions(+), 29 deletions(-)
>
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 1823414..a0d169b 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -543,10 +543,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
> int compressed_size);
>
> int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
> -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
> - int nb_sectors, enum qcow2_discard_type type, bool full_discard);
> -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
> - int flags);
> +int qcow2_discard(BlockDriverState *bs, uint64_t offset, uint64_t count,
> + enum qcow2_discard_type type, bool full_discard);
> +int qcow2_zero(BlockDriverState *bs, uint64_t offset, uint64_t count,
> + int flags);
>
> int qcow2_expand_zero_clusters(BlockDriverState *bs,
> BlockDriverAmendStatusCB *status_cb,
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 928c1e2..3ee0815 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1511,19 +1511,17 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
> return nb_clusters;
> }
>
> -int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
> - int nb_sectors, enum qcow2_discard_type type, bool full_discard)
> +int qcow2_discard(BlockDriverState *bs, uint64_t offset, uint64_t count,
> + enum qcow2_discard_type type, bool full_discard)
> {
> BDRVQcow2State *s = bs->opaque;
> uint64_t end_offset;
> uint64_t nb_clusters;
> int ret;
>
> - end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
> -
> - /* Round start up and end down */
> + /* Round start up and end down to cluster boundary */
> + end_offset = start_of_cluster(s, offset + count);
> offset = align_offset(offset, s->cluster_size);
> - end_offset = start_of_cluster(s, end_offset);
>
> if (offset > end_offset) {
> return 0;
> @@ -1595,20 +1593,24 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
> return nb_clusters;
> }
>
> -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
> - int flags)
> +int qcow2_zero(BlockDriverState *bs, uint64_t offset, uint64_t count,
> + int flags)
Hmm, I personally liked qcow2_zero_clusters() better. qcow2_zero()
doesn't really express that it means the verb "to zero".
Also, while you are making a good point why the function should be
renamed, qcow2_zero_clusters() was much more accurate because offset and
count are expected to be cluster-aligned.
The only alternative I can come up with would be "qcow2_write_zeroes";
that at least solves the first issue I have with this, but not the
second one...
> {
> BDRVQcow2State *s = bs->opaque;
> uint64_t nb_clusters;
> int ret;
>
> + /* Block layer guarantees cluster alignment */
Hm, it's rather qcow2_co_pwrite_zeroes(), isn't it? The block layer will
split unaligned requests into head, body and tail and it will still
submit head and tail (though separately).
As far as I can see, it's qcow2_co_pwrite_zeroes() which then guarantees
that only the aligned part gets through to qcow2_zero().
The patch looks good apart from these nit picks, though.
Max
> + assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
> + assert(QEMU_IS_ALIGNED(count, s->cluster_size));
> +
> /* The zero flag is only supported by version 3 and newer */
> if (s->qcow_version < 3) {
> return -ENOTSUP;
> }
>
> /* Each L2 table is handled by its own loop iteration */
> - nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
> + nb_clusters = size_to_clusters(s, count);
>
> s->cache_discards = true;
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 512 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count
2016-12-06 21:01 ` Max Reitz
@ 2016-12-06 21:26 ` Eric Blake
2016-12-06 21:31 ` Max Reitz
0 siblings, 1 reply; 5+ messages in thread
From: Eric Blake @ 2016-12-06 21:26 UTC (permalink / raw)
To: Max Reitz, qemu-devel; +Cc: Kevin Wolf, open list:qcow2
[-- Attachment #1: Type: text/plain, Size: 2933 bytes --]
On 12/06/2016 03:01 PM, Max Reitz wrote:
> On 03.12.2016 19:19, Eric Blake wrote:
>> Passing a byte offset, but sector count, when we ultimately
>> want to operate on cluster granularity, is madness. Clean up
>> the interfaces to take byte offset and count. Rename
>> qcow2_discard_clusters() and qcow2_zero_clusters() to the
>> shorter qcow2_discard() and qcow2_zero() to make sure backports
>> don't get confused by changed units.
>>
>> Signed-off-by: Eric Blake <eblake@redhat.com>
>> ---
>>
>> 2.9 material, depends on 'Don't strand clusters near 2G intervals
>> during commit'
>>
>> @@ -1595,20 +1593,24 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
>> return nb_clusters;
>> }
>>
>> -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
>> - int flags)
>> +int qcow2_zero(BlockDriverState *bs, uint64_t offset, uint64_t count,
>> + int flags)
>
> Hmm, I personally liked qcow2_zero_clusters() better. qcow2_zero()
> doesn't really express that it means the verb "to zero".
>
> Also, while you are making a good point why the function should be
> renamed, qcow2_zero_clusters() was much more accurate because offset and
> count are expected to be cluster-aligned.
>
> The only alternative I can come up with would be "qcow2_write_zeroes";
> that at least solves the first issue I have with this, but not the
> second one...
Maybe qcow2_cluster_zeroize() and qcow2_cluster_discard()? That gets the
benefit of the rename (to force all callers to use the right semantics),
while still being legible as an object-verb naming: the action
('discard' or 'zeroize') is performed on 'qcow2 cluster' objects.
>
>> {
>> BDRVQcow2State *s = bs->opaque;
>> uint64_t nb_clusters;
>> int ret;
>>
>> + /* Block layer guarantees cluster alignment */
>
> Hm, it's rather qcow2_co_pwrite_zeroes(), isn't it? The block layer will
> split unaligned requests into head, body and tail and it will still
> submit head and tail (though separately).
Hmm, good point. I'll have to come up with some way to reword that.
>
> As far as I can see, it's qcow2_co_pwrite_zeroes() which then guarantees
> that only the aligned part gets through to qcow2_zero().
>
>
> The patch looks good apart from these nit picks, though.
>
> Max
>
>> + assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
>> + assert(QEMU_IS_ALIGNED(count, s->cluster_size));
And since I'm adding assertions that the zero operation is never
attempted on unaligned parts, maybe I should also add asserts that
discards are never unaligned, perhaps as a prereq patch.
I'll wait a bit and see if anyone else has better naming ideas for the
functions, before I try to send a v2.
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count
2016-12-06 21:26 ` Eric Blake
@ 2016-12-06 21:31 ` Max Reitz
2016-12-06 21:48 ` Eric Blake
0 siblings, 1 reply; 5+ messages in thread
From: Max Reitz @ 2016-12-06 21:31 UTC (permalink / raw)
To: Eric Blake, qemu-devel; +Cc: Kevin Wolf, open list:qcow2
[-- Attachment #1: Type: text/plain, Size: 2174 bytes --]
On 06.12.2016 22:26, Eric Blake wrote:
> On 12/06/2016 03:01 PM, Max Reitz wrote:
>> On 03.12.2016 19:19, Eric Blake wrote:
>>> Passing a byte offset, but sector count, when we ultimately
>>> want to operate on cluster granularity, is madness. Clean up
>>> the interfaces to take byte offset and count. Rename
>>> qcow2_discard_clusters() and qcow2_zero_clusters() to the
>>> shorter qcow2_discard() and qcow2_zero() to make sure backports
>>> don't get confused by changed units.
>>>
>>> Signed-off-by: Eric Blake <eblake@redhat.com>
>>> ---
>>>
>>> 2.9 material, depends on 'Don't strand clusters near 2G intervals
>>> during commit'
>>>
>
>>> @@ -1595,20 +1593,24 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
>>> return nb_clusters;
>>> }
>>>
>>> -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
>>> - int flags)
>>> +int qcow2_zero(BlockDriverState *bs, uint64_t offset, uint64_t count,
>>> + int flags)
>>
>> Hmm, I personally liked qcow2_zero_clusters() better. qcow2_zero()
>> doesn't really express that it means the verb "to zero".
>>
>> Also, while you are making a good point why the function should be
>> renamed, qcow2_zero_clusters() was much more accurate because offset and
>> count are expected to be cluster-aligned.
>>
>> The only alternative I can come up with would be "qcow2_write_zeroes";
>> that at least solves the first issue I have with this, but not the
>> second one...
>
> Maybe qcow2_cluster_zeroize() and qcow2_cluster_discard()?
I think qcow2_discard() is fine (it works with any alignment (even
though it will only discard whole clusters) and "discard" is mainly used
as a verb, so at least I'm not confused there). I like
qcow2_cluster_zeroize() if nothing else then for the "zeroize" alone. :-)
Max
> That gets the
> benefit of the rename (to force all callers to use the right semantics),
> while still being legible as an object-verb naming: the action
> ('discard' or 'zeroize') is performed on 'qcow2 cluster' objects.
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 512 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count
2016-12-06 21:31 ` Max Reitz
@ 2016-12-06 21:48 ` Eric Blake
0 siblings, 0 replies; 5+ messages in thread
From: Eric Blake @ 2016-12-06 21:48 UTC (permalink / raw)
To: Max Reitz, qemu-devel; +Cc: Kevin Wolf, open list:qcow2
[-- Attachment #1: Type: text/plain, Size: 1057 bytes --]
On 12/06/2016 03:31 PM, Max Reitz wrote:
>>>
>>> The only alternative I can come up with would be "qcow2_write_zeroes";
>>> that at least solves the first issue I have with this, but not the
>>> second one...
>>
>> Maybe qcow2_cluster_zeroize() and qcow2_cluster_discard()?
>
> I think qcow2_discard() is fine (it works with any alignment (even
> though it will only discard whole clusters)
Except that I think with my recent fix to qcow2_make_empty(), we can now
assert that all callers actually pass cluster-aligned values. Hence my
other comment that maybe we want to (as a prereq patch) actually assert
that callers are aligned, at which point keeping cluster in the name is
once again worthwhile.
> and "discard" is mainly used
> as a verb, so at least I'm not confused there). I like
> qcow2_cluster_zeroize() if nothing else then for the "zeroize" alone. :-)
Okay, then I have a good idea of what name to use.
--
Eric Blake eblake redhat com +1-919-301-3266
Libvirt virtualization library http://libvirt.org
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2016-12-06 21:49 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-12-03 18:19 [Qemu-devel] [PATCH] qcow2: Discard/zero clusters by byte count Eric Blake
2016-12-06 21:01 ` Max Reitz
2016-12-06 21:26 ` Eric Blake
2016-12-06 21:31 ` Max Reitz
2016-12-06 21:48 ` Eric Blake
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).