* [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Zhouyi Zhou @ 2012-05-13  8:03 UTC
  To: qemu-devel

Hi all,

  Sometimes, qemu/kvm-0.1x hangs in an endless loop in qcow2_alloc_cluster_offset.
  After some investigation, I found the following in the function
  posix_aio_process_queue(void *opaque):
440             ret = qemu_paio_error(acb);
441             if (ret == ECANCELED) {
442                 /* remove the request */
443                 *pacb = acb->next;
444                 qemu_aio_release(acb);
445                 result = 1;
446             } else if (ret != EINPROGRESS) {
  At line 444 the acb is released, but acb->common.opaque is not; that one is
only released later, when the guest OS triggers ide_dma_cancel, which in turn
calls qcow_aio_cancel, and qcow_aio_cancel does not check whether its argument
is still on the in-flight list.
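
  To illustrate the resulting hang, here is a minimal toy model (my own
sketch, not QEMU code: it uses <sys/queue.h> rather than qemu-queue.h, and
L2Meta/cluster_allocs only loosely mirror the qcow2 structures):

/*
 * A cancelled request frees its control block but leaves its L2Meta on
 * the in-flight list, so the allocator's dependency scan keeps finding
 * an overlap that can never complete.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

typedef struct L2Meta {
    unsigned long offset;        /* first cluster covered by the request */
    unsigned long nb_clusters;   /* number of clusters it covers */
    LIST_ENTRY(L2Meta) next_in_flight;
} L2Meta;

static LIST_HEAD(InFlight, L2Meta) cluster_allocs =
    LIST_HEAD_INITIALIZER(cluster_allocs);

/* Caricature of the dependency check in qcow2_alloc_cluster_offset. */
static int alloc_cluster_offset(unsigned long offset, unsigned long n)
{
    L2Meta *old;
    int spins = 0;

again:
    LIST_FOREACH(old, &cluster_allocs, next_in_flight) {
        if (offset < old->offset + old->nb_clusters &&
            old->offset < offset + n) {
            /* The real code waits here for 'old' to complete; a freed
             * request never completes, so it would retry forever.  The
             * cap only exists so this demo terminates. */
            if (++spins > 3) {
                printf("stale in-flight entry: endless loop\n");
                return -1;
            }
            goto again;
        }
    }
    return 0;
}

int main(void)
{
    L2Meta *m = calloc(1, sizeof(*m));
    m->offset = 100;
    m->nb_clusters = 8;
    LIST_INSERT_HEAD(&cluster_allocs, m, next_in_flight);

    /* The buggy cancel path releases the owning acb without doing
     * LIST_REMOVE(m, next_in_flight); the entry stays behind.  (The
     * memory is kept alive here so the demo stays well-defined.) */

    return alloc_cluster_offset(102, 4) ? EXIT_FAILURE : EXIT_SUCCESS;
}

In real qemu there is no retry cap, so the dependency wait in
qcow2_alloc_cluster_offset spins forever on the stale entry.
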
  The fix is as follows (against Debian 6's qemu-kvm 0.12.5):
#######################################
--- block/qcow2.h~      2010-07-27 08:43:53.000000000 +0800
+++ block/qcow2.h       2012-05-13 15:51:39.000000000 +0800
@@ -143,6 +143,7 @@
     QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests;
 
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
+    int inflight;
 } QCowL2Meta;
--- block/qcow2.c~  2012-05-13 15:57:09.000000000 +0800
+++ block/qcow2.c       2012-05-13 15:57:24.000000000 +0800
@@ -349,6 +349,10 @@
     QCowAIOCB *acb = (QCowAIOCB *)blockacb;
     if (acb->hd_aiocb)
         bdrv_aio_cancel(acb->hd_aiocb);
+    if (acb->l2meta.inflight) {
+        QLIST_REMOVE(&acb->l2meta, next_in_flight);
+        acb->l2meta.inflight = 0;
+    }
     qemu_aio_release(acb);
 }
 
@@ -506,6 +510,7 @@
     acb->n = 0;
     acb->cluster_offset = 0;
     acb->l2meta.nb_clusters = 0;
+    acb->l2meta.inflight = 0;
     QLIST_INIT(&acb->l2meta.dependent_requests);
     return acb;
 }
@@ -534,6 +539,7 @@
     /* Take the request off the list of running requests */
     if (m->nb_clusters != 0) {
         QLIST_REMOVE(m, next_in_flight);
+        m->inflight = 0;
     }
 
     /*
@@ -632,6 +638,7 @@
 fail:
     if (acb->l2meta.nb_clusters != 0) {
         QLIST_REMOVE(&acb->l2meta, next_in_flight);
+        acb->l2meta.inflight = 0;
     }
 done:
     if (acb->qiov->niov > 1)
--- block/qcow2-cluster.c~      2010-07-27 08:43:53.000000000 +0800
+++ block/qcow2-cluster.c       2012-05-13 15:53:53.000000000 +0800
@@ -827,6 +827,7 @@
     m->offset = offset;
     m->n_start = n_start;
     m->nb_clusters = nb_clusters;
+    m->inflight = 1;
 
 out:
     m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);

  Thanks for investigating,
Zhouyi


-- 
Zhouyi Zhou <yizhouzhou@ict.ac.cn>


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Kevin Wolf @ 2012-05-14 12:20 UTC
  To: Zhouyi Zhou; +Cc: Michael Tokarev, qemu-devel, Andreas Färber

On 13.05.2012 10:03, Zhouyi Zhou wrote:
> Hi all,
>
>   Sometimes, qemu/kvm-0.1x hangs in an endless loop in qcow2_alloc_cluster_offset.
> [...]

The patch looks reasonable to me. Note however that while it fixes the
hang, it still causes cluster leaks. I'm not sure if someone is
interested in picking these up for old stable releases. Andreas, I think
you were going to take 0.15? The first version that doesn't have the
problem is 1.0.

Kevin


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Andreas Färber @ 2012-06-12 13:33 UTC
  To: Kevin Wolf, Zhouyi Zhou; +Cc: Michael Tokarev, qemu-devel, Bruce Rogers

On 14.05.2012 14:20, Kevin Wolf wrote:
> On 13.05.2012 10:03, Zhouyi Zhou wrote:
>> Hi all,
>>
>>   Sometimes, qemu/kvm-0.1x hangs in an endless loop in qcow2_alloc_cluster_offset.
>> [...]
> 
> The patch looks reasonable to me. Note however that while it fixes the
> hang, it still causes cluster leaks. I'm not sure if someone is
> interested in picking these up for old stable releases. Andreas, I think
> you were going to take 0.15? The first version that doesn't have the
> problem is 1.0.

Kevin, the policy as I understood it is to cherry-pick patches from
qemu.git into qemu-stable-x.y.git, so I don't think it would be right for
me to apply this patch to stable-0.15. I don't spot a particular qcow2 fix
among the 0.15 backports that I have now pushed. Do you have a pointer to
the commit(s) that would fix this issue so that I can recheck?

Zhouyi, could you test git://git.qemu.org/qemu-stable-0.15.git and check
if the problem is still there?

Thanks,
Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Kevin Wolf @ 2012-06-12 13:44 UTC
  To: Andreas Färber
  Cc: Bruce Rogers, Michael Tokarev, Zhouyi Zhou, qemu-devel

On 12.06.2012 15:33, Andreas Färber wrote:
> On 14.05.2012 14:20, Kevin Wolf wrote:
>> On 13.05.2012 10:03, Zhouyi Zhou wrote:
>>> Hi all,
>>>
>>>   Sometimes, qemu/kvm-0.1x hangs in an endless loop in qcow2_alloc_cluster_offset.
>>> [...]
>>
>> The patch looks reasonable to me. Note however that while it fixes the
>> hang, it still causes cluster leaks. I'm not sure if someone is
>> interested in picking these up for old stable releases. Andreas, I think
>> you were going to take 0.15? The first version that doesn't have the
>> problem is 1.0.
> 
> Kevin, the policy as I understood it is to cherry-pick patches from
> qemu.git into qemu-stable-x.y.git, so I don't think it would be right for
> me to apply this patch to stable-0.15. I don't spot a particular qcow2 fix
> among the 0.15 backports that I have now pushed. Do you have a pointer to
> the commit(s) that would fix this issue so that I can recheck?

It's "fixed" as a side effect of the block layer conversion to
coroutines. Not exactly the kind of patches you'd want to cherry-pick
for stable-0.15.

The better fix for 0.15 could be to backport the new behaviour of
coroutine based requests with bdrv_aio_cancel:

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

Using that as the implementation for qcow2_aio_cancel should be safe and
fix this problem.

Kevin


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Andreas Färber @ 2012-10-12 15:52 UTC
  To: Kevin Wolf
  Cc: qemu-devel, qemu-stable, Michael Tokarev, Zhouyi Zhou,
	Bruce Rogers

On 12.06.2012 15:44, Kevin Wolf wrote:
> On 12.06.2012 15:33, Andreas Färber wrote:
>> [...]
> 
> It's "fixed" as a side effect of the block layer conversion to
> coroutines. Not exactly the kind of patches you'd want to cherry-pick
> for stable-0.15.
> 
> The better fix for 0.15 could be to backport the new behaviour of
> coroutine based requests with bdrv_aio_cancel:
> 
> static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
> {
>     qemu_aio_flush();
> }
> 
> Using that as the implementation for qcow2_aio_cancel should be safe and
> fix this problem.

Kevin, I have stable-0.15 in a state where I'm about to tag 0.15.2 now.
The original patch does not have a Signed-off-by nor your Acked-by, so I
can't apply it as-is. stable-0.15 does not have coroutines, so I don't
understand what exactly you're suggesting as an alternative here: backport
the whole coroutine feature, including the coroutine function above? Or just
call qemu_aio_flush() in place of what? This is the old qcow2_aio_cancel():

static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs,
                                         BlockDriverCompletionFunc *cb,
                                         void *opaque)
{
    BDRVQcowState *s = bs->opaque;
    int ret;

    ret = qcow2_cache_flush(bs, s->l2_table_cache);
    if (ret < 0) {
        return NULL;
    }

    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
    if (ret < 0) {
        return NULL;
    }

    return bdrv_aio_flush(bs->file, cb, opaque);
}


Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Kevin Wolf @ 2012-10-15  9:13 UTC
  To: Andreas Färber
  Cc: qemu-devel, qemu-stable, Michael Tokarev, Zhouyi Zhou,
	Bruce Rogers

On 12.10.2012 17:52, Andreas Färber wrote:
> On 12.06.2012 15:44, Kevin Wolf wrote:
>> On 12.06.2012 15:33, Andreas Färber wrote:
>>> [...]
>>
>> It's "fixed" as a side effect of the block layer conversion to
>> coroutines. Not exactly the kind of patches you'd want to cherry-pick
>> for stable-0.15.
>>
>> The better fix for 0.15 could be to backport the new behaviour of
>> coroutine based requests with bdrv_aio_cancel:
>>
>> static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
>> {
>>     qemu_aio_flush();
>> }
>>
>> Using that as the implementation for qcow2_aio_cancel should be safe and
>> fix this problem.
> 
> Kevin, I have stable-0.15 in a state where I'm about to tag 0.15.2 now.
> The original patch does not have a Signed-off-by nor your Acked-by, so I
> can't apply it as-is. stable-0.15 does not have coroutines, so I don't
> understand what exactly you're suggesting as an alternative here: backport
> the whole coroutine feature, including the coroutine function above? Or just
> call qemu_aio_flush() in place of what? This is the old qcow2_aio_cancel():

No, that was qcow2_aio_flush. ;-)

What I'm suggesting (not even compile tested!) is:

Signed-off-by: Kevin Wolf <kwolf@redhat.com>

diff --git a/block/qcow2.c b/block/qcow2.c
index 48e1b95..d665675 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -388,10 +388,7 @@ typedef struct QCowAIOCB {

 static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
 {
-    QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
-    if (acb->hd_aiocb)
-        bdrv_aio_cancel(acb->hd_aiocb);
-    qemu_aio_release(acb);
+    qemu_aio_flush();
 }

 static AIOPool qcow2_aio_pool = {


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Andreas Färber @ 2012-10-15 14:28 UTC
  To: Kevin Wolf
  Cc: qemu-devel, qemu-stable, Michael Tokarev, Zhouyi Zhou,
	Bruce Rogers

On 15.10.2012 11:13, Kevin Wolf wrote:
> On 12.10.2012 17:52, Andreas Färber wrote:
>> On 12.06.2012 15:44, Kevin Wolf wrote:
>>> On 12.06.2012 15:33, Andreas Färber wrote:
>>>> On 14.05.2012 14:20, Kevin Wolf wrote:
>>>>> On 13.05.2012 10:03, Zhouyi Zhou wrote:
>>>>>>   Sometimes, qemu/kvm-0.1x hangs in an endless loop in qcow2_alloc_cluster_offset.
>>>>>
>>>>> The patch looks reasonable to me. Note however that while it fixes the
>>>>> hang, it still causes cluster leaks. I'm not sure if someone is
>>>>> interested in picking these up for old stable releases. Andreas, I think
>>>>> you were going to take 0.15? The first version that doesn't have the
>>>>> problem is 1.0.
>>>>
>>> It's "fixed" as a side effect of the block layer conversion to
>>> coroutines. Not exactly the kind of patches you'd want to cherry-pick
>>> for stable-0.15.
>>>
>>> The better fix for 0.15 could be to backport the new behaviour of
>>> coroutine based requests with bdrv_aio_cancel:
>>>
>>> static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
>>> {
>>>     qemu_aio_flush();
>>> }
>>>
>>> Using that as the implementation for qcow2_aio_cancel should be safe and
>>> fix this problem.
>>
>> [...] stable-0.15 does not have coroutines, so I don't
>> understand what exactly you're suggesting as an alternative here: backport
>> the whole coroutine feature, including the coroutine function above? Or just
>> call qemu_aio_flush() in place of what? This is the old qcow2_aio_cancel():
> 
> No, that was qcow2_aio_flush. ;-)

Ugh, what a copy-and-paste error... ;-)

> What I'm suggesting (not even compile tested!) is:
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 48e1b95..d665675 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -388,10 +388,7 @@ typedef struct QCowAIOCB {
> 
>  static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
>  {
> -    QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
> -    if (acb->hd_aiocb)
> -        bdrv_aio_cancel(acb->hd_aiocb);
> -    qemu_aio_release(acb);
> +    qemu_aio_flush();
>  }
> 
>  static AIOPool qcow2_aio_pool = {

Compiles fine. Is there a particular test case to invoke this code path?

Does this attempt to fix the cluster leaks you mentioned as well, or
just the cluster allocation endless loop?

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Kevin Wolf @ 2012-10-15 14:46 UTC
  To: Andreas Färber
  Cc: qemu-devel, qemu-stable, Michael Tokarev, Zhouyi Zhou,
	Bruce Rogers

On 15.10.2012 16:28, Andreas Färber wrote:
>> What I'm suggesting (not even compile tested!) is:
>>
>> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>>
>> diff --git a/block/qcow2.c b/block/qcow2.c
>> index 48e1b95..d665675 100644
>> --- a/block/qcow2.c
>> +++ b/block/qcow2.c
>> @@ -388,10 +388,7 @@ typedef struct QCowAIOCB {
>>
>>  static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
>>  {
>> -    QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
>> -    if (acb->hd_aiocb)
>> -        bdrv_aio_cancel(acb->hd_aiocb);
>> -    qemu_aio_release(acb);
>> +    qemu_aio_flush();
>>  }
>>
>>  static AIOPool qcow2_aio_pool = {
> 
> Compiles fine. Is there a particular test case to invoke this code path?

As far as I know, the common way to trigger this was an IDE reset. However,
I'm not sure what you need to do with a Linux guest if you want it to
reset the IDE controller...

> Does this attempt to fix the cluster leaks you mentioned as well, or
> just the cluster allocation endless loop?

Instead of cancelling the in-flight requests it waits for their
completion, so in the end we should be in a consistent state without
cluster leaks.

Kevin


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: 周洲仪 (Zhouyi Zhou) @ 2012-10-16  3:32 UTC
  To: Kevin Wolf
  Cc: qemu-devel, qemu-stable, Michael Tokarev, Andreas Färber,
	Bruce Rogers


Hi Kevin,
> On 15.10.2012 16:28, Andreas Färber wrote:
> >> What I'm suggesting (not even compile tested!) is:
> >>
> >> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> >>
> >> diff --git a/block/qcow2.c b/block/qcow2.c
> >> index 48e1b95..d665675 100644
> >> --- a/block/qcow2.c
> >> +++ b/block/qcow2.c
> >> @@ -388,10 +388,7 @@ typedef struct QCowAIOCB {
> >>
> >>  static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
> >>  {
> >> -    QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
> >> -    if (acb->hd_aiocb)
> >> -        bdrv_aio_cancel(acb->hd_aiocb);
> >> -    qemu_aio_release(acb);
> >> +    qemu_aio_flush();
> >>  }
> >>
> >>  static AIOPool qcow2_aio_pool = {
> > 
> > Compiles fine. Is there a particular test case to invoke this code path?
> 
> As far as I know, the common way to trigger this was an IDE reset. However,
> I'm not sure what you need to do with a Linux guest if you want it to
> reset the IDE controller...

I first noticed this bug during a stress test of FreeBSD 9.0 guests. FreeBSD
issues an ATA reset in its timeout-servicing function ata_timeout. One way to
raise the frequency of ata_timeout invocations is to modify the function
ata_begin_transaction to use a shorter timeout, and then run a disk-intensive
stress test.

Under FreeBSD, invoking "atacontrol reinit ata0" may perform the reset.
Under Linux, "hdparm -w" might also do the job (dangerous; a safer way might
be to modify the Linux kernel to shorten the ATA request timeout).
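
As far as I can tell, "hdparm -w" is a thin wrapper around the
HDIO_DRIVE_RESET ioctl, so a Linux guest-side trigger might look like the
sketch below (untested assumption: the guest must use the old IDE driver,
this must run as root, and /dev/hda is only a placeholder device node;
dangerous on real hardware):

/* Ask the kernel to reset the (emulated) IDE drive; with qemu's IDE
 * emulation this should end up in ide_dma_cancel and therefore in
 * qcow2_aio_cancel on the host. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hdreg.h>

int main(void)
{
    int fd = open("/dev/hda", O_RDONLY | O_NONBLOCK);  /* placeholder */
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, HDIO_DRIVE_RESET, NULL) < 0) {       /* hdparm -w */
        perror("HDIO_DRIVE_RESET");
        close(fd);
        return 1;
    }
    close(fd);
    return 0;
}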

> > Does this attempt to fix the cluster leaks you mentioned as well, or
> > just the cluster allocation endless loop?
> 
> Instead of cancelling the in-flight requests it waits for their
> completion, so in the end we should be in a consistent state without
> cluster leaks.
> 
Zhouyi


* Re: [Qemu-devel] fixing qemu-0.1X endless loop in qcow2_alloc_cluster_offset
From: Andreas Färber @ 2012-11-09 17:21 UTC
  To: Kevin Wolf
  Cc: Zhouyi Zhou, Bruce Rogers, Michael Tokarev, qemu-devel,
	qemu-stable

On 15.10.2012 11:13, Kevin Wolf wrote:
> What I'm suggesting (not even compile tested!) is:
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 48e1b95..d665675 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -388,10 +388,7 @@ typedef struct QCowAIOCB {
> 
>  static void qcow2_aio_cancel(BlockDriverAIOCB *blockacb)
>  {
> -    QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
> -    if (acb->hd_aiocb)
> -        bdrv_aio_cancel(acb->hd_aiocb);
> -    qemu_aio_release(acb);
> +    qemu_aio_flush();
>  }
> 
>  static AIOPool qcow2_aio_pool = {

Thanks, we've applied this to stable-0.15.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

