qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Liguori <anthony@codemonkey.ws>
To: Liu Ping Fan <qemulist@gmail.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>,
	Liu Ping Fan <pingfank@linux.vnet.ibm.com>,
	qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock
Date: Fri, 22 Jun 2012 15:06:21 -0500	[thread overview]
Message-ID: <4FE4D03D.4010009@codemonkey.ws> (raw)
In-Reply-To: <1340290158-11036-3-git-send-email-qemulist@gmail.com>

On 06/21/2012 09:49 AM, Liu Ping Fan wrote:
> In order to break up the big lock, use a per-cpu lock in kvm_cpu_exec()
> to protect against races with other cpus' accesses to env->apic_state and
> related fields in env.
> Also, we need to protect against run_on_cpu().
>
> The race conditions can look like this:
> 1.  vcpu-1 sends an IPI to vcpu-2
>      vcpu-3 sends an IPI to vcpu-2
>      An open window exists for concurrent access to vcpu-2's apic_state and env
>
> 2. run_on_cpu() writes env->queued_work_last, while flush_queued_work()
>     reads it
>
> Signed-off-by: Liu Ping Fan<pingfank@linux.vnet.ibm.com>
> ---
>   cpus.c    |    6 ++++--
>   hw/apic.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>   hw/pc.c   |    8 +++++++-
>   kvm-all.c |   13 +++++++++++--
>   4 files changed, 76 insertions(+), 9 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 554f7bc..ac99afe 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -649,6 +649,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
>
>       wi.func = func;
>       wi.data = data;
> +    qemu_mutex_lock(env->cpu_lock);
>       if (!env->queued_work_first) {
>           env->queued_work_first =&wi;
>       } else {
> @@ -657,6 +658,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
>       env->queued_work_last =&wi;
>       wi.next = NULL;
>       wi.done = false;
> +    qemu_mutex_unlock(env->cpu_lock);
>
>       qemu_cpu_kick(env);
>       while (!wi.done) {
> @@ -718,7 +720,7 @@ static void qemu_tcg_wait_io_event(void)
>   static void qemu_kvm_wait_io_event(CPUArchState *env)
>   {
>       while (cpu_thread_is_idle(env)) {
> -        qemu_cond_wait(env->halt_cond,&qemu_global_mutex);
> +        qemu_cond_wait(env->halt_cond, env->cpu_lock);
>       }
>
>       qemu_kvm_eat_signals(env);
> @@ -729,8 +731,8 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
>   {
>       CPUArchState *env = arg;
>       int r;
> +    qemu_mutex_lock_cpu(env);
>
> -    qemu_mutex_lock(&qemu_global_mutex);
>       qemu_thread_get_self(env->thread);
>       env->thread_id = qemu_get_thread_id();
>       cpu_single_env = env;
> diff --git a/hw/apic.c b/hw/apic.c
> index 4eeaf88..b999a40 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -22,6 +22,7 @@
>   #include "host-utils.h"
>   #include "trace.h"
>   #include "pc.h"
> +#include "qemu-thread.h"
>
>   #define MAX_APIC_WORDS 8
>
> @@ -94,6 +95,7 @@ static int get_highest_priority_int(uint32_t *tab)
>       return -1;
>   }
>
> +/* Caller must hold the lock */
>   static void apic_sync_vapic(APICCommonState *s, int sync_type)
>   {
>       VAPICState vapic_state;
> @@ -141,11 +143,13 @@ static void apic_sync_vapic(APICCommonState *s, int sync_type)
>       }
>   }
>
> +/* Caller must hold lock */
>   static void apic_vapic_base_update(APICCommonState *s)
>   {
>       apic_sync_vapic(s, SYNC_TO_VAPIC);
>   }
>
> +/* Caller must hold the lock */
>   static void apic_local_deliver(APICCommonState *s, int vector)
>   {
>       uint32_t lvt = s->lvt[vector];
> @@ -175,9 +179,11 @@ static void apic_local_deliver(APICCommonState *s, int vector)
>               (lvt&  APIC_LVT_LEVEL_TRIGGER))
>               trigger_mode = APIC_TRIGGER_LEVEL;
>           apic_set_irq(s, lvt&  0xff, trigger_mode);
> +        break;
>       }
>   }
>
> +/* Caller must hold the lock */
>   void apic_deliver_pic_intr(DeviceState *d, int level)
>   {
>       APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -200,9 +206,12 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
>       }
>   }
>
> +/* Must hold lock */
>   static void apic_external_nmi(APICCommonState *s)
>   {
> +    qemu_mutex_lock_cpu(s->cpu_env);
>       apic_local_deliver(s, APIC_LVT_LINT1);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>   }
>
>   #define foreach_apic(apic, deliver_bitmask, code) \
> @@ -215,7 +224,9 @@ static void apic_external_nmi(APICCommonState *s)
>                   if (__mask&  (1<<  __j)) {\
>                       apic = local_apics[__i * 32 + __j];\
>                       if (apic) {\
> +                        qemu_mutex_lock_cpu(apic->cpu_env);\
>                           code;\
> +                        qemu_mutex_unlock_cpu(apic->cpu_env);\
>                       }\
>                   }\
>               }\
> @@ -244,7 +255,9 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
>                   if (d>= 0) {
>                       apic_iter = local_apics[d];
>                       if (apic_iter) {
> +                        qemu_mutex_lock_cpu(apic_iter->cpu_env);
>                           apic_set_irq(apic_iter, vector_num, trigger_mode);
> +                        qemu_mutex_unlock_cpu(apic_iter->cpu_env);
>                       }
>                   }
>               }
> @@ -293,6 +306,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
>       apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
>   }
>
> +/* Caller must hold lock */
>   static void apic_set_base(APICCommonState *s, uint64_t val)
>   {
>       s->apicbase = (val&  0xfffff000) |
> @@ -305,6 +319,7 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
>       }
>   }
>
> +/* caller must hold lock */
>   static void apic_set_tpr(APICCommonState *s, uint8_t val)
>   {
>       /* Updates from cr8 are ignored while the VAPIC is active */
> @@ -314,12 +329,14 @@ static void apic_set_tpr(APICCommonState *s, uint8_t val)
>       }
>   }
>
> +/* caller must hold lock */
>   static uint8_t apic_get_tpr(APICCommonState *s)
>   {
>       apic_sync_vapic(s, SYNC_FROM_VAPIC);
>       return s->tpr>>  4;
>   }
>
> +/* Caller must hold the lock */
>   static int apic_get_ppr(APICCommonState *s)
>   {
>       int tpr, isrv, ppr;
> @@ -348,6 +365,7 @@ static int apic_get_arb_pri(APICCommonState *s)
>    * 0  - no interrupt,
>    *>0 - interrupt number
>    */
> +/* Caller must hold cpu_lock */
>   static int apic_irq_pending(APICCommonState *s)
>   {
>       int irrv, ppr;
> @@ -363,6 +381,7 @@ static int apic_irq_pending(APICCommonState *s)
>       return irrv;
>   }
>
> +/* caller must ensure the lock has been taken */
>   /* signal the CPU if an irq is pending */
>   static void apic_update_irq(APICCommonState *s)
>   {
> @@ -380,11 +399,13 @@ static void apic_update_irq(APICCommonState *s)
>   void apic_poll_irq(DeviceState *d)
>   {
>       APICCommonState *s = APIC_COMMON(d);
> -
> +    qemu_mutex_lock_cpu(s->cpu_env);
>       apic_sync_vapic(s, SYNC_FROM_VAPIC);
>       apic_update_irq(s);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>   }
>
> +/* caller must hold the lock */
>   static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
>   {
>       apic_report_irq_delivered(!get_bit(s->irr, vector_num));
> @@ -407,6 +428,7 @@ static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
>       apic_update_irq(s);
>   }
>
> +/* caller must hold the lock */
>   static void apic_eoi(APICCommonState *s)
>   {
>       int isrv;
> @@ -415,6 +437,7 @@ static void apic_eoi(APICCommonState *s)
>           return;
>       reset_bit(s->isr, isrv);
>       if (!(s->spurious_vec&  APIC_SV_DIRECTED_IO)&&  get_bit(s->tmr, isrv)) {
> +        //fix me,need to take extra lock for the bus?
>           ioapic_eoi_broadcast(isrv);
>       }
>       apic_sync_vapic(s, SYNC_FROM_VAPIC | SYNC_TO_VAPIC);
> @@ -480,18 +503,23 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
>   static void apic_startup(APICCommonState *s, int vector_num)
>   {
>       s->sipi_vector = vector_num;
> +    qemu_mutex_lock_cpu(s->cpu_env);
>       cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>   }
>
>   void apic_sipi(DeviceState *d)
>   {
>       APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> -
> +    qemu_mutex_lock_cpu(s->cpu_env);
>       cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
>
> -    if (!s->wait_for_sipi)
> +    if (!s->wait_for_sipi) {
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           return;
> +    }
>       cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>       s->wait_for_sipi = 0;
>   }
>
> @@ -543,6 +571,7 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
>       apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
>   }
>
> +/* Caller must take lock */
>   int apic_get_interrupt(DeviceState *d)
>   {
>       APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -572,6 +601,7 @@ int apic_get_interrupt(DeviceState *d)
>       return intno;
>   }
>
> +/* Caller must hold the cpu_lock */
>   int apic_accept_pic_intr(DeviceState *d)
>   {
>       APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -589,6 +619,7 @@ int apic_accept_pic_intr(DeviceState *d)
>       return 0;
>   }
>
> +/* Caller hold lock */
>   static uint32_t apic_get_current_count(APICCommonState *s)
>   {
>       int64_t d;
> @@ -619,9 +650,10 @@ static void apic_timer_update(APICCommonState *s, int64_t current_time)
>   static void apic_timer(void *opaque)
>   {
>       APICCommonState *s = opaque;
> -
> +    qemu_mutex_lock_cpu(s->cpu_env);
>       apic_local_deliver(s, APIC_LVT_TIMER);
>       apic_timer_update(s, s->next_time);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>   }
>
>   static uint32_t apic_mem_readb(void *opaque, target_phys_addr_t addr)
> @@ -664,18 +696,22 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
>           val = 0x11 | ((APIC_LVT_NB - 1)<<  16); /* version 0x11 */
>           break;
>       case 0x08:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>           apic_sync_vapic(s, SYNC_FROM_VAPIC);
>           if (apic_report_tpr_access) {
>               cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_READ);
>           }
>           val = s->tpr;
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x09:
>           val = apic_get_arb_pri(s);
>           break;
>       case 0x0a:
>           /* ppr */
> +        qemu_mutex_lock_cpu(s->cpu_env);
>           val = apic_get_ppr(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x0b:
>           val = 0;
> @@ -712,7 +748,9 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
>           val = s->initial_count;
>           break;
>       case 0x39:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>           val = apic_get_current_count(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x3e:
>           val = s->divide_conf;
> @@ -767,18 +805,22 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>       case 0x03:
>           break;
>       case 0x08:
> +       qemu_mutex_lock_cpu(s->cpu_env);
>           if (apic_report_tpr_access) {
>               cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_WRITE);
>           }
>           s->tpr = val;
>           apic_sync_vapic(s, SYNC_TO_VAPIC);
>           apic_update_irq(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x09:
>       case 0x0a:
>           break;
>       case 0x0b: /* EOI */
> +        qemu_mutex_lock_cpu(s->cpu_env);
>           apic_eoi(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x0d:
>           s->log_dest = val>>  24;
> @@ -787,8 +829,10 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>           s->dest_mode = val>>  28;
>           break;
>       case 0x0f:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>           s->spurious_vec = val&  0x1ff;
>           apic_update_irq(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x10 ... 0x17:
>       case 0x18 ... 0x1f:
> @@ -807,24 +851,30 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>       case 0x32 ... 0x37:
>           {
>               int n = index - 0x32;
> +            qemu_mutex_lock_cpu(s->cpu_env);
>               s->lvt[n] = val;
>               if (n == APIC_LVT_TIMER)
>                   apic_timer_update(s, qemu_get_clock_ns(vm_clock));
> +            qemu_mutex_unlock_cpu(s->cpu_env);
>           }
>           break;
>       case 0x38:
> +       qemu_mutex_lock_cpu(s->cpu_env);
>           s->initial_count = val;
>           s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
>           apic_timer_update(s, s->initial_count_load_time);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>           break;
>       case 0x39:
>           break;
>       case 0x3e:
>           {
>               int v;
> +            qemu_mutex_lock_cpu(s->cpu_env);
>               s->divide_conf = val&  0xb;
>               v = (s->divide_conf&  3) | ((s->divide_conf>>  1)&  4);
>               s->count_shift = (v + 1)&  7;
> +            qemu_mutex_unlock_cpu(s->cpu_env);
>           }
>           break;
>       default:
> diff --git a/hw/pc.c b/hw/pc.c
> index 4d34a33..5e7350c 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -147,7 +147,7 @@ void cpu_smm_update(CPUX86State *env)
>           smm_set(!!(env->hflags&  HF_SMM_MASK), smm_arg);
>   }
>
> -
> +/* Called by kvm_cpu_exec(), already with lock protection */
>   /* IRQ handling */
>   int cpu_get_pic_interrupt(CPUX86State *env)
>   {
> @@ -173,9 +173,15 @@ static void pic_irq_request(void *opaque, int irq, int level)
>       DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
>       if (env->apic_state) {
>           while (env) {
> +            if (!qemu_cpu_is_self(env)) {
> +                qemu_mutex_lock_cpu(env);
> +            }
>               if (apic_accept_pic_intr(env->apic_state)) {
>                   apic_deliver_pic_intr(env->apic_state, level);
>               }
> +            if (!qemu_cpu_is_self(env)) {
> +                qemu_mutex_unlock_cpu(env);
> +            }
>               env = env->next_cpu;
>           }
>       } else {
> diff --git a/kvm-all.c b/kvm-all.c
> index 9b73ccf..dc9aa54 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -29,6 +29,7 @@
>   #include "bswap.h"
>   #include "memory.h"
>   #include "exec-memory.h"
> +#include "qemu-thread.h"
>
>   /* This check must be after config-host.h is included */
>   #ifdef CONFIG_EVENTFD
> @@ -1252,12 +1253,15 @@ int kvm_cpu_exec(CPUArchState *env)
>                */
>               qemu_cpu_kick_self();
>           }
> -        qemu_mutex_unlock_iothread();
> +        qemu_mutex_unlock(env->cpu_lock);
>
>           run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>
> -        qemu_mutex_lock_iothread();
> +        qemu_mutex_lock(env->cpu_lock);
>           kvm_arch_post_run(env, run);
> +        qemu_mutex_unlock_cpu(env);
> +
> +        qemu_mutex_lock_iothread();
>
>           kvm_flush_coalesced_mmio_buffer();
>
> @@ -1265,6 +1269,8 @@ int kvm_cpu_exec(CPUArchState *env)
>               if (run_ret == -EINTR || run_ret == -EAGAIN) {
>                   DPRINTF("io window exit\n");
>                   ret = EXCP_INTERRUPT;
> +                qemu_mutex_unlock_iothread();
> +                qemu_mutex_lock_cpu(env);
>                   break;
>               }
>               fprintf(stderr, "error: kvm run failed %s\n",
> @@ -1312,6 +1318,9 @@ int kvm_cpu_exec(CPUArchState *env)
>               ret = kvm_arch_handle_exit(env, run);
>               break;
>           }
> +
> +        qemu_mutex_unlock_iothread();
> +        qemu_mutex_lock_cpu(env);
>       } while (ret == 0);

This really confuses me.

Shouldn't we be moving qemu_mutex_unlock_iothread() to the very top of 
kvm_cpu_exec()?

There's also only a fixed amount of state that gets manipulated in kvm_cpu_exec(),
so shouldn't we try to describe specifically what state is covered by cpu_lock?

What's your strategy for ensuring that all accesses to that state are protected 
by cpu_lock?

Regards,

Anthony Liguori

>
>       if (ret<  0) {

  parent reply	other threads:[~2012-06-22 20:06 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1340290158-11036-1-git-send-email-qemulist@gmail.com>
2012-06-21 15:23 ` [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread Jan Kiszka
2012-06-22 10:24   ` liu ping fan
2012-06-22 10:37     ` Jan Kiszka
2012-06-22 20:11       ` Anthony Liguori
2012-06-22 21:14         ` Jan Kiszka
2012-06-22 21:44           ` Anthony Liguori
2012-06-22 22:27             ` Jan Kiszka
2012-06-22 22:56               ` Anthony Liguori
2012-06-23  9:10                 ` Jan Kiszka
     [not found] ` <1340290158-11036-2-git-send-email-qemulist@gmail.com>
2012-06-22 20:01   ` [Qemu-devel] [PATCH 1/2] CPUArchState: introduce per-cpu lock Anthony Liguori
     [not found] ` <1340290158-11036-3-git-send-email-qemulist@gmail.com>
2012-06-21 15:02   ` [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock Jan Kiszka
2012-06-22 20:06   ` Anthony Liguori [this message]
2012-06-23  9:32     ` liu ping fan
2012-06-24 14:09       ` liu ping fan
2012-06-21 15:06 [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread Liu Ping Fan
2012-06-21 15:06 ` [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock Liu Ping Fan
2012-06-22  2:29   ` 陳韋任 (Wei-Ren Chen)
2012-06-22 10:26     ` liu ping fan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4FE4D03D.4010009@codemonkey.ws \
    --to=anthony@codemonkey.ws \
    --cc=jan.kiszka@siemens.com \
    --cc=pingfank@linux.vnet.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemulist@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).