Message-ID: <4FE4D03D.4010009@codemonkey.ws>
Date: Fri, 22 Jun 2012 15:06:21 -0500
From: Anthony Liguori
References: <1340290158-11036-1-git-send-email-qemulist@gmail.com> <1340290158-11036-3-git-send-email-qemulist@gmail.com>
In-Reply-To: <1340290158-11036-3-git-send-email-qemulist@gmail.com>
Subject: Re: [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock
To: Liu Ping Fan
Cc: Jan Kiszka, Liu Ping Fan, qemu-devel

On 06/21/2012 09:49 AM, Liu Ping Fan wrote:
> In order to break the big lock, using per-cpu lock in kvm_cpu_exec()
> to protect against other cpus' access to env->apic_state & related
> fields in env.
> Also, we need to protect against run_on_cpu().
>
> Race condition can be like this:
> 1. vcpu-1 IPI vcpu-2
>    vcpu-3 IPI vcpu-2
>    Open window exists for access to vcpu-2's apic_state & env
>
> 2. run_on_cpu() writes env->queued_work_last, while flush_queued_work()
>    reads it
>
> Signed-off-by: Liu Ping Fan
> ---
>  cpus.c    |    6 ++++--
>  hw/apic.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  hw/pc.c   |    8 +++++++-
>  kvm-all.c |   13 +++++++++++--
>  4 files changed, 76 insertions(+), 9 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 554f7bc..ac99afe 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -649,6 +649,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
>
>      wi.func = func;
>      wi.data = data;
> +    qemu_mutex_lock(env->cpu_lock);
>      if (!env->queued_work_first) {
>          env->queued_work_first = &wi;
>      } else {
> @@ -657,6 +658,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
>      env->queued_work_last = &wi;
>      wi.next = NULL;
>      wi.done = false;
> +    qemu_mutex_unlock(env->cpu_lock);
>
>      qemu_cpu_kick(env);
>      while (!wi.done) {
> @@ -718,7 +720,7 @@ static void qemu_tcg_wait_io_event(void)
>  static void qemu_kvm_wait_io_event(CPUArchState *env)
>  {
>      while (cpu_thread_is_idle(env)) {
> -        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
> +        qemu_cond_wait(env->halt_cond, env->cpu_lock);
>      }
>
>      qemu_kvm_eat_signals(env);
> @@ -729,8 +731,8 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
>  {
>      CPUArchState *env = arg;
>      int r;
> +    qemu_mutex_lock_cpu(env);
>
> -    qemu_mutex_lock(&qemu_global_mutex);
>      qemu_thread_get_self(env->thread);
>      env->thread_id = qemu_get_thread_id();
>      cpu_single_env = env;
> diff --git a/hw/apic.c b/hw/apic.c
> index 4eeaf88..b999a40 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -22,6 +22,7 @@
>  #include "host-utils.h"
>  #include "trace.h"
>  #include "pc.h"
> +#include "qemu-thread.h"
>
>  #define MAX_APIC_WORDS 8
>
> @@ -94,6 +95,7 @@ static int get_highest_priority_int(uint32_t *tab)
>      return -1;
>  }
>
> +/* Caller must hold the lock */
>  static void apic_sync_vapic(APICCommonState *s, int sync_type)
>  {
>      VAPICState vapic_state;
> @@ -141,11 +143,13 @@ static void apic_sync_vapic(APICCommonState *s, int sync_type)
>      }
>  }
>
> +/* Caller must hold lock */
>  static void apic_vapic_base_update(APICCommonState *s)
>  {
>      apic_sync_vapic(s, SYNC_TO_VAPIC);
>  }
>
> +/* Caller must hold the lock */
>  static void apic_local_deliver(APICCommonState *s, int vector)
>  {
>      uint32_t lvt = s->lvt[vector];
> @@ -175,9 +179,11 @@ static void apic_local_deliver(APICCommonState *s, int vector)
>              (lvt & APIC_LVT_LEVEL_TRIGGER))
>              trigger_mode = APIC_TRIGGER_LEVEL;
>          apic_set_irq(s, lvt & 0xff, trigger_mode);
> +        break;
>      }
>  }
>
> +/* Caller must hold the lock */
>  void apic_deliver_pic_intr(DeviceState *d, int level)
>  {
>      APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -200,9 +206,12 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
>      }
>  }
>
> +/* Must hold lock */
>  static void apic_external_nmi(APICCommonState *s)
>  {
> +    qemu_mutex_lock_cpu(s->cpu_env);
>      apic_local_deliver(s, APIC_LVT_LINT1);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>  }
>
>  #define foreach_apic(apic, deliver_bitmask, code) \
> @@ -215,7 +224,9 @@ static void apic_external_nmi(APICCommonState *s)
>              if (__mask & (1 << __j)) {\
>                  apic = local_apics[__i * 32 + __j];\
>                  if (apic) {\
> +                    qemu_mutex_lock_cpu(apic->cpu_env);\
>                      code;\
> +                    qemu_mutex_unlock_cpu(apic->cpu_env);\
>                  }\
>              }\
>          }\
> @@ -244,7 +255,9 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
>          if (d >= 0) {
>              apic_iter = local_apics[d];
>              if (apic_iter) {
> +                qemu_mutex_lock_cpu(apic_iter->cpu_env);
>                  apic_set_irq(apic_iter, vector_num, trigger_mode);
> +                qemu_mutex_unlock_cpu(apic_iter->cpu_env);
>              }
>          }
>      }
> @@ -293,6 +306,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
>      apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
>  }
>
> +/* Caller must hold lock */
>  static void apic_set_base(APICCommonState *s, uint64_t val)
>  {
>      s->apicbase = (val & 0xfffff000) |
> @@ -305,6 +319,7 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
>      }
>  }
>
> +/* caller must hold lock */
>  static void apic_set_tpr(APICCommonState *s, uint8_t val)
>  {
>      /* Updates from cr8 are ignored while the VAPIC is active */
> @@ -314,12 +329,14 @@ static void apic_set_tpr(APICCommonState *s, uint8_t val)
>      }
>  }
>
> +/* caller must hold lock */
>  static uint8_t apic_get_tpr(APICCommonState *s)
>  {
>      apic_sync_vapic(s, SYNC_FROM_VAPIC);
>      return s->tpr >> 4;
>  }
>
> +/* Caller must hold the lock */
>  static int apic_get_ppr(APICCommonState *s)
>  {
>      int tpr, isrv, ppr;
> @@ -348,6 +365,7 @@ static int apic_get_arb_pri(APICCommonState *s)
>   * 0  - no interrupt,
>   * >0 - interrupt number
>   */
> +/* Caller must hold cpu_lock */
>  static int apic_irq_pending(APICCommonState *s)
>  {
>      int irrv, ppr;
> @@ -363,6 +381,7 @@ static int apic_irq_pending(APICCommonState *s)
>      return irrv;
>  }
>
> +/* caller must ensure the lock has been taken */
>  /* signal the CPU if an irq is pending */
>  static void apic_update_irq(APICCommonState *s)
>  {
> @@ -380,11 +399,13 @@ static void apic_update_irq(APICCommonState *s)
>  void apic_poll_irq(DeviceState *d)
>  {
>      APICCommonState *s = APIC_COMMON(d);
> -
> +    qemu_mutex_lock_cpu(s->cpu_env);
>      apic_sync_vapic(s, SYNC_FROM_VAPIC);
>      apic_update_irq(s);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>  }
>
> +/* caller must hold the lock */
>  static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
>  {
>      apic_report_irq_delivered(!get_bit(s->irr, vector_num));
> @@ -407,6 +428,7 @@ static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
>      apic_update_irq(s);
>  }
>
> +/* caller must hold the lock */
>  static void apic_eoi(APICCommonState *s)
>  {
>      int isrv;
> @@ -415,6 +437,7 @@ static void apic_eoi(APICCommonState *s)
>          return;
>      reset_bit(s->isr, isrv);
>      if (!(s->spurious_vec & APIC_SV_DIRECTED_IO) && get_bit(s->tmr, isrv)) {
> +        //fix me,need to take extra lock for the bus?
>          ioapic_eoi_broadcast(isrv);
>      }
>      apic_sync_vapic(s, SYNC_FROM_VAPIC | SYNC_TO_VAPIC);
> @@ -480,18 +503,23 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
>  static void apic_startup(APICCommonState *s, int vector_num)
>  {
>      s->sipi_vector = vector_num;
> +    qemu_mutex_lock_cpu(s->cpu_env);
>      cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>  }
>
>  void apic_sipi(DeviceState *d)
>  {
>      APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> -
> +    qemu_mutex_lock_cpu(s->cpu_env);
>      cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
>
> -    if (!s->wait_for_sipi)
> +    if (!s->wait_for_sipi) {
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          return;
> +    }
>      cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>      s->wait_for_sipi = 0;
>  }
>
> @@ -543,6 +571,7 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
>      apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
>  }
>
> +/* Caller must take lock */
>  int apic_get_interrupt(DeviceState *d)
>  {
>      APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -572,6 +601,7 @@ int apic_get_interrupt(DeviceState *d)
>      return intno;
>  }
>
> +/* Caller must hold the cpu_lock */
>  int apic_accept_pic_intr(DeviceState *d)
>  {
>      APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -589,6 +619,7 @@ int apic_accept_pic_intr(DeviceState *d)
>      return 0;
>  }
>
> +/* Caller hold lock */
>  static uint32_t apic_get_current_count(APICCommonState *s)
>  {
>      int64_t d;
> @@ -619,9 +650,10 @@ static void apic_timer_update(APICCommonState *s, int64_t current_time)
>  static void apic_timer(void *opaque)
>  {
>      APICCommonState *s = opaque;
> -
> +    qemu_mutex_lock_cpu(s->cpu_env);
>      apic_local_deliver(s, APIC_LVT_TIMER);
>      apic_timer_update(s, s->next_time);
> +    qemu_mutex_unlock_cpu(s->cpu_env);
>  }
>
>  static uint32_t apic_mem_readb(void *opaque, target_phys_addr_t addr)
> @@ -664,18 +696,22 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
>          val = 0x11 | ((APIC_LVT_NB - 1) << 16); /* version 0x11 */
>          break;
>      case 0x08:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          apic_sync_vapic(s, SYNC_FROM_VAPIC);
>          if (apic_report_tpr_access) {
>              cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_READ);
>          }
>          val = s->tpr;
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x09:
>          val = apic_get_arb_pri(s);
>          break;
>      case 0x0a:
>          /* ppr */
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          val = apic_get_ppr(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x0b:
>          val = 0;
> @@ -712,7 +748,9 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
>          val = s->initial_count;
>          break;
>      case 0x39:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          val = apic_get_current_count(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x3e:
>          val = s->divide_conf;
> @@ -767,18 +805,22 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>      case 0x03:
>          break;
>      case 0x08:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          if (apic_report_tpr_access) {
>              cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_WRITE);
>          }
>          s->tpr = val;
>          apic_sync_vapic(s, SYNC_TO_VAPIC);
>          apic_update_irq(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x09:
>      case 0x0a:
>          break;
>      case 0x0b: /* EOI */
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          apic_eoi(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x0d:
>          s->log_dest = val >> 24;
> @@ -787,8 +829,10 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>          s->dest_mode = val >> 28;
>          break;
>      case 0x0f:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          s->spurious_vec = val & 0x1ff;
>          apic_update_irq(s);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x10 ... 0x17:
>      case 0x18 ... 0x1f:
> @@ -807,24 +851,30 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
>      case 0x32 ... 0x37:
>          {
>              int n = index - 0x32;
> +            qemu_mutex_lock_cpu(s->cpu_env);
>              s->lvt[n] = val;
>              if (n == APIC_LVT_TIMER)
>                  apic_timer_update(s, qemu_get_clock_ns(vm_clock));
> +            qemu_mutex_unlock_cpu(s->cpu_env);
>          }
>          break;
>      case 0x38:
> +        qemu_mutex_lock_cpu(s->cpu_env);
>          s->initial_count = val;
>          s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
>          apic_timer_update(s, s->initial_count_load_time);
> +        qemu_mutex_unlock_cpu(s->cpu_env);
>          break;
>      case 0x39:
>          break;
>      case 0x3e:
>          {
>              int v;
> +            qemu_mutex_lock_cpu(s->cpu_env);
>              s->divide_conf = val & 0xb;
>              v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
>              s->count_shift = (v + 1) & 7;
> +            qemu_mutex_unlock_cpu(s->cpu_env);
>          }
>          break;
>      default:
> diff --git a/hw/pc.c b/hw/pc.c
> index 4d34a33..5e7350c 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -147,7 +147,7 @@ void cpu_smm_update(CPUX86State *env)
>      smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
>  }
>
> -
> +/* Called by kvm_cpu_exec(), already with lock protection */
>  /* IRQ handling */
>  int cpu_get_pic_interrupt(CPUX86State *env)
>  {
> @@ -173,9 +173,15 @@ static void pic_irq_request(void *opaque, int irq, int level)
>      DPRINTF("pic_irqs: %s irq %d\n", level ? "raise" : "lower", irq);
>      if (env->apic_state) {
>          while (env) {
> +            if (!qemu_cpu_is_self(env)) {
> +                qemu_mutex_lock_cpu(env);
> +            }
>              if (apic_accept_pic_intr(env->apic_state)) {
>                  apic_deliver_pic_intr(env->apic_state, level);
>              }
> +            if (!qemu_cpu_is_self(env)) {
> +                qemu_mutex_unlock_cpu(env);
> +            }
>              env = env->next_cpu;
>          }
>      } else {
> diff --git a/kvm-all.c b/kvm-all.c
> index 9b73ccf..dc9aa54 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -29,6 +29,7 @@
>  #include "bswap.h"
>  #include "memory.h"
>  #include "exec-memory.h"
> +#include "qemu-thread.h"
>
>  /* This check must be after config-host.h is included */
>  #ifdef CONFIG_EVENTFD
> @@ -1252,12 +1253,15 @@ int kvm_cpu_exec(CPUArchState *env)
>               */
>              qemu_cpu_kick_self();
>          }
> -        qemu_mutex_unlock_iothread();
> +        qemu_mutex_unlock(env->cpu_lock);
>
>          run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>
> -        qemu_mutex_lock_iothread();
> +        qemu_mutex_lock(env->cpu_lock);
>          kvm_arch_post_run(env, run);
> +        qemu_mutex_unlock_cpu(env);
> +
> +        qemu_mutex_lock_iothread();
>
>          kvm_flush_coalesced_mmio_buffer();
>
> @@ -1265,6 +1269,8 @@ int kvm_cpu_exec(CPUArchState *env)
>              if (run_ret == -EINTR || run_ret == -EAGAIN) {
>                  DPRINTF("io window exit\n");
>                  ret = EXCP_INTERRUPT;
> +                qemu_mutex_unlock_iothread();
> +                qemu_mutex_lock_cpu(env);
>                  break;
>              }
>              fprintf(stderr, "error: kvm run failed %s\n",
> @@ -1312,6 +1318,9 @@ int kvm_cpu_exec(CPUArchState *env)
>              ret = kvm_arch_handle_exit(env, run);
>              break;
>          }
> +
> +        qemu_mutex_unlock_iothread();
> +        qemu_mutex_lock_cpu(env);
>      } while (ret == 0);

This really confuses me.  Shouldn't we be moving qemu_mutex_unlock_iothread()
to the very top of kvm_cpu_exec()?

There's only a fixed amount of state that gets manipulated in kvm_cpu_exec(),
so shouldn't we try to describe specifically what state is covered by cpu_lock?

What's your strategy for ensuring that all accesses to that state are protected
by cpu_lock?

Regards,

Anthony Liguori

>
>      if (ret < 0) {