From: Anthony Liguori <anthony@codemonkey.ws>
To: Liu Ping Fan <qemulist@gmail.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>,
Liu Ping Fan <pingfank@linux.vnet.ibm.com>,
qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock
Date: Fri, 22 Jun 2012 15:06:21 -0500 [thread overview]
Message-ID: <4FE4D03D.4010009@codemonkey.ws> (raw)
In-Reply-To: <1340290158-11036-3-git-send-email-qemulist@gmail.com>
On 06/21/2012 09:49 AM, Liu Ping Fan wrote:
> In order to break the big lock, using per-cpu_lock in kvm_cpu_exec()
> to protect the race from other cpu's access to env->apic_state& related
> field in env.
> Also, we need to protect agaist run_on_cpu().
>
> Race condition can be like this:
> 1. vcpu-1 IPI vcpu-2
> vcpu-3 IPI vcpu-2
> Open window exists for accessing to vcpu-2's apic_state& env
>
> 2. run_on_cpu() write env->queued_work_last, while flush_queued_work()
> read
>
> Signed-off-by: Liu Ping Fan<pingfank@linux.vnet.ibm.com>
> ---
> cpus.c | 6 ++++--
> hw/apic.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> hw/pc.c | 8 +++++++-
> kvm-all.c | 13 +++++++++++--
> 4 files changed, 76 insertions(+), 9 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 554f7bc..ac99afe 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -649,6 +649,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
>
> wi.func = func;
> wi.data = data;
> + qemu_mutex_lock(env->cpu_lock);
> if (!env->queued_work_first) {
> env->queued_work_first =&wi;
> } else {
> @@ -657,6 +658,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
> env->queued_work_last =&wi;
> wi.next = NULL;
> wi.done = false;
> + qemu_mutex_unlock(env->cpu_lock);
>
> qemu_cpu_kick(env);
> while (!wi.done) {
> @@ -718,7 +720,7 @@ static void qemu_tcg_wait_io_event(void)
> static void qemu_kvm_wait_io_event(CPUArchState *env)
> {
> while (cpu_thread_is_idle(env)) {
> - qemu_cond_wait(env->halt_cond,&qemu_global_mutex);
> + qemu_cond_wait(env->halt_cond, env->cpu_lock);
> }
>
> qemu_kvm_eat_signals(env);
> @@ -729,8 +731,8 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
> {
> CPUArchState *env = arg;
> int r;
> + qemu_mutex_lock_cpu(env);
>
> - qemu_mutex_lock(&qemu_global_mutex);
> qemu_thread_get_self(env->thread);
> env->thread_id = qemu_get_thread_id();
> cpu_single_env = env;
> diff --git a/hw/apic.c b/hw/apic.c
> index 4eeaf88..b999a40 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -22,6 +22,7 @@
> #include "host-utils.h"
> #include "trace.h"
> #include "pc.h"
> +#include "qemu-thread.h"
>
> #define MAX_APIC_WORDS 8
>
> @@ -94,6 +95,7 @@ static int get_highest_priority_int(uint32_t *tab)
> return -1;
> }
>
> +/* Caller must hold the lock */
> static void apic_sync_vapic(APICCommonState *s, int sync_type)
> {
> VAPICState vapic_state;
> @@ -141,11 +143,13 @@ static void apic_sync_vapic(APICCommonState *s, int sync_type)
> }
> }
>
> +/* Caller must hold lock */
> static void apic_vapic_base_update(APICCommonState *s)
> {
> apic_sync_vapic(s, SYNC_TO_VAPIC);
> }
>
> +/* Caller must hold the lock */
> static void apic_local_deliver(APICCommonState *s, int vector)
> {
> uint32_t lvt = s->lvt[vector];
> @@ -175,9 +179,11 @@ static void apic_local_deliver(APICCommonState *s, int vector)
> (lvt& APIC_LVT_LEVEL_TRIGGER))
> trigger_mode = APIC_TRIGGER_LEVEL;
> apic_set_irq(s, lvt& 0xff, trigger_mode);
> + break;
> }
> }
>
> +/* Caller must hold the lock */
> void apic_deliver_pic_intr(DeviceState *d, int level)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -200,9 +206,12 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
> }
> }
>
> +/* Must hold lock */
> static void apic_external_nmi(APICCommonState *s)
> {
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_local_deliver(s, APIC_LVT_LINT1);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> #define foreach_apic(apic, deliver_bitmask, code) \
> @@ -215,7 +224,9 @@ static void apic_external_nmi(APICCommonState *s)
> if (__mask& (1<< __j)) {\
> apic = local_apics[__i * 32 + __j];\
> if (apic) {\
> + qemu_mutex_lock_cpu(apic->cpu_env);\
> code;\
> + qemu_mutex_unlock_cpu(apic->cpu_env);\
> }\
> }\
> }\
> @@ -244,7 +255,9 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
> if (d>= 0) {
> apic_iter = local_apics[d];
> if (apic_iter) {
> + qemu_mutex_lock_cpu(apic_iter->cpu_env);
> apic_set_irq(apic_iter, vector_num, trigger_mode);
> + qemu_mutex_unlock_cpu(apic_iter->cpu_env);
> }
> }
> }
> @@ -293,6 +306,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
> apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
> }
>
> +/* Caller must hold lock */
> static void apic_set_base(APICCommonState *s, uint64_t val)
> {
> s->apicbase = (val& 0xfffff000) |
> @@ -305,6 +319,7 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
> }
> }
>
> +/* caller must hold lock */
> static void apic_set_tpr(APICCommonState *s, uint8_t val)
> {
> /* Updates from cr8 are ignored while the VAPIC is active */
> @@ -314,12 +329,14 @@ static void apic_set_tpr(APICCommonState *s, uint8_t val)
> }
> }
>
> +/* caller must hold lock */
> static uint8_t apic_get_tpr(APICCommonState *s)
> {
> apic_sync_vapic(s, SYNC_FROM_VAPIC);
> return s->tpr>> 4;
> }
>
> +/* Caller must hold the lock */
> static int apic_get_ppr(APICCommonState *s)
> {
> int tpr, isrv, ppr;
> @@ -348,6 +365,7 @@ static int apic_get_arb_pri(APICCommonState *s)
> * 0 - no interrupt,
> *>0 - interrupt number
> */
> +/* Caller must hold cpu_lock */
> static int apic_irq_pending(APICCommonState *s)
> {
> int irrv, ppr;
> @@ -363,6 +381,7 @@ static int apic_irq_pending(APICCommonState *s)
> return irrv;
> }
>
> +/* caller must ensure the lock has been taken */
> /* signal the CPU if an irq is pending */
> static void apic_update_irq(APICCommonState *s)
> {
> @@ -380,11 +399,13 @@ static void apic_update_irq(APICCommonState *s)
> void apic_poll_irq(DeviceState *d)
> {
> APICCommonState *s = APIC_COMMON(d);
> -
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_sync_vapic(s, SYNC_FROM_VAPIC);
> apic_update_irq(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> +/* caller must hold the lock */
> static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
> {
> apic_report_irq_delivered(!get_bit(s->irr, vector_num));
> @@ -407,6 +428,7 @@ static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
> apic_update_irq(s);
> }
>
> +/* caller must hold the lock */
> static void apic_eoi(APICCommonState *s)
> {
> int isrv;
> @@ -415,6 +437,7 @@ static void apic_eoi(APICCommonState *s)
> return;
> reset_bit(s->isr, isrv);
> if (!(s->spurious_vec& APIC_SV_DIRECTED_IO)&& get_bit(s->tmr, isrv)) {
> + //fix me,need to take extra lock for the bus?
> ioapic_eoi_broadcast(isrv);
> }
> apic_sync_vapic(s, SYNC_FROM_VAPIC | SYNC_TO_VAPIC);
> @@ -480,18 +503,23 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
> static void apic_startup(APICCommonState *s, int vector_num)
> {
> s->sipi_vector = vector_num;
> + qemu_mutex_lock_cpu(s->cpu_env);
> cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> void apic_sipi(DeviceState *d)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> -
> + qemu_mutex_lock_cpu(s->cpu_env);
> cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
>
> - if (!s->wait_for_sipi)
> + if (!s->wait_for_sipi) {
> + qemu_mutex_unlock_cpu(s->cpu_env);
> return;
> + }
> cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> s->wait_for_sipi = 0;
> }
>
> @@ -543,6 +571,7 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
> apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
> }
>
> +/* Caller must take lock */
> int apic_get_interrupt(DeviceState *d)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -572,6 +601,7 @@ int apic_get_interrupt(DeviceState *d)
> return intno;
> }
>
> +/* Caller must hold the cpu_lock */
> int apic_accept_pic_intr(DeviceState *d)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -589,6 +619,7 @@ int apic_accept_pic_intr(DeviceState *d)
> return 0;
> }
>
> +/* Caller hold lock */
> static uint32_t apic_get_current_count(APICCommonState *s)
> {
> int64_t d;
> @@ -619,9 +650,10 @@ static void apic_timer_update(APICCommonState *s, int64_t current_time)
> static void apic_timer(void *opaque)
> {
> APICCommonState *s = opaque;
> -
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_local_deliver(s, APIC_LVT_TIMER);
> apic_timer_update(s, s->next_time);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> static uint32_t apic_mem_readb(void *opaque, target_phys_addr_t addr)
> @@ -664,18 +696,22 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
> val = 0x11 | ((APIC_LVT_NB - 1)<< 16); /* version 0x11 */
> break;
> case 0x08:
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_sync_vapic(s, SYNC_FROM_VAPIC);
> if (apic_report_tpr_access) {
> cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_READ);
> }
> val = s->tpr;
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x09:
> val = apic_get_arb_pri(s);
> break;
> case 0x0a:
> /* ppr */
> + qemu_mutex_lock_cpu(s->cpu_env);
> val = apic_get_ppr(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x0b:
> val = 0;
> @@ -712,7 +748,9 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
> val = s->initial_count;
> break;
> case 0x39:
> + qemu_mutex_lock_cpu(s->cpu_env);
> val = apic_get_current_count(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x3e:
> val = s->divide_conf;
> @@ -767,18 +805,22 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> case 0x03:
> break;
> case 0x08:
> + qemu_mutex_lock_cpu(s->cpu_env);
> if (apic_report_tpr_access) {
> cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_WRITE);
> }
> s->tpr = val;
> apic_sync_vapic(s, SYNC_TO_VAPIC);
> apic_update_irq(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x09:
> case 0x0a:
> break;
> case 0x0b: /* EOI */
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_eoi(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x0d:
> s->log_dest = val>> 24;
> @@ -787,8 +829,10 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> s->dest_mode = val>> 28;
> break;
> case 0x0f:
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->spurious_vec = val& 0x1ff;
> apic_update_irq(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x10 ... 0x17:
> case 0x18 ... 0x1f:
> @@ -807,24 +851,30 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> case 0x32 ... 0x37:
> {
> int n = index - 0x32;
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->lvt[n] = val;
> if (n == APIC_LVT_TIMER)
> apic_timer_update(s, qemu_get_clock_ns(vm_clock));
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
> break;
> case 0x38:
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->initial_count = val;
> s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
> apic_timer_update(s, s->initial_count_load_time);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x39:
> break;
> case 0x3e:
> {
> int v;
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->divide_conf = val& 0xb;
> v = (s->divide_conf& 3) | ((s->divide_conf>> 1)& 4);
> s->count_shift = (v + 1)& 7;
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
> break;
> default:
> diff --git a/hw/pc.c b/hw/pc.c
> index 4d34a33..5e7350c 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -147,7 +147,7 @@ void cpu_smm_update(CPUX86State *env)
> smm_set(!!(env->hflags& HF_SMM_MASK), smm_arg);
> }
>
> -
> +/* Called by kvm_cpu_exec(), already with lock protection */
> /* IRQ handling */
> int cpu_get_pic_interrupt(CPUX86State *env)
> {
> @@ -173,9 +173,15 @@ static void pic_irq_request(void *opaque, int irq, int level)
> DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
> if (env->apic_state) {
> while (env) {
> + if (!qemu_cpu_is_self(env)) {
> + qemu_mutex_lock_cpu(env);
> + }
> if (apic_accept_pic_intr(env->apic_state)) {
> apic_deliver_pic_intr(env->apic_state, level);
> }
> + if (!qemu_cpu_is_self(env)) {
> + qemu_mutex_unlock_cpu(env);
> + }
> env = env->next_cpu;
> }
> } else {
> diff --git a/kvm-all.c b/kvm-all.c
> index 9b73ccf..dc9aa54 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -29,6 +29,7 @@
> #include "bswap.h"
> #include "memory.h"
> #include "exec-memory.h"
> +#include "qemu-thread.h"
>
> /* This check must be after config-host.h is included */
> #ifdef CONFIG_EVENTFD
> @@ -1252,12 +1253,15 @@ int kvm_cpu_exec(CPUArchState *env)
> */
> qemu_cpu_kick_self();
> }
> - qemu_mutex_unlock_iothread();
> + qemu_mutex_unlock(env->cpu_lock);
>
> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>
> - qemu_mutex_lock_iothread();
> + qemu_mutex_lock(env->cpu_lock);
> kvm_arch_post_run(env, run);
> + qemu_mutex_unlock_cpu(env);
> +
> + qemu_mutex_lock_iothread();
>
> kvm_flush_coalesced_mmio_buffer();
>
> @@ -1265,6 +1269,8 @@ int kvm_cpu_exec(CPUArchState *env)
> if (run_ret == -EINTR || run_ret == -EAGAIN) {
> DPRINTF("io window exit\n");
> ret = EXCP_INTERRUPT;
> + qemu_mutex_unlock_iothread();
> + qemu_mutex_lock_cpu(env);
> break;
> }
> fprintf(stderr, "error: kvm run failed %s\n",
> @@ -1312,6 +1318,9 @@ int kvm_cpu_exec(CPUArchState *env)
> ret = kvm_arch_handle_exit(env, run);
> break;
> }
> +
> + qemu_mutex_unlock_iothread();
> + qemu_mutex_lock_cpu(env);
> } while (ret == 0);
This really confuses me.
Shouldn't we be moving qemu_mutex_unlock_iothread() to the very top of
kvm_cpu_exec()?
There's only a fixed amount of state that gets manipulated too in kvm_cpu_exec()
so shouldn't we try to specifically describe what state is covered by cpu_lock?
What's your strategy for ensuring that all accesses to that state is protected
by cpu_lock?
Regards,
Anthony Liguori
>
> if (ret< 0) {
next prev parent reply other threads:[~2012-06-22 20:06 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1340290158-11036-1-git-send-email-qemulist@gmail.com>
[not found] ` <1340290158-11036-3-git-send-email-qemulist@gmail.com>
2012-06-21 15:02 ` [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock Jan Kiszka
2012-06-22 20:06 ` Anthony Liguori [this message]
2012-06-23 9:32 ` liu ping fan
2012-06-24 14:09 ` liu ping fan
2012-06-21 15:23 ` [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread Jan Kiszka
2012-06-22 10:24 ` liu ping fan
2012-06-22 10:37 ` Jan Kiszka
2012-06-22 20:11 ` Anthony Liguori
2012-06-22 21:14 ` Jan Kiszka
2012-06-22 21:44 ` Anthony Liguori
2012-06-22 22:27 ` Jan Kiszka
2012-06-22 22:56 ` Anthony Liguori
2012-06-23 9:10 ` Jan Kiszka
[not found] ` <1340290158-11036-2-git-send-email-qemulist@gmail.com>
2012-06-22 20:01 ` [Qemu-devel] [PATCH 1/2] CPUArchState: introduce per-cpu lock Anthony Liguori
2012-06-21 15:06 [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread Liu Ping Fan
2012-06-21 15:06 ` [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock Liu Ping Fan
2012-06-22 2:29 ` 陳韋任 (Wei-Ren Chen)
2012-06-22 10:26 ` liu ping fan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FE4D03D.4010009@codemonkey.ws \
--to=anthony@codemonkey.ws \
--cc=jan.kiszka@siemens.com \
--cc=pingfank@linux.vnet.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=qemulist@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.