From: Anthony Liguori <anthony@codemonkey.ws>
To: Liu Ping Fan <qemulist@gmail.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>,
Liu Ping Fan <pingfank@linux.vnet.ibm.com>,
qemu-devel <qemu-devel@nongnu.org>
Subject: Re: [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock
Date: Fri, 22 Jun 2012 15:06:21 -0500 [thread overview]
Message-ID: <4FE4D03D.4010009@codemonkey.ws> (raw)
In-Reply-To: <1340290158-11036-3-git-send-email-qemulist@gmail.com>
On 06/21/2012 09:49 AM, Liu Ping Fan wrote:
> In order to break the big lock, using per-cpu_lock in kvm_cpu_exec()
> to protect the race from other cpu's access to env->apic_state& related
> field in env.
> Also, we need to protect agaist run_on_cpu().
>
> Race condition can be like this:
> 1. vcpu-1 IPI vcpu-2
> vcpu-3 IPI vcpu-2
> Open window exists for accessing to vcpu-2's apic_state& env
>
> 2. run_on_cpu() write env->queued_work_last, while flush_queued_work()
> read
>
> Signed-off-by: Liu Ping Fan<pingfank@linux.vnet.ibm.com>
> ---
> cpus.c | 6 ++++--
> hw/apic.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> hw/pc.c | 8 +++++++-
> kvm-all.c | 13 +++++++++++--
> 4 files changed, 76 insertions(+), 9 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 554f7bc..ac99afe 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -649,6 +649,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
>
> wi.func = func;
> wi.data = data;
> + qemu_mutex_lock(env->cpu_lock);
> if (!env->queued_work_first) {
> env->queued_work_first =&wi;
> } else {
> @@ -657,6 +658,7 @@ void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
> env->queued_work_last =&wi;
> wi.next = NULL;
> wi.done = false;
> + qemu_mutex_unlock(env->cpu_lock);
>
> qemu_cpu_kick(env);
> while (!wi.done) {
> @@ -718,7 +720,7 @@ static void qemu_tcg_wait_io_event(void)
> static void qemu_kvm_wait_io_event(CPUArchState *env)
> {
> while (cpu_thread_is_idle(env)) {
> - qemu_cond_wait(env->halt_cond,&qemu_global_mutex);
> + qemu_cond_wait(env->halt_cond, env->cpu_lock);
> }
>
> qemu_kvm_eat_signals(env);
> @@ -729,8 +731,8 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
> {
> CPUArchState *env = arg;
> int r;
> + qemu_mutex_lock_cpu(env);
>
> - qemu_mutex_lock(&qemu_global_mutex);
> qemu_thread_get_self(env->thread);
> env->thread_id = qemu_get_thread_id();
> cpu_single_env = env;
> diff --git a/hw/apic.c b/hw/apic.c
> index 4eeaf88..b999a40 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -22,6 +22,7 @@
> #include "host-utils.h"
> #include "trace.h"
> #include "pc.h"
> +#include "qemu-thread.h"
>
> #define MAX_APIC_WORDS 8
>
> @@ -94,6 +95,7 @@ static int get_highest_priority_int(uint32_t *tab)
> return -1;
> }
>
> +/* Caller must hold the lock */
> static void apic_sync_vapic(APICCommonState *s, int sync_type)
> {
> VAPICState vapic_state;
> @@ -141,11 +143,13 @@ static void apic_sync_vapic(APICCommonState *s, int sync_type)
> }
> }
>
> +/* Caller must hold lock */
> static void apic_vapic_base_update(APICCommonState *s)
> {
> apic_sync_vapic(s, SYNC_TO_VAPIC);
> }
>
> +/* Caller must hold the lock */
> static void apic_local_deliver(APICCommonState *s, int vector)
> {
> uint32_t lvt = s->lvt[vector];
> @@ -175,9 +179,11 @@ static void apic_local_deliver(APICCommonState *s, int vector)
> (lvt& APIC_LVT_LEVEL_TRIGGER))
> trigger_mode = APIC_TRIGGER_LEVEL;
> apic_set_irq(s, lvt& 0xff, trigger_mode);
> + break;
> }
> }
>
> +/* Caller must hold the lock */
> void apic_deliver_pic_intr(DeviceState *d, int level)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -200,9 +206,12 @@ void apic_deliver_pic_intr(DeviceState *d, int level)
> }
> }
>
> +/* Must hold lock */
> static void apic_external_nmi(APICCommonState *s)
> {
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_local_deliver(s, APIC_LVT_LINT1);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> #define foreach_apic(apic, deliver_bitmask, code) \
> @@ -215,7 +224,9 @@ static void apic_external_nmi(APICCommonState *s)
> if (__mask& (1<< __j)) {\
> apic = local_apics[__i * 32 + __j];\
> if (apic) {\
> + qemu_mutex_lock_cpu(apic->cpu_env);\
> code;\
> + qemu_mutex_unlock_cpu(apic->cpu_env);\
> }\
> }\
> }\
> @@ -244,7 +255,9 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask,
> if (d>= 0) {
> apic_iter = local_apics[d];
> if (apic_iter) {
> + qemu_mutex_lock_cpu(apic_iter->cpu_env);
> apic_set_irq(apic_iter, vector_num, trigger_mode);
> + qemu_mutex_unlock_cpu(apic_iter->cpu_env);
> }
> }
> }
> @@ -293,6 +306,7 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode,
> apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
> }
>
> +/* Caller must hold lock */
> static void apic_set_base(APICCommonState *s, uint64_t val)
> {
> s->apicbase = (val& 0xfffff000) |
> @@ -305,6 +319,7 @@ static void apic_set_base(APICCommonState *s, uint64_t val)
> }
> }
>
> +/* caller must hold lock */
> static void apic_set_tpr(APICCommonState *s, uint8_t val)
> {
> /* Updates from cr8 are ignored while the VAPIC is active */
> @@ -314,12 +329,14 @@ static void apic_set_tpr(APICCommonState *s, uint8_t val)
> }
> }
>
> +/* caller must hold lock */
> static uint8_t apic_get_tpr(APICCommonState *s)
> {
> apic_sync_vapic(s, SYNC_FROM_VAPIC);
> return s->tpr>> 4;
> }
>
> +/* Caller must hold the lock */
> static int apic_get_ppr(APICCommonState *s)
> {
> int tpr, isrv, ppr;
> @@ -348,6 +365,7 @@ static int apic_get_arb_pri(APICCommonState *s)
> * 0 - no interrupt,
> *>0 - interrupt number
> */
> +/* Caller must hold cpu_lock */
> static int apic_irq_pending(APICCommonState *s)
> {
> int irrv, ppr;
> @@ -363,6 +381,7 @@ static int apic_irq_pending(APICCommonState *s)
> return irrv;
> }
>
> +/* caller must ensure the lock has been taken */
> /* signal the CPU if an irq is pending */
> static void apic_update_irq(APICCommonState *s)
> {
> @@ -380,11 +399,13 @@ static void apic_update_irq(APICCommonState *s)
> void apic_poll_irq(DeviceState *d)
> {
> APICCommonState *s = APIC_COMMON(d);
> -
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_sync_vapic(s, SYNC_FROM_VAPIC);
> apic_update_irq(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> +/* caller must hold the lock */
> static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
> {
> apic_report_irq_delivered(!get_bit(s->irr, vector_num));
> @@ -407,6 +428,7 @@ static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode)
> apic_update_irq(s);
> }
>
> +/* caller must hold the lock */
> static void apic_eoi(APICCommonState *s)
> {
> int isrv;
> @@ -415,6 +437,7 @@ static void apic_eoi(APICCommonState *s)
> return;
> reset_bit(s->isr, isrv);
> if (!(s->spurious_vec& APIC_SV_DIRECTED_IO)&& get_bit(s->tmr, isrv)) {
> + //fix me,need to take extra lock for the bus?
> ioapic_eoi_broadcast(isrv);
> }
> apic_sync_vapic(s, SYNC_FROM_VAPIC | SYNC_TO_VAPIC);
> @@ -480,18 +503,23 @@ static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask,
> static void apic_startup(APICCommonState *s, int vector_num)
> {
> s->sipi_vector = vector_num;
> + qemu_mutex_lock_cpu(s->cpu_env);
> cpu_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> void apic_sipi(DeviceState *d)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> -
> + qemu_mutex_lock_cpu(s->cpu_env);
> cpu_reset_interrupt(s->cpu_env, CPU_INTERRUPT_SIPI);
>
> - if (!s->wait_for_sipi)
> + if (!s->wait_for_sipi) {
> + qemu_mutex_unlock_cpu(s->cpu_env);
> return;
> + }
> cpu_x86_load_seg_cache_sipi(s->cpu_env, s->sipi_vector);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> s->wait_for_sipi = 0;
> }
>
> @@ -543,6 +571,7 @@ static void apic_deliver(DeviceState *d, uint8_t dest, uint8_t dest_mode,
> apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode);
> }
>
> +/* Caller must take lock */
> int apic_get_interrupt(DeviceState *d)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -572,6 +601,7 @@ int apic_get_interrupt(DeviceState *d)
> return intno;
> }
>
> +/* Caller must hold the cpu_lock */
> int apic_accept_pic_intr(DeviceState *d)
> {
> APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
> @@ -589,6 +619,7 @@ int apic_accept_pic_intr(DeviceState *d)
> return 0;
> }
>
> +/* Caller hold lock */
> static uint32_t apic_get_current_count(APICCommonState *s)
> {
> int64_t d;
> @@ -619,9 +650,10 @@ static void apic_timer_update(APICCommonState *s, int64_t current_time)
> static void apic_timer(void *opaque)
> {
> APICCommonState *s = opaque;
> -
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_local_deliver(s, APIC_LVT_TIMER);
> apic_timer_update(s, s->next_time);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
>
> static uint32_t apic_mem_readb(void *opaque, target_phys_addr_t addr)
> @@ -664,18 +696,22 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
> val = 0x11 | ((APIC_LVT_NB - 1)<< 16); /* version 0x11 */
> break;
> case 0x08:
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_sync_vapic(s, SYNC_FROM_VAPIC);
> if (apic_report_tpr_access) {
> cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_READ);
> }
> val = s->tpr;
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x09:
> val = apic_get_arb_pri(s);
> break;
> case 0x0a:
> /* ppr */
> + qemu_mutex_lock_cpu(s->cpu_env);
> val = apic_get_ppr(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x0b:
> val = 0;
> @@ -712,7 +748,9 @@ static uint32_t apic_mem_readl(void *opaque, target_phys_addr_t addr)
> val = s->initial_count;
> break;
> case 0x39:
> + qemu_mutex_lock_cpu(s->cpu_env);
> val = apic_get_current_count(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x3e:
> val = s->divide_conf;
> @@ -767,18 +805,22 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> case 0x03:
> break;
> case 0x08:
> + qemu_mutex_lock_cpu(s->cpu_env);
> if (apic_report_tpr_access) {
> cpu_report_tpr_access(s->cpu_env, TPR_ACCESS_WRITE);
> }
> s->tpr = val;
> apic_sync_vapic(s, SYNC_TO_VAPIC);
> apic_update_irq(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x09:
> case 0x0a:
> break;
> case 0x0b: /* EOI */
> + qemu_mutex_lock_cpu(s->cpu_env);
> apic_eoi(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x0d:
> s->log_dest = val>> 24;
> @@ -787,8 +829,10 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> s->dest_mode = val>> 28;
> break;
> case 0x0f:
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->spurious_vec = val& 0x1ff;
> apic_update_irq(s);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x10 ... 0x17:
> case 0x18 ... 0x1f:
> @@ -807,24 +851,30 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
> case 0x32 ... 0x37:
> {
> int n = index - 0x32;
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->lvt[n] = val;
> if (n == APIC_LVT_TIMER)
> apic_timer_update(s, qemu_get_clock_ns(vm_clock));
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
> break;
> case 0x38:
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->initial_count = val;
> s->initial_count_load_time = qemu_get_clock_ns(vm_clock);
> apic_timer_update(s, s->initial_count_load_time);
> + qemu_mutex_unlock_cpu(s->cpu_env);
> break;
> case 0x39:
> break;
> case 0x3e:
> {
> int v;
> + qemu_mutex_lock_cpu(s->cpu_env);
> s->divide_conf = val& 0xb;
> v = (s->divide_conf& 3) | ((s->divide_conf>> 1)& 4);
> s->count_shift = (v + 1)& 7;
> + qemu_mutex_unlock_cpu(s->cpu_env);
> }
> break;
> default:
> diff --git a/hw/pc.c b/hw/pc.c
> index 4d34a33..5e7350c 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -147,7 +147,7 @@ void cpu_smm_update(CPUX86State *env)
> smm_set(!!(env->hflags& HF_SMM_MASK), smm_arg);
> }
>
> -
> +/* Called by kvm_cpu_exec(), already with lock protection */
> /* IRQ handling */
> int cpu_get_pic_interrupt(CPUX86State *env)
> {
> @@ -173,9 +173,15 @@ static void pic_irq_request(void *opaque, int irq, int level)
> DPRINTF("pic_irqs: %s irq %d\n", level? "raise" : "lower", irq);
> if (env->apic_state) {
> while (env) {
> + if (!qemu_cpu_is_self(env)) {
> + qemu_mutex_lock_cpu(env);
> + }
> if (apic_accept_pic_intr(env->apic_state)) {
> apic_deliver_pic_intr(env->apic_state, level);
> }
> + if (!qemu_cpu_is_self(env)) {
> + qemu_mutex_unlock_cpu(env);
> + }
> env = env->next_cpu;
> }
> } else {
> diff --git a/kvm-all.c b/kvm-all.c
> index 9b73ccf..dc9aa54 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -29,6 +29,7 @@
> #include "bswap.h"
> #include "memory.h"
> #include "exec-memory.h"
> +#include "qemu-thread.h"
>
> /* This check must be after config-host.h is included */
> #ifdef CONFIG_EVENTFD
> @@ -1252,12 +1253,15 @@ int kvm_cpu_exec(CPUArchState *env)
> */
> qemu_cpu_kick_self();
> }
> - qemu_mutex_unlock_iothread();
> + qemu_mutex_unlock(env->cpu_lock);
>
> run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
>
> - qemu_mutex_lock_iothread();
> + qemu_mutex_lock(env->cpu_lock);
> kvm_arch_post_run(env, run);
> + qemu_mutex_unlock_cpu(env);
> +
> + qemu_mutex_lock_iothread();
>
> kvm_flush_coalesced_mmio_buffer();
>
> @@ -1265,6 +1269,8 @@ int kvm_cpu_exec(CPUArchState *env)
> if (run_ret == -EINTR || run_ret == -EAGAIN) {
> DPRINTF("io window exit\n");
> ret = EXCP_INTERRUPT;
> + qemu_mutex_unlock_iothread();
> + qemu_mutex_lock_cpu(env);
> break;
> }
> fprintf(stderr, "error: kvm run failed %s\n",
> @@ -1312,6 +1318,9 @@ int kvm_cpu_exec(CPUArchState *env)
> ret = kvm_arch_handle_exit(env, run);
> break;
> }
> +
> + qemu_mutex_unlock_iothread();
> + qemu_mutex_lock_cpu(env);
> } while (ret == 0);
This really confuses me.
Shouldn't we be moving qemu_mutex_unlock_iothread() to the very top of
kvm_cpu_exec()?
There's only a fixed amount of state that gets manipulated too in kvm_cpu_exec()
so shouldn't we try to specifically describe what state is covered by cpu_lock?
What's your strategy for ensuring that all accesses to that state is protected
by cpu_lock?
Regards,
Anthony Liguori
>
> if (ret< 0) {
next prev parent reply other threads:[~2012-06-22 20:06 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1340290158-11036-1-git-send-email-qemulist@gmail.com>
2012-06-21 15:23 ` [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread Jan Kiszka
2012-06-22 10:24 ` liu ping fan
2012-06-22 10:37 ` Jan Kiszka
2012-06-22 20:11 ` Anthony Liguori
2012-06-22 21:14 ` Jan Kiszka
2012-06-22 21:44 ` Anthony Liguori
2012-06-22 22:27 ` Jan Kiszka
2012-06-22 22:56 ` Anthony Liguori
2012-06-23 9:10 ` Jan Kiszka
[not found] ` <1340290158-11036-2-git-send-email-qemulist@gmail.com>
2012-06-22 20:01 ` [Qemu-devel] [PATCH 1/2] CPUArchState: introduce per-cpu lock Anthony Liguori
[not found] ` <1340290158-11036-3-git-send-email-qemulist@gmail.com>
2012-06-21 15:02 ` [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock Jan Kiszka
2012-06-22 20:06 ` Anthony Liguori [this message]
2012-06-23 9:32 ` liu ping fan
2012-06-24 14:09 ` liu ping fan
2012-06-21 15:06 [Qemu-devel] [RFC] use little granularity lock to substitue qemu_mutex_lock_iothread Liu Ping Fan
2012-06-21 15:06 ` [Qemu-devel] [PATCH 2/2] kvm: use per-cpu lock to free vcpu thread out of the big lock Liu Ping Fan
2012-06-22 2:29 ` 陳韋任 (Wei-Ren Chen)
2012-06-22 10:26 ` liu ping fan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FE4D03D.4010009@codemonkey.ws \
--to=anthony@codemonkey.ws \
--cc=jan.kiszka@siemens.com \
--cc=pingfank@linux.vnet.ibm.com \
--cc=qemu-devel@nongnu.org \
--cc=qemulist@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).