* [Qemu-devel] [RFC][PATCH] Drop global lock during TCG code execution
@ 2011-09-27 8:38 Jan Kiszka
0 siblings, 0 replies; only message in thread
From: Jan Kiszka @ 2011-09-27 8:38 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Maydell, Anthony Liguori, Blue Swirl, Paolo Bonzini,
Edgar E. Iglesias, Aurelien Jarno
This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.
We have to revert a few optimizations for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.
I'm pretty sure some cases are still broken, definitely SMP (we no
longer perform round-robin scheduling "by chance"). Still, a Linux x86
UP guest and my Musicpal ARM model boot fine here. These numbers
demonstrate where we gain something:
20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm
20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm
The guest CPU was fully loaded, but the iothread could still run mostly
independently on a second core. Without the patch we don't get beyond
32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm
32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm
We don't benefit significantly, though, when the guest is not fully
loading a host CPU.
Note that this patch depends on
http://thread.gmane.org/gmane.comp.emulators.qemu/118657
---
cpus.c | 18 ++++++------------
exec.c | 33 +++++++++++++++++++++++++++++++++
qemu-common.h | 11 ++++++++---
softmmu_template.h | 8 ++++++++
target-i386/op_helper.c | 27 ++++++++++++++++++++++++---
5 files changed, 79 insertions(+), 18 deletions(-)
diff --git a/cpus.c b/cpus.c
index f983033..7d64437 100644
--- a/cpus.c
+++ b/cpus.c
@@ -721,7 +721,7 @@ void qemu_cpu_kick(void *_env)
CPUState *env = _env;
qemu_cond_broadcast(env->halt_cond);
- if (kvm_enabled() && !env->thread_kicked) {
+ if (!env->thread_kicked) {
qemu_cpu_kick_thread(env);
env->thread_kicked = true;
}
@@ -750,17 +750,7 @@ int qemu_cpu_is_self(void *_env)
void qemu_mutex_lock_iothread(void)
{
- if (kvm_enabled()) {
- qemu_mutex_lock(&qemu_global_mutex);
- } else {
- iothread_requesting_mutex = true;
- if (qemu_mutex_trylock(&qemu_global_mutex)) {
- qemu_cpu_kick_thread(first_cpu);
- qemu_mutex_lock(&qemu_global_mutex);
- }
- iothread_requesting_mutex = false;
- qemu_cond_broadcast(&qemu_io_proceeded_cond);
- }
+ qemu_mutex_lock(&qemu_global_mutex);
}
void qemu_mutex_unlock_iothread(void)
@@ -912,7 +902,11 @@ static int tcg_cpu_exec(CPUState *env)
env->icount_decr.u16.low = decr;
env->icount_extra = count;
}
+ qemu_mutex_unlock(&qemu_global_mutex);
+
ret = cpu_exec(env);
+
+ qemu_mutex_lock(&qemu_global_mutex);
#ifdef CONFIG_PROFILER
qemu_time += profile_getclock() - ti;
#endif
diff --git a/exec.c b/exec.c
index 1e6f732..0524574 100644
--- a/exec.c
+++ b/exec.c
@@ -1118,6 +1118,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
#endif
#ifdef TARGET_HAS_PRECISE_SMC
if (current_tb_modified) {
+ qemu_mutex_unlock_iothread();
/* we generate a block containing just the instruction
modifying the memory. It will ensure that it cannot modify
itself */
@@ -1205,6 +1206,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
if (current_tb_modified) {
+ qemu_mutex_unlock_iothread();
/* we generate a block containing just the instruction
modifying the memory. It will ensure that it cannot modify
itself */
@@ -2061,9 +2063,11 @@ void tlb_flush_page(CPUState *env, target_ulong addr)
can be detected */
static void tlb_protect_code(ram_addr_t ram_addr)
{
+ qemu_mutex_lock_iothread();
cpu_physical_memory_reset_dirty(ram_addr,
ram_addr + TARGET_PAGE_SIZE,
CODE_DIRTY_FLAG);
+ qemu_mutex_unlock_iothread();
}
/* update the TLB so that writes in physical page 'phys_addr' are no longer
@@ -3122,11 +3126,13 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
QLIST_FOREACH(block, &ram_list.blocks, next) {
if (addr - block->offset < block->length) {
+#if 0 /* requires RCU - and likely also a smarter balancing algorithm */
/* Move this entry to to start of the list. */
if (block != QLIST_FIRST(&ram_list.blocks)) {
QLIST_REMOVE(block, next);
QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
}
+#endif
if (xen_enabled()) {
/* We need to check if the requested address is in the RAM
* because we don't want to map the entire memory in QEMU.
@@ -3417,6 +3423,7 @@ static void check_watchpoint(int offset, int len_mask, int flags)
(vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
wp->flags |= BP_WATCHPOINT_HIT;
if (!env->watchpoint_hit) {
+ qemu_mutex_unlock_iothread();
env->watchpoint_hit = wp;
tb = tb_find_pc(env->mem_io_pc);
if (!tb) {
@@ -4194,6 +4201,7 @@ static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
if (p)
addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ qemu_mutex_lock_iothread();
val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -4204,6 +4212,7 @@ static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
val = bswap32(val);
}
#endif
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
@@ -4264,6 +4273,7 @@ static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
/* XXX This is broken when device endian != cpu endian.
Fix and add "endian" variable check */
+ qemu_mutex_lock_iothread();
#ifdef TARGET_WORDS_BIGENDIAN
val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
@@ -4271,6 +4281,7 @@ static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
#endif
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
@@ -4336,6 +4347,7 @@ static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
if (p)
addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ qemu_mutex_lock_iothread();
val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
#if defined(TARGET_WORDS_BIGENDIAN)
if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -4346,6 +4358,7 @@ static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
val = bswap16(val);
}
#endif
+ qemu_mutex_unlock_iothread();
} else {
/* RAM case */
ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
@@ -4401,7 +4414,9 @@ void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
if (p)
addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ qemu_mutex_lock_iothread();
io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+ qemu_mutex_unlock_iothread();
} else {
unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
ptr = qemu_get_ram_ptr(addr1);
@@ -4409,11 +4424,13 @@ void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
if (unlikely(in_migration)) {
if (!cpu_physical_memory_is_dirty(addr1)) {
+ qemu_mutex_lock_iothread();
/* invalidate code */
tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
/* set dirty bit */
cpu_physical_memory_set_dirty_flags(
addr1, (0xff & ~CODE_DIRTY_FLAG));
+ qemu_mutex_unlock_iothread();
}
}
}
@@ -4437,6 +4454,7 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
if (p)
addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
+ qemu_mutex_lock_iothread();
#ifdef TARGET_WORDS_BIGENDIAN
io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
@@ -4444,6 +4462,7 @@ void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
#endif
+ qemu_mutex_unlock_iothread();
} else {
ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
(addr & ~TARGET_PAGE_MASK);
@@ -4480,7 +4499,9 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
val = bswap32(val);
}
#endif
+ qemu_mutex_lock_iothread();
io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+ qemu_mutex_unlock_iothread();
} else {
unsigned long addr1;
addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
@@ -4498,11 +4519,13 @@ static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
break;
}
if (!cpu_physical_memory_is_dirty(addr1)) {
+ qemu_mutex_lock_iothread();
/* invalidate code */
tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
/* set dirty bit */
cpu_physical_memory_set_dirty_flags(addr1,
(0xff & ~CODE_DIRTY_FLAG));
+ qemu_mutex_unlock_iothread();
}
}
}
@@ -4558,7 +4581,9 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
val = bswap16(val);
}
#endif
+ qemu_mutex_lock_iothread();
io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
+ qemu_mutex_unlock_iothread();
} else {
unsigned long addr1;
addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
@@ -4576,11 +4601,13 @@ static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
break;
}
if (!cpu_physical_memory_is_dirty(addr1)) {
+ qemu_mutex_lock_iothread();
/* invalidate code */
tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
/* set dirty bit */
cpu_physical_memory_set_dirty_flags(addr1,
(0xff & ~CODE_DIRTY_FLAG));
+ qemu_mutex_unlock_iothread();
}
}
}
@@ -4604,19 +4631,25 @@ void stw_be_phys(target_phys_addr_t addr, uint32_t val)
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
val = tswap64(val);
+ qemu_mutex_lock_iothread();
cpu_physical_memory_write(addr, &val, 8);
+ qemu_mutex_unlock_iothread();
}
void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
val = cpu_to_le64(val);
+ qemu_mutex_lock_iothread();
cpu_physical_memory_write(addr, &val, 8);
+ qemu_mutex_unlock_iothread();
}
void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
val = cpu_to_be64(val);
+ qemu_mutex_lock_iothread();
cpu_physical_memory_write(addr, &val, 8);
+ qemu_mutex_unlock_iothread();
}
/* virtual memory access for debug (includes writing to ROM) */
diff --git a/qemu-common.h b/qemu-common.h
index 5e87bdf..b97bbc8 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -96,6 +96,14 @@ static inline char *realpath(const char *path, char *resolved_path)
}
#endif
+#ifndef CONFIG_USER_ONLY
+void qemu_mutex_lock_iothread(void);
+void qemu_mutex_unlock_iothread(void);
+#else
+static inline void qemu_mutex_lock_iothread(void) { }
+static inline void qemu_mutex_unlock_iothread(void) { }
+#endif
+
/* FIXME: Remove NEED_CPU_H. */
#ifndef NEED_CPU_H
@@ -183,9 +191,6 @@ const char *path(const char *pathname);
void *qemu_oom_check(void *ptr);
-void qemu_mutex_lock_iothread(void);
-void qemu_mutex_unlock_iothread(void);
-
int qemu_open(const char *name, int flags, ...);
ssize_t qemu_write_full(int fd, const void *buf, size_t count)
QEMU_WARN_UNUSED_RESULT;
diff --git a/softmmu_template.h b/softmmu_template.h
index c2df9ec..eb7ce68 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -65,6 +65,8 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
cpu_io_recompile(env, retaddr);
}
+ qemu_mutex_lock_iothread();
+
env->mem_io_vaddr = addr;
#if SHIFT <= 2
res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
@@ -77,6 +79,8 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
res |= (uint64_t)io_mem_read[index][2](io_mem_opaque[index], physaddr + 4) << 32;
#endif
#endif /* SHIFT > 2 */
+
+ qemu_mutex_unlock_iothread();
return res;
}
@@ -207,6 +211,8 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
cpu_io_recompile(env, retaddr);
}
+ qemu_mutex_lock_iothread();
+
env->mem_io_vaddr = addr;
env->mem_io_pc = (unsigned long)retaddr;
#if SHIFT <= 2
@@ -220,6 +226,8 @@ static inline void glue(io_write, SUFFIX)(target_phys_addr_t physaddr,
io_mem_write[index][2](io_mem_opaque[index], physaddr + 4, val >> 32);
#endif
#endif /* SHIFT > 2 */
+
+ qemu_mutex_unlock_iothread();
}
void REGPARM glue(glue(__st, SUFFIX), MMUSUFFIX)(target_ulong addr,
diff --git a/target-i386/op_helper.c b/target-i386/op_helper.c
index 1fc248f..346b59f 100644
--- a/target-i386/op_helper.c
+++ b/target-i386/op_helper.c
@@ -662,32 +662,53 @@ void helper_check_iol(uint32_t t0)
void helper_outb(uint32_t port, uint32_t data)
{
+ qemu_mutex_lock_iothread();
cpu_outb(port, data & 0xff);
+ qemu_mutex_unlock_iothread();
}
target_ulong helper_inb(uint32_t port)
{
- return cpu_inb(port);
+ target_ulong ret;
+
+ qemu_mutex_lock_iothread();
+ ret = cpu_inb(port);
+ qemu_mutex_unlock_iothread();
+ return ret;
}
void helper_outw(uint32_t port, uint32_t data)
{
+ qemu_mutex_lock_iothread();
cpu_outw(port, data & 0xffff);
+ qemu_mutex_unlock_iothread();
}
target_ulong helper_inw(uint32_t port)
{
- return cpu_inw(port);
+ target_ulong ret;
+
+ qemu_mutex_lock_iothread();
+ ret = cpu_inw(port);
+ qemu_mutex_unlock_iothread();
+ return ret;
}
void helper_outl(uint32_t port, uint32_t data)
{
+ qemu_mutex_lock_iothread();
cpu_outl(port, data);
+ qemu_mutex_unlock_iothread();
}
target_ulong helper_inl(uint32_t port)
{
- return cpu_inl(port);
+ target_ulong ret;
+
+ qemu_mutex_lock_iothread();
+ ret = cpu_inl(port);
+ qemu_mutex_unlock_iothread();
+ return ret;
}
static inline unsigned int get_sp_mask(unsigned int e2)
--
1.7.3.4
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2011-09-27 8:39 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-09-27 8:38 [Qemu-devel] [RFC][PATCH] Drop global lock during TCG code execution Jan Kiszka
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.