* [Qemu-devel] [mttcg RFC v4 0/6] Atomic slow-path for mttcg
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
This is just an update with the relevant patches needed to port the v4
of "Slow-path for atomic instruction translation" to mttcg v7.
The full source code is available at the following GIT repository:
https://git.virtualopensystems.com/dev/qemu-mt.git
branch:
slowpath-for-atomic-v4-mttcg
Patch 6/6 is the first step towards using runtime helpers in place of the
TCG load/store instructions, as suggested by Aurelien Jarno.
The patch series also addresses some of Paolo's comments; however, the
exclusive bitmap still uses one bit per vCPU.
Alvise Rigo (6):
cpus: async_run_on_cpu: kick only if needed
cputlb: wrap tlb_flush with a new function
exec: ram_addr: Fix exclusive bitmap accessor
softmmu_llsc_template.h: move to multithreading
softmmu_template.h: move to multithreading
target-arm: Use a runtime helper for excl accesses
cpus.c | 4 +++-
cputlb.c | 25 ++++++++++++++++----
include/exec/exec-all.h | 1 +
include/exec/ram_addr.h | 61 +++++++++++++++++++++++++++++++++++++++++--------
include/qom/cpu.h | 4 ++++
softmmu_llsc_template.h | 59 +++++++++++++++++++++++++++++++++++------------
softmmu_template.h | 36 ++++++++++++++++++++++++-----
target-arm/helper.h | 2 ++
target-arm/op_helper.c | 11 +++++++++
target-arm/translate.c | 12 +++++++++-
10 files changed, 177 insertions(+), 38 deletions(-)
--
2.5.0
* [Qemu-devel] [mttcg RFC v4 1/6] cpus: async_run_on_cpu: kick only if needed
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
In some rare situations a vCPU can be kicked even if it is not ready to
execute TCG code, i.e. when current_tb has never been set. This can happen
with the atomic stress test (not the kvm-unit-tests based one), where a vCPU
can queue work for a vCPU that has not started yet.
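For context, the tcg_exec_flag checked in the hunk below is expected to come
from the mttcg base tree, not from this patch. A minimal, purely hypothetical
sketch of how such a flag could be maintained around the TCG execution loop
(names and placement are assumptions):

    /* Hypothetical sketch -- the mttcg tree may manage this flag elsewhere. */
    static void tcg_exec_one_vcpu(CPUState *cpu)
    {
        /* From here on the vCPU can meaningfully react to a kick. */
        atomic_set(&cpu->tcg_exec_flag, 1);

        tcg_cpu_exec(cpu);                  /* run translated code */

        /* The vCPU no longer executes TCG code; queued async work will be
         * picked up when it (re)enters the execution loop, no kick needed. */
        atomic_set(&cpu->tcg_exec_flag, 0);
    }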
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
cpus.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/cpus.c b/cpus.c
index f61530c..3d90142 100644
--- a/cpus.c
+++ b/cpus.c
@@ -935,7 +935,9 @@ void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
wi->done = false;
qemu_mutex_unlock(&cpu->work_mutex);
- qemu_cpu_kick(cpu);
+ if (tcg_enabled() && (atomic_read(&cpu->tcg_exec_flag) == 1)) {
+ qemu_cpu_kick(cpu);
+ }
}
void async_run_safe_work_on_cpu(CPUState *cpu, void (*func)(void *data),
--
2.5.0
* [Qemu-devel] [mttcg RFC v4 2/6] cputlb: wrap tlb_flush with a new function
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
Introduce a new function, tlb_query_flush_cpu, to request a TLB flush on a
given vCPU.
The function checks and sets a new flag (pending_tlb_flush) so that a flush
request is not queued while one is already pending.
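As a usage sketch (mirroring the call pattern that patch 4/6 adopts), a vCPU
that needs every other vCPU to flush its TLB can simply iterate over them;
the pending_tlb_flush flag collapses duplicate requests:

    /* Sketch: queue an asynchronous TLB flush on all other vCPUs. */
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != current_cpu) {
            tlb_query_flush_cpu(cpu, 1 /* flush_global */);
        }
    }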
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
cputlb.c | 21 ++++++++++++++++-----
include/exec/exec-all.h | 1 +
include/qom/cpu.h | 4 ++++
3 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/cputlb.c b/cputlb.c
index 538c92d..7cbaaca 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -79,13 +79,27 @@ static void tlb_flush_async_work(void *opaque)
struct TLBFlushParams *params = opaque;
tlb_flush(params->cpu, params->flush_global);
+ atomic_set(&params->cpu->pending_tlb_flush, 0);
+
g_free(params);
}
+void tlb_query_flush_cpu(CPUState *cpu, int flush_global) {
+ struct TLBFlushParams *params;
+
+ if (!atomic_read(&cpu->pending_tlb_flush)) {
+ params = g_malloc(sizeof(struct TLBFlushParams));
+ params->cpu = cpu;
+ params->flush_global = flush_global;
+
+ atomic_set(&cpu->pending_tlb_flush, 1);
+ async_run_on_cpu(cpu, tlb_flush_async_work, params);
+ }
+}
+
void tlb_flush_all(int flush_global)
{
CPUState *cpu;
- struct TLBFlushParams *params;
#if 0 /* MTTCG */
CPU_FOREACH(cpu) {
@@ -99,10 +113,7 @@ void tlb_flush_all(int flush_global)
*/
tlb_flush(cpu, flush_global);
} else {
- params = g_malloc(sizeof(struct TLBFlushParams));
- params->cpu = cpu;
- params->flush_global = flush_global;
- async_run_on_cpu(cpu, tlb_flush_async_work, params);
+ tlb_query_flush_cpu(cpu, flush_global);
}
}
#endif /* MTTCG */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 246df68..3c36724 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -99,6 +99,7 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
/* cputlb.c */
void tlb_flush_page_all(target_ulong addr);
void tlb_flush_page(CPUState *cpu, target_ulong addr);
+void tlb_query_flush_cpu(CPUState *cpu, int flush_global);
void tlb_flush_all(int flush_global);
void tlb_flush(CPUState *cpu, int flush_global);
void tlb_set_page(CPUState *cpu, target_ulong vaddr,
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 23418c0..62abf6e 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -299,6 +299,10 @@ struct CPUState {
void *opaque;
+ /* True if the CPU has a pending request for a TLB flush. While this value
+ * is true, any flush request will be ignored. */
+ int pending_tlb_flush;
+
/* In order to avoid passing too many arguments to the MMIO helpers,
* we store some rarely used information in the CPU context.
*/
--
2.5.0
* [Qemu-devel] [mttcg RFC v4 3/6] exec: ram_addr: Fix exclusive bitmap accessor
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
include/exec/ram_addr.h | 61 +++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 51 insertions(+), 10 deletions(-)
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 6b678d6..34bb486 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -269,11 +269,28 @@ static inline int cpu_physical_memory_excl_atleast_one_clean(ram_addr_t addr)
unsigned long next, end;
if (likely(smp_cpus <= BITS_PER_LONG)) {
- unsigned long mask = (1 << smp_cpus) - 1;
-
- return
- (mask & (bitmap[BIT_WORD(EXCL_BITMAP_GET_OFFSET(addr))] >>
- (EXCL_BITMAP_GET_OFFSET(addr) & (BITS_PER_LONG-1)))) != mask;
+ unsigned long mask1;
+ uint32_t shift, first_off;
+ /* Number of vCPUs bits in the next long. */
+ int bits_left;
+
+ first_off = BIT_WORD(EXCL_BITMAP_GET_OFFSET(addr));
+ shift = (EXCL_BITMAP_GET_OFFSET(addr) & (BITS_PER_LONG-1));
+ bits_left = (shift + smp_cpus) - BITS_PER_LONG;
+
+ if (bits_left <= 0) {
+ mask1 = (1 << smp_cpus) - 1;
+ return (mask1 & (bitmap[first_off] >> shift)) != mask1;
+ } else {
+ /* The bits we need to access span two different longs. */
+ unsigned long mask2;
+
+ mask2 = (1 << bits_left) - 1;
+ mask1 = (1 << (smp_cpus - bits_left)) - 1;
+
+ return !(((mask2 & bitmap[first_off + 1]) == mask2) &&
+ ((mask1 & (bitmap[first_off] >> shift)) == mask1));
+ }
}
end = BIT_WORD(EXCL_BITMAP_GET_OFFSET(addr)) + smp_cpus;
@@ -288,15 +305,41 @@ static inline int cpu_physical_memory_excl_is_dirty(ram_addr_t addr,
{
unsigned long *bitmap = ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE];
unsigned long end, next;
- uint32_t add;
+ uint32_t add, first_off;
assert(cpu <= smp_cpus);
if (likely(smp_cpus <= BITS_PER_LONG)) {
- cpu = (cpu == smp_cpus) ? (1 << cpu) - 1 : (1 << cpu);
+ uint32_t shift = 0;
+
+ if (cpu == smp_cpus) {
+ unsigned long mask1, mask2;
+ int bits_left;
+
+ first_off = BIT_WORD(EXCL_BITMAP_GET_OFFSET(addr));
+ shift = (EXCL_BITMAP_GET_OFFSET(addr) & (BITS_PER_LONG-1));
+ bits_left = (shift + cpu) - BITS_PER_LONG;
+
+ if (bits_left <= 0) {
+ mask1 = (1 << cpu) - 1;
+
+ return mask1 & (bitmap[first_off] >> shift);
+ }
+
+ mask2 = (1 << bits_left) - 1;
+ mask1 = (1 << (cpu - bits_left)) - 1;
+
+ return (mask1 & (bitmap[first_off] >> shift)) |
+ (mask2 & (bitmap[first_off + 1]));
+ } else {
+ first_off = BIT_WORD(EXCL_BITMAP_GET_OFFSET(addr) + cpu);
+ shift = ((EXCL_BITMAP_GET_OFFSET(addr) + cpu) & (BITS_PER_LONG-1));
+
+ return 1 & (bitmap[first_off] >> shift);
+ }
return cpu & (bitmap[BIT_WORD(EXCL_BITMAP_GET_OFFSET(addr))] >>
- (EXCL_BITMAP_GET_OFFSET(addr) & (BITS_PER_LONG-1)));
+ (shift));
}
add = (cpu == smp_cpus) ? 0 : 1;
@@ -315,7 +358,5 @@ static inline bool cpu_physical_memory_clear_excl_dirty(ram_addr_t addr,
EXCL_BITMAP_GET_OFFSET(addr) + cpu_index, 1);
}
-
-
#endif
#endif
--
2.5.0
* [Qemu-devel] [mttcg RFC v4 4/6] softmmu_llsc_template.h: move to multithreading
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
Update the TCG LL/SC helpers to work with multiple threads.
The basic idea remains untouched, but the whole mechanism now takes into
account the concurrent execution of multiple vCPUs.
In essence, when a vCPU performs a LL it checks which vCPUs do not have the
excl bit set for the accessed page. For each of those vCPUs it then:
- sets the excl bit
- requests a TLB flush
Doing so, we make sure that all the vCPUs will have the EXCL flag in the
TLB entry for that specific page *before* entering the next TB.
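A condensed sketch of that LL-side flow (the helper in the diff below is the
authoritative version; the names are the ones introduced by this series, with
the issuing vCPU written as this_cpu for brevity):

    /* Sketch of the LL path, not the literal helper. */
    CPUState *cpu;

    cpu_physical_memory_clear_excl_dirty(hw_addr, this_cpu->cpu_index);

    /* Walk the other vCPUs only if at least one of them still lacks the
     * excl bit; exit_flush_request keeps concurrent LLs from racing here. */
    if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus) &&
        !atomic_xchg(&exit_flush_request, 1)) {
        CPU_FOREACH(cpu) {
            if (cpu != this_cpu &&
                cpu_physical_memory_excl_is_dirty(hw_addr, cpu->cpu_index)) {
                cpu_physical_memory_clear_excl_dirty(hw_addr, cpu->cpu_index);
                tlb_query_flush_cpu(cpu, 1); /* refilled entries get TLB_EXCL */
            }
        }
        atomic_set(&exit_flush_request, 0);
    }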
Changes from v3:
- The rendez-vous mechanism has been removed, since the reworked TLB flush
request serves the same purpose.
Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
cputlb.c | 4 ++++
softmmu_llsc_template.h | 59 ++++++++++++++++++++++++++++++++++++-------------
2 files changed, 48 insertions(+), 15 deletions(-)
diff --git a/cputlb.c b/cputlb.c
index 7cbaaca..08949df 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -36,6 +36,10 @@
/* statistics */
int tlb_flush_count;
+/* For atomic instruction handling. */
+int exit_flush_request = 0;
+QemuMutex tcg_excl_access_lock;
+
/* NOTE:
* If flush_global is true (the usual case), flush all tlb entries.
* If flush_global is false, flush (at least) all tlb entries not
diff --git a/softmmu_llsc_template.h b/softmmu_llsc_template.h
index d2e92b4..9486385 100644
--- a/softmmu_llsc_template.h
+++ b/softmmu_llsc_template.h
@@ -33,25 +33,39 @@
#define helper_ldlink_name glue(glue(helper_be_ldlink, USUFFIX), MMUSUFFIX)
#define helper_stcond_name glue(glue(helper_be_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
#else /* LE helpers + 8bit helpers (generated only once for both LE end BE) */
#if DATA_SIZE > 1
#define helper_ldlink_name glue(glue(helper_le_ldlink, USUFFIX), MMUSUFFIX)
#define helper_stcond_name glue(glue(helper_le_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
#else /* DATA_SIZE <= 1 */
#define helper_ldlink_name glue(glue(helper_ret_ldlink, USUFFIX), MMUSUFFIX)
#define helper_stcond_name glue(glue(helper_ret_stcond, SUFFIX), MMUSUFFIX)
-#define helper_ld_legacy glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
-#define helper_st_legacy glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
+#define helper_ld glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
+#define helper_st glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
#endif
#endif
+#define is_read_tlb_entry_set(env, page, index) \
+({ \
+ (addr & TARGET_PAGE_MASK) \
+ == ((env->tlb_table[mmu_idx][index].addr_read) & \
+ (TARGET_PAGE_MASK | TLB_INVALID_MASK)); \
+})
+/* Whenever a SC operation fails, we add a small delay to reduce the
+ * concurrency among the atomic instruction emulation code. Without this delay,
+ * in very congested situations where plain stores make all the pending LLs
+ * fail, the code could reach a stalling situation in which all the SCs happen
+ * to fail.
+ * */
+#define TCG_ATOMIC_INSN_EMUL_DELAY 100
+
WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr)
{
@@ -61,11 +75,13 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
hwaddr hw_addr;
unsigned mmu_idx = get_mmuidx(oi);
- /* Use the proper load helper from cpu_ldst.h */
- ret = helper_ld_legacy(env, addr, mmu_idx, retaddr);
-
index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ if (!is_read_tlb_entry_set(env, addr, index) ||
+ !VICTIM_TLB_HIT(addr_read)) {
+ tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+ }
+
/* hw_addr = hwaddr of the page (i.e. section->mr->ram_addr + xlat)
* plus the offset (i.e. addr & ~TARGET_PAGE_MASK) */
hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) + addr;
@@ -73,22 +89,34 @@ WORD_TYPE helper_ldlink_name(CPUArchState *env, target_ulong addr,
cpu_physical_memory_clear_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
/* If all the vCPUs have the EXCL bit set for this page there is no need
* to request any flush. */
- if (cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus)) {
+ if (unlikely(!atomic_xchg(&exit_flush_request, 1) &&
+ cpu_physical_memory_excl_is_dirty(hw_addr, smp_cpus))) {
CPU_FOREACH(cpu) {
- if (current_cpu != cpu) {
+ if (cpu->thread_id != qemu_get_thread_id()) {
if (cpu_physical_memory_excl_is_dirty(hw_addr,
cpu->cpu_index)) {
cpu_physical_memory_clear_excl_dirty(hw_addr,
cpu->cpu_index);
- tlb_flush(cpu, 1);
+ tlb_query_flush_cpu(cpu, 1);
}
}
}
+
+ atomic_set(&exit_flush_request, 0);
}
+ env->ll_sc_context = true;
+
+ qemu_mutex_lock(&tcg_excl_access_lock);
+
+ /* Use the proper load helper from cpu_ldst.h */
+ ret = helper_ld(env, addr, mmu_idx, retaddr);
+
env->excl_protected_range.begin = hw_addr;
env->excl_protected_range.end = hw_addr + DATA_SIZE;
+ qemu_mutex_unlock(&tcg_excl_access_lock);
+
/* For this vCPU, just update the TLB entry, no need to flush. */
env->tlb_table[mmu_idx][index].addr_write |= TLB_EXCL;
@@ -106,12 +134,13 @@ WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr,
* access as one made by the store conditional wrapper. If the store
* conditional does not succeed, the value will be set to 0.*/
env->excl_succeeded = 1;
- helper_st_legacy(env, addr, val, mmu_idx, retaddr);
+ helper_st(env, addr, val, mmu_idx, retaddr);
if (env->excl_succeeded) {
env->excl_succeeded = 0;
ret = 0;
} else {
+ g_usleep(TCG_ATOMIC_INSN_EMUL_DELAY);
ret = 1;
}
@@ -120,5 +149,5 @@ WORD_TYPE helper_stcond_name(CPUArchState *env, target_ulong addr,
#undef helper_ldlink_name
#undef helper_stcond_name
-#undef helper_ld_legacy
-#undef helper_st_legacy
+#undef helper_ld
+#undef helper_st
--
2.5.0
* [Qemu-devel] [mttcg RFC v4 5/6] softmmu_template.h: move to multithreading
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
Using the tcg_excl_access_lock, port helper_{le,be}_st_name to work in real
multithreading.
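A condensed sketch of the resulting TLB_EXCL store path (the diff below is
the authoritative version; this only shows the structure of the section now
protected by the lock):

    /* Sketch of the slow path taken by helper_{le,be}_st_name() when the
     * TLB entry is marked TLB_EXCL. */
    qemu_mutex_lock(&tcg_excl_access_lock);

    if (env->excl_succeeded) {            /* store issued by an SC helper */
        if (env->excl_protected_range.begin != hw_addr ||
            env->excl_protected_range.end != hw_addr + DATA_SIZE) {
            /* The reservation was lost or never matched: fail the SC. */
            env->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
            env->excl_succeeded = 0;
            qemu_mutex_unlock(&tcg_excl_access_lock);
            return;
        }
        cpu_physical_memory_set_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
    }

    /* ... perform the actual store to haddr ... */

    lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE); /* break pending LLs */
    qemu_mutex_unlock(&tcg_excl_access_lock);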
Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
softmmu_template.h | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/softmmu_template.h b/softmmu_template.h
index ad65d20..514aeb7 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -418,20 +418,29 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
* exclusive-protected memory. */
hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ qemu_mutex_lock(&tcg_excl_access_lock);
/* The function lookup_and_reset_cpus_ll_addr could have reset the
* exclusive address. Fail the SC in this case.
* N.B.: Here excl_succeeded == 0 means that helper_le_st_name has
* not been called by a softmmu_llsc_template.h. */
if(env->excl_succeeded) {
- if (env->excl_protected_range.begin != hw_addr) {
- /* The vCPU is SC-ing to an unprotected address. */
+ if (!((env->excl_protected_range.begin == hw_addr) &&
+ env->excl_protected_range.end == (hw_addr + DATA_SIZE))) {
+ /* The vCPU is SC-ing to an unprotected address. This
+ * can also happen when a vCPU stores to the address.
+ * */
env->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
env->excl_succeeded = 0;
+ qemu_mutex_unlock(&tcg_excl_access_lock);
+
return;
}
- cpu_physical_memory_set_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
+ /* Now we are going for sure to complete the access. Set the
+ * bit to dirty. */
+ cpu_physical_memory_set_excl_dirty(hw_addr,
+ ENV_GET_CPU(env)->cpu_index);
}
haddr = addr + env->tlb_table[mmu_idx][index].addend;
@@ -441,8 +450,11 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
#endif
+ /* This will reset the excl address also for the current vCPU. */
lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
+ qemu_mutex_unlock(&tcg_excl_access_lock);
+
return;
} else {
if ((addr & (DATA_SIZE - 1)) != 0) {
@@ -532,20 +544,29 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
* exclusive-protected memory. */
hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+ qemu_mutex_lock(&tcg_excl_access_lock);
/* The function lookup_and_reset_cpus_ll_addr could have reset the
* exclusive address. Fail the SC in this case.
* N.B.: Here excl_succeeded == 0 means that helper_le_st_name has
* not been called by a softmmu_llsc_template.h. */
if(env->excl_succeeded) {
- if (env->excl_protected_range.begin != hw_addr) {
- /* The vCPU is SC-ing to an unprotected address. */
+ if (!((env->excl_protected_range.begin == hw_addr) &&
+ env->excl_protected_range.end == (hw_addr + DATA_SIZE))) {
+ /* The vCPU is SC-ing to an unprotected address. This
+ * can also happen when a vCPU stores to the address.
+ * */
env->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
env->excl_succeeded = 0;
+ qemu_mutex_unlock(&tcg_excl_access_lock);
+
return;
}
- cpu_physical_memory_set_excl_dirty(hw_addr, ENV_GET_CPU(env)->cpu_index);
+ /* Now we are going for sure to complete the access. Set the
+ * bit to dirty. */
+ cpu_physical_memory_set_excl_dirty(hw_addr,
+ ENV_GET_CPU(env)->cpu_index);
}
haddr = addr + env->tlb_table[mmu_idx][index].addend;
@@ -555,8 +576,11 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
#endif
+ /* This will reset the excl address also for the current vCPU. */
lookup_and_reset_cpus_ll_addr(hw_addr, DATA_SIZE);
+ qemu_mutex_unlock(&tcg_excl_access_lock);
+
return;
} else {
if ((addr & (DATA_SIZE - 1)) != 0) {
--
2.5.0
* [Qemu-devel] [mttcg RFC v4 6/6] target-arm: Use a runtime helper for excl accesses
From: Alvise Rigo @ 2015-08-14 15:55 UTC (permalink / raw)
To: qemu-devel, mttcg
Cc: claudio.fontana, pbonzini, jani.kokkonen, tech, alex.bennee,
aurelien
Instead of using TCG's load and store instructions, use a runtime helper as
a hook into the slow path.
This is a proof of concept to verify that the approach actually works.
At the moment only the 32bit STREX relies on this new code path, and it works
as expected.
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
target-arm/helper.h | 2 ++
target-arm/op_helper.c | 11 +++++++++++
target-arm/translate.c | 12 +++++++++++-
3 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index c77bf04..c4da74a 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -534,6 +534,8 @@ DEF_HELPER_4(atomic_cmpxchg64, i32, env, i32, i64, i32)
DEF_HELPER_1(atomic_clear, void, env)
DEF_HELPER_3(atomic_claim, void, env, i32, i64)
+DEF_HELPER_4(stcond_aa32_i32, i32, env, i32, i32, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index ba8c5f5..53dcdde 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -1095,3 +1095,14 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
return ((uint32_t)x >> shift) | (x << (32 - shift));
}
}
+
+uint32_t HELPER(stcond_aa32_i32)(CPUARMState *env, uint32_t val, uint32_t addr,
+ uint32_t index)
+{
+ CPUArchState *state = env;
+ TCGMemOpIdx op;
+
+ op = make_memop_idx(MO_LEUL, index);
+
+ return helper_le_stcondl_mmu(state, addr, val, op, 0);
+}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d90a27b..591ce97 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -1006,6 +1006,17 @@ static inline void gen_aa32_stex64(TCGv_i32 is_dirty, TCGv_i64 val,
#endif
+/* Use the runtime helper for 32bit exclusive stores. */
+static inline void gen_aa32_stex32(TCGv_i32 is_dirty, TCGv_i32 val,
+ TCGv_i32 addr, int index)
+{
+ TCGv index_tmp = tcg_temp_new_i32();
+
+ tcg_gen_movi_i32(index_tmp, index);
+ gen_helper_stcond_aa32_i32(is_dirty, cpu_env, val, addr, index_tmp);
+ tcg_temp_free_i32(index_tmp);
+}
+
DO_GEN_LD(8s, MO_SB)
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(8uex, MO_UB | MO_EXCL)
@@ -1021,7 +1032,6 @@ DO_GEN_ST(32, MO_TEUL)
/* Load/Store exclusive generators (always unsigned) */
DO_GEN_STEX(8, MO_UB)
DO_GEN_STEX(16, MO_TEUW)
-DO_GEN_STEX(32, MO_TEUL)
static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
--
2.5.0