From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alex =?utf-8?Q?Benn=C3=A9e?= Subject: Re: [kvm-unit-tests PATCH v5 11/11] new: arm/barrier-test for memory barriers Date: Mon, 03 Aug 2015 11:30:15 +0100 Message-ID: <87oaioll54.fsf@linaro.org> References: <1438358041-18021-1-git-send-email-alex.bennee@linaro.org> <1438358041-18021-12-git-send-email-alex.bennee@linaro.org> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: mttcg@listserver.greensocs.com, Mark Burton , KONRAD =?utf-8?B?RnLDqWTDqXJpYw==?= , QEMU Developers , Peter Maydell , Alexander Spyridakis , Andrew Jones , Claudio Fontana , kvm@vger.kernel.org To: alvise rigo Return-path: Received: from mail-wi0-f171.google.com ([209.85.212.171]:35845 "EHLO mail-wi0-f171.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751799AbbHCKaU (ORCPT ); Mon, 3 Aug 2015 06:30:20 -0400 Received: by wicgj17 with SMTP id gj17so98658148wic.1 for ; Mon, 03 Aug 2015 03:30:18 -0700 (PDT) In-reply-to: Sender: kvm-owner@vger.kernel.org List-ID: alvise rigo writes: > Hi Alex, > > Nice set of tests, they are proving to be helpful. > One question below. > > On Fri, Jul 31, 2015 at 5:54 PM, Alex Benn=C3=A9e wrote: >> From: Alex Benn=C3=A9e >> >> This test has been written mainly to stress multi-threaded TCG behav= iour >> but will demonstrate failure by default on real hardware. The test t= akes >> the following parameters: >> >> - "lock" use GCC's locking semantics >> - "excl" use load/store exclusive semantics >> - "acqrel" use acquire/release semantics >> >> Currently excl/acqrel lock up on MTTCG >> >> Signed-off-by: Alex Benn=C3=A9e >> --- >> arm/barrier-test.c | 206 ++++++++++++++++++++++++++++++++= +++++++++++ >> config/config-arm-common.mak | 2 + >> 2 files changed, 208 insertions(+) >> create mode 100644 arm/barrier-test.c >> >> diff --git a/arm/barrier-test.c b/arm/barrier-test.c >> new file mode 100644 >> index 0000000..53d690b >> --- /dev/null >> +++ b/arm/barrier-test.c >> @@ -0,0 +1,206 @@ >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#include >> + >> +#define MAX_CPUS 4 >> + >> +/* How many increments to do */ >> +static int increment_count =3D 10000000; >> + >> + >> +/* shared value, we use the alignment to ensure the global_lock val= ue >> + * doesn't share a page */ >> +static unsigned int shared_value; >> + >> +/* PAGE_SIZE * uint32_t means we span several pages */ >> +static uint32_t memory_array[PAGE_SIZE]; >> + >> +__attribute__((aligned(PAGE_SIZE))) static unsigned int per_cpu_val= ue[MAX_CPUS]; >> +__attribute__((aligned(PAGE_SIZE))) static cpumask_t smp_test_compl= ete; >> +__attribute__((aligned(PAGE_SIZE))) static int global_lock; >> + >> +struct isaac_ctx prng_context[MAX_CPUS]; >> + >> +void (*inc_fn)(void); >> + >> +static void lock(int *lock_var) >> +{ >> + while (__sync_lock_test_and_set(lock_var, 1)); >> +} >> +static void unlock(int *lock_var) >> +{ >> + __sync_lock_release(lock_var); >> +} >> + >> +static void increment_shared(void) >> +{ >> + shared_value++; >> +} >> + >> +static void increment_shared_with_lock(void) >> +{ >> + lock(&global_lock); >> + shared_value++; >> + unlock(&global_lock); >> +} >> + >> +static void increment_shared_with_excl(void) >> +{ >> +#if defined (__LP64__) || defined (_LP64) >> + asm volatile( >> + "1: ldxr w0, [%[sptr]]\n" >> + " add w0, w0, #0x1\n" >> + " stxr w1, w0, [%[sptr]]\n" >> + " cbnz w1, 1b\n" >> + : /* out */ >> + : [sptr] "r" (&shared_value) /* in */ >> + : "w0", "w1", "cc"); >> +#else >> + asm volatile( >> + "1: ldrex r0, [%[sptr]]\n" >> + " add r0, r0, #0x1\n" >> + " strexeq r1, r0, [%[sptr]]\n" >> + " cmpeq r1, #0\n" > > Why are we calling these last two instructions with the 'eq' suffix? > Shouldn't we just strex r1, r0, [sptr] and then cmp r1, #0? Possibly, my armv7 is a little rusty. I'm just looking at tweaking this test now so I'll try and clean that up. > > Thank you, > alvise > >> + " bne 1b\n" >> + : /* out */ >> + : [sptr] "r" (&shared_value) /* in */ >> + : "r0", "r1", "cc"); >> +#endif >> +} >> + >> +static void increment_shared_with_acqrel(void) >> +{ >> +#if defined (__LP64__) || defined (_LP64) >> + asm volatile( >> + " ldar w0, [%[sptr]]\n" >> + " add w0, w0, #0x1\n" >> + " str w0, [%[sptr]]\n" >> + : /* out */ >> + : [sptr] "r" (&shared_value) /* in */ >> + : "w0"); >> +#else >> + /* ARMv7 has no acquire/release semantics but we >> + * can ensure the results of the write are propagated >> + * with the use of barriers. >> + */ >> + asm volatile( >> + "1: ldrex r0, [%[sptr]]\n" >> + " add r0, r0, #0x1\n" >> + " strexeq r1, r0, [%[sptr]]\n" >> + " cmpeq r1, #0\n" >> + " bne 1b\n" >> + " dmb\n" >> + : /* out */ >> + : [sptr] "r" (&shared_value) /* in */ >> + : "r0", "r1", "cc"); >> +#endif >> + >> +} >> + >> +/* The idea of this is just to generate some random load/store >> + * activity which may or may not race with an un-barried incremente= d >> + * of the shared counter >> + */ >> +static void shuffle_memory(int cpu) >> +{ >> + int i; >> + uint32_t lspat =3D isaac_next_uint32(&prng_context[cpu]); >> + uint32_t seq =3D isaac_next_uint32(&prng_context[cpu]); >> + int count =3D seq & 0x1f; >> + uint32_t val=3D0; >> + >> + seq >>=3D 5; >> + >> + for (i=3D0; i> + int index =3D seq & ~PAGE_MASK; >> + if (lspat & 1) { >> + val ^=3D memory_array[index]; >> + } else { >> + memory_array[index] =3D val; >> + } >> + seq >>=3D PAGE_SHIFT; >> + seq ^=3D lspat; >> + lspat >>=3D 1; >> + } >> + >> +} >> + >> +static void do_increment(void) >> +{ >> + int i; >> + int cpu =3D smp_processor_id(); >> + >> + printf("CPU%d online\n", cpu); >> + >> + for (i=3D0; i < increment_count; i++) { >> + per_cpu_value[cpu]++; >> + inc_fn(); >> + >> + shuffle_memory(cpu); >> + } >> + >> + printf("CPU%d: Done, %d incs\n", cpu, per_cpu_value[cpu]); >> + >> + cpumask_set_cpu(cpu, &smp_test_complete); >> + if (cpu !=3D 0) >> + halt(); >> +} >> + >> +int main(int argc, char **argv) >> +{ >> + int cpu; >> + unsigned int i, sum =3D 0; >> + static const unsigned char seed[] =3D "myseed"; >> + >> + inc_fn =3D &increment_shared; >> + >> + isaac_init(&prng_context[0], &seed[0], sizeof(seed)); >> + >> + for (i=3D0; i> + char *arg =3D argv[i]; >> + >> + if (strcmp(arg, "lock") =3D=3D 0) { >> + inc_fn =3D &increment_shared_with_lock; >> + report_prefix_push("lock"); >> + } else if (strcmp(arg, "excl") =3D=3D 0) { >> + inc_fn =3D &increment_shared_with_excl; >> + report_prefix_push("excl"); >> + } else if (strcmp(arg, "acqrel") =3D=3D 0) { >> + inc_fn =3D &increment_shared_with_acqrel; >> + report_prefix_push("acqrel"); >> + } else { >> + isaac_reseed(&prng_context[0], (unsigned cha= r *) arg, strlen(arg)); >> + } >> + } >> + >> + /* fill our random page */ >> + for (i=3D0; i> + memory_array[i] =3D isaac_next_uint32(&prng_context[= 0]); >> + } >> + >> + for_each_present_cpu(cpu) { >> + uint32_t seed2 =3D isaac_next_uint32(&prng_context[0= ]); >> + if (cpu =3D=3D 0) >> + continue; >> + >> + isaac_init(&prng_context[cpu], (unsigned char *) &se= ed2, sizeof(seed2)); >> + smp_boot_secondary(cpu, do_increment); >> + } >> + >> + do_increment(); >> + >> + while (!cpumask_full(&smp_test_complete)) >> + cpu_relax(); >> + >> + /* All CPUs done, do we add up */ >> + for_each_present_cpu(cpu) { >> + sum +=3D per_cpu_value[cpu]; >> + } >> + report("total incs %d", sum =3D=3D shared_value, shared_valu= e); >> + >> + return report_summary(); >> +} >> diff --git a/config/config-arm-common.mak b/config/config-arm-common= =2Emak >> index 67a9dda..af628e6 100644 >> --- a/config/config-arm-common.mak >> +++ b/config/config-arm-common.mak >> @@ -12,6 +12,7 @@ endif >> tests-common =3D $(TEST_DIR)/selftest.flat >> tests-common +=3D $(TEST_DIR)/spinlock-test.flat >> tests-common +=3D $(TEST_DIR)/tlbflush-test.flat >> +tests-common +=3D $(TEST_DIR)/barrier-test.flat >> >> utils-common =3D $(TEST_DIR)/utils/kvm-query >> >> @@ -80,3 +81,4 @@ utils: $(utils-common) >> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o >> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-tes= t.o >> $(TEST_DIR)/tlbflush-test.elf: $(cstart.o) $(TEST_DIR)/tlbflush-tes= t.o >> +$(TEST_DIR)/barrier-test.elf: $(cstart.o) $(TEST_DIR)/barrier-test.= o >> -- >> 2.5.0 >> --=20 Alex Benn=C3=A9e