From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:59020) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZMD4W-0000Fx-8t for qemu-devel@nongnu.org; Mon, 03 Aug 2015 06:34:21 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZMD4S-0005Td-Lv for qemu-devel@nongnu.org; Mon, 03 Aug 2015 06:34:20 -0400 Received: from mail-io0-f169.google.com ([209.85.223.169]:36060) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZMD4S-0005TM-GI for qemu-devel@nongnu.org; Mon, 03 Aug 2015 06:34:16 -0400 Received: by ioeg141 with SMTP id g141so140699959ioe.3 for ; Mon, 03 Aug 2015 03:34:16 -0700 (PDT) MIME-Version: 1.0 In-Reply-To: <87oaioll54.fsf@linaro.org> References: <1438358041-18021-1-git-send-email-alex.bennee@linaro.org> <1438358041-18021-12-git-send-email-alex.bennee@linaro.org> <87oaioll54.fsf@linaro.org> Date: Mon, 3 Aug 2015 12:34:15 +0200 Message-ID: From: alvise rigo Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [kvm-unit-tests PATCH v5 11/11] new: arm/barrier-test for memory barriers List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?UTF-8?B?QWxleCBCZW5uw6ll?= Cc: mttcg@listserver.greensocs.com, Peter Maydell , Andrew Jones , Claudio Fontana , kvm@vger.kernel.org, Alexander Spyridakis , Mark Burton , QEMU Developers , =?UTF-8?B?S09OUkFEIEZyw6lkw6lyaWM=?= On Mon, Aug 3, 2015 at 12:30 PM, Alex Benn=C3=A9e = wrote: > > alvise rigo writes: > >> Hi Alex, >> >> Nice set of tests, they are proving to be helpful. >> One question below. >> >> On Fri, Jul 31, 2015 at 5:54 PM, Alex Benn=C3=A9e wrote: >>> From: Alex Benn=C3=A9e >>> >>> This test has been written mainly to stress multi-threaded TCG behaviou= r >>> but will demonstrate failure by default on real hardware. The test take= s >>> the following parameters: >>> >>> - "lock" use GCC's locking semantics >>> - "excl" use load/store exclusive semantics >>> - "acqrel" use acquire/release semantics >>> >>> Currently excl/acqrel lock up on MTTCG >>> >>> Signed-off-by: Alex Benn=C3=A9e >>> --- >>> arm/barrier-test.c | 206 +++++++++++++++++++++++++++++++++++= ++++++++ >>> config/config-arm-common.mak | 2 + >>> 2 files changed, 208 insertions(+) >>> create mode 100644 arm/barrier-test.c >>> >>> diff --git a/arm/barrier-test.c b/arm/barrier-test.c >>> new file mode 100644 >>> index 0000000..53d690b >>> --- /dev/null >>> +++ b/arm/barrier-test.c >>> @@ -0,0 +1,206 @@ >>> +#include >>> +#include >>> +#include >>> +#include >>> +#include >>> + >>> +#include >>> + >>> +#define MAX_CPUS 4 >>> + >>> +/* How many increments to do */ >>> +static int increment_count =3D 10000000; >>> + >>> + >>> +/* shared value, we use the alignment to ensure the global_lock value >>> + * doesn't share a page */ >>> +static unsigned int shared_value; >>> + >>> +/* PAGE_SIZE * uint32_t means we span several pages */ >>> +static uint32_t memory_array[PAGE_SIZE]; >>> + >>> +__attribute__((aligned(PAGE_SIZE))) static unsigned int per_cpu_value[= MAX_CPUS]; >>> +__attribute__((aligned(PAGE_SIZE))) static cpumask_t smp_test_complete= ; >>> +__attribute__((aligned(PAGE_SIZE))) static int global_lock; >>> + >>> +struct isaac_ctx prng_context[MAX_CPUS]; >>> + >>> +void (*inc_fn)(void); >>> + >>> +static void lock(int *lock_var) >>> +{ >>> + while (__sync_lock_test_and_set(lock_var, 1)); >>> +} >>> +static void unlock(int *lock_var) >>> +{ >>> + __sync_lock_release(lock_var); >>> +} >>> + >>> +static void increment_shared(void) >>> +{ >>> + shared_value++; >>> +} >>> + >>> +static void increment_shared_with_lock(void) >>> +{ >>> + lock(&global_lock); >>> + shared_value++; >>> + unlock(&global_lock); >>> +} >>> + >>> +static void increment_shared_with_excl(void) >>> +{ >>> +#if defined (__LP64__) || defined (_LP64) >>> + asm volatile( >>> + "1: ldxr w0, [%[sptr]]\n" >>> + " add w0, w0, #0x1\n" >>> + " stxr w1, w0, [%[sptr]]\n" >>> + " cbnz w1, 1b\n" >>> + : /* out */ >>> + : [sptr] "r" (&shared_value) /* in */ >>> + : "w0", "w1", "cc"); >>> +#else >>> + asm volatile( >>> + "1: ldrex r0, [%[sptr]]\n" >>> + " add r0, r0, #0x1\n" >>> + " strexeq r1, r0, [%[sptr]]\n" >>> + " cmpeq r1, #0\n" >> >> Why are we calling these last two instructions with the 'eq' suffix? >> Shouldn't we just strex r1, r0, [sptr] and then cmp r1, #0? > > Possibly, my armv7 is a little rusty. I'm just looking at tweaking this > test now so I'll try and clean that up. > >> >> Thank you, >> alvise >> >>> + " bne 1b\n" >>> + : /* out */ >>> + : [sptr] "r" (&shared_value) /* in */ >>> + : "r0", "r1", "cc"); >>> +#endif >>> +} >>> + >>> +static void increment_shared_with_acqrel(void) >>> +{ >>> +#if defined (__LP64__) || defined (_LP64) >>> + asm volatile( >>> + " ldar w0, [%[sptr]]\n" >>> + " add w0, w0, #0x1\n" >>> + " str w0, [%[sptr]]\n" >>> + : /* out */ >>> + : [sptr] "r" (&shared_value) /* in */ >>> + : "w0"); >>> +#else >>> + /* ARMv7 has no acquire/release semantics but we >>> + * can ensure the results of the write are propagated >>> + * with the use of barriers. >>> + */ >>> + asm volatile( >>> + "1: ldrex r0, [%[sptr]]\n" >>> + " add r0, r0, #0x1\n" >>> + " strexeq r1, r0, [%[sptr]]\n" >>> + " cmpeq r1, #0\n" I have not tested it, but also this one looks wrong. Regards, alvise >>> + " bne 1b\n" >>> + " dmb\n" >>> + : /* out */ >>> + : [sptr] "r" (&shared_value) /* in */ >>> + : "r0", "r1", "cc"); >>> +#endif >>> + >>> +} >>> + >>> +/* The idea of this is just to generate some random load/store >>> + * activity which may or may not race with an un-barried incremented >>> + * of the shared counter >>> + */ >>> +static void shuffle_memory(int cpu) >>> +{ >>> + int i; >>> + uint32_t lspat =3D isaac_next_uint32(&prng_context[cpu]); >>> + uint32_t seq =3D isaac_next_uint32(&prng_context[cpu]); >>> + int count =3D seq & 0x1f; >>> + uint32_t val=3D0; >>> + >>> + seq >>=3D 5; >>> + >>> + for (i=3D0; i>> + int index =3D seq & ~PAGE_MASK; >>> + if (lspat & 1) { >>> + val ^=3D memory_array[index]; >>> + } else { >>> + memory_array[index] =3D val; >>> + } >>> + seq >>=3D PAGE_SHIFT; >>> + seq ^=3D lspat; >>> + lspat >>=3D 1; >>> + } >>> + >>> +} >>> + >>> +static void do_increment(void) >>> +{ >>> + int i; >>> + int cpu =3D smp_processor_id(); >>> + >>> + printf("CPU%d online\n", cpu); >>> + >>> + for (i=3D0; i < increment_count; i++) { >>> + per_cpu_value[cpu]++; >>> + inc_fn(); >>> + >>> + shuffle_memory(cpu); >>> + } >>> + >>> + printf("CPU%d: Done, %d incs\n", cpu, per_cpu_value[cpu]); >>> + >>> + cpumask_set_cpu(cpu, &smp_test_complete); >>> + if (cpu !=3D 0) >>> + halt(); >>> +} >>> + >>> +int main(int argc, char **argv) >>> +{ >>> + int cpu; >>> + unsigned int i, sum =3D 0; >>> + static const unsigned char seed[] =3D "myseed"; >>> + >>> + inc_fn =3D &increment_shared; >>> + >>> + isaac_init(&prng_context[0], &seed[0], sizeof(seed)); >>> + >>> + for (i=3D0; i>> + char *arg =3D argv[i]; >>> + >>> + if (strcmp(arg, "lock") =3D=3D 0) { >>> + inc_fn =3D &increment_shared_with_lock; >>> + report_prefix_push("lock"); >>> + } else if (strcmp(arg, "excl") =3D=3D 0) { >>> + inc_fn =3D &increment_shared_with_excl; >>> + report_prefix_push("excl"); >>> + } else if (strcmp(arg, "acqrel") =3D=3D 0) { >>> + inc_fn =3D &increment_shared_with_acqrel; >>> + report_prefix_push("acqrel"); >>> + } else { >>> + isaac_reseed(&prng_context[0], (unsigned char *= ) arg, strlen(arg)); >>> + } >>> + } >>> + >>> + /* fill our random page */ >>> + for (i=3D0; i>> + memory_array[i] =3D isaac_next_uint32(&prng_context[0])= ; >>> + } >>> + >>> + for_each_present_cpu(cpu) { >>> + uint32_t seed2 =3D isaac_next_uint32(&prng_context[0]); >>> + if (cpu =3D=3D 0) >>> + continue; >>> + >>> + isaac_init(&prng_context[cpu], (unsigned char *) &seed2= , sizeof(seed2)); >>> + smp_boot_secondary(cpu, do_increment); >>> + } >>> + >>> + do_increment(); >>> + >>> + while (!cpumask_full(&smp_test_complete)) >>> + cpu_relax(); >>> + >>> + /* All CPUs done, do we add up */ >>> + for_each_present_cpu(cpu) { >>> + sum +=3D per_cpu_value[cpu]; >>> + } >>> + report("total incs %d", sum =3D=3D shared_value, shared_value); >>> + >>> + return report_summary(); >>> +} >>> diff --git a/config/config-arm-common.mak b/config/config-arm-common.ma= k >>> index 67a9dda..af628e6 100644 >>> --- a/config/config-arm-common.mak >>> +++ b/config/config-arm-common.mak >>> @@ -12,6 +12,7 @@ endif >>> tests-common =3D $(TEST_DIR)/selftest.flat >>> tests-common +=3D $(TEST_DIR)/spinlock-test.flat >>> tests-common +=3D $(TEST_DIR)/tlbflush-test.flat >>> +tests-common +=3D $(TEST_DIR)/barrier-test.flat >>> >>> utils-common =3D $(TEST_DIR)/utils/kvm-query >>> >>> @@ -80,3 +81,4 @@ utils: $(utils-common) >>> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o >>> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o >>> $(TEST_DIR)/tlbflush-test.elf: $(cstart.o) $(TEST_DIR)/tlbflush-test.o >>> +$(TEST_DIR)/barrier-test.elf: $(cstart.o) $(TEST_DIR)/barrier-test.o >>> -- >>> 2.5.0 >>> > > -- > Alex Benn=C3=A9e