From 0953549985134268bf9079a7a01b2631d8a4fdee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Thu, 30 Jul 2015 15:13:33 +0000
Subject: [kvm-unit-tests PATCH] arm/barrier-test: add memory barrier tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This test has been written mainly to stress multi-threaded TCG behaviour,
but in its default configuration it will also demonstrate failure on real
hardware.

The test takes the following parameters:

  - "lock"    use GCC's locking semantics
  - "atomic"  use GCC's __atomic primitives
  - "barrier" use plain dmb() barriers
  - "wfelock" use WaitForEvent sleep
  - "excl"    use load/store exclusive semantics
  - "acqrel"  use acquire/release semantics

Two further options allow the test to be tweaked:

  - "noshuffle"  disables the memory shuffling
  - "count=%ld"  set your own per-CPU increment count

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v2
  - Don't use thumb style strexeq stuff
  - Add atomic, barrier and wfelock tests
  - Add count/noshuffle test controls
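For comparison outside of the patch itself, the failure mode the default
configuration is meant to show can be reproduced on any SMP host with a
plain pthreads program: the unsynchronised increment loses updates while
the __atomic variant (which in practice compiles down to ldrex/strex or
ldxr/stxr on ARM) does not. A minimal sketch, purely illustrative and not
part of this series:

  #include <pthread.h>
  #include <stdio.h>

  #define NR_THREADS 4
  #define NR_INCS    1000000

  static unsigned int plain_counter;   /* racy increment, loses updates */
  static unsigned int atomic_counter;  /* __atomic increment, does not  */

  static void *worker(void *arg)
  {
          int i;

          for (i = 0; i < NR_INCS; i++) {
                  plain_counter++;
                  /* same builtin as increment_shared_with_atomic() below */
                  __atomic_add_fetch(&atomic_counter, 1, __ATOMIC_SEQ_CST);
          }
          return NULL;
  }

  int main(void)
  {
          pthread_t threads[NR_THREADS];
          int i;

          for (i = 0; i < NR_THREADS; i++)
                  pthread_create(&threads[i], NULL, worker, NULL);
          for (i = 0; i < NR_THREADS; i++)
                  pthread_join(threads[i], NULL);

          printf("expected %d, plain %u, atomic %u\n",
                 NR_THREADS * NR_INCS, plain_counter, atomic_counter);
          return 0;
  }

Built with "gcc -O2 -pthread race.c", the plain counter comes up short as
soon as more than one thread runs. The unit test below makes the same
choice at run time via the parameters listed above, which are passed to
the guest on its command line (e.g. via the run script's -append option).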
---
 arm/barrier-test.c           | 284 +++++++++++++++++++++++++++++++++++++++++++
 config/config-arm-common.mak |   2 +
 2 files changed, 286 insertions(+)
 create mode 100644 arm/barrier-test.c

diff --git a/arm/barrier-test.c b/arm/barrier-test.c
new file mode 100644
index 0000000..765f8f6
--- /dev/null
+++ b/arm/barrier-test.c
@@ -0,0 +1,284 @@
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <asm/cpumask.h>
+#include <asm/barrier.h>
+#include <asm/mmu.h>
+
+#include <prng.h>
+
+#define MAX_CPUS 8
+
+/* How many increments to do */
+static int increment_count = 10000000;
+static int do_shuffle = 1;
+
+
+/* shared value all the tests attempt to safely increment */
+static unsigned int shared_value;
+
+/* PAGE_SIZE * uint32_t means we span several pages */
+static uint32_t memory_array[PAGE_SIZE];
+
+/* We use the alignment of the following to ensure accesses to locking
+ * and synchronisation primitives don't interfere with the page of the
+ * shared value
+ */
+__attribute__((aligned(PAGE_SIZE))) static unsigned int per_cpu_value[MAX_CPUS];
+__attribute__((aligned(PAGE_SIZE))) static cpumask_t smp_test_complete;
+__attribute__((aligned(PAGE_SIZE))) static int global_lock;
+
+struct isaac_ctx prng_context[MAX_CPUS];
+
+void (*inc_fn)(void);
+
+
+/* In any SMP setting this *should* fail due to cores stepping on
+ * each other updating the shared variable
+ */
+static void increment_shared(void)
+{
+	shared_value++;
+}
+
+/* GCC __sync primitives are deprecated in favour of __atomic */
+static void increment_shared_with_lock(void)
+{
+	while (__sync_lock_test_and_set(&global_lock, 1));
+	shared_value++;
+	__sync_lock_release(&global_lock);
+}
+
+/* In practice even __ATOMIC_RELAXED uses ARM's ldxr/stxr exclusive
+ * semantics */
+static void increment_shared_with_atomic(void)
+{
+	__atomic_add_fetch(&shared_value, 1, __ATOMIC_SEQ_CST);
+}
+
+
+/* By themselves barriers do not guarantee atomicity */
+static void increment_shared_with_barrier(void)
+{
+#if defined (__LP64__) || defined (_LP64)
+#else
+	asm volatile(
+	"	ldr r0, [%[sptr]]\n"
+	"	dmb\n"
+	"	add r0, r0, #0x1\n"
+	"	str r0, [%[sptr]]\n"
+	"	dmb\n"
+	: /* out */
+	: [sptr] "r" (&shared_value) /* in */
+	: "r0", "r1", "cc");
+#endif
+}
+
+/*
+ * Load/store exclusive with WFE (wait-for-event)
+ */
+
+static void increment_shared_with_wfelock(void)
+{
+#if defined (__LP64__) || defined (_LP64)
+#else
+	asm volatile(
+	"	mov r1, #1\n"
+	"1:	ldrex r0, [%[lock]]\n"
+	"	cmp r0, #0\n"
+	"	wfene\n"
+	"	strexeq r0, r1, [%[lock]]\n"
+	"	cmpeq r0, #0\n"
+	"	bne 1b\n"
+	"	dmb\n"
+	/* lock held */
+	"	ldr r0, [%[sptr]]\n"
+	"	add r0, r0, #0x1\n"
+	"	str r0, [%[sptr]]\n"
+	/* now release */
+	"	mov r0, #0\n"
+	"	dmb\n"
+	"	str r0, [%[lock]]\n"
+	"	dsb\n"
+	"	sev\n"
+	: /* out */
+	: [lock] "r" (&global_lock), [sptr] "r" (&shared_value) /* in */
+	: "r0", "r1", "cc");
+#endif
+}
+
+
+/*
+ * Hand-written version of the load/store exclusive
+ */
+static void increment_shared_with_excl(void)
+{
+#if defined (__LP64__) || defined (_LP64)
+	asm volatile(
+	"1:	ldxr w0, [%[sptr]]\n"
+	"	add w0, w0, #0x1\n"
+	"	stxr w1, w0, [%[sptr]]\n"
+	"	cbnz w1, 1b\n"
+	: /* out */
+	: [sptr] "r" (&shared_value) /* in */
+	: "w0", "w1", "cc");
+#else
+	asm volatile(
+	"1:	ldrex r0, [%[sptr]]\n"
+	"	add r0, r0, #0x1\n"
+	"	strex r1, r0, [%[sptr]]\n"
+	"	cmp r1, #0\n"
+	"	bne 1b\n"
+	: /* out */
+	: [sptr] "r" (&shared_value) /* in */
+	: "r0", "r1", "cc");
+#endif
+}
+
+static void increment_shared_with_acqrel(void)
+{
+#if defined (__LP64__) || defined (_LP64)
+	asm volatile(
+	"	ldar w0, [%[sptr]]\n"
+	"	add w0, w0, #0x1\n"
+	"	str w0, [%[sptr]]\n"
+	: /* out */
+	: [sptr] "r" (&shared_value) /* in */
+	: "w0");
+#else
+	/* ARMv7 has no acquire/release semantics but we
+	 * can ensure the results of the write are propagated
+	 * with the use of barriers.
+	 */
+	asm volatile(
+	"1:	ldrex r0, [%[sptr]]\n"
+	"	add r0, r0, #0x1\n"
+	"	strex r1, r0, [%[sptr]]\n"
+	"	cmp r1, #0\n"
+	"	bne 1b\n"
+	"	dmb\n"
+	: /* out */
+	: [sptr] "r" (&shared_value) /* in */
+	: "r0", "r1", "cc");
+#endif
+
+}
+
+/* The idea of this is just to generate some random load/store
+ * activity which may or may not race with an un-barriered increment
+ * of the shared counter
+ */
+static void shuffle_memory(int cpu)
+{
+	int i;
+	uint32_t lspat = isaac_next_uint32(&prng_context[cpu]);
+	uint32_t seq = isaac_next_uint32(&prng_context[cpu]);
+	int count = seq & 0x1f;
+	uint32_t val=0;
+
+	seq >>= 5;
+
+	for (i=0; i<count; i++) {
+		int index = seq & ~PAGE_MASK;
+		if (lspat & 1) {
+			val = memory_array[index];
+		} else {
+			memory_array[index] = val;
+		}
+		seq >>= PAGE_SHIFT;
+		seq ^= lspat;
+		lspat >>= 1;
+	}
+
+}
+
+static void do_increment(void)
+{
+	int i;
+	int cpu = smp_processor_id();
+
+	printf("CPU%d online\n", cpu);
+
+	for (i=0; i < increment_count; i++) {
+		per_cpu_value[cpu]++;
+		inc_fn();
+
+		if (do_shuffle)
+			shuffle_memory(cpu);
+	}
+
+	printf("CPU%d: Done, %d incs\n", cpu, per_cpu_value[cpu]);
+
+	cpumask_set_cpu(cpu, &smp_test_complete);
+	if (cpu != 0)
+		halt();
+}
+
+int main(int argc, char **argv)
+{
+	int cpu;
+	unsigned int i, sum = 0;
+	static const unsigned char seed[] = "myseed";
+
+	inc_fn = &increment_shared;
+
+	isaac_init(&prng_context[0], &seed[0], sizeof(seed));
+
+	for (i=0; i