From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Jones Subject: Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV] Date: Mon, 27 Jul 2015 09:54:11 +0200 Message-ID: <20150727075411.GA3758@hawk.localdomain> References: <1437744306-7911-1-git-send-email-alex.bennee@linaro.org> Mime-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: mttcg@greensocs.com, mark.burton@greensocs.com, fred.konrad@greensocs.com, a.spyridakis@virtualopensystems.com, kvm@vger.kernel.org To: Alex =?iso-8859-1?Q?Benn=E9e?= Return-path: Received: from mx1.redhat.com ([209.132.183.28]:40560 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750765AbbG0HyQ (ORCPT ); Mon, 27 Jul 2015 03:54:16 -0400 Content-Disposition: inline In-Reply-To: <1437744306-7911-1-git-send-email-alex.bennee@linaro.org> Sender: kvm-owner@vger.kernel.org List-ID: On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote: > This adds a fairly brain dead torture test for TLB flushes intended for > stressing the MTTCG QEMU build. It takes the usual -smp option for > multiple CPUs. >=20 > By default it will do a TLBIALL flush after each cycle. If you pass > -append "page" to the kernel it will take it in turns to flush each of > the computation functions. At the moment it doesn't do any re-mapping of > pages but maybe that is something that could be done in the future. 
>=20 > [DEV VERSION FOR COMMENT] > Signed-off-by: Alex Benn=E9e > --- > arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++= ++++++++++ > config/config-arm-common.mak | 4 +- > lib/arm/asm/mmu.h | 11 +++ > 3 files changed, 177 insertions(+), 1 deletion(-) > create mode 100644 arm/tlbflush.c >=20 > diff --git a/arm/tlbflush.c b/arm/tlbflush.c > new file mode 100644 > index 0000000..6eeff18 > --- /dev/null > +++ b/arm/tlbflush.c > @@ -0,0 +1,163 @@ > +#include > +#include > +#include > +#include > +#include > + > +#define SEQ_LENGTH 10 > + > +static cpumask_t smp_test_complete; > +static int flush_count =3D 100000; > +static int flush_self =3D 1; > +static int flush_page =3D 0; > + > +__attribute__((aligned(0x1000))) unsigned int hash_array(int length,= unsigned int *array) You should use PAGE_SIZE instead of 0x1000 in these attributes, allowin= g the test to also work for aarch64, as we're using 64k pages on aarch64. > +{ > + int i; > + unsigned int sum=3D0; > + for (i=3D0; i + { > + unsigned int val =3D *array++; > + sum ^=3D val; > + sum ^=3D (val >> (val % 16)); > + sum ^=3D (val << (val % 32)); > + } > + > + return sum; > +} > + > +__attribute__((aligned(0x1000))) void create_fib_sequence(int length= , unsigned int *array) > +{ > + int i; > + > + /* first two values */ > + array[0] =3D 0; > + array[1] =3D 1; > + for (i=3D2; i + { > + array[i] =3D array[i-2] + array[i-1]; > + } > +} > + > +__attribute__((aligned(0x1000))) unsigned long long factorial(unsign= ed int n) > +{ > + unsigned int i; > + unsigned long long fac =3D 1; > + for (i=3D1; i<=3Dn; i++) > + { > + fac =3D fac * i; > + } > + return fac; > +} > + > +/* do some computationally expensive stuff, return a checksum of the > + * results */ > +__attribute__((aligned(0x1000))) unsigned int do_computation(void) > +{ > + unsigned int fib_array[SEQ_LENGTH]; > + unsigned long long facfib_array[SEQ_LENGTH]; > + unsigned int fib_hash, facfib_hash; > + int cpu =3D smp_processor_id(); > + int i, j; > +=09 > + 
create_fib_sequence(SEQ_LENGTH, &fib_array[0]); > + fib_hash =3D hash_array(SEQ_LENGTH, &fib_array[0]); > + for (i=3D0; i + for (j=3D0; j + facfib_array[i] =3D factorial(fib_array[i]+j); > + } > + } > + facfib_hash =3D 0; > + for (i=3D0; i + for (j=3D0; j + facfib_hash ^=3D hash_array(sizeof(facfib_array)/sizeof(unsigned = int), (unsigned int *)&facfib_array[0]); > + } > + } > + > +#if 0 > + printf("CPU:%d FIBSEQ ", cpu); > + for (i=3D0; i + printf("%u,", fib_array[i]); > + printf("\n"); > + > + printf("CPU:%d FACFIB ", cpu); > + for (i=3D0; i + printf("%llu,", facfib_array[i]); > + printf("\n"); > +#endif > +=09 > + return (fib_hash ^ facfib_hash); > +} > + > +static void * pages[] =3D {&hash_array, &create_fib_sequence, &facto= rial, &do_computation}; I can't comment on whether or not the complexity of do_computation is necessary for your test, but it seems like overkill. Comments explaining why it's necessary would be good. > + > +static void test_flush(void) > +{ > + int i, errors =3D 0; > + int cpu =3D smp_processor_id(); > + > + unsigned int ref; > + > + printf("CPU%d online\n", cpu); > + > + ref =3D do_computation(); What makes you sure that the first time you do the computation per cpu is correct? I think computing it externally, and saving the result, i.e. #define EXPECTED_RESULT 0x12345678 would be more reliable. 
> + > + for (i=3D0; i < flush_count; i++) { > + unsigned int this_ref =3D do_computation(); > + > + if (this_ref !=3D ref) { > + errors++; > + printf("CPU%d: seq%d 0x%x!=3D0x%x\n", > + cpu, i, ref, this_ref); > + } > + > + if ((i % 1000) =3D=3D 0) { > + printf("CPU%d: seq%d\n", cpu, i); > + } > + =09 > + if (flush_self) { > + if (flush_page) { > + int j =3D (i % (sizeof(pages)/sizeof(void *))); libcflat.h has the ARRAY_SIZE macro > + flush_tlb_page((unsigned long)pages[j]); > + } else { > + flush_tlb_all(); > + } > + } > + } > + > + report("CPU%d: Done - Errors: %d\n", errors =3D=3D 0, cpu, errors); > + > + cpumask_set_cpu(cpu, &smp_test_complete); > + if (cpu !=3D 0) > + halt(); > +} > + > +int main(int argc, char **argv) > +{ > + int cpu, i; > +=09 > + report_prefix_push("tlbflush"); > + > + for (i=3D0; i + char *arg =3D argv[i]; > +/* printf("arg:%d:%s\n", i, arg); */ > + > + if (strcmp(arg, "page") =3D=3D 0) { > + report_prefix_push("page"); > + flush_page =3D 1; > + } > + } > + > + for_each_present_cpu(cpu) { > + if (cpu =3D=3D 0) > + continue; > + smp_boot_secondary(cpu, test_flush); > + } > + > + test_flush(); > + > + while (!cpumask_full(&smp_test_complete)) > + cpu_relax(); > + > + return report_summary(); As we use the kernel coding style you should run $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c Also, please rename to tlbflush-test.c to differentiate it from an implementation of tlbflush support, and to make the standalone test name (if we commit those patches) more descriptive. > +} > diff --git a/config/config-arm-common.mak b/config/config-arm-common.= mak > index 0674daa..5b14db4 100644 > --- a/config/config-arm-common.mak > +++ b/config/config-arm-common.mak > @@ -11,7 +11,8 @@ endif > =20 > tests-common =3D \ > $(TEST_DIR)/selftest.flat \ > - $(TEST_DIR)/spinlock-test.flat > + $(TEST_DIR)/spinlock-test.flat \ > + $(TEST_DIR)/tlbflush.flat As we're adding tests faster now it's becoming clear that the '\' list isn't so great. 
To add a new test at the bottom we always have to modify the last line too. We should either add the new one at the top (right below the 'tests-common =' line), or change this to a '+=' sequence, as is done for some other lists. > =20 > all: test_cases > =20 > @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(te= sts) > =20 > $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o > $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test= =2Eo > +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o > diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h > index c1bd01c..2bb0cde 100644 > --- a/lib/arm/asm/mmu.h > +++ b/lib/arm/asm/mmu.h > @@ -14,8 +14,11 @@ > #define PTE_AF PTE_EXT_AF > #define PTE_WBWA L_PTE_MT_WRITEALLOC > =20 > +/* See B3.18.7 TLB maintenance operations */ > + > static inline void local_flush_tlb_all(void) > { > + /* TLBIALL */ > asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0)); > dsb(); > isb(); > @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void) > local_flush_tlb_all(); > } > =20 > +static inline void flush_tlb_page(unsigned long vaddr) > +{ > + /* TLBIMVAA */ > + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr)); > + dsb(); > + isb(); > +} > + > #include > =20 > #endif /* __ASMARM_MMU_H_ */ This mmu.h change looks good, but please add the arm64 flush_tlb_page at the same time. And anyway, I guess you'll want your test to work for both arm and aarch64? Thanks, drew