From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alex =?utf-8?Q?Benn=C3=A9e?= Subject: Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV] Date: Mon, 27 Jul 2015 10:07:57 +0100 Message-ID: <87zj2im0ia.fsf@linaro.org> References: <1437744306-7911-1-git-send-email-alex.bennee@linaro.org> <20150727075411.GA3758@hawk.localdomain> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: mttcg@greensocs.com, mark.burton@greensocs.com, fred.konrad@greensocs.com, a.spyridakis@virtualopensystems.com, kvm@vger.kernel.org To: Andrew Jones Return-path: Received: from mail-wi0-f182.google.com ([209.85.212.182]:34091 "EHLO mail-wi0-f182.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751217AbbG0JIB (ORCPT ); Mon, 27 Jul 2015 05:08:01 -0400 Received: by wibud3 with SMTP id ud3so130909309wib.1 for ; Mon, 27 Jul 2015 02:08:00 -0700 (PDT) In-reply-to: <20150727075411.GA3758@hawk.localdomain> Sender: kvm-owner@vger.kernel.org List-ID: Andrew Jones writes: > On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote: >> This adds a fairly brain dead torture test for TLB flushes intended for >> stressing the MTTCG QEMU build. It takes the usual -smp option for >> multiple CPUs. >> >> By default it will do a TLBIALL flush after each cycle. If you pass >> -append "page" to the kernel it will take it in turns to flush each of >> the computation functions. At the moment it doesn't do any re-mapping of >> pages but maybe that is something that could be done in the future.
>>=20 >> [DEV VERSION FOR COMMENT] >> Signed-off-by: Alex Benn=C3=A9e >> --- >> arm/tlbflush.c | 163 ++++++++++++++++++++++++++++++++= +++++++++++ >> config/config-arm-common.mak | 4 +- >> lib/arm/asm/mmu.h | 11 +++ >> 3 files changed, 177 insertions(+), 1 deletion(-) >> create mode 100644 arm/tlbflush.c >>=20 >> diff --git a/arm/tlbflush.c b/arm/tlbflush.c >> new file mode 100644 >> index 0000000..6eeff18 >> --- /dev/null >> +++ b/arm/tlbflush.c >> @@ -0,0 +1,163 @@ >> +#include >> +#include >> +#include >> +#include >> +#include >> + >> +#define SEQ_LENGTH 10 >> + >> +static cpumask_t smp_test_complete; >> +static int flush_count =3D 100000; >> +static int flush_self =3D 1; >> +static int flush_page =3D 0; >> + >> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length= , unsigned int *array) > > You should use PAGE_SIZE instead of 0x1000 in these attributes, allow= ing > the test to also work for aarch64, as we're using 64k pages on > aarch64. Good point. > >> +{ >> + int i; >> + unsigned int sum=3D0; >> + for (i=3D0; i> + { >> + unsigned int val =3D *array++; >> + sum ^=3D val; >> + sum ^=3D (val >> (val % 16)); >> + sum ^=3D (val << (val % 32)); >> + } >> + >> + return sum; >> +} >> + >> +__attribute__((aligned(0x1000))) void create_fib_sequence(int lengt= h, unsigned int *array) >> +{ >> + int i; >> + >> + /* first two values */ >> + array[0] =3D 0; >> + array[1] =3D 1; >> + for (i=3D2; i> + { >> + array[i] =3D array[i-2] + array[i-1]; >> + } >> +} >> + >> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsig= ned int n) >> +{ >> + unsigned int i; >> + unsigned long long fac =3D 1; >> + for (i=3D1; i<=3Dn; i++) >> + { >> + fac =3D fac * i; >> + } >> + return fac; >> +} >> + >> +/* do some computationally expensive stuff, return a checksum of th= e >> + * results */ >> +__attribute__((aligned(0x1000))) unsigned int do_computation(void) >> +{ >> + unsigned int fib_array[SEQ_LENGTH]; >> + unsigned long long facfib_array[SEQ_LENGTH]; 
>> + unsigned int fib_hash, facfib_hash; >> + int cpu =3D smp_processor_id(); >> + int i, j; >> +=09 >> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]); >> + fib_hash =3D hash_array(SEQ_LENGTH, &fib_array[0]); >> + for (i=3D0; i> + for (j=3D0; j> + facfib_array[i] =3D factorial(fib_array[i]+j); >> + } >> + } >> + facfib_hash =3D 0; >> + for (i=3D0; i> + for (j=3D0; j> + facfib_hash ^=3D hash_array(sizeof(facfib_array)/sizeof(unsigned= int), (unsigned int *)&facfib_array[0]); >> + } >> + } >> + >> +#if 0 >> + printf("CPU:%d FIBSEQ ", cpu); >> + for (i=3D0; i> + printf("%u,", fib_array[i]); >> + printf("\n"); >> + >> + printf("CPU:%d FACFIB ", cpu); >> + for (i=3D0; i> + printf("%llu,", facfib_array[i]); >> + printf("\n"); >> +#endif >> +=09 >> + return (fib_hash ^ facfib_hash); >> +} >> + >> +static void * pages[] =3D {&hash_array, &create_fib_sequence, &fact= orial, &do_computation}; > > I can't comment on whether or not the complexity of do_computation is > necessary for your test, but it seems like overkill. Comments explaining > why it's necessary would be good. OK. From QEMU's TCG point of view I just want to ensure I have more than two basic blocks per-page region so I can check the block-chaining in-page and jump caching intra-page which are both affected on flushes. A computationally complex routine with a known answer would be nicer though I guess. > >> + >> +static void test_flush(void) >> +{ >> + int i, errors =3D 0; >> + int cpu =3D smp_processor_id(); >> + >> + unsigned int ref; >> + >> + printf("CPU%d online\n", cpu); >> + >> + ref =3D do_computation(); > > What makes you sure that the first time you do the computation > per cpu is correct? I think computing it externally, and saving > the result, i.e. > > #define EXPECTED_RESULT 0x12345678 > > would be more reliable. OK.
> >> + >> + for (i=3D0; i < flush_count; i++) { >> + unsigned int this_ref =3D do_computation(); >> + >> + if (this_ref !=3D ref) { >> + errors++; >> + printf("CPU%d: seq%d 0x%x!=3D0x%x\n", >> + cpu, i, ref, this_ref); >> + } >> + >> + if ((i % 1000) =3D=3D 0) { >> + printf("CPU%d: seq%d\n", cpu, i); >> + } >> + =09 >> + if (flush_self) { >> + if (flush_page) { >> + int j =3D (i % (sizeof(pages)/sizeof(void *))); > libcflat.h has the ARRAY_SIZE macro OK >> + flush_tlb_page((unsigned long)pages[j]); >> + } else { >> + flush_tlb_all(); >> + } >> + } >> + } >> + >> + report("CPU%d: Done - Errors: %d\n", errors =3D=3D 0, cpu, errors)= ; >> + >> + cpumask_set_cpu(cpu, &smp_test_complete); >> + if (cpu !=3D 0) >> + halt(); >> +} >> + >> +int main(int argc, char **argv) >> +{ >> + int cpu, i; >> +=09 >> + report_prefix_push("tlbflush"); >> + >> + for (i=3D0; i> + char *arg =3D argv[i]; >> +/* printf("arg:%d:%s\n", i, arg); */ >> + >> + if (strcmp(arg, "page") =3D=3D 0) { >> + report_prefix_push("page"); >> + flush_page =3D 1; >> + } >> + } >> + >> + for_each_present_cpu(cpu) { >> + if (cpu =3D=3D 0) >> + continue; >> + smp_boot_secondary(cpu, test_flush); >> + } >> + >> + test_flush(); >> + >> + while (!cpumask_full(&smp_test_complete)) >> + cpu_relax(); >> + >> + return report_summary(); > > As we use the kernel coding style you should run > > $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c > > Also, please rename to tlbflush-test.c to differentiate it > from an implementation of tlbflush support, and to make > the standalone test name (if we commit those patches) more > descriptive. I'll have another poke at my editor config. 
It should have been setting the coding style automatically, although of course explicit local variables are better ;-) > >> +} >> diff --git a/config/config-arm-common.mak b/config/config-arm-common= =2Emak >> index 0674daa..5b14db4 100644 >> --- a/config/config-arm-common.mak >> +++ b/config/config-arm-common.mak >> @@ -11,7 +11,8 @@ endif >> =20 >> tests-common =3D \ >> $(TEST_DIR)/selftest.flat \ >> - $(TEST_DIR)/spinlock-test.flat >> + $(TEST_DIR)/spinlock-test.flat \ >> + $(TEST_DIR)/tlbflush.flat > > As we're adding tests faster now it's becoming clear that the '\' list > isn't so great. To add a new test at the bottom we always have to modify > the last line too. We should either add the new one at the top (right > below the 'tests-common =' line), or change this to a '+=' sequence like > some other lists are done. > >> =20 >> all: test_cases >> =20 >> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(t= ests) >> =20 >> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o >> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-tes= t.o >> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o >> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h >> index c1bd01c..2bb0cde 100644 >> --- a/lib/arm/asm/mmu.h >> +++ b/lib/arm/asm/mmu.h >> @@ -14,8 +14,11 @@ >> #define PTE_AF PTE_EXT_AF >> #define PTE_WBWA L_PTE_MT_WRITEALLOC >> =20 >> +/* See B3.18.7 TLB maintenance operations */ >> + >> static inline void local_flush_tlb_all(void) >> { >> + /* TLBIALL */ >> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0)); >> dsb(); >> isb(); >> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void) >> local_flush_tlb_all(); >> } >> =20 >> +static inline void flush_tlb_page(unsigned long vaddr) >> +{ >> + /* TLBIMVAA */ >> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr)); >> + dsb(); >> + isb(); >> +} >> + >> #include >> =20 >> #endif /* __ASMARM_MMU_H_ */ > > This mmu.h change looks good, but please add the
arm64 > flush_tlb_page at the same time. And anyway, I guess you'll > want your test to work for both arm and aarch64? Yes, I will. Currently the MTTCG is arm32 only but this will be expanded. > > Thanks, > drew -- Alex Bennée