* [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV]
@ 2015-07-24 13:25 Alex Bennée
2015-07-27 7:54 ` Andrew Jones
0 siblings, 1 reply; 6+ messages in thread
From: Alex Bennée @ 2015-07-24 13:25 UTC (permalink / raw)
To: mttcg, mark.burton, fred.konrad
Cc: a.spyridakis, drjones, kvm, Alex Bennée
This adds a fairly brain dead torture test for TLB flushes intended for
stressing the MTTCG QEMU build. It takes the usual -smp option for
multiple CPUs.
By default it will do a TLBIALL flush after each cycle. If you pass
-append "page" to the kernel it will instead flush, in turn, the page
holding each of the computation functions. At the moment it doesn't do
any re-mapping of pages but maybe that is something that could be done
in the future.
[DEV VERSION FOR COMMENT]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++
config/config-arm-common.mak | 4 +-
lib/arm/asm/mmu.h | 11 +++
3 files changed, 177 insertions(+), 1 deletion(-)
create mode 100644 arm/tlbflush.c
diff --git a/arm/tlbflush.c b/arm/tlbflush.c
new file mode 100644
index 0000000..6eeff18
--- /dev/null
+++ b/arm/tlbflush.c
@@ -0,0 +1,163 @@
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <asm/cpumask.h>
+#include <asm/barrier.h>
+#include <asm/mmu.h>
+
+#define SEQ_LENGTH 10
+
+static cpumask_t smp_test_complete;
+static int flush_count = 100000;
+static int flush_self = 1;
+static int flush_page = 0;
+
+/*
+ * Fold @length words starting at @array into a single checksum.
+ *
+ * Every word is XORed into the running value three times: as-is, shifted
+ * right by (word % 16) and shifted left by (word % 32).  A @length of zero
+ * (or less) yields 0.  The function is aligned to 0x1000 so that its start
+ * address can be handed to the per-page flush in test_flush().
+ */
+__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array)
+{
+	unsigned int hash = 0;
+	int idx;
+
+	for (idx = 0; idx < length; idx++) {
+		unsigned int word = array[idx];
+
+		hash ^= word ^ (word >> (word % 16)) ^ (word << (word % 32));
+	}
+
+	return hash;
+}
+
+/*
+ * Fill @array with the first @length Fibonacci numbers (0, 1, 1, 2, ...).
+ *
+ * Only indices below @length are written, so a @length of 0 leaves the
+ * buffer untouched and a @length of 1 stores just the leading 0.  Sums are
+ * computed in unsigned int and therefore wrap rather than overflow.
+ */
+__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array)
+{
+	int i;
+
+	/* Seed values, guarded so that a short buffer is never overrun. */
+	if (length > 0)
+		array[0] = 0;
+	if (length > 1)
+		array[1] = 1;
+
+	for (i = 2; i < length; i++)
+		array[i] = array[i - 2] + array[i - 1];
+}
+
+/*
+ * Return @n factorial, with factorial(0) == 1.
+ *
+ * The product is accumulated in unsigned long long, so large arguments
+ * wrap instead of overflowing; the caller only needs a repeatable value,
+ * not a mathematically exact one.
+ */
+__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n)
+{
+	unsigned long long product = 1;
+	unsigned int factor = 1;
+
+	while (factor <= n) {
+		product *= factor;
+		factor++;
+	}
+
+	return product;
+}
+
+/*
+ * Do some computationally expensive stuff and return a checksum of the
+ * results.
+ *
+ * The work bounces between create_fib_sequence(), factorial() and
+ * hash_array(), each of which sits on its own 0x1000-aligned boundary, so
+ * every call crosses into a different region.  The returned value depends
+ * only on SEQ_LENGTH and on those helpers, so repeated calls must agree.
+ */
+__attribute__((aligned(0x1000))) unsigned int do_computation(void)
+{
+	unsigned int fib_array[SEQ_LENGTH];
+	/*
+	 * Zero-initialised because fib_array[0] is 0: the factorial loop below
+	 * never runs for index 0, and that slot would otherwise be hashed
+	 * while still holding stack garbage.
+	 */
+	unsigned long long facfib_array[SEQ_LENGTH] = { 0 };
+	unsigned int fib_hash, facfib_hash;
+	unsigned int j;
+	int i;
+
+	create_fib_sequence(SEQ_LENGTH, &fib_array[0]);
+	fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]);
+
+	/* Only the last factorial of each inner loop is kept; the rest is load. */
+	for (i = 0; i < SEQ_LENGTH; i++) {
+		for (j = 0; j < fib_array[i]; j++)
+			facfib_array[i] = factorial(fib_array[i] + j);
+	}
+
+	/*
+	 * The hash is deliberately recomputed on every pass to keep calling
+	 * into hash_array().  The fold is order-sensitive: a plain XOR of the
+	 * same value an even number of times (88 passes for SEQ_LENGTH 10)
+	 * cancels to 0 and would drop the factorial results from the checksum.
+	 */
+	facfib_hash = 0;
+	for (i = 0; i < SEQ_LENGTH; i++) {
+		for (j = 0; j < fib_array[i]; j++)
+			facfib_hash = facfib_hash * 31 +
+				hash_array(sizeof(facfib_array) / sizeof(unsigned int),
+					   (unsigned int *)&facfib_array[0]);
+	}
+
+#if 0
+	printf("CPU:%d FIBSEQ ", smp_processor_id());
+	for (i = 0; i < SEQ_LENGTH; i++)
+		printf("%u,", fib_array[i]);
+	printf("\n");
+
+	printf("CPU:%d FACFIB ", smp_processor_id());
+	for (i = 0; i < SEQ_LENGTH; i++)
+		printf("%llu,", facfib_array[i]);
+	printf("\n");
+#endif
+
+	return fib_hash ^ facfib_hash;
+}
+
+/*
+ * Start addresses of the 0x1000-aligned computation functions.  In "page"
+ * mode test_flush() walks this table and flushes one entry per iteration.
+ */
+static void *pages[] = {
+	&hash_array,
+	&create_fib_sequence,
+	&factorial,
+	&do_computation,
+};
+
+/*
+ * Per-CPU test body.
+ *
+ * Computes a reference checksum once, then repeats the computation
+ * flush_count times, counting every mismatch against the reference.  After
+ * each pass it flushes either one entry of pages[] (flush_page set) or the
+ * whole TLB, unless flush_self is clear.  On completion the CPU reports its
+ * result and sets its bit in smp_test_complete; every CPU other than 0
+ * then halts.
+ */
+static void test_flush(void)
+{
+	int i, errors = 0;
+	int cpu = smp_processor_id();
+	unsigned int ref;
+
+	printf("CPU%d online\n", cpu);
+
+	/* The first result on this CPU is taken as the expected value. */
+	ref = do_computation();
+
+	for (i = 0; i < flush_count; i++) {
+		unsigned int this_ref = do_computation();
+
+		if (this_ref != ref) {
+			errors++;
+			printf("CPU%d: seq%d 0x%x!=0x%x\n",
+			       cpu, i, ref, this_ref);
+		}
+
+		/* Progress marker every 1000 passes. */
+		if ((i % 1000) == 0)
+			printf("CPU%d: seq%d\n", cpu, i);
+
+		if (!flush_self)
+			continue;
+
+		/* Round-robin over pages[]; ARRAY_SIZE comes from libcflat.h. */
+		if (flush_page)
+			flush_tlb_page((unsigned long)pages[i % ARRAY_SIZE(pages)]);
+		else
+			flush_tlb_all();
+	}
+
+	report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors);
+
+	cpumask_set_cpu(cpu, &smp_test_complete);
+	if (cpu != 0)
+		halt();
+}
+
+/*
+ * Test entry point.
+ *
+ * Scans every argument for "page", which switches the test to per-page
+ * flushing, then starts test_flush() on each present CPU other than 0,
+ * runs it on the calling CPU as well, and waits until smp_test_complete
+ * is full before returning the report summary.
+ */
+int main(int argc, char **argv)
+{
+	int cpu;
+	int n;
+
+	report_prefix_push("tlbflush");
+
+	for (n = 0; n < argc; n++) {
+		if (strcmp(argv[n], "page") != 0)
+			continue;
+
+		report_prefix_push("page");
+		flush_page = 1;
+	}
+
+	for_each_present_cpu(cpu) {
+		if (cpu != 0)
+			smp_boot_secondary(cpu, test_flush);
+	}
+
+	test_flush();
+
+	/* Spin until every CPU has marked itself done. */
+	while (!cpumask_full(&smp_test_complete))
+		cpu_relax();
+
+	return report_summary();
+}
diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
index 0674daa..5b14db4 100644
--- a/config/config-arm-common.mak
+++ b/config/config-arm-common.mak
@@ -11,7 +11,8 @@ endif
tests-common = \
$(TEST_DIR)/selftest.flat \
- $(TEST_DIR)/spinlock-test.flat
+ $(TEST_DIR)/spinlock-test.flat \
+ $(TEST_DIR)/tlbflush.flat
all: test_cases
@@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests)
$(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
$(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
+$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o
diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
index c1bd01c..2bb0cde 100644
--- a/lib/arm/asm/mmu.h
+++ b/lib/arm/asm/mmu.h
@@ -14,8 +14,11 @@
#define PTE_AF PTE_EXT_AF
#define PTE_WBWA L_PTE_MT_WRITEALLOC
+/* See B3.18.7 TLB maintenance operations */
+
static inline void local_flush_tlb_all(void)
{
+ /* TLBIALL */
asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0));
dsb();
isb();
@@ -27,6 +30,14 @@ static inline void flush_tlb_all(void)
local_flush_tlb_all();
}
+/*
+ * Invalidate the TLB entry covering @vaddr, then wait for the operation
+ * to complete (dsb) and resynchronise the instruction stream (isb), in the
+ * same way local_flush_tlb_all() does for a full invalidate.
+ *
+ * NOTE(review): c8, c7 looks like the non-broadcast encoding, so this
+ * presumably affects only the executing CPU even though the name lacks a
+ * local_ prefix -- confirm against B3.18.7.
+ */
+static inline void flush_tlb_page(unsigned long vaddr)
+{
+ /* TLBIMVAA: the address to invalidate is passed in the source register */
+ asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr));
+ dsb();
+ isb();
+}
+
#include <asm/mmu-api.h>
#endif /* __ASMARM_MMU_H_ */
--
2.4.5
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV]
2015-07-24 13:25 [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV] Alex Bennée
@ 2015-07-27 7:54 ` Andrew Jones
2015-07-27 9:07 ` Alex Bennée
2015-07-29 13:58 ` Paolo Bonzini
0 siblings, 2 replies; 6+ messages in thread
From: Andrew Jones @ 2015-07-27 7:54 UTC (permalink / raw)
To: Alex Bennée; +Cc: mttcg, mark.burton, fred.konrad, a.spyridakis, kvm
On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote:
> This adds a fairly brain dead torture test for TLB flushes intended for
> stressing the MTTCG QEMU build. It takes the usual -smp option for
> multiple CPUs.
>
> By default it will do a TLBIALL flush after each cycle. If you pass
> -append "page" to the kernel it will take it in turns to flush each of
> the computation functions. At the moment it doesn't do any re-mapping of
> pages but maybe that is something that could be done in the future.
>
> [DEV VERSION FOR COMMENT]
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> ---
> arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++
> config/config-arm-common.mak | 4 +-
> lib/arm/asm/mmu.h | 11 +++
> 3 files changed, 177 insertions(+), 1 deletion(-)
> create mode 100644 arm/tlbflush.c
>
> diff --git a/arm/tlbflush.c b/arm/tlbflush.c
> new file mode 100644
> index 0000000..6eeff18
> --- /dev/null
> +++ b/arm/tlbflush.c
> @@ -0,0 +1,163 @@
> +#include <libcflat.h>
> +#include <asm/smp.h>
> +#include <asm/cpumask.h>
> +#include <asm/barrier.h>
> +#include <asm/mmu.h>
> +
> +#define SEQ_LENGTH 10
> +
> +static cpumask_t smp_test_complete;
> +static int flush_count = 100000;
> +static int flush_self = 1;
> +static int flush_page = 0;
> +
> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array)
You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing
the test to also work for aarch64, as we're using 64k pages on aarch64.
> +{
> + int i;
> + unsigned int sum=0;
> + for (i=0; i<length; i++)
> + {
> + unsigned int val = *array++;
> + sum ^= val;
> + sum ^= (val >> (val % 16));
> + sum ^= (val << (val % 32));
> + }
> +
> + return sum;
> +}
> +
> +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array)
> +{
> + int i;
> +
> + /* first two values */
> + array[0] = 0;
> + array[1] = 1;
> + for (i=2; i<length; i++)
> + {
> + array[i] = array[i-2] + array[i-1];
> + }
> +}
> +
> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n)
> +{
> + unsigned int i;
> + unsigned long long fac = 1;
> + for (i=1; i<=n; i++)
> + {
> + fac = fac * i;
> + }
> + return fac;
> +}
> +
> +/* do some computationally expensive stuff, return a checksum of the
> + * results */
> +__attribute__((aligned(0x1000))) unsigned int do_computation(void)
> +{
> + unsigned int fib_array[SEQ_LENGTH];
> + unsigned long long facfib_array[SEQ_LENGTH];
> + unsigned int fib_hash, facfib_hash;
> + int cpu = smp_processor_id();
> + int i, j;
> +
> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]);
> + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]);
> + for (i=0; i<SEQ_LENGTH; i++) {
> + for (j=0; j<fib_array[i]; j++) {
> + facfib_array[i] = factorial(fib_array[i]+j);
> + }
> + }
> + facfib_hash = 0;
> + for (i=0; i<SEQ_LENGTH; i++) {
> + for (j=0; j<fib_array[i]; j++) {
> + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]);
> + }
> + }
> +
> +#if 0
> + printf("CPU:%d FIBSEQ ", cpu);
> + for (i=0; i<SEQ_LENGTH; i++)
> + printf("%u,", fib_array[i]);
> + printf("\n");
> +
> + printf("CPU:%d FACFIB ", cpu);
> + for (i=0; i<SEQ_LENGTH; i++)
> + printf("%llu,", facfib_array[i]);
> + printf("\n");
> +#endif
> +
> + return (fib_hash ^ facfib_hash);
> +}
> +
> +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation};
I can't comment on whether or not the complexity of do_computation is
necessary for your test, but it seems like overkill. Comments explaining
why it's necessary would be good.
> +
> +static void test_flush(void)
> +{
> + int i, errors = 0;
> + int cpu = smp_processor_id();
> +
> + unsigned int ref;
> +
> + printf("CPU%d online\n", cpu);
> +
> + ref = do_computation();
What makes you sure that the first time you do the computation
per cpu is correct? I think computing it externally, and saving
the result, i.e.
#define EXPECTED_RESULT 0x12345678
would be more reliable.
> +
> + for (i=0; i < flush_count; i++) {
> + unsigned int this_ref = do_computation();
> +
> + if (this_ref != ref) {
> + errors++;
> + printf("CPU%d: seq%d 0x%x!=0x%x\n",
> + cpu, i, ref, this_ref);
> + }
> +
> + if ((i % 1000) == 0) {
> + printf("CPU%d: seq%d\n", cpu, i);
> + }
> +
> + if (flush_self) {
> + if (flush_page) {
> + int j = (i % (sizeof(pages)/sizeof(void *)));
libcflat.h has the ARRAY_SIZE macro
> + flush_tlb_page((unsigned long)pages[j]);
> + } else {
> + flush_tlb_all();
> + }
> + }
> + }
> +
> + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors);
> +
> + cpumask_set_cpu(cpu, &smp_test_complete);
> + if (cpu != 0)
> + halt();
> +}
> +
> +int main(int argc, char **argv)
> +{
> + int cpu, i;
> +
> + report_prefix_push("tlbflush");
> +
> + for (i=0; i<argc; i++) {
> + char *arg = argv[i];
> +/* printf("arg:%d:%s\n", i, arg); */
> +
> + if (strcmp(arg, "page") == 0) {
> + report_prefix_push("page");
> + flush_page = 1;
> + }
> + }
> +
> + for_each_present_cpu(cpu) {
> + if (cpu == 0)
> + continue;
> + smp_boot_secondary(cpu, test_flush);
> + }
> +
> + test_flush();
> +
> + while (!cpumask_full(&smp_test_complete))
> + cpu_relax();
> +
> + return report_summary();
As we use the kernel coding style you should run
$KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c
Also, please rename to tlbflush-test.c to differentiate it
from an implementation of tlbflush support, and to make
the standalone test name (if we commit those patches) more
descriptive.
> +}
> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
> index 0674daa..5b14db4 100644
> --- a/config/config-arm-common.mak
> +++ b/config/config-arm-common.mak
> @@ -11,7 +11,8 @@ endif
>
> tests-common = \
> $(TEST_DIR)/selftest.flat \
> - $(TEST_DIR)/spinlock-test.flat
> + $(TEST_DIR)/spinlock-test.flat \
> + $(TEST_DIR)/tlbflush.flat
As we're adding tests faster now it's becoming clear that the '\' list
isn't so great. To add a new test at the bottom we always have to modify
the last line too. We should either add the new one at the top (right
below the 'tests-common =' line), or change this to a '+=' sequence, as
is done for some other lists.
>
> all: test_cases
>
> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests)
>
> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o
> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
> index c1bd01c..2bb0cde 100644
> --- a/lib/arm/asm/mmu.h
> +++ b/lib/arm/asm/mmu.h
> @@ -14,8 +14,11 @@
> #define PTE_AF PTE_EXT_AF
> #define PTE_WBWA L_PTE_MT_WRITEALLOC
>
> +/* See B3.18.7 TLB maintenance operations */
> +
> static inline void local_flush_tlb_all(void)
> {
> + /* TLBIALL */
> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0));
> dsb();
> isb();
> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void)
> local_flush_tlb_all();
> }
>
> +static inline void flush_tlb_page(unsigned long vaddr)
> +{
> + /* TLBIMVAA */
> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr));
> + dsb();
> + isb();
> +}
> +
> #include <asm/mmu-api.h>
>
> #endif /* __ASMARM_MMU_H_ */
This mmu.h change looks good, but please add the arm64
flush_tlb_page at the same time. And anyway, I guess you'll
want your test to work for both arm and aarch64?
Thanks,
drew
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV]
2015-07-27 7:54 ` Andrew Jones
@ 2015-07-27 9:07 ` Alex Bennée
2015-07-27 10:32 ` Andrew Jones
2015-07-29 13:58 ` Paolo Bonzini
1 sibling, 1 reply; 6+ messages in thread
From: Alex Bennée @ 2015-07-27 9:07 UTC (permalink / raw)
To: Andrew Jones; +Cc: mttcg, mark.burton, fred.konrad, a.spyridakis, kvm
Andrew Jones <drjones@redhat.com> writes:
> On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote:
>> This adds a fairly brain dead torture test for TLB flushes intended for
>> stressing the MTTCG QEMU build. It takes the usual -smp option for
>> multiple CPUs.
>>
>> By default it will do a TLBIALL flush after each cycle. If you pass
>> -append "page" to the kernel it will take it in turns to flush each of
>> the computation functions. At the moment it doesn't do any re-mapping of
>> pages but maybe that is something that could be done in the future.
>>
>> [DEV VERSION FOR COMMENT]
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> ---
>> arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++
>> config/config-arm-common.mak | 4 +-
>> lib/arm/asm/mmu.h | 11 +++
>> 3 files changed, 177 insertions(+), 1 deletion(-)
>> create mode 100644 arm/tlbflush.c
>>
>> diff --git a/arm/tlbflush.c b/arm/tlbflush.c
>> new file mode 100644
>> index 0000000..6eeff18
>> --- /dev/null
>> +++ b/arm/tlbflush.c
>> @@ -0,0 +1,163 @@
>> +#include <libcflat.h>
>> +#include <asm/smp.h>
>> +#include <asm/cpumask.h>
>> +#include <asm/barrier.h>
>> +#include <asm/mmu.h>
>> +
>> +#define SEQ_LENGTH 10
>> +
>> +static cpumask_t smp_test_complete;
>> +static int flush_count = 100000;
>> +static int flush_self = 1;
>> +static int flush_page = 0;
>> +
>> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array)
>
> You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing
> the test to also work for aarch64, as we're using 64k pages on
> aarch64.
Good point.
>
>> +{
>> + int i;
>> + unsigned int sum=0;
>> + for (i=0; i<length; i++)
>> + {
>> + unsigned int val = *array++;
>> + sum ^= val;
>> + sum ^= (val >> (val % 16));
>> + sum ^= (val << (val % 32));
>> + }
>> +
>> + return sum;
>> +}
>> +
>> +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array)
>> +{
>> + int i;
>> +
>> + /* first two values */
>> + array[0] = 0;
>> + array[1] = 1;
>> + for (i=2; i<length; i++)
>> + {
>> + array[i] = array[i-2] + array[i-1];
>> + }
>> +}
>> +
>> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n)
>> +{
>> + unsigned int i;
>> + unsigned long long fac = 1;
>> + for (i=1; i<=n; i++)
>> + {
>> + fac = fac * i;
>> + }
>> + return fac;
>> +}
>> +
>> +/* do some computationally expensive stuff, return a checksum of the
>> + * results */
>> +__attribute__((aligned(0x1000))) unsigned int do_computation(void)
>> +{
>> + unsigned int fib_array[SEQ_LENGTH];
>> + unsigned long long facfib_array[SEQ_LENGTH];
>> + unsigned int fib_hash, facfib_hash;
>> + int cpu = smp_processor_id();
>> + int i, j;
>> +
>> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]);
>> + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]);
>> + for (i=0; i<SEQ_LENGTH; i++) {
>> + for (j=0; j<fib_array[i]; j++) {
>> + facfib_array[i] = factorial(fib_array[i]+j);
>> + }
>> + }
>> + facfib_hash = 0;
>> + for (i=0; i<SEQ_LENGTH; i++) {
>> + for (j=0; j<fib_array[i]; j++) {
>> + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]);
>> + }
>> + }
>> +
>> +#if 0
>> + printf("CPU:%d FIBSEQ ", cpu);
>> + for (i=0; i<SEQ_LENGTH; i++)
>> + printf("%u,", fib_array[i]);
>> + printf("\n");
>> +
>> + printf("CPU:%d FACFIB ", cpu);
>> + for (i=0; i<SEQ_LENGTH; i++)
>> + printf("%llu,", facfib_array[i]);
>> + printf("\n");
>> +#endif
>> +
>> + return (fib_hash ^ facfib_hash);
>> +}
>> +
>> +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation};
>
> I can't comment on whether or not the complexity of do_computation is
> necessary for your test, but it seems like overkill. Comments explaining
> why it's necessary would be good.
OK. From QEMU's TCG point of view I just want to ensure I have more than two
basic blocks per page region so I can check the in-page block chaining
and the inter-page jump caching, which are both affected by flushes. A
computationally complex routine with a known answer would be nicer
though, I guess.
>
>> +
>> +static void test_flush(void)
>> +{
>> + int i, errors = 0;
>> + int cpu = smp_processor_id();
>> +
>> + unsigned int ref;
>> +
>> + printf("CPU%d online\n", cpu);
>> +
>> + ref = do_computation();
>
> What makes you sure that the first time you do the computation
> per cpu is correct? I think computing it externally, and saving
> the result, i.e.
>
> #define EXPECTED_RESULT 0x12345678
>
> would be more reliable.
OK.
>
>> +
>> + for (i=0; i < flush_count; i++) {
>> + unsigned int this_ref = do_computation();
>> +
>> + if (this_ref != ref) {
>> + errors++;
>> + printf("CPU%d: seq%d 0x%x!=0x%x\n",
>> + cpu, i, ref, this_ref);
>> + }
>> +
>> + if ((i % 1000) == 0) {
>> + printf("CPU%d: seq%d\n", cpu, i);
>> + }
>> +
>> + if (flush_self) {
>> + if (flush_page) {
>> + int j = (i % (sizeof(pages)/sizeof(void *)));
> libcflat.h has the ARRAY_SIZE macro
OK
>> + flush_tlb_page((unsigned long)pages[j]);
>> + } else {
>> + flush_tlb_all();
>> + }
>> + }
>> + }
>> +
>> + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors);
>> +
>> + cpumask_set_cpu(cpu, &smp_test_complete);
>> + if (cpu != 0)
>> + halt();
>> +}
>> +
>> +int main(int argc, char **argv)
>> +{
>> + int cpu, i;
>> +
>> + report_prefix_push("tlbflush");
>> +
>> + for (i=0; i<argc; i++) {
>> + char *arg = argv[i];
>> +/* printf("arg:%d:%s\n", i, arg); */
>> +
>> + if (strcmp(arg, "page") == 0) {
>> + report_prefix_push("page");
>> + flush_page = 1;
>> + }
>> + }
>> +
>> + for_each_present_cpu(cpu) {
>> + if (cpu == 0)
>> + continue;
>> + smp_boot_secondary(cpu, test_flush);
>> + }
>> +
>> + test_flush();
>> +
>> + while (!cpumask_full(&smp_test_complete))
>> + cpu_relax();
>> +
>> + return report_summary();
>
> As we use the kernel coding style you should run
>
> $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c
>
> Also, please rename to tlbflush-test.c to differentiate it
> from an implementation of tlbflush support, and to make
> the standalone test name (if we commit those patches) more
> descriptive.
I'll have another poke at my editor config. It should have been setting
the coding style automatically, although of course explicit local
variables are better ;-)
>
>> +}
>> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
>> index 0674daa..5b14db4 100644
>> --- a/config/config-arm-common.mak
>> +++ b/config/config-arm-common.mak
>> @@ -11,7 +11,8 @@ endif
>>
>> tests-common = \
>> $(TEST_DIR)/selftest.flat \
>> - $(TEST_DIR)/spinlock-test.flat
>> + $(TEST_DIR)/spinlock-test.flat \
>> + $(TEST_DIR)/tlbflush.flat
>
> As we're adding tests faster now it's becoming clear that the '\' list
> isn't so great. To add a new test at the bottom we always have to modify
> the last line too. We should either add the new one at the top (right
> below the 'test-common =' line), or change this to a '+=' sequence like
> some other lists are done.
>
>>
>> all: test_cases
>>
>> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests)
>>
>> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
>> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
>> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o
>> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
>> index c1bd01c..2bb0cde 100644
>> --- a/lib/arm/asm/mmu.h
>> +++ b/lib/arm/asm/mmu.h
>> @@ -14,8 +14,11 @@
>> #define PTE_AF PTE_EXT_AF
>> #define PTE_WBWA L_PTE_MT_WRITEALLOC
>>
>> +/* See B3.18.7 TLB maintenance operations */
>> +
>> static inline void local_flush_tlb_all(void)
>> {
>> + /* TLBIALL */
>> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0));
>> dsb();
>> isb();
>> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void)
>> local_flush_tlb_all();
>> }
>>
>> +static inline void flush_tlb_page(unsigned long vaddr)
>> +{
>> + /* TLBIMVAA */
>> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr));
>> + dsb();
>> + isb();
>> +}
>> +
>> #include <asm/mmu-api.h>
>>
>> #endif /* __ASMARM_MMU_H_ */
>
> This mmu.h change looks good, but please add the arm64
> flush_tlb_page at the same time. And anyway, I guess you'll
> want your test to work for both arm and aarch64?
Yes I will. Currently the MTTCG is arm32 only but this will be expanded.
>
> Thanks,
> drew
--
Alex Bennée
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV]
2015-07-27 9:07 ` Alex Bennée
@ 2015-07-27 10:32 ` Andrew Jones
0 siblings, 0 replies; 6+ messages in thread
From: Andrew Jones @ 2015-07-27 10:32 UTC (permalink / raw)
To: Alex Bennée; +Cc: mttcg, mark.burton, fred.konrad, a.spyridakis, kvm
On Mon, Jul 27, 2015 at 10:07:57AM +0100, Alex Bennée wrote:
>
> Andrew Jones <drjones@redhat.com> writes:
>
> > On Fri, Jul 24, 2015 at 02:25:06PM +0100, Alex Bennée wrote:
> >> This adds a fairly brain dead torture test for TLB flushes intended for
> >> stressing the MTTCG QEMU build. It takes the usual -smp option for
> >> multiple CPUs.
> >>
> >> By default it will do a TLBIALL flush after each cycle. If you pass
> >> -append "page" to the kernel it will take it in turns to flush each of
> >> the computation functions. At the moment it doesn't do any re-mapping of
> >> pages but maybe that is something that could be done in the future.
> >>
> >> [DEV VERSION FOR COMMENT]
> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> >> ---
> >> arm/tlbflush.c | 163 +++++++++++++++++++++++++++++++++++++++++++
> >> config/config-arm-common.mak | 4 +-
> >> lib/arm/asm/mmu.h | 11 +++
> >> 3 files changed, 177 insertions(+), 1 deletion(-)
> >> create mode 100644 arm/tlbflush.c
> >>
> >> diff --git a/arm/tlbflush.c b/arm/tlbflush.c
> >> new file mode 100644
> >> index 0000000..6eeff18
> >> --- /dev/null
> >> +++ b/arm/tlbflush.c
> >> @@ -0,0 +1,163 @@
> >> +#include <libcflat.h>
> >> +#include <asm/smp.h>
> >> +#include <asm/cpumask.h>
> >> +#include <asm/barrier.h>
> >> +#include <asm/mmu.h>
> >> +
> >> +#define SEQ_LENGTH 10
> >> +
> >> +static cpumask_t smp_test_complete;
> >> +static int flush_count = 100000;
> >> +static int flush_self = 1;
> >> +static int flush_page = 0;
> >> +
> >> +__attribute__((aligned(0x1000))) unsigned int hash_array(int length, unsigned int *array)
> >
> > You should use PAGE_SIZE instead of 0x1000 in these attributes, allowing
> > the test to also work for aarch64, as we're using 64k pages on
> > aarch64.
>
> Good point.
>
> >
> >> +{
> >> + int i;
> >> + unsigned int sum=0;
> >> + for (i=0; i<length; i++)
> >> + {
> >> + unsigned int val = *array++;
> >> + sum ^= val;
> >> + sum ^= (val >> (val % 16));
> >> + sum ^= (val << (val % 32));
> >> + }
> >> +
> >> + return sum;
> >> +}
> >> +
> >> +__attribute__((aligned(0x1000))) void create_fib_sequence(int length, unsigned int *array)
> >> +{
> >> + int i;
> >> +
> >> + /* first two values */
> >> + array[0] = 0;
> >> + array[1] = 1;
> >> + for (i=2; i<length; i++)
> >> + {
> >> + array[i] = array[i-2] + array[i-1];
> >> + }
> >> +}
> >> +
> >> +__attribute__((aligned(0x1000))) unsigned long long factorial(unsigned int n)
> >> +{
> >> + unsigned int i;
> >> + unsigned long long fac = 1;
> >> + for (i=1; i<=n; i++)
> >> + {
> >> + fac = fac * i;
> >> + }
> >> + return fac;
> >> +}
> >> +
> >> +/* do some computationally expensive stuff, return a checksum of the
> >> + * results */
> >> +__attribute__((aligned(0x1000))) unsigned int do_computation(void)
> >> +{
> >> + unsigned int fib_array[SEQ_LENGTH];
> >> + unsigned long long facfib_array[SEQ_LENGTH];
> >> + unsigned int fib_hash, facfib_hash;
> >> + int cpu = smp_processor_id();
> >> + int i, j;
> >> +
> >> + create_fib_sequence(SEQ_LENGTH, &fib_array[0]);
> >> + fib_hash = hash_array(SEQ_LENGTH, &fib_array[0]);
> >> + for (i=0; i<SEQ_LENGTH; i++) {
> >> + for (j=0; j<fib_array[i]; j++) {
> >> + facfib_array[i] = factorial(fib_array[i]+j);
> >> + }
> >> + }
> >> + facfib_hash = 0;
> >> + for (i=0; i<SEQ_LENGTH; i++) {
> >> + for (j=0; j<fib_array[i]; j++) {
> >> + facfib_hash ^= hash_array(sizeof(facfib_array)/sizeof(unsigned int), (unsigned int *)&facfib_array[0]);
> >> + }
> >> + }
> >> +
> >> +#if 0
> >> + printf("CPU:%d FIBSEQ ", cpu);
> >> + for (i=0; i<SEQ_LENGTH; i++)
> >> + printf("%u,", fib_array[i]);
> >> + printf("\n");
> >> +
> >> + printf("CPU:%d FACFIB ", cpu);
> >> + for (i=0; i<SEQ_LENGTH; i++)
> >> + printf("%llu,", facfib_array[i]);
> >> + printf("\n");
> >> +#endif
> >> +
> >> + return (fib_hash ^ facfib_hash);
> >> +}
> >> +
> >> +static void * pages[] = {&hash_array, &create_fib_sequence, &factorial, &do_computation};
> >
> > I can't comment on whether or not the complexity of do_computation is
> > necessary for your test, but it seems like overkill. Comments explaining
> > why it's necessary would be good.
>
> OK. From QEMUs TCG point of view I just want to ensure I have more than two
> basic blocks per-page region so I can check the block-chaining in-page
> and jump caching intra-page which are both affected on flushes. A
> computationally complex routine with a known answer would be nicer
> though I guess.
>
> >
> >> +
> >> +static void test_flush(void)
> >> +{
> >> + int i, errors = 0;
> >> + int cpu = smp_processor_id();
> >> +
> >> + unsigned int ref;
> >> +
> >> + printf("CPU%d online\n", cpu);
> >> +
> >> + ref = do_computation();
> >
> > What makes you sure that the first time you do the computation
> > per cpu is correct? I think computing it externally, and saving
> > the result, i.e.
> >
> > #define EXPECTED_RESULT 0x12345678
> >
> > would be more reliable.
>
> OK.
>
> >
> >> +
> >> + for (i=0; i < flush_count; i++) {
> >> + unsigned int this_ref = do_computation();
> >> +
> >> + if (this_ref != ref) {
> >> + errors++;
> >> + printf("CPU%d: seq%d 0x%x!=0x%x\n",
> >> + cpu, i, ref, this_ref);
> >> + }
> >> +
> >> + if ((i % 1000) == 0) {
> >> + printf("CPU%d: seq%d\n", cpu, i);
> >> + }
> >> +
> >> + if (flush_self) {
> >> + if (flush_page) {
> >> + int j = (i % (sizeof(pages)/sizeof(void *)));
> > libcflat.h has the ARRAY_SIZE macro
>
> OK
>
> >> + flush_tlb_page((unsigned long)pages[j]);
> >> + } else {
> >> + flush_tlb_all();
> >> + }
> >> + }
> >> + }
> >> +
> >> + report("CPU%d: Done - Errors: %d\n", errors == 0, cpu, errors);
> >> +
> >> + cpumask_set_cpu(cpu, &smp_test_complete);
> >> + if (cpu != 0)
> >> + halt();
> >> +}
> >> +
> >> +int main(int argc, char **argv)
> >> +{
> >> + int cpu, i;
> >> +
> >> + report_prefix_push("tlbflush");
> >> +
> >> + for (i=0; i<argc; i++) {
> >> + char *arg = argv[i];
> >> +/* printf("arg:%d:%s\n", i, arg); */
> >> +
> >> + if (strcmp(arg, "page") == 0) {
> >> + report_prefix_push("page");
> >> + flush_page = 1;
> >> + }
> >> + }
> >> +
> >> + for_each_present_cpu(cpu) {
> >> + if (cpu == 0)
> >> + continue;
> >> + smp_boot_secondary(cpu, test_flush);
> >> + }
> >> +
> >> + test_flush();
> >> +
> >> + while (!cpumask_full(&smp_test_complete))
> >> + cpu_relax();
> >> +
> >> + return report_summary();
> >
> > As we use the kernel coding style you should run
> >
> > $KERNEL_SRC/scripts/checkpatch.pl -f arm/tlbflush.c
> >
> > Also, please rename to tlbflush-test.c to differentiate it
> > from an implementation of tlbflush support, and to make
> > the standalone test name (if we commit those patches) more
> > descriptive.
>
> I'll have another poke at my editor config. It should have been setting
> the coding style automatically, although of course explicit local
> variables are better ;-)
>
> >
> >> +}
> >> diff --git a/config/config-arm-common.mak b/config/config-arm-common.mak
> >> index 0674daa..5b14db4 100644
> >> --- a/config/config-arm-common.mak
> >> +++ b/config/config-arm-common.mak
> >> @@ -11,7 +11,8 @@ endif
> >>
> >> tests-common = \
> >> $(TEST_DIR)/selftest.flat \
> >> - $(TEST_DIR)/spinlock-test.flat
> >> + $(TEST_DIR)/spinlock-test.flat \
> >> + $(TEST_DIR)/tlbflush.flat
> >
> > As we're adding tests faster now it's becoming clear that the '\' list
> > isn't so great. To add a new test at the bottom we always have to modify
> > the last line too. We should either add the new one at the top (right
> > below the 'test-common =' line), or change this to a '+=' sequence like
> > some other lists are done.
> >
> >>
> >> all: test_cases
> >>
> >> @@ -72,3 +73,4 @@ test_cases: $(generated_files) $(tests-common) $(tests)
> >>
> >> $(TEST_DIR)/selftest.elf: $(cstart.o) $(TEST_DIR)/selftest.o
> >> $(TEST_DIR)/spinlock-test.elf: $(cstart.o) $(TEST_DIR)/spinlock-test.o
> >> +$(TEST_DIR)/tlbflush.elf: $(cstart.o) $(TEST_DIR)/tlbflush.o
> >> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
> >> index c1bd01c..2bb0cde 100644
> >> --- a/lib/arm/asm/mmu.h
> >> +++ b/lib/arm/asm/mmu.h
> >> @@ -14,8 +14,11 @@
> >> #define PTE_AF PTE_EXT_AF
> >> #define PTE_WBWA L_PTE_MT_WRITEALLOC
> >>
> >> +/* See B3.18.7 TLB maintenance operations */
> >> +
> >> static inline void local_flush_tlb_all(void)
> >> {
> >> + /* TLBIALL */
> >> asm volatile("mcr p15, 0, %0, c8, c7, 0" :: "r" (0));
> >> dsb();
> >> isb();
> >> @@ -27,6 +30,14 @@ static inline void flush_tlb_all(void)
> >> local_flush_tlb_all();
> >> }
> >>
> >> +static inline void flush_tlb_page(unsigned long vaddr)
> >> +{
> >> + /* TLBIMVAA */
> >> + asm volatile("mcr p15, 0, %0, c8, c7, 3" :: "r" (vaddr));
> >> + dsb();
> >> + isb();
> >> +}
> >> +
> >> #include <asm/mmu-api.h>
> >>
> >> #endif /* __ASMARM_MMU_H_ */
> >
> > This mmu.h change looks good, but please add the arm64
> > flush_tlb_page at the same time. And anyway, I guess you'll
> > want your test to work for both arm and aarch64?
>
> Yes I will. Currently the MTTCG is arm32 only but this will be expanded.
Actually, I'd also like the arm*/asm/mmu.h file changes to be in
a separate patch, they can be together in one patch, but that patch
should be separate from the unit test.
Thanks,
drew
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV]
2015-07-27 7:54 ` Andrew Jones
2015-07-27 9:07 ` Alex Bennée
@ 2015-07-29 13:58 ` Paolo Bonzini
2015-07-29 14:36 ` Andrew Jones
1 sibling, 1 reply; 6+ messages in thread
From: Paolo Bonzini @ 2015-07-29 13:58 UTC (permalink / raw)
To: Andrew Jones, Alex Bennée
Cc: mttcg, mark.burton, fred.konrad, a.spyridakis, kvm
On 27/07/2015 09:54, Andrew Jones wrote:
> Also, please rename to tlbflush-test.c to differentiate it
> from an implementation of tlbflush support, and to make
> the standalone test name (if we commit those patches) more
> descriptive.
I disagree here. Support code would go in lib/arm.
> As we're adding tests faster now it's becoming clear that the '\' list
> isn't so great. To add a new test at the bottom we always have to modify
> the last line too. We should either add the new one at the top (right
> below the 'test-common =' line), or change this to a '+=' sequence like
> some other lists are done.
If you prefer += that's okay, but then please modify also cflatobjs in
Makefile and the x86 fragments in config/.
Paolo
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV]
2015-07-29 13:58 ` Paolo Bonzini
@ 2015-07-29 14:36 ` Andrew Jones
0 siblings, 0 replies; 6+ messages in thread
From: Andrew Jones @ 2015-07-29 14:36 UTC (permalink / raw)
To: Paolo Bonzini
Cc: Alex Bennée, mttcg, mark.burton, fred.konrad, a.spyridakis,
kvm
On Wed, Jul 29, 2015 at 03:58:59PM +0200, Paolo Bonzini wrote:
>
>
> On 27/07/2015 09:54, Andrew Jones wrote:
> > Also, please rename to tlbflush-test.c to differentiate it
> > from an implementation of tlbflush support, and to make
> > the standalone test name (if we commit those patches) more
> > descriptive.
>
> I disagree here. Support code would go in lib/arm.
Generally yes, and for arm, so far yes, but not always. In x86 we
have kvmclock.c (support) vs. kvmclock_test.c (the test). And powerpc
will also have a support C file in powerpc/ rather than lib/powerpc/,
as I feel it fits better there, since it's really part of cstart.
But that said, I'm not overly opposed to dropping -test from the
common case, only using it when necessary. We can also append -test
for standalone test names later, if we want to.
Thanks,
drew
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2015-07-29 14:36 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-24 13:25 [kvm-unit-tests RFC PATCH] arm/tlbflush.c: TLB flushing torture test [DEV] Alex Bennée
2015-07-27 7:54 ` Andrew Jones
2015-07-27 9:07 ` Alex Bennée
2015-07-27 10:32 ` Andrew Jones
2015-07-29 13:58 ` Paolo Bonzini
2015-07-29 14:36 ` Andrew Jones
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox