Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

From: Sean Christopherson <seanjc@google.com>
To: Maxim Levitsky <mlevitsk@redhat.com>
Cc: kvm@vger.kernel.org, Cathy Avery <cavery@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: [kvm-unit-tests PATCH 16/16] add IPI loss stress test
Date: Thu, 20 Oct 2022 20:23:58 +0000	[thread overview]
Message-ID: <Y1GuXoYm6JLpkUvq@google.com> (raw)
In-Reply-To: <20221020152404.283980-17-mlevitsk@redhat.com>

On Thu, Oct 20, 2022, Maxim Levitsky wrote:
> +u64 num_iterations = -1;

"Run indefinitely" is an odd default.  Why not set the default number of iterations
to something reasonable and then let the user override that if the user wants to
run for an absurdly long time?

> +
> +volatile u64 *isr_counts;
> +bool use_svm;
> +int hlt_allowed = -1;

These can all be static.

> +
> +static int get_random(int min, int max)
> +{
> +	/* TODO : use rdrand to seed an PRNG instead */
> +	u64 random_value = rdtsc() >> 4;
> +
> +	return min + random_value % (max - min + 1);
> +}
> +
> +static void ipi_interrupt_handler(isr_regs_t *r)
> +{
> +	isr_counts[smp_id()]++;
> +	eoi();
> +}
> +
> +static void wait_for_ipi(volatile u64 *count)
> +{
> +	u64 old_count = *count;
> +	bool use_halt;
> +
> +	switch (hlt_allowed) {
> +	case -1:
> +		use_halt = get_random(0,10000) == 0;

Randomly doing "halt" is going to be annoying to debug.  What about tying
this decision to the iteration and then providing a knob to let the user specify
the frequency?  It seems unlikely that this test will expose a bug that occurs
if and only if the halt path is truly random.

> +		break;
> +	case 0:
> +		use_halt = false;
> +		break;
> +	case 1:
> +		use_halt = true;
> +		break;
> +	default:
> +		use_halt = false;
> +		break;
> +	}
> +
> +	do {
> +		if (use_halt)
> +			asm volatile ("sti;hlt;cli\n");

safe_halt();

> +		else
> +			asm volatile ("sti;nop;cli");

sti_nop_cli();

> +
> +	} while (old_count == *count);

There's no need to loop in the use_halt case.  If KVM spuriously wakes the vCPU
from halt, then that's a KVM bug.  Kinda ugly, but it does provide meaningful
coverage for the HLT case.

	if (use_halt) {
		safe_halt();
		cli();
	} else {
		do {
			sti_nop_cli();
		} while (old_count == *count);
	}

	assert(*count == old_count + 1);

> +}
> +
> +/******************************************************************************************************/
> +
> +#ifdef __x86_64__
> +
> +static void l2_guest_wait_for_ipi(volatile u64 *count)
> +{
> +	wait_for_ipi(count);
> +	asm volatile("vmmcall");
> +}
> +
> +static void l2_guest_dummy(void)
> +{
> +	asm volatile("vmmcall");
> +}
> +
> +static void wait_for_ipi_in_l2(volatile u64 *count, struct svm_vcpu *vcpu)
> +{
> +	u64 old_count = *count;
> +	bool irq_on_vmentry = get_random(0,1) == 0;

Same concerns about using random numbers.

> +
> +	vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
> +	vcpu->regs.rdi = (u64)count;
> +
> +	vcpu->vmcb->save.rip = irq_on_vmentry ? (ulong)l2_guest_dummy : (ulong)l2_guest_wait_for_ipi;
> +
> +	do {
> +		if (irq_on_vmentry)
> +			vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
> +		else
> +			vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
> +
> +		asm volatile("clgi;nop;sti");

Why a NOP between CLGI and STI?  And why re-enable GIF on each iteration?

> +		// GIF is set by VMRUN
> +		SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
> +		// GIF is cleared by VMEXIT
> +		asm volatile("cli;nop;stgi");

Why re-enable GIF on every exit?

> +
> +		assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
> +
> +	} while (old_count == *count);

Isn't the loop only necessary in the irq_on_vmentry case?

static void run_svm_l2(...)
{
	SVM_VMRUN(vcpu->vmcb, &vcpu->regs);
	assert(vcpu->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
}

E.g. can't this be:

	bool irq_on_vmentry = ???;
	u64 old_count = *count;

	clgi();
	sti();

	vcpu->regs.rdi = (u64)count;

	if (!irq_on_vmentry) {
		vcpu->vmcb->save.rip = (ulong)l2_guest_wait_for_ipi;
		vcpu->vmcb->save.rflags &= ~X86_EFLAGS_IF;
		run_svm_l2(...);
	} else {
		vcpu->vmcb->save.rip = (ulong)l2_guest_dummy
		vcpu->vmcb->save.rflags |= X86_EFLAGS_IF;
		do {
			run_svm_l2(...);
		} while (old_count == *count);
	}

	assert(*count == old_count + 1);
	cli();
	stgi();

> +}
> +#endif
> +
> +/******************************************************************************************************/
> +
> +#define FIRST_TEST_VCPU 1
> +
> +static void vcpu_init(void *data)
> +{
> +	/* To make it easier to see iteration number in the trace */
> +	handle_irq(0x40, ipi_interrupt_handler);
> +	handle_irq(0x50, ipi_interrupt_handler);

Why not make it even more granular?  E.g. do vector == 32 + (iteration % ???)
Regardless, a #define for the (base) vector would be helpful, the usage in
vcpu_code() is a bit magical.


> +}
> +
> +static void vcpu_code(void *data)
> +{
> +	int ncpus = cpu_count();
> +	int cpu = (long)data;
> +#ifdef __x86_64__
> +	struct svm_vcpu vcpu;
> +#endif
> +
> +	u64 i;
> +
> +#ifdef __x86_64__
> +	if (cpu == 2 && use_svm)

Why only CPU2?

> +		svm_vcpu_init(&vcpu);
> +#endif
> +
> +	assert(cpu != 0);
> +
> +	if (cpu != FIRST_TEST_VCPU)
> +		wait_for_ipi(&isr_counts[cpu]);
> +
> +	for (i = 0; i < num_iterations; i++)
> +	{
> +		u8 physical_dst = cpu == ncpus -1 ? 1 : cpu + 1;

Space after the '-'.

> +
> +		// send IPI to a next vCPU in a circular fashion
> +		apic_icr_write(APIC_INT_ASSERT |
> +				APIC_DEST_PHYSICAL |
> +				APIC_DM_FIXED |
> +				(i % 2 ? 0x40 : 0x50),
> +				physical_dst);
> +
> +		if (i == (num_iterations - 1) && cpu != FIRST_TEST_VCPU)
> +			break;
> +
> +#ifdef __x86_64__
> +		// wait for the IPI interrupt chain to come back to us
> +		if (cpu == 2 && use_svm) {
> +				wait_for_ipi_in_l2(&isr_counts[cpu], &vcpu);

Indentation is funky.

> +				continue;
> +		}
> +#endif
> +		wait_for_ipi(&isr_counts[cpu]);
> +	}
> +}
> +
> +int main(int argc, void** argv)
> +{
> +	int cpu, ncpus = cpu_count();
> +
> +	assert(ncpus > 2);
> +
> +	if (argc > 1)
> +		hlt_allowed = atol(argv[1]);
> +
> +	if (argc > 2)
> +		num_iterations = atol(argv[2]);
> +
> +	setup_vm();
> +
> +#ifdef __x86_64__
> +	if (svm_supported()) {
> +		use_svm = true;
> +		setup_svm();
> +	}
> +#endif
> +
> +	isr_counts = (volatile u64 *)calloc(ncpus, sizeof(u64));
> +
> +	printf("found %d cpus\n", ncpus);
> +	printf("running for %lld iterations - test\n",
> +		(long long unsigned int)num_iterations);
> +
> +
> +	for (cpu = 0; cpu < ncpus; ++cpu)
> +		on_cpu_async(cpu, vcpu_init, (void *)(long)cpu);
> +
> +	/* now let all the vCPUs end the IPI function*/
> +	while (cpus_active() > 1)
> +		  pause();
> +
> +	printf("starting test on all cpus but 0...\n");
> +
> +	for (cpu = ncpus-1; cpu >= FIRST_TEST_VCPU; cpu--)

Spaces around the '-'.

> +		on_cpu_async(cpu, vcpu_code, (void *)(long)cpu);

Why not use smp_id() in vcpu_code()?  ipi_interrupt_handler() already relies on
that being correct.

> +
> +	printf("test started, waiting to end...\n");
> +
> +	while (cpus_active() > 1) {
> +
> +		unsigned long isr_count1, isr_count2;
> +
> +		isr_count1 = isr_counts[1];
> +		delay(5ULL*1000*1000*1000);

Please add a macro or two for nanoseconds/milliseconds/seconds or whatever this
expands to.

> +		isr_count2 = isr_counts[1];
> +
> +		if (isr_count1 == isr_count2) {
> +			printf("\n");
> +			printf("hang detected!!\n");
> +			break;
> +		} else {
> +			printf("made %ld IPIs \n", (isr_count2 - isr_count1)*(ncpus-1));
> +		}
> +	}
> +
> +	printf("\n");
> +
> +	for (cpu = 1; cpu < ncpus; ++cpu)
> +		report(isr_counts[cpu] == num_iterations,
> +				"Number of IPIs match (%lld)",

Indentation.

> +				(long long unsigned int)isr_counts[cpu]);

Print num_iterations, i.e. expected vs. actual?

> +
> +	free((void*)isr_counts);
> +	return report_summary();
> +}
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
> index ebb3fdfc..7655d2ba 100644
> --- a/x86/unittests.cfg
> +++ b/x86/unittests.cfg
> @@ -61,6 +61,11 @@ smp = 2
>  file = smptest.flat
>  smp = 3
>  
> +[ipi_stress]
> +file = ipi_stress.flat
> +extra_params = -cpu host,-x2apic,-svm,-hypervisor -global kvm-pit.lost_tick_policy=discard -machine kernel-irqchip=on -append '0 50000'

Why add all the SVM and HLT stuff and then effectively turn them off by default?
There's basically zero chance any other configuration will get regular testing.

And why not have multiple configs, e.g. to run with and without x2APIC?

> +smp = 4
> +
>  [vmexit_cpuid]
>  file = vmexit.flat
>  extra_params = -append 'cpuid'
> -- 
> 2.26.3
>

next prev parent reply	other threads:[~2022-10-20 20:24 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-20 15:23 [kvm-unit-tests PATCH 00/16] kvm-unit-tests: set of fixes and new tests Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 01/16] x86: make irq_enable avoid the interrupt shadow Maxim Levitsky
2022-10-20 18:01   ` Sean Christopherson
2022-10-24 12:36     ` Maxim Levitsky
2022-10-24 22:49       ` Sean Christopherson
2022-10-27 10:16         ` Maxim Levitsky
2022-10-27 15:50           ` Sean Christopherson
2022-10-27 17:10             ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 02/16] x86: add few helper functions for apic local timer Maxim Levitsky
2022-10-20 19:14   ` Sean Christopherson
2022-10-24 12:37     ` Maxim Levitsky
2022-10-24 16:10       ` Sean Christopherson
2022-10-27 10:19         ` Maxim Levitsky
2022-10-27 15:54           ` Sean Christopherson
2022-10-27 17:11             ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 03/16] svm: use irq_enable instead of sti/nop Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 04/16] svm: make svm_intr_intercept_mix_if/gif test a bit more robust Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 05/16] svm: use apic_start_timer/apic_stop_timer instead of open coding it Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 06/16] x86: Add test for #SMI during interrupt window Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 07/16] x86: Add a simple test for SYSENTER instruction Maxim Levitsky
2022-10-20 19:25   ` Sean Christopherson
2022-10-24 12:38     ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 08/16] svm: add nested shutdown test Maxim Levitsky
2022-10-20 15:26   ` Maxim Levitsky
2022-10-20 19:06     ` Sean Christopherson
2022-10-24 12:39       ` Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 09/16] svm: move svm spec definitions to lib/x86/svm.h Maxim Levitsky
2022-10-20 19:08   ` Sean Christopherson
2022-10-20 15:23 ` [kvm-unit-tests PATCH 10/16] svm: move some svm support functions into lib/x86/svm_lib.h Maxim Levitsky
2022-10-20 15:23 ` [kvm-unit-tests PATCH 11/16] svm: add svm_suported Maxim Levitsky
2022-10-20 18:21   ` Sean Christopherson
2022-10-24 12:40     ` Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 12/16] svm: move setup_svm to svm_lib.c Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 13/16] svm: move vmcb_ident " Maxim Levitsky
2022-10-20 18:37   ` Sean Christopherson
2022-10-24 12:46     ` Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 14/16] svm: rewerite vm entry macros Maxim Levitsky
2022-10-20 18:55   ` Sean Christopherson
2022-10-24 12:45     ` Maxim Levitsky
2022-10-24 19:56       ` Sean Christopherson
2022-10-27 12:07         ` Maxim Levitsky
2022-10-27 19:39           ` Sean Christopherson
2022-10-20 15:24 ` [kvm-unit-tests PATCH 15/16] svm: introduce svm_vcpu Maxim Levitsky
2022-10-20 19:02   ` Sean Christopherson
2022-10-24 12:46     ` Maxim Levitsky
2022-10-20 15:24 ` [kvm-unit-tests PATCH 16/16] add IPI loss stress test Maxim Levitsky
2022-10-20 20:23   ` Sean Christopherson [this message]
2022-10-24 12:54     ` Maxim Levitsky
2022-10-24 17:19       ` Sean Christopherson
2022-10-27 11:00         ` Maxim Levitsky
2022-10-27 18:41           ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y1GuXoYm6JLpkUvq@google.com \
    --to=seanjc@google.com \
    --cc=cavery@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mlevitsk@redhat.com \
    --cc=pbonzini@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox