The Linux Kernel Mailing List
 help / color / mirror / Atom feed
* Re: [PATCH v2 2/9] KVM: x86: Set guest DR6 by kvm_queue_exception_p() in instruction emulation
       [not found] ` <9b859ab6a6b59e5ccfdac741459117996fe2da6e.1766066076.git.houwenlong.hwl@antgroup.com>
@ 2026-05-11 15:23   ` Sean Christopherson
  2026-05-11 15:26     ` Sean Christopherson
  0 siblings, 1 reply; 8+ messages in thread
From: Sean Christopherson @ 2026-05-11 15:23 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, linux-kernel

On Thu, Dec 18, 2025, Hou Wenlong wrote:
> Record DR6 in emulate_db() and use kvm_queue_exception_p() to set DR6
> instead of directly using kvm_set_dr6() in emulation, which keeps the
> handling of DR6 during #DB injection consistent with other code paths.
> 
> No functional change intended.
> 
> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> ---
>  arch/x86/kvm/emulate.c     | 14 ++++----------
>  arch/x86/kvm/kvm_emulate.h |  6 +++++-
>  arch/x86/kvm/x86.c         |  5 ++++-
>  3 files changed, 13 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index c8e292e9a24d..997cd6e46d90 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -540,8 +540,9 @@ static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
>  	return X86EMUL_PROPAGATE_FAULT;
>  }
>  
> -static int emulate_db(struct x86_emulate_ctxt *ctxt)
> +static int emulate_db(struct x86_emulate_ctxt *ctxt, unsigned long dr6)
>  {
> +	ctxt->exception.dr6 = dr6;
>  	return emulate_exception(ctxt, DB_VECTOR, 0, false);
>  }
>  
> @@ -3834,15 +3835,8 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
>  	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
>  		return emulate_ud(ctxt);
>  
> -	if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
> -		ulong dr6;
> -
> -		dr6 = ctxt->ops->get_dr(ctxt, 6);
> -		dr6 &= ~DR_TRAP_BITS;
> -		dr6 |= DR6_BD | DR6_ACTIVE_LOW;
> -		ctxt->ops->set_dr(ctxt, 6, dr6);
> -		return emulate_db(ctxt);
> -	}
> +	if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD)
> +		return emulate_db(ctxt, DR6_BD);
>  
>  	return X86EMUL_CONTINUE;
>  }
> diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
> index fb3dab4b5a53..7fe38b174e18 100644
> --- a/arch/x86/kvm/kvm_emulate.h
> +++ b/arch/x86/kvm/kvm_emulate.h
> @@ -24,7 +24,11 @@ struct x86_exception {
>  	bool error_code_valid;
>  	u16 error_code;
>  	bool nested_page_fault;
> -	u64 address; /* cr2 or nested page fault gpa */
> +	union {
> +		u64 address; /* cr2 or nested page fault gpa */
> +		unsigned long dr6;
> +		u64 payload;

Please split the introduction of the union to a separate patch, mainly so that
the effectively zeroing of ctxt.exception.address in init_emulate_ctxt() is
isolated, e.g. in case it somehow causes problems.  But that will also allow
introducing the inject_emulated_exception() change separately from the
check_dr_read() change.

> +	};
>  	u8 async_page_fault;
>  	unsigned long exit_qualification;
>  };
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ab298bfa7d9f..f33ce947633e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8925,7 +8925,9 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
>  {
>  	struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception;
>  
> -	if (ex->vector == PF_VECTOR)
> +	if (ex->vector == DB_VECTOR)
> +		kvm_queue_exception_e(vcpu, DB_VECTOR, ex->dr6);

This should be kvm_queue_exception_p().  I also think pivoting on DB_VECTOR is
the wrong approach.  Rather than key off the vector, add payload_valid (to match
error_code_valid), and then do:

	struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception;

	WARN_ON_ONCE(ex->vector != PF_VECTOR && ex->payload_valid &&
		     ex->error_code_valid);

	if (ex->vector == PF_VECTOR)
		kvm_inject_emulated_page_fault(vcpu, ex);
	else if (ex->payload_valid)
		kvm_queue_exception_p(vcpu, DB_VECTOR, ex->payload);
	else if (ex->error_code_valid)
		kvm_queue_exception_e(vcpu, ex->vector, ex->error_code);
	else
		kvm_queue_exception(vcpu, ex->vector);

PF_VECTOR is special because it has both an error code and a payload, and because
it needs additional handling on multiple fronts.

> +	else if (ex->vector == PF_VECTOR)
>  		kvm_inject_emulated_page_fault(vcpu, ex);
>  	else if (ex->error_code_valid)
>  		kvm_queue_exception_e(vcpu, ex->vector, ex->error_code);
> @@ -8970,6 +8972,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
>  	ctxt->interruptibility = 0;
>  	ctxt->have_exception = false;
>  	ctxt->exception.vector = -1;
> +	ctxt->exception.payload = 0;
>  	ctxt->perm_ok = false;
>  
>  	init_decode_cache(ctxt);
> -- 
> 2.31.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/9] KVM: x86: Set guest DR6 by kvm_queue_exception_p() in instruction emulation
  2026-05-11 15:23   ` [PATCH v2 2/9] KVM: x86: Set guest DR6 by kvm_queue_exception_p() in instruction emulation Sean Christopherson
@ 2026-05-11 15:26     ` Sean Christopherson
  2026-05-11 15:42       ` Sean Christopherson
  0 siblings, 1 reply; 8+ messages in thread
From: Sean Christopherson @ 2026-05-11 15:26 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, linux-kernel

On Mon, May 11, 2026, Sean Christopherson wrote:
> On Thu, Dec 18, 2025, Hou Wenlong wrote:
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index ab298bfa7d9f..f33ce947633e 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -8925,7 +8925,9 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
> >  {
> >  	struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception;
> >  
> > -	if (ex->vector == PF_VECTOR)
> > +	if (ex->vector == DB_VECTOR)
> > +		kvm_queue_exception_e(vcpu, DB_VECTOR, ex->dr6);
> 
> This should be kvm_queue_exception_p().  I also think pivoting on DB_VECTOR is
> the wrong approach. 

Gah, never mind, didn't look at the next patch.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/9] KVM: x86: Set guest DR6 by kvm_queue_exception_p() in instruction emulation
  2026-05-11 15:26     ` Sean Christopherson
@ 2026-05-11 15:42       ` Sean Christopherson
  0 siblings, 0 replies; 8+ messages in thread
From: Sean Christopherson @ 2026-05-11 15:42 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, linux-kernel

On Mon, May 11, 2026, Sean Christopherson wrote:
> On Mon, May 11, 2026, Sean Christopherson wrote:
> > On Thu, Dec 18, 2025, Hou Wenlong wrote:
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index ab298bfa7d9f..f33ce947633e 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -8925,7 +8925,9 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
> > >  {
> > >  	struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception;
> > >  
> > > -	if (ex->vector == PF_VECTOR)
> > > +	if (ex->vector == DB_VECTOR)
> > > +		kvm_queue_exception_e(vcpu, DB_VECTOR, ex->dr6);
> > 
> > This should be kvm_queue_exception_p().  I also think pivoting on DB_VECTOR is
> > the wrong approach. 
> 
> Gah, never mind, didn't look at the next patch.

Actually, that's a good excuse to provide kvm_inject_emulated_db() in this patch,
even though it doesn't become truly necessary until the next patch.  Eliminating
some of the code movement in the next patch will yield a smaller diff, and make
it easier to see that there's change in the !KVM_GUESTDBG_USE_HW_BP case.

@@ -8976,23 +8998,36 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
        }
 }
 
-static void kvm_inject_emulated_db(struct kvm_vcpu *vcpu, unsigned long dr6)
+static int kvm_inject_emulated_db(struct kvm_vcpu *vcpu, unsigned long dr6)
 {
+       struct kvm_run *kvm_run = vcpu->run;
+
+       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+               kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
+               kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
+               kvm_run->debug.arch.exception = DB_VECTOR;
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               return 0;
+       }
+
        kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
+       return 1;
 }
 
-static void inject_emulated_exception(struct kvm_vcpu *vcpu)
+static int inject_emulated_exception(struct kvm_vcpu *vcpu)
 {
        struct x86_exception *ex = &vcpu->arch.emulate_ctxt->exception;
 
        if (ex->vector == DB_VECTOR)
-               kvm_inject_emulated_db(vcpu, ex->dr6);
-       else if (ex->vector == PF_VECTOR)
+               return kvm_inject_emulated_db(vcpu, ex->dr6);
+
+       if (ex->vector == PF_VECTOR)
                kvm_inject_emulated_page_fault(vcpu, ex);
        else if (ex->error_code_valid)
                kvm_queue_exception_e(vcpu, ex->vector, ex->error_code);
        else
                kvm_queue_exception(vcpu, ex->vector);
+       return 1;
 }
 
 static struct x86_emulate_ctxt *alloc_emulate_ctxt(struct kvm_vcpu *vcpu)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/9] KVM: x86: Improve the handling of debug exceptions during instruction emulation
       [not found] <cover.1766066076.git.houwenlong.hwl@antgroup.com>
       [not found] ` <9b859ab6a6b59e5ccfdac741459117996fe2da6e.1766066076.git.houwenlong.hwl@antgroup.com>
@ 2026-05-11 15:45 ` Sean Christopherson
       [not found] ` <e0df5d6812da0f508cdf697ac7627693199c8293.1766066076.git.houwenlong.hwl@antgroup.com>
       [not found] ` <5e667a338a27ef2392143962466d77432fcd5441.1766066076.git.houwenlong.hwl@antgroup.com>
  3 siblings, 0 replies; 8+ messages in thread
From: Sean Christopherson @ 2026-05-11 15:45 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, Shuah Khan,
	linux-kernel, linux-kselftest

On Thu, Dec 18, 2025, Hou Wenlong wrote:
> Hou Wenlong (9):
>   KVM: x86: Capture "struct x86_exception" in
>     inject_emulated_exception()
>   KVM: x86: Set guest DR6 by kvm_queue_exception_p() in instruction
>     emulation
>   KVM: x86: Check guest debug in DR access instruction emulation
>   KVM: x86: Only check effective code breakpoint in emulation
>   KVM: x86: Consolidate KVM_GUESTDBG_SINGLESTEP check into the
>     kvm_inject_emulated_db()
>   KVM: x86: Move kvm_set_rflags() up before kvm_vcpu_do_singlestep()
>   KVM: VMX: Refresh 'PENDING_DBG_EXCEPTIONS.BS' bit during instruction
>     emulation
>   KVM: selftests: Verify guest debug DR7.GD checking during instruction
>     emulation
>   KVM: selftests: Verify 'BS' bit checking in pending debug exception
>     during VM entry
> 
>  arch/x86/include/asm/kvm-x86-ops.h            |   1 +
>  arch/x86/include/asm/kvm_host.h               |   1 +
>  arch/x86/kvm/emulate.c                        |  14 +--
>  arch/x86/kvm/kvm_emulate.h                    |   7 +-
>  arch/x86/kvm/vmx/main.c                       |   9 ++
>  arch/x86/kvm/vmx/vmx.c                        |  15 ++-
>  arch/x86/kvm/vmx/x86_ops.h                    |   1 +
>  arch/x86/kvm/x86.c                            | 116 ++++++++++--------
>  arch/x86/kvm/x86.h                            |   7 ++
>  .../selftests/kvm/include/x86/processor.h     |   3 +-
>  tools/testing/selftests/kvm/x86/debug_regs.c  |  72 ++++++++++-
>  11 files changed, 178 insertions(+), 68 deletions(-)

One goof and some nits on patch 3, but I'll fix them up when applying, i.e. no
need for a v3.  I'll probably also add some comments and elaborate on some of
the changelogs.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 9/9] KVM: selftests: Verify 'BS' bit checking in pending debug exception state during VM-Entry
       [not found] ` <e0df5d6812da0f508cdf697ac7627693199c8293.1766066076.git.houwenlong.hwl@antgroup.com>
@ 2026-05-13 23:14   ` Sean Christopherson
  2026-05-14  5:31     ` Hou Wenlong
  0 siblings, 1 reply; 8+ messages in thread
From: Sean Christopherson @ 2026-05-13 23:14 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Shuah Khan, linux-kselftest,
	linux-kernel

On Thu, Dec 18, 2025, Hou Wenlong wrote:
> In the x86's debug_regs test, add a test case to cover the scenario
> where single-step with STI in VMX sets the 'BS' bit in pending debug
> exceptions state for #DB interception and instruction emulation in both
> cases.

...

> +static void guest_db_handler(struct ex_regs *regs)
> +{
> +	static int count;
> +	unsigned long target_rips[2] = {
> +		CAST_TO_RIP(fep_sti_start),
> +		CAST_TO_RIP(fep_sti_end),
> +	};
> +
> +	__GUEST_ASSERT(regs->rip == target_rips[count], "STI: unexpected rip 0x%lx (should be 0x%lx)",
> +		       regs->rip, target_rips[count]);
> +	regs->rflags &= ~X86_EFLAGS_TF;
> +	count++;
> +}
> +
> +static void guest_irq_handler(struct ex_regs *regs)
> +{
> +	unsigned long target_rip = CAST_TO_RIP(fep_bd_start);
> +
> +	__GUEST_ASSERT(regs->rip == target_rip, "IRQ: unexpected rip 0x%lx (should be 0x%lx)",
> +		       regs->rip, target_rip);

This is wrong, and the test fails with your series applied verbatim.  The IRQ
will arrive at fep_sti_start, because RFLAGS.IF=0 from the CLI at the end of the
ss_start block, and remains that way across fep_bd_start.   And to make things
even more confusing, the IRQ arrives on the CLI even though it's in an STI shadow
due to #DBs not being subjected to _STI_ shadows on Intel, only to MOV-SS/POP-SS
shadows.  I.e. the #DB lands on CLI, pushes RFLAGS.IF=1 on the stack, and then
the subsequent IRET from guest_db_handler() fully unmasks IRQs, and voila.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 9/9] KVM: selftests: Verify 'BS' bit checking in pending debug exception state during VM-Entry
  2026-05-13 23:14   ` [PATCH v2 9/9] KVM: selftests: Verify 'BS' bit checking in pending debug exception state during VM-Entry Sean Christopherson
@ 2026-05-14  5:31     ` Hou Wenlong
  2026-05-14 18:51       ` Sean Christopherson
  0 siblings, 1 reply; 8+ messages in thread
From: Hou Wenlong @ 2026-05-14  5:31 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Shuah Khan, linux-kselftest,
	linux-kernel

On Wed, May 13, 2026 at 04:14:14PM -0700, Sean Christopherson wrote:
> On Thu, Dec 18, 2025, Hou Wenlong wrote:
> > In the x86's debug_regs test, add a test case to cover the scenario
> > where single-step with STI in VMX sets the 'BS' bit in pending debug
> > exceptions state for #DB interception and instruction emulation in both
> > cases.
> 
> ...
> 
> > +static void guest_db_handler(struct ex_regs *regs)
> > +{
> > +	static int count;
> > +	unsigned long target_rips[2] = {
> > +		CAST_TO_RIP(fep_sti_start),
> > +		CAST_TO_RIP(fep_sti_end),
> > +	};
> > +
> > +	__GUEST_ASSERT(regs->rip == target_rips[count], "STI: unexpected rip 0x%lx (should be 0x%lx)",
> > +		       regs->rip, target_rips[count]);
> > +	regs->rflags &= ~X86_EFLAGS_TF;
> > +	count++;
> > +}
> > +
> > +static void guest_irq_handler(struct ex_regs *regs)
> > +{
> > +	unsigned long target_rip = CAST_TO_RIP(fep_bd_start);
> > +
> > +	__GUEST_ASSERT(regs->rip == target_rip, "IRQ: unexpected rip 0x%lx (should be 0x%lx)",
> > +		       regs->rip, target_rip);
> 
> This is wrong, and the test fails with your series applied verbatim.  The IRQ
> will arrive at fep_sti_start, because RFLAGS.IF=0 from the CLI at the end of the
> ss_start block, and remains that way across fep_bd_start.   And to make things
> even more confusing, the IRQ arrives on the CLI even though it's in an STI shadow
> due to #DBs not being subjected to _STI_ shadows on Intel, only to MOV-SS/POP-SS
> shadows.  I.e. the #DB lands on CLI, pushes RFLAGS.IF=1 on the stack, and then
> the subsequent IRET from guest_db_handler() fully unmasks IRQs, and voila.

Yes, the RIP here should be 'fep_sti_start'. This was a stupid mistake,
and I’m very sorry about that. In my reply to v1[0] I explained why an IRQ
handler is needed, so in v2 I added comments and an additional check,
and I tested it before sending. However, it looks like the patch differs
from what I had locally—my code management was a bit messy.

As you pointed out, this pending interrupt can make the test somewhat
confusing. So it might be better to consume the IRQ before running the
test, e.g.:

```
diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c
b/tools/testing/selftests/kvm/x86/debug_regs.c
index 3ec2724697de..030d5cb58cc2 100644
--- a/tools/testing/selftests/kvm/x86/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
@@ -21,7 +21,7 @@
 u32 guest_value;

 extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
-extern unsigned char fep_bd_start, fep_sti_start, fep_sti_end;
+extern unsigned char fep_irq, fep_bd_start, fep_sti_start, fep_sti_end;

 static void guest_db_handler(struct ex_regs *regs)
 {
@@ -39,7 +39,7 @@ static void guest_db_handler(struct ex_regs *regs)

 static void guest_irq_handler(struct ex_regs *regs)
 {
-       unsigned long target_rip = CAST_TO_RIP(fep_bd_start);
+       unsigned long target_rip = CAST_TO_RIP(fep_irq);

        __GUEST_ASSERT(regs->rip == target_rip, "IRQ: unexpected rip
0x%lx (should be 0x%lx)",
                       regs->rip, target_rip);
@@ -93,6 +93,14 @@ static void guest_code(void)
        if (is_forced_emulation_enabled) {
                asm volatile(KVM_FEP "fep_bd_start: mov %%dr0, %%rax" :
: : "rax");

+               /*
+                * Consume the pending interrupt, as the single-step #DB
delivery after
+                * STI removes the interrupt shadow, so the pending
interrupt will be
+                * delivered after guest #DB handling.
+                */
+               asm volatile("sti; nop\n\t"
+                            "fep_irq:");
+
                /* pending debug exceptions for emulation */
                asm volatile("pushf\n\t"
                             "orq $" __stringify(X86_EFLAGS_TF) ",
(%rsp)\n\t"
@@ -264,13 +272,10 @@ int main(void)
        memset(&debug, 0, sizeof(debug));
        vcpu_guest_debug_set(vcpu, &debug);

-       vm_install_exception_handler(vm, DB_VECTOR, guest_db_handler);
-       /*
-        * Install a dummy IRQ handler, as the single-step #DB delivery
         after
-        * STI removes the interrupt shadow, so the pending interrupt
         will be
-        * delivered after #DB handling.
-        */
-       vm_install_exception_handler(vm, IRQ_VECTOR, guest_irq_handler);
+       if (is_forced_emulation_enabled) {
+               vm_install_exception_handler(vm, DB_VECTOR,
guest_db_handler);
+               vm_install_exception_handler(vm, IRQ_VECTOR,
guest_irq_handler);
+       }

        vcpu_run(vcpu);
        TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
```

I have tested it on both Intel and AMD. Please let me know if I should
resend a v3 of this patch.

[0]: https://lore.kernel.org/kvm/20251218134026.GA118671@k08j02272.eu95sqa/

Thanks!


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 9/9] KVM: selftests: Verify 'BS' bit checking in pending debug exception state during VM-Entry
  2026-05-14  5:31     ` Hou Wenlong
@ 2026-05-14 18:51       ` Sean Christopherson
  0 siblings, 0 replies; 8+ messages in thread
From: Sean Christopherson @ 2026-05-14 18:51 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Shuah Khan, linux-kselftest,
	linux-kernel

On Thu, May 14, 2026, Hou Wenlong wrote:
> On Wed, May 13, 2026 at 04:14:14PM -0700, Sean Christopherson wrote:
> > On Thu, Dec 18, 2025, Hou Wenlong wrote:
> > > +static void guest_irq_handler(struct ex_regs *regs)
> > > +{
> > > +	unsigned long target_rip = CAST_TO_RIP(fep_bd_start);
> > > +
> > > +	__GUEST_ASSERT(regs->rip == target_rip, "IRQ: unexpected rip 0x%lx (should be 0x%lx)",
> > > +		       regs->rip, target_rip);
> > 
> > This is wrong, and the test fails with your series applied verbatim.  The IRQ
> > will arrive at fep_sti_start, because RFLAGS.IF=0 from the CLI at the end of the
> > ss_start block, and remains that way across fep_bd_start.   And to make things
> > even more confusing, the IRQ arrives on the CLI even though it's in an STI shadow
> > due to #DBs not being subjected to _STI_ shadows on Intel, only to MOV-SS/POP-SS
> > shadows.  I.e. the #DB lands on CLI, pushes RFLAGS.IF=1 on the stack, and then
> > the subsequent IRET from guest_db_handler() fully unmasks IRQs, and voila.
> 
> Yes, the RIP here should be 'fep_sti_start'. This was a stupid mistake,
> and I’m very sorry about that. In my reply to v1[0] I explained why an IRQ
> handler is needed, so in v2 I added comments and an additional check,
> and I tested it before sending. However, it looks like the patch differs
> from what I had locally—my code management was a bit messy.

...

> I have tested it on both Intel and AMD. Please let me know if I should
> resend a v3 of this patch.

No need, I'm actually going to send a v3 of the entire series (beleated feedback
on patch 6 incoming...).

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 7/9] KVM: VMX: Refresh 'PENDING_DBG_EXCEPTIONS.BS' bit during instruction emulation
       [not found] ` <5e667a338a27ef2392143962466d77432fcd5441.1766066076.git.houwenlong.hwl@antgroup.com>
@ 2026-05-14 19:06   ` Sean Christopherson
  0 siblings, 0 replies; 8+ messages in thread
From: Sean Christopherson @ 2026-05-14 19:06 UTC (permalink / raw)
  To: Hou Wenlong
  Cc: kvm, Lai Jiangshan, Paolo Bonzini, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, x86, H. Peter Anvin, linux-kernel

On Thu, Dec 18, 2025, Hou Wenlong wrote:
> The VM-Entry has a consistency check that when the 'STI' or 'MOVSS'
> blocking is active, the 'PENDING_DBG_EXCEPTIONS.BS' bit must be set if
> 'RFLAGS.TF' is set; otherwise, a VM-Fail is triggered during VM-entry.
> However, when 'STI' or 'MOV SS' is emulated (e.g., using the 'force
> emulation' prefix), the emulator only refreshes interruptibility state
> but pending debug exception state is not refreshed. Since the force
> emulation prefix causes a VM-Exit due to #UD interception, which clears
> the 'PENDING_DBG_EXCEPTIONS' bits, the emulator should refresh the
> 'PENDING_DBG_EXCEPTIONS.BS' bit when the 'RFLAGS.TF' bit is set to
> ensure the success of VM-Entry.

After (way too) much thought, it's not just (forced) emulation that's flawed;
save/restore also needs similar treatment.  E.g. if userspace gains control of
the vCPU on a single-step #DB exit with STI-blocking, then KVM will save/restore
the pending #DB, RFLAGS.TF, and STI-blocking, but not PENDING_DBG_EXCEPTIONS.BS.
When the target resumes, VM-Entry will fail due to the "bad" state.

So AFAICT, we can handle both by moving the fixup logic to vmx_inject_exception()
(because it's just fixup for a consistency check, e.g. the state doesn't need to
be saved/restored).  I'm not entirely convinced this fixes *all* the flows that
can run afoul of the consistency check, but I think it gets the legimiate ones?
And if not, we can always continue playing whack-a-mole...

I'll send a v3 of the whole series, because with this approach there's no need to
commit RFLAGS before queuing the #DB in the emulator writeback path, and we can
drop kvm_vcpu_do_singlestep() entirely.

---
 arch/x86/kvm/vmx/vmx.c | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1701db1b2e18..a0a0ccf342d3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1909,6 +1909,24 @@ void vmx_inject_exception(struct kvm_vcpu *vcpu)
 	u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	/*
+	 * When injecting a #DB, single-stepping is enabled in RFLAGS, and STI
+	 * or MOV-SS blocking is active, set vmcs.PENDING_DBG_EXCEPTIONS.BS to
+	 * prevent a false positive from VM-Entry consistency check.  VM-Entry
+	 * asserts that a single-step #DB _must_ be pending in this scenario,
+	 * as the previous instruction cannot have toggled RFLAGS.TF 0=>1
+	 * (because STI and POP/MOV don't modify RFLAGS), therefore the one
+	 * instruction delay when activating single-step breakpoints must have
+	 * already expired.  However, the CPU isn't smart enough to peek at
+	 * vmcs.VM_ENTRY_INTR_INFO_FIELD and so doesn't realize that yes, there
+	 * is indeed a #DB pending/imminent.
+	 */
+	if (ex->vector == DB_VECTOR &&
+	    (vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
+	    vmx_get_interrupt_shadow(vcpu))
+		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+			    vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
+
 	kvm_deliver_exception_payload(vcpu, ex);
 
 	if (ex->has_error_code) {
@@ -5485,26 +5503,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 			 * avoid single-step #DB and MTF updates, as ICEBP is
 			 * higher priority.  Note, skipping ICEBP still clears
 			 * STI and MOVSS blocking.
-			 *
-			 * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
-			 * if single-step is enabled in RFLAGS and STI or MOVSS
-			 * blocking is active, as the CPU doesn't set the bit
-			 * on VM-Exit due to #DB interception.  VM-Entry has a
-			 * consistency check that a single-step #DB is pending
-			 * in this scenario as the previous instruction cannot
-			 * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV
-			 * don't modify RFLAGS), therefore the one instruction
-			 * delay when activating single-step breakpoints must
-			 * have already expired.  Note, the CPU sets/clears BS
-			 * as appropriate for all other VM-Exits types.
 			 */
 			if (is_icebp(intr_info))
 				WARN_ON(!skip_emulated_instruction(vcpu));
-			else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
-				 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
-				  (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)))
-				vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
-					    vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
 
 			kvm_queue_exception_p(vcpu, DB_VECTOR, dr6);
 			return 1;

base-commit: b7fbe9a1bf9ee6c967ef77d366ca58c35fcf1887
-- 

> +void vmx_refresh_pending_dbg_exceptions(struct kvm_vcpu *vcpu)
> +{
> +	if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) &&
> +	    vmx_get_interrupt_shadow(vcpu))
> +		vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> +			    vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS);
> +}
> +
>  static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
>  {
>  	kvm_apic_update_ppr(vcpu);
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index d09abeac2b56..2978b6506ac6 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -74,6 +74,7 @@ void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
>  void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
>  void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
>  void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val);
> +void vmx_refresh_pending_dbg_exceptions(struct kvm_vcpu *vcpu);
>  void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu);
>  void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg);
>  unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7352c2114bab..9167393cc0cc 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9232,7 +9232,12 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
>  
>  static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
>  {
> -	return kvm_inject_emulated_db(vcpu, DR6_BS);
> +	int r;
> +
> +	r = kvm_inject_emulated_db(vcpu, DR6_BS);
> +	if (r)
> +		kvm_x86_call(refresh_pending_dbg_exceptions)(vcpu);
> +	return r;
>  }
>  
>  int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
> -- 
> 2.31.1
> 

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2026-05-14 19:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <cover.1766066076.git.houwenlong.hwl@antgroup.com>
     [not found] ` <9b859ab6a6b59e5ccfdac741459117996fe2da6e.1766066076.git.houwenlong.hwl@antgroup.com>
2026-05-11 15:23   ` [PATCH v2 2/9] KVM: x86: Set guest DR6 by kvm_queue_exception_p() in instruction emulation Sean Christopherson
2026-05-11 15:26     ` Sean Christopherson
2026-05-11 15:42       ` Sean Christopherson
2026-05-11 15:45 ` [PATCH v2 0/9] KVM: x86: Improve the handling of debug exceptions during " Sean Christopherson
     [not found] ` <e0df5d6812da0f508cdf697ac7627693199c8293.1766066076.git.houwenlong.hwl@antgroup.com>
2026-05-13 23:14   ` [PATCH v2 9/9] KVM: selftests: Verify 'BS' bit checking in pending debug exception state during VM-Entry Sean Christopherson
2026-05-14  5:31     ` Hou Wenlong
2026-05-14 18:51       ` Sean Christopherson
     [not found] ` <5e667a338a27ef2392143962466d77432fcd5441.1766066076.git.houwenlong.hwl@antgroup.com>
2026-05-14 19:06   ` [PATCH v2 7/9] KVM: VMX: Refresh 'PENDING_DBG_EXCEPTIONS.BS' bit during instruction emulation Sean Christopherson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox