* [PATCH] KVM: x86: Add support for cmpxchg16b emulation
@ 2026-03-06 10:20 Sairaj Kodilkar
2026-03-06 15:38 ` Sean Christopherson
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Sairaj Kodilkar @ 2026-03-06 10:20 UTC (permalink / raw)
To: H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar,
Paolo Bonzini, Sean Christopherson, Thomas Gleixner, kvm,
linux-kernel, x86
Cc: suravee.suthikulpanit, vasant.hegde, nikunj.dadhania,
Manali.Shukla, Sairaj Kodilkar
AMD and Intel both provides support for 128 bit cmpxchg operands using
cmpxchg8b/cmpxchg16b instructions (opcode 0FC7). However, kvm does not
support emulating cmpxchg16b (i.e when destination memory is 128 bit and
REX.W = 1) which causes emulation failure when QEMU guest performs a
cmpxchg16b on a memory region setup as a IO.
Hence extend cmpxchg8b to perform cmpxchg16b when the destination memory
is 128 bit.
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
Background:
The AMD IOMMU driver writes 256-bit device table entries with two
128-bit cmpxchg operations. For guests using hardware-accelerated
vIOMMU (still in progress), QEMU traps device table accesses to set up
nested page tables. Without 128-bit cmpxchg emulation, KVM cannot
handle these traps and DTE access emulation fails.
QEMU implementation that traps DTE accesses:
https://github.com/AMDESE/qemu-iommu/blob/wip/for_iommufd_hw_queue-v8_amd_viommu_20260106/hw/i386/amd_viommu.c#L517
---
arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++-------
arch/x86/kvm/kvm_emulate.h | 1 +
2 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8e292e9a24d..e1a08cd3274b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2188,17 +2188,18 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
return rc;
}
-static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
+static int __handle_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
- u64 old = ctxt->dst.orig_val64;
+ u64 old64 = ctxt->dst.orig_val64;
- if (ctxt->dst.bytes == 16)
+ /* Use of the REX.W prefix promotes operation to 128 bits */
+ if (ctxt->rex_bits & REX_W)
return X86EMUL_UNHANDLEABLE;
- if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
- ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
- *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
- *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
+ if (((u32) (old64 >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
+ ((u32) (old64 >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
+ *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old64 >> 0);
+ *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old64 >> 32);
ctxt->eflags &= ~X86_EFLAGS_ZF;
} else {
ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
@@ -2209,6 +2210,37 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt)
+{
+ __uint128_t old128 = ctxt->dst.val128;
+
+ /* Use of the REX.W prefix promotes operation to 128 bits */
+ if (!(ctxt->rex_bits & REX_W))
+ return X86EMUL_UNHANDLEABLE;
+
+ if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) ||
+ ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) {
+ *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0);
+ *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64);
+ ctxt->eflags &= ~X86_EFLAGS_ZF;
+ } else {
+ ctxt->dst.val128 =
+ ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
+ (u64) reg_read(ctxt, VCPU_REGS_RBX);
+
+ ctxt->eflags |= X86_EFLAGS_ZF;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static int em_cmpxchgxb(struct x86_emulate_ctxt *ctxt)
+{
+ if (ctxt->dst.bytes == 16)
+ return __handle_cmpxchg16b(ctxt);
+
+ return __handle_cmpxchg8b(ctxt);
+}
+
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
int rc;
@@ -4097,7 +4129,7 @@ static const struct gprefix pfx_0f_c7_7 = {
static const struct group_dual group9 = { {
- N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
+ N, I(DstMem64 | Lock | PageTable, em_cmpxchgxb), N, N, N, N, N, N,
}, {
N, N, N, N, N, N, N,
GP(0, &pfx_0f_c7_7),
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index fb3dab4b5a53..a1e95c57d40e 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -268,6 +268,7 @@ struct operand {
union {
unsigned long val;
u64 val64;
+ __uint128_t val128;
char valptr[sizeof(avx256_t)];
sse128_t vec_val;
avx256_t vec_val2;
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation 2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar @ 2026-03-06 15:38 ` Sean Christopherson 2026-03-11 11:18 ` Sairaj Kodilkar 2026-03-12 11:46 ` Paolo Bonzini 2026-03-07 2:39 ` kernel test robot 2026-03-07 5:40 ` kernel test robot 2 siblings, 2 replies; 6+ messages in thread From: Sean Christopherson @ 2026-03-06 15:38 UTC (permalink / raw) To: Sairaj Kodilkar Cc: H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar, Paolo Bonzini, Thomas Gleixner, kvm, linux-kernel, x86, suravee.suthikulpanit, vasant.hegde, nikunj.dadhania, Manali.Shukla On Fri, Mar 06, 2026, Sairaj Kodilkar wrote: > AMD and Intel both provides support for 128 bit cmpxchg operands using > cmpxchg8b/cmpxchg16b instructions (opcode 0FC7). However, kvm does not > support emulating cmpxchg16b (i.e when destination memory is 128 bit and > REX.W = 1) which causes emulation failure when QEMU guest performs a > cmpxchg16b on a memory region setup as a IO. > > Hence extend cmpxchg8b to perform cmpxchg16b when the destination memory > is 128 bit. > > Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com> > --- > Background: > > The AMD IOMMU driver writes 256-bit device table entries with two > 128-bit cmpxchg operations. For guests using hardware-accelerated > vIOMMU (still in progress), QEMU traps device table accesses to set up > nested page tables. Without 128-bit cmpxchg emulation, KVM cannot > handle these traps and DTE access emulation fails. Please put this paragraph in the changelog proper, the "why" matters greatly here. > QEMU implementation that traps DTE accesses: > https://github.com/AMDESE/qemu-iommu/blob/wip/for_iommufd_hw_queue-v8_amd_viommu_20260106/hw/i386/amd_viommu.c#L517 > --- > arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++------- > arch/x86/kvm/kvm_emulate.h | 1 + > 2 files changed, 41 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index c8e292e9a24d..e1a08cd3274b 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -2188,17 +2188,18 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) > return rc; > } > > -static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) > +static int __handle_cmpxchg8b(struct x86_emulate_ctxt *ctxt) > { > - u64 old = ctxt->dst.orig_val64; > + u64 old64 = ctxt->dst.orig_val64; > > - if (ctxt->dst.bytes == 16) > + /* Use of the REX.W prefix promotes operation to 128 bits */ > + if (ctxt->rex_bits & REX_W) Uh, yeah, and the caller specifically pivoted on this size. I'm a-ok with a sanity check, but it should be exactly that, e.g. if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes)) return X86EMUL_UNHANDLEABLE; > return X86EMUL_UNHANDLEABLE; > > - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || > - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { > - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); > - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); > + if (((u32) (old64 >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || > + ((u32) (old64 >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { > + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old64 >> 0); > + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old64 >> 32); > ctxt->eflags &= ~X86_EFLAGS_ZF; > } else { > ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | > @@ -2209,6 +2210,37 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) > return X86EMUL_CONTINUE; > } > > +static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt) > +{ > + __uint128_t old128 = ctxt->dst.val128; There is zero chance you properly tested this patch. Please write comprehensive testcases in KUT's emulator64.c before posting the next version. In writeback(), the OP_MEM case quite clearly operates on "unsigned long" values *and* consumes orig_val in the locked case. case OP_MEM: if (ctxt->lock_prefix) return segmented_cmpxchg(ctxt, op->addr.mem, &op->orig_val, &op->val, op->bytes); else return segmented_write(ctxt, op->addr.mem, &op->val, op->bytes); And then emulator_cmpxchg_emulated() should be taught to do cmpxchg16b itself: /* guests cmpxchg8b have to be emulated atomically */ if (bytes > 8 || (bytes & (bytes - 1))) goto emul_write; That might be a big lift, e.g. to get a 16-byte uaccess version. If it's unreasonably difficult, we should reject emulation of LOCK CMPXCHG16B. And this code would also need to be updated to copy the full 128-bit value. /* Copy full 64-bit value for CMPXCHG8B. */ ctxt->dst.orig_val64 = ctxt->dst.val64; Maybe something like this? Or maybe just copy 128 bits unconditionally (I'm not sure if that could read uninitiatlized data or not). /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */ if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16) ctxt->dst.orig_val128 = ctxt->dst.val128; else ctxt->dst.orig_val64 = ctxt->dst.val64; > + > + /* Use of the REX.W prefix promotes operation to 128 bits */ > + if (!(ctxt->rex_bits & REX_W)) > + return X86EMUL_UNHANDLEABLE; > + > + if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) || > + ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) { > + *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0); > + *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64); > + ctxt->eflags &= ~X86_EFLAGS_ZF; > + } else { > + ctxt->dst.val128 = > + ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) | > + (u64) reg_read(ctxt, VCPU_REGS_RBX); > + > + ctxt->eflags |= X86_EFLAGS_ZF; IMO we should use a macro to handle 8b vs. 16b. The code is going to be heinous no matter what, and so to me, duplicating everything makes it twice as ugly, whereas a macro makes it like 10% more ugly. > + } > + return X86EMUL_CONTINUE; > +} > + > +static int em_cmpxchgxb(struct x86_emulate_ctxt *ctxt) > +{ > + if (ctxt->dst.bytes == 16) > + return __handle_cmpxchg16b(ctxt); > + > + return __handle_cmpxchg8b(ctxt); > +} > + > static int em_ret(struct x86_emulate_ctxt *ctxt) > { > int rc; > @@ -4097,7 +4129,7 @@ static const struct gprefix pfx_0f_c7_7 = { > > > static const struct group_dual group9 = { { > - N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, > + N, I(DstMem64 | Lock | PageTable, em_cmpxchgxb), N, N, N, N, N, N, Eh, I vote to keep it em_cmpxchg8b. _If_ we want to capture that its size is variable, maybe em_cmpxchg8b_16b? em_cmpxchgxb just looks like a typo. Sans correct writeback() handling, something like so: --- arch/x86/kvm/emulate.c | 44 ++++++++++++++++++++++++-------------- arch/x86/kvm/kvm_emulate.h | 2 ++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6145dac4a605..3ec4555571fa 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2201,24 +2201,33 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) return rc; } +#define em_cmpxchg8b_16b(__c, rbits, mbits) \ +do { \ + u##mbits old = __c->dst.orig_val##mbits; \ + \ + BUILD_BUG_ON(rbits * 2 != mbits); \ + \ + if (((u##rbits)(old >> 0) != (u##rbits)reg_read(__c, VCPU_REGS_RAX)) || \ + ((u##rbits)(old >> rbits) != (u##rbits)reg_read(__c, VCPU_REGS_RDX))) { \ + *reg_write(__c, VCPU_REGS_RAX) = (u##rbits)(old >> 0); \ + *reg_write(__c, VCPU_REGS_RDX) = (u##rbits)(old >> rbits); \ + __c->eflags &= ~X86_EFLAGS_ZF; \ + } else { \ + __c->dst.val##mbits = ((u##mbits)reg_read(__c, VCPU_REGS_RCX) << rbits) | \ + (u##rbits)reg_read(__c, VCPU_REGS_RBX); \ + __c->eflags |= X86_EFLAGS_ZF; \ + } \ +} while (0) + static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) { - u64 old = ctxt->dst.orig_val64; - - if (ctxt->dst.bytes == 16) + if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes)) return X86EMUL_UNHANDLEABLE; - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); - ctxt->eflags &= ~X86_EFLAGS_ZF; - } else { - ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | - (u32) reg_read(ctxt, VCPU_REGS_RBX); - - ctxt->eflags |= X86_EFLAGS_ZF; - } + if (!(ctxt->rex_bits & REX_W)) + em_cmpxchg8b_16b(ctxt, 32, 64); + else + em_cmpxchg8b_16b(ctxt, 64, 128); return X86EMUL_CONTINUE; } @@ -5418,8 +5427,11 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) goto done; } } - /* Copy full 64-bit value for CMPXCHG8B. */ - ctxt->dst.orig_val64 = ctxt->dst.val64; + /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */ + if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16) + ctxt->dst.orig_val128 = ctxt->dst.val128; + else + ctxt->dst.orig_val64 = ctxt->dst.val64; special_insn: diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index fb3dab4b5a53..0e9968319343 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -255,6 +255,7 @@ struct operand { union { unsigned long orig_val; u64 orig_val64; + u64 orig_val128; }; union { unsigned long *reg; @@ -268,6 +269,7 @@ struct operand { union { unsigned long val; u64 val64; + __uint128_t val128; char valptr[sizeof(avx256_t)]; sse128_t vec_val; avx256_t vec_val2; base-commit: 5128b972fb2801ad9aca54d990a75611ab5283a9 -- ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation 2026-03-06 15:38 ` Sean Christopherson @ 2026-03-11 11:18 ` Sairaj Kodilkar 2026-03-12 11:46 ` Paolo Bonzini 1 sibling, 0 replies; 6+ messages in thread From: Sairaj Kodilkar @ 2026-03-11 11:18 UTC (permalink / raw) To: Sean Christopherson Cc: sarunkod, H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar, Paolo Bonzini, Thomas Gleixner, kvm, linux-kernel, x86, suravee.suthikulpanit, vasant.hegde, nikunj.dadhania, Manali.Shukla On 3/6/2026 9:08 PM, Sean Christopherson wrote: > On Fri, Mar 06, 2026, Sairaj Kodilkar wrote: >> AMD and Intel both provides support for 128 bit cmpxchg operands using >> cmpxchg8b/cmpxchg16b instructions (opcode 0FC7). However, kvm does not >> support emulating cmpxchg16b (i.e when destination memory is 128 bit and >> REX.W = 1) which causes emulation failure when QEMU guest performs a >> cmpxchg16b on a memory region setup as a IO. >> >> Hence extend cmpxchg8b to perform cmpxchg16b when the destination memory >> is 128 bit. >> >> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com> >> --- >> Background: >> >> The AMD IOMMU driver writes 256-bit device table entries with two >> 128-bit cmpxchg operations. For guests using hardware-accelerated >> vIOMMU (still in progress), QEMU traps device table accesses to set up >> nested page tables. Without 128-bit cmpxchg emulation, KVM cannot >> handle these traps and DTE access emulation fails. > Please put this paragraph in the changelog proper, the "why" matters greatly here. Sure. >> QEMU implementation that traps DTE accesses: >> https://github.com/AMDESE/qemu-iommu/blob/wip/for_iommufd_hw_queue-v8_amd_viommu_20260106/hw/i386/amd_viommu.c#L517 >> --- >> arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++------- >> arch/x86/kvm/kvm_emulate.h | 1 + >> 2 files changed, 41 insertions(+), 8 deletions(-) >> >> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c >> index c8e292e9a24d..e1a08cd3274b 100644 >> --- a/arch/x86/kvm/emulate.c >> +++ b/arch/x86/kvm/emulate.c >> @@ -2188,17 +2188,18 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) >> return rc; >> } >> >> -static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) >> +static int __handle_cmpxchg8b(struct x86_emulate_ctxt *ctxt) >> { >> - u64 old = ctxt->dst.orig_val64; >> + u64 old64 = ctxt->dst.orig_val64; >> >> - if (ctxt->dst.bytes == 16) >> + /* Use of the REX.W prefix promotes operation to 128 bits */ >> + if (ctxt->rex_bits & REX_W) > Uh, yeah, and the caller specifically pivoted on this size. I'm a-ok with a > sanity check, but it should be exactly that, e.g. > > if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes)) > return X86EMUL_UNHANDLEABLE; Sure I'll update it. >> return X86EMUL_UNHANDLEABLE; >> >> - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || >> - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { >> - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); >> - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); >> + if (((u32) (old64 >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || >> + ((u32) (old64 >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { >> + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old64 >> 0); >> + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old64 >> 32); >> ctxt->eflags &= ~X86_EFLAGS_ZF; >> } else { >> ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | >> @@ -2209,6 +2210,37 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) >> return X86EMUL_CONTINUE; >> } >> >> +static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt) >> +{ >> + __uint128_t old128 = ctxt->dst.val128; > There is zero chance you properly tested this patch. Please write comprehensive > testcases in KUT's emulator64.c before posting the next version. > > In writeback(), the OP_MEM case quite clearly operates on "unsigned long" values > *and* consumes orig_val in the locked case. > > case OP_MEM: > if (ctxt->lock_prefix) > return segmented_cmpxchg(ctxt, > op->addr.mem, > &op->orig_val, > &op->val, > op->bytes); > else > return segmented_write(ctxt, > op->addr.mem, > &op->val, > op->bytes); > > And then emulator_cmpxchg_emulated() should be taught to do cmpxchg16b itself: > > /* guests cmpxchg8b have to be emulated atomically */ > if (bytes > 8 || (bytes & (bytes - 1))) > goto emul_write; > > That might be a big lift, e.g. to get a 16-byte uaccess version. If it's > unreasonably difficult, we should reject emulation of LOCK CMPXCHG16B. I think its not a too much of the work, I have a prototype ready which implements cmpxchg16b access in uaccess.h. Tested with a KUT as well seems to be working fine. Will post it once I cleanup the series. > And this code would also need to be updated to copy the full 128-bit value. > > /* Copy full 64-bit value for CMPXCHG8B. */ > ctxt->dst.orig_val64 = ctxt->dst.val64; > > Maybe something like this? Or maybe just copy 128 bits unconditionally (I'm not > sure if that could read uninitiatlized data or not). > > /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */ > if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16) > ctxt->dst.orig_val128 = ctxt->dst.val128; > else > ctxt->dst.orig_val64 = ctxt->dst.val64; Yeah thanks, I completely missed that cmpxchg with lock prefix consumes original value. I'll update the code. >> + >> + /* Use of the REX.W prefix promotes operation to 128 bits */ >> + if (!(ctxt->rex_bits & REX_W)) >> + return X86EMUL_UNHANDLEABLE; >> + >> + if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) || >> + ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) { >> + *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0); >> + *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64); >> + ctxt->eflags &= ~X86_EFLAGS_ZF; >> + } else { >> + ctxt->dst.val128 = >> + ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) | >> + (u64) reg_read(ctxt, VCPU_REGS_RBX); >> + >> + ctxt->eflags |= X86_EFLAGS_ZF; > IMO we should use a macro to handle 8b vs. 16b. The code is going to be heinous > no matter what, and so to me, duplicating everything makes it twice as ugly, > whereas a macro makes it like 10% more ugly. > >> + } >> + return X86EMUL_CONTINUE; >> +} >> + >> +static int em_cmpxchgxb(struct x86_emulate_ctxt *ctxt) >> +{ >> + if (ctxt->dst.bytes == 16) >> + return __handle_cmpxchg16b(ctxt); >> + >> + return __handle_cmpxchg8b(ctxt); >> +} >> + >> static int em_ret(struct x86_emulate_ctxt *ctxt) >> { >> int rc; >> @@ -4097,7 +4129,7 @@ static const struct gprefix pfx_0f_c7_7 = { >> >> >> static const struct group_dual group9 = { { >> - N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, >> + N, I(DstMem64 | Lock | PageTable, em_cmpxchgxb), N, N, N, N, N, N, > Eh, I vote to keep it em_cmpxchg8b. _If_ we want to capture that its size is > variable, maybe em_cmpxchg8b_16b? em_cmpxchgxb just looks like a typo. Yeah ok, I was not sure how to represent variable size hence used the x. Will rename it. > > Sans correct writeback() handling, something like so: > > --- > arch/x86/kvm/emulate.c | 44 ++++++++++++++++++++++++-------------- > arch/x86/kvm/kvm_emulate.h | 2 ++ > 2 files changed, 30 insertions(+), 16 deletions(-) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index 6145dac4a605..3ec4555571fa 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -2201,24 +2201,33 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt) > return rc; > } > > +#define em_cmpxchg8b_16b(__c, rbits, mbits) \ > +do { \ > + u##mbits old = __c->dst.orig_val##mbits; \ > + \ > + BUILD_BUG_ON(rbits * 2 != mbits); \ > + \ > + if (((u##rbits)(old >> 0) != (u##rbits)reg_read(__c, VCPU_REGS_RAX)) || \ > + ((u##rbits)(old >> rbits) != (u##rbits)reg_read(__c, VCPU_REGS_RDX))) { \ > + *reg_write(__c, VCPU_REGS_RAX) = (u##rbits)(old >> 0); \ > + *reg_write(__c, VCPU_REGS_RDX) = (u##rbits)(old >> rbits); \ > + __c->eflags &= ~X86_EFLAGS_ZF; \ > + } else { \ > + __c->dst.val##mbits = ((u##mbits)reg_read(__c, VCPU_REGS_RCX) << rbits) | \ > + (u##rbits)reg_read(__c, VCPU_REGS_RBX); \ > + __c->eflags |= X86_EFLAGS_ZF; \ > + } \ > +} while (0) > + > static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) > { > - u64 old = ctxt->dst.orig_val64; > - > - if (ctxt->dst.bytes == 16) > + if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes)) > return X86EMUL_UNHANDLEABLE; > > - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || > - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { > - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); > - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); > - ctxt->eflags &= ~X86_EFLAGS_ZF; > - } else { > - ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | > - (u32) reg_read(ctxt, VCPU_REGS_RBX); > - > - ctxt->eflags |= X86_EFLAGS_ZF; > - } > + if (!(ctxt->rex_bits & REX_W)) > + em_cmpxchg8b_16b(ctxt, 32, 64); > + else > + em_cmpxchg8b_16b(ctxt, 64, 128); > return X86EMUL_CONTINUE; > } > > @@ -5418,8 +5427,11 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts) > goto done; > } > } > - /* Copy full 64-bit value for CMPXCHG8B. */ > - ctxt->dst.orig_val64 = ctxt->dst.val64; > + /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */ > + if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16) > + ctxt->dst.orig_val128 = ctxt->dst.val128; > + else > + ctxt->dst.orig_val64 = ctxt->dst.val64; > > special_insn: > > diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h > index fb3dab4b5a53..0e9968319343 100644 > --- a/arch/x86/kvm/kvm_emulate.h > +++ b/arch/x86/kvm/kvm_emulate.h > @@ -255,6 +255,7 @@ struct operand { > union { > unsigned long orig_val; > u64 orig_val64; > + u64 orig_val128; > }; > union { > unsigned long *reg; > @@ -268,6 +269,7 @@ struct operand { > union { > unsigned long val; > u64 val64; > + __uint128_t val128; > char valptr[sizeof(avx256_t)]; > sse128_t vec_val; > avx256_t vec_val2; > > base-commit: 5128b972fb2801ad9aca54d990a75611ab5283a9 > -- Thanks Sairaj ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation 2026-03-06 15:38 ` Sean Christopherson 2026-03-11 11:18 ` Sairaj Kodilkar @ 2026-03-12 11:46 ` Paolo Bonzini 1 sibling, 0 replies; 6+ messages in thread From: Paolo Bonzini @ 2026-03-12 11:46 UTC (permalink / raw) To: Sean Christopherson, Sairaj Kodilkar Cc: H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar, Thomas Gleixner, kvm, linux-kernel, x86, suravee.suthikulpanit, vasant.hegde, nikunj.dadhania, Manali.Shukla On 3/6/26 16:38, Sean Christopherson wrote: >> - if (ctxt->dst.bytes == 16) >> + /* Use of the REX.W prefix promotes operation to 128 bits */ >> + if (ctxt->rex_bits & REX_W) > > Uh, yeah, and the caller specifically pivoted on this size. I'm a-ok with a > sanity check, but it should be exactly that, e.g. > > if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes)) > return X86EMUL_UNHANDLEABLE; > And then emulator_cmpxchg_emulated() should be taught to do cmpxchg16b itself: > > /* guests cmpxchg8b have to be emulated atomically */ > if (bytes > 8 || (bytes & (bytes - 1))) > goto emul_write; > > That might be a big lift, e.g. to get a 16-byte uaccess version. If it's > unreasonably difficult, we should reject emulation of LOCK CMPXCHG16B. It's actually pretty easy because a kind soul already wrote the cmpxchg8b version of the macros. Compile-tested only (and checked that the asm does have the instruction): diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 3a7755c1a4410..f40e815263233 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -388,14 +388,13 @@ do { \ *_old = __old; \ likely(success); }) -#ifdef CONFIG_X86_32 -#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ +#define __try_cmpxchg8b_16b_user_asm(_b, _ptr, _pold, _new, label)({ \ bool success; \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ asm_goto_output("\n" \ - "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "1: " LOCK_PREFIX "cmpxchg" #_b "b %[ptr]\n" \ _ASM_EXTABLE_UA(1b, %l[label]) \ : CC_OUT(z) (success), \ "+A" (__old), \ @@ -407,7 +406,6 @@ do { \ if (unlikely(!success)) \ *_old = __old; \ likely(success); }) -#endif // CONFIG_X86_32 #else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT #define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ int __err = 0; \ @@ -433,7 +431,6 @@ do { \ *_old = __old; \ likely(success); }) -#ifdef CONFIG_X86_32 /* * Unlike the normal CMPXCHG, use output GPR for both success/fail and error. * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are @@ -441,13 +438,13 @@ do { \ * both ESI and EDI for the memory operand, compilation will fail if the error * is an input+output as there will be no register available for input. */ -#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ +#define __try_cmpxchg8b_16b_user_asm(_b, _ptr, _pold, _new, label) ({ \ int __result; \ __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ __typeof__(*(_ptr)) __old = *_old; \ __typeof__(*(_ptr)) __new = (_new); \ asm volatile("\n" \ - "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "1: " LOCK_PREFIX "cmpxchg" #_b "b %[ptr]\n" \ "mov $0, %[result]\n\t" \ "setz %b[result]\n" \ "2:\n" \ @@ -464,7 +461,6 @@ do { \ if (unlikely(!__result)) \ *_old = __old; \ likely(__result); }) -#endif // CONFIG_X86_32 #endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT /* FIXME: this hack is definitely wrong -AK */ @@ -552,9 +549,15 @@ do { \ extern void __try_cmpxchg_user_wrong_size(void); -#ifndef CONFIG_X86_32 +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) \ + __try_cmpxchg8b_16b_user_asm(8, _ptr, _pold, _new, label) +#else #define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) + +#define __try_cmpxchg128_user_asm(_ptr, _pold, _new, label) \ + __try_cmpxchg8b_16b_user_asm(16, _ptr, _pold, _new, label) #endif /* @@ -581,6 +584,9 @@ extern void __try_cmpxchg_user_wrong_size(void); case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ (_nval), _label); \ break; \ + case 16:__ret = __try_cmpxchg128_user_asm((__force u128 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ default: __try_cmpxchg_user_wrong_size(); \ } \ __ret; }) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f2db04aa6a17f..3e1bd9cdd75e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7973,7 +7973,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, int r; /* guests cmpxchg8b have to be emulated atomically */ - if (bytes > 8 || (bytes & (bytes - 1))) + if (bytes > 2 * sizeof(unsigned long) || (bytes & (bytes - 1))) goto emul_write; gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); @@ -8013,6 +8013,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, case 8: r = emulator_try_cmpxchg_user(u64, hva, old, new); break; +#ifdef CONFIG_X86_64 + case 16: + r = emulator_try_cmpxchg_user(u128, hva, old, new); + break; +#endif default: BUG(); } (with the uaccess.h part to be split in a separate patch, of course). Paolo ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation 2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar 2026-03-06 15:38 ` Sean Christopherson @ 2026-03-07 2:39 ` kernel test robot 2026-03-07 5:40 ` kernel test robot 2 siblings, 0 replies; 6+ messages in thread From: kernel test robot @ 2026-03-07 2:39 UTC (permalink / raw) To: Sairaj Kodilkar, H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar, Paolo Bonzini, Sean Christopherson, Thomas Gleixner, kvm, linux-kernel, x86 Cc: oe-kbuild-all, suravee.suthikulpanit, vasant.hegde, nikunj.dadhania, Manali.Shukla, Sairaj Kodilkar Hi Sairaj, kernel test robot noticed the following build warnings: [auto build test WARNING on kvm/queue] [also build test WARNING on kvm/next tip/master linus/master v7.0-rc2 next-20260306] [cannot apply to kvm/linux-next tip/auto-latest] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Sairaj-Kodilkar/KVM-x86-Add-support-for-cmpxchg16b-emulation/20260306-182915 base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue patch link: https://lore.kernel.org/r/20260306102047.29760-1-sarunkod%40amd.com patch subject: [PATCH] KVM: x86: Add support for cmpxchg16b emulation config: i386-randconfig-005-20260307 (https://download.01.org/0day-ci/archive/20260307/202603071055.Wi43mREW-lkp@intel.com/config) compiler: gcc-12 (Debian 12.4.0-5) 12.4.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260307/202603071055.Wi43mREW-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202603071055.Wi43mREW-lkp@intel.com/ All warnings (new ones prefixed by >>): In file included from arch/x86/kvm/emulate.c:24: arch/x86/kvm/kvm_emulate.h:271:17: error: unknown type name '__uint128_t' 271 | __uint128_t val128; | ^~~~~~~~~~~ arch/x86/kvm/emulate.c: In function '__handle_cmpxchg16b': arch/x86/kvm/emulate.c:2215:9: error: unknown type name '__uint128_t'; did you mean '__int128__'? 2215 | __uint128_t old128 = ctxt->dst.val128; | ^~~~~~~~~~~ | __int128__ >> arch/x86/kvm/emulate.c:2222:28: warning: right shift count >= width of type [-Wshift-count-overflow] 2222 | ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) { | ^~ arch/x86/kvm/emulate.c:2224:65: warning: right shift count >= width of type [-Wshift-count-overflow] 2224 | *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64); | ^~ arch/x86/kvm/emulate.c:2228:27: error: '__uint128_t' undeclared (first use in this function); did you mean '__int128__'? 2228 | ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) | | ^~~~~~~~~~~ | __int128__ arch/x86/kvm/emulate.c:2228:27: note: each undeclared identifier is reported only once for each function it appears in arch/x86/kvm/emulate.c:2228:39: error: expected ')' before 'reg_read' 2228 | ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) | | ~ ^~~~~~~~~ | ) vim +2222 arch/x86/kvm/emulate.c 2212 2213 static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt) 2214 { 2215 __uint128_t old128 = ctxt->dst.val128; 2216 2217 /* Use of the REX.W prefix promotes operation to 128 bits */ 2218 if (!(ctxt->rex_bits & REX_W)) 2219 return X86EMUL_UNHANDLEABLE; 2220 2221 if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) || > 2222 ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) { 2223 *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0); 2224 *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64); 2225 ctxt->eflags &= ~X86_EFLAGS_ZF; 2226 } else { 2227 ctxt->dst.val128 = 2228 ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) | 2229 (u64) reg_read(ctxt, VCPU_REGS_RBX); 2230 2231 ctxt->eflags |= X86_EFLAGS_ZF; 2232 } 2233 return X86EMUL_CONTINUE; 2234 } 2235 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation 2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar 2026-03-06 15:38 ` Sean Christopherson 2026-03-07 2:39 ` kernel test robot @ 2026-03-07 5:40 ` kernel test robot 2 siblings, 0 replies; 6+ messages in thread From: kernel test robot @ 2026-03-07 5:40 UTC (permalink / raw) To: Sairaj Kodilkar, H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar, Paolo Bonzini, Sean Christopherson, Thomas Gleixner, kvm, linux-kernel, x86 Cc: llvm, oe-kbuild-all, suravee.suthikulpanit, vasant.hegde, nikunj.dadhania, Manali.Shukla, Sairaj Kodilkar Hi Sairaj, kernel test robot noticed the following build errors: [auto build test ERROR on kvm/queue] [also build test ERROR on kvm/next tip/master linus/master v7.0-rc2 next-20260306] [cannot apply to kvm/linux-next tip/auto-latest bp/for-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Sairaj-Kodilkar/KVM-x86-Add-support-for-cmpxchg16b-emulation/20260306-182915 base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue patch link: https://lore.kernel.org/r/20260306102047.29760-1-sarunkod%40amd.com patch subject: [PATCH] KVM: x86: Add support for cmpxchg16b emulation config: i386-randconfig-012-20260307 (https://download.01.org/0day-ci/archive/20260307/202603071358.8bRahq3i-lkp@intel.com/config) compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260307/202603071358.8bRahq3i-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202603071358.8bRahq3i-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from arch/x86/kvm/mmu/page_track.c:19: In file included from arch/x86/kvm/mmu.h:7: In file included from arch/x86/kvm/x86.h:10: >> arch/x86/kvm/kvm_emulate.h:271:3: error: unknown type name '__uint128_t' 271 | __uint128_t val128; | ^ 1 error generated. -- In file included from arch/x86/kvm/vmx/vmcs12.c:4: In file included from arch/x86/kvm/vmx/vmcs12.h:7: In file included from arch/x86/kvm/vmx/vmcs.h:12: In file included from arch/x86/kvm/vmx/capabilities.h:7: In file included from arch/x86/kvm/vmx/../lapic.h:11: In file included from arch/x86/kvm/vmx/../hyperv.h:25: In file included from arch/x86/kvm/vmx/../x86.h:10: >> arch/x86/kvm/vmx/../kvm_emulate.h:271:3: error: unknown type name '__uint128_t' 271 | __uint128_t val128; | ^ 1 error generated. -- In file included from arch/x86/kvm/emulate.c:24: >> arch/x86/kvm/kvm_emulate.h:271:3: error: unknown type name '__uint128_t' 271 | __uint128_t val128; | ^ >> arch/x86/kvm/emulate.c:393:12: error: no member named 'val' in 'struct operand' 393 | ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF); | ~~~~~~~~~ ^ >> arch/x86/kvm/emulate.c:423:27: error: no member named 'val64' in 'struct operand' 423 | .src_val = ctxt->src.val64, | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:424:27: error: no member named 'val64' in 'struct operand' 424 | .dst_val = ctxt->dst.val64, | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:346:10: note: expanded from macro 'EM_ASM_2' 346 | case 1: __EM_ASM_2(op##b, al, dl); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:291:25: note: expanded from macro '__EM_ASM' 291 | : "+a" (ctxt->dst.val), \ | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:346:10: note: expanded from macro 'EM_ASM_2' 346 | case 1: __EM_ASM_2(op##b, al, dl); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:292:25: note: expanded from macro '__EM_ASM' 292 | "+d" (ctxt->src.val), \ | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:346:10: note: expanded from macro 'EM_ASM_2' 346 | case 1: __EM_ASM_2(op##b, al, dl); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:295:25: note: expanded from macro '__EM_ASM' 295 | : "c" (ctxt->src2.val)) | ~~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:347:10: note: expanded from macro 'EM_ASM_2' 347 | case 2: __EM_ASM_2(op##w, ax, dx); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:291:25: note: expanded from macro '__EM_ASM' 291 | : "+a" (ctxt->dst.val), \ | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:347:10: note: expanded from macro 'EM_ASM_2' 347 | case 2: __EM_ASM_2(op##w, ax, dx); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:292:25: note: expanded from macro '__EM_ASM' 292 | "+d" (ctxt->src.val), \ | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:347:10: note: expanded from macro 'EM_ASM_2' 347 | case 2: __EM_ASM_2(op##w, ax, dx); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:295:25: note: expanded from macro '__EM_ASM' 295 | : "c" (ctxt->src2.val)) | ~~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:348:10: note: expanded from macro 'EM_ASM_2' 348 | case 4: __EM_ASM_2(op##l, eax, edx); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:291:25: note: expanded from macro '__EM_ASM' 291 | : "+a" (ctxt->dst.val), \ | ~~~~~~~~~ ^ arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand' 955 | EM_ASM_2(add); | ^~~~~~~~~~~~~ arch/x86/kvm/emulate.c:348:10: note: expanded from macro 'EM_ASM_2' 348 | case 4: __EM_ASM_2(op##l, eax, edx); break; \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2' 305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t") | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/x86/kvm/emulate.c:292:25: note: expanded from macro '__EM_ASM' 292 | "+d" (ctxt->src.val), \ -- In file included from arch/x86/kvm/x86.c:21: In file included from arch/x86/kvm/irq.h:19: In file included from arch/x86/kvm/lapic.h:11: In file included from arch/x86/kvm/hyperv.h:25: In file included from arch/x86/kvm/x86.h:10: >> arch/x86/kvm/kvm_emulate.h:271:3: error: unknown type name '__uint128_t' 271 | __uint128_t val128; | ^ In file included from arch/x86/kvm/x86.c:44: include/linux/mman.h:157:9: warning: division by zero is undefined [-Wdivision-by-zero] 157 | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/mman.h:135:21: note: expanded from macro '_calc_vm_trans' 135 | : ((x) & (bit1)) / ((bit1) / (bit2)))) | ^ ~~~~~~~~~~~~~~~~~ >> arch/x86/kvm/x86.c:9381:30: error: no member named 'val' in 'struct operand' 9381 | return vector == ctxt->src.val; | ~~~~~~~~~ ^ 1 warning and 2 errors generated. vim +/__uint128_t +271 arch/x86/kvm/kvm_emulate.h 249 250 /* Type, address-of, and value of an instruction's operand. */ 251 struct operand { 252 enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_YMM, OP_MM, OP_NONE } type; 253 unsigned int bytes; 254 unsigned int count; 255 union { 256 unsigned long orig_val; 257 u64 orig_val64; 258 }; 259 union { 260 unsigned long *reg; 261 struct segmented_address { 262 ulong ea; 263 unsigned seg; 264 } mem; 265 unsigned xmm; 266 unsigned mm; 267 } addr; 268 union { 269 unsigned long val; 270 u64 val64; > 271 __uint128_t val128; 272 char valptr[sizeof(avx256_t)]; 273 sse128_t vec_val; 274 avx256_t vec_val2; 275 u64 mm_val; 276 void *data; 277 } __aligned(32); 278 }; 279 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2026-03-12 11:46 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar 2026-03-06 15:38 ` Sean Christopherson 2026-03-11 11:18 ` Sairaj Kodilkar 2026-03-12 11:46 ` Paolo Bonzini 2026-03-07 2:39 ` kernel test robot 2026-03-07 5:40 ` kernel test robot
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox