* [PATCH] KVM: x86: Add support for cmpxchg16b emulation
@ 2026-03-06 10:20 Sairaj Kodilkar
2026-03-06 15:38 ` Sean Christopherson
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Sairaj Kodilkar @ 2026-03-06 10:20 UTC (permalink / raw)
To: H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar,
Paolo Bonzini, Sean Christopherson, Thomas Gleixner, kvm,
linux-kernel, x86
Cc: suravee.suthikulpanit, vasant.hegde, nikunj.dadhania,
Manali.Shukla, Sairaj Kodilkar
AMD and Intel both provides support for 128 bit cmpxchg operands using
cmpxchg8b/cmpxchg16b instructions (opcode 0FC7). However, kvm does not
support emulating cmpxchg16b (i.e when destination memory is 128 bit and
REX.W = 1) which causes emulation failure when QEMU guest performs a
cmpxchg16b on a memory region setup as a IO.
Hence extend cmpxchg8b to perform cmpxchg16b when the destination memory
is 128 bit.
Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
---
Background:
The AMD IOMMU driver writes 256-bit device table entries with two
128-bit cmpxchg operations. For guests using hardware-accelerated
vIOMMU (still in progress), QEMU traps device table accesses to set up
nested page tables. Without 128-bit cmpxchg emulation, KVM cannot
handle these traps and DTE access emulation fails.
QEMU implementation that traps DTE accesses:
https://github.com/AMDESE/qemu-iommu/blob/wip/for_iommufd_hw_queue-v8_amd_viommu_20260106/hw/i386/amd_viommu.c#L517
---
arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++-------
arch/x86/kvm/kvm_emulate.h | 1 +
2 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c8e292e9a24d..e1a08cd3274b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2188,17 +2188,18 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
return rc;
}
-static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
+static int __handle_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
- u64 old = ctxt->dst.orig_val64;
+ u64 old64 = ctxt->dst.orig_val64;
- if (ctxt->dst.bytes == 16)
+ /* Use of the REX.W prefix promotes operation to 128 bits */
+ if (ctxt->rex_bits & REX_W)
return X86EMUL_UNHANDLEABLE;
- if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
- ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
- *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
- *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
+ if (((u32) (old64 >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
+ ((u32) (old64 >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
+ *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old64 >> 0);
+ *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old64 >> 32);
ctxt->eflags &= ~X86_EFLAGS_ZF;
} else {
ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
@@ -2209,6 +2210,37 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt)
+{
+ __uint128_t old128 = ctxt->dst.val128;
+
+ /* Use of the REX.W prefix promotes operation to 128 bits */
+ if (!(ctxt->rex_bits & REX_W))
+ return X86EMUL_UNHANDLEABLE;
+
+ if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) ||
+ ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) {
+ *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0);
+ *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64);
+ ctxt->eflags &= ~X86_EFLAGS_ZF;
+ } else {
+ ctxt->dst.val128 =
+ ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
+ (u64) reg_read(ctxt, VCPU_REGS_RBX);
+
+ ctxt->eflags |= X86_EFLAGS_ZF;
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static int em_cmpxchgxb(struct x86_emulate_ctxt *ctxt)
+{
+ if (ctxt->dst.bytes == 16)
+ return __handle_cmpxchg16b(ctxt);
+
+ return __handle_cmpxchg8b(ctxt);
+}
+
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
int rc;
@@ -4097,7 +4129,7 @@ static const struct gprefix pfx_0f_c7_7 = {
static const struct group_dual group9 = { {
- N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
+ N, I(DstMem64 | Lock | PageTable, em_cmpxchgxb), N, N, N, N, N, N,
}, {
N, N, N, N, N, N, N,
GP(0, &pfx_0f_c7_7),
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index fb3dab4b5a53..a1e95c57d40e 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -268,6 +268,7 @@ struct operand {
union {
unsigned long val;
u64 val64;
+ __uint128_t val128;
char valptr[sizeof(avx256_t)];
sse128_t vec_val;
avx256_t vec_val2;
--
2.34.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar
@ 2026-03-06 15:38 ` Sean Christopherson
2026-03-11 11:18 ` Sairaj Kodilkar
2026-03-12 11:46 ` Paolo Bonzini
2026-03-07 2:39 ` kernel test robot
2026-03-07 5:40 ` kernel test robot
2 siblings, 2 replies; 6+ messages in thread
From: Sean Christopherson @ 2026-03-06 15:38 UTC (permalink / raw)
To: Sairaj Kodilkar
Cc: H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar,
Paolo Bonzini, Thomas Gleixner, kvm, linux-kernel, x86,
suravee.suthikulpanit, vasant.hegde, nikunj.dadhania,
Manali.Shukla
On Fri, Mar 06, 2026, Sairaj Kodilkar wrote:
> AMD and Intel both provides support for 128 bit cmpxchg operands using
> cmpxchg8b/cmpxchg16b instructions (opcode 0FC7). However, kvm does not
> support emulating cmpxchg16b (i.e when destination memory is 128 bit and
> REX.W = 1) which causes emulation failure when QEMU guest performs a
> cmpxchg16b on a memory region setup as a IO.
>
> Hence extend cmpxchg8b to perform cmpxchg16b when the destination memory
> is 128 bit.
>
> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
> ---
> Background:
>
> The AMD IOMMU driver writes 256-bit device table entries with two
> 128-bit cmpxchg operations. For guests using hardware-accelerated
> vIOMMU (still in progress), QEMU traps device table accesses to set up
> nested page tables. Without 128-bit cmpxchg emulation, KVM cannot
> handle these traps and DTE access emulation fails.
Please put this paragraph in the changelog proper, the "why" matters greatly here.
> QEMU implementation that traps DTE accesses:
> https://github.com/AMDESE/qemu-iommu/blob/wip/for_iommufd_hw_queue-v8_amd_viommu_20260106/hw/i386/amd_viommu.c#L517
> ---
> arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++-------
> arch/x86/kvm/kvm_emulate.h | 1 +
> 2 files changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index c8e292e9a24d..e1a08cd3274b 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2188,17 +2188,18 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
> return rc;
> }
>
> -static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
> +static int __handle_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
> {
> - u64 old = ctxt->dst.orig_val64;
> + u64 old64 = ctxt->dst.orig_val64;
>
> - if (ctxt->dst.bytes == 16)
> + /* Use of the REX.W prefix promotes operation to 128 bits */
> + if (ctxt->rex_bits & REX_W)
Uh, yeah, and the caller specifically pivoted on this size. I'm a-ok with a
sanity check, but it should be exactly that, e.g.
if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes))
return X86EMUL_UNHANDLEABLE;
> return X86EMUL_UNHANDLEABLE;
>
> - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
> - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
> - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
> - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
> + if (((u32) (old64 >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
> + ((u32) (old64 >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
> + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old64 >> 0);
> + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old64 >> 32);
> ctxt->eflags &= ~X86_EFLAGS_ZF;
> } else {
> ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
> @@ -2209,6 +2210,37 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
> return X86EMUL_CONTINUE;
> }
>
> +static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt)
> +{
> + __uint128_t old128 = ctxt->dst.val128;
There is zero chance you properly tested this patch. Please write comprehensive
testcases in KUT's emulator64.c before posting the next version.
In writeback(), the OP_MEM case quite clearly operates on "unsigned long" values
*and* consumes orig_val in the locked case.
case OP_MEM:
if (ctxt->lock_prefix)
return segmented_cmpxchg(ctxt,
op->addr.mem,
&op->orig_val,
&op->val,
op->bytes);
else
return segmented_write(ctxt,
op->addr.mem,
&op->val,
op->bytes);
And then emulator_cmpxchg_emulated() should be taught to do cmpxchg16b itself:
/* guests cmpxchg8b have to be emulated atomically */
if (bytes > 8 || (bytes & (bytes - 1)))
goto emul_write;
That might be a big lift, e.g. to get a 16-byte uaccess version. If it's
unreasonably difficult, we should reject emulation of LOCK CMPXCHG16B.
And this code would also need to be updated to copy the full 128-bit value.
/* Copy full 64-bit value for CMPXCHG8B. */
ctxt->dst.orig_val64 = ctxt->dst.val64;
Maybe something like this? Or maybe just copy 128 bits unconditionally (I'm not
sure if that could read uninitiatlized data or not).
/* Copy the full 64/128-bit value for CMPXCHG8B/16B. */
if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16)
ctxt->dst.orig_val128 = ctxt->dst.val128;
else
ctxt->dst.orig_val64 = ctxt->dst.val64;
> +
> + /* Use of the REX.W prefix promotes operation to 128 bits */
> + if (!(ctxt->rex_bits & REX_W))
> + return X86EMUL_UNHANDLEABLE;
> +
> + if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) ||
> + ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) {
> + *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0);
> + *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64);
> + ctxt->eflags &= ~X86_EFLAGS_ZF;
> + } else {
> + ctxt->dst.val128 =
> + ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
> + (u64) reg_read(ctxt, VCPU_REGS_RBX);
> +
> + ctxt->eflags |= X86_EFLAGS_ZF;
IMO we should use a macro to handle 8b vs. 16b. The code is going to be heinous
no matter what, and so to me, duplicating everything makes it twice as ugly,
whereas a macro makes it like 10% more ugly.
> + }
> + return X86EMUL_CONTINUE;
> +}
> +
> +static int em_cmpxchgxb(struct x86_emulate_ctxt *ctxt)
> +{
> + if (ctxt->dst.bytes == 16)
> + return __handle_cmpxchg16b(ctxt);
> +
> + return __handle_cmpxchg8b(ctxt);
> +}
> +
> static int em_ret(struct x86_emulate_ctxt *ctxt)
> {
> int rc;
> @@ -4097,7 +4129,7 @@ static const struct gprefix pfx_0f_c7_7 = {
>
>
> static const struct group_dual group9 = { {
> - N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
> + N, I(DstMem64 | Lock | PageTable, em_cmpxchgxb), N, N, N, N, N, N,
Eh, I vote to keep it em_cmpxchg8b. _If_ we want to capture that its size is
variable, maybe em_cmpxchg8b_16b? em_cmpxchgxb just looks like a typo.
Sans correct writeback() handling, something like so:
---
arch/x86/kvm/emulate.c | 44 ++++++++++++++++++++++++--------------
arch/x86/kvm/kvm_emulate.h | 2 ++
2 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 6145dac4a605..3ec4555571fa 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2201,24 +2201,33 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
return rc;
}
+#define em_cmpxchg8b_16b(__c, rbits, mbits) \
+do { \
+ u##mbits old = __c->dst.orig_val##mbits; \
+ \
+ BUILD_BUG_ON(rbits * 2 != mbits); \
+ \
+ if (((u##rbits)(old >> 0) != (u##rbits)reg_read(__c, VCPU_REGS_RAX)) || \
+ ((u##rbits)(old >> rbits) != (u##rbits)reg_read(__c, VCPU_REGS_RDX))) { \
+ *reg_write(__c, VCPU_REGS_RAX) = (u##rbits)(old >> 0); \
+ *reg_write(__c, VCPU_REGS_RDX) = (u##rbits)(old >> rbits); \
+ __c->eflags &= ~X86_EFLAGS_ZF; \
+ } else { \
+ __c->dst.val##mbits = ((u##mbits)reg_read(__c, VCPU_REGS_RCX) << rbits) | \
+ (u##rbits)reg_read(__c, VCPU_REGS_RBX); \
+ __c->eflags |= X86_EFLAGS_ZF; \
+ } \
+} while (0)
+
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
- u64 old = ctxt->dst.orig_val64;
-
- if (ctxt->dst.bytes == 16)
+ if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes))
return X86EMUL_UNHANDLEABLE;
- if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
- ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
- *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
- *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
- ctxt->eflags &= ~X86_EFLAGS_ZF;
- } else {
- ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
- (u32) reg_read(ctxt, VCPU_REGS_RBX);
-
- ctxt->eflags |= X86_EFLAGS_ZF;
- }
+ if (!(ctxt->rex_bits & REX_W))
+ em_cmpxchg8b_16b(ctxt, 32, 64);
+ else
+ em_cmpxchg8b_16b(ctxt, 64, 128);
return X86EMUL_CONTINUE;
}
@@ -5418,8 +5427,11 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
goto done;
}
}
- /* Copy full 64-bit value for CMPXCHG8B. */
- ctxt->dst.orig_val64 = ctxt->dst.val64;
+ /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */
+ if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16)
+ ctxt->dst.orig_val128 = ctxt->dst.val128;
+ else
+ ctxt->dst.orig_val64 = ctxt->dst.val64;
special_insn:
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index fb3dab4b5a53..0e9968319343 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -255,6 +255,7 @@ struct operand {
union {
unsigned long orig_val;
u64 orig_val64;
+ u64 orig_val128;
};
union {
unsigned long *reg;
@@ -268,6 +269,7 @@ struct operand {
union {
unsigned long val;
u64 val64;
+ __uint128_t val128;
char valptr[sizeof(avx256_t)];
sse128_t vec_val;
avx256_t vec_val2;
base-commit: 5128b972fb2801ad9aca54d990a75611ab5283a9
--
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar
2026-03-06 15:38 ` Sean Christopherson
@ 2026-03-07 2:39 ` kernel test robot
2026-03-07 5:40 ` kernel test robot
2 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2026-03-07 2:39 UTC (permalink / raw)
To: Sairaj Kodilkar, H. Peter Anvin, Borislav Petkov, Dave Hansen,
Ingo Molnar, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
kvm, linux-kernel, x86
Cc: oe-kbuild-all, suravee.suthikulpanit, vasant.hegde,
nikunj.dadhania, Manali.Shukla, Sairaj Kodilkar
Hi Sairaj,
kernel test robot noticed the following build warnings:
[auto build test WARNING on kvm/queue]
[also build test WARNING on kvm/next tip/master linus/master v7.0-rc2 next-20260306]
[cannot apply to kvm/linux-next tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sairaj-Kodilkar/KVM-x86-Add-support-for-cmpxchg16b-emulation/20260306-182915
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20260306102047.29760-1-sarunkod%40amd.com
patch subject: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
config: i386-randconfig-005-20260307 (https://download.01.org/0day-ci/archive/20260307/202603071055.Wi43mREW-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.4.0-5) 12.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260307/202603071055.Wi43mREW-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603071055.Wi43mREW-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from arch/x86/kvm/emulate.c:24:
arch/x86/kvm/kvm_emulate.h:271:17: error: unknown type name '__uint128_t'
271 | __uint128_t val128;
| ^~~~~~~~~~~
arch/x86/kvm/emulate.c: In function '__handle_cmpxchg16b':
arch/x86/kvm/emulate.c:2215:9: error: unknown type name '__uint128_t'; did you mean '__int128__'?
2215 | __uint128_t old128 = ctxt->dst.val128;
| ^~~~~~~~~~~
| __int128__
>> arch/x86/kvm/emulate.c:2222:28: warning: right shift count >= width of type [-Wshift-count-overflow]
2222 | ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) {
| ^~
arch/x86/kvm/emulate.c:2224:65: warning: right shift count >= width of type [-Wshift-count-overflow]
2224 | *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64);
| ^~
arch/x86/kvm/emulate.c:2228:27: error: '__uint128_t' undeclared (first use in this function); did you mean '__int128__'?
2228 | ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
| ^~~~~~~~~~~
| __int128__
arch/x86/kvm/emulate.c:2228:27: note: each undeclared identifier is reported only once for each function it appears in
arch/x86/kvm/emulate.c:2228:39: error: expected ')' before 'reg_read'
2228 | ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
| ~ ^~~~~~~~~
| )
vim +2222 arch/x86/kvm/emulate.c
2212
2213 static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt)
2214 {
2215 __uint128_t old128 = ctxt->dst.val128;
2216
2217 /* Use of the REX.W prefix promotes operation to 128 bits */
2218 if (!(ctxt->rex_bits & REX_W))
2219 return X86EMUL_UNHANDLEABLE;
2220
2221 if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) ||
> 2222 ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) {
2223 *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0);
2224 *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64);
2225 ctxt->eflags &= ~X86_EFLAGS_ZF;
2226 } else {
2227 ctxt->dst.val128 =
2228 ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
2229 (u64) reg_read(ctxt, VCPU_REGS_RBX);
2230
2231 ctxt->eflags |= X86_EFLAGS_ZF;
2232 }
2233 return X86EMUL_CONTINUE;
2234 }
2235
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar
2026-03-06 15:38 ` Sean Christopherson
2026-03-07 2:39 ` kernel test robot
@ 2026-03-07 5:40 ` kernel test robot
2 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2026-03-07 5:40 UTC (permalink / raw)
To: Sairaj Kodilkar, H. Peter Anvin, Borislav Petkov, Dave Hansen,
Ingo Molnar, Paolo Bonzini, Sean Christopherson, Thomas Gleixner,
kvm, linux-kernel, x86
Cc: llvm, oe-kbuild-all, suravee.suthikulpanit, vasant.hegde,
nikunj.dadhania, Manali.Shukla, Sairaj Kodilkar
Hi Sairaj,
kernel test robot noticed the following build errors:
[auto build test ERROR on kvm/queue]
[also build test ERROR on kvm/next tip/master linus/master v7.0-rc2 next-20260306]
[cannot apply to kvm/linux-next tip/auto-latest bp/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sairaj-Kodilkar/KVM-x86-Add-support-for-cmpxchg16b-emulation/20260306-182915
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20260306102047.29760-1-sarunkod%40amd.com
patch subject: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
config: i386-randconfig-012-20260307 (https://download.01.org/0day-ci/archive/20260307/202603071358.8bRahq3i-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260307/202603071358.8bRahq3i-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603071358.8bRahq3i-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from arch/x86/kvm/mmu/page_track.c:19:
In file included from arch/x86/kvm/mmu.h:7:
In file included from arch/x86/kvm/x86.h:10:
>> arch/x86/kvm/kvm_emulate.h:271:3: error: unknown type name '__uint128_t'
271 | __uint128_t val128;
| ^
1 error generated.
--
In file included from arch/x86/kvm/vmx/vmcs12.c:4:
In file included from arch/x86/kvm/vmx/vmcs12.h:7:
In file included from arch/x86/kvm/vmx/vmcs.h:12:
In file included from arch/x86/kvm/vmx/capabilities.h:7:
In file included from arch/x86/kvm/vmx/../lapic.h:11:
In file included from arch/x86/kvm/vmx/../hyperv.h:25:
In file included from arch/x86/kvm/vmx/../x86.h:10:
>> arch/x86/kvm/vmx/../kvm_emulate.h:271:3: error: unknown type name '__uint128_t'
271 | __uint128_t val128;
| ^
1 error generated.
--
In file included from arch/x86/kvm/emulate.c:24:
>> arch/x86/kvm/kvm_emulate.h:271:3: error: unknown type name '__uint128_t'
271 | __uint128_t val128;
| ^
>> arch/x86/kvm/emulate.c:393:12: error: no member named 'val' in 'struct operand'
393 | ctxt->dst.val = 0xFF * !!(ctxt->eflags & X86_EFLAGS_CF);
| ~~~~~~~~~ ^
>> arch/x86/kvm/emulate.c:423:27: error: no member named 'val64' in 'struct operand'
423 | .src_val = ctxt->src.val64,
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:424:27: error: no member named 'val64' in 'struct operand'
424 | .dst_val = ctxt->dst.val64,
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:346:10: note: expanded from macro 'EM_ASM_2'
346 | case 1: __EM_ASM_2(op##b, al, dl); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:291:25: note: expanded from macro '__EM_ASM'
291 | : "+a" (ctxt->dst.val), \
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:346:10: note: expanded from macro 'EM_ASM_2'
346 | case 1: __EM_ASM_2(op##b, al, dl); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:292:25: note: expanded from macro '__EM_ASM'
292 | "+d" (ctxt->src.val), \
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:346:10: note: expanded from macro 'EM_ASM_2'
346 | case 1: __EM_ASM_2(op##b, al, dl); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:295:25: note: expanded from macro '__EM_ASM'
295 | : "c" (ctxt->src2.val))
| ~~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:347:10: note: expanded from macro 'EM_ASM_2'
347 | case 2: __EM_ASM_2(op##w, ax, dx); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:291:25: note: expanded from macro '__EM_ASM'
291 | : "+a" (ctxt->dst.val), \
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:347:10: note: expanded from macro 'EM_ASM_2'
347 | case 2: __EM_ASM_2(op##w, ax, dx); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:292:25: note: expanded from macro '__EM_ASM'
292 | "+d" (ctxt->src.val), \
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:347:10: note: expanded from macro 'EM_ASM_2'
347 | case 2: __EM_ASM_2(op##w, ax, dx); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:295:25: note: expanded from macro '__EM_ASM'
295 | : "c" (ctxt->src2.val))
| ~~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:348:10: note: expanded from macro 'EM_ASM_2'
348 | case 4: __EM_ASM_2(op##l, eax, edx); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:291:25: note: expanded from macro '__EM_ASM'
291 | : "+a" (ctxt->dst.val), \
| ~~~~~~~~~ ^
arch/x86/kvm/emulate.c:955:1: error: no member named 'val' in 'struct operand'
955 | EM_ASM_2(add);
| ^~~~~~~~~~~~~
arch/x86/kvm/emulate.c:348:10: note: expanded from macro 'EM_ASM_2'
348 | case 4: __EM_ASM_2(op##l, eax, edx); break; \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:305:3: note: expanded from macro '__EM_ASM_2'
305 | __EM_ASM(#op " %%" #src ", %%" #dst " \n\t")
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/x86/kvm/emulate.c:292:25: note: expanded from macro '__EM_ASM'
292 | "+d" (ctxt->src.val), \
--
In file included from arch/x86/kvm/x86.c:21:
In file included from arch/x86/kvm/irq.h:19:
In file included from arch/x86/kvm/lapic.h:11:
In file included from arch/x86/kvm/hyperv.h:25:
In file included from arch/x86/kvm/x86.h:10:
>> arch/x86/kvm/kvm_emulate.h:271:3: error: unknown type name '__uint128_t'
271 | __uint128_t val128;
| ^
In file included from arch/x86/kvm/x86.c:44:
include/linux/mman.h:157:9: warning: division by zero is undefined [-Wdivision-by-zero]
157 | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/mman.h:135:21: note: expanded from macro '_calc_vm_trans'
135 | : ((x) & (bit1)) / ((bit1) / (bit2))))
| ^ ~~~~~~~~~~~~~~~~~
>> arch/x86/kvm/x86.c:9381:30: error: no member named 'val' in 'struct operand'
9381 | return vector == ctxt->src.val;
| ~~~~~~~~~ ^
1 warning and 2 errors generated.
vim +/__uint128_t +271 arch/x86/kvm/kvm_emulate.h
249
250 /* Type, address-of, and value of an instruction's operand. */
251 struct operand {
252 enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_YMM, OP_MM, OP_NONE } type;
253 unsigned int bytes;
254 unsigned int count;
255 union {
256 unsigned long orig_val;
257 u64 orig_val64;
258 };
259 union {
260 unsigned long *reg;
261 struct segmented_address {
262 ulong ea;
263 unsigned seg;
264 } mem;
265 unsigned xmm;
266 unsigned mm;
267 } addr;
268 union {
269 unsigned long val;
270 u64 val64;
> 271 __uint128_t val128;
272 char valptr[sizeof(avx256_t)];
273 sse128_t vec_val;
274 avx256_t vec_val2;
275 u64 mm_val;
276 void *data;
277 } __aligned(32);
278 };
279
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
2026-03-06 15:38 ` Sean Christopherson
@ 2026-03-11 11:18 ` Sairaj Kodilkar
2026-03-12 11:46 ` Paolo Bonzini
1 sibling, 0 replies; 6+ messages in thread
From: Sairaj Kodilkar @ 2026-03-11 11:18 UTC (permalink / raw)
To: Sean Christopherson
Cc: sarunkod, H. Peter Anvin, Borislav Petkov, Dave Hansen,
Ingo Molnar, Paolo Bonzini, Thomas Gleixner, kvm, linux-kernel,
x86, suravee.suthikulpanit, vasant.hegde, nikunj.dadhania,
Manali.Shukla
On 3/6/2026 9:08 PM, Sean Christopherson wrote:
> On Fri, Mar 06, 2026, Sairaj Kodilkar wrote:
>> AMD and Intel both provides support for 128 bit cmpxchg operands using
>> cmpxchg8b/cmpxchg16b instructions (opcode 0FC7). However, kvm does not
>> support emulating cmpxchg16b (i.e when destination memory is 128 bit and
>> REX.W = 1) which causes emulation failure when QEMU guest performs a
>> cmpxchg16b on a memory region setup as a IO.
>>
>> Hence extend cmpxchg8b to perform cmpxchg16b when the destination memory
>> is 128 bit.
>>
>> Signed-off-by: Sairaj Kodilkar <sarunkod@amd.com>
>> ---
>> Background:
>>
>> The AMD IOMMU driver writes 256-bit device table entries with two
>> 128-bit cmpxchg operations. For guests using hardware-accelerated
>> vIOMMU (still in progress), QEMU traps device table accesses to set up
>> nested page tables. Without 128-bit cmpxchg emulation, KVM cannot
>> handle these traps and DTE access emulation fails.
> Please put this paragraph in the changelog proper, the "why" matters greatly here.
Sure.
>> QEMU implementation that traps DTE accesses:
>> https://github.com/AMDESE/qemu-iommu/blob/wip/for_iommufd_hw_queue-v8_amd_viommu_20260106/hw/i386/amd_viommu.c#L517
>> ---
>> arch/x86/kvm/emulate.c | 48 +++++++++++++++++++++++++++++++-------
>> arch/x86/kvm/kvm_emulate.h | 1 +
>> 2 files changed, 41 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
>> index c8e292e9a24d..e1a08cd3274b 100644
>> --- a/arch/x86/kvm/emulate.c
>> +++ b/arch/x86/kvm/emulate.c
>> @@ -2188,17 +2188,18 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
>> return rc;
>> }
>>
>> -static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
>> +static int __handle_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
>> {
>> - u64 old = ctxt->dst.orig_val64;
>> + u64 old64 = ctxt->dst.orig_val64;
>>
>> - if (ctxt->dst.bytes == 16)
>> + /* Use of the REX.W prefix promotes operation to 128 bits */
>> + if (ctxt->rex_bits & REX_W)
> Uh, yeah, and the caller specifically pivoted on this size. I'm a-ok with a
> sanity check, but it should be exactly that, e.g.
>
> if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes))
> return X86EMUL_UNHANDLEABLE;
Sure I'll update it.
>> return X86EMUL_UNHANDLEABLE;
>>
>> - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
>> - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
>> - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
>> - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
>> + if (((u32) (old64 >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
>> + ((u32) (old64 >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
>> + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old64 >> 0);
>> + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old64 >> 32);
>> ctxt->eflags &= ~X86_EFLAGS_ZF;
>> } else {
>> ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
>> @@ -2209,6 +2210,37 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
>> return X86EMUL_CONTINUE;
>> }
>>
>> +static int __handle_cmpxchg16b(struct x86_emulate_ctxt *ctxt)
>> +{
>> + __uint128_t old128 = ctxt->dst.val128;
> There is zero chance you properly tested this patch. Please write comprehensive
> testcases in KUT's emulator64.c before posting the next version.
>
> In writeback(), the OP_MEM case quite clearly operates on "unsigned long" values
> *and* consumes orig_val in the locked case.
>
> case OP_MEM:
> if (ctxt->lock_prefix)
> return segmented_cmpxchg(ctxt,
> op->addr.mem,
> &op->orig_val,
> &op->val,
> op->bytes);
> else
> return segmented_write(ctxt,
> op->addr.mem,
> &op->val,
> op->bytes);
>
> And then emulator_cmpxchg_emulated() should be taught to do cmpxchg16b itself:
>
> /* guests cmpxchg8b have to be emulated atomically */
> if (bytes > 8 || (bytes & (bytes - 1)))
> goto emul_write;
>
> That might be a big lift, e.g. to get a 16-byte uaccess version. If it's
> unreasonably difficult, we should reject emulation of LOCK CMPXCHG16B.
I think its not a too much of the work, I have a prototype ready which
implements cmpxchg16b access in uaccess.h. Tested with a KUT as well
seems to be working fine. Will post it once I cleanup the series.
> And this code would also need to be updated to copy the full 128-bit value.
>
> /* Copy full 64-bit value for CMPXCHG8B. */
> ctxt->dst.orig_val64 = ctxt->dst.val64;
>
> Maybe something like this? Or maybe just copy 128 bits unconditionally (I'm not
> sure if that could read uninitiatlized data or not).
>
> /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */
> if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16)
> ctxt->dst.orig_val128 = ctxt->dst.val128;
> else
> ctxt->dst.orig_val64 = ctxt->dst.val64;
Yeah thanks, I completely missed that cmpxchg with lock prefix
consumes original value. I'll update the code.
>> +
>> + /* Use of the REX.W prefix promotes operation to 128 bits */
>> + if (!(ctxt->rex_bits & REX_W))
>> + return X86EMUL_UNHANDLEABLE;
>> +
>> + if (((u64) (old128 >> 0) != (u64) reg_read(ctxt, VCPU_REGS_RAX)) ||
>> + ((u64) (old128 >> 64) != (u64) reg_read(ctxt, VCPU_REGS_RDX))) {
>> + *reg_write(ctxt, VCPU_REGS_RAX) = (u64) (old128 >> 0);
>> + *reg_write(ctxt, VCPU_REGS_RDX) = (u64) (old128 >> 64);
>> + ctxt->eflags &= ~X86_EFLAGS_ZF;
>> + } else {
>> + ctxt->dst.val128 =
>> + ((__uint128_t) reg_read(ctxt, VCPU_REGS_RCX) << 64) |
>> + (u64) reg_read(ctxt, VCPU_REGS_RBX);
>> +
>> + ctxt->eflags |= X86_EFLAGS_ZF;
> IMO we should use a macro to handle 8b vs. 16b. The code is going to be heinous
> no matter what, and so to me, duplicating everything makes it twice as ugly,
> whereas a macro makes it like 10% more ugly.
>
>> + }
>> + return X86EMUL_CONTINUE;
>> +}
>> +
>> +static int em_cmpxchgxb(struct x86_emulate_ctxt *ctxt)
>> +{
>> + if (ctxt->dst.bytes == 16)
>> + return __handle_cmpxchg16b(ctxt);
>> +
>> + return __handle_cmpxchg8b(ctxt);
>> +}
>> +
>> static int em_ret(struct x86_emulate_ctxt *ctxt)
>> {
>> int rc;
>> @@ -4097,7 +4129,7 @@ static const struct gprefix pfx_0f_c7_7 = {
>>
>>
>> static const struct group_dual group9 = { {
>> - N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
>> + N, I(DstMem64 | Lock | PageTable, em_cmpxchgxb), N, N, N, N, N, N,
> Eh, I vote to keep it em_cmpxchg8b. _If_ we want to capture that its size is
> variable, maybe em_cmpxchg8b_16b? em_cmpxchgxb just looks like a typo.
Yeah ok, I was not sure how to represent variable size hence used the x.
Will rename it.
>
> Sans correct writeback() handling, something like so:
>
> ---
> arch/x86/kvm/emulate.c | 44 ++++++++++++++++++++++++--------------
> arch/x86/kvm/kvm_emulate.h | 2 ++
> 2 files changed, 30 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 6145dac4a605..3ec4555571fa 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -2201,24 +2201,33 @@ static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
> return rc;
> }
>
> +#define em_cmpxchg8b_16b(__c, rbits, mbits) \
> +do { \
> + u##mbits old = __c->dst.orig_val##mbits; \
> + \
> + BUILD_BUG_ON(rbits * 2 != mbits); \
> + \
> + if (((u##rbits)(old >> 0) != (u##rbits)reg_read(__c, VCPU_REGS_RAX)) || \
> + ((u##rbits)(old >> rbits) != (u##rbits)reg_read(__c, VCPU_REGS_RDX))) { \
> + *reg_write(__c, VCPU_REGS_RAX) = (u##rbits)(old >> 0); \
> + *reg_write(__c, VCPU_REGS_RDX) = (u##rbits)(old >> rbits); \
> + __c->eflags &= ~X86_EFLAGS_ZF; \
> + } else { \
> + __c->dst.val##mbits = ((u##mbits)reg_read(__c, VCPU_REGS_RCX) << rbits) | \
> + (u##rbits)reg_read(__c, VCPU_REGS_RBX); \
> + __c->eflags |= X86_EFLAGS_ZF; \
> + } \
> +} while (0)
> +
> static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
> {
> - u64 old = ctxt->dst.orig_val64;
> -
> - if (ctxt->dst.bytes == 16)
> + if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes))
> return X86EMUL_UNHANDLEABLE;
>
> - if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
> - ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
> - *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
> - *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
> - ctxt->eflags &= ~X86_EFLAGS_ZF;
> - } else {
> - ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
> - (u32) reg_read(ctxt, VCPU_REGS_RBX);
> -
> - ctxt->eflags |= X86_EFLAGS_ZF;
> - }
> + if (!(ctxt->rex_bits & REX_W))
> + em_cmpxchg8b_16b(ctxt, 32, 64);
> + else
> + em_cmpxchg8b_16b(ctxt, 64, 128);
> return X86EMUL_CONTINUE;
> }
>
> @@ -5418,8 +5427,11 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
> goto done;
> }
> }
> - /* Copy full 64-bit value for CMPXCHG8B. */
> - ctxt->dst.orig_val64 = ctxt->dst.val64;
> + /* Copy the full 64/128-bit value for CMPXCHG8B/16B. */
> + if (IS_ENABLED(CONFIG_X86_64) && ctxt->dst.bytes == 16)
> + ctxt->dst.orig_val128 = ctxt->dst.val128;
> + else
> + ctxt->dst.orig_val64 = ctxt->dst.val64;
>
> special_insn:
>
> diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
> index fb3dab4b5a53..0e9968319343 100644
> --- a/arch/x86/kvm/kvm_emulate.h
> +++ b/arch/x86/kvm/kvm_emulate.h
> @@ -255,6 +255,7 @@ struct operand {
> union {
> unsigned long orig_val;
> u64 orig_val64;
> + u64 orig_val128;
> };
> union {
> unsigned long *reg;
> @@ -268,6 +269,7 @@ struct operand {
> union {
> unsigned long val;
> u64 val64;
> + __uint128_t val128;
> char valptr[sizeof(avx256_t)];
> sse128_t vec_val;
> avx256_t vec_val2;
>
> base-commit: 5128b972fb2801ad9aca54d990a75611ab5283a9
> --
Thanks
Sairaj
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] KVM: x86: Add support for cmpxchg16b emulation
2026-03-06 15:38 ` Sean Christopherson
2026-03-11 11:18 ` Sairaj Kodilkar
@ 2026-03-12 11:46 ` Paolo Bonzini
1 sibling, 0 replies; 6+ messages in thread
From: Paolo Bonzini @ 2026-03-12 11:46 UTC (permalink / raw)
To: Sean Christopherson, Sairaj Kodilkar
Cc: H. Peter Anvin, Borislav Petkov, Dave Hansen, Ingo Molnar,
Thomas Gleixner, kvm, linux-kernel, x86, suravee.suthikulpanit,
vasant.hegde, nikunj.dadhania, Manali.Shukla
On 3/6/26 16:38, Sean Christopherson wrote:
>> - if (ctxt->dst.bytes == 16)
>> + /* Use of the REX.W prefix promotes operation to 128 bits */
>> + if (ctxt->rex_bits & REX_W)
>
> Uh, yeah, and the caller specifically pivoted on this size. I'm a-ok with a
> sanity check, but it should be exactly that, e.g.
>
> if (WARN_ON_ONCE(8 + (ctxt->rex_bits & REX_W) * 8 != ctxt->dst.bytes))
> return X86EMUL_UNHANDLEABLE;
> And then emulator_cmpxchg_emulated() should be taught to do cmpxchg16b itself:
>
> /* guests cmpxchg8b have to be emulated atomically */
> if (bytes > 8 || (bytes & (bytes - 1)))
> goto emul_write;
>
> That might be a big lift, e.g. to get a 16-byte uaccess version. If it's
> unreasonably difficult, we should reject emulation of LOCK CMPXCHG16B.
It's actually pretty easy because a kind soul already wrote the
cmpxchg8b version of the macros. Compile-tested only (and checked
that the asm does have the instruction):
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 3a7755c1a4410..f40e815263233 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -388,14 +388,13 @@ do { \
*_old = __old; \
likely(success); })
-#ifdef CONFIG_X86_32
-#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
+#define __try_cmpxchg8b_16b_user_asm(_b, _ptr, _pold, _new, label)({ \
bool success; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
asm_goto_output("\n" \
- "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
+ "1: " LOCK_PREFIX "cmpxchg" #_b "b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
"+A" (__old), \
@@ -407,7 +406,6 @@ do { \
if (unlikely(!success)) \
*_old = __old; \
likely(success); })
-#endif // CONFIG_X86_32
#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \
int __err = 0; \
@@ -433,7 +431,6 @@ do { \
*_old = __old; \
likely(success); })
-#ifdef CONFIG_X86_32
/*
* Unlike the normal CMPXCHG, use output GPR for both success/fail and error.
* There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are
@@ -441,13 +438,13 @@ do { \
* both ESI and EDI for the memory operand, compilation will fail if the error
* is an input+output as there will be no register available for input.
*/
-#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \
+#define __try_cmpxchg8b_16b_user_asm(_b, _ptr, _pold, _new, label) ({ \
int __result; \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
asm volatile("\n" \
- "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
+ "1: " LOCK_PREFIX "cmpxchg" #_b "b %[ptr]\n" \
"mov $0, %[result]\n\t" \
"setz %b[result]\n" \
"2:\n" \
@@ -464,7 +461,6 @@ do { \
if (unlikely(!__result)) \
*_old = __old; \
likely(__result); })
-#endif // CONFIG_X86_32
#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT
/* FIXME: this hack is definitely wrong -AK */
@@ -552,9 +549,15 @@ do { \
extern void __try_cmpxchg_user_wrong_size(void);
-#ifndef CONFIG_X86_32
+#ifdef CONFIG_X86_32
+#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) \
+ __try_cmpxchg8b_16b_user_asm(8, _ptr, _pold, _new, label)
+#else
#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \
__try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label)
+
+#define __try_cmpxchg128_user_asm(_ptr, _pold, _new, label) \
+ __try_cmpxchg8b_16b_user_asm(16, _ptr, _pold, _new, label)
#endif
/*
@@ -581,6 +584,9 @@ extern void __try_cmpxchg_user_wrong_size(void);
case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\
(_nval), _label); \
break; \
+ case 16:__ret = __try_cmpxchg128_user_asm((__force u128 *)(_ptr), (_oldp),\
+ (_nval), _label); \
+ break; \
default: __try_cmpxchg_user_wrong_size(); \
} \
__ret; })
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f2db04aa6a17f..3e1bd9cdd75e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7973,7 +7973,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
int r;
/* guests cmpxchg8b have to be emulated atomically */
- if (bytes > 8 || (bytes & (bytes - 1)))
+ if (bytes > 2 * sizeof(unsigned long) || (bytes & (bytes - 1)))
goto emul_write;
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
@@ -8013,6 +8013,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
case 8:
r = emulator_try_cmpxchg_user(u64, hva, old, new);
break;
+#ifdef CONFIG_X86_64
+ case 16:
+ r = emulator_try_cmpxchg_user(u128, hva, old, new);
+ break;
+#endif
default:
BUG();
}
(with the uaccess.h part to be split in a separate patch, of course).
Paolo
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2026-03-12 11:46 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-06 10:20 [PATCH] KVM: x86: Add support for cmpxchg16b emulation Sairaj Kodilkar
2026-03-06 15:38 ` Sean Christopherson
2026-03-11 11:18 ` Sairaj Kodilkar
2026-03-12 11:46 ` Paolo Bonzini
2026-03-07 2:39 ` kernel test robot
2026-03-07 5:40 ` kernel test robot
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox