* [PATCH v2 0/8] SSE MMIO
@ 2011-03-29 12:53 Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
` (7 more replies)
0 siblings, 8 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
This patchset extends the emulator to support SSE instructions. Currently
just one instruction (MOVDQU) is emulated, but it should be easy to add more.
Avi Kivity (8):
KVM: extend in-kernel mmio to handle >8 byte transactions
KVM: Split mmio completion into a function
KVM: 16-byte mmio support
KVM: x86 emulator: do not munge rep prefix
KVM: x86 emulator: define callbacks for using the guest fpu within
the emulator
KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3
prefixes
KVM: x86 emulator: SSE support
KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f)
arch/x86/include/asm/kvm_emulate.h | 12 ++-
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/emulate.c | 155 +++++++++++++++++++++++++++++++++--
arch/x86/kvm/x86.c | 145 ++++++++++++++++++++++++++--------
include/linux/kvm_host.h | 7 ++-
5 files changed, 274 insertions(+), 46 deletions(-)
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 14:51 ` Wei Xu
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
` (6 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
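Needed for coalesced mmio using sse. Split in-kernel mmio accesses into
chunks of at most 8 bytes, and make vcpu_mmio_read()/vcpu_mmio_write()
return the number of bytes handled, so that only the unhandled remainder
is passed on as a KVM_EXIT_MMIO exit.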
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfd7763..e6bcc97 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void)
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
const void *v)
{
- if (vcpu->arch.apic &&
- !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
- return 0;
+ int handled = 0;
+ int n;
+
+ do {
+ n = min(len, 8);
+ if (!(vcpu->arch.apic &&
+ !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ break;
+ handled += n;
+ addr += n;
+ len -= n;
+ v += n;
+ } while (len);
- return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+ return handled;
}
static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
{
- if (vcpu->arch.apic &&
- !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
- return 0;
+ int handled = 0;
+ int n;
+
+ do {
+ n = min(len, 8);
+ if (!(vcpu->arch.apic &&
+ !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ break;
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ handled += n;
+ addr += n;
+ len -= n;
+ v += n;
+ } while (len);
- return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+ return handled;
}
static void kvm_set_segment(struct kvm_vcpu *vcpu,
@@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ int handled;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
@@ -3795,10 +3819,14 @@ mmio:
/*
* Is this MMIO handled locally?
*/
- if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
+ handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+
+ if (handled == bytes)
return X86EMUL_CONTINUE;
- }
+
+ gpa += handled;
+ bytes -= handled;
+ val += handled;
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
@@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ int handled;
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
@@ -3848,9 +3877,14 @@ mmio:
/*
* Is this MMIO handled locally?
*/
- if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+ handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+ if (handled == bytes)
return X86EMUL_CONTINUE;
+ gpa += handled;
+ bytes -= handled;
+ val += handled;
+
vcpu->mmio_needed = 1;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 2/8] KVM: Split mmio completion into a function
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
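Move the pio/mmio completion code out of kvm_arch_vcpu_ioctl_run() into a
new helper, complete_mmio(), to make room for sse mmio completions.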
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++--------------
1 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e6bcc97..2eb1124 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5415,6 +5415,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return r;
}
+static int complete_mmio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int r;
+
+ if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
+ return 1;
+
+ if (vcpu->mmio_needed) {
+ memcpy(vcpu->mmio_data, run->mmio.data, 8);
+ vcpu->mmio_read_completed = 1;
+ vcpu->mmio_needed = 0;
+ }
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ if (r != EMULATE_DONE)
+ return 0;
+ return 1;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -5441,20 +5462,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
}
- if (vcpu->arch.pio.count || vcpu->mmio_needed) {
- if (vcpu->mmio_needed) {
- memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
- vcpu->mmio_read_completed = 1;
- vcpu->mmio_needed = 0;
- }
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- if (r != EMULATE_DONE) {
- r = 0;
- goto out;
- }
- }
+ r = complete_mmio(vcpu);
+ if (r <= 0)
+ goto out;
+
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
kvm_register_write(vcpu, VCPU_REGS_RAX,
kvm_run->hypercall.ret);
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 3/8] KVM: 16-byte mmio support
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Since sse instructions can issue 16-byte mmios, we need to support them. We
can't increase the kvm_run mmio buffer size to 16 bytes without breaking
compatibility, so instead we break the large mmios into two smaller 8-byte
ones. Since the bus is 64-bit we aren't breaking any atomicity guarantees.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++---------
include/linux/kvm_host.h | 7 ++++++-
3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35f81b1..e820c63 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MMIO_SIZE 16
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2eb1124..01aafc8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3833,8 +3833,10 @@ mmio:
vcpu->mmio_needed = 1;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->mmio_size = bytes;
+ vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
+ vcpu->mmio_index = 0;
return X86EMUL_IO_NEEDED;
}
@@ -3886,11 +3888,14 @@ mmio:
val += handled;
vcpu->mmio_needed = 1;
+ memcpy(vcpu->mmio_data, val, bytes);
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->mmio_size = bytes;
+ vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
- memcpy(vcpu->run->mmio.data, val, bytes);
+ memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+ vcpu->mmio_index = 0;
return X86EMUL_CONTINUE;
}
@@ -4498,11 +4503,9 @@ restart:
if (!vcpu->arch.pio.in)
vcpu->arch.pio.count = 0;
r = EMULATE_DO_MMIO;
- } else if (vcpu->mmio_needed) {
- if (vcpu->mmio_is_write)
- vcpu->mmio_needed = 0;
+ } else if (vcpu->mmio_needed)
r = EMULATE_DO_MMIO;
- } else if (r == EMULATION_RESTART)
+ else if (r == EMULATION_RESTART)
goto restart;
else
r = EMULATE_DONE;
@@ -5424,9 +5427,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu)
return 1;
if (vcpu->mmio_needed) {
- memcpy(vcpu->mmio_data, run->mmio.data, 8);
- vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
+ if (!vcpu->mmio_is_write)
+ memcpy(vcpu->mmio_data, run->mmio.data, 8);
+ vcpu->mmio_index += 8;
+ if (vcpu->mmio_index < vcpu->mmio_size) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
+ memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
+ run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
+ run->mmio.is_write = vcpu->mmio_is_write;
+ vcpu->mmio_needed = 1;
+ return 0;
+ }
+ if (vcpu->mmio_is_write)
+ return 1;
+ vcpu->mmio_read_completed = 1;
}
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 57d7092..1934f48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -27,6 +27,10 @@
#include <asm/kvm_host.h>
+#ifndef KVM_MMIO_SIZE
+#define KVM_MMIO_SIZE 8
+#endif
+
/*
* vcpu->requests bit members
*/
@@ -133,7 +137,8 @@ struct kvm_vcpu {
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
- unsigned char mmio_data[8];
+ int mmio_index;
+ unsigned char mmio_data[KVM_MMIO_SIZE];
gpa_t mmio_phys_addr;
#endif
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (2 preceding siblings ...)
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Currently we store a rep prefix as 1 or 2 depending on whether it is a REPE or
REPNE. Since sse instructions depend on the prefix value, store the original
prefix byte (0xf3 or 0xf2) instead, to simplify things further on.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 4 ++--
arch/x86/kvm/emulate.c | 4 +---
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f52135..c00aed1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -249,8 +249,8 @@ struct x86_emulate_ctxt {
};
/* Repeat String Operation Prefix */
-#define REPE_PREFIX 1
-#define REPNE_PREFIX 2
+#define REPE_PREFIX 0xf3
+#define REPNE_PREFIX 0xf2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 14c5ad5..7066cf8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2692,10 +2692,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
c->lock_prefix = 1;
break;
case 0xf2: /* REPNE/REPNZ */
- c->rep_prefix = REPNE_PREFIX;
- break;
case 0xf3: /* REP/REPE/REPZ */
- c->rep_prefix = REPE_PREFIX;
+ c->rep_prefix = c->b;
break;
default:
goto done_prefixes;
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (3 preceding siblings ...)
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Needed for emulating fpu instructions.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 2 ++
arch/x86/kvm/x86.c | 18 ++++++++++++++++++
2 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c00aed1..4c0e682 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -158,6 +158,8 @@ struct x86_emulate_ops {
int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+ void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
+ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01aafc8..9686547 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4281,6 +4281,22 @@ static void emulator_set_segment_selector(u16 sel, int seg,
kvm_set_segment(vcpu, &kvm_seg, seg);
}
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+ preempt_disable();
+ kvm_load_guest_fpu(ctxt->vcpu);
+ /*
+ * CR0.TS may reference the host fpu state, not the guest fpu state,
+ * so it may be clear at this point.
+ */
+ clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+ preempt_enable();
+}
+
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
@@ -4304,6 +4320,8 @@ static struct x86_emulate_ops emulate_ops = {
.set_dr = emulator_set_dr,
.set_msr = kvm_set_msr,
.get_msr = kvm_get_msr,
+ .get_fpu = emulator_get_fpu,
+ .put_fpu = emulator_put_fpu,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (4 preceding siblings ...)
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Most SIMD instructions use the 66/f2/f3 prefixes to distinguish between
different variants of the same instruction. Usually the encoding is quite
regular, but in some cases (including non-SIMD instructions) the prefixes
generate very different instructions. Examples include XCHG/PAUSE,
MOVQ/MOVDQA/MOVDQU, and MOVBE/CRC32.
Allow the emulator to handle these special cases by splitting such opcodes
into groups, with different decode flags and execution functions for different
prefixes.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/emulate.c | 26 +++++++++++++++++++++++++-
1 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7066cf8..458faea 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -75,6 +75,7 @@
#define Stack (1<<13) /* Stack instruction (push/pop) */
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
+#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */
/* Misc flags */
#define VendorSpecific (1<<22) /* Vendor specific instruction */
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
@@ -106,6 +107,7 @@ struct opcode {
int (*execute)(struct x86_emulate_ctxt *ctxt);
struct opcode *group;
struct group_dual *gdual;
+ struct gprefix *gprefix;
} u;
};
@@ -114,6 +116,13 @@ struct group_dual {
struct opcode mod3[8];
};
+struct gprefix {
+ struct opcode pfx_no;
+ struct opcode pfx_66;
+ struct opcode pfx_f2;
+ struct opcode pfx_f3;
+};
+
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
@@ -2625,7 +2634,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
struct decode_cache *c = &ctxt->decode;
int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode;
- int def_op_bytes, def_ad_bytes, dual, goffset;
+ int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix;
+ bool op_prefix = false;
struct opcode opcode, *g_mod012, *g_mod3;
struct operand memop = { .type = OP_NONE };
@@ -2662,6 +2672,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
for (;;) {
switch (c->b = insn_fetch(u8, 1, c->eip)) {
case 0x66: /* operand-size override */
+ op_prefix = true;
/* switch between 2/4 bytes */
c->op_bytes = def_op_bytes ^ 6;
break;
@@ -2742,6 +2753,19 @@ done_prefixes:
c->d |= opcode.flags;
}
+ if (c->d & Prefix) {
+ if (c->rep_prefix && op_prefix)
+ return X86EMUL_UNHANDLEABLE;
+ simd_prefix = op_prefix ? 0x66 : c->rep_prefix;
+ switch (simd_prefix) {
+ case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
+ case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
+ case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
+ case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
+ }
+ c->d |= opcode.flags;
+ }
+
c->execute = opcode.u.execute;
/* Unrecognised? */
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 7/8] KVM: x86 emulator: SSE support
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (5 preceding siblings ...)
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
2011-06-28 6:49 ` Cyclonus J
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
7 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Add support for marking an instruction as SSE, switching registers used
to the SSE register file.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 6 ++-
arch/x86/kvm/emulate.c | 102 ++++++++++++++++++++++++++++++++++-
2 files changed, 104 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 4c0e682..48693f0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -162,9 +162,11 @@ struct x86_emulate_ops {
void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
};
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
unsigned int bytes;
union {
unsigned long orig_val;
@@ -176,11 +178,13 @@ struct operand {
ulong ea;
unsigned seg;
} mem;
+ unsigned xmm;
} addr;
union {
unsigned long val;
u64 val64;
char valptr[sizeof(unsigned long) + 2];
+ sse128_t vec_val;
};
};
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 458faea..7b7d96a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -76,6 +76,7 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */
+#define Sse (1<<17) /* SSE Vector instruction */
/* Misc flags */
#define VendorSpecific (1<<22) /* Vendor specific instruction */
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
@@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, DE_VECTOR, 0, false);
}
+static int emulate_nm(struct x86_emulate_ctxt *ctxt)
+{
+ return emulate_exception(ctxt, NM_VECTOR, 0, false);
+}
+
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
unsigned long eip, u8 *dest)
@@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op)
}
}
-static void decode_register_operand(struct operand *op,
+static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
+ case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
+ case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
+ case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
+ case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
+ case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
+ case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
+ case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
+ case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
+ case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
+ case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
+ case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
+ case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
+ case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
+ case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+#endif
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
+ int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
+ case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
+ case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
+ case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
+ case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
+ case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
+ case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
+ case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
+ case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
+ case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
+ case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
+ case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
+ case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
+ case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
+ case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+#endif
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
+ struct operand *op,
struct decode_cache *c,
int inhibit_bytereg)
{
@@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op,
if (!(c->d & ModRM))
reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+
+ if (c->d & Sse) {
+ op->type = OP_XMM;
+ op->bytes = 16;
+ op->addr.xmm = reg;
+ read_sse_reg(ctxt, &op->vec_val, reg);
+ return;
+ }
+
op->type = OP_REG;
if ((c->d & ByteOp) && !inhibit_bytereg) {
op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
@@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
op->addr.reg = decode_register(c->modrm_rm,
c->regs, c->d & ByteOp);
+ if (c->d & Sse) {
+ op->type = OP_XMM;
+ op->bytes = 16;
+ op->addr.xmm = c->modrm_rm;
+ read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
+ return rc;
+ }
fetch_register_operand(op);
return rc;
}
@@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE)
return rc;
break;
+ case OP_XMM:
+ write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
+ break;
case OP_NONE:
/* no writeback */
break;
@@ -2785,6 +2866,9 @@ done_prefixes:
c->op_bytes = 4;
}
+ if (c->d & Sse)
+ c->op_bytes = 16;
+
/* ModRM and SIB bytes. */
if (c->d & ModRM) {
rc = decode_modrm(ctxt, ops, &memop);
@@ -2814,7 +2898,7 @@ done_prefixes:
case SrcNone:
break;
case SrcReg:
- decode_register_operand(&c->src, c, 0);
+ decode_register_operand(ctxt, &c->src, c, 0);
break;
case SrcMem16:
memop.bytes = 2;
@@ -2905,7 +2989,7 @@ done_prefixes:
/* Decode and fetch the destination operand: register or memory. */
switch (c->d & DstMask) {
case DstReg:
- decode_register_operand(&c->dst, c,
+ decode_register_operand(ctxt, &c->dst, c,
c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
break;
case DstImmUByte:
@@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
+ if ((c->d & Sse)
+ && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM)
+ || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
+
+ if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) {
+ rc = emulate_nm(ctxt);
+ goto done;
+ }
+
/* Privileged instruction can be executed only in CPL=0 */
if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
rc = emulate_gp(ctxt, 0);
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f)
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (6 preceding siblings ...)
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/emulate.c | 23 +++++++++++++++++++++--
1 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7b7d96a..bfb34af 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2415,11 +2415,19 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_movdqu(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes);
+ return X86EMUL_CONTINUE;
+}
+
#define D(_y) { .flags = (_y) }
#define N D(0)
#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
#define D2bv(_f) D((_f) | ByteOp), D(_f)
#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
@@ -2484,6 +2492,10 @@ static struct opcode group11[] = {
I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
};
+static struct gprefix pfx_0f_6f_0f_7f = {
+ N, N, N, I(Sse, em_movdqu),
+};
+
static struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
D6ALU(Lock),
@@ -2608,9 +2620,15 @@ static struct opcode twobyte_table[256] = {
/* 0x50 - 0x5F */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
/* 0x60 - 0x6F */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x70 - 0x7F */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x80 - 0x8F */
X16(D(SrcImm)),
/* 0x90 - 0x9F */
@@ -2654,6 +2672,7 @@ static struct opcode twobyte_table[256] = {
#undef G
#undef GD
#undef I
+#undef GP
#undef D2bv
#undef I2bv
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
@ 2011-03-29 14:51 ` Wei Xu
2011-03-29 14:55 ` Avi Kivity
0 siblings, 1 reply; 13+ messages in thread
From: Wei Xu @ 2011-03-29 14:51 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti, kvm
Avi,
Really appreciate your help! Anything if you need help let me know. I am
working on qemu-kvm now and willing to help out...
Wei Xu
On 3/29/11 5:53 AM, "Avi Kivity" <avi@redhat.com> wrote:
> Needed for coalesced mmio using sse.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++++----------
> 1 files changed, 46 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfd7763..e6bcc97 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void)
> static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
> const void *v)
> {
> - if (vcpu->arch.apic &&
> - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
> - return 0;
> + int handled = 0;
> + int n;
> +
> + do {
> + n = min(len, 8);
> + if (!(vcpu->arch.apic &&
> + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
> + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
> + break;
> + handled += n;
> + addr += n;
> + len -= n;
> + v += n;
> + } while (len);
>
> - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
> + return handled;
> }
>
> static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void
> *v)
> {
> - if (vcpu->arch.apic &&
> - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
> - return 0;
> + int handled = 0;
> + int n;
> +
> + do {
> + n = min(len, 8);
> + if (!(vcpu->arch.apic &&
> + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
> + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
> + break;
> + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
> + handled += n;
> + addr += n;
> + len -= n;
> + v += n;
> + } while (len);
>
> - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
> + return handled;
> }
>
> static void kvm_set_segment(struct kvm_vcpu *vcpu,
> @@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr,
> struct kvm_vcpu *vcpu)
> {
> gpa_t gpa;
> + int handled;
>
> if (vcpu->mmio_read_completed) {
> memcpy(val, vcpu->mmio_data, bytes);
> @@ -3795,10 +3819,14 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
> - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
> + handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
> +
> + if (handled == bytes)
> return X86EMUL_CONTINUE;
> - }
> +
> + gpa += handled;
> + bytes -= handled;
> + val += handled;
>
> trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
>
> @@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long
> addr,
> struct kvm_vcpu *vcpu)
> {
> gpa_t gpa;
> + int handled;
>
> gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
>
> @@ -3848,9 +3877,14 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
> + handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
> + if (handled == bytes)
> return X86EMUL_CONTINUE;
>
> + gpa += handled;
> + bytes -= handled;
> + val += handled;
> +
> vcpu->mmio_needed = 1;
> vcpu->run->exit_reason = KVM_EXIT_MMIO;
> vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
2011-03-29 14:51 ` Wei Xu
@ 2011-03-29 14:55 ` Avi Kivity
0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 14:55 UTC (permalink / raw)
To: Wei Xu; +Cc: Marcelo Tosatti, kvm
On 03/29/2011 04:51 PM, Wei Xu wrote:
> Avi,
>
> Really appreciate your help! Anything if you need help let me know. I am
> working on qemu-kvm now and willing to help out...
Note that this patchset only implements movdqu. Feel free to tackle
movdqa (easy) and movq (harder - needs mmx support).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 7/8] KVM: x86 emulator: SSE support
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
@ 2011-06-28 6:49 ` Cyclonus J
2011-06-28 8:11 ` Avi Kivity
0 siblings, 1 reply; 13+ messages in thread
From: Cyclonus J @ 2011-06-28 6:49 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Wei Xu
On Tue, Mar 29, 2011 at 5:54 AM, Avi Kivity <avi@redhat.com> wrote:
> Add support for marking an instruction as SSE, switching registers used
> to the SSE register file.
Avi,
So this change will only support XMM register as its destination not
memory, right? I am seeing the mmio size in qemu is still 8 bytes.
Do we need to support memory destination and update qemu as well?
Thanks,
CJ
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> arch/x86/include/asm/kvm_emulate.h | 6 ++-
> arch/x86/kvm/emulate.c | 102 ++++++++++++++++++++++++++++++++++-
> 2 files changed, 104 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index 4c0e682..48693f0 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -162,9 +162,11 @@ struct x86_emulate_ops {
> void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
> };
>
> +typedef u32 __attribute__((vector_size(16))) sse128_t;
> +
> /* Type, address-of, and value of an instruction's operand. */
> struct operand {
> - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
> + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
> unsigned int bytes;
> union {
> unsigned long orig_val;
> @@ -176,11 +178,13 @@ struct operand {
> ulong ea;
> unsigned seg;
> } mem;
> + unsigned xmm;
> } addr;
> union {
> unsigned long val;
> u64 val64;
> char valptr[sizeof(unsigned long) + 2];
> + sse128_t vec_val;
> };
> };
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 458faea..7b7d96a 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -76,6 +76,7 @@
> #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
> #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
> #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */
> +#define Sse (1<<17) /* SSE Vector instruction */
> /* Misc flags */
> #define VendorSpecific (1<<22) /* Vendor specific instruction */
> #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
> @@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
> return emulate_exception(ctxt, DE_VECTOR, 0, false);
> }
>
> +static int emulate_nm(struct x86_emulate_ctxt *ctxt)
> +{
> + return emulate_exception(ctxt, NM_VECTOR, 0, false);
> +}
> +
> static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
> struct x86_emulate_ops *ops,
> unsigned long eip, u8 *dest)
> @@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op)
> }
> }
>
> -static void decode_register_operand(struct operand *op,
> +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
> +{
> + ctxt->ops->get_fpu(ctxt);
> + switch (reg) {
> + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
> + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
> + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
> + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
> + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
> + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
> + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
> + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
> +#ifdef CONFIG_X86_64
> + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
> + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
> + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
> + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
> + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
> + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
> + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
> + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
> +#endif
> + default: BUG();
> + }
> + ctxt->ops->put_fpu(ctxt);
> +}
> +
> +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
> + int reg)
> +{
> + ctxt->ops->get_fpu(ctxt);
> + switch (reg) {
> + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
> + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
> + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
> + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
> + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
> + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
> + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
> + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
> +#ifdef CONFIG_X86_64
> + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
> + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
> + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
> + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
> + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
> + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
> + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
> + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
> +#endif
> + default: BUG();
> + }
> + ctxt->ops->put_fpu(ctxt);
> +}
> +
> +static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
> + struct operand *op,
> struct decode_cache *c,
> int inhibit_bytereg)
> {
> @@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op,
>
> if (!(c->d & ModRM))
> reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
> +
> + if (c->d & Sse) {
> + op->type = OP_XMM;
> + op->bytes = 16;
> + op->addr.xmm = reg;
> + read_sse_reg(ctxt, &op->vec_val, reg);
> + return;
> + }
> +
> op->type = OP_REG;
> if ((c->d & ByteOp) && !inhibit_bytereg) {
> op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
> @@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
> op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
> op->addr.reg = decode_register(c->modrm_rm,
> c->regs, c->d & ByteOp);
> + if (c->d & Sse) {
> + op->type = OP_XMM;
> + op->bytes = 16;
> + op->addr.xmm = c->modrm_rm;
> + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
> + return rc;
> + }
> fetch_register_operand(op);
> return rc;
> }
> @@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
> if (rc != X86EMUL_CONTINUE)
> return rc;
> break;
> + case OP_XMM:
> + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
> + break;
> case OP_NONE:
> /* no writeback */
> break;
> @@ -2785,6 +2866,9 @@ done_prefixes:
> c->op_bytes = 4;
> }
>
> + if (c->d & Sse)
> + c->op_bytes = 16;
> +
> /* ModRM and SIB bytes. */
> if (c->d & ModRM) {
> rc = decode_modrm(ctxt, ops, &memop);
> @@ -2814,7 +2898,7 @@ done_prefixes:
> case SrcNone:
> break;
> case SrcReg:
> - decode_register_operand(&c->src, c, 0);
> + decode_register_operand(ctxt, &c->src, c, 0);
> break;
> case SrcMem16:
> memop.bytes = 2;
> @@ -2905,7 +2989,7 @@ done_prefixes:
> /* Decode and fetch the destination operand: register or memory. */
> switch (c->d & DstMask) {
> case DstReg:
> - decode_register_operand(&c->dst, c,
> + decode_register_operand(ctxt, &c->dst, c,
> c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
> break;
> case DstImmUByte:
> @@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
> goto done;
> }
>
> + if ((c->d & Sse)
> + && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM)
> + || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) {
> + rc = emulate_ud(ctxt);
> + goto done;
> + }
> +
> + if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) {
> + rc = emulate_nm(ctxt);
> + goto done;
> + }
> +
> /* Privileged instruction can be executed only in CPL=0 */
> if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
> rc = emulate_gp(ctxt, 0);
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 7/8] KVM: x86 emulator: SSE support
2011-06-28 6:49 ` Cyclonus J
@ 2011-06-28 8:11 ` Avi Kivity
0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-06-28 8:11 UTC (permalink / raw)
To: Cyclonus J; +Cc: Marcelo Tosatti, kvm, Wei Xu
On 06/28/2011 09:49 AM, Cyclonus J wrote:
> On Tue, Mar 29, 2011 at 5:54 AM, Avi Kivity<avi@redhat.com> wrote:
> > Add support for marking an instruction as SSE, switching registers used
> > to the SSE register file.
>
> Avi,
>
> So this change will only support XMM register as its destination not
> memory, right? I am seeing the mmio size in qemu is still 8 bytes.
>
Memory is supported as well.
> Do we need to support memory destination and update qemu as well?
>
kvm breaks up 16 byte writes into two 8-byte writes, so that the
interface between kvm and userspace is not affected.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2011-06-28 8:11 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
2011-03-29 14:51 ` Wei Xu
2011-03-29 14:55 ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
2011-06-28 6:49 ` Cyclonus J
2011-06-28 8:11 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.