* [PATCH v2 0/8] SSE MMIO
@ 2011-03-29 12:53 Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
` (7 more replies)
0 siblings, 8 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
This patchset extends the emulator to support SSE instructions. Currently
just one instruction (MOVDQU) is emulated, but it should be easy to add more.
Avi Kivity (8):
KVM: extend in-kernel mmio to handle >8 byte transactions
KVM: Split mmio completion into a function
KVM: 16-byte mmio support
KVM: x86 emulator: do not munge rep prefix
KVM: x86 emulator: define callbacks for using the guest fpu within
the emulator
KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3
prefixes
KVM: x86 emulator: SSE support
KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f)
arch/x86/include/asm/kvm_emulate.h | 12 ++-
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/emulate.c | 155 +++++++++++++++++++++++++++++++++--
arch/x86/kvm/x86.c | 145 ++++++++++++++++++++++++++--------
include/linux/kvm_host.h | 7 ++-
5 files changed, 274 insertions(+), 46 deletions(-)
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 14:51 ` Wei Xu
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
` (6 subsequent siblings)
7 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Needed for coalesced mmio using sse.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++++----------
1 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfd7763..e6bcc97 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void)
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
const void *v)
{
- if (vcpu->arch.apic &&
- !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
- return 0;
+ int handled = 0;
+ int n;
+
+ do {
+ n = min(len, 8);
+ if (!(vcpu->arch.apic &&
+ !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ break;
+ handled += n;
+ addr += n;
+ len -= n;
+ v += n;
+ } while (len);
- return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+ return handled;
}
static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
{
- if (vcpu->arch.apic &&
- !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
- return 0;
+ int handled = 0;
+ int n;
+
+ do {
+ n = min(len, 8);
+ if (!(vcpu->arch.apic &&
+ !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ break;
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+ handled += n;
+ addr += n;
+ len -= n;
+ v += n;
+ } while (len);
- return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+ return handled;
}
static void kvm_set_segment(struct kvm_vcpu *vcpu,
@@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ int handled;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
@@ -3795,10 +3819,14 @@ mmio:
/*
* Is this MMIO handled locally?
*/
- if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
+ handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+
+ if (handled == bytes)
return X86EMUL_CONTINUE;
- }
+
+ gpa += handled;
+ bytes -= handled;
+ val += handled;
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
@@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ int handled;
gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
@@ -3848,9 +3877,14 @@ mmio:
/*
* Is this MMIO handled locally?
*/
- if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+ handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+ if (handled == bytes)
return X86EMUL_CONTINUE;
+ gpa += handled;
+ bytes -= handled;
+ val += handled;
+
vcpu->mmio_needed = 1;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 2/8] KVM: Split mmio completion into a function
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
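Move the mmio completion logic out of kvm_arch_vcpu_ioctl_run() into a new
complete_mmio() function, to make room for sse mmio completions.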
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/x86.c | 39 +++++++++++++++++++++++++--------------
1 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e6bcc97..2eb1124 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5415,6 +5415,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return r;
}
+static int complete_mmio(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int r;
+
+ if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
+ return 1;
+
+ if (vcpu->mmio_needed) {
+ memcpy(vcpu->mmio_data, run->mmio.data, 8);
+ vcpu->mmio_read_completed = 1;
+ vcpu->mmio_needed = 0;
+ }
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ if (r != EMULATE_DONE)
+ return 0;
+ return 1;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -5441,20 +5462,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
}
- if (vcpu->arch.pio.count || vcpu->mmio_needed) {
- if (vcpu->mmio_needed) {
- memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
- vcpu->mmio_read_completed = 1;
- vcpu->mmio_needed = 0;
- }
- vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- if (r != EMULATE_DONE) {
- r = 0;
- goto out;
- }
- }
+ r = complete_mmio(vcpu);
+ if (r <= 0)
+ goto out;
+
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
kvm_register_write(vcpu, VCPU_REGS_RAX,
kvm_run->hypercall.ret);
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 3/8] KVM: 16-byte mmio support
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Since sse instructions can issue 16-byte mmios, we need to support them. We
can't increase the kvm_run mmio buffer size to 16 bytes without breaking
compatibility, so instead we break the large mmios into two smaller 8-byte
ones. Since the bus is 64-bit we aren't breaking any atomicity guarantees.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++---------
include/linux/kvm_host.h | 7 ++++++-
3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35f81b1..e820c63 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MMIO_SIZE 16
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2eb1124..01aafc8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3833,8 +3833,10 @@ mmio:
vcpu->mmio_needed = 1;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->mmio_size = bytes;
+ vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
+ vcpu->mmio_index = 0;
return X86EMUL_IO_NEEDED;
}
@@ -3886,11 +3888,14 @@ mmio:
val += handled;
vcpu->mmio_needed = 1;
+ memcpy(vcpu->mmio_data, val, bytes);
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
- vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+ vcpu->mmio_size = bytes;
+ vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
- memcpy(vcpu->run->mmio.data, val, bytes);
+ memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+ vcpu->mmio_index = 0;
return X86EMUL_CONTINUE;
}
@@ -4498,11 +4503,9 @@ restart:
if (!vcpu->arch.pio.in)
vcpu->arch.pio.count = 0;
r = EMULATE_DO_MMIO;
- } else if (vcpu->mmio_needed) {
- if (vcpu->mmio_is_write)
- vcpu->mmio_needed = 0;
+ } else if (vcpu->mmio_needed)
r = EMULATE_DO_MMIO;
- } else if (r == EMULATION_RESTART)
+ else if (r == EMULATION_RESTART)
goto restart;
else
r = EMULATE_DONE;
@@ -5424,9 +5427,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu)
return 1;
if (vcpu->mmio_needed) {
- memcpy(vcpu->mmio_data, run->mmio.data, 8);
- vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
+ if (!vcpu->mmio_is_write)
+ memcpy(vcpu->mmio_data, run->mmio.data, 8);
+ vcpu->mmio_index += 8;
+ if (vcpu->mmio_index < vcpu->mmio_size) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
+ memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
+ run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
+ run->mmio.is_write = vcpu->mmio_is_write;
+ vcpu->mmio_needed = 1;
+ return 0;
+ }
+ if (vcpu->mmio_is_write)
+ return 1;
+ vcpu->mmio_read_completed = 1;
}
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 57d7092..1934f48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -27,6 +27,10 @@
#include <asm/kvm_host.h>
+#ifndef KVM_MMIO_SIZE
+#define KVM_MMIO_SIZE 8
+#endif
+
/*
* vcpu->requests bit members
*/
@@ -133,7 +137,8 @@ struct kvm_vcpu {
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
- unsigned char mmio_data[8];
+ int mmio_index;
+ unsigned char mmio_data[KVM_MMIO_SIZE];
gpa_t mmio_phys_addr;
#endif
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (2 preceding siblings ...)
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Currently we store a rep prefix as 1 or 2 depending on whether it is a REPE or
REPNE. Since sse instructions depend on the prefix value, store it as the
original prefix byte (0xf3 or 0xf2) to simplify things further on.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 4 ++--
arch/x86/kvm/emulate.c | 4 +---
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f52135..c00aed1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -249,8 +249,8 @@ struct x86_emulate_ctxt {
};
/* Repeat String Operation Prefix */
-#define REPE_PREFIX 1
-#define REPNE_PREFIX 2
+#define REPE_PREFIX 0xf3
+#define REPNE_PREFIX 0xf2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 14c5ad5..7066cf8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2692,10 +2692,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
c->lock_prefix = 1;
break;
case 0xf2: /* REPNE/REPNZ */
- c->rep_prefix = REPNE_PREFIX;
- break;
case 0xf3: /* REP/REPE/REPZ */
- c->rep_prefix = REPE_PREFIX;
+ c->rep_prefix = c->b;
break;
default:
goto done_prefixes;
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (3 preceding siblings ...)
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Needed for emulating fpu instructions.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 2 ++
arch/x86/kvm/x86.c | 18 ++++++++++++++++++
2 files changed, 20 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c00aed1..4c0e682 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -158,6 +158,8 @@ struct x86_emulate_ops {
int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+ void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
+ void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01aafc8..9686547 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4281,6 +4281,22 @@ static void emulator_set_segment_selector(u16 sel, int seg,
kvm_set_segment(vcpu, &kvm_seg, seg);
}
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+ preempt_disable();
+ kvm_load_guest_fpu(ctxt->vcpu);
+ /*
+ * CR0.TS may reference the host fpu state, not the guest fpu state,
+ * so it may be clear at this point.
+ */
+ clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+ preempt_enable();
+}
+
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt_system,
.write_std = kvm_write_guest_virt_system,
@@ -4304,6 +4320,8 @@ static struct x86_emulate_ops emulate_ops = {
.set_dr = emulator_set_dr,
.set_msr = kvm_set_msr,
.get_msr = kvm_get_msr,
+ .get_fpu = emulator_get_fpu,
+ .put_fpu = emulator_put_fpu,
};
static void cache_all_regs(struct kvm_vcpu *vcpu)
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (4 preceding siblings ...)
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Most SIMD instructions use the 66/f2/f3 prefixes to distinguish between
different variants of the same instruction. Usually the encoding is quite
regular, but in some cases (including non-SIMD instructions) the prefixes
generate very different instructions. Examples include XCHG/PAUSE,
MOVQ/MOVDQA/MOVDQU, and MOVBE/CRC32.
Allow the emulator to handle these special cases by splitting such opcodes
into groups, with different decode flags and execution functions for different
prefixes.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/emulate.c | 26 +++++++++++++++++++++++++-
1 files changed, 25 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7066cf8..458faea 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -75,6 +75,7 @@
#define Stack (1<<13) /* Stack instruction (push/pop) */
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
+#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */
/* Misc flags */
#define VendorSpecific (1<<22) /* Vendor specific instruction */
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
@@ -106,6 +107,7 @@ struct opcode {
int (*execute)(struct x86_emulate_ctxt *ctxt);
struct opcode *group;
struct group_dual *gdual;
+ struct gprefix *gprefix;
} u;
};
@@ -114,6 +116,13 @@ struct group_dual {
struct opcode mod3[8];
};
+struct gprefix {
+ struct opcode pfx_no;
+ struct opcode pfx_66;
+ struct opcode pfx_f2;
+ struct opcode pfx_f3;
+};
+
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
@@ -2625,7 +2634,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
struct decode_cache *c = &ctxt->decode;
int rc = X86EMUL_CONTINUE;
int mode = ctxt->mode;
- int def_op_bytes, def_ad_bytes, dual, goffset;
+ int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix;
+ bool op_prefix = false;
struct opcode opcode, *g_mod012, *g_mod3;
struct operand memop = { .type = OP_NONE };
@@ -2662,6 +2672,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
for (;;) {
switch (c->b = insn_fetch(u8, 1, c->eip)) {
case 0x66: /* operand-size override */
+ op_prefix = true;
/* switch between 2/4 bytes */
c->op_bytes = def_op_bytes ^ 6;
break;
@@ -2742,6 +2753,19 @@ done_prefixes:
c->d |= opcode.flags;
}
+ if (c->d & Prefix) {
+ if (c->rep_prefix && op_prefix)
+ return X86EMUL_UNHANDLEABLE;
+ simd_prefix = op_prefix ? 0x66 : c->rep_prefix;
+ switch (simd_prefix) {
+ case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
+ case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
+ case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
+ case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
+ }
+ c->d |= opcode.flags;
+ }
+
c->execute = opcode.u.execute;
/* Unrecognised? */
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 7/8] KVM: x86 emulator: SSE support
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (5 preceding siblings ...)
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
2011-06-28 6:49 ` Cyclonus J
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
7 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Add support for marking an instruction as SSE, switching registers used
to the SSE register file.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 6 ++-
arch/x86/kvm/emulate.c | 102 ++++++++++++++++++++++++++++++++++-
2 files changed, 104 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 4c0e682..48693f0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -162,9 +162,11 @@ struct x86_emulate_ops {
void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
};
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+
/* Type, address-of, and value of an instruction's operand. */
struct operand {
- enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+ enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
unsigned int bytes;
union {
unsigned long orig_val;
@@ -176,11 +178,13 @@ struct operand {
ulong ea;
unsigned seg;
} mem;
+ unsigned xmm;
} addr;
union {
unsigned long val;
u64 val64;
char valptr[sizeof(unsigned long) + 2];
+ sse128_t vec_val;
};
};
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 458faea..7b7d96a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -76,6 +76,7 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */
+#define Sse (1<<17) /* SSE Vector instruction */
/* Misc flags */
#define VendorSpecific (1<<22) /* Vendor specific instruction */
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
@@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, DE_VECTOR, 0, false);
}
+static int emulate_nm(struct x86_emulate_ctxt *ctxt)
+{
+ return emulate_exception(ctxt, NM_VECTOR, 0, false);
+}
+
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
unsigned long eip, u8 *dest)
@@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op)
}
}
-static void decode_register_operand(struct operand *op,
+static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
+ case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
+ case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
+ case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
+ case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
+ case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
+ case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
+ case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
+ case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
+ case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
+ case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
+ case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
+ case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
+ case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
+ case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+#endif
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
+ int reg)
+{
+ ctxt->ops->get_fpu(ctxt);
+ switch (reg) {
+ case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
+ case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
+ case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
+ case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
+ case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
+ case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
+ case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
+ case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+ case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
+ case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
+ case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
+ case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
+ case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
+ case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
+ case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
+ case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+#endif
+ default: BUG();
+ }
+ ctxt->ops->put_fpu(ctxt);
+}
+
+static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
+ struct operand *op,
struct decode_cache *c,
int inhibit_bytereg)
{
@@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op,
if (!(c->d & ModRM))
reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+
+ if (c->d & Sse) {
+ op->type = OP_XMM;
+ op->bytes = 16;
+ op->addr.xmm = reg;
+ read_sse_reg(ctxt, &op->vec_val, reg);
+ return;
+ }
+
op->type = OP_REG;
if ((c->d & ByteOp) && !inhibit_bytereg) {
op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
@@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
op->addr.reg = decode_register(c->modrm_rm,
c->regs, c->d & ByteOp);
+ if (c->d & Sse) {
+ op->type = OP_XMM;
+ op->bytes = 16;
+ op->addr.xmm = c->modrm_rm;
+ read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
+ return rc;
+ }
fetch_register_operand(op);
return rc;
}
@@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE)
return rc;
break;
+ case OP_XMM:
+ write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
+ break;
case OP_NONE:
/* no writeback */
break;
@@ -2785,6 +2866,9 @@ done_prefixes:
c->op_bytes = 4;
}
+ if (c->d & Sse)
+ c->op_bytes = 16;
+
/* ModRM and SIB bytes. */
if (c->d & ModRM) {
rc = decode_modrm(ctxt, ops, &memop);
@@ -2814,7 +2898,7 @@ done_prefixes:
case SrcNone:
break;
case SrcReg:
- decode_register_operand(&c->src, c, 0);
+ decode_register_operand(ctxt, &c->src, c, 0);
break;
case SrcMem16:
memop.bytes = 2;
@@ -2905,7 +2989,7 @@ done_prefixes:
/* Decode and fetch the destination operand: register or memory. */
switch (c->d & DstMask) {
case DstReg:
- decode_register_operand(&c->dst, c,
+ decode_register_operand(ctxt, &c->dst, c,
c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
break;
case DstImmUByte:
@@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
+ if ((c->d & Sse)
+ && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM)
+ || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) {
+ rc = emulate_ud(ctxt);
+ goto done;
+ }
+
+ if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) {
+ rc = emulate_nm(ctxt);
+ goto done;
+ }
+
/* Privileged instruction can be executed only in CPL=0 */
if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
rc = emulate_gp(ctxt, 0);
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f)
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
` (6 preceding siblings ...)
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
To: Marcelo Tosatti, kvm; +Cc: Wei Xu
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/kvm/emulate.c | 23 +++++++++++++++++++++--
1 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7b7d96a..bfb34af 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2415,11 +2415,19 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_movdqu(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes);
+ return X86EMUL_CONTINUE;
+}
+
#define D(_y) { .flags = (_y) }
#define N D(0)
#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
#define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
#define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
#define D2bv(_f) D((_f) | ByteOp), D(_f)
#define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
@@ -2484,6 +2492,10 @@ static struct opcode group11[] = {
I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
};
+static struct gprefix pfx_0f_6f_0f_7f = {
+ N, N, N, I(Sse, em_movdqu),
+};
+
static struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
D6ALU(Lock),
@@ -2608,9 +2620,15 @@ static struct opcode twobyte_table[256] = {
/* 0x50 - 0x5F */
N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
/* 0x60 - 0x6F */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x70 - 0x7F */
- N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, N,
+ N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
/* 0x80 - 0x8F */
X16(D(SrcImm)),
/* 0x90 - 0x9F */
@@ -2654,6 +2672,7 @@ static struct opcode twobyte_table[256] = {
#undef G
#undef GD
#undef I
+#undef GP
#undef D2bv
#undef I2bv
--
1.7.1
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
@ 2011-03-29 14:51 ` Wei Xu
2011-03-29 14:55 ` Avi Kivity
0 siblings, 1 reply; 13+ messages in thread
From: Wei Xu @ 2011-03-29 14:51 UTC (permalink / raw)
To: Avi Kivity, Marcelo Tosatti, kvm
Avi,
I really appreciate your help! If you need help with anything, let me know. I am
working on qemu-kvm now and am willing to help out...
Wei Xu
On 3/29/11 5:53 AM, "Avi Kivity" <avi@redhat.com> wrote:
> Needed for coalesced mmio using sse.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> arch/x86/kvm/x86.c | 58 +++++++++++++++++++++++++++++++++++++++++----------
> 1 files changed, 46 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfd7763..e6bcc97 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void)
> static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
> const void *v)
> {
> - if (vcpu->arch.apic &&
> - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
> - return 0;
> + int handled = 0;
> + int n;
> +
> + do {
> + n = min(len, 8);
> + if (!(vcpu->arch.apic &&
> + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
> + && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
> + break;
> + handled += n;
> + addr += n;
> + len -= n;
> + v += n;
> + } while (len);
>
> - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
> + return handled;
> }
>
> static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void
> *v)
> {
> - if (vcpu->arch.apic &&
> - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
> - return 0;
> + int handled = 0;
> + int n;
> +
> + do {
> + n = min(len, 8);
> + if (!(vcpu->arch.apic &&
> + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
> + && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
> + break;
> + trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
> + handled += n;
> + addr += n;
> + len -= n;
> + v += n;
> + } while (len);
>
> - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
> + return handled;
> }
>
> static void kvm_set_segment(struct kvm_vcpu *vcpu,
> @@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr,
> struct kvm_vcpu *vcpu)
> {
> gpa_t gpa;
> + int handled;
>
> if (vcpu->mmio_read_completed) {
> memcpy(val, vcpu->mmio_data, bytes);
> @@ -3795,10 +3819,14 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
> - trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
> + handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
> +
> + if (handled == bytes)
> return X86EMUL_CONTINUE;
> - }
> +
> + gpa += handled;
> + bytes -= handled;
> + val += handled;
>
> trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
>
> @@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long
> addr,
> struct kvm_vcpu *vcpu)
> {
> gpa_t gpa;
> + int handled;
>
> gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
>
> @@ -3848,9 +3877,14 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
> + handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
> + if (handled == bytes)
> return X86EMUL_CONTINUE;
>
> + gpa += handled;
> + bytes -= handled;
> + val += handled;
> +
> vcpu->mmio_needed = 1;
> vcpu->run->exit_reason = KVM_EXIT_MMIO;
> vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
2011-03-29 14:51 ` Wei Xu
@ 2011-03-29 14:55 ` Avi Kivity
0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 14:55 UTC (permalink / raw)
To: Wei Xu; +Cc: Marcelo Tosatti, kvm
On 03/29/2011 04:51 PM, Wei Xu wrote:
> Avi,
>
> Really appreciate your help! If you need help with anything, let me know. I am
> working on qemu-kvm now and willing to help out...
Note that this patchset only implements movdqu. Feel free to tackle
movdqa (easy) and movq (harder - needs mmx support).
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 7/8] KVM: x86 emulator: SSE support
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
@ 2011-06-28 6:49 ` Cyclonus J
2011-06-28 8:11 ` Avi Kivity
0 siblings, 1 reply; 13+ messages in thread
From: Cyclonus J @ 2011-06-28 6:49 UTC (permalink / raw)
To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Wei Xu
On Tue, Mar 29, 2011 at 5:54 AM, Avi Kivity <avi@redhat.com> wrote:
> Add support for marking an instruction as SSE, switching registers used
> to the SSE register file.
Avi,
So this change will only support an XMM register as its destination, not
memory, right? I see that the mmio size in qemu is still 8 bytes.
Do we need to support memory destination and update qemu as well?
Thanks,
CJ
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> arch/x86/include/asm/kvm_emulate.h | 6 ++-
> arch/x86/kvm/emulate.c | 102 ++++++++++++++++++++++++++++++++++-
> 2 files changed, 104 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index 4c0e682..48693f0 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -162,9 +162,11 @@ struct x86_emulate_ops {
> void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
> };
>
> +typedef u32 __attribute__((vector_size(16))) sse128_t;
> +
> /* Type, address-of, and value of an instruction's operand. */
> struct operand {
> - enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
> + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
> unsigned int bytes;
> union {
> unsigned long orig_val;
> @@ -176,11 +178,13 @@ struct operand {
> ulong ea;
> unsigned seg;
> } mem;
> + unsigned xmm;
> } addr;
> union {
> unsigned long val;
> u64 val64;
> char valptr[sizeof(unsigned long) + 2];
> + sse128_t vec_val;
> };
> };
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 458faea..7b7d96a 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -76,6 +76,7 @@
> #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
> #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
> #define Prefix (1<<16) /* Instruction varies with 66/f2/f3 prefix */
> +#define Sse (1<<17) /* SSE Vector instruction */
> /* Misc flags */
> #define VendorSpecific (1<<22) /* Vendor specific instruction */
> #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
> @@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
> return emulate_exception(ctxt, DE_VECTOR, 0, false);
> }
>
> +static int emulate_nm(struct x86_emulate_ctxt *ctxt)
> +{
> + return emulate_exception(ctxt, NM_VECTOR, 0, false);
> +}
> +
> static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
> struct x86_emulate_ops *ops,
> unsigned long eip, u8 *dest)
> @@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op)
> }
> }
>
> -static void decode_register_operand(struct operand *op,
> +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
> +{
> + ctxt->ops->get_fpu(ctxt);
> + switch (reg) {
> + case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
> + case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
> + case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
> + case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
> + case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
> + case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
> + case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
> + case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
> +#ifdef CONFIG_X86_64
> + case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
> + case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
> + case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
> + case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
> + case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
> + case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
> + case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
> + case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
> +#endif
> + default: BUG();
> + }
> + ctxt->ops->put_fpu(ctxt);
> +}
> +
> +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
> + int reg)
> +{
> + ctxt->ops->get_fpu(ctxt);
> + switch (reg) {
> + case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
> + case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
> + case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
> + case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
> + case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
> + case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
> + case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
> + case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
> +#ifdef CONFIG_X86_64
> + case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
> + case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
> + case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
> + case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
> + case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
> + case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
> + case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
> + case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
> +#endif
> + default: BUG();
> + }
> + ctxt->ops->put_fpu(ctxt);
> +}
> +
> +static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
> + struct operand *op,
> struct decode_cache *c,
> int inhibit_bytereg)
> {
> @@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op,
>
> if (!(c->d & ModRM))
> reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
> +
> + if (c->d & Sse) {
> + op->type = OP_XMM;
> + op->bytes = 16;
> + op->addr.xmm = reg;
> + read_sse_reg(ctxt, &op->vec_val, reg);
> + return;
> + }
> +
> op->type = OP_REG;
> if ((c->d & ByteOp) && !inhibit_bytereg) {
> op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
> @@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
> op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
> op->addr.reg = decode_register(c->modrm_rm,
> c->regs, c->d & ByteOp);
> + if (c->d & Sse) {
> + op->type = OP_XMM;
> + op->bytes = 16;
> + op->addr.xmm = c->modrm_rm;
> + read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
> + return rc;
> + }
> fetch_register_operand(op);
> return rc;
> }
> @@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
> if (rc != X86EMUL_CONTINUE)
> return rc;
> break;
> + case OP_XMM:
> + write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
> + break;
> case OP_NONE:
> /* no writeback */
> break;
> @@ -2785,6 +2866,9 @@ done_prefixes:
> c->op_bytes = 4;
> }
>
> + if (c->d & Sse)
> + c->op_bytes = 16;
> +
> /* ModRM and SIB bytes. */
> if (c->d & ModRM) {
> rc = decode_modrm(ctxt, ops, &memop);
> @@ -2814,7 +2898,7 @@ done_prefixes:
> case SrcNone:
> break;
> case SrcReg:
> - decode_register_operand(&c->src, c, 0);
> + decode_register_operand(ctxt, &c->src, c, 0);
> break;
> case SrcMem16:
> memop.bytes = 2;
> @@ -2905,7 +2989,7 @@ done_prefixes:
> /* Decode and fetch the destination operand: register or memory. */
> switch (c->d & DstMask) {
> case DstReg:
> - decode_register_operand(&c->dst, c,
> + decode_register_operand(ctxt, &c->dst, c,
> c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
> break;
> case DstImmUByte:
> @@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
> goto done;
> }
>
> + if ((c->d & Sse)
> + && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM)
> + || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) {
> + rc = emulate_ud(ctxt);
> + goto done;
> + }
> +
> + if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) {
> + rc = emulate_nm(ctxt);
> + goto done;
> + }
> +
> /* Privileged instruction can be executed only in CPL=0 */
> if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
> rc = emulate_gp(ctxt, 0);
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v2 7/8] KVM: x86 emulator: SSE support
2011-06-28 6:49 ` Cyclonus J
@ 2011-06-28 8:11 ` Avi Kivity
0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-06-28 8:11 UTC (permalink / raw)
To: Cyclonus J; +Cc: Marcelo Tosatti, kvm, Wei Xu
On 06/28/2011 09:49 AM, Cyclonus J wrote:
> On Tue, Mar 29, 2011 at 5:54 AM, Avi Kivity<avi@redhat.com> wrote:
> > Add support for marking an instruction as SSE, switching registers used
> > to the SSE register file.
>
> Avi,
>
> So this change will only support an XMM register as its destination, not
> memory, right? I see that the mmio size in qemu is still 8 bytes.
>
Memory is supported as well.
> Do we need to support memory destination and update qemu as well?
>
kvm breaks up 16-byte writes into two 8-byte writes, so that the
interface between kvm and userspace is not affected.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2011-06-28 8:11 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
2011-03-29 14:51 ` Wei Xu
2011-03-29 14:55 ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
2011-06-28 6:49 ` Cyclonus J
2011-06-28 8:11 ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.