public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/8] SSE MMIO
@ 2011-03-29 12:53 Avi Kivity
  2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
                   ` (7 more replies)
  0 siblings, 8 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

This patchset extends the emulator to support SSE instructions.  Currently
just one instruction (MOVDQU) is emulated, but it should be easy to add more.

Avi Kivity (8):
  KVM: extend in-kernel mmio to handle >8 byte transactions
  KVM: Split mmio completion into a function
  KVM: 16-byte mmio support
  KVM: x86 emulator: do not munge rep prefix
  KVM: x86 emulator: define callbacks for using the guest fpu within
    the emulator
  KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3
    prefixes
  KVM: x86 emulator: SSE support
  KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f)

 arch/x86/include/asm/kvm_emulate.h |   12 ++-
 arch/x86/include/asm/kvm_host.h    |    1 +
 arch/x86/kvm/emulate.c             |  155 +++++++++++++++++++++++++++++++++--
 arch/x86/kvm/x86.c                 |  145 ++++++++++++++++++++++++++--------
 include/linux/kvm_host.h           |    7 ++-
 5 files changed, 274 insertions(+), 46 deletions(-)


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
  2011-03-29 14:51   ` Wei Xu
  2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Needed for coalesced mmio using sse.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c |   58 +++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfd7763..e6bcc97 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void)
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
 			   const void *v)
 {
-	if (vcpu->arch.apic &&
-	    !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
-		return 0;
+	int handled = 0;
+	int n;
+
+	do {
+		n = min(len, 8);
+		if (!(vcpu->arch.apic &&
+		      !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
+		    && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+			break;
+		handled += n;
+		addr += n;
+		len -= n;
+		v += n;
+	} while (len);
 
-	return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+	return handled;
 }
 
 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 {
-	if (vcpu->arch.apic &&
-	    !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
-		return 0;
+	int handled = 0;
+	int n;
+
+	do {
+		n = min(len, 8);
+		if (!(vcpu->arch.apic &&
+		      !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
+		    && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+			break;
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+		handled += n;
+		addr += n;
+		len -= n;
+		v += n;
+	} while (len);
 
-	return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
+	return handled;
 }
 
 static void kvm_set_segment(struct kvm_vcpu *vcpu,
@@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr,
 				  struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	int handled;
 
 	if (vcpu->mmio_read_completed) {
 		memcpy(val, vcpu->mmio_data, bytes);
@@ -3795,10 +3819,14 @@ mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
+	handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
+
+	if (handled == bytes)
 		return X86EMUL_CONTINUE;
-	}
+
+	gpa += handled;
+	bytes -= handled;
+	val += handled;
 
 	trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
 
@@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 					   struct kvm_vcpu *vcpu)
 {
 	gpa_t                 gpa;
+	int handled;
 
 	gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
 
@@ -3848,9 +3877,14 @@ mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
-	if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+	handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
+	if (handled == bytes)
 		return X86EMUL_CONTINUE;
 
+	gpa += handled;
+	bytes -= handled;
+	val += handled;
+
 	vcpu->mmio_needed = 1;
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 2/8] KVM: Split mmio completion into a function
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
  2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
  2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Make room for sse mmio completions.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c |   39 +++++++++++++++++++++++++--------------
 1 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e6bcc97..2eb1124 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5415,6 +5415,27 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static int complete_mmio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int r;
+
+	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
+		return 1;
+
+	if (vcpu->mmio_needed) {
+		memcpy(vcpu->mmio_data, run->mmio.data, 8);
+		vcpu->mmio_read_completed = 1;
+		vcpu->mmio_needed = 0;
+	}
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5441,20 +5462,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	if (vcpu->arch.pio.count || vcpu->mmio_needed) {
-		if (vcpu->mmio_needed) {
-			memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
-			vcpu->mmio_read_completed = 1;
-			vcpu->mmio_needed = 0;
-		}
-		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-		r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-		if (r != EMULATE_DONE) {
-			r = 0;
-			goto out;
-		}
-	}
+	r = complete_mmio(vcpu);
+	if (r <= 0)
+		goto out;
+
 	if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
 		kvm_register_write(vcpu, VCPU_REGS_RAX,
 				     kvm_run->hypercall.ret);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 3/8] KVM: 16-byte mmio support
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
  2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
  2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
  2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Since sse instructions can issue 16-byte mmios, we need to support them.  We
can't increase the kvm_run mmio buffer size to 16 bytes without breaking
compatibility, so instead we break the large mmios into two smaller 8-byte
ones.  Since the bus is 64-bit we aren't breaking any atomicity guarantees.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/x86.c              |   34 +++++++++++++++++++++++++---------
 include/linux/kvm_host.h        |    7 ++++++-
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35f81b1..e820c63 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -30,6 +30,7 @@
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_MMIO_SIZE 16
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2eb1124..01aafc8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3833,8 +3833,10 @@ mmio:
 	vcpu->mmio_needed = 1;
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+	vcpu->mmio_size = bytes;
+	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
 	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 0;
+	vcpu->mmio_index = 0;
 
 	return X86EMUL_IO_NEEDED;
 }
@@ -3886,11 +3888,14 @@ mmio:
 	val += handled;
 
 	vcpu->mmio_needed = 1;
+	memcpy(vcpu->mmio_data, val, bytes);
 	vcpu->run->exit_reason = KVM_EXIT_MMIO;
 	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
-	vcpu->run->mmio.len = vcpu->mmio_size = bytes;
+	vcpu->mmio_size = bytes;
+	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
 	vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
-	memcpy(vcpu->run->mmio.data, val, bytes);
+	memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
+	vcpu->mmio_index = 0;
 
 	return X86EMUL_CONTINUE;
 }
@@ -4498,11 +4503,9 @@ restart:
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
 		r = EMULATE_DO_MMIO;
-	} else if (vcpu->mmio_needed) {
-		if (vcpu->mmio_is_write)
-			vcpu->mmio_needed = 0;
+	} else if (vcpu->mmio_needed)
 		r = EMULATE_DO_MMIO;
-	} else if (r == EMULATION_RESTART)
+	else if (r == EMULATION_RESTART)
 		goto restart;
 	else
 		r = EMULATE_DONE;
@@ -5424,9 +5427,22 @@ static int complete_mmio(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (vcpu->mmio_needed) {
-		memcpy(vcpu->mmio_data, run->mmio.data, 8);
-		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
+		if (!vcpu->mmio_is_write)
+			memcpy(vcpu->mmio_data, run->mmio.data, 8);
+		vcpu->mmio_index += 8;
+		if (vcpu->mmio_index < vcpu->mmio_size) {
+			run->exit_reason = KVM_EXIT_MMIO;
+			run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
+			memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
+			run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
+			run->mmio.is_write = vcpu->mmio_is_write;
+			vcpu->mmio_needed = 1;
+			return 0;
+		}
+		if (vcpu->mmio_is_write)
+			return 1;
+		vcpu->mmio_read_completed = 1;
 	}
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 57d7092..1934f48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -27,6 +27,10 @@
 
 #include <asm/kvm_host.h>
 
+#ifndef KVM_MMIO_SIZE
+#define KVM_MMIO_SIZE 8
+#endif
+
 /*
  * vcpu->requests bit members
  */
@@ -133,7 +137,8 @@ struct kvm_vcpu {
 	int mmio_read_completed;
 	int mmio_is_write;
 	int mmio_size;
-	unsigned char mmio_data[8];
+	int mmio_index;
+	unsigned char mmio_data[KVM_MMIO_SIZE];
 	gpa_t mmio_phys_addr;
 #endif
 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
                   ` (2 preceding siblings ...)
  2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
@ 2011-03-29 12:53 ` Avi Kivity
  2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:53 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Currently we store a rep prefix as 1 or 2 depending on whether it is a REPE or
REPNE.  Since sse instructions depend on the prefix value, store it as the
original opcode to simplify things further on.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |    4 ++--
 arch/x86/kvm/emulate.c             |    4 +---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 0f52135..c00aed1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -249,8 +249,8 @@ struct x86_emulate_ctxt {
 };
 
 /* Repeat String Operation Prefix */
-#define REPE_PREFIX	1
-#define REPNE_PREFIX	2
+#define REPE_PREFIX	0xf3
+#define REPNE_PREFIX	0xf2
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0	/* Real mode.             */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 14c5ad5..7066cf8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2692,10 +2692,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 			c->lock_prefix = 1;
 			break;
 		case 0xf2:	/* REPNE/REPNZ */
-			c->rep_prefix = REPNE_PREFIX;
-			break;
 		case 0xf3:	/* REP/REPE/REPZ */
-			c->rep_prefix = REPE_PREFIX;
+			c->rep_prefix = c->b;
 			break;
 		default:
 			goto done_prefixes;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
                   ` (3 preceding siblings ...)
  2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
  2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Needed for emulating fpu instructions.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |    2 ++
 arch/x86/kvm/x86.c                 |   18 ++++++++++++++++++
 2 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c00aed1..4c0e682 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -158,6 +158,8 @@ struct x86_emulate_ops {
 	int (*set_dr)(int dr, unsigned long value, struct kvm_vcpu *vcpu);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+	void (*get_fpu)(struct x86_emulate_ctxt *ctxt); /* disables preempt */
+	void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
 };
 
 /* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 01aafc8..9686547 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4281,6 +4281,22 @@ static void emulator_set_segment_selector(u16 sel, int seg,
 	kvm_set_segment(vcpu, &kvm_seg, seg);
 }
 
+static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
+{
+	preempt_disable();
+	kvm_load_guest_fpu(ctxt->vcpu);
+	/*
+	 * CR0.TS may reference the host fpu state, not the guest fpu state,
+	 * so it may be clear at this point.
+	 */
+	clts();
+}
+
+static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
+{
+	preempt_enable();
+}
+
 static struct x86_emulate_ops emulate_ops = {
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
@@ -4304,6 +4320,8 @@ static struct x86_emulate_ops emulate_ops = {
 	.set_dr              = emulator_set_dr,
 	.set_msr             = kvm_set_msr,
 	.get_msr             = kvm_get_msr,
+	.get_fpu             = emulator_get_fpu,
+	.put_fpu             = emulator_put_fpu,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
                   ` (4 preceding siblings ...)
  2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
  2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
  2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
  7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Most SIMD instructions use the 66/f2/f3 prefixes to distinguish between
different variants of the same instruction.  Usually the encoding is quite
regular, but in some cases (including non-SIMD instructions) the prefixes
generate very different instructions.  Examples include XCHG/PAUSE,
MOVQ/MOVDQA/MOVDQU, and MOVBE/CRC32.

Allow the emulator to handle these special cases by splitting such opcodes
into groups, with different decode flags and execution functions for different
prefixes.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c |   26 +++++++++++++++++++++++++-
 1 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7066cf8..458faea 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -75,6 +75,7 @@
 #define Stack       (1<<13)     /* Stack instruction (push/pop) */
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
+#define Prefix      (1<<16)     /* Instruction varies with 66/f2/f3 prefix */
 /* Misc flags */
 #define VendorSpecific (1<<22) /* Vendor specific instruction */
 #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
@@ -106,6 +107,7 @@ struct opcode {
 		int (*execute)(struct x86_emulate_ctxt *ctxt);
 		struct opcode *group;
 		struct group_dual *gdual;
+		struct gprefix *gprefix;
 	} u;
 };
 
@@ -114,6 +116,13 @@ struct group_dual {
 	struct opcode mod3[8];
 };
 
+struct gprefix {
+	struct opcode pfx_no;
+	struct opcode pfx_66;
+	struct opcode pfx_f2;
+	struct opcode pfx_f3;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -2625,7 +2634,8 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	struct decode_cache *c = &ctxt->decode;
 	int rc = X86EMUL_CONTINUE;
 	int mode = ctxt->mode;
-	int def_op_bytes, def_ad_bytes, dual, goffset;
+	int def_op_bytes, def_ad_bytes, dual, goffset, simd_prefix;
+	bool op_prefix = false;
 	struct opcode opcode, *g_mod012, *g_mod3;
 	struct operand memop = { .type = OP_NONE };
 
@@ -2662,6 +2672,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	for (;;) {
 		switch (c->b = insn_fetch(u8, 1, c->eip)) {
 		case 0x66:	/* operand-size override */
+			op_prefix = true;
 			/* switch between 2/4 bytes */
 			c->op_bytes = def_op_bytes ^ 6;
 			break;
@@ -2742,6 +2753,19 @@ done_prefixes:
 		c->d |= opcode.flags;
 	}
 
+	if (c->d & Prefix) {
+		if (c->rep_prefix && op_prefix)
+			return X86EMUL_UNHANDLEABLE;
+		simd_prefix = op_prefix ? 0x66 : c->rep_prefix;
+		switch (simd_prefix) {
+		case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
+		case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
+		case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
+		case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
+		}
+		c->d |= opcode.flags;
+	}
+
 	c->execute = opcode.u.execute;
 
 	/* Unrecognised? */
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 7/8] KVM: x86 emulator: SSE support
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
                   ` (5 preceding siblings ...)
  2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
  2011-06-28  6:49   ` Cyclonus J
  2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity
  7 siblings, 1 reply; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Add support for marking an instruction as SSE, which switches the registers
it uses to the SSE register file.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |    6 ++-
 arch/x86/kvm/emulate.c             |  102 ++++++++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 4c0e682..48693f0 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -162,9 +162,11 @@ struct x86_emulate_ops {
 	void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
 };
 
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
 	unsigned int bytes;
 	union {
 		unsigned long orig_val;
@@ -176,11 +178,13 @@ struct operand {
 			ulong ea;
 			unsigned seg;
 		} mem;
+		unsigned xmm;
 	} addr;
 	union {
 		unsigned long val;
 		u64 val64;
 		char valptr[sizeof(unsigned long) + 2];
+		sse128_t vec_val;
 	};
 };
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 458faea..7b7d96a 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -76,6 +76,7 @@
 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define Prefix      (1<<16)     /* Instruction varies with 66/f2/f3 prefix */
+#define Sse         (1<<17)     /* SSE Vector instruction */
 /* Misc flags */
 #define VendorSpecific (1<<22) /* Vendor specific instruction */
 #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
@@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, DE_VECTOR, 0, false);
 }
 
+static int emulate_nm(struct x86_emulate_ctxt *ctxt)
+{
+	return emulate_exception(ctxt, NM_VECTOR, 0, false);
+}
+
 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 			      struct x86_emulate_ops *ops,
 			      unsigned long eip, u8 *dest)
@@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op)
 	}
 }
 
-static void decode_register_operand(struct operand *op,
+static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
+{
+	ctxt->ops->get_fpu(ctxt);
+	switch (reg) {
+	case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
+	case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
+	case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
+	case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
+	case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
+	case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
+	case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
+	case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+	case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
+	case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
+	case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
+	case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
+	case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
+	case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
+	case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
+	case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+#endif
+	default: BUG();
+	}
+	ctxt->ops->put_fpu(ctxt);
+}
+
+static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
+			  int reg)
+{
+	ctxt->ops->get_fpu(ctxt);
+	switch (reg) {
+	case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
+	case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
+	case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
+	case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
+	case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
+	case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
+	case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
+	case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+	case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
+	case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
+	case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
+	case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
+	case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
+	case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
+	case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
+	case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+#endif
+	default: BUG();
+	}
+	ctxt->ops->put_fpu(ctxt);
+}
+
+static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
+				    struct operand *op,
 				    struct decode_cache *c,
 				    int inhibit_bytereg)
 {
@@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op,
 
 	if (!(c->d & ModRM))
 		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
+
+	if (c->d & Sse) {
+		op->type = OP_XMM;
+		op->bytes = 16;
+		op->addr.xmm = reg;
+		read_sse_reg(ctxt, &op->vec_val, reg);
+		return;
+	}
+
 	op->type = OP_REG;
 	if ((c->d & ByteOp) && !inhibit_bytereg) {
 		op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
@@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		op->addr.reg = decode_register(c->modrm_rm,
 					       c->regs, c->d & ByteOp);
+		if (c->d & Sse) {
+			op->type = OP_XMM;
+			op->bytes = 16;
+			op->addr.xmm = c->modrm_rm;
+			read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
+			return rc;
+		}
 		fetch_register_operand(op);
 		return rc;
 	}
@@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
+	case OP_XMM:
+		write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
+		break;
 	case OP_NONE:
 		/* no writeback */
 		break;
@@ -2785,6 +2866,9 @@ done_prefixes:
 			c->op_bytes = 4;
 	}
 
+	if (c->d & Sse)
+		c->op_bytes = 16;
+
 	/* ModRM and SIB bytes. */
 	if (c->d & ModRM) {
 		rc = decode_modrm(ctxt, ops, &memop);
@@ -2814,7 +2898,7 @@ done_prefixes:
 	case SrcNone:
 		break;
 	case SrcReg:
-		decode_register_operand(&c->src, c, 0);
+		decode_register_operand(ctxt, &c->src, c, 0);
 		break;
 	case SrcMem16:
 		memop.bytes = 2;
@@ -2905,7 +2989,7 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	switch (c->d & DstMask) {
 	case DstReg:
-		decode_register_operand(&c->dst, c,
+		decode_register_operand(ctxt, &c->dst, c,
 			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
 		break;
 	case DstImmUByte:
@@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		goto done;
 	}
 
+	if ((c->d & Sse)
+	    && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM)
+		|| !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) {
+		rc = emulate_ud(ctxt);
+		goto done;
+	}
+
+	if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) {
+		rc = emulate_nm(ctxt);
+		goto done;
+	}
+
 	/* Privileged instruction can be executed only in CPL=0 */
 	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
 		rc = emulate_gp(ctxt, 0);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f)
  2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
                   ` (6 preceding siblings ...)
  2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
@ 2011-03-29 12:54 ` Avi Kivity
  7 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 12:54 UTC (permalink / raw)
  To: Marcelo Tosatti, kvm; +Cc: Wei Xu

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c |   23 +++++++++++++++++++++--
 1 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7b7d96a..bfb34af 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2415,11 +2415,19 @@ static int em_mov(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_movdqu(struct x86_emulate_ctxt *ctxt)
+{
+	struct decode_cache *c = &ctxt->decode;
+	memcpy(&c->dst.vec_val, &c->src.vec_val, c->op_bytes);
+	return X86EMUL_CONTINUE;
+}
+
 #define D(_y) { .flags = (_y) }
 #define N    D(0)
 #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
 
 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
 #define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
@@ -2484,6 +2492,10 @@ static struct opcode group11[] = {
 	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
 };
 
+static struct gprefix pfx_0f_6f_0f_7f = {
+	N, N, N, I(Sse, em_movdqu),
+};
+
 static struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	D6ALU(Lock),
@@ -2608,9 +2620,15 @@ static struct opcode twobyte_table[256] = {
 	/* 0x50 - 0x5F */
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
 	/* 0x60 - 0x6F */
-	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x70 - 0x7F */
-	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, N,
+	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x80 - 0x8F */
 	X16(D(SrcImm)),
 	/* 0x90 - 0x9F */
@@ -2654,6 +2672,7 @@ static struct opcode twobyte_table[256] = {
 #undef G
 #undef GD
 #undef I
+#undef GP
 
 #undef D2bv
 #undef I2bv
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
  2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
@ 2011-03-29 14:51   ` Wei Xu
  2011-03-29 14:55     ` Avi Kivity
  0 siblings, 1 reply; 13+ messages in thread
From: Wei Xu @ 2011-03-29 14:51 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti, kvm

Avi,

Really appreciate your help! If you need help with anything, let me know. I am
working on qemu-kvm now and am willing to help out...

Wei Xu


On 3/29/11 5:53 AM, "Avi Kivity" <avi@redhat.com> wrote:

> Needed for coalesced mmio using sse.
> 
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  arch/x86/kvm/x86.c |   58 +++++++++++++++++++++++++++++++++++++++++----------
>  1 files changed, 46 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index bfd7763..e6bcc97 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3596,20 +3596,43 @@ static void kvm_init_msr_list(void)
>  static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
>   const void *v)
>  {
> - if (vcpu->arch.apic &&
> -     !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
> -  return 0;
> + int handled = 0;
> + int n;
> +
> + do {
> +  n = min(len, 8);
> +  if (!(vcpu->arch.apic &&
> +        !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
> +      && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
> +   break;
> +  handled += n;
> +  addr += n;
> +  len -= n;
> +  v += n;
> + } while (len);
>  
> - return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
> + return handled;
>  }
>  
>  static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void
> *v)
>  {
> - if (vcpu->arch.apic &&
> -     !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
> -  return 0;
> + int handled = 0;
> + int n;
> +
> + do {
> +  n = min(len, 8);
> +  if (!(vcpu->arch.apic &&
> +        !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
> +      && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
> +   break;
> +  trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
> +  handled += n;
> +  addr += n;
> +  len -= n;
> +  v += n;
> + } while (len);
>  
> - return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
> + return handled;
>  }
>  
>  static void kvm_set_segment(struct kvm_vcpu *vcpu,
> @@ -3769,6 +3792,7 @@ static int emulator_read_emulated(unsigned long addr,
>  struct kvm_vcpu *vcpu)
>  {
> gpa_t                 gpa;
> + int handled;
>  
> if (vcpu->mmio_read_completed) {
> memcpy(val, vcpu->mmio_data, bytes);
> @@ -3795,10 +3819,14 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) {
> -  trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, gpa, *(u64 *)val);
> + handled = vcpu_mmio_read(vcpu, gpa, bytes, val);
> +
> + if (handled == bytes)
> return X86EMUL_CONTINUE;
> - }
> +
> + gpa += handled;
> + bytes -= handled;
> + val += handled;
>  
> trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
>  
> @@ -3830,6 +3858,7 @@ static int emulator_write_emulated_onepage(unsigned long
> addr,
>   struct kvm_vcpu *vcpu)
>  {
> gpa_t                 gpa;
> + int handled;
>  
> gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
>  
> @@ -3848,9 +3877,14 @@ mmio:
> /*
> * Is this MMIO handled locally?
> */
> - if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
> + handled = vcpu_mmio_write(vcpu, gpa, bytes, val);
> + if (handled == bytes)
> return X86EMUL_CONTINUE;
>  
> + gpa += handled;
> + bytes -= handled;
> + val += handled;
> +
> vcpu->mmio_needed = 1;
> vcpu->run->exit_reason = KVM_EXIT_MMIO;
> vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions
  2011-03-29 14:51   ` Wei Xu
@ 2011-03-29 14:55     ` Avi Kivity
  0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-03-29 14:55 UTC (permalink / raw)
  To: Wei Xu; +Cc: Marcelo Tosatti, kvm

On 03/29/2011 04:51 PM, Wei Xu wrote:
> Avi,
>
> Really appreciate your help! If you need help with anything, let me know. I am
> working on qemu-kvm now and am willing to help out...

Note that this patchset only implements movdqu.  Feel free to tackle 
movdqa (easy) and movq (harder - needs mmx support).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 7/8] KVM: x86 emulator: SSE support
  2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
@ 2011-06-28  6:49   ` Cyclonus J
  2011-06-28  8:11     ` Avi Kivity
  0 siblings, 1 reply; 13+ messages in thread
From: Cyclonus J @ 2011-06-28  6:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm, Wei Xu

On Tue, Mar 29, 2011 at 5:54 AM, Avi Kivity <avi@redhat.com> wrote:
> Add support for marking an instruction as SSE, switching registers used
> to the SSE register file.

Avi,

So this change will only support an XMM register as its destination, not
memory, right? I am seeing that the mmio size in qemu is still 8 bytes.

Do we need to support memory destination and update qemu as well?

Thanks,
CJ

>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>  arch/x86/include/asm/kvm_emulate.h |    6 ++-
>  arch/x86/kvm/emulate.c             |  102 ++++++++++++++++++++++++++++++++++-
>  2 files changed, 104 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
> index 4c0e682..48693f0 100644
> --- a/arch/x86/include/asm/kvm_emulate.h
> +++ b/arch/x86/include/asm/kvm_emulate.h
> @@ -162,9 +162,11 @@ struct x86_emulate_ops {
>        void (*put_fpu)(struct x86_emulate_ctxt *ctxt); /* reenables preempt */
>  };
>
> +typedef u32 __attribute__((vector_size(16))) sse128_t;
> +
>  /* Type, address-of, and value of an instruction's operand. */
>  struct operand {
> -       enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
> +       enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type;
>        unsigned int bytes;
>        union {
>                unsigned long orig_val;
> @@ -176,11 +178,13 @@ struct operand {
>                        ulong ea;
>                        unsigned seg;
>                } mem;
> +               unsigned xmm;
>        } addr;
>        union {
>                unsigned long val;
>                u64 val64;
>                char valptr[sizeof(unsigned long) + 2];
> +               sse128_t vec_val;
>        };
>  };
>
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 458faea..7b7d96a 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -76,6 +76,7 @@
>  #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
>  #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
>  #define Prefix      (1<<16)     /* Instruction varies with 66/f2/f3 prefix */
> +#define Sse         (1<<17)     /* SSE Vector instruction */
>  /* Misc flags */
>  #define VendorSpecific (1<<22) /* Vendor specific instruction */
>  #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
> @@ -505,6 +506,11 @@ static int emulate_de(struct x86_emulate_ctxt *ctxt)
>        return emulate_exception(ctxt, DE_VECTOR, 0, false);
>  }
>
> +static int emulate_nm(struct x86_emulate_ctxt *ctxt)
> +{
> +       return emulate_exception(ctxt, NM_VECTOR, 0, false);
> +}
> +
>  static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
>                              struct x86_emulate_ops *ops,
>                              unsigned long eip, u8 *dest)
> @@ -632,7 +638,63 @@ static void fetch_register_operand(struct operand *op)
>        }
>  }
>
> -static void decode_register_operand(struct operand *op,
> +static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
> +{
> +       ctxt->ops->get_fpu(ctxt);
> +       switch (reg) {
> +       case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
> +       case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
> +       case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
> +       case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
> +       case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
> +       case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
> +       case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
> +       case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
> +#ifdef CONFIG_X86_64
> +       case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
> +       case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
> +       case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
> +       case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
> +       case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
> +       case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
> +       case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
> +       case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
> +#endif
> +       default: BUG();
> +       }
> +       ctxt->ops->put_fpu(ctxt);
> +}
> +
> +static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
> +                         int reg)
> +{
> +       ctxt->ops->get_fpu(ctxt);
> +       switch (reg) {
> +       case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
> +       case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
> +       case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
> +       case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
> +       case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
> +       case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
> +       case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
> +       case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
> +#ifdef CONFIG_X86_64
> +       case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
> +       case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
> +       case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
> +       case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
> +       case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
> +       case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
> +       case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
> +       case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
> +#endif
> +       default: BUG();
> +       }
> +       ctxt->ops->put_fpu(ctxt);
> +}
> +
> +static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
> +                                   struct operand *op,
>                                    struct decode_cache *c,
>                                    int inhibit_bytereg)
>  {
> @@ -641,6 +703,15 @@ static void decode_register_operand(struct operand *op,
>
>        if (!(c->d & ModRM))
>                reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
> +
> +       if (c->d & Sse) {
> +               op->type = OP_XMM;
> +               op->bytes = 16;
> +               op->addr.xmm = reg;
> +               read_sse_reg(ctxt, &op->vec_val, reg);
> +               return;
> +       }
> +
>        op->type = OP_REG;
>        if ((c->d & ByteOp) && !inhibit_bytereg) {
>                op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
> @@ -680,6 +751,13 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
>                op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
>                op->addr.reg = decode_register(c->modrm_rm,
>                                               c->regs, c->d & ByteOp);
> +               if (c->d & Sse) {
> +                       op->type = OP_XMM;
> +                       op->bytes = 16;
> +                       op->addr.xmm = c->modrm_rm;
> +                       read_sse_reg(ctxt, &op->vec_val, c->modrm_rm);
> +                       return rc;
> +               }
>                fetch_register_operand(op);
>                return rc;
>        }
> @@ -1107,6 +1185,9 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
>                if (rc != X86EMUL_CONTINUE)
>                        return rc;
>                break;
> +       case OP_XMM:
> +               write_sse_reg(ctxt, &c->dst.vec_val, c->dst.addr.xmm);
> +               break;
>        case OP_NONE:
>                /* no writeback */
>                break;
> @@ -2785,6 +2866,9 @@ done_prefixes:
>                        c->op_bytes = 4;
>        }
>
> +       if (c->d & Sse)
> +               c->op_bytes = 16;
> +
>        /* ModRM and SIB bytes. */
>        if (c->d & ModRM) {
>                rc = decode_modrm(ctxt, ops, &memop);
> @@ -2814,7 +2898,7 @@ done_prefixes:
>        case SrcNone:
>                break;
>        case SrcReg:
> -               decode_register_operand(&c->src, c, 0);
> +               decode_register_operand(ctxt, &c->src, c, 0);
>                break;
>        case SrcMem16:
>                memop.bytes = 2;
> @@ -2905,7 +2989,7 @@ done_prefixes:
>        /* Decode and fetch the destination operand: register or memory. */
>        switch (c->d & DstMask) {
>        case DstReg:
> -               decode_register_operand(&c->dst, c,
> +               decode_register_operand(ctxt, &c->dst, c,
>                         c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
>                break;
>        case DstImmUByte:
> @@ -3001,6 +3085,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
>                goto done;
>        }
>
> +       if ((c->d & Sse)
> +           && ((ops->get_cr(0, ctxt->vcpu) & X86_CR0_EM)
> +               || !(ops->get_cr(4, ctxt->vcpu) & X86_CR4_OSFXSR))) {
> +               rc = emulate_ud(ctxt);
> +               goto done;
> +       }
> +
> +       if ((c->d & Sse) && (ops->get_cr(0, ctxt->vcpu) & X86_CR0_TS)) {
> +               rc = emulate_nm(ctxt);
> +               goto done;
> +       }
> +
>        /* Privileged instruction can be executed only in CPL=0 */
>        if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
>                rc = emulate_gp(ctxt, 0);
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v2 7/8] KVM: x86 emulator: SSE support
  2011-06-28  6:49   ` Cyclonus J
@ 2011-06-28  8:11     ` Avi Kivity
  0 siblings, 0 replies; 13+ messages in thread
From: Avi Kivity @ 2011-06-28  8:11 UTC (permalink / raw)
  To: Cyclonus J; +Cc: Marcelo Tosatti, kvm, Wei Xu

On 06/28/2011 09:49 AM, Cyclonus J wrote:
> On Tue, Mar 29, 2011 at 5:54 AM, Avi Kivity<avi@redhat.com>  wrote:
> >  Add support for marking an instruction as SSE, switching registers used
> >  to the SSE register file.
>
> Avi,
>
> So this change will only support an XMM register as its destination, not
> memory, right? I am seeing that the mmio size in qemu is still 8 bytes.
>

Memory is supported as well.

> Do we need to support memory destination and update qemu as well?
>


kvm breaks up 16-byte writes into two 8-byte writes, so that the
interface between kvm and userspace is not affected.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2011-06-28  8:11 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-03-29 12:53 [PATCH v2 0/8] SSE MMIO Avi Kivity
2011-03-29 12:53 ` [PATCH v2 1/8] KVM: extend in-kernel mmio to handle >8 byte transactions Avi Kivity
2011-03-29 14:51   ` Wei Xu
2011-03-29 14:55     ` Avi Kivity
2011-03-29 12:53 ` [PATCH v2 2/8] KVM: Split mmio completion into a function Avi Kivity
2011-03-29 12:53 ` [PATCH v2 3/8] KVM: 16-byte mmio support Avi Kivity
2011-03-29 12:53 ` [PATCH v2 4/8] KVM: x86 emulator: do not munge rep prefix Avi Kivity
2011-03-29 12:54 ` [PATCH v2 5/8] KVM: x86 emulator: define callbacks for using the guest fpu within the emulator Avi Kivity
2011-03-29 12:54 ` [PATCH v2 6/8] KVM: x86 emulator: Specialize decoding for insns with 66/f2/f3 prefixes Avi Kivity
2011-03-29 12:54 ` [PATCH v2 7/8] KVM: x86 emulator: SSE support Avi Kivity
2011-06-28  6:49   ` Cyclonus J
2011-06-28  8:11     ` Avi Kivity
2011-03-29 12:54 ` [PATCH v2 8/8] KVM: x86 emulator: implement movdqu instruction (f3 0f 6f, f3 0f 7f) Avi Kivity

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.