[PATCHv5 0/4] improve speed of "rep ins" emulation

kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCHv5 0/4] improve speed of "rep ins" emulation
@ 2012-07-30 14:38 Gleb Natapov
  2012-07-30 14:38 ` [PATCHv5 1/4] Provide userspace IO exit completion callback Gleb Natapov
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-07-30 14:38 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

And now for something completely different.

So this series (or rather the last patch of it) takes different approach
to "rep ins" optimization. Instead of writing separate fast path for
it do the fast path inside emulator itself. This way nobody can say the
code is not reused!

Patch 1,2 are now, strictly speaking, not needed, but I think this is still
nice cleanup and, in case of patch 1, eliminates some ifs at each KVM_RUN ioctl.

Gleb Natapov (4):
  Provide userspace IO exit completion callback.
  KVM: emulator: make x86 emulation modes enum instead of defines
  KVM: emulator: string_addr_inc() cleanup
  KVM: emulator: optimize "rep ins" handling.

 arch/x86/include/asm/kvm_emulate.h |   26 +++++-----
 arch/x86/include/asm/kvm_host.h    |    1 +
 arch/x86/kvm/emulate.c             |   48 ++++++++++++++-----
 arch/x86/kvm/x86.c                 |   92 +++++++++++++++++++++---------------
 4 files changed, 104 insertions(+), 63 deletions(-)

-- 
1.7.10

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCHv5 1/4] Provide userspace IO exit completion callback.
  2012-07-30 14:38 [PATCHv5 0/4] improve speed of "rep ins" emulation Gleb Natapov
@ 2012-07-30 14:38 ` Gleb Natapov
  2012-08-02 19:26   ` Marcelo Tosatti
  2012-07-30 14:38 ` [PATCHv5 2/4] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-07-30 14:38 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

Current code assumes that IO exit was due to instruction emulation
and handles execution back to emulator directly. This patch adds new
userspace IO exit completion callback that can be set by any other code
that caused IO exit to userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/x86.c              |   92 +++++++++++++++++++++++----------------
 2 files changed, 56 insertions(+), 37 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48e7131..37f4f11 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -414,6 +414,7 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b6379e5..41c0c3d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4539,6 +4539,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4609,13 +4612,16 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
-		else
+		else {
 			writeback = false;
+			vcpu->arch.complete_userspace_io = complete_emulated_pio;
+		}
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
 		if (!vcpu->mmio_is_write)
 			writeback = false;
 		r = EMULATE_DO_MMIO;
+		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
@@ -5471,6 +5477,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5487,47 +5511,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
-	int r;
 
-	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-		return 1;
+	BUG_ON(!vcpu->mmio_needed);
 
-	if (vcpu->mmio_needed) {
-		/* Complete previous fragment */
-		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-		if (!vcpu->mmio_is_write)
-			memcpy(frag->data, run->mmio.data, frag->len);
-		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-			vcpu->mmio_needed = 0;
-			if (vcpu->mmio_is_write)
-				return 1;
-			vcpu->mmio_read_completed = 1;
-			goto done;
-		}
-		/* Initiate next fragment */
-		++frag;
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = frag->gpa;
+	/* Complete previous fragment */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	if (!vcpu->mmio_is_write)
+		memcpy(frag->data, run->mmio.data, frag->len);
+	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
-			memcpy(run->mmio.data, frag->data, frag->len);
-		run->mmio.len = frag->len;
-		run->mmio.is_write = vcpu->mmio_is_write;
-		return 0;
-
-	}
-done:
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+			return 1;
+		vcpu->mmio_read_completed = 1;
+		return complete_emulated_io(vcpu);
+	}
+	/* Initiate next fragment */
+	++frag;
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.phys_addr = frag->gpa;
+	if (vcpu->mmio_is_write)
+		memcpy(run->mmio.data, frag->data, frag->len);
+	run->mmio.len = frag->len;
+	run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+	return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5554,9 +5568,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	r = complete_mmio(vcpu);
-	if (r <= 0)
-		goto out;
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
+		if (r <= 0)
+			goto out;
+	}
 
 	r = __vcpu_run(vcpu);
 
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCHv5 2/4] KVM: emulator: make x86 emulation modes enum instead of defines
  2012-07-30 14:38 [PATCHv5 0/4] improve speed of "rep ins" emulation Gleb Natapov
  2012-07-30 14:38 ` [PATCHv5 1/4] Provide userspace IO exit completion callback Gleb Natapov
@ 2012-07-30 14:38 ` Gleb Natapov
  2012-07-30 14:38 ` [PATCHv5 3/4] KVM: emulator: string_addr_inc() cleanup Gleb Natapov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-07-30 14:38 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti


Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |   22 ++++++++++------------
 arch/x86/kvm/emulate.c             |    4 +++-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c764f43..8d0fe8f 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -236,6 +236,15 @@ struct read_cache {
 	unsigned long end;
 };
 
+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+	X86EMUL_MODE_REAL,	/* Real mode.             */
+	X86EMUL_MODE_VM86,	/* Virtual 8086 mode.     */
+	X86EMUL_MODE_PROT16,	/* 16-bit protected mode. */
+	X86EMUL_MODE_PROT32,	/* 32-bit protected mode. */
+	X86EMUL_MODE_PROT64,	/* 64-bit (long) mode.    */
+};
+
 struct x86_emulate_ctxt {
 	struct x86_emulate_ops *ops;
 
@@ -243,7 +252,7 @@ struct x86_emulate_ctxt {
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
-	int mode;
+	enum x86emul_mode mode;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
@@ -293,17 +302,6 @@ struct x86_emulate_ctxt {
 #define REPE_PREFIX	0xf3
 #define REPNE_PREFIX	0xf2
 
-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL     0	/* Real mode.             */
-#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode.     */
-#define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
-
-/* any protected mode   */
-#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
-			       X86EMUL_MODE_PROT64)
-
 /* CPUID vendors */
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 10f0136..4d33423 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2210,6 +2210,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 		if (msr_data == 0x0)
 			return emulate_gp(ctxt, 0);
 		break;
+	default:
+		break;
 	}
 
 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -4338,7 +4340,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	}
 
 	/* Instruction can only be executed in protected mode */
-	if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+	if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCHv5 3/4] KVM: emulator: string_addr_inc() cleanup
  2012-07-30 14:38 [PATCHv5 0/4] improve speed of "rep ins" emulation Gleb Natapov
  2012-07-30 14:38 ` [PATCHv5 1/4] Provide userspace IO exit completion callback Gleb Natapov
  2012-07-30 14:38 ` [PATCHv5 2/4] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
@ 2012-07-30 14:38 ` Gleb Natapov
  2012-07-30 14:38 ` [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling Gleb Natapov
  2012-08-13 14:39 ` [PATCHv5 0/4] improve speed of "rep ins" emulation Richard W.M. Jones
  4 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-07-30 14:38 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

Remove unneeded segment argument. Address structure already has correct
segment which was put there during decode.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/emulate.c |   11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4d33423..c22762d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2729,14 +2729,13 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
-static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
-			    int reg, struct operand *op)
+static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
+		struct operand *op)
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
 
 	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
-	op->addr.mem.seg = seg;
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -4508,12 +4507,10 @@ writeback:
 	ctxt->dst.type = saved_dst_type;
 
 	if ((ctxt->d & SrcMask) == SrcSI)
-		string_addr_inc(ctxt, seg_override(ctxt),
-				VCPU_REGS_RSI, &ctxt->src);
+		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
 
 	if ((ctxt->d & DstMask) == DstDI)
-		string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
-				&ctxt->dst);
+		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		struct read_cache *r = &ctxt->io_read;
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-07-30 14:38 [PATCHv5 0/4] improve speed of "rep ins" emulation Gleb Natapov
                   ` (2 preceding siblings ...)
  2012-07-30 14:38 ` [PATCHv5 3/4] KVM: emulator: string_addr_inc() cleanup Gleb Natapov
@ 2012-07-30 14:38 ` Gleb Natapov
  2012-08-05 15:03   ` Avi Kivity
  2012-08-06  8:50   ` Avi Kivity
  2012-08-13 14:39 ` [PATCHv5 0/4] improve speed of "rep ins" emulation Richard W.M. Jones
  4 siblings, 2 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-07-30 14:38 UTC (permalink / raw)
  To: kvm; +Cc: avi, mtosatti

Optimize "rep ins" by allowing emulator to write back more than one
datum at a time. Introduce new operand type OP_MEM_STR which tells
writeback() that dst contains pointer to an array that should be written
back as opposite to just one data element.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |    4 +++-
 arch/x86/kvm/emulate.c             |   33 ++++++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 8d0fe8f..d1777f8 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -200,8 +200,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
 
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
 	unsigned int bytes;
+	unsigned int count;
 	union {
 		unsigned long orig_val;
 		u64 orig_val64;
@@ -221,6 +222,7 @@ struct operand {
 		char valptr[sizeof(unsigned long) + 2];
 		sse128_t vec_val;
 		u64 mm_val;
+		void *data;
 	};
 };
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c22762d..c74bce8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1251,8 +1251,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 		rc->end = n * size;
 	}
 
-	memcpy(dest, rc->data + rc->pos, size);
-	rc->pos += size;
+	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
+		ctxt->dst.data = rc->data + rc->pos;
+		ctxt->dst.type = OP_MEM_STR;
+		ctxt->dst.count = (rc->end - rc->pos) / size;
+		rc->pos = rc->end;
+	} else {
+		memcpy(dest, rc->data + rc->pos, size);
+		rc->pos += size;
+	}
 	return 1;
 }
 
@@ -1500,6 +1507,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
+	case OP_MEM_STR:
+		rc = segmented_write(ctxt,
+				ctxt->dst.addr.mem,
+				ctxt->dst.data,
+				ctxt->dst.bytes * ctxt->dst.count);
+		if (rc != X86EMUL_CONTINUE)
+			return rc;
+		break;
 	case OP_XMM:
 		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
 		break;
@@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 		struct operand *op)
 {
-	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
+	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
 	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
@@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
 	I(SrcImmByte | Mov | Stack, em_push),
 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
+	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
 	X16(D(SrcImmByte)),
@@ -3930,6 +3945,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
+		op->count = 1;
 		break;
 	case OpDX:
 		op->type = OP_REG;
@@ -3973,6 +3989,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
 		op->addr.mem.seg = seg_override(ctxt);
 		op->val = 0;
+		op->count = 1;
 		break;
 	case OpImmFAddr:
 		op->type = OP_IMM;
@@ -4513,8 +4530,14 @@ writeback:
 		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
+		unsigned int count;
 		struct read_cache *r = &ctxt->io_read;
-		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
+		if ((ctxt->d & SrcMask) == SrcSI)
+			count = ctxt->src.count;
+		else
+			count = ctxt->dst.count;
+		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX],
+				-count);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
-- 
1.7.10


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 1/4] Provide userspace IO exit completion callback.
  2012-07-30 14:38 ` [PATCHv5 1/4] Provide userspace IO exit completion callback Gleb Natapov
@ 2012-08-02 19:26   ` Marcelo Tosatti
  2012-08-05 14:49     ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Marcelo Tosatti @ 2012-08-02 19:26 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, avi

On Mon, Jul 30, 2012 at 05:38:18PM +0300, Gleb Natapov wrote:
>  	int r;
> @@ -5554,9 +5568,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  		}
>  	}
>  
> -	r = complete_mmio(vcpu);
> -	if (r <= 0)
> -		goto out;
> +	if (unlikely(vcpu->arch.complete_userspace_io)) {
> +		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
> +		vcpu->arch.complete_userspace_io = NULL;
> +		r = cui(vcpu);
> +		if (r <= 0)
> +			goto out;
> +	}

Would it be worthwhile to add BUG/WARN_ONs here checking for
variables that represent valid mmio/pio, but without
complete_userspace_io function pointer set? (you do that in 
the reverse case, inside the complete_userspace_io 
function pointers).


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 1/4] Provide userspace IO exit completion callback.
  2012-08-02 19:26   ` Marcelo Tosatti
@ 2012-08-05 14:49     ` Gleb Natapov
  0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-08-05 14:49 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: kvm, avi

On Thu, Aug 02, 2012 at 04:26:29PM -0300, Marcelo Tosatti wrote:
> On Mon, Jul 30, 2012 at 05:38:18PM +0300, Gleb Natapov wrote:
> >  	int r;
> > @@ -5554,9 +5568,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  		}
> >  	}
> >  
> > -	r = complete_mmio(vcpu);
> > -	if (r <= 0)
> > -		goto out;
> > +	if (unlikely(vcpu->arch.complete_userspace_io)) {
> > +		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
> > +		vcpu->arch.complete_userspace_io = NULL;
> > +		r = cui(vcpu);
> > +		if (r <= 0)
> > +			goto out;
> > +	}
> 
> Would it be worthwhile to add BUG/WARN_ONs here checking for
> variables that represent valid mmio/pio, but without
> complete_userspace_io function pointer set? (you do that in 
> the reverse case, inside the complete_userspace_io 
> function pointers).
There are never too much asserts :), But I wouldn't want to resend the
series just for that, so if there are other comments that will require
me to resend the series anyway I'll add asserts too.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-07-30 14:38 ` [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling Gleb Natapov
@ 2012-08-05 15:03   ` Avi Kivity
  2012-08-05 15:18     ` Gleb Natapov
  2012-08-06  8:50   ` Avi Kivity
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2012-08-05 15:03 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> Optimize "rep ins" by allowing emulator to write back more than one
> datum at a time. Introduce new operand type OP_MEM_STR which tells
> writeback() that dst contains pointer to an array that should be written
> back as opposite to just one data element.
> 
>  
>  	if (ctxt->rep_prefix && (ctxt->d & String)) {
> +		unsigned int count;
>  		struct read_cache *r = &ctxt->io_read;
> -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> +		if ((ctxt->d & SrcMask) == SrcSI)
> +			count = ctxt->src.count;
> +		else
> +			count = ctxt->dst.count;
> +		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX],
> +				-count);
>  

count is unsigned.  Does it sign extend correctly in
register_address_increment()?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-05 15:03   ` Avi Kivity
@ 2012-08-05 15:18     ` Gleb Natapov
  2012-08-05 15:20       ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-08-05 15:18 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Sun, Aug 05, 2012 at 06:03:12PM +0300, Avi Kivity wrote:
> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> > Optimize "rep ins" by allowing emulator to write back more than one
> > datum at a time. Introduce new operand type OP_MEM_STR which tells
> > writeback() that dst contains pointer to an array that should be written
> > back as opposite to just one data element.
> > 
> >  
> >  	if (ctxt->rep_prefix && (ctxt->d & String)) {
> > +		unsigned int count;
> >  		struct read_cache *r = &ctxt->io_read;
> > -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> > +		if ((ctxt->d & SrcMask) == SrcSI)
> > +			count = ctxt->src.count;
> > +		else
> > +			count = ctxt->dst.count;
> > +		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX],
> > +				-count);
> >  
> 
> count is unsigned.  Does it sign extend correctly in
> register_address_increment()?
> 
I think it sign extent before register_address_increment() when compiler
sees -count. count is in the range 1-1024 here, so there shouldn't be a
problem. By I welcome better suggestions.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-05 15:18     ` Gleb Natapov
@ 2012-08-05 15:20       ` Avi Kivity
  0 siblings, 0 replies; 19+ messages in thread
From: Avi Kivity @ 2012-08-05 15:20 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 08/05/2012 06:18 PM, Gleb Natapov wrote:
> On Sun, Aug 05, 2012 at 06:03:12PM +0300, Avi Kivity wrote:
>> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
>> > Optimize "rep ins" by allowing emulator to write back more than one
>> > datum at a time. Introduce new operand type OP_MEM_STR which tells
>> > writeback() that dst contains pointer to an array that should be written
>> > back as opposite to just one data element.
>> > 
>> >  
>> >  	if (ctxt->rep_prefix && (ctxt->d & String)) {
>> > +		unsigned int count;
>> >  		struct read_cache *r = &ctxt->io_read;
>> > -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
>> > +		if ((ctxt->d & SrcMask) == SrcSI)
>> > +			count = ctxt->src.count;
>> > +		else
>> > +			count = ctxt->dst.count;
>> > +		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX],
>> > +				-count);
>> >  
>> 
>> count is unsigned.  Does it sign extend correctly in
>> register_address_increment()?
>> 
> I think it sign extent before register_address_increment() when compiler
> sees -count. count is in the range 1-1024 here, so there shouldn't be a
> problem. By I welcome better suggestions.

There is actually no problem since the 'inc' parameter is signed.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-07-30 14:38 ` [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling Gleb Natapov
  2012-08-05 15:03   ` Avi Kivity
@ 2012-08-06  8:50   ` Avi Kivity
  2012-08-06  8:58     ` Gleb Natapov
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2012-08-06  8:50 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> Optimize "rep ins" by allowing emulator to write back more than one
> datum at a time. Introduce new operand type OP_MEM_STR which tells
> writeback() that dst contains pointer to an array that should be written
> back as opposite to just one data element.
> 
>  	}
>  
> -	memcpy(dest, rc->data + rc->pos, size);
> -	rc->pos += size;
> +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
> +		ctxt->dst.data = rc->data + rc->pos;
> +		ctxt->dst.type = OP_MEM_STR;
> +		ctxt->dst.count = (rc->end - rc->pos) / size;
> +		rc->pos = rc->end;

Should take into account the segment limit.

> +	} else {
> +		memcpy(dest, rc->data + rc->pos, size);
> +		rc->pos += size;
> +	}
>  	return 1;
>  }
>  
> @@ -1500,6 +1507,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
>  		if (rc != X86EMUL_CONTINUE)
>  			return rc;
>  		break;
> +	case OP_MEM_STR:
> +		rc = segmented_write(ctxt,
> +				ctxt->dst.addr.mem,
> +				ctxt->dst.data,
> +				ctxt->dst.bytes * ctxt->dst.count);
> +		if (rc != X86EMUL_CONTINUE)
> +			return rc;
> +		break;
>  	case OP_XMM:
>  		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
>  		break;
> @@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
>  static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
>  		struct operand *op)
>  {
> -	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
> +	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
>  
>  	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
>  	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
> @@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
>  	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
>  	I(SrcImmByte | Mov | Stack, em_push),
>  	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
> -	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
> +	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */

Eww.

>  	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
>  	/* 0x70 - 0x7F */
>  	X16(D(SrcImmByte)),
> @@ -3930,6 +3945,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
>  			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
>  		op->addr.mem.seg = VCPU_SREG_ES;
>  		op->val = 0;
> +		op->count = 1;
>  		break;
>  	case OpDX:
>  		op->type = OP_REG;
> @@ -3973,6 +3989,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
>  			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
>  		op->addr.mem.seg = seg_override(ctxt);
>  		op->val = 0;
> +		op->count = 1;
>  		break;
>  	case OpImmFAddr:
>  		op->type = OP_IMM;
> @@ -4513,8 +4530,14 @@ writeback:
>  		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
>  
>  	if (ctxt->rep_prefix && (ctxt->d & String)) {
> +		unsigned int count;
>  		struct read_cache *r = &ctxt->io_read;
> -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> +		if ((ctxt->d & SrcMask) == SrcSI)
> +			count = ctxt->src.count;
> +		else
> +			count = ctxt->dst.count;

Does this work correctly for 'rep movs' and friends?


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06  8:50   ` Avi Kivity
@ 2012-08-06  8:58     ` Gleb Natapov
  2012-08-06  9:28       ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-08-06  8:58 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> > Optimize "rep ins" by allowing emulator to write back more than one
> > datum at a time. Introduce new operand type OP_MEM_STR which tells
> > writeback() that dst contains pointer to an array that should be written
> > back as opposite to just one data element.
> > 
> >  	}
> >  
> > -	memcpy(dest, rc->data + rc->pos, size);
> > -	rc->pos += size;
> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
> > +		ctxt->dst.data = rc->data + rc->pos;
> > +		ctxt->dst.type = OP_MEM_STR;
> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
> > +		rc->pos = rc->end;
> 
> Should take into account the segment limit.
> 
It does. During write back. pio_in_emulated() should linearize() address
before calculating page boundary, but this is (minor) bug unrelated to the patch
series.

> > +	} else {
> > +		memcpy(dest, rc->data + rc->pos, size);
> > +		rc->pos += size;
> > +	}
> >  	return 1;
> >  }
> >  
> > @@ -1500,6 +1507,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
> >  		if (rc != X86EMUL_CONTINUE)
> >  			return rc;
> >  		break;
> > +	case OP_MEM_STR:
> > +		rc = segmented_write(ctxt,
> > +				ctxt->dst.addr.mem,
> > +				ctxt->dst.data,
> > +				ctxt->dst.bytes * ctxt->dst.count);
> > +		if (rc != X86EMUL_CONTINUE)
> > +			return rc;
> > +		break;
> >  	case OP_XMM:
> >  		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
> >  		break;
> > @@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
> >  static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
> >  		struct operand *op)
> >  {
> > -	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
> > +	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
> >  
> >  	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
> >  	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
> > @@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
> >  	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
> >  	I(SrcImmByte | Mov | Stack, em_push),
> >  	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
> > -	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
> > +	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
> 
> Eww.
This brings us back to the question what alignment check is doing in
linearize :)

> 
> >  	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
> >  	/* 0x70 - 0x7F */
> >  	X16(D(SrcImmByte)),
> > @@ -3930,6 +3945,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
> >  			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
> >  		op->addr.mem.seg = VCPU_SREG_ES;
> >  		op->val = 0;
> > +		op->count = 1;
> >  		break;
> >  	case OpDX:
> >  		op->type = OP_REG;
> > @@ -3973,6 +3989,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
> >  			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
> >  		op->addr.mem.seg = seg_override(ctxt);
> >  		op->val = 0;
> > +		op->count = 1;
> >  		break;
> >  	case OpImmFAddr:
> >  		op->type = OP_IMM;
> > @@ -4513,8 +4530,14 @@ writeback:
> >  		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
> >  
> >  	if (ctxt->rep_prefix && (ctxt->d & String)) {
> > +		unsigned int count;
> >  		struct read_cache *r = &ctxt->io_read;
> > -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> > +		if ((ctxt->d & SrcMask) == SrcSI)
> > +			count = ctxt->src.count;
> > +		else
> > +			count = ctxt->dst.count;
> 
> Does this work correctly for 'rep movs' and friends?
> 
(src|dst).count is initialized to 1 during decode, so anything that does
not touch "count" behaves exactly like before.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06  8:58     ` Gleb Natapov
@ 2012-08-06  9:28       ` Avi Kivity
  2012-08-06 11:05         ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2012-08-06  9:28 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 08/06/2012 11:58 AM, Gleb Natapov wrote:
> On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
>> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
>> > Optimize "rep ins" by allowing emulator to write back more than one
>> > datum at a time. Introduce new operand type OP_MEM_STR which tells
>> > writeback() that dst contains pointer to an array that should be written
>> > back as opposite to just one data element.
>> > 
>> >  	}
>> >  
>> > -	memcpy(dest, rc->data + rc->pos, size);
>> > -	rc->pos += size;
>> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
>> > +		ctxt->dst.data = rc->data + rc->pos;
>> > +		ctxt->dst.type = OP_MEM_STR;
>> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
>> > +		rc->pos = rc->end;
>> 
>> Should take into account the segment limit.
>> 
> It does. During write back. pio_in_emulated() should linearize() address
> before calculating page boundary, but this is (minor) bug unrelated to the patch
> series.

I see, yes, this problem preexists.

However, in normal conditions, non-repeating instructions will not reach
the emulator at all since they will fault in the guest (or in the shadow
mmu, which will reflect the fault to the guest).  Here, the first
iteration may fit in the segment but the second will not, so this will fail.

It's not a huge problem since no guest does this.

>> > @@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
>> >  static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
>> >  		struct operand *op)
>> >  {
>> > -	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
>> > +	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
>> >  
>> >  	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
>> >  	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
>> > @@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
>> >  	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
>> >  	I(SrcImmByte | Mov | Stack, em_push),
>> >  	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
>> > -	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
>> > +	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
>> 
>> Eww.
> This brings us back to the question what alignment check is doing in
> linearize :)

It's checking alignment...

Let's see how we would fix this mess.  We need to move linearization
(and virt->phys translation) to the decode stage, or perhaps the
execution state, but before instruction dispatch.  This would cause all
the various exceptions to be checked against before execution, and would
avoid double translation for RMW operands.


>> >  		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
>> >  
>> >  	if (ctxt->rep_prefix && (ctxt->d & String)) {
>> > +		unsigned int count;
>> >  		struct read_cache *r = &ctxt->io_read;
>> > -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
>> > +		if ((ctxt->d & SrcMask) == SrcSI)
>> > +			count = ctxt->src.count;
>> > +		else
>> > +			count = ctxt->dst.count;
>> 
>> Does this work correctly for 'rep movs' and friends?
>> 
> (src|dst).count is initialized to 1 during decode, so anything that does
> not touch "count" behaves exactly like before.

Ok.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06  9:28       ` Avi Kivity
@ 2012-08-06 11:05         ` Gleb Natapov
  2012-08-06 11:39           ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-08-06 11:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Aug 06, 2012 at 12:28:05PM +0300, Avi Kivity wrote:
> On 08/06/2012 11:58 AM, Gleb Natapov wrote:
> > On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
> >> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> >> > Optimize "rep ins" by allowing emulator to write back more than one
> >> > datum at a time. Introduce new operand type OP_MEM_STR which tells
> >> > writeback() that dst contains pointer to an array that should be written
> >> > back as opposite to just one data element.
> >> > 
> >> >  	}
> >> >  
> >> > -	memcpy(dest, rc->data + rc->pos, size);
> >> > -	rc->pos += size;
> >> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
> >> > +		ctxt->dst.data = rc->data + rc->pos;
> >> > +		ctxt->dst.type = OP_MEM_STR;
> >> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
> >> > +		rc->pos = rc->end;
> >> 
> >> Should take into account the segment limit.
> >> 
> > It does. During write back. pio_in_emulated() should linearize() address
> > before calculating page boundary, but this is (minor) bug unrelated to the patch
> > series.
> 
> I see, yes, this problem preexists.
> 
> However, in normal conditions, non-repeating instructions will not reach
> the emulator at all since they will fault in the guest (or in the shadow
> mmu, which will reflect the fault to the guest).  Here, the first
> iteration may fit in the segment but the second will not, so this will fail.
> 
Correct. And this can happen with or without the patch series.

> It's not a huge problem since no guest does this.
> 
> >> > @@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
> >> >  static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
> >> >  		struct operand *op)
> >> >  {
> >> > -	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
> >> > +	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
> >> >  
> >> >  	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
> >> >  	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
> >> > @@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
> >> >  	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
> >> >  	I(SrcImmByte | Mov | Stack, em_push),
> >> >  	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
> >> > -	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
> >> > +	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
> >> 
> >> Eww.
> > This brings us back to the question what alignment check is doing in
> > linearize :)
> 
> It's checking alignment...
> 
It either check it in a wrong place or we need to mark all instructions
that do not care about alignment, so the patch is not "Eww" :)

> Let's see how we would fix this mess.  We need to move linearization
> (and virt->phys translation) to the decode stage, or perhaps the
> execution state, but before instruction dispatch.  This would cause all
> the various exceptions to be checked against before execution, and would
> avoid double translation for RMW operands.
> 
Execution state likely. String instruction works on segmented address
for instance (address increment/decrement). May be there are others.

> 
> >> >  		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
> >> >  
> >> >  	if (ctxt->rep_prefix && (ctxt->d & String)) {
> >> > +		unsigned int count;
> >> >  		struct read_cache *r = &ctxt->io_read;
> >> > -		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
> >> > +		if ((ctxt->d & SrcMask) == SrcSI)
> >> > +			count = ctxt->src.count;
> >> > +		else
> >> > +			count = ctxt->dst.count;
> >> 
> >> Does this work correctly for 'rep movs' and friends?
> >> 
> > (src|dst).count is initialized to 1 during decode, so anything that does
> > not touch "count" behaves exactly like before.
> 
> Ok.
> 
> 
> -- 
> error compiling committee.c: too many arguments to function

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06 11:05         ` Gleb Natapov
@ 2012-08-06 11:39           ` Avi Kivity
  2012-08-06 11:49             ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2012-08-06 11:39 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 08/06/2012 02:05 PM, Gleb Natapov wrote:
> On Mon, Aug 06, 2012 at 12:28:05PM +0300, Avi Kivity wrote:
>> On 08/06/2012 11:58 AM, Gleb Natapov wrote:
>> > On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
>> >> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
>> >> > Optimize "rep ins" by allowing emulator to write back more than one
>> >> > datum at a time. Introduce new operand type OP_MEM_STR which tells
>> >> > writeback() that dst contains pointer to an array that should be written
>> >> > back as opposite to just one data element.
>> >> > 
>> >> >  	}
>> >> >  
>> >> > -	memcpy(dest, rc->data + rc->pos, size);
>> >> > -	rc->pos += size;
>> >> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
>> >> > +		ctxt->dst.data = rc->data + rc->pos;
>> >> > +		ctxt->dst.type = OP_MEM_STR;
>> >> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
>> >> > +		rc->pos = rc->end;
>> >> 
>> >> Should take into account the segment limit.
>> >> 
>> > It does. During write back. pio_in_emulated() should linearize() address
>> > before calculating page boundary, but this is (minor) bug unrelated to the patch
>> > series.
>> 
>> I see, yes, this problem preexists.
>> 
>> However, in normal conditions, non-repeating instructions will not reach
>> the emulator at all since they will fault in the guest (or in the shadow
>> mmu, which will reflect the fault to the guest).  Here, the first
>> iteration may fit in the segment but the second will not, so this will fail.
>> 
> Correct. And this can happen with or without the patch series.

No, it can't.  Ordinarily ins will trap inside the guest.

> 
>> It's not a huge problem since no guest does this.
>> 
>> >> > @@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
>> >> >  static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
>> >> >  		struct operand *op)
>> >> >  {
>> >> > -	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
>> >> > +	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
>> >> >  
>> >> >  	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
>> >> >  	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
>> >> > @@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
>> >> >  	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
>> >> >  	I(SrcImmByte | Mov | Stack, em_push),
>> >> >  	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
>> >> > -	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
>> >> > +	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
>> >> 
>> >> Eww.
>> > This brings us back to the question what alignment check is doing in
>> > linearize :)
>> 
>> It's checking alignment...
>> 
> It either check it in a wrong place or we need to mark all instructions
> that do not care about alignment, so the patch is not "Eww" :)

If not there, where?

16-byte sse instructions, cmpxchg16b, fxsave/fxrstor all check for 16
byte alignment.  There is also the #AC exception.  I couldn't find in
the SDM whether linear or virtual addresses are checked, but I'm
guessing linear.

Another way to work around this is to pass size/count separately.

> 
>> Let's see how we would fix this mess.  We need to move linearization
>> (and virt->phys translation) to the decode stage, or perhaps the
>> execution state, but before instruction dispatch.  This would cause all
>> the various exceptions to be checked against before execution, and would
>> avoid double translation for RMW operands.
>> 
> Execution state likely. String instruction works on segmented address
> for instance (address increment/decrement). May be there are others.

Practically everything works on segmented addresses.


-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06 11:39           ` Avi Kivity
@ 2012-08-06 11:49             ` Gleb Natapov
  2012-08-06 12:08               ` Avi Kivity
  0 siblings, 1 reply; 19+ messages in thread
From: Gleb Natapov @ 2012-08-06 11:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Aug 06, 2012 at 02:39:52PM +0300, Avi Kivity wrote:
> On 08/06/2012 02:05 PM, Gleb Natapov wrote:
> > On Mon, Aug 06, 2012 at 12:28:05PM +0300, Avi Kivity wrote:
> >> On 08/06/2012 11:58 AM, Gleb Natapov wrote:
> >> > On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
> >> >> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> >> >> > Optimize "rep ins" by allowing emulator to write back more than one
> >> >> > datum at a time. Introduce new operand type OP_MEM_STR which tells
> >> >> > writeback() that dst contains pointer to an array that should be written
> >> >> > back as opposite to just one data element.
> >> >> > 
> >> >> >  	}
> >> >> >  
> >> >> > -	memcpy(dest, rc->data + rc->pos, size);
> >> >> > -	rc->pos += size;
> >> >> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
> >> >> > +		ctxt->dst.data = rc->data + rc->pos;
> >> >> > +		ctxt->dst.type = OP_MEM_STR;
> >> >> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
> >> >> > +		rc->pos = rc->end;
> >> >> 
> >> >> Should take into account the segment limit.
> >> >> 
> >> > It does. During write back. pio_in_emulated() should linearize() address
> >> > before calculating page boundary, but this is (minor) bug unrelated to the patch
> >> > series.
> >> 
> >> I see, yes, this problem preexists.
> >> 
> >> However, in normal conditions, non-repeating instructions will not reach
> >> the emulator at all since they will fault in the guest (or in the shadow
> >> mmu, which will reflect the fault to the guest).  Here, the first
> >> iteration may fit in the segment but the second will not, so this will fail.
> >> 
> > Correct. And this can happen with or without the patch series.
> 
> No, it can't.  Ordinarily ins will trap inside the guest.
> 
We do not go to a guest for each iteration. In fact we will not go to a
guest for exactly "count" iterations.

> > 
> >> It's not a huge problem since no guest does this.
> >> 
> >> >> > @@ -2732,7 +2747,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
> >> >> >  static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
> >> >> >  		struct operand *op)
> >> >> >  {
> >> >> > -	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
> >> >> > +	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
> >> >> >  
> >> >> >  	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
> >> >> >  	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
> >> >> > @@ -3672,7 +3687,7 @@ static struct opcode opcode_table[256] = {
> >> >> >  	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
> >> >> >  	I(SrcImmByte | Mov | Stack, em_push),
> >> >> >  	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
> >> >> > -	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
> >> >> > +	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
> >> >> 
> >> >> Eww.
> >> > This brings us back to the question what alignment check is doing in
> >> > linearize :)
> >> 
> >> It's checking alignment...
> >> 
> > It either check it in a wrong place or we need to mark all instructions
> > that do not care about alignment, so the patch is not "Eww" :)
> 
> If not there, where?
> 
During execution if instruction requires alignment? Why don't you like marking
instruction as Unaligned?
 
> 16-byte sse instructions, cmpxchg16b, fxsave/fxrstor all check for 16
> byte alignment.  There is also the #AC exception.  I couldn't find in
> the SDM whether linear or virtual addresses are checked, but I'm
> guessing linear.
> 
> Another way to work around this is to pass size/count separately.
> 
> > 
> >> Let's see how we would fix this mess.  We need to move linearization
> >> (and virt->phys translation) to the decode stage, or perhaps the
> >> execution state, but before instruction dispatch.  This would cause all
> >> the various exceptions to be checked against before execution, and would
> >> avoid double translation for RMW operands.
> >> 
> > Execution state likely. String instruction works on segmented address
> > for instance (address increment/decrement). May be there are others.
> 
> Practically everything works on segmented addresses.
> 
Hmm, true. We can calculate liner address whenever it is needed and
cache it. If address changes cache is invalidated.

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06 11:49             ` Gleb Natapov
@ 2012-08-06 12:08               ` Avi Kivity
  2012-08-07 12:07                 ` Gleb Natapov
  0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2012-08-06 12:08 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, mtosatti

On 08/06/2012 02:49 PM, Gleb Natapov wrote:
> On Mon, Aug 06, 2012 at 02:39:52PM +0300, Avi Kivity wrote:
>> On 08/06/2012 02:05 PM, Gleb Natapov wrote:
>> > On Mon, Aug 06, 2012 at 12:28:05PM +0300, Avi Kivity wrote:
>> >> On 08/06/2012 11:58 AM, Gleb Natapov wrote:
>> >> > On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
>> >> >> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
>> >> >> > Optimize "rep ins" by allowing emulator to write back more than one
>> >> >> > datum at a time. Introduce new operand type OP_MEM_STR which tells
>> >> >> > writeback() that dst contains pointer to an array that should be written
>> >> >> > back as opposite to just one data element.
>> >> >> > 
>> >> >> >  	}
>> >> >> >  
>> >> >> > -	memcpy(dest, rc->data + rc->pos, size);
>> >> >> > -	rc->pos += size;
>> >> >> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
>> >> >> > +		ctxt->dst.data = rc->data + rc->pos;
>> >> >> > +		ctxt->dst.type = OP_MEM_STR;
>> >> >> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
>> >> >> > +		rc->pos = rc->end;
>> >> >> 
>> >> >> Should take into account the segment limit.
>> >> >> 
>> >> > It does. During write back. pio_in_emulated() should linearize() address
>> >> > before calculating page boundary, but this is (minor) bug unrelated to the patch
>> >> > series.
>> >> 
>> >> I see, yes, this problem preexists.
>> >> 
>> >> However, in normal conditions, non-repeating instructions will not reach
>> >> the emulator at all since they will fault in the guest (or in the shadow
>> >> mmu, which will reflect the fault to the guest).  Here, the first
>> >> iteration may fit in the segment but the second will not, so this will fail.
>> >> 
>> > Correct. And this can happen with or without the patch series.
>> 
>> No, it can't.  Ordinarily ins will trap inside the guest.
>> 
> We do not go to a guest for each iteration. In fact we will not go to a
> guest for exactly "count" iterations.

Ok.

If we linearize and translate pre-execution we can keep track of the
remaining space available in the segment/page and do this correctly.

>> >> > This brings us back to the question what alignment check is doing in
>> >> > linearize :)
>> >> 
>> >> It's checking alignment...
>> >> 
>> > It either check it in a wrong place or we need to mark all instructions
>> > that do not care about alignment, so the patch is not "Eww" :)
>> 
>> If not there, where?
>> 
> During execution if instruction requires alignment? 

Too many (all sse) instructions require alignment.

> Why don't you like marking
> instruction as Unaligned?

Because it's a workaround for a side effect of the implementation.  At a
minimum it needs a comment.

>> >> 
>> > Execution state likely. String instruction works on segmented address
>> > for instance (address increment/decrement). May be there are others.
>> 
>> Practically everything works on segmented addresses.
>> 
> Hmm, true. We can calculate liner address whenever it is needed and
> cache it. If address changes cache is invalidated.

The correct thing is to check before, like the processor does.  For
example linearize also checks write permissions, so for RMW it needs to
check writes before performing the first read.

Also cmovcc performs the checks even though it might not perform the access.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling.
  2012-08-06 12:08               ` Avi Kivity
@ 2012-08-07 12:07                 ` Gleb Natapov
  0 siblings, 0 replies; 19+ messages in thread
From: Gleb Natapov @ 2012-08-07 12:07 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, mtosatti

On Mon, Aug 06, 2012 at 03:08:28PM +0300, Avi Kivity wrote:
> On 08/06/2012 02:49 PM, Gleb Natapov wrote:
> > On Mon, Aug 06, 2012 at 02:39:52PM +0300, Avi Kivity wrote:
> >> On 08/06/2012 02:05 PM, Gleb Natapov wrote:
> >> > On Mon, Aug 06, 2012 at 12:28:05PM +0300, Avi Kivity wrote:
> >> >> On 08/06/2012 11:58 AM, Gleb Natapov wrote:
> >> >> > On Mon, Aug 06, 2012 at 11:50:20AM +0300, Avi Kivity wrote:
> >> >> >> On 07/30/2012 05:38 PM, Gleb Natapov wrote:
> >> >> >> > Optimize "rep ins" by allowing emulator to write back more than one
> >> >> >> > datum at a time. Introduce new operand type OP_MEM_STR which tells
> >> >> >> > writeback() that dst contains pointer to an array that should be written
> >> >> >> > back as opposite to just one data element.
> >> >> >> > 
> >> >> >> >  	}
> >> >> >> >  
> >> >> >> > -	memcpy(dest, rc->data + rc->pos, size);
> >> >> >> > -	rc->pos += size;
> >> >> >> > +	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
> >> >> >> > +		ctxt->dst.data = rc->data + rc->pos;
> >> >> >> > +		ctxt->dst.type = OP_MEM_STR;
> >> >> >> > +		ctxt->dst.count = (rc->end - rc->pos) / size;
> >> >> >> > +		rc->pos = rc->end;
> >> >> >> 
> >> >> >> Should take into account the segment limit.
> >> >> >> 
> >> >> > It does. During write back. pio_in_emulated() should linearize() address
> >> >> > before calculating page boundary, but this is (minor) bug unrelated to the patch
> >> >> > series.
> >> >> 
> >> >> I see, yes, this problem preexists.
> >> >> 
> >> >> However, in normal conditions, non-repeating instructions will not reach
> >> >> the emulator at all since they will fault in the guest (or in the shadow
> >> >> mmu, which will reflect the fault to the guest).  Here, the first
> >> >> iteration may fit in the segment but the second will not, so this will fail.
> >> >> 
> >> > Correct. And this can happen with or without the patch series.
> >> 
> >> No, it can't.  Ordinarily ins will trap inside the guest.
> >> 
> > We do not go to a guest for each iteration. In fact we will not go to a
> > guest for exactly "count" iterations.
> 
> Ok.
> 
> If we linearize and translate pre-execution we can keep track of the
> remaining space available in the segment/page and do this correctly.
> 
Absolutely.

> >> >> > This brings us back to the question what alignment check is doing in
> >> >> > linearize :)
> >> >> 
> >> >> It's checking alignment...
> >> >> 
> >> > It either check it in a wrong place or we need to mark all instructions
> >> > that do not care about alignment, so the patch is not "Eww" :)
> >> 
> >> If not there, where?
> >> 
> > During execution if instruction requires alignment? 
> 
> Too many (all sse) instructions require alignment.
> 
But most do not. Although they do not work on big chunk of data either.

> > Why don't you like marking
> > instruction as Unaligned?
> 
> Because it's a workaround for a side effect of the implementation.  At a
> minimum it needs a comment.
> 
So add a comment and resend? Any other pre-requests before the series is
accepted?

> >> >> 
> >> > Execution state likely. String instruction works on segmented address
> >> > for instance (address increment/decrement). May be there are others.
> >> 
> >> Practically everything works on segmented addresses.
> >> 
> > Hmm, true. We can calculate liner address whenever it is needed and
> > cache it. If address changes cache is invalidated.
> 
> The correct thing is to check before, like the processor does.  For
Are you sure processor checks before? If you do "in" to an address that
lays outside of a segment PIO will not happen before a fault?

> example linearize also checks write permissions, so for RMW it needs to
> check writes before performing the first read.
> 
> Also cmovcc performs the checks even though it might not perform the access.
> 

--
			Gleb.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCHv5 0/4] improve speed of "rep ins" emulation
  2012-07-30 14:38 [PATCHv5 0/4] improve speed of "rep ins" emulation Gleb Natapov
                   ` (3 preceding siblings ...)
  2012-07-30 14:38 ` [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling Gleb Natapov
@ 2012-08-13 14:39 ` Richard W.M. Jones
  4 siblings, 0 replies; 19+ messages in thread
From: Richard W.M. Jones @ 2012-08-13 14:39 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, avi, mtosatti

On Mon, Jul 30, 2012 at 05:38:17PM +0300, Gleb Natapov wrote:
> And now for something completely different.
> 
> So this series (or rather the last patch of it) takes different approach
> to "rep ins" optimization. Instead of writing separate fast path for
> it do the fast path inside emulator itself. This way nobody can say the
> code is not reused!
> 
> Patch 1,2 are now, strictly speaking, not needed, but I think this is still
> nice cleanup and, in case of patch 1, eliminates some ifs at each KVM_RUN ioctl.
>  
> Gleb Natapov (4):
>   Provide userspace IO exit completion callback.
>   KVM: emulator: make x86 emulation modes enum instead of defines
>   KVM: emulator: string_addr_inc() cleanup
>   KVM: emulator: optimize "rep ins" handling.
> 
>  arch/x86/include/asm/kvm_emulate.h |   26 +++++-----
>  arch/x86/include/asm/kvm_host.h    |    1 +
>  arch/x86/kvm/emulate.c             |   48 ++++++++++++++-----
>  arch/x86/kvm/x86.c                 |   92 +++++++++++++++++++++---------------
>  4 files changed, 104 insertions(+), 63 deletions(-)

I tested this patch series and it really helped to improve the
performance of loading the libguestfs -kernel and -initrd:

  https://bugzilla.redhat.com/show_bug.cgi?id=847546

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
virt-p2v converts physical machines to virtual machines.  Boot with a
live CD or over the network (PXE) and turn machines into Xen guests.
http://et.redhat.com/~rjones/virt-p2v

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-08-13 14:39 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-07-30 14:38 [PATCHv5 0/4] improve speed of "rep ins" emulation Gleb Natapov
2012-07-30 14:38 ` [PATCHv5 1/4] Provide userspace IO exit completion callback Gleb Natapov
2012-08-02 19:26   ` Marcelo Tosatti
2012-08-05 14:49     ` Gleb Natapov
2012-07-30 14:38 ` [PATCHv5 2/4] KVM: emulator: make x86 emulation modes enum instead of defines Gleb Natapov
2012-07-30 14:38 ` [PATCHv5 3/4] KVM: emulator: string_addr_inc() cleanup Gleb Natapov
2012-07-30 14:38 ` [PATCHv5 4/4] KVM: emulator: optimize "rep ins" handling Gleb Natapov
2012-08-05 15:03   ` Avi Kivity
2012-08-05 15:18     ` Gleb Natapov
2012-08-05 15:20       ` Avi Kivity
2012-08-06  8:50   ` Avi Kivity
2012-08-06  8:58     ` Gleb Natapov
2012-08-06  9:28       ` Avi Kivity
2012-08-06 11:05         ` Gleb Natapov
2012-08-06 11:39           ` Avi Kivity
2012-08-06 11:49             ` Gleb Natapov
2012-08-06 12:08               ` Avi Kivity
2012-08-07 12:07                 ` Gleb Natapov
2012-08-13 14:39 ` [PATCHv5 0/4] improve speed of "rep ins" emulation Richard W.M. Jones

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).