From: Marcelo Tosatti <mtosatti@redhat.com>
To: Avi Kivity <avi@qumranet.com>
Cc: kvm-devel <kvm@vger.kernel.org>
Subject: Re: RFC: cache_regs in kvm_emulate_pio
Date: Sat, 21 Jun 2008 16:46:39 -0300 [thread overview]
Message-ID: <20080621194639.GA15032@dmt.cnet> (raw)
On Sat, Jun 21, 2008 at 10:04:18AM +0300, Avi Kivity wrote:
>> /*
>> * Sync the rsp and rip registers into the vcpu structure. This allows
>> * registers to be accessed by indexing vcpu->arch.regs.
>> */
>>
>> But I think it just refers to the interface in general, so that nobody
>> would try to access RSP or RIP (and RAX in AMD's case) before calling
>> ->cache_regs().
>>
>
> It refers to the fact that sometimes you don't know which registers you
> refer to, e.g. in the emulator.
How's this?
Test performed with idle UP guest booted with "nohz=off
clocksource=acpi_pm", so most of the exits are acpi timer reads. This
average is from available entries in the dmesg buffer, about 1500.
"regs_available" and "regs_dirty" could be 8 bits, and placed in a hole
instead of in the middle of longs.
avg cycles to exit to qemu:
before: 3376
after: 3181
195 cycles (~= 6.1% improvement)
avg cycles to exit to qemu, handle exit and return to __vcpu_run before
irq_disable:
before: 7482 cycles.
after: 7227 cycles.
255 cycles (~= 3.5% improvement)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 73f43de..ecf26e3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -32,6 +32,7 @@
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/atomic.h>
+#include "kvm_cache_regs.h"
#include "irq.h"
#define PRId64 "d"
@@ -558,7 +559,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write)
struct kvm_run *run = vcpu->run;
set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests);
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_RIP);
run->tpr_access.rip = vcpu->arch.rip;
run->tpr_access.is_write = write;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 238e8f3..8554b37 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -709,21 +709,38 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
rdtscll(vcpu->arch.host_tsc);
}
-static void svm_cache_regs(struct kvm_vcpu *vcpu)
+static void svm_cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
{
struct vcpu_svm *svm = to_svm(vcpu);
- vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
- vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
- vcpu->arch.rip = svm->vmcb->save.rip;
+ switch (regs) {
+ case REGS_GPR:
+ vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+ break;
+ case REGS_RSP:
+ vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+ break;
+ case REGS_RIP:
+ vcpu->arch.rip = svm->vmcb->save.rip;
+ break;
+ }
}
-static void svm_decache_regs(struct kvm_vcpu *vcpu)
+static void svm_decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
{
struct vcpu_svm *svm = to_svm(vcpu);
- svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
- svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
- svm->vmcb->save.rip = vcpu->arch.rip;
+
+ switch (regs) {
+ case REGS_GPR:
+ svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
+ break;
+ case REGS_RSP:
+ svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
+ break;
+ case REGS_RIP:
+ svm->vmcb->save.rip = vcpu->arch.rip;
+ break;
+ }
}
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e4278d..b7a988c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -26,6 +26,7 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
+#include "kvm_cache_regs.h"
#include <asm/io.h>
#include <asm/desc.h>
@@ -707,9 +708,11 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
unsigned long rip;
u32 interruptibility;
- rip = vmcs_readl(GUEST_RIP);
+ cache_regs(vcpu, REGS_RIP);
+ rip = vcpu->arch.rip;
rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- vmcs_writel(GUEST_RIP, rip);
+ vcpu->arch.rip = rip;
+ decache_regs(vcpu, REGS_RIP);
/*
* We emulated an instruction, so temporary interrupt blocking
@@ -935,20 +938,48 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
* Sync the rsp and rip registers into the vcpu structure. This allows
* registers to be accessed by indexing vcpu->arch.regs.
*/
-static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
+static void vmx_cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
{
- vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
- vcpu->arch.rip = vmcs_readl(GUEST_RIP);
+ switch (regs) {
+ case REGS_RSP:
+ vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
+ break;
+ case REGS_RIP:
+ vcpu->arch.rip = vmcs_readl(GUEST_RIP);
+ break;
+ case REGS_GPR:
+ break;
+ }
}
/*
* Syncs rsp and rip back into the vmcs. Should be called after possible
* modification.
*/
+static void vmx_decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set regs)
+{
+ switch (regs) {
+ case REGS_RSP:
+ vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+ break;
+ case REGS_RIP:
+ vmcs_writel(GUEST_RIP, vcpu->arch.rip);
+ break;
+ case REGS_GPR:
+ break;
+ }
+}
+
+static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu)
+{
+ cache_regs(vcpu, REGS_RSP);
+ cache_regs(vcpu, REGS_RIP);
+}
+
static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu)
{
- vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- vmcs_writel(GUEST_RIP, vcpu->arch.rip);
+ decache_regs(vcpu, REGS_RSP);
+ decache_regs(vcpu, REGS_RIP);
}
static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
@@ -3213,8 +3244,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
- .cache_regs = vcpu_load_rsp_rip,
- .decache_regs = vcpu_put_rsp_rip,
+ .cache_regs = vmx_cache_regs,
+ .decache_regs = vmx_decache_regs,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 26b051b..0a8d83b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -19,6 +19,7 @@
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
+#include "kvm_cache_regs.h"
#include <linux/clocksource.h>
#include <linux/kvm.h>
@@ -61,6 +62,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
struct kvm_x86_ops *kvm_x86_ops;
+EXPORT_SYMBOL(kvm_x86_ops);
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -1778,6 +1780,16 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
return dev;
}
+static void flush_regs(struct kvm_vcpu *vcpu)
+{
+ if (__test_and_clear_bit(REGS_RSP, &vcpu->arch.regs_dirty))
+ kvm_x86_ops->decache_regs(vcpu, REGS_RSP);
+ if (__test_and_clear_bit(REGS_RIP, &vcpu->arch.regs_dirty))
+ kvm_x86_ops->decache_regs(vcpu, REGS_RIP);
+ if (__test_and_clear_bit(REGS_GPR, &vcpu->arch.regs_dirty))
+ kvm_x86_ops->decache_regs(vcpu, REGS_GPR);
+}
+
int emulator_read_std(unsigned long addr,
void *val,
unsigned int bytes,
@@ -2060,7 +2072,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
struct decode_cache *c;
vcpu->arch.mmio_fault_cr2 = cr2;
- kvm_x86_ops->cache_regs(vcpu);
+ cache_all_regs(vcpu);
vcpu->mmio_is_write = 0;
vcpu->arch.pio.string = 0;
@@ -2141,7 +2153,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DO_MMIO;
}
- kvm_x86_ops->decache_regs(vcpu);
+ decache_all_regs(vcpu);
kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
if (vcpu->mmio_is_write) {
@@ -2195,7 +2207,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
long delta;
int r;
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_GPR);
if (!io->string) {
if (io->in)
@@ -2205,7 +2217,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
if (io->in) {
r = pio_copy_data(vcpu);
if (r) {
- kvm_x86_ops->cache_regs(vcpu);
+ kvm_x86_ops->cache_regs(vcpu, REGS_GPR);
return r;
}
}
@@ -2228,7 +2240,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSI] += delta;
}
- kvm_x86_ops->decache_regs(vcpu);
+ decache_regs(vcpu, REGS_GPR);
io->count -= io->cur_count;
io->cur_count = 0;
@@ -2302,7 +2314,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
handler);
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_GPR);
memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
kvm_x86_ops->skip_emulated_instruction(vcpu);
@@ -2488,7 +2500,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
unsigned long nr, a0, a1, a2, a3, ret;
int r = 1;
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_GPR);
nr = vcpu->arch.regs[VCPU_REGS_RAX];
a0 = vcpu->arch.regs[VCPU_REGS_RBX];
@@ -2518,7 +2530,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
}
vcpu->arch.regs[VCPU_REGS_RAX] = ret;
- kvm_x86_ops->decache_regs(vcpu);
+ decache_regs(vcpu, REGS_GPR);
++vcpu->stat.hypercalls;
return r;
}
@@ -2537,7 +2549,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
*/
kvm_mmu_zap_all(vcpu->kvm);
- kvm_x86_ops->cache_regs(vcpu);
+ cache_all_regs(vcpu);
kvm_x86_ops->patch_hypercall(vcpu, instruction);
if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu)
!= X86EMUL_CONTINUE)
@@ -2669,7 +2681,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
u32 function, index;
struct kvm_cpuid_entry2 *e, *best;
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_GPR);
function = vcpu->arch.regs[VCPU_REGS_RAX];
index = vcpu->arch.regs[VCPU_REGS_RCX];
vcpu->arch.regs[VCPU_REGS_RAX] = 0;
@@ -2698,7 +2710,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx;
vcpu->arch.regs[VCPU_REGS_RDX] = best->edx;
}
- kvm_x86_ops->decache_regs(vcpu);
+ decache_regs(vcpu, REGS_GPR);
kvm_x86_ops->skip_emulated_instruction(vcpu);
KVMTRACE_5D(CPUID, vcpu, function,
(u32)vcpu->arch.regs[VCPU_REGS_RAX],
@@ -2811,6 +2823,8 @@ again:
}
}
+ flush_regs(vcpu);
+
clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
kvm_inject_pending_timer_irqs(vcpu);
@@ -2865,6 +2879,8 @@ again:
local_irq_enable();
++vcpu->stat.exits;
+ vcpu->arch.regs_available = 0;
+ vcpu->arch.regs_dirty = 0;
/*
* We must have an instruction between local_irq_enable() and
@@ -2884,7 +2900,7 @@ again:
* Profile KVM exit RIPs:
*/
if (unlikely(prof_on == KVM_PROFILING)) {
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_RIP);
profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip);
}
@@ -2969,9 +2985,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
#endif
if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) {
- kvm_x86_ops->cache_regs(vcpu);
+ cache_regs(vcpu, REGS_GPR);
vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret;
- kvm_x86_ops->decache_regs(vcpu);
+ decache_regs(vcpu, REGS_GPR);
}
r = __vcpu_run(vcpu, kvm_run);
@@ -2988,7 +3004,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
vcpu_load(vcpu);
- kvm_x86_ops->cache_regs(vcpu);
+ cache_all_regs(vcpu);
regs->rax = vcpu->arch.regs[VCPU_REGS_RAX];
regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX];
@@ -3049,7 +3065,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.rip = regs->rip;
kvm_x86_ops->set_rflags(vcpu, regs->rflags);
- kvm_x86_ops->decache_regs(vcpu);
+ decache_all_regs(vcpu);
vcpu->arch.exception.pending = false;
@@ -3525,8 +3541,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
}
+ cache_all_regs(vcpu);
kvm_x86_ops->skip_emulated_instruction(vcpu);
- kvm_x86_ops->cache_regs(vcpu);
if (nseg_desc.type & 8)
ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
@@ -3551,7 +3567,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
tr_seg.type = 11;
kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
out:
- kvm_x86_ops->decache_regs(vcpu);
+ decache_all_regs(vcpu);
return ret;
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 851184d..56f01cc 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -120,6 +120,12 @@ enum {
VCPU_SREG_LDTR,
};
+enum kvm_reg_set {
+ REGS_GPR,
+ REGS_RSP,
+ REGS_RIP,
+};
+
#include <asm/kvm_x86_emulate.h>
#define KVM_NR_MEM_OBJS 40
@@ -217,8 +223,10 @@ struct kvm_vcpu_arch {
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
- unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
- unsigned long rip; /* needs vcpu_load_rsp_rip() */
+ unsigned long regs[NR_VCPU_REGS]; /* needs cache_regs() */
+ unsigned long rip; /* needs cache_regs() */
+ u16 regs_available;
+ u16 regs_dirty;
unsigned long cr0;
unsigned long cr2;
@@ -410,8 +418,8 @@ struct kvm_x86_ops {
unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value,
int *exception);
- void (*cache_regs)(struct kvm_vcpu *vcpu);
- void (*decache_regs)(struct kvm_vcpu *vcpu);
+ void (*cache_regs)(struct kvm_vcpu *vcpu, enum kvm_reg_set regs);
+ void (*decache_regs)(struct kvm_vcpu *vcpu, enum kvm_reg_set regs);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
--- /dev/null 2008-06-20 14:49:09.418709161 -0300
+++ b/arch/x86/kvm/kvm_cache_regs.h 2008-06-21 14:12:49.000000000 -0300
@@ -0,0 +1,27 @@
+#ifndef ASM_KVM_CACHE_REGS_H
+#define ASM_KVM_CACHE_REGS_H
+static inline void cache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set reg_set)
+{
+ if (!__test_and_set_bit(reg_set, &vcpu->arch.regs_available))
+ kvm_x86_ops->cache_regs(vcpu, reg_set);
+}
+
+static inline void decache_regs(struct kvm_vcpu *vcpu, enum kvm_reg_set reg_set)
+{
+ __set_bit(reg_set, &vcpu->arch.regs_dirty);
+}
+
+static inline void cache_all_regs(struct kvm_vcpu *vcpu)
+{
+ cache_regs(vcpu, REGS_RSP);
+ cache_regs(vcpu, REGS_RIP);
+ cache_regs(vcpu, REGS_GPR);
+}
+
+static inline void decache_all_regs(struct kvm_vcpu *vcpu)
+{
+ decache_regs(vcpu, REGS_RSP);
+ decache_regs(vcpu, REGS_RIP);
+ decache_regs(vcpu, REGS_GPR);
+}
+#endif
----- End forwarded message -----
----- End forwarded message -----
next reply other threads:[~2008-06-21 19:47 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-06-21 19:46 Marcelo Tosatti [this message]
2008-06-22 5:16 ` RFC: cache_regs in kvm_emulate_pio Avi Kivity
2008-06-22 18:05 ` Marcelo Tosatti
2008-06-24 19:33 ` Marcelo Tosatti
2008-06-26 9:18 ` Avi Kivity
2008-06-26 14:52 ` Marcelo Tosatti
2008-06-26 22:15 ` Marcelo Tosatti
2008-06-27 2:28 ` Marcelo Tosatti
-- strict thread matches above, loose matches on Subject: below --
2008-06-19 23:31 Marcelo Tosatti
2008-06-20 20:30 ` Avi Kivity
2008-06-20 21:24 ` Marcelo Tosatti
2008-06-21 7:04 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080621194639.GA15032@dmt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@qumranet.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox