* [patch] KVM: add MSR based hypercall API
@ 2007-01-09 9:27 Ingo Molnar
[not found] ` <20070109092705.GA8300-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 9:27 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
Subject: [patch] KVM: add MSR based hypercall API
From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
this adds a special MSR based hypercall API to KVM. This is to be used
by paravirtual kernels and virtual drivers.
VMX-only at the moment.
Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
---
drivers/kvm/kvm.h | 5 +++
drivers/kvm/mmu.c | 1
drivers/kvm/vmx.c | 74 +++++++++++++++++++++++++++++++++++++++++++++--
include/linux/kvm_para.h | 64 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 142 insertions(+), 2 deletions(-)
Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
#include "vmx.h"
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
@@ -237,6 +238,8 @@ struct kvm_vcpu {
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
+ struct kvm_vcpu_para_state *para_state;
+ hpa_t vm_syscall_hpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
@@ -626,4 +629,6 @@ static inline u32 get_rdx_init_val(void)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
+extern int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa);
+
#endif
Index: linux/drivers/kvm/mmu.c
===================================================================
--- linux.orig/drivers/kvm/mmu.c
+++ linux/drivers/kvm/mmu.c
@@ -719,6 +719,7 @@ hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu,
return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
| (gpa & (PAGE_SIZE-1));
}
+EXPORT_SYMBOL_GPL(gpa_to_hpa);
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -406,10 +406,15 @@ static int vmx_set_msr(struct kvm_vcpu *
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_TIME_STAMP_COUNTER: {
+ case MSR_IA32_TIME_STAMP_COUNTER:
guest_write_tsc(data);
break;
- }
+ /*
+ * This is the 'probe whether the host is KVM' logic:
+ */
+ case MSR_KVM_API_MAGIC:
+ return vcpu_register_para(vcpu, data);
+
default:
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
@@ -1448,6 +1453,71 @@ static int handle_io(struct kvm_vcpu *vc
return 0;
}
+/*
+ * Register the para guest with the host:
+ */
+int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
+{
+ struct kvm_vcpu_para_state *para_state;
+ hpa_t para_state_hpa, vm_syscall_hpa;
+ unsigned char *vm_syscall;
+
+ printk("KVM: guest trying to enter paravirtual mode\n");
+ printk(".... para_state_gpa: %08Lx\n", para_state_gpa);
+
+ /*
+ * Needs to be page aligned:
+ */
+ if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
+ goto err_gp;
+
+ para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
+ printk(".... para_state_hpa: %08Lx\n", para_state_hpa);
+ if (is_error_hpa(para_state_hpa))
+ goto err_gp;
+
+ para_state = (void *)__va(para_state_hpa);
+ printk(".... para_state_hva: %p\n", para_state);
+
+ printk(".... guest version: %d\n", para_state->guest_version);
+ printk(".... size: %d\n", para_state->size);
+
+ para_state->host_version = KVM_PARA_API_VERSION;
+ /*
+ * We cannot support guests that try to register themselves
+ * with a newer API version than the host supports:
+ */
+ if (para_state->guest_version > KVM_PARA_API_VERSION) {
+ para_state->ret = -EINVAL;
+ goto err_skip;
+ }
+
+ vm_syscall_hpa = gpa_to_hpa(vcpu, para_state->vm_syscall_addr);
+ printk(".... vm_syscall_hpa: %08Lx\n", vm_syscall_hpa);
+ if (is_error_hpa(vm_syscall_hpa)) {
+ para_state->ret = -EINVAL;
+ goto err_skip;
+ }
+
+ printk("KVM: para guest successfully registered.\n");
+ vcpu->para_state = para_state;
+ vcpu->vm_syscall_hpa = vm_syscall_hpa;
+
+ vm_syscall = __va(vm_syscall_hpa);
+ /*
+ * Patch in the VMCALL instruction:
+ */
+ vm_syscall[0] = 0x0f;
+ vm_syscall[1] = 0x01;
+ vm_syscall[2] = 0xc1;
+
+ para_state->ret = 0;
+err_skip:
+ return 0;
+err_gp:
+ return 1;
+}
+
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
Index: linux/include/linux/kvm_para.h
===================================================================
--- /dev/null
+++ linux/include/linux/kvm_para.h
@@ -0,0 +1,64 @@
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+/*
+ * Guest OS interface for KVM paravirtualization
+ *
+ * Note: this interface is considered experimental and may change without
+ * notice.
+ */
+
+#define KVM_CR3_CACHE_SIZE 4
+
+struct kvm_cr3_cache_entry {
+ u64 guest_cr3;
+ u64 host_cr3;
+};
+
+struct kvm_cr3_cache {
+ struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
+ u32 max_idx;
+};
+
+/*
+ * Per-VCPU descriptor area shared between guest and host. Writable to
+ * both guest and host. Registered with the host by the guest when
+ * a guest acknowledges paravirtual mode.
+ */
+struct kvm_vcpu_para_state {
+ /*
+ * API version information for compatibility. If there's any support
+ * mismatch (too old host trying to execute too new guest) then
+ * the host will deny entry into paravirtual mode. Any other
+ * combination (new host + old guest and new host + new guest)
+ * is supposed to work - new host versions will support all old
+ * guest API versions.
+ */
+ u32 guest_version;
+ u32 host_version;
+ u32 size;
+ u32 ret;
+
+ /*
+ * The address of the vm exit instruction (VMCALL or VMMCALL),
+ * which the host will patch according to the CPU model the
+ * VM runs on:
+ */
+ u64 vm_syscall_addr;
+
+ struct kvm_cr3_cache cr3_cache;
+
+} __attribute__ ((aligned(PAGE_SIZE)));
+
+#define KVM_PARA_API_VERSION 1
+
+/*
+ * This is used for an RDMSR's ECX parameter to probe for a KVM host.
+ * Hopefully no CPU vendor will use up this number. This is placed well
+ * out of way of the typical space occupied by CPU vendors' MSR indices,
+ * and we think (or at least hope) it wont be occupied in the future
+ * either.
+ */
+#define MSR_KVM_API_MAGIC 0x87655678
+
+#endif
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109092705.GA8300-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 9:58 ` Avi Kivity
[not found] ` <45A36758.1000808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 9:58 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> Subject: [patch] KVM: add MSR based hypercall API
> From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
>
> this adds a special MSR based hypercall API to KVM. This is to be used
> by paravirtual kernels and virtual drivers.
>
> VMX-only at the moment.
>
> Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
> ---
> drivers/kvm/kvm.h | 5 +++
> drivers/kvm/mmu.c | 1
> drivers/kvm/vmx.c | 74 +++++++++++++++++++++++++++++++++++++++++++++--
> include/linux/kvm_para.h | 64 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 142 insertions(+), 2 deletions(-)
>
> Index: linux/drivers/kvm/kvm.h
> ===================================================================
> --- linux.orig/drivers/kvm/kvm.h
> +++ linux/drivers/kvm/kvm.h
> @@ -14,6 +14,7 @@
>
> #include "vmx.h"
> #include <linux/kvm.h>
> +#include <linux/kvm_para.h>
>
> #define CR0_PE_MASK (1ULL << 0)
> #define CR0_TS_MASK (1ULL << 3)
> @@ -237,6 +238,8 @@ struct kvm_vcpu {
> unsigned long cr0;
> unsigned long cr2;
> unsigned long cr3;
> + struct kvm_vcpu_para_state *para_state;
>
Do we want this as part of kvm_vcpu or kvm? I can see arguments for
both views.
> + hpa_t vm_syscall_hpa;
>
This should be a gpa so it can be migrated, and so we can support guest
paging. Should also be named hypercall to avoid confusion with the
syscall protocol.
> Index: linux/drivers/kvm/vmx.c
> ===================================================================
> --- linux.orig/drivers/kvm/vmx.c
> +++ linux/drivers/kvm/vmx.c
> @@ -406,10 +406,15 @@ static int vmx_set_msr(struct kvm_vcpu *
> case MSR_IA32_SYSENTER_ESP:
> vmcs_write32(GUEST_SYSENTER_ESP, data);
> break;
> - case MSR_IA32_TIME_STAMP_COUNTER: {
> + case MSR_IA32_TIME_STAMP_COUNTER:
> guest_write_tsc(data);
> break;
> - }
> + /*
> + * This is the 'probe whether the host is KVM' logic:
> + */
> + case MSR_KVM_API_MAGIC:
> + return vcpu_register_para(vcpu, data);
> +
>
Why not move this to kvm_set_msr_common()? That will get svm support
for free.
> default:
> msr = find_msr_entry(vcpu, msr_index);
> if (msr) {
> @@ -1448,6 +1453,71 @@ static int handle_io(struct kvm_vcpu *vc
> return 0;
> }
>
> +/*
> + * Register the para guest with the host:
> + */
> +int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
> +{
> + struct kvm_vcpu_para_state *para_state;
> + hpa_t para_state_hpa, vm_syscall_hpa;
> + unsigned char *vm_syscall;
> +
> + printk("KVM: guest trying to enter paravirtual mode\n");
> + printk(".... para_state_gpa: %08Lx\n", para_state_gpa);
> +
> + /*
> + * Needs to be page aligned:
> + */
> + if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
> + goto err_gp;
> +
> + para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
> + printk(".... para_state_hpa: %08Lx\n", para_state_hpa);
> + if (is_error_hpa(para_state_hpa))
> + goto err_gp;
> +
> + para_state = (void *)__va(para_state_hpa);
> + printk(".... para_state_hva: %p\n", para_state);
> +
> + printk(".... guest version: %d\n", para_state->guest_version);
> + printk(".... size: %d\n", para_state->size);
> +
> + para_state->host_version = KVM_PARA_API_VERSION;
> + /*
> + * We cannot support guests that try to register themselves
> + * with a newer API version than the host supports:
> + */
> + if (para_state->guest_version > KVM_PARA_API_VERSION) {
> + para_state->ret = -EINVAL;
>
EINVAL may be different or nonexistent on the guest. We need to define
kvm-specific error codes.
> + goto err_skip;
> + }
> +
> + vm_syscall_hpa = gpa_to_hpa(vcpu, para_state->vm_syscall_addr);
> + printk(".... vm_syscall_hpa: %08Lx\n", vm_syscall_hpa);
> + if (is_error_hpa(vm_syscall_hpa)) {
> + para_state->ret = -EINVAL;
> + goto err_skip;
> + }
> +
> + printk("KVM: para guest successfully registered.\n");
> + vcpu->para_state = para_state;
> + vcpu->vm_syscall_hpa = vm_syscall_hpa;
> +
> + vm_syscall = __va(vm_syscall_hpa);
>
kmap() is needed here (guest pages are not GFP_KERNEL).
> + /*
> + * Patch in the VMCALL instruction:
> + */
> + vm_syscall[0] = 0x0f;
> + vm_syscall[1] = 0x01;
> + vm_syscall[2] = 0xc1;
>
This should be an arch op, and the rest of the function moved to kvm_main.c.
> +
> + para_state->ret = 0;
> +err_skip:
> + return 0;
> +err_gp:
> + return 1;
> +}
> +
> static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> {
> u64 exit_qualification;
> Index: linux/include/linux/kvm_para.h
> ===================================================================
> --- /dev/null
> +++ linux/include/linux/kvm_para.h
> @@ -0,0 +1,64 @@
> +#ifndef __LINUX_KVM_PARA_H
> +#define __LINUX_KVM_PARA_H
> +
> +/*
> + * Guest OS interface for KVM paravirtualization
> + *
> + * Note: this interface is considered experimental and may change without
> + * notice.
> + */
> +
> +#define KVM_CR3_CACHE_SIZE 4
> +
> +struct kvm_cr3_cache_entry {
> + u64 guest_cr3;
> + u64 host_cr3;
> +};
> +
> +struct kvm_cr3_cache {
> + struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
> + u32 max_idx;
> +};
>
This will require an api version bump whenever KVM_CR3_CACHE_SIZE changes.
Better to advertise the gpa of the cache, so it can be unlimited.
> +
> +/*
> + * Per-VCPU descriptor area shared between guest and host. Writable to
> + * both guest and host. Registered with the host by the guest when
> + * a guest acknowledges paravirtual mode.
> + */
> +struct kvm_vcpu_para_state {
> + /*
> + * API version information for compatibility. If there's any support
> + * mismatch (too old host trying to execute too new guest) then
> + * the host will deny entry into paravirtual mode. Any other
> + * combination (new host + old guest and new host + new guest)
> + * is supposed to work - new host versions will support all old
> + * guest API versions.
> + */
> + u32 guest_version;
> + u32 host_version;
> + u32 size;
> + u32 ret;
> +
> + /*
> + * The address of the vm exit instruction (VMCALL or VMMCALL),
> + * which the host will patch according to the CPU model the
> + * VM runs on:
> + */
> + u64 vm_syscall_addr;
>
Please rename to hypercall, and make it explicit that it is not a
virtual address.
> +
> + struct kvm_cr3_cache cr3_cache;
> +
> +} __attribute__ ((aligned(PAGE_SIZE)));
>
Perhaps packed too, to avoid 32/64 ambiguity. Or even better, pad it
explicitly to avoid unaligned fields.
> +
> +#define KVM_PARA_API_VERSION 1
> +
> +/*
> + * This is used for an RDMSR's ECX parameter to probe for a KVM host.
> + * Hopefully no CPU vendor will use up this number. This is placed well
> + * out of way of the typical space occupied by CPU vendors' MSR indices,
> + * and we think (or at least hope) it wont be occupied in the future
> + * either.
> + */
> +#define MSR_KVM_API_MAGIC 0x87655678
> +
> +#endif
>
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A36758.1000808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 10:38 ` Ingo Molnar
[not found] ` <20070109103809.GA24515-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 10:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> >@@ -237,6 +238,8 @@ struct kvm_vcpu {
> > unsigned long cr0;
> > unsigned long cr2;
> > unsigned long cr3;
> >+ struct kvm_vcpu_para_state *para_state;
> >
> Do we want this as part of kvm_vcpu or kvm? I can see arguments for
> both views.
definitely needs to be a property of the vcpu. For example the cr3 cache
is attached to the physical CPU. SMP scalability necessitates this too -
we want to use the para_state to pass information between the guest and
the host without any hypercall.
> >+ hpa_t vm_syscall_hpa;
> >
>
> This should be a gpa so it can be migrated, and so we can support
> guest paging. Should also be named hypercall to avoid confusion with
> the syscall protocol.
ok, done. I have also added a para_state_gpa to the vcpu, to save that
gpa value as well.
> >+ /*
> >+ * This is the 'probe whether the host is KVM' logic:
> >+ */
> >+ case MSR_KVM_API_MAGIC:
> >+ return vcpu_register_para(vcpu, data);
>
> Why not move this to kvm_set_msr_common()? That will get svm support
> for free.
done. I have also created a patch_hypercall CPU arch method, which is
called from the generic code - and this enabled me to move the
registration code to kvm_main.c as well.
> >+ if (para_state->guest_version > KVM_PARA_API_VERSION) {
> >+ para_state->ret = -EINVAL;
>
> EINVAL may be different or nonexistent on the guest. We need to
> define kvm-specific error codes.
ok, done.
> >+ printk("KVM: para guest successfully registered.\n");
> >+ vcpu->para_state = para_state;
> >+ vcpu->vm_syscall_hpa = vm_syscall_hpa;
> >+
> >+ vm_syscall = __va(vm_syscall_hpa);
> >
>
> kmap() is needed here (guest pages are not GFP_KERNEL).
done.
> >+struct kvm_cr3_cache {
> >+ struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
> >+ u32 max_idx;
> >+};
> >
>
> This will require an api version bump whenever KVM_CR3_CACHE_SIZE
> changes.
>
> Better to advertise the gpa of the cache, so it can be unlimited.
the gpa of the cache, and its guest-side size, right?
> >+
> >+/*
> >+ * Per-VCPU descriptor area shared between guest and host. Writable to
> >+ * both guest and host. Registered with the host by the guest when
> >+ * a guest acknowledges paravirtual mode.
> >+ */
> >+struct kvm_vcpu_para_state {
> >+ /*
> >+ * API version information for compatibility. If there's any support
> >+ * mismatch (too old host trying to execute too new guest) then
> >+ * the host will deny entry into paravirtual mode. Any other
> >+ * combination (new host + old guest and new host + new guest)
> >+ * is supposed to work - new host versions will support all old
> >+ * guest API versions.
> >+ */
> >+ u32 guest_version;
> >+ u32 host_version;
> >+ u32 size;
> >+ u32 ret;
> >+
> >+ /*
> >+ * The address of the vm exit instruction (VMCALL or VMMCALL),
> >+ * which the host will patch according to the CPU model the
> >+ * VM runs on:
> >+ */
> >+ u64 vm_syscall_addr;
> >
>
> Please rename to hypercall, and make it explicit that it is not a
> virtual address.
done.
> >+
> >+ struct kvm_cr3_cache cr3_cache;
> >+
> >+} __attribute__ ((aligned(PAGE_SIZE)));
> >
>
> Perhaps packed too, to avoid 32/64 ambiguity. Or even better, pad it
> explicitly to avoid unaligned fields.
it should already be padded - i laid it out that way. (if it's not then
let me know where it's not padded)
updated patch below. (This doesn't yet have the cr3 cache size change.)
Ingo
Subject: [patch] KVM: add MSR based hypercall API
From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
this adds a special MSR based hypercall API to KVM. This is to be
used by paravirtual kernels and virtual drivers.
VMX-only at the moment.
Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
---
drivers/kvm/kvm.h | 5 +++
drivers/kvm/kvm_main.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/kvm/mmu.c | 1
drivers/kvm/vmx.c | 16 ++++++++--
include/linux/kvm_para.h | 72 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 166 insertions(+), 2 deletions(-)
Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
#include "vmx.h"
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
@@ -237,6 +238,9 @@ struct kvm_vcpu {
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
+ struct kvm_vcpu_para_state *para_state;
+ gpa_t para_state_gpa;
+ gpa_t hypercall_gpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
@@ -379,6 +383,7 @@ struct kvm_arch_ops {
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*patch_hypercall)(unsigned char *hypercall_addr);
};
extern struct kvm_stat kvm_stat;
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -1204,6 +1204,74 @@ void realmode_set_cr(struct kvm_vcpu *vc
}
}
+
+/*
+ * Register the para guest with the host:
+ */
+static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
+{
+ struct kvm_vcpu_para_state *para_state;
+ hpa_t para_state_hpa, hypercall_hpa;
+ gpa_t hypercall_gpa;
+
+ printk("KVM: guest trying to enter paravirtual mode\n");
+ printk(".... para_state_gpa: %08Lx\n", para_state_gpa);
+
+ /*
+ * Needs to be page aligned:
+ */
+ if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
+ goto err_gp;
+
+ para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
+ printk(".... para_state_hpa: %08Lx\n", para_state_hpa);
+ if (is_error_hpa(para_state_hpa))
+ goto err_gp;
+
+ para_state = (void *)__va(para_state_hpa);
+ printk(".... para_state_hva: %p\n", para_state);
+
+ printk(".... guest version: %d\n", para_state->guest_version);
+ printk(".... size: %d\n", para_state->size);
+
+ para_state->host_version = KVM_PARA_API_VERSION;
+ /*
+ * We cannot support guests that try to register themselves
+ * with a newer API version than the host supports:
+ */
+ if (para_state->guest_version > KVM_PARA_API_VERSION) {
+ para_state->ret = -KVM_EINVAL;
+ goto err_skip;
+ }
+
+ hypercall_gpa = para_state->hypercall_addr;
+ hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
+ printk(".... hypercall_hpa: %08Lx\n", hypercall_hpa);
+ if (is_error_hpa(hypercall_hpa)) {
+ para_state->ret = -KVM_EINVAL;
+ goto err_skip;
+ }
+
+ printk("KVM: para guest successfully registered.\n");
+ vcpu->para_state = para_state;
+ vcpu->para_state_gpa = para_state_gpa;
+ vcpu->hypercall_gpa = hypercall_gpa;
+
+ if (kvm_arch_ops->patch_hypercall) {
+ unsigned char *hypercall;
+ hypercall = kmap_atomic(pfn_to_page(hypercall_hpa>>PAGE_SHIFT),
+ KM_USER0);
+ kvm_arch_ops->patch_hypercall(hypercall);
+ kunmap_atomic(hypercall, KM_USER0);
+ }
+
+ para_state->ret = 0;
+err_skip:
+ return 0;
+err_gp:
+ return 1;
+}
+
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
@@ -1240,6 +1308,12 @@ int kvm_get_msr_common(struct kvm_vcpu *
data = vcpu->shadow_efer;
break;
#endif
+ /*
+ * This is the 'probe whether the host is KVM' logic:
+ */
+ case MSR_KVM_API_MAGIC:
+ return vcpu_register_para(vcpu, *pdata);
+
default:
printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr);
return 1;
Index: linux/drivers/kvm/mmu.c
===================================================================
--- linux.orig/drivers/kvm/mmu.c
+++ linux/drivers/kvm/mmu.c
@@ -719,6 +719,7 @@ hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu,
return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
| (gpa & (PAGE_SIZE-1));
}
+EXPORT_SYMBOL_GPL(gpa_to_hpa);
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -406,10 +406,10 @@ static int vmx_set_msr(struct kvm_vcpu *
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_TIME_STAMP_COUNTER: {
+ case MSR_IA32_TIME_STAMP_COUNTER:
guest_write_tsc(data);
break;
- }
+
default:
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
@@ -1448,6 +1448,17 @@ static int handle_io(struct kvm_vcpu *vc
return 0;
}
+static void
+vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+ /*
+ * Patch in the VMCALL instruction:
+ */
+ hypercall[0] = 0x0f;
+ hypercall[1] = 0x01;
+ hypercall[2] = 0xc1;
+}
+
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
@@ -2042,6 +2053,7 @@ static struct kvm_arch_ops vmx_arch_ops
.run = vmx_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = vmx_vcpu_setup,
+ .patch_hypercall = vmx_patch_hypercall,
};
static int __init vmx_init(void)
Index: linux/include/linux/kvm_para.h
===================================================================
--- /dev/null
+++ linux/include/linux/kvm_para.h
@@ -0,0 +1,72 @@
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+#include <linux/errno.h>
+
+/*
+ * Guest OS interface for KVM paravirtualization
+ *
+ * Note: this interface is considered experimental and may change without
+ * notice.
+ */
+
+#define KVM_CR3_CACHE_SIZE 4
+
+struct kvm_cr3_cache_entry {
+ u64 guest_cr3;
+ u64 host_cr3;
+};
+
+struct kvm_cr3_cache {
+ u32 max_idx;
+ u32 __pad;
+ struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
+};
+
+/*
+ * Per-VCPU descriptor area shared between guest and host. Writable to
+ * both guest and host. Registered with the host by the guest when
+ * a guest acknowledges paravirtual mode.
+ *
+ * NOTE: all addresses are guest-physical addresses (gpa), to make it
+ * easier for the hypervisor to map between the various addresses.
+ */
+struct kvm_vcpu_para_state {
+ /*
+ * API version information for compatibility. If there's any support
+ * mismatch (too old host trying to execute too new guest) then
+ * the host will deny entry into paravirtual mode. Any other
+ * combination (new host + old guest and new host + new guest)
+ * is supposed to work - new host versions will support all old
+ * guest API versions.
+ */
+ u32 guest_version;
+ u32 host_version;
+ u32 size;
+ u32 ret;
+
+ /*
+ * The address of the vm exit instruction (VMCALL or VMMCALL),
+ * which the host will patch according to the CPU model the
+ * VM runs on:
+ */
+ u64 hypercall_addr;
+
+ struct kvm_cr3_cache cr3_cache;
+
+} __attribute__ ((aligned(PAGE_SIZE)));
+
+#define KVM_PARA_API_VERSION 1
+
+/*
+ * This is used for an RDMSR's ECX parameter to probe for a KVM host.
+ * Hopefully no CPU vendor will use up this number. This is placed well
+ * out of way of the typical space occupied by CPU vendors' MSR indices,
+ * and we think (or at least hope) it wont be occupied in the future
+ * either.
+ */
+#define MSR_KVM_API_MAGIC 0x87655678
+
+#define KVM_EINVAL EINVAL
+
+#endif
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109103809.GA24515-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 11:24 ` Avi Kivity
[not found] ` <45A37B7A.8020709-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 11:24 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> * Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
>
>
>>> @@ -237,6 +238,8 @@ struct kvm_vcpu {
>>> unsigned long cr0;
>>> unsigned long cr2;
>>> unsigned long cr3;
>>> + struct kvm_vcpu_para_state *para_state;
>>>
>>>
>> Do we want this as part of kvm_vcpu or kvm? I can see arguments for
>> both views.
>>
>
> definitely needs to be a property of the vcpu. For example the cr3 cache
> is attached to the physical CPU. SMP scalability necessitates this too -
> we want to use the para_state to pass information between the guest and
> the host without any hypercall.
>
>
Ok, agreed.
It makes guest-side registration a bit more icky (need to execute the code
on all vcpus).
>>> +struct kvm_cr3_cache {
>>> + struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
>>> + u32 max_idx;
>>> +};
>>>
>>>
>> This will require an api version bump whenever KVM_CR3_CACHE_SIZE
>> changes.
>>
>> Better to advertise the gpa of the cache, so it can be unlimited.
>>
>
> the gpa of the cache, and its guest-side size, right?
>
>
Yes (can use max_idx, no?).
BTW, max_idx is ambiguous: is it the last valid entry or one past the
end? entry_count is more explicit IMO.
>>> +
>>> + struct kvm_cr3_cache cr3_cache;
>>> +
>>> +} __attribute__ ((aligned(PAGE_SIZE)));
>>>
>>>
>> Perhaps packed too, to avoid 32/64 ambiguity. Or even better, pad it
>> explicitly to avoid unaligned fields.
>>
>
> it should already be padded - i laid it out that way. (if it's not then
> let me know where it's not padded)
>
>
Right, I was confused by the cr3 cache, but it's the last field.
> +
> + if (kvm_arch_ops->patch_hypercall) {
>
It's safe to assume that the arch op exists.
> +EXPORT_SYMBOL_GPL(gpa_to_hpa);
>
Is this needed now? If so, it needs a kvm_ prefix.
> @@ -1448,6 +1448,17 @@ static int handle_io(struct kvm_vcpu *vc
> return 0;
> }
>
> +static void
> +vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
> +{
> + /*
> + * Patch in the VMCALL instruction:
> + */
> + hypercall[0] = 0x0f;
> + hypercall[1] = 0x01;
> + hypercall[2] = 0xc1;
> +}
> +
> static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> {
> u64 exit_qualification;
> @@ -2042,6 +2053,7 @@ static struct kvm_arch_ops vmx_arch_ops
> .run = vmx_vcpu_run,
> .skip_emulated_instruction = skip_emulated_instruction,
> .vcpu_setup = vmx_vcpu_setup,
> + .patch_hypercall = vmx_patch_hypercall,
> };
>
Where is the vmcall exit handler?
Please add the svm code too. I can test it if you lack amd hardware.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A37B7A.8020709-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 11:36 ` Ingo Molnar
[not found] ` <20070109113628.GA4421-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 11:36 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 1798 bytes --]
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> >>>+struct kvm_cr3_cache {
> >>>+ struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
> >>>+ u32 max_idx;
> >>>+};
> >>>
> >>>
> >>This will require an api version bump whenever KVM_CR3_CACHE_SIZE
> >>changes.
> >>
> >>Better to advertise the gpa of the cache, so it can be unlimited.
> >>
> >
> >the gpa of the cache, and its guest-side size, right?
>
> Yes (can use max_idx, no?).
agreed, done. the cr3 registration now passes in the guest-side max size
of the cr3 cache, which the host-side updates (trims if necessary).
> BTW, max_idx is ambiguous: is it the last valid entry or one past the
> end? entry_count is more explicit IMO.
agreed, done.
> >it should already be padded - i laid it out that way. (if it's not
> >then let me know where it's not padded)
>
> Right, I was confused by the cr3 cache, but it's the last field.
i moved it to the first place and added a padding word - that way all
structure sizes are properly aligned and we cannot combine them in an
unaligned way by accident.
> >+
> >+ if (kvm_arch_ops->patch_hypercall) {
> >
>
> It's safe to assume that the arch op exists.
ok, i changed this and i added the SVM patch function too. AMD's VMMCALL
is 0x0f, 0x01, 0xd9, correct?
> >+EXPORT_SYMBOL_GPL(gpa_to_hpa);
>
> Is this needed now? If so, it needs a kvm_ prefix.
not needed anymore because usage is now in kvm_main.c - i removed the
export.
> >+ .patch_hypercall = vmx_patch_hypercall,
> > };
> >
> Where is the vmcall exit handler?
in my tree, have not sent the patch yet - first want to combine it with
the cr3 feature to have it tested.
> Please add the svm code too. I can test it if you lack amd hardware.
done.
updated patches attached.
Ingo
[-- Attachment #2: kvm-paravirt-host.patch --]
[-- Type: text/plain, Size: 8300 bytes --]
Subject: [patch] KVM: add MSR based hypercall API
From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
this adds a special MSR based hypercall API to KVM. This is to be
used by paravirtual kernels and virtual drivers.
Hypercall patching is provided for both VMX (VMCALL) and SVM (VMMCALL).
Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
---
drivers/kvm/kvm.h | 6 +++
drivers/kvm/kvm_main.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++
drivers/kvm/svm.c | 12 +++++++
drivers/kvm/vmx.c | 16 ++++++++--
include/linux/kvm_para.h | 75 +++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 181 insertions(+), 2 deletions(-)
Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -14,6 +14,7 @@
#include "vmx.h"
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
@@ -237,6 +238,9 @@ struct kvm_vcpu {
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
+ gpa_t para_state_gpa;
+ struct page *para_state_page;
+ gpa_t hypercall_gpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
@@ -379,6 +383,8 @@ struct kvm_arch_ops {
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
+ void (*patch_hypercall)(struct kvm_vcpu *vcpu,
+ unsigned char *hypercall_addr);
};
extern struct kvm_stat kvm_stat;
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -1204,6 +1204,74 @@ void realmode_set_cr(struct kvm_vcpu *vc
}
}
+
+/*
+ * Register the para guest with the host:
+ *
+ * Called when the guest writes MSR_KVM_API_MAGIC; @para_state_gpa is the
+ * guest-physical address of its struct kvm_vcpu_para_state.
+ *
+ * Returns 1 if the address is not page aligned or cannot be translated
+ * (the err_gp path). Returns 0 in all other cases; the outcome is then
+ * reported to the guest through para_state->ret (0 on success,
+ * -KVM_EINVAL if the host refused the registration).
+ */
+static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
+{
+	/* Both patch functions (VMCALL and VMMCALL) write a 3-byte opcode. */
+	const unsigned long hypercall_insn_len = 3;
+	struct kvm_vcpu_para_state *para_state;
+	hpa_t para_state_hpa, hypercall_hpa;
+	struct page *para_state_page;
+	unsigned long hypercall_offset;
+	unsigned char *hypercall;
+	gpa_t hypercall_gpa;
+
+	printk(KERN_DEBUG "KVM: guest trying to enter paravirtual mode\n");
+	printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
+
+	/*
+	 * Needs to be page aligned:
+	 */
+	if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
+		goto err_gp;
+
+	para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
+	printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
+	if (is_error_hpa(para_state_hpa))
+		goto err_gp;
+
+	para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
+	para_state = kmap_atomic(para_state_page, KM_USER0);
+
+	printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
+	printk(KERN_DEBUG ".... size: %d\n", para_state->size);
+
+	para_state->host_version = KVM_PARA_API_VERSION;
+	/*
+	 * We cannot support guests that try to register themselves
+	 * with a newer API version than the host supports:
+	 */
+	if (para_state->guest_version > KVM_PARA_API_VERSION) {
+		para_state->ret = -KVM_EINVAL;
+		goto err_kunmap_skip;
+	}
+
+	hypercall_gpa = para_state->hypercall_gpa;
+	hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
+	printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
+	if (is_error_hpa(hypercall_hpa)) {
+		para_state->ret = -KVM_EINVAL;
+		goto err_kunmap_skip;
+	}
+
+	/*
+	 * The stub address is guest-controlled and need not be page
+	 * aligned. Refuse a stub whose opcode bytes would run past the
+	 * end of the page we are about to map:
+	 */
+	hypercall_offset = hypercall_hpa & ~PAGE_MASK;
+	if (hypercall_offset > PAGE_SIZE - hypercall_insn_len) {
+		para_state->ret = -KVM_EINVAL;
+		goto err_kunmap_skip;
+	}
+
+	printk(KERN_DEBUG "KVM: para guest successfully registered.\n");
+	vcpu->para_state_page = para_state_page;
+	vcpu->para_state_gpa = para_state_gpa;
+	vcpu->hypercall_gpa = hypercall_gpa;
+
+	/*
+	 * kmap_atomic() hands back the start of the page, so the in-page
+	 * offset has to be re-applied before patching - otherwise the
+	 * opcode lands at offset 0 of the page instead of in the stub:
+	 */
+	hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
+				KM_USER1);
+	kvm_arch_ops->patch_hypercall(vcpu, hypercall + hypercall_offset);
+	kunmap_atomic(hypercall, KM_USER1);
+
+	para_state->ret = 0;
+err_kunmap_skip:
+	kunmap_atomic(para_state, KM_USER0);
+	return 0;
+err_gp:
+	return 1;
+}
+
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
u64 data;
@@ -1306,6 +1374,12 @@ int kvm_set_msr_common(struct kvm_vcpu *
case MSR_IA32_APICBASE:
vcpu->apic_base = data;
break;
+ /*
+ * This is the 'probe whether the host is KVM' logic:
+ */
+ case MSR_KVM_API_MAGIC:
+ return vcpu_register_para(vcpu, data);
+
default:
printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr);
return 1;
Index: linux/drivers/kvm/svm.c
===================================================================
--- linux.orig/drivers/kvm/svm.c
+++ linux/drivers/kvm/svm.c
@@ -1642,6 +1642,17 @@ static int is_disabled(void)
return 0;
}
+/*
+ * Overwrite the first three bytes at @hypercall with the VMMCALL
+ * opcode (0f 01 d9). @vcpu is not used.
+ */
+static void
+svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+	static const unsigned char vmmcall_insn[] = { 0x0f, 0x01, 0xd9 };
+	unsigned int i;
+
+	for (i = 0; i < sizeof(vmmcall_insn); i++)
+		hypercall[i] = vmmcall_insn[i];
+}
+
static struct kvm_arch_ops svm_arch_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -1689,6 +1700,7 @@ static struct kvm_arch_ops svm_arch_ops
.run = svm_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = svm_vcpu_setup,
+ .patch_hypercall = svm_patch_hypercall,
};
static int __init svm_init(void)
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -406,10 +406,10 @@ static int vmx_set_msr(struct kvm_vcpu *
case MSR_IA32_SYSENTER_ESP:
vmcs_write32(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_TIME_STAMP_COUNTER: {
+ case MSR_IA32_TIME_STAMP_COUNTER:
guest_write_tsc(data);
break;
- }
+
default:
msr = find_msr_entry(vcpu, msr_index);
if (msr) {
@@ -1448,6 +1448,17 @@ static int handle_io(struct kvm_vcpu *vc
return 0;
}
+/*
+ * Overwrite the first three bytes at @hypercall with the VMCALL
+ * opcode (0f 01 c1). @vcpu is not used.
+ */
+static void
+vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
+{
+	static const unsigned char vmcall_insn[] = { 0x0f, 0x01, 0xc1 };
+	unsigned int i;
+
+	for (i = 0; i < sizeof(vmcall_insn); i++)
+		hypercall[i] = vmcall_insn[i];
+}
+
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u64 exit_qualification;
@@ -2042,6 +2053,7 @@ static struct kvm_arch_ops vmx_arch_ops
.run = vmx_vcpu_run,
.skip_emulated_instruction = skip_emulated_instruction,
.vcpu_setup = vmx_vcpu_setup,
+ .patch_hypercall = vmx_patch_hypercall,
};
static int __init vmx_init(void)
Index: linux/include/linux/kvm_para.h
===================================================================
--- /dev/null
+++ linux/include/linux/kvm_para.h
@@ -0,0 +1,75 @@
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+#include <linux/errno.h>
+
+/*
+ * Guest OS interface for KVM paravirtualization
+ *
+ * Note: this interface is considered experimental and may change without
+ * notice.
+ */
+
+/* Number of slots in struct kvm_cr3_cache's entry[] array. */
+#define KVM_CR3_CACHE_SIZE 4
+
+/*
+ * One cache slot: a pair of 64-bit cr3 values.
+ * NOTE(review): presumably a guest cr3 and the host cr3 that corresponds
+ * to it - nothing in this patch reads or writes these yet; confirm
+ * against the cr3-cache patch.
+ */
+struct kvm_cr3_cache_entry {
+ u64 guest_cr3;
+ u64 host_cr3;
+};
+
+/*
+ * The cr3 cache itself: a small header followed by a fixed-size
+ * array of entries.
+ */
+struct kvm_cr3_cache {
+ /* a count of entries, i.e. not the index of the last valid one */
+ u32 entry_count;
+ /* explicit padding word: keeps the u64-based entry[] at offset 8 */
+ u32 __pad;
+ struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
+};
+
+/*
+ * Per-VCPU descriptor area shared between guest and host. Writable to
+ * both guest and host. Registered with the host by the guest when
+ * a guest acknowledges paravirtual mode.
+ *
+ * NOTE: all addresses are guest-physical addresses (gpa), to make it
+ * easier for the hypervisor to map between the various addresses.
+ *
+ * The structure is page aligned: the host rejects a registration
+ * whose descriptor address is not page aligned.
+ */
+struct kvm_vcpu_para_state {
+ /*
+ * API version information for compatibility. If there's any support
+ * mismatch (too old host trying to execute too new guest) then
+ * the host will deny entry into paravirtual mode. Any other
+ * combination (new host + old guest and new host + new guest)
+ * is supposed to work - new host versions will support all old
+ * guest API versions.
+ */
+ /* filled in by the guest before it registers */
+ u32 guest_version;
+ /* filled in by the host during registration */
+ u32 host_version;
+ /* the guest stores sizeof(struct kvm_vcpu_para_state) here */
+ u32 size;
+ /*
+ * Registration result, written by the host: 0 on success,
+ * -KVM_EINVAL if the host refused the registration.
+ */
+ u32 ret;
+
+ /*
+ * The address of the vm exit instruction (VMCALL or VMMCALL),
+ * which the host will patch according to the CPU model the
+ * VM runs on:
+ */
+ u64 hypercall_gpa;
+
+ /*
+ * Pointer to the struct kvm_cr3_cache CR3 cache:
+ */
+ u64 cr3_cache_gpa;
+
+} __attribute__ ((aligned(PAGE_SIZE)));
+
+#define KVM_PARA_API_VERSION 1
+
+/*
+ * This is used for a WRMSR's ECX parameter (the MSR index) to probe for
+ * a KVM host: the guest writes the guest-physical address of its
+ * struct kvm_vcpu_para_state to this MSR.
+ * Hopefully no CPU vendor will use up this number. This is placed well
+ * out of way of the typical space occupied by CPU vendors' MSR indices,
+ * and we think (or at least hope) it won't be occupied in the future
+ * either.
+ */
+#define MSR_KVM_API_MAGIC 0x87655678
+
+#define KVM_EINVAL EINVAL
+
+#endif
[-- Attachment #3: kvm-paravirt-guest.patch --]
[-- Type: text/plain, Size: 9321 bytes --]
Subject: [patch] KVM: paravirtual guest support
From: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
this enables a CONFIG_PARAVIRT Linux guest kernel to establish a
hypercall API to a KVM host. If successfully registered, then the Linux
guest will optimize a few things like its interrupt controller, io-delay
and it also registers its cr3-cache structures with the host. (but the
host will not touch those, just yet)
(this is fully backwards compatible - if the WRMSR fails then the Linux
guest continues to execute as a native kernel.)
Signed-off-by: Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org>
---
arch/i386/kernel/paravirt.c | 238 +++++++++++++++++++++++++++++++++++++++++++
drivers/serial/8250.c | 3
include/asm-i386/processor.h | 1
include/linux/paravirt.h | 12 ++
init/main.c | 6 -
5 files changed, 257 insertions(+), 3 deletions(-)
Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -20,6 +20,7 @@
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/start_kernel.h>
+#include <linux/kvm_para.h>
#include <asm/bug.h>
#include <asm/paravirt.h>
@@ -33,6 +34,9 @@
#include <asm/apic.h>
#include <asm/tlbflush.h>
+#include <asm/i8259.h>
+#include <io_ports.h>
+
/* nop stub */
static void native_nop(void)
{
@@ -683,3 +687,237 @@ struct paravirt_ops paravirt_ops = {
.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,
};
+
+/*
+ * KVM paravirtualization optimizations:
+ */
+int kvm_paravirt;
+
+/*
+ * No need for any "IO delay" on KVM:
+ *
+ * Deliberately empty. kvm_paravirt_setup() installs this as
+ * paravirt_ops.io_delay once the guest has registered with the host.
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state);
+
+/*
+ * Load 'val' into %cr3 with a plain mov. Installed as
+ * paravirt_ops.write_cr3 by kvm_paravirt_setup().
+ */
+static fastcall void kvm_write_cr3(unsigned long val)
+{
+ asm volatile("movl %0,%%cr3": :"r" (val));
+}
+
+/*
+ * Avoid the VM exit upon cr3 load by using the cached
+ * ->active_mm->pgd value:
+ *
+ * i.e. cr3 is rewritten with the physical address (__pa()) of the
+ * current task's active_mm pgd; cr3 itself is never read here.
+ */
+static void kvm_flush_tlb_user(void)
+{
+ kvm_write_cr3(__pa(current->active_mm->pgd));
+}
+
+/*
+ * Per-address flush: simply forwards 'addr' to
+ * __native_flush_tlb_single().
+ */
+static void kvm_flush_tlb_single(u32 addr)
+{
+ __native_flush_tlb_single(addr);
+}
+/*
+ * Disable global pages, do a flush, then enable global pages:
+ *
+ * The cr4 value read on entry is written back unchanged at the end,
+ * so PGE is only re-enabled if it was set to begin with.
+ */
+static fastcall void kvm_flush_tlb_kernel(void)
+{
+ unsigned long orig_cr4 = read_cr4();
+
+ /* clear only the PGE bit, leave every other cr4 bit as it was */
+ write_cr4(orig_cr4 & ~X86_CR4_PGE);
+ kvm_flush_tlb_user();
+ write_cr4(orig_cr4);
+}
+
+/*
+ * Simplified i8259A controller handling:
+ */
+/*
+ * Mask 'irq' and acknowledge it, under i8259A_lock with interrupts
+ * disabled: set its bit in cached_irq_mask, write the updated mask to
+ * the owning PIC's IMR, then issue the 'Specific EOI' command(s).
+ */
+static void mask_and_ack_kvm(unsigned int irq)
+{
+ unsigned int irqmask = 1 << irq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8259A_lock, flags);
+ cached_irq_mask |= irqmask;
+
+ /* bit 3 set: the irq is handled via the slave PIC */
+ if (irq & 8) {
+ outb(cached_slave_mask, PIC_SLAVE_IMR);
+ outb(0x60+(irq&7),PIC_SLAVE_CMD);/* 'Specific EOI' to slave */
+ outb(0x60+PIC_CASCADE_IR,PIC_MASTER_CMD); /* 'Specific EOI' to master-IRQ2 */
+ } else {
+ outb(cached_master_mask, PIC_MASTER_IMR);
+ /* 'Specific EOI' to master: */
+ outb(0x60+irq, PIC_MASTER_CMD);
+ }
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+/*
+ * Mask 'irq': set its bit in cached_irq_mask and write the updated
+ * mask to the IMR of the PIC the irq is handled by. Runs under
+ * i8259A_lock with interrupts disabled.
+ */
+static void disable_kvm_irq(unsigned int irq)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&i8259A_lock, irqflags);
+
+	cached_irq_mask |= 1 << irq;
+	if (!(irq & 8))
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	else
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+
+	spin_unlock_irqrestore(&i8259A_lock, irqflags);
+}
+
+/*
+ * Unmask 'irq': clear its bit in cached_irq_mask and write the updated
+ * mask to the IMR of the PIC the irq is handled by. Runs under
+ * i8259A_lock with interrupts disabled.
+ */
+static void enable_kvm_irq(unsigned int irq)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&i8259A_lock, irqflags);
+
+	cached_irq_mask &= ~(1 << irq);
+	if (!(irq & 8))
+		outb(cached_master_mask, PIC_MASTER_IMR);
+	else
+		outb(cached_slave_mask, PIC_SLAVE_IMR);
+
+	spin_unlock_irqrestore(&i8259A_lock, irqflags);
+}
+
+/*
+ * irq_chip that kvm_init_IRQ() assigns to the 16 old-style IRQs.
+ * .mask and .disable share the same handler.
+ */
+static struct irq_chip kvm_chip = {
+ .name = "XT-PIC",
+ .mask = disable_kvm_irq,
+ .disable = disable_kvm_irq,
+ .unmask = enable_kvm_irq,
+ .mask_ack = mask_and_ack_kvm,
+};
+
+/*
+ * IRQ setup used once the guest runs paravirtualized (installed as
+ * paravirt_ops.init_IRQ by kvm_paravirt_setup()):
+ *
+ *  - initialize the local APIC (if configured) and the 8259A
+ *  - reset every irq_desc[]; IRQs 0-15 get kvm_chip with
+ *    handle_level_irq, all others get no_irq_chip
+ *  - install interrupt gates for the external vectors, skipping
+ *    SYSCALL_VECTOR
+ *  - run intr_init_hook() and irq_ctx_init() for the current CPU
+ */
+static void __init kvm_init_IRQ(void)
+{
+ int i;
+
+ printk("init KVM IRQ controller\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+ init_bsp_APIC();
+#endif
+ init_8259A(0);
+
+ for (i = 0; i < NR_IRQS; i++) {
+ irq_desc[i].status = IRQ_DISABLED;
+ irq_desc[i].action = NULL;
+ irq_desc[i].depth = 1;
+
+ if (i < 16) {
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ set_irq_chip_and_handler_name(i, &kvm_chip,
+ handle_level_irq, "XT");
+ } else {
+ /*
+ * 'high' PCI IRQs filled in on demand
+ */
+ irq_desc[i].chip = &no_irq_chip;
+ }
+ }
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ /* interrupt[] is indexed by irq number: stop once we run out of IRQs */
+ if (i >= NR_IRQS)
+ break;
+ if (vector != SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ /* setup after call gates are initialised (usually add in
+ * the architecture specific gates)
+ */
+ intr_init_hook();
+
+ irq_ctx_init(smp_processor_id());
+}
+
+/*
+ * This is the vm-syscall address - to be patched by the host to
+ * VMCALL (Intel) or VMMCALL (AMD), depending on the CPU model:
+ *
+ * The three NOPs are the placeholder for the 3-byte opcode the host
+ * writes; the trailing 'ret' makes the stub callable, so an unpatched
+ * stub simply returns to its caller.
+ */
+asm (
+ " .globl hypercall_addr \n"
+ " .align 4 \n"
+ " hypercall_addr: \n"
+ " nop \n"
+ " nop \n"
+ " nop \n"
+ " ret \n"
+);
+
+/* the stub above viewed as raw bytes: 3 x nop + ret */
+extern unsigned char hypercall_addr[4];
+
+/*
+ * Try to register the given CPU's para_state descriptor with a KVM host.
+ *
+ * Returns 1 if the host accepted the registration, 0 if the WRMSR
+ * probe failed or the host reported a non-zero para_state->ret.
+ */
+int kvm_guest_register_para(int cpu)
+{
+ struct kvm_vcpu_para_state *para_state = &per_cpu(para_state, cpu);
+
+ printk("kvm guest on VCPU#%d: trying to register para_state %p\n",
+ cpu, para_state);
+ /*
+ * Fill in the descriptor, then write its physical address to
+ * the magic MSR_KVM_API_MAGIC MSR, and thus signal to KVM
+ * that we are entering paravirtualized mode:
+ */
+ para_state->guest_version = KVM_PARA_API_VERSION;
+ /* the host is expected to overwrite this with its own version */
+ para_state->host_version = -1;
+ para_state->size = sizeof(*para_state);
+ para_state->ret = 0;
+ para_state->hypercall_gpa = __pa(hypercall_addr);
+
+ /* a non-zero result from wrmsr_safe() is treated as "no KVM host" */
+ if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) {
+ printk("KVM guest: WRMSR probe failed.\n");
+ return 0;
+ }
+
+ printk("kvm guest: host returned %d\n", para_state->ret);
+ printk("kvm guest: host version: %d\n", para_state->host_version);
+ printk("kvm guest: syscall entry: %02x %02x %02x %02x\n",
+ hypercall_addr[0], hypercall_addr[1],
+ hypercall_addr[2], hypercall_addr[3]);
+ if (para_state->ret) {
+ printk("kvm guest: host refused registration.\n");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Handler for the "kvm_paravirt=" boot parameter.
+ *
+ * A non-zero value makes the guest try to register with a KVM host;
+ * on success the KVM-specific paravirt_ops are installed and
+ * kvm_paravirt stays non-zero. In every other case kvm_paravirt ends
+ * up 0 (or untouched if the value could not be parsed) and the
+ * native ops remain in place. Always returns 1.
+ */
+static int __init kvm_paravirt_setup(char *s)
+{
+	unsigned int enable;
+
+	printk("KVM paravirtualization setup\n");
+	/*
+	 * Parse into an unsigned local: "%u" expects an unsigned int
+	 * pointer, while kvm_paravirt is a plain int.
+	 */
+	if (sscanf(s, "%u", &enable) <= 0)
+		return 1;
+	kvm_paravirt = enable;
+	if (!kvm_paravirt)
+		return 1;
+
+	kvm_paravirt = kvm_guest_register_para(smp_processor_id());
+	if (!kvm_paravirt)
+		return 1;
+
+	printk("KVM paravirtualized: OK\n");
+
+	paravirt_ops.name = "KVM";
+	paravirt_ops.io_delay = kvm_io_delay;
+	paravirt_ops.init_IRQ = kvm_init_IRQ;
+	paravirt_ops.flush_tlb_user = kvm_flush_tlb_user;
+	paravirt_ops.flush_tlb_kernel = kvm_flush_tlb_kernel;
+	paravirt_ops.flush_tlb_single = kvm_flush_tlb_single;
+	paravirt_ops.write_cr3 = kvm_write_cr3;
+	paravirt_ops.paravirt_enabled = 1;
+
+	return 1;
+}
+__setup("kvm_paravirt=", kvm_paravirt_setup);
+
+EXPORT_SYMBOL_GPL(paravirt_ops);
+
Index: linux/drivers/serial/8250.c
===================================================================
--- linux.orig/drivers/serial/8250.c
+++ linux/drivers/serial/8250.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/ioport.h>
+#include <linux/paravirt.h>
#include <linux/init.h>
#include <linux/console.h>
#include <linux/sysrq.h>
@@ -1371,7 +1372,7 @@ static irqreturn_t serial8250_interrupt(
l = l->next;
- if (l == i->head && pass_counter++ > PASS_LIMIT) {
+ if (!paravirt_enabled() && l == i->head && pass_counter++ > PASS_LIMIT) {
/* If we hit this, we're dead. */
printk(KERN_ERR "serial8250: too much work for "
"irq%d\n", irq);
Index: linux/include/asm-i386/processor.h
===================================================================
--- linux.orig/include/asm-i386/processor.h
+++ linux/include/asm-i386/processor.h
@@ -547,7 +547,6 @@ static inline void rep_nop(void)
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
-#define paravirt_enabled() 0
#define __cpuid native_cpuid
static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
Index: linux/include/linux/paravirt.h
===================================================================
--- /dev/null
+++ linux/include/linux/paravirt.h
@@ -0,0 +1,12 @@
+#ifndef __LINUX_PARAVIRT_H
+#define __LINUX_PARAVIRT_H
+
+/*
+ * Paravirtualization support
+ */
+
+#ifndef CONFIG_PARAVIRT
+# define paravirt_enabled() 0
+#endif
+
+#endif
Index: linux/init/main.c
===================================================================
--- linux.orig/init/main.c
+++ linux/init/main.c
@@ -374,7 +374,11 @@ static void __init setup_per_cpu_areas(v
if (size < PERCPU_ENOUGH_ROOM)
size = PERCPU_ENOUGH_ROOM;
#endif
- ptr = alloc_bootmem(size * nr_possible_cpus);
+ /*
+ * Align the area to page size - in case someone page-aligns
+ * per-CPU data, that alignment should be preserved:
+ */
+ ptr = alloc_bootmem_pages(size * nr_possible_cpus);
for_each_possible_cpu(i) {
__per_cpu_offset[i] = ptr - __per_cpu_start;
[-- Attachment #4: Type: text/plain, Size: 347 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #5: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109113628.GA4421-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 12:54 ` Avi Kivity
[not found] ` <45A39095.80005-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 12:54 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> ok, i changed this and i added the SVM patch function too. AMD's VMMCALL
> is 0x0f, 0x01, 0xd9, correct?
>
>
yes.
>> Where is the vmcall exit handler?
>>
>
> in my tree, have not sent the patch yet - first want to combine it with
> the cr3 feature to have it tested.
>
>
I'd like it in the final patch.
btw, we need to define the hypercall protocol. We should aim for most
things (esp. hypercall number and return code) to be passed in
registers. Need to take care of 32/64 bit compatibility from day one.
Patches look good. Only thing missing is printk() log levels.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A39095.80005-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 13:17 ` Ingo Molnar
[not found] ` <20070109131733.GA28431-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 13:17 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 1860 bytes --]
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> >>Where is the vmcall exit handler?
> >>
> >
> >in my tree, have not sent the patch yet - first want to combine it
> >with the cr3 feature to have it tested.
> >
>
> I'd like it in the final patch.
yeah. Right now i have:
patches/kvm-paravirt-host.patch
patches/kvm-paravirt-guest.patch
patches/kvm-paravirt-hypercall-host.patch
patches/kvm-paravirt-hypercall-guest.patch
patches/kvm-paravirt-cr3-cache.patch
this builds and boots fine on VMX, and a test-hypercall is successfully
issued. I've attached a tarball of them.
i've changed the details you mentioned (printk loglevels, etc.), and
some other detail as well: the ->patch_hypercall() thing now patches in
the 'ret' instruction as well. This allowed me to add a -ENOSYS default
entry there, to have defined error behavior in case the hypervisor does
not patch things.
NOTE: i have not updated the cr3 patch to the hypercall API yet (hence
the aliasing bug is not fixed yet), i wanted to get this to you so that
we can think about the hypercall API.
Right now what i have is only good to test that the VMCALL instruction
works - but the API must look different. I'd prefer a register-based
thing so that i can embed hypercalls within Linux without having to go
to a wrapper function. Right now a "call hypercall_addr" is a
regparm-based function entry. I'd like to keep that - but not have a
fixed number of parameters but inlines/macros for all parameter
combinations: 1, 2, 3, 4, 5 param, picked up automatically via use. I.e.
a 2-param call would be:
hypercall(KVM_cr3_miss, cr3);
a 3-param call would be:
hypercall(KVM_api_call1, param1, param2);
a 1-param call would be:
hypercall(KVM_api_call2);
Does this look good to you? I'd like the basic API to be as light as
possible.
Ingo
[-- Attachment #2: kvm-para.tar.gz --]
[-- Type: application/x-gzip, Size: 11758 bytes --]
[-- Attachment #3: Type: text/plain, Size: 347 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109131733.GA28431-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 13:30 ` Ingo Molnar
2007-01-09 13:41 ` Avi Kivity
1 sibling, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 13:30 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> wrote:
> NOTE: i have not updated the cr3 patch to the hypercall API yet (hence
> the aliasing bug is not fixed yet), i wanted to get this to you so
> that we can think about the hypercall API.
>
> Right now what i have is only good to test that the VMCALL instruction
> works - but the API must look differently. I'd prefer a register-based
> thing so that i can embedd hypercalls within Linux without having to
> go to a wrapper function. Right now a "call hypercall_addr" is a
> regparm-based function entry. I'd like to keep that - but not have a
> fixed number of parameters but inlines/macros for all parameter
> combinations: 1, 2, 3, 4, 5 param, picked up automatically via use.
> I.e. a 2-param call would be:
>
> hypercall(KVM_cr3_miss, cr3);
>
> a 3-param call would be:
>
> hypercall(KVM_api_call1, param1, param2);
>
> a 1-param call would be:
>
> hypercall(KVM_api_call2);
i.e. something like the patch below (on top of the previous tarball,
still ad-hoc, these things need to go into a header, and i only have the
1 and 2 param macros done).
this is very tightly integrated into the natural instruction sequence of
functions that call it:
c01209d4 <test_hypercall>:
c01209d4: 55 push %ebp
c01209d5: 89 e5 mov %esp,%ebp
c01209d7: 50 push %eax
c01209d8: 50 push %eax
c01209d9: e8 06 fa ff ff call c01203e4 <hypercall_addr>
c01209de: c7 04 24 48 41 50 c0 movl $0xc0504148,(%esp)
c01209e5: e8 1f a9 00 00 call c012b309 <printk>
c01209ea: e8 f5 f9 ff ff call c01203e4 <hypercall_addr>
c01209ef: c7 04 24 67 41 50 c0 movl $0xc0504167,(%esp)
c01209f6: e8 0e a9 00 00 call c012b309 <printk>
c01209fb: c9 leave
c01209fc: c3 ret
this is as cheap as it gets, and it only clobbers the registers that are
needed. Basically "call hypercall_addr" is equivalent to a syscall trap
instruction.
Ingo
Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -888,29 +888,40 @@ asm (
" ret \n"
);
-extern unsigned char hypercall_addr[4];
+extern unsigned char hypercall_addr[6];
-
-static inline int
-kvm_hypercall(void *param1, void *param2, void *param3, void *param4)
-{
- int ret = -1;
-
- asm (" call hypercall_addr\n"
- : "=g" (ret)
- : "eax" (param1),
- "ecx" (param2),
- "edx" (param3),
- "ebp" (param4));
-
- return ret;
-}
+/*
+ * Hypercall with no parameters besides the hypercall number.
+ *
+ * The number is passed in eax and the result comes back in eax. The
+ * single-letter "a" constraint is what pins the operand to eax; a
+ * string such as "eax" is read as a list of unrelated constraint
+ * letters and does not load the register.
+ */
+#define hypercall1(nr)					\
+({							\
+	int __ret;					\
+							\
+	asm volatile (" call hypercall_addr\n"		\
+		: "=a" (__ret)				\
+		: "a" (nr)				\
+		: "memory"				\
+	);						\
+	__ret;						\
+})
+
+/*
+ * Hypercall with one parameter: number in eax, p1 in ecx, result
+ * returned in eax.
+ */
+#define hypercall2(nr, p1)				\
+({							\
+	int __ret;					\
+							\
+	asm volatile (" call hypercall_addr\n"		\
+		: "=a" (__ret)				\
+		: "a" (nr),				\
+		  "c" (p1)				\
+		: "memory"				\
+	);						\
+	__ret;						\
+})
void test_hypercall(void)
{
- int ret = kvm_hypercall((void *)1, (void *)2, (void *)3, (void *)4);
+ int ret;
+
+ ret = hypercall1(1);
+ printk(KERN_DEBUG "hypercall test #1, ret: %d\n", ret);
- printk(KERN_DEBUG "hypercall test, ret: %d\n", ret);
+ ret = hypercall2(1, 2);
+ printk(KERN_DEBUG "hypercall test #2, ret: %d\n", ret);
}
int kvm_guest_register_para(int cpu)
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -1032,7 +1032,7 @@ static int vmcs_setup_cr3_cache(struct k
cr3_target_values = (msr_val >> 16) & ((1 << 10) - 1);
printk(KERN_DEBUG " cr3 target values: %d\n", cr3_target_values);
if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
- printk(KERN_WARN "KVM: limiting cr3 cache size from %d to %d\n",
+ printk(KERN_WARNING "KVM: limiting cr3 cache size from %d to %d\n",
cr3_target_values, KVM_CR3_CACHE_SIZE);
cr3_target_values = KVM_CR3_CACHE_SIZE;
}
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109131733.GA28431-X9Un+BFzKDI@public.gmane.org>
2007-01-09 13:30 ` Ingo Molnar
@ 2007-01-09 13:41 ` Avi Kivity
[not found] ` <45A39B90.6070908-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 13:41 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> * Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
>
>
>>>> Where is the vmcall exit handler?
>>>>
>>>>
>>> in my tree, have not sent the patch yet - first want to combine it
>>> with the cr3 feature to have it tested.
>>>
>>>
>> I'd like it in the final patch.
>>
>
> yeah. Right now i have:
>
> patches/kvm-paravirt-host.patch
> patches/kvm-paravirt-guest.patch
> patches/kvm-paravirt-hypercall-host.patch
> patches/kvm-paravirt-hypercall-guest.patch
> patches/kvm-paravirt-cr3-cache.patch
>
> this builds and boots fine on VMX, and a test-hypercall is successfully
> issued. I've attached a tarball of them.
>
> i've changed the details you mentioned (printk loglevels, etc.), and
> some other detail as well: the ->patch_hypercall() thing now patches in
> the 'ret' instruction as well. This allowed me to add a -ENOSYS default
> entry there, to have defined error behavior in case the hypervisor does
> not patch things.
>
> NOTE: i have not updated the cr3 patch to the hypercall API yet (hence
> the aliasing bug is not fixed yet), i wanted to get this to you so that
> we can think about the hypercall API.
>
> Right now what i have is only good to test that the VMCALL instruction
> works - but the API must look differently. I'd prefer a register-based
> thing so that i can embedd hypercalls within Linux without having to go
> to a wrapper function. Right now a "call hypercall_addr" is a
> regparm-based function entry. I'd like to keep that - but not have a
> fixed number of parameters but inlines/macros for all parameter
> combinations: 1, 2, 3, 4, 5 param, picked up automatically via use. I.e.
> a 2-param call would be:
>
> hypercall(KVM_cr3_miss, cr3);
>
> a 3-param call would be:
>
> hypercall(KVM_api_call1, param1, param2);
>
> a 1-param call would be:
>
> hypercall(KVM_api_call2);
>
> Does this look good to you? I'd like the basic API to be as light as
> possible.
>
>
Won't 32-bit and 64-bit pick different registers?
We can work around it (call is_long_mode() when decoding the hypercall),
but it kind of defeats the purpose of the optimization, no?
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A39B90.6070908-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 13:53 ` Ingo Molnar
[not found] ` <20070109135318.GA3084-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 13:53 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> > Does this look good to you? I'd like the basic API to be as light as
> > possible.
>
> Won't 32-bit and 64-bit pick different registers?
>
> We can work around it (call is_long_mode() when decoding the
> hypercall), but it kind of defeats the purpose of the optimization,
> no?
well, we can standardize on the 32-bit calling convention: eax, ecx,
edx, ebp, etc. We can do that via the 64-bit asm. So it should be the
same i think - just that a 32-bit guest on a 64-bit host won't be able to
set the high bits of those registers.
I've attached my current patch (on top of the tarball) - that's one idea
of what it could look like. Note that i didn't introduce a syscall
function table in kvm_handle_hypercall() - this will be the more optimal
solution as long as the # of hypercalls is relatively low. That way we only
prepare the parameters that are truly needed.
you are right in that we cannot call the syscall functions directly and
that in practice we'll shuffle things around - but we have to check the
first parameter anyway, and the shuffling isn't /that/ big of a problem.
I wanted to keep the guest-side as low-impact as possible, so that a
native kernel's instruction sequence is not disturbed too much by the
presence of a NOP hypercall.
in fact it would probably be more logical to use the standard syscall
order: eax, ebx, ecx, edx, esi, edi, ebp?
Ingo
Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -888,29 +888,40 @@ asm (
" ret \n"
);
-extern unsigned char hypercall_addr[4];
+extern unsigned char hypercall_addr[6];
-
-static inline int
-kvm_hypercall(void *param1, void *param2, void *param3, void *param4)
-{
- int ret = -1;
-
- asm (" call hypercall_addr\n"
- : "=g" (ret)
- : "eax" (param1),
- "ecx" (param2),
- "edx" (param3),
- "ebp" (param4));
-
- return ret;
-}
+/*
+ * Hypercall with no parameters besides the hypercall number.
+ *
+ * The number is passed in eax and the result comes back in eax (the
+ * host stores its return value in the guest's RAX). The single-letter
+ * "a" constraint is what pins the operand to eax; a string such as
+ * "eax" is read as a list of unrelated constraint letters and does
+ * not load the register.
+ */
+#define hypercall1(nr)					\
+({							\
+	int __ret;					\
+							\
+	asm volatile (" call hypercall_addr\n"		\
+		: "=a" (__ret)				\
+		: "a" (nr)				\
+		: "memory"				\
+	);						\
+	__ret;						\
+})
+
+/*
+ * Hypercall with one parameter: number in eax, p1 in ecx (the host
+ * reads it from the guest's RCX), result returned in eax.
+ */
+#define hypercall2(nr, p1)				\
+({							\
+	int __ret;					\
+							\
+	asm volatile (" call hypercall_addr\n"		\
+		: "=a" (__ret)				\
+		: "a" (nr),				\
+		  "c" (p1)				\
+		: "memory"				\
+	);						\
+	__ret;						\
+})
void test_hypercall(void)
{
- int ret = kvm_hypercall((void *)1, (void *)2, (void *)3, (void *)4);
+ int ret;
+
+ ret = hypercall1(__NR_hypercall_load_cr3);
+ printk(KERN_DEBUG "hypercall test #1, ret: %d\n", ret);
- printk(KERN_DEBUG "hypercall test, ret: %d\n", ret);
+ ret = hypercall2(0xbad, 0xbad);
+ printk(KERN_DEBUG "hypercall test #2, ret: %d\n", ret);
}
int kvm_guest_register_para(int cpu)
Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -639,4 +639,6 @@ static inline u32 get_rdx_init_val(void)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
+extern int kvm_handle_hypercall(struct kvm_vcpu *vcpu);
+
#endif
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -1138,6 +1138,32 @@ int emulate_instruction(struct kvm_vcpu
}
EXPORT_SYMBOL_GPL(emulate_instruction);
+int hypercall_load_cr3(struct kvm_vcpu *vcpu, unsigned long new_cr3)
+{
+ printk("not yet\n");
+
+ return -ENOSYS;
+}
+
+int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+ int nr = vcpu->regs[VCPU_REGS_RAX];
+ int ret = -EINVAL;
+
+ switch (nr) {
+ case __NR_hypercall_load_cr3:
+
+ ret = hypercall_load_cr3(vcpu, vcpu->regs[VCPU_REGS_RCX]);
+ break;
+ default:
+ printk(KERN_DEBUG "invalid hypercall %d\n", nr);
+ }
+ vcpu->regs[VCPU_REGS_RAX] = ret;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_hypercall);
+
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -1032,7 +1032,7 @@ static int vmcs_setup_cr3_cache(struct k
cr3_target_values = (msr_val >> 16) & ((1 << 10) - 1);
printk(KERN_DEBUG " cr3 target values: %d\n", cr3_target_values);
if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
- printk(KERN_WARN "KVM: limiting cr3 cache size from %d to %d\n",
+ printk(KERN_WARNING "KVM: limiting cr3 cache size from %d to %d\n",
cr3_target_values, KVM_CR3_CACHE_SIZE);
cr3_target_values = KVM_CR3_CACHE_SIZE;
}
@@ -1726,16 +1726,12 @@ static int handle_halt(struct kvm_vcpu *
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- printk(KERN_DEBUG "got vmcall at RIP %08lx\n", vmcs_readl(GUEST_RIP));
- printk(KERN_DEBUG "vmcall params: %08lx, %08lx, %08lx, %08lx\n",
- vcpu->regs[VCPU_REGS_RAX],
- vcpu->regs[VCPU_REGS_RCX],
- vcpu->regs[VCPU_REGS_RDX],
- vcpu->regs[VCPU_REGS_RBP]);
- vcpu->regs[VCPU_REGS_RAX] = 0;
+ kvm_handle_hypercall(vcpu);
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+
return 1;
}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
Index: linux/include/linux/kvm_para.h
===================================================================
--- linux.orig/include/linux/kvm_para.h
+++ linux/include/linux/kvm_para.h
@@ -72,4 +72,7 @@ struct kvm_vcpu_para_state {
#define KVM_EINVAL EINVAL
+#define __NR_hypercall_load_cr3 0
+#define __NR_hypercalls 1
+
#endif
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109135318.GA3084-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 14:08 ` Avi Kivity
[not found] ` <45A3A1C6.201-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 14:08 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> * Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
>
>
>>> Does this look good to you? I'd like the basic API to be as light as
>>> possible.
>>>
>> Won't 32-bit and 64-bit pick different registers?
>>
>> We can work around it (call is_long_mode() when decoding the
>> hypercall), but it kind of defeats the purpose of the optimization,
>> no?
>>
>
> well, we can standardize on the 32-bit calling convention: eax, ecx,
> edx, ebp, etc. We can do that via the 64-bit asm. So it should be the
> same i think - just that a 32-bit guest on a 64-bit host wont be able to
> set the high bits of those registers.
>
That uglifies 64-bit for the benefit of 32-bit. I'd prefer it to be the
other way round, but it's not really an issue either way.
In any case, it needs to be documented, as other guests may not use gcc
or regparm.
> I've attached my current patch (ontop of tarball) - that's one idea
> about how it could look like. Note that i didnt introduce a syscall
> function table in kvm_handle_hypercall() - this will be the more optimal
> solution until the # of hypercalls is relatively low. That way we only
> prepare the parameters that are truly needed.
>
> you are right in that we cannot call the syscall functions directly and
> that in practice we'll shuffle things around - but we have to check the
> first parameter anyway, and the shuffling isnt /that/ big of a problem.
> I wanted to keep the guest-side as low-impact as possible, so that a
> native kernel's instruction sequence is not disturbed too much by the
> presence of a NOP hypercall.
>
Yes, that makes sense.
> in fact it would probably be more logical to use the standard syscall
> order: eax, ebx, ecx, edx, esi, edi, ebp?
>
Even better. It allows more registers and avoids a random gcc dependency.
> +#define hypercall1(nr) \
> +({ \
> + int __ret; \
> + \
> + asm (" call hypercall_addr\n" \
> + : "=g" (__ret) \
> + : "eax" (nr) \
> + ); \
> + __ret; \
> +})
>
shouldn't that be
asm ("call hypercall_addr" : "=a"(__ret) : "a"(nr))
?
I don't think "eax" is a valid asm constraint, and we need to specify
eax as the return register.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A3A1C6.201-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 14:22 ` Ingo Molnar
[not found] ` <20070109142203.GA6645-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 14:22 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> >well, we can standardize on the 32-bit calling convention: eax, ecx,
> >edx, ebp, etc. We can do that via the 64-bit asm. So it should be the
> >same i think - just that a 32-bit guest on a 64-bit host wont be able
> >to set the high bits of those registers.
> >
>
> That uglifies 64-bit at the expense of 32-bit. I'd prefer it to be
> the other way round, but it's not really an issue either way.
i can pick whichever is better. If we pick 64-bit's natural register
order, we at least have the chance to do the entry in assembly and then
to call the hyper-call table directly? (with a default-not-taken branch
leading out of this logic to a reshuffle thing if the guest is 32-bit)
[ We also have the chance to let future hardware do the call for us from
an MSR-programmed hypercall table, straight from the VMCALL, after it
has verified that RAX is within a pre-defined boundary. ]
so i'd vote for the 64-bit natural register order: return value in rax,
parameters in: rdi, rsi, rdx, rcx, r8, r9. On 32-bit that would be edi,
esi, edx, ecx, ebx, ebp - the last two shuffled into VCPU_REGS_R8/R9.
That's 6 parameters already - should be enough - that's what Linux has
itself. Whatever else must be passed in should come pointer-passed.
> In any case, it needs to be documented, as other guests may not use
> gcc or regparm.
yeah. Once we pick one and declare it stable, it's set in stone -
it won't change, ever.
> >in fact it would probably be more logical to use the standard syscall
> >order: eax, ebx, ecx, edx, esi, edi, ebp?
>
> Even better. It allows more registers and avoids a random gcc
> dependency.
ok.
>
> >+#define hypercall1(nr) \
> >+({ \
> >+ int __ret; \
> >+ \
> >+ asm (" call hypercall_addr\n" \
> >+ : "=g" (__ret) \
> >+ : "eax" (nr) \
> >+ ); \
> >+ __ret; \
> >+})
> >
> shouldn't that be
>
> asm ("call hypercall_addr" : "=a"(__ret) : "a"(nr))
>
> ?
oops, yes. I was already wondering about the bogus return value printout
;)
Ingo
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109142203.GA6645-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 14:35 ` Avi Kivity
[not found] ` <45A3A816.6010308-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 14:35 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> * Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
>
>
>>> well, we can standardize on the 32-bit calling convention: eax, ecx,
>>> edx, ebp, etc. We can do that via the 64-bit asm. So it should be the
>>> same i think - just that a 32-bit guest on a 64-bit host wont be able
>>> to set the high bits of those registers.
>>>
>>>
>> That uglifies 64-bit at the expense of 32-bit. I'd prefer it to be
>> the other way round, but it's not really an issue either way.
>>
>
> i can pick whichever is better. If we pick 64-bit's natural register
> order, we at least have the chance to do the entry in assembly and then
> to call the hyper-call table directly? (with a default-not-taken branch
> leading out of this logic to a reshuffle thing if the guest is 32-bit)
>
> [ We also have the chance to let future hardware do the call for us from
> a MSR-programmed hypercall table, straight from the VMCALL, after it
> has verified that RAX is within a pre-defined boundary. ]
>
> so i'd vote for the 64-bit natural register order: return value in rax,
> parameters in: rdi, rsi, rdx, rcx, r8, r9. On 32-bit that would be edi,
> esi, edx, ecx, ebx, ebp - the last two shuffled into VCPU_REGS_R8/R9.
> That's 6 parameters already - should be enough - that's what Linux has
> itself. Whatever else must be passed in should come pointer-passed.
>
>
Agreed, let's make it so. When you say "pointer-passed" you mean
physical address passed, right? ;-)
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A3A816.6010308-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 14:38 ` Ingo Molnar
[not found] ` <20070109143832.GA10735-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 14:38 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> > so i'd vote for the 64-bit natural register order: return value in
> > rax, parameters in: rdi, rsi, rdx, rcx, r8, r9. On 32-bit that would
> > be edi, esi, edx, ecx, ebx, ebp - the last two shuffled into
> > VCPU_REGS_R8/R9. That's 6 parameters already - should be enough -
> > that's what Linux has itself. Whatever else must be passed in should
> > come pointer-passed.
>
> Agreed, let's make it so. When you say "pointer-passed" you mean
> physical address passed, right? ;-)
yeah ;)
/me whistles
below is the current snapshot (on top of the tarball).
Ingo
Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -888,29 +888,40 @@ asm (
" ret \n"
);
-extern unsigned char hypercall_addr[4];
+extern unsigned char hypercall_addr[6];
-
-static inline int
-kvm_hypercall(void *param1, void *param2, void *param3, void *param4)
-{
- int ret = -1;
-
- asm (" call hypercall_addr\n"
- : "=g" (ret)
- : "eax" (param1),
- "ecx" (param2),
- "edx" (param3),
- "ebp" (param4));
-
- return ret;
-}
+#define hypercall0(nr) \
+({ \
+ int __ret; \
+ \
+ asm (" call hypercall_addr\n" \
+ : "=a" (__ret) \
+ : "a" (nr) \
+ ); \
+ __ret; \
+})
+
+#define hypercall1(nr, p1) \
+({ \
+ int __ret; \
+ \
+ asm (" call hypercall_addr\n" \
+ : "=a" (__ret) \
+ : "a" (nr), \
+ "D" (p1) \
+ ); \
+ __ret; \
+})
void test_hypercall(void)
{
- int ret = kvm_hypercall((void *)1, (void *)2, (void *)3, (void *)4);
+ int ret;
+
+ ret = hypercall0(__NR_hypercall_load_cr3);
+ printk(KERN_DEBUG "hypercall test #1, ret: %d\n", ret);
- printk(KERN_DEBUG "hypercall test, ret: %d\n", ret);
+ ret = hypercall1(0xbad, 0xbad);
+ printk(KERN_DEBUG "hypercall test #2, ret: %d\n", ret);
}
int kvm_guest_register_para(int cpu)
Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -639,4 +639,6 @@ static inline u32 get_rdx_init_val(void)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
+extern int kvm_handle_hypercall(struct kvm_vcpu *vcpu);
+
#endif
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -1138,6 +1138,32 @@ int emulate_instruction(struct kvm_vcpu
}
EXPORT_SYMBOL_GPL(emulate_instruction);
+int hypercall_load_cr3(struct kvm_vcpu *vcpu, unsigned long new_cr3)
+{
+ printk("not yet\n");
+
+ return -ENOSYS;
+}
+
+int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+ int nr = vcpu->regs[VCPU_REGS_RAX];
+ int ret = -EINVAL;
+
+ switch (nr) {
+ case __NR_hypercall_load_cr3:
+
+ ret = hypercall_load_cr3(vcpu, vcpu->regs[VCPU_REGS_RDI]);
+ break;
+ default:
+ printk(KERN_DEBUG "invalid hypercall %d\n", nr);
+ }
+ vcpu->regs[VCPU_REGS_RAX] = ret;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_hypercall);
+
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -1032,7 +1032,7 @@ static int vmcs_setup_cr3_cache(struct k
cr3_target_values = (msr_val >> 16) & ((1 << 10) - 1);
printk(KERN_DEBUG " cr3 target values: %d\n", cr3_target_values);
if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
- printk(KERN_WARN "KVM: limiting cr3 cache size from %d to %d\n",
+ printk(KERN_WARNING "KVM: limiting cr3 cache size from %d to %d\n",
cr3_target_values, KVM_CR3_CACHE_SIZE);
cr3_target_values = KVM_CR3_CACHE_SIZE;
}
@@ -1726,16 +1726,12 @@ static int handle_halt(struct kvm_vcpu *
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- printk(KERN_DEBUG "got vmcall at RIP %08lx\n", vmcs_readl(GUEST_RIP));
- printk(KERN_DEBUG "vmcall params: %08lx, %08lx, %08lx, %08lx\n",
- vcpu->regs[VCPU_REGS_RAX],
- vcpu->regs[VCPU_REGS_RCX],
- vcpu->regs[VCPU_REGS_RDX],
- vcpu->regs[VCPU_REGS_RBP]);
- vcpu->regs[VCPU_REGS_RAX] = 0;
+ kvm_handle_hypercall(vcpu);
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+
return 1;
}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
Index: linux/include/linux/kvm_para.h
===================================================================
--- linux.orig/include/linux/kvm_para.h
+++ linux/include/linux/kvm_para.h
@@ -72,4 +72,19 @@ struct kvm_vcpu_para_state {
#define KVM_EINVAL EINVAL
+/*
+ * Hypercall calling convention:
+ *
+ * RAX is the hypercall index, goes from 0 to __NR_hypercalls-1
+ *
+ * Each hypercall may have 0-6 parameters.
+ *
+ * parameters 1-6 are in the standard gcc x86_64 calling convention
+ * order: RDI, RSI, RDX, RCX, R8, R9.
+ *
+ * 32-bit parameters are: EDI, ESI, EDX, ECX, EBX, EBP
+ */
+#define __NR_hypercall_load_cr3 0
+#define __NR_hypercalls 1
+
#endif
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109143832.GA10735-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 14:44 ` Ingo Molnar
[not found] ` <20070109144434.GA12152-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 14:44 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> wrote:
> +/*
> + * Hypercall calling convention:
> + *
> + * RAX is the hypercall index, goes from 0 to __NR_hypercalls-1
> + *
> + * Each hypercall may have 0-6 parameters.
> + *
> + * parameters 1-6 are in the standard gcc x86_64 calling convention
> + * order: RDI, RSI, RDX, RCX, R8, R9.
> + *
> + * 32-bit parameters are: EDI, ESI, EDX, ECX, EBX, EBP
> + */
about the 32-bit side: maybe it gives us a bit more options for later if
we pick the natural order for 32-bit too: EBX for index, EAX, EDX, ECX
for the first 3 parameters, with the remaining 3 parameters in ESI, EDI,
EBP? Most hypercalls will have 0, 1, 2 or 3 parameters.
Ingo
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <20070109144434.GA12152-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 14:50 ` Avi Kivity
[not found] ` <45A3ABAF.90208-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2007-01-09 14:50 UTC (permalink / raw)
To: Ingo Molnar; +Cc: kvm-devel
Ingo Molnar wrote:
> * Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> wrote:
>
>
>> +/*
>> + * Hypercall calling convention:
>> + *
>> + * RAX is the hypercall index, goes from 0 to __NR_hypercalls-1
>> + *
>> + * Each hypercall may have 0-6 parameters.
>> + *
>> + * parameters 1-6 are in the standard gcc x86_64 calling convention
>> + * order: RDI, RSI, RDX, RCX, R8, R9.
>> + *
>> + * 32-bit parameters are: EDI, ESI, EDX, ECX, EBX, EBP
>> + */
>>
>
> about the 32-bit side: maybe it gives us a bit more options for later if
> we pick the natural order for 32-bit too: EBX for index, EAX, EDX, ECX
> for the first 3 parameters, with the remaining 3 parameters in ESI, EDI,
> EBP? Most hypercalls will have 0, 1, 2 or 3 parameters.
>
I don't mind, as long as it's clearly documented.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patch] KVM: add MSR based hypercall API
[not found] ` <45A3ABAF.90208-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-01-09 15:04 ` Ingo Molnar
[not found] ` <20070109150424.GA16535-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 15:04 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
* Avi Kivity <avi-atKUWr5tajBWk0Htik3J/w@public.gmane.org> wrote:
> > about the 32-bit side: maybe it gives us a bit more options for
> > later if we pick the natural order for 32-bit too: EBX for index,
> > EAX, EDX, ECX for the first 3 parameters, with the remaining 3
> > parameters in ESI, EDI, EBP? Most hypercalls will have 0, 1, 2 or 3
> > parameters.
>
> I don't mind, as long as it's clearly documented.
ok. Below is the current snapshot - hypercall calling convention
documented and implemented for both 64-bit and 32-bit on the KVM side,
and a small demo call done on the 32-bit guest side.
Ingo
Index: linux/arch/i386/kernel/paravirt.c
===================================================================
--- linux.orig/arch/i386/kernel/paravirt.c
+++ linux/arch/i386/kernel/paravirt.c
@@ -888,29 +888,40 @@ asm (
" ret \n"
);
-extern unsigned char hypercall_addr[4];
+extern unsigned char hypercall_addr[6];
-
-static inline int
-kvm_hypercall(void *param1, void *param2, void *param3, void *param4)
-{
- int ret = -1;
-
- asm (" call hypercall_addr\n"
- : "=g" (ret)
- : "eax" (param1),
- "ecx" (param2),
- "edx" (param3),
- "ebp" (param4));
-
- return ret;
-}
+#define hypercall0(nr) \
+({ \
+ int __ret; \
+ \
+ asm (" call hypercall_addr\n" \
+ : "=a" (__ret) \
+ : "b" (nr) \
+ ); \
+ __ret; \
+})
+
+#define hypercall1(nr, p1) \
+({ \
+ int __ret; \
+ \
+ asm (" call hypercall_addr\n" \
+ : "=a" (__ret) \
+ : "b" (nr), \
+ "a" (p1) \
+ ); \
+ __ret; \
+})
void test_hypercall(void)
{
- int ret = kvm_hypercall((void *)1, (void *)2, (void *)3, (void *)4);
+ int ret;
+
+ ret = hypercall0(__NR_hypercall_load_cr3);
+ printk(KERN_DEBUG "hypercall test #1, ret: %d\n", ret);
- printk(KERN_DEBUG "hypercall test, ret: %d\n", ret);
+ ret = hypercall1(0xbad, 0xbad);
+ printk(KERN_DEBUG "hypercall test #2, ret: %d\n", ret);
}
int kvm_guest_register_para(int cpu)
Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -639,4 +639,6 @@ static inline u32 get_rdx_init_val(void)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
+extern int kvm_handle_hypercall(struct kvm_vcpu *vcpu);
+
#endif
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -1138,6 +1138,62 @@ int emulate_instruction(struct kvm_vcpu
}
EXPORT_SYMBOL_GPL(emulate_instruction);
+int hypercall_load_cr3(struct kvm_vcpu *vcpu, unsigned long new_cr3)
+{
+ printk("not yet\n");
+
+ return -ENOSYS;
+}
+
+static inline int
+kvm_hypercall(struct kvm_vcpu *vcpu, unsigned int nr,
+ unsigned long p1, unsigned long p2, unsigned long p3,
+ unsigned long p4, unsigned long p5, unsigned long p6)
+{
+ int ret = -EINVAL;
+
+ switch (nr) {
+ case __NR_hypercall_load_cr3:
+ ret = hypercall_load_cr3(vcpu, p1);
+ break;
+ default:
+ printk(KERN_DEBUG "invalid hypercall %d\n", nr);
+ }
+ vcpu->regs[VCPU_REGS_RAX] = ret;
+
+ return 1;
+}
+
+static int kvm_handle_hypercall_32bit(struct kvm_vcpu *vcpu)
+{
+ int nr = vcpu->regs[VCPU_REGS_RBX];
+ unsigned long p1 = vcpu->regs[VCPU_REGS_RAX],
+ p2 = vcpu->regs[VCPU_REGS_RCX],
+ p3 = vcpu->regs[VCPU_REGS_RDX],
+ p4 = vcpu->regs[VCPU_REGS_RSI],
+ p5 = vcpu->regs[VCPU_REGS_RDI],
+ p6 = vcpu->regs[VCPU_REGS_RBP];
+
+ return kvm_hypercall(vcpu, nr, p1, p2, p3, p4, p5, p6);
+}
+
+int kvm_handle_hypercall(struct kvm_vcpu *vcpu)
+{
+ int nr = vcpu->regs[VCPU_REGS_RAX];
+ unsigned long p1 = vcpu->regs[VCPU_REGS_RAX],
+ p2 = vcpu->regs[VCPU_REGS_RCX],
+ p3 = vcpu->regs[VCPU_REGS_RDX],
+ p4 = vcpu->regs[VCPU_REGS_RSI],
+ p5 = vcpu->regs[VCPU_REGS_RDI],
+ p6 = vcpu->regs[VCPU_REGS_RBP];
+
+ if (unlikely(!is_long_mode(vcpu)))
+ return kvm_handle_hypercall_32bit(vcpu);
+
+ return kvm_hypercall(vcpu, nr, p1, p2, p3, p4, p5, p6);
+}
+EXPORT_SYMBOL_GPL(kvm_handle_hypercall);
+
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -1032,7 +1032,7 @@ static int vmcs_setup_cr3_cache(struct k
cr3_target_values = (msr_val >> 16) & ((1 << 10) - 1);
printk(KERN_DEBUG " cr3 target values: %d\n", cr3_target_values);
if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
- printk(KERN_WARN "KVM: limiting cr3 cache size from %d to %d\n",
+ printk(KERN_WARNING "KVM: limiting cr3 cache size from %d to %d\n",
cr3_target_values, KVM_CR3_CACHE_SIZE);
cr3_target_values = KVM_CR3_CACHE_SIZE;
}
@@ -1726,16 +1726,12 @@ static int handle_halt(struct kvm_vcpu *
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- printk(KERN_DEBUG "got vmcall at RIP %08lx\n", vmcs_readl(GUEST_RIP));
- printk(KERN_DEBUG "vmcall params: %08lx, %08lx, %08lx, %08lx\n",
- vcpu->regs[VCPU_REGS_RAX],
- vcpu->regs[VCPU_REGS_RCX],
- vcpu->regs[VCPU_REGS_RDX],
- vcpu->regs[VCPU_REGS_RBP]);
- vcpu->regs[VCPU_REGS_RAX] = 0;
+ kvm_handle_hypercall(vcpu);
vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+
return 1;
}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
Index: linux/include/linux/kvm_para.h
===================================================================
--- linux.orig/include/linux/kvm_para.h
+++ linux/include/linux/kvm_para.h
@@ -72,4 +72,23 @@ struct kvm_vcpu_para_state {
#define KVM_EINVAL EINVAL
+/*
+ * Hypercall calling convention:
+ *
+ * Each hypercall may have 0-6 parameters.
+ *
+ * 64-bit hypercall index is in RAX, goes from 0 to __NR_hypercalls-1
+ *
+ * 64-bit parameters 1-6 are in the standard gcc x86_64 calling convention
+ * order: RDI, RSI, RDX, RCX, R8, R9.
+ *
+ * 32-bit index is EBX, parameters are: EAX, ECX, EDX, ESI, EDI, EBP.
+ * (the first 3 are the gcc regparm registers; regparm order is EAX, EDX, ECX)
+ *
+ * No registers are clobbered by the hypercall, except that the
+ * return value is in RAX.
+ */
+#define __NR_hypercall_load_cr3 0
+#define __NR_hypercalls 1
+
#endif
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
^ permalink raw reply [flat|nested] 19+ messages in thread
* [patchset] KVM: paravirt/hypercall queue
[not found] ` <20070109150424.GA16535-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 16:20 ` Ingo Molnar
[not found] ` <20070109162028.GA764-X9Un+BFzKDI@public.gmane.org>
0 siblings, 1 reply; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 16:20 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 685 bytes --]
* Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> wrote:
> ok. Below is the current snapshot - hypercall calling convention
> documented and implemented for both 64-bit and 32-bit on the KVM side,
> and a small demo call done on the 32-bit guest side.
i've updated the cr3-cache code to use the new hypercall facility - this
solves the hpa/gpa aliasing bug you found.
I have changed a few more details around hypercalls - the macros are now
cleaner, the cr3 cache is aligned (so it is easier to grow both para_state
and cr3_cache in a compatible way), and a few more details here and there.
Current tarball attached. Build and boot tested, the cr3 cache still
works fine.
Ingo
[-- Attachment #2: kvm-para.tar.gz --]
[-- Type: application/x-gzip, Size: 12957 bytes --]
[-- Attachment #3: Type: text/plain, Size: 347 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [patchset] KVM: paravirt/hypercall queue
[not found] ` <20070109162028.GA764-X9Un+BFzKDI@public.gmane.org>
@ 2007-01-09 16:23 ` Ingo Molnar
0 siblings, 0 replies; 19+ messages in thread
From: Ingo Molnar @ 2007-01-09 16:23 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel
[-- Attachment #1: Type: text/plain, Size: 685 bytes --]
* Ingo Molnar <mingo-X9Un+BFzKDI@public.gmane.org> wrote:
> i've updated the cr3-cache code to use the new hypercall facility -
> this solves the hpa/gpa aliasing bug you found.
>
> i have changed a few more details around hypercalls - the macros are
> now cleaner, the cr3 cache is aligned (is easier to grow both
> para_state and cr3_cache in a compatible way), and a few more details
> here and there. Current tarball attached. Build and boot tested, the
> cr3 cache still works fine.
sorry - new tarball attached - the previous one depended on Rusty's
paravirt_ops unexport patch - i flipped them around so this queue should
apply more or less fine to KVM-trunk.
Ingo
[-- Attachment #2: kvm-para.tar.gz --]
[-- Type: application/x-gzip, Size: 12929 bytes --]
[-- Attachment #3: Type: text/plain, Size: 347 bytes --]
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
[-- Attachment #4: Type: text/plain, Size: 186 bytes --]
_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2007-01-09 16:23 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-09 9:27 [patch] KVM: add MSR based hypercall API Ingo Molnar
[not found] ` <20070109092705.GA8300-X9Un+BFzKDI@public.gmane.org>
2007-01-09 9:58 ` Avi Kivity
[not found] ` <45A36758.1000808-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 10:38 ` Ingo Molnar
[not found] ` <20070109103809.GA24515-X9Un+BFzKDI@public.gmane.org>
2007-01-09 11:24 ` Avi Kivity
[not found] ` <45A37B7A.8020709-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 11:36 ` Ingo Molnar
[not found] ` <20070109113628.GA4421-X9Un+BFzKDI@public.gmane.org>
2007-01-09 12:54 ` Avi Kivity
[not found] ` <45A39095.80005-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 13:17 ` Ingo Molnar
[not found] ` <20070109131733.GA28431-X9Un+BFzKDI@public.gmane.org>
2007-01-09 13:30 ` Ingo Molnar
2007-01-09 13:41 ` Avi Kivity
[not found] ` <45A39B90.6070908-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 13:53 ` Ingo Molnar
[not found] ` <20070109135318.GA3084-X9Un+BFzKDI@public.gmane.org>
2007-01-09 14:08 ` Avi Kivity
[not found] ` <45A3A1C6.201-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 14:22 ` Ingo Molnar
[not found] ` <20070109142203.GA6645-X9Un+BFzKDI@public.gmane.org>
2007-01-09 14:35 ` Avi Kivity
[not found] ` <45A3A816.6010308-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 14:38 ` Ingo Molnar
[not found] ` <20070109143832.GA10735-X9Un+BFzKDI@public.gmane.org>
2007-01-09 14:44 ` Ingo Molnar
[not found] ` <20070109144434.GA12152-X9Un+BFzKDI@public.gmane.org>
2007-01-09 14:50 ` Avi Kivity
[not found] ` <45A3ABAF.90208-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-01-09 15:04 ` Ingo Molnar
[not found] ` <20070109150424.GA16535-X9Un+BFzKDI@public.gmane.org>
2007-01-09 16:20 ` [patchset] KVM: paravirt/hypercall queue Ingo Molnar
[not found] ` <20070109162028.GA764-X9Un+BFzKDI@public.gmane.org>
2007-01-09 16:23 ` Ingo Molnar
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.