* [PATCH 0/3] KVM: Save/resume support
@ 2006-11-16 17:59 Avi Kivity
2006-11-16 18:02 ` [PATCH 1/3] KVM: Expose interrupt bitmap Avi Kivity
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Avi Kivity @ 2006-11-16 17:59 UTC (permalink / raw)
To: kvm-devel; +Cc: lkml, Andrew Morton, Uri Lublin
The following patchset adds the missing bits that allow kvm virtual
machines to be suspended and resumed, with appropriate
userspace support.
The changes are:
- expose the pending interrupt bitmap to userspace
- tsc save/restore
- msr save/restore
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/3] KVM: Expose interrupt bitmap
2006-11-16 17:59 [PATCH 0/3] KVM: Save/resume support Avi Kivity
@ 2006-11-16 18:02 ` Avi Kivity
2006-11-16 18:03 ` [PATCH 2/3] KVM: Add time stamp counter msr and accessors Avi Kivity
2006-11-16 18:04 ` [PATCH 3/3] KVM: Expose MSRs to userspace Avi Kivity
2 siblings, 0 replies; 11+ messages in thread
From: Avi Kivity @ 2006-11-16 18:02 UTC (permalink / raw)
To: kvm-devel; +Cc: linux-kernel, akpm, uril
From: Uri Lublin <uril@qumranet.com>
Expose all not-yet-delivered interrupts to userspace. This allows a guest
to be saved and resumed even if some interrupts are yet pending.
Signed-off-by: Uri Lublin <uril@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -13,6 +13,7 @@
#include <linux/mm.h>
#include "vmx.h"
+#include <linux/kvm.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
@@ -164,7 +165,7 @@ struct kvm_vcpu {
int cpu;
int launched;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-#define NR_IRQ_WORDS (256 / BITS_PER_LONG)
+#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
unsigned long irq_pending[NR_IRQ_WORDS];
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -3000,7 +3000,8 @@ static int kvm_dev_ioctl_get_sregs(struc
sregs->efer = vcpu->shadow_efer;
sregs->apic_base = vcpu->apic_base;
- sregs->pending_int = vcpu->irq_summary != 0;
+ memcpy(sregs->interrupt_bitmap, vcpu->irq_pending,
+ sizeof sregs->interrupt_bitmap);
vcpu_put(vcpu);
@@ -3034,6 +3035,7 @@ static int kvm_dev_ioctl_set_sregs(struc
{
struct kvm_vcpu *vcpu;
int mmu_reset_needed = 0;
+ int i;
if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS)
return -EINVAL;
@@ -3083,6 +3085,14 @@ static int kvm_dev_ioctl_set_sregs(struc
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
+
+ memcpy(vcpu->irq_pending, sregs->interrupt_bitmap,
+ sizeof vcpu->irq_pending);
+ vcpu->irq_summary = 0;
+ for (i = 0; i < NR_IRQ_WORDS; ++i)
+ if (vcpu->irq_pending[i])
+ __set_bit(i, &vcpu->irq_summary);
+
vcpu_put(vcpu);
return 0;
Index: linux-2.6/include/linux/kvm.h
===================================================================
--- linux-2.6.orig/include/linux/kvm.h
+++ linux-2.6/include/linux/kvm.h
@@ -11,6 +11,15 @@
#include <asm/types.h>
#include <linux/ioctl.h>
+/*
+ * Architectural interrupt line count, and the size of the bitmap needed
+ * to hold them.
+ */
+#define KVM_NR_INTERRUPTS 256
+#define KVM_IRQ_BITMAP_SIZE_BYTES ((KVM_NR_INTERRUPTS + 7) / 8)
+#define KVM_IRQ_BITMAP_SIZE(type) (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type))
+
+
/* for KVM_CREATE_MEMORY_REGION */
struct kvm_memory_region {
__u32 slot;
@@ -129,10 +138,7 @@ struct kvm_sregs {
__u64 cr0, cr2, cr3, cr4, cr8;
__u64 efer;
__u64 apic_base;
-
- /* out (KVM_GET_SREGS) */
- __u32 pending_int;
- __u32 padding2;
+ __u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)];
};
/* for KVM_TRANSLATE */
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 2/3] KVM: Add time stamp counter msr and accessors
2006-11-16 17:59 [PATCH 0/3] KVM: Save/resume support Avi Kivity
2006-11-16 18:02 ` [PATCH 1/3] KVM: Expose interrupt bitmap Avi Kivity
@ 2006-11-16 18:03 ` Avi Kivity
2006-11-16 18:04 ` [PATCH 3/3] KVM: Expose MSRs to userspace Avi Kivity
2 siblings, 0 replies; 11+ messages in thread
From: Avi Kivity @ 2006-11-16 18:03 UTC (permalink / raw)
To: kvm-devel; +Cc: linux-kernel, akpm, uril
From: Uri Lublin <uril@qumranet.com>
Add a couple of accessors for the time stamp counter, and expose the tsc msr.
Signed-off-by: Uri Lublin <uril@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -121,6 +121,7 @@ static const u32 vmx_msr_index[] = {
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
+#define MSR_IA32_TIME_STAMP_COUNTER 0x010
#define MSR_IA32_FEATURE_CONTROL 0x03a
#define MSR_IA32_VMX_BASIC_MSR 0x480
#define MSR_IA32_VMX_PINBASED_CTLS_MSR 0x481
@@ -716,6 +717,31 @@ static void inject_gp(struct kvm_vcpu *v
INTR_INFO_VALID_MASK);
}
+/*
+ * reads and returns guest's timestamp counter "register"
+ * guest_tsc = host_tsc + tsc_offset -- 21.3
+ */
+static u64 guest_read_tsc(void)
+{
+ u64 host_tsc, tsc_offset;
+
+ rdtscll(host_tsc);
+ tsc_offset = vmcs_read64(TSC_OFFSET);
+ return host_tsc + tsc_offset;
+}
+
+/*
+ * writes 'guest_tsc' into guest's timestamp counter "register"
+ * guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
+ */
+static void guest_write_tsc(u64 guest_tsc)
+{
+ u64 host_tsc;
+
+ rdtscll(host_tsc);
+ vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
+}
+
static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
if (vcpu->rmode.active)
@@ -1177,7 +1204,6 @@ static int kvm_vcpu_setup(struct kvm_vcp
struct descriptor_table dt;
int i;
int ret;
- u64 tsc;
int nr_good_msrs;
@@ -1247,8 +1273,7 @@ static int kvm_vcpu_setup(struct kvm_vcp
vmcs_write64(IO_BITMAP_A, 0);
vmcs_write64(IO_BITMAP_B, 0);
- rdtscll(tsc);
- vmcs_write64(TSC_OFFSET, -tsc);
+ guest_write_tsc(0);
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
@@ -2297,6 +2322,9 @@ static int handle_rdmsr(struct kvm_vcpu
data = vmcs_readl(GUEST_GS_BASE);
break;
#endif
+ case MSR_IA32_TIME_STAMP_COUNTER:
+ data = guest_read_tsc();
+ break;
case MSR_IA32_SYSENTER_CS:
data = vmcs_read32(GUEST_SYSENTER_CS);
break;
@@ -2374,8 +2402,6 @@ static void set_efer(struct kvm_vcpu *vc
#endif
-#define MSR_IA32_TIME_STAMP_COUNTER 0x10
-
static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
u32 ecx = vcpu->regs[VCPU_REGS_RCX];
@@ -2419,10 +2445,7 @@ static int handle_wrmsr(struct kvm_vcpu
break;
#endif
case MSR_IA32_TIME_STAMP_COUNTER: {
- u64 tsc;
-
- rdtscll(tsc);
- vmcs_write64(TSC_OFFSET, data - tsc);
+ guest_write_tsc(data);
break;
}
case MSR_IA32_UCODE_REV:
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-16 17:59 [PATCH 0/3] KVM: Save/resume support Avi Kivity
2006-11-16 18:02 ` [PATCH 1/3] KVM: Expose interrupt bitmap Avi Kivity
2006-11-16 18:03 ` [PATCH 2/3] KVM: Add time stamp counter msr and accessors Avi Kivity
@ 2006-11-16 18:04 ` Avi Kivity
2006-11-16 19:08 ` [kvm-devel] " Arnd Bergmann
2006-11-17 1:02 ` Andrew Morton
2 siblings, 2 replies; 11+ messages in thread
From: Avi Kivity @ 2006-11-16 18:04 UTC (permalink / raw)
To: kvm-devel; +Cc: linux-kernel, akpm, uril
From: Uri Lublin <uril@qumranet.com>
Expose the model-specific registers to userspace. Primarily useful for
save/resume.
Signed-off-by: Uri Lublin <uril@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -106,11 +106,7 @@ struct vmcs {
char data[0];
};
-struct vmx_msr_entry {
- u32 index;
- u32 reserved;
- u64 data;
-};
+#define vmx_msr_entry kvm_msr_entry
struct kvm_vcpu;
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -2298,21 +2298,22 @@ static int handle_cpuid(struct kvm_vcpu
return 0;
}
-static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+/*
+ * Reads an msr value (of 'msr_index') into 'pdata'.
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
+ */
+static int get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
- u32 ecx = vcpu->regs[VCPU_REGS_RCX];
- struct vmx_msr_entry *msr = find_msr_entry(vcpu, ecx);
u64 data;
+ struct vmx_msr_entry *msr;
-#ifdef KVM_DEBUG
- if (guest_cpl() != 0) {
- vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
- inject_gp(vcpu);
- return 1;
+ if (!pdata) {
+ printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
+ return -EINVAL;
}
-#endif
- switch (ecx) {
+ switch (msr_index) {
#ifdef __x86_64__
case MSR_FS_BASE:
data = vmcs_readl(GUEST_FS_BASE);
@@ -2351,11 +2352,25 @@ static int handle_rdmsr(struct kvm_vcpu
data = vcpu->apic_base;
break;
default:
- if (msr) {
- data = msr->data;
- break;
+ msr = find_msr_entry(vcpu, msr_index);
+ if (!msr) {
+ printk(KERN_ERR "kvm: unhandled rdmsr: %x\n", msr_index);
+ return 1;
}
- printk(KERN_ERR "kvm: unhandled rdmsr: %x\n", ecx);
+ data = msr->data;
+ break;
+ }
+
+ *pdata = data;
+ return 0;
+}
+
+static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ u32 ecx = vcpu->regs[VCPU_REGS_RCX];
+ u64 data;
+
+ if (get_msr(vcpu, ecx, &data)) {
inject_gp(vcpu);
return 1;
}
@@ -2401,22 +2416,16 @@ static void set_efer(struct kvm_vcpu *vc
#endif
-static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+
+/*
+ * Writes msr value into into the appropriate "register".
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
+ */
+static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
- u32 ecx = vcpu->regs[VCPU_REGS_RCX];
struct vmx_msr_entry *msr;
- u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u)
- | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
-
-#ifdef KVM_DEBUG
- if (guest_cpl() != 0) {
- vcpu_printf(vcpu, "%s: not supervisor\n", __FUNCTION__);
- inject_gp(vcpu);
- return 1;
- }
-#endif
-
- switch (ecx) {
+ switch (msr_index) {
#ifdef __x86_64__
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
@@ -2437,7 +2446,7 @@ static int handle_wrmsr(struct kvm_vcpu
#ifdef __x86_64
case MSR_EFER:
set_efer(vcpu, data);
- return 1;
+ break;
case MSR_IA32_MC0_STATUS:
printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n"
, __FUNCTION__, data);
@@ -2455,16 +2464,31 @@ static int handle_wrmsr(struct kvm_vcpu
vcpu->apic_base = data;
break;
default:
- msr = find_msr_entry(vcpu, ecx);
- if (msr) {
- msr->data = data;
- break;
+ msr = find_msr_entry(vcpu, msr_index);
+ if (!msr) {
+ printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr_index);
+ return 1;
}
- printk(KERN_ERR "kvm: unhandled wrmsr: %x\n", ecx);
+ msr->data = data;
+ break;
+ }
+
+ return 0;
+}
+
+static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ u32 ecx = vcpu->regs[VCPU_REGS_RCX];
+ u64 data = (vcpu->regs[VCPU_REGS_RAX] & -1u)
+ | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32);
+
+ if (set_msr(vcpu, ecx, data) != 0) {
inject_gp(vcpu);
return 1;
}
- skip_emulated_instruction(vcpu);
+
+ if (ecx != MSR_EFER)
+ skip_emulated_instruction(vcpu);
return 1;
}
@@ -3121,6 +3145,125 @@ static int kvm_dev_ioctl_set_sregs(struc
}
/*
+ * List of msr numbers which we expose to userspace through KVM_GET_MSRS
+ * and KVM_SET_MSRS.
+ */
+static u32 msrs_to_save[] = {
+ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+ MSR_K6_STAR,
+#ifdef __x86_64__
+ MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
+#endif
+ MSR_IA32_TIME_STAMP_COUNTER,
+};
+
+static int kvm_dev_ioctl_get_msrs(struct kvm *kvm, struct kvm_msrs *msrs)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_msr_entry *entry, *entries;
+ int rc;
+ u32 size, num_entries, i;
+
+ if (msrs->vcpu < 0 || msrs->vcpu >= KVM_MAX_VCPUS)
+ return -EINVAL;
+
+ num_entries = ARRAY_SIZE(msrs_to_save);
+ if (msrs->nmsrs < num_entries) {
+ /* inform actual number of entries */
+ msrs->nmsrs = num_entries;
+ return -EINVAL;
+ }
+
+ vcpu = vcpu_load(kvm, msrs->vcpu);
+ if (!vcpu)
+ return -ENOENT;
+
+ msrs->nmsrs = num_entries; /* update to actual number of entries */
+ size = msrs->nmsrs * sizeof(struct kvm_msr_entry);
+ rc = -E2BIG;
+ if (size > 4096)
+ goto out_vcpu;
+
+ rc = -ENOMEM;
+ entries = vmalloc(size);
+ if (entries == NULL)
+ goto out_vcpu;
+
+ memset(entries, 0, size);
+ rc = -EINVAL;
+ for (i=0; i<num_entries; i++) {
+ entry = &entries[i];
+ entry->index = msrs_to_save[i];
+ if (get_msr(vcpu, entry->index, &entry->data))
+ goto out_free;
+ }
+
+ rc = -EFAULT;
+ if (copy_to_user(msrs->entries, entries, size))
+ goto out_free;
+
+ rc = 0;
+out_free:
+ vfree(entries);
+
+out_vcpu:
+ vcpu_put(vcpu);
+
+ return rc;
+}
+
+static int kvm_dev_ioctl_set_msrs(struct kvm *kvm, struct kvm_msrs *msrs)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_msr_entry *entry, *entries;
+ int rc;
+ u32 size, num_entries, i;
+
+ if (msrs->vcpu < 0 || msrs->vcpu >= KVM_MAX_VCPUS)
+ return -EINVAL;
+
+ num_entries = ARRAY_SIZE(msrs_to_save);
+ if (msrs->nmsrs < num_entries) {
+ msrs->nmsrs = num_entries; /* inform actual size */
+ return -EINVAL;
+ }
+
+ vcpu = vcpu_load(kvm, msrs->vcpu);
+ if (!vcpu)
+ return -ENOENT;
+
+ size = msrs->nmsrs * sizeof(struct kvm_msr_entry);
+ rc = -E2BIG;
+ if (size > 4096)
+ goto out_vcpu;
+
+ rc = -ENOMEM;
+ entries = vmalloc(size);
+ if (entries == NULL)
+ goto out_vcpu;
+
+ rc = -EFAULT;
+ if (copy_from_user(entries, msrs->entries, size))
+ goto out_free;
+
+ rc = -EINVAL;
+ for (i=0; i<num_entries; i++) {
+ entry = &entries[i];
+ if (set_msr(vcpu, entry->index, entry->data))
+ goto out_free;
+ }
+
+ rc = 0;
+out_free:
+ vfree(entries);
+
+out_vcpu:
+ vcpu_put(vcpu);
+
+ return rc;
+}
+
+/*
* Translate a guest virtual address to a guest physical address.
*/
static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr)
@@ -3361,6 +3504,33 @@ static long kvm_dev_ioctl(struct file *f
goto out;
break;
}
+ case KVM_GET_MSRS: {
+ struct kvm_msrs kvm_msrs;
+
+ r = -EFAULT;
+ if (copy_from_user(&kvm_msrs, (void *)arg, sizeof kvm_msrs))
+ goto out;
+ r = kvm_dev_ioctl_get_msrs(kvm, &kvm_msrs);
+ if (r)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user((void *)arg, &kvm_msrs, sizeof kvm_msrs))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_MSRS: {
+ struct kvm_msrs kvm_msrs;
+
+ r = -EFAULT;
+ if (copy_from_user(&kvm_msrs, (void *)arg, sizeof kvm_msrs))
+ goto out;
+ r = kvm_dev_ioctl_set_msrs(kvm, &kvm_msrs);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
default:
;
}
Index: linux-2.6/include/linux/kvm.h
===================================================================
--- linux-2.6.orig/include/linux/kvm.h
+++ linux-2.6/include/linux/kvm.h
@@ -141,6 +141,23 @@ struct kvm_sregs {
__u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)];
};
+struct kvm_msr_entry {
+ __u32 index;
+ __u32 reserved;
+ __u64 data;
+};
+
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
+struct kvm_msrs {
+ __u32 vcpu;
+ __u32 nmsrs; /* number of msrs in entries */
+
+ union {
+ struct kvm_msr_entry __user *entries;
+ __u64 padding;
+ };
+};
+
/* for KVM_TRANSLATE */
struct kvm_translation {
/* in */
@@ -200,5 +217,7 @@ struct kvm_dirty_log {
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region)
#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int /* vcpu_slot */)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log)
+#define KVM_GET_MSRS _IOWR(KVMIO, 13, struct kvm_msrs)
+#define KVM_SET_MSRS _IOW(KVMIO, 14, struct kvm_msrs)
#endif
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [kvm-devel] [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-16 18:04 ` [PATCH 3/3] KVM: Expose MSRs to userspace Avi Kivity
@ 2006-11-16 19:08 ` Arnd Bergmann
2006-11-16 19:17 ` Avi Kivity
2006-11-17 1:02 ` Andrew Morton
1 sibling, 1 reply; 11+ messages in thread
From: Arnd Bergmann @ 2006-11-16 19:08 UTC (permalink / raw)
To: kvm-devel; +Cc: Avi Kivity, akpm, linux-kernel, uril
On Thursday 16 November 2006 19:04, Avi Kivity wrote:
> +struct kvm_msr_entry {
> + __u32 index;
> + __u32 reserved;
> + __u64 data;
> +};
> +
> +/* for KVM_GET_MSRS and KVM_SET_MSRS */
> +struct kvm_msrs {
> + __u32 vcpu;
> + __u32 nmsrs; /* number of msrs in entries */
> +
> + union {
> + struct kvm_msr_entry __user *entries;
> + __u64 padding;
> + };
> +};
ioctl interfaces with pointers in them are generally a bad idea,
though you handle most of the points against them fine here
(endianess doesn't matter, padding is correct).
Still, it might be better not to set a bad example. Is accessing
the MSRs actually performance critical? If not, you could
define the ioctl to take only a single entry argument.
A possible alternative could also be to have a variable length
argument like below, but that creates other problems:
+struct kvm_msrs {
+ __u32 vcpu;
+ __u32 nmsrs; /* number of msrs in entries */
+ struct kvm_msr_entry entries[0]; /* followed by actual msrs */
+};
This would mean that you can't tell the transfer size from the
ioctl number, but you can't do that in your code either, because
you do two separate transfers.
Arnd <><
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [kvm-devel] [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-16 19:08 ` [kvm-devel] " Arnd Bergmann
@ 2006-11-16 19:17 ` Avi Kivity
2006-11-17 8:06 ` Christoph Hellwig
0 siblings, 1 reply; 11+ messages in thread
From: Avi Kivity @ 2006-11-16 19:17 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: kvm-devel, akpm, linux-kernel, uril
Arnd Bergmann wrote:
> On Thursday 16 November 2006 19:04, Avi Kivity wrote:
>
>> +struct kvm_msr_entry {
>> + __u32 index;
>> + __u32 reserved;
>> + __u64 data;
>> +};
>> +
>> +/* for KVM_GET_MSRS and KVM_SET_MSRS */
>> +struct kvm_msrs {
>> + __u32 vcpu;
>> + __u32 nmsrs; /* number of msrs in entries */
>> +
>> + union {
>> + struct kvm_msr_entry __user *entries;
>> + __u64 padding;
>> + };
>> +};
>>
>
> ioctl interfaces with pointers in them are generally a bad idea,
> though you handle most of the points against them fine here
> (endianess doesn't matter, padding is correct).
>
> Still, it might be better not to set a bad example. Is accessing
> the MSRs actually performance critical? If not, you could
> define the ioctl to take only a single entry argument.
>
>
But then you can't dynamically determine which MSRs are available.
And no, reading/setting MSRs isn't performance critical for the current
use cases.
> A possible alternative could also be to have a variable length
> argument like below, but that creates other problems:
>
> +struct kvm_msrs {
> + __u32 vcpu;
> + __u32 nmsrs; /* number of msrs in entries */
> + struct kvm_msr_entry entries[0]; /* followed by actual msrs */
> +};
>
> This would mean that you can't tell the transfer size from the
> ioctl number, but you can't do that in your code either, because
> you do two separate transfers.
>
>
Heh. That was the original implementation by Uri. I felt that was
wrong because _IOW() encodes the size in the ioctl number, bit the
actual size is different.
> Arnd <><
>
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-16 18:04 ` [PATCH 3/3] KVM: Expose MSRs to userspace Avi Kivity
2006-11-16 19:08 ` [kvm-devel] " Arnd Bergmann
@ 2006-11-17 1:02 ` Andrew Morton
2006-11-17 7:20 ` Avi Kivity
1 sibling, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2006-11-17 1:02 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel, linux-kernel, uril
On Thu, 16 Nov 2006 18:04:22 -0000
Avi Kivity <avi@qumranet.com> wrote:
> +static int kvm_dev_ioctl_set_msrs(struct kvm *kvm, struct kvm_msrs *msrs)
> +{
> + struct kvm_vcpu *vcpu;
> + struct kvm_msr_entry *entry, *entries;
> + int rc;
> + u32 size, num_entries, i;
> +
> + if (msrs->vcpu < 0 || msrs->vcpu >= KVM_MAX_VCPUS)
> + return -EINVAL;
> +
> + num_entries = ARRAY_SIZE(msrs_to_save);
> + if (msrs->nmsrs < num_entries) {
> + msrs->nmsrs = num_entries; /* inform actual size */
> + return -EINVAL;
> + }
> +
> + vcpu = vcpu_load(kvm, msrs->vcpu);
> + if (!vcpu)
> + return -ENOENT;
> +
> + size = msrs->nmsrs * sizeof(struct kvm_msr_entry);
> + rc = -E2BIG;
> + if (size > 4096)
> + goto out_vcpu;
Classic mutiplicative overflow bug. Only msrs->nmsrs doesn't get used
again, so there is no bug here. Yet.
> + rc = -ENOMEM;
> + entries = vmalloc(size);
> + if (entries == NULL)
> + goto out_vcpu;
> +
> + rc = -EFAULT;
> + if (copy_from_user(entries, msrs->entries, size))
> + goto out_free;
> +
> + rc = -EINVAL;
> + for (i=0; i<num_entries; i++) {
> + entry = &entries[i];
> + if (set_msr(vcpu, entry->index, entry->data))
> + goto out_free;
> + }
> +
> + rc = 0;
> +out_free:
> + vfree(entries);
> +
> +out_vcpu:
> + vcpu_put(vcpu);
> +
> + return rc;
> +}
This function returns no indication of how many msrs it actually did set.
Should it?
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-17 1:02 ` Andrew Morton
@ 2006-11-17 7:20 ` Avi Kivity
2006-11-17 8:15 ` Andrew Morton
0 siblings, 1 reply; 11+ messages in thread
From: Avi Kivity @ 2006-11-17 7:20 UTC (permalink / raw)
To: Andrew Morton; +Cc: kvm-devel, linux-kernel, uril
Andrew Morton wrote:
> On Thu, 16 Nov 2006 18:04:22 -0000
> Avi Kivity <avi@qumranet.com> wrote:
>
>
>> +static int kvm_dev_ioctl_set_msrs(struct kvm *kvm, struct kvm_msrs *msrs)
>> +{
>> + struct kvm_vcpu *vcpu;
>> + struct kvm_msr_entry *entry, *entries;
>> + int rc;
>> + u32 size, num_entries, i;
>> +
>> + if (msrs->vcpu < 0 || msrs->vcpu >= KVM_MAX_VCPUS)
>> + return -EINVAL;
>> +
>> + num_entries = ARRAY_SIZE(msrs_to_save);
>> + if (msrs->nmsrs < num_entries) {
>> + msrs->nmsrs = num_entries; /* inform actual size */
>> + return -EINVAL;
>> + }
>> +
>> + vcpu = vcpu_load(kvm, msrs->vcpu);
>> + if (!vcpu)
>> + return -ENOENT;
>> +
>> + size = msrs->nmsrs * sizeof(struct kvm_msr_entry);
>> + rc = -E2BIG;
>> + if (size > 4096)
>> + goto out_vcpu;
>>
>
> Classic mutiplicative overflow bug.
Right, will fix. The 4096 limit is arbitrary anyway, and can be
replaced by an arbitrary limit on nmsrs.
> Only msrs->nmsrs doesn't get used
> again, so there is no bug here. Yet.
>
>
But why isn't it used again? Looks like the kernel is forcing the user
to send at least num_entries for no good reason, and ignoring any
entries beyond num_entries.
>> + rc = -ENOMEM;
>> + entries = vmalloc(size);
>> + if (entries == NULL)
>> + goto out_vcpu;
>> +
>> + rc = -EFAULT;
>> + if (copy_from_user(entries, msrs->entries, size))
>> + goto out_free;
>> +
>> + rc = -EINVAL;
>> + for (i=0; i<num_entries; i++) {
>> + entry = &entries[i];
>> + if (set_msr(vcpu, entry->index, entry->data))
>> + goto out_free;
>> + }
>> +
>> + rc = 0;
>> +out_free:
>> + vfree(entries);
>> +
>> +out_vcpu:
>> + vcpu_put(vcpu);
>> +
>> + return rc;
>> +}
>>
>
> This function returns no indication of how many msrs it actually did set.
> Should it?
>
It can't hurt. Is returning the number of msrs set in the return code
(ala short write) acceptable, or do I need to make this a read/write ioctl?
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [kvm-devel] [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-16 19:17 ` Avi Kivity
@ 2006-11-17 8:06 ` Christoph Hellwig
0 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2006-11-17 8:06 UTC (permalink / raw)
To: Avi Kivity; +Cc: Arnd Bergmann, kvm-devel, akpm, linux-kernel, uril
On Thu, Nov 16, 2006 at 09:17:18PM +0200, Avi Kivity wrote:
> Heh. That was the original implementation by Uri. I felt that was
> wrong because _IOW() encodes the size in the ioctl number, bit the
> actual size is different.
That really shouldn't be a problem. After all the pointer approach
doesn't encode the transfered size either. Given that the variable
sized array gives a much cleaner interface you should use it.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-17 7:20 ` Avi Kivity
@ 2006-11-17 8:15 ` Andrew Morton
2006-11-17 8:17 ` Avi Kivity
0 siblings, 1 reply; 11+ messages in thread
From: Andrew Morton @ 2006-11-17 8:15 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel, linux-kernel, uril
On Fri, 17 Nov 2006 09:20:49 +0200
Avi Kivity <avi@qumranet.com> wrote:
> >> +out_vcpu:
> >> + vcpu_put(vcpu);
> >> +
> >> + return rc;
> >> +}
> >>
> >
> > This function returns no indication of how many msrs it actually did set.
> > Should it?
> >
>
> It can't hurt. Is returning the number of msrs set in the return code
> (ala short write) acceptable, or do I need to make this a read/write ioctl?
>
I'd have thought that you'd just copy the number written into msrs->nmsrs via
msrs->nmsrs = num_entries;
like kvm_dev_ioctl_set_msrs() does. Dunno...
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: Expose MSRs to userspace
2006-11-17 8:15 ` Andrew Morton
@ 2006-11-17 8:17 ` Avi Kivity
0 siblings, 0 replies; 11+ messages in thread
From: Avi Kivity @ 2006-11-17 8:17 UTC (permalink / raw)
To: Andrew Morton; +Cc: kvm-devel, linux-kernel, uril
Andrew Morton wrote:
> On Fri, 17 Nov 2006 09:20:49 +0200
> Avi Kivity <avi@qumranet.com> wrote:
>
>
>>>> +out_vcpu:
>>>> + vcpu_put(vcpu);
>>>> +
>>>> + return rc;
>>>> +}
>>>>
>>>>
>>> This function returns no indication of how many msrs it actually did set.
>>> Should it?
>>>
>>>
>> It can't hurt. Is returning the number of msrs set in the return code
>> (ala short write) acceptable, or do I need to make this a read/write ioctl?
>>
>>
>
> I'd have thought that you'd just copy the number written into msrs->nmsrs via
>
> msrs->nmsrs = num_entries;
>
> like kvm_dev_ioctl_set_msrs() does. Dunno...
>
It means making it an _IOWR() ioctl, but no matter.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2006-11-17 8:18 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-11-16 17:59 [PATCH 0/3] KVM: Save/resume support Avi Kivity
2006-11-16 18:02 ` [PATCH 1/3] KVM: Expose interrupt bitmap Avi Kivity
2006-11-16 18:03 ` [PATCH 2/3] KVM: Add time stamp counter msr and accessors Avi Kivity
2006-11-16 18:04 ` [PATCH 3/3] KVM: Expose MSRs to userspace Avi Kivity
2006-11-16 19:08 ` [kvm-devel] " Arnd Bergmann
2006-11-16 19:17 ` Avi Kivity
2006-11-17 8:06 ` Christoph Hellwig
2006-11-17 1:02 ` Andrew Morton
2006-11-17 7:20 ` Avi Kivity
2006-11-17 8:15 ` Andrew Morton
2006-11-17 8:17 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).