* [PATCH][RFC] KVM: prepare user interface for smp guests
@ 2006-10-29 13:31 Avi Kivity
[not found] ` <4544AD24.4040801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Avi Kivity @ 2006-10-29 13:31 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f; +Cc: linux-kernel
While the current implementation of KVM only supports UP guests,
it should be quite easy to add SMP guest support in the future.
However, I'd like to get the userspace interface right today.
The current interface is specified as a bunch of ioctls on a
/dev/kvm chardev. Some of the ioctls operate on the entire
virtual machine (creating memory, for example), while others
operate on a specific virtual cpu (vcpu) (running a vcpu or
accessing its registers).
A potential problem with this is that the cacheline
containing filp->f_count will ping-pong when different
vcpus are accessed (assuming each vcpu runs on a distinct
cpu). While with today's hardware entering and exiting guest
mode will likely dominate over this, the hardware will
improve, core counts will rise, and a 64-way guest running
on a 4-socket VT-9 implementation will likely have a lot
of cacheline bouncing.
To address this, we need a filp for every vcpu. Two
approaches have been suggested:
1. Have the ioctl() that creates a vcpu return a new
fd
2. Have a pseudo filesystem to represent the virtual
machine, a syscall to return an inode in that fs,
and a directory or file for each vcpu.
(There is also a third approach -- do nothing -- but since
this is a holy userspace interface, and because the changes
clean up the interface somewhat, I'd like to do them)
I've tried to summarize the differences between the two
approaches:
- fs requires a new syscall, and is therefore more
intrusive, especially for such a specialized driver.
- ioctl messes up too much in vfs internals for its
own good (it wants to export cdev_get(). in the
patch below I duplicated it because I was lazy).
- fs is similar to spufs, while ioctl returning an
fd is probably a (bad) precedent.
- ioctl is less code.
Patch implementing the ioctl approach below. Please do
comment.
Index: linux-2.6/drivers/kvm/kvm.h
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm.h
+++ linux-2.6/drivers/kvm/kvm.h
@@ -6,6 +6,7 @@
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <asm/atomic.h>
#include "vmx.h"
@@ -208,6 +209,7 @@ struct kvm_memory_slot {
struct kvm {
spinlock_t lock; /* protects everything except vcpus */
+ atomic_t count;
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
struct list_head active_mmu_pages;
Index: linux-2.6/drivers/kvm/kvm_main.c
===================================================================
--- linux-2.6.orig/drivers/kvm/kvm_main.c
+++ linux-2.6/drivers/kvm/kvm_main.c
@@ -30,6 +30,8 @@
#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/cdev.h>
#include "vmx.h"
#include "x86_emulate.h"
@@ -39,6 +41,8 @@ MODULE_LICENSE("GPL");
struct kvm_stat kvm_stat;
+static struct file_operations kvm_vcpu_ops;
+
static struct kvm_stats_debugfs_item {
const char *name;
u32 *data;
@@ -385,11 +389,6 @@ static void __vcpu_clear(void *arg)
per_cpu(current_vmcs, cpu) = 0;
}
-static int vcpu_slot(struct kvm_vcpu *vcpu)
-{
- return vcpu - vcpu->kvm->vcpus;
-}
-
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -439,6 +438,15 @@ static struct kvm_vcpu *__vcpu_load(stru
/*
* Switches to specified vcpu, until a matching vcpu_put()
*/
+static struct kvm_vcpu *vcpu_get(struct kvm_vcpu *vcpu)
+{
+ mutex_lock(&vcpu->mutex);
+ return __vcpu_load(vcpu);
+}
+
+/*
+ * Switches to specified vcpu, until a matching vcpu_put()
+ */
static struct kvm_vcpu *vcpu_load(struct kvm *kvm, int vcpu_slot)
{
struct kvm_vcpu *vcpu = &kvm->vcpus[vcpu_slot];
@@ -559,6 +567,7 @@ static int kvm_dev_open(struct inode *in
vcpu->mmu.root_hpa = INVALID_PAGE;
INIT_LIST_HEAD(&vcpu->free_pages);
}
+ atomic_set(&kvm->count, 1);
filp->private_data = kvm;
return 0;
}
@@ -617,13 +626,34 @@ static void kvm_free_vcpus(struct kvm *k
kvm_free_vcpu(&kvm->vcpus[i]);
}
-static int kvm_dev_release(struct inode *inode, struct file *filp)
+static void kvm_get(struct kvm *kvm)
{
- struct kvm *kvm = filp->private_data;
+ atomic_inc(&kvm->count);
+}
+
+static void kvm_put(struct kvm *kvm)
+{
+ if (!atomic_dec_and_test(&kvm->count))
+ return;
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
kfree(kvm);
+}
+
+static int kvm_dev_release(struct inode *inode, struct file *filp)
+{
+ struct kvm *kvm = filp->private_data;
+
+ kvm_put(kvm);
+ return 0;
+}
+
+static int vcpu_dev_release(struct inode *inode, struct file *filp)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+
+ kvm_put(vcpu->kvm);
return 0;
}
@@ -1326,14 +1356,30 @@ static void vcpu_put_rsp_rip(struct kvm_
vmcs_writel(GUEST_RIP, vcpu->rip);
}
+static struct kobject *cdev_get(struct cdev *p)
+{
+ struct module *owner = p->owner;
+ struct kobject *kobj;
+
+ if (owner && !try_module_get(owner))
+ return NULL;
+ kobj = kobject_get(&p->kobj);
+ if (!kobj)
+ module_put(owner);
+ return kobj;
+}
+
/*
* Creates some virtual cpus. Good luck creating more than one.
*/
-static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
+static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n,
+ struct file *kvm_filp)
{
int r;
+ struct file *filp;
struct kvm_vcpu *vcpu;
struct vmcs *vmcs;
+ int fd;
r = -EINVAL;
if (n < 0 || n >= KVM_MAX_VCPUS)
@@ -1343,10 +1389,20 @@ static int kvm_dev_ioctl_create_vcpu(str
mutex_lock(&vcpu->mutex);
- if (vcpu->vmcs) {
- mutex_unlock(&vcpu->mutex);
- return -EEXIST;
- }
+ r = -EEXIST;
+ if (vcpu->vmcs)
+ goto out_unlock;
+
+ r = -ENOMEM;
+ filp = get_empty_filp();
+ if (!filp)
+ goto out_unlock;
+
+ r = get_unused_fd();
+ if (r < 0)
+ goto out_free_filp;
+
+ fd = r;
vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
FX_IMAGE_ALIGN);
@@ -1372,10 +1428,25 @@ static int kvm_dev_ioctl_create_vcpu(str
if (r < 0)
goto out_free_vcpus;
- return 0;
+ filp->f_dentry = dget(kvm_filp->f_dentry);
+ filp->f_vfsmnt = mntget(kvm_filp->f_vfsmnt);
+ filp->f_mode = kvm_filp->f_mode;
+ allow_write_access(filp);
+ cdev_get(filp->f_dentry->d_inode->i_cdev);
+ kvm_get(kvm);
+ filp->f_op = fops_get(&kvm_vcpu_ops);
+ filp->private_data = vcpu;
+ fd_install(fd, filp);
+
+ return fd;
out_free_vcpus:
kvm_free_vcpu(vcpu);
+ put_unused_fd(fd);
+out_free_filp:
+ fput(filp);
+out_unlock:
+ mutex_unlock(&vcpu->mutex);
out:
return r;
}
@@ -2553,19 +2624,13 @@ static void save_msrs(struct vmx_msr_ent
rdmsrl(e[msr_index].index, e[msr_index].data);
}
-static int kvm_dev_ioctl_run(struct kvm *kvm, struct kvm_run *kvm_run)
+static int vcpu_dev_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct kvm_vcpu *vcpu;
u8 fail;
u16 fs_sel, gs_sel, ldt_sel;
int fs_gs_ldt_reload_needed;
- if (kvm_run->vcpu < 0 || kvm_run->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
-
- vcpu = vcpu_load(kvm, kvm_run->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
if (kvm_run->emulated) {
skip_emulated_instruction(vcpu);
@@ -2777,7 +2842,7 @@ again:
}
cond_resched();
/* Cannot fail - no vcpu unplug yet. */
- vcpu_load(kvm, vcpu_slot(vcpu));
+ vcpu_get(vcpu);
goto again;
}
}
@@ -2786,16 +2851,10 @@ again:
return 0;
}
-static int kvm_dev_ioctl_get_regs(struct kvm *kvm, struct kvm_regs *regs)
+static int vcpu_dev_ioctl_get_regs(struct kvm_vcpu *vcpu,
+ struct kvm_regs *regs)
{
- struct kvm_vcpu *vcpu;
-
- if (regs->vcpu < 0 || regs->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
-
- vcpu = vcpu_load(kvm, regs->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
regs->rax = vcpu->regs[VCPU_REGS_RAX];
regs->rbx = vcpu->regs[VCPU_REGS_RBX];
@@ -2830,16 +2889,10 @@ static int kvm_dev_ioctl_get_regs(struct
return 0;
}
-static int kvm_dev_ioctl_set_regs(struct kvm *kvm, struct kvm_regs *regs)
+static int vcpu_dev_ioctl_set_regs(struct kvm_vcpu *vcpu,
+ struct kvm_regs *regs)
{
- struct kvm_vcpu *vcpu;
-
- if (regs->vcpu < 0 || regs->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
-
- vcpu = vcpu_load(kvm, regs->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
vcpu->regs[VCPU_REGS_RAX] = regs->rax;
vcpu->regs[VCPU_REGS_RBX] = regs->rbx;
@@ -2868,15 +2921,10 @@ static int kvm_dev_ioctl_set_regs(struct
return 0;
}
-static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
+static int vcpu_dev_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
- struct kvm_vcpu *vcpu;
-
- if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
- vcpu = vcpu_load(kvm, sregs->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
#define get_segment(var, seg) \
do { \
@@ -2932,16 +2980,12 @@ static int kvm_dev_ioctl_get_sregs(struc
return 0;
}
-static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
+static int vcpu_dev_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
- struct kvm_vcpu *vcpu;
int mmu_reset_needed = 0;
- if (sregs->vcpu < 0 || sregs->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
- vcpu = vcpu_load(kvm, sregs->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
#define set_segment(var, seg) \
do { \
@@ -3016,15 +3060,14 @@ static int kvm_dev_ioctl_set_sregs(struc
/*
* Translate a guest virtual address to a guest physical address.
*/
-static int kvm_dev_ioctl_translate(struct kvm *kvm, struct kvm_translation *tr)
+static int vcpu_dev_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
{
unsigned long vaddr = tr->linear_address;
- struct kvm_vcpu *vcpu;
+ struct kvm *kvm = vcpu->kvm;
gpa_t gpa;
- vcpu = vcpu_load(kvm, tr->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
spin_lock(&kvm->lock);
gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
tr->physical_address = gpa;
@@ -3037,17 +3080,13 @@ static int kvm_dev_ioctl_translate(struc
return 0;
}
-static int kvm_dev_ioctl_interrupt(struct kvm *kvm, struct kvm_interrupt *irq)
+static int vcpu_dev_ioctl_interrupt(struct kvm_vcpu *vcpu,
+ struct kvm_interrupt *irq)
{
- struct kvm_vcpu *vcpu;
-
- if (irq->vcpu < 0 || irq->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
if (irq->irq < 0 || irq->irq >= 256)
return -EINVAL;
- vcpu = vcpu_load(kvm, irq->vcpu);
- if (!vcpu)
- return -ENOENT;
+
+ vcpu_get(vcpu);
set_bit(irq->irq, vcpu->irq_pending);
set_bit(irq->irq / BITS_PER_LONG, &vcpu->irq_summary);
@@ -3057,19 +3096,14 @@ static int kvm_dev_ioctl_interrupt(struc
return 0;
}
-static int kvm_dev_ioctl_debug_guest(struct kvm *kvm,
- struct kvm_debug_guest *dbg)
+static int vcpu_dev_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
{
- struct kvm_vcpu *vcpu;
unsigned long dr7 = 0x400;
u32 exception_bitmap;
int old_singlestep;
- if (dbg->vcpu < 0 || dbg->vcpu >= KVM_MAX_VCPUS)
- return -EINVAL;
- vcpu = vcpu_load(kvm, dbg->vcpu);
- if (!vcpu)
- return -ENOENT;
+ vcpu_get(vcpu);
exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
old_singlestep = vcpu->guest_debug.singlestep;
@@ -3111,26 +3145,20 @@ static int kvm_dev_ioctl_debug_guest(str
return 0;
}
-static long kvm_dev_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static long vcpu_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
- struct kvm *kvm = filp->private_data;
+ struct kvm_vcpu *vcpu = filp->private_data;
int r = -EINVAL;
switch (ioctl) {
- case KVM_CREATE_VCPU: {
- r = kvm_dev_ioctl_create_vcpu(kvm, arg);
- if (r)
- goto out;
- break;
- }
case KVM_RUN: {
struct kvm_run kvm_run;
r = -EFAULT;
if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run))
goto out;
- r = kvm_dev_ioctl_run(kvm, &kvm_run);
+ r = vcpu_dev_ioctl_run(vcpu, &kvm_run);
if (r < 0)
goto out;
r = -EFAULT;
@@ -3142,10 +3170,7 @@ static long kvm_dev_ioctl(struct file *f
case KVM_GET_REGS: {
struct kvm_regs kvm_regs;
- r = -EFAULT;
- if (copy_from_user(&kvm_regs, (void *)arg, sizeof kvm_regs))
- goto out;
- r = kvm_dev_ioctl_get_regs(kvm, &kvm_regs);
+ r = vcpu_dev_ioctl_get_regs(vcpu, &kvm_regs);
if (r)
goto out;
r = -EFAULT;
@@ -3160,7 +3185,7 @@ static long kvm_dev_ioctl(struct file *f
r = -EFAULT;
if (copy_from_user(&kvm_regs, (void *)arg, sizeof kvm_regs))
goto out;
- r = kvm_dev_ioctl_set_regs(kvm, &kvm_regs);
+ r = vcpu_dev_ioctl_set_regs(vcpu, &kvm_regs);
if (r)
goto out;
r = 0;
@@ -3169,10 +3194,7 @@ static long kvm_dev_ioctl(struct file *f
case KVM_GET_SREGS: {
struct kvm_sregs kvm_sregs;
- r = -EFAULT;
- if (copy_from_user(&kvm_sregs, (void *)arg, sizeof kvm_sregs))
- goto out;
- r = kvm_dev_ioctl_get_sregs(kvm, &kvm_sregs);
+ r = vcpu_dev_ioctl_get_sregs(vcpu, &kvm_sregs);
if (r)
goto out;
r = -EFAULT;
@@ -3187,7 +3209,7 @@ static long kvm_dev_ioctl(struct file *f
r = -EFAULT;
if (copy_from_user(&kvm_sregs, (void *)arg, sizeof kvm_sregs))
goto out;
- r = kvm_dev_ioctl_set_sregs(kvm, &kvm_sregs);
+ r = vcpu_dev_ioctl_set_sregs(vcpu, &kvm_sregs);
if (r)
goto out;
r = 0;
@@ -3199,7 +3221,7 @@ static long kvm_dev_ioctl(struct file *f
r = -EFAULT;
if (copy_from_user(&tr, (void *)arg, sizeof tr))
goto out;
- r = kvm_dev_ioctl_translate(kvm, &tr);
+ r = vcpu_dev_ioctl_translate(vcpu, &tr);
if (r)
goto out;
r = -EFAULT;
@@ -3214,7 +3236,7 @@ static long kvm_dev_ioctl(struct file *f
r = -EFAULT;
if (copy_from_user(&irq, (void *)arg, sizeof irq))
goto out;
- r = kvm_dev_ioctl_interrupt(kvm, &irq);
+ r = vcpu_dev_ioctl_interrupt(vcpu, &irq);
if (r)
goto out;
r = 0;
@@ -3226,12 +3248,32 @@ static long kvm_dev_ioctl(struct file *f
r = -EFAULT;
if (copy_from_user(&dbg, (void *)arg, sizeof dbg))
goto out;
- r = kvm_dev_ioctl_debug_guest(kvm, &dbg);
+ r = vcpu_dev_ioctl_debug_guest(vcpu, &dbg);
if (r)
goto out;
r = 0;
break;
}
+ default:
+ ;
+ }
+out:
+ return r;
+}
+
+static long kvm_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm = filp->private_data;
+ int r = -EINVAL;
+
+ switch (ioctl) {
+ case KVM_CREATE_VCPU: {
+ r = kvm_dev_ioctl_create_vcpu(kvm, arg, filp);
+ if (r < 0)
+ goto out;
+ break;
+ }
case KVM_SET_MEMORY_REGION: {
struct kvm_memory_region kvm_mem;
@@ -3292,6 +3334,13 @@ static int kvm_dev_mmap(struct file *fil
return 0;
}
+static struct file_operations kvm_vcpu_ops = {
+ .owner = THIS_MODULE,
+ .release = vcpu_dev_release,
+ .unlocked_ioctl = vcpu_dev_ioctl,
+ .compat_ioctl = vcpu_dev_ioctl,
+};
+
static struct file_operations kvm_chardev_ops = {
.owner = THIS_MODULE,
.open = kvm_dev_open,
Index: linux-2.6/include/linux/kvm.h
===================================================================
--- linux-2.6.orig/include/linux/kvm.h
+++ linux-2.6/include/linux/kvm.h
@@ -1,13 +1,6 @@
#ifndef __LINUX_KVM_H
#define __LINUX_KVM_H
-/*
- * Userspace interface for /dev/kvm - kernel based virtual machine
- *
- * Note: this interface is considered experimental and may change without
- * notice.
- */
-
#include <asm/types.h>
#include <linux/ioctl.h>
@@ -88,10 +81,6 @@ struct kvm_run {
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
- /* in */
- __u32 vcpu;
- __u32 padding;
-
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
__u64 rax, rbx, rcx, rdx;
__u64 rsi, rdi, rsp, rbp;
@@ -118,10 +107,6 @@ struct kvm_dtable {
/* for KVM_GET_SREGS and KVM_SET_SREGS */
struct kvm_sregs {
- /* in */
- __u32 vcpu;
- __u32 padding;
-
/* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
struct kvm_segment cs, ds, es, fs, gs, ss;
struct kvm_segment tr, ldt;
@@ -152,7 +137,6 @@ struct kvm_translation {
/* for KVM_INTERRUPT */
struct kvm_interrupt {
/* in */
- __u32 vcpu;
__u32 irq;
};
@@ -165,8 +149,8 @@ struct kvm_breakpoint {
/* for KVM_DEBUG_GUEST */
struct kvm_debug_guest {
/* int */
- __u32 vcpu;
__u32 enabled;
+ __u32 padding;
struct kvm_breakpoint breakpoints[4];
__u32 singlestep;
};
@@ -183,6 +167,11 @@ struct kvm_dirty_log {
#define KVMIO 0xAE
+/*
+ * Vcpu-specific operations
+ *
+ * Use with file descriptors returned from ioctl(KVM_CREATE_VCPU)
+ */
#define KVM_RUN _IOWR(KVMIO, 2, struct kvm_run)
#define KVM_GET_REGS _IOWR(KVMIO, 3, struct kvm_regs)
#define KVM_SET_REGS _IOW(KVMIO, 4, struct kvm_regs)
@@ -191,6 +180,12 @@ struct kvm_dirty_log {
#define KVM_TRANSLATE _IOWR(KVMIO, 7, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 8, struct kvm_interrupt)
#define KVM_DEBUG_GUEST _IOW(KVMIO, 9, struct kvm_debug_guest)
+
+/*
+ * Guest-wide operations
+ *
+ * Use with file descriptors returned from open("/dev/kvm")
+ */
#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 10, struct kvm_memory_region)
#define KVM_CREATE_VCPU _IOW(KVMIO, 11, int /* vcpu_slot */)
#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 12, struct kvm_dirty_log)
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH][RFC] KVM: prepare user interface for smp guests
[not found] ` <4544AD24.4040801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2006-10-30 0:01 ` Arnd Bergmann
[not found] ` <200610300101.11245.arnd-r2nGTMty4D4@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Arnd Bergmann @ 2006-10-30 0:01 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f; +Cc: linux-kernel
On Sunday 29 October 2006 14:31, Avi Kivity wrote:
> + r = -EEXIST;
> + if (vcpu->vmcs)
> + goto out_unlock;
> +
> + r = -ENOMEM;
> + filp = get_empty_filp();
> + if (!filp)
> + goto out_unlock;
> +
> + r = get_unused_fd();
> + if (r < 0)
> + goto out_free_filp;
> +
> + fd = r;
>
> vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
> FX_IMAGE_ALIGN);
> @@ -1372,10 +1428,25 @@ static int kvm_dev_ioctl_create_vcpu(str
> if (r < 0)
> goto out_free_vcpus;
>
> - return 0;
> + filp->f_dentry = dget(kvm_filp->f_dentry);
> + filp->f_vfsmnt = mntget(kvm_filp->f_vfsmnt);
> + filp->f_mode = kvm_filp->f_mode;
> + allow_write_access(filp);
> + cdev_get(filp->f_dentry->d_inode->i_cdev);
> + kvm_get(kvm);
> + filp->f_op = fops_get(&kvm_vcpu_ops);
> + filp->private_data = vcpu;
> + fd_install(fd, filp);
Separating the objects into different file descriptors sounds like a
good idea, but reusing an open dentry/inode with a new file and different
file operations is a rather unusual way to do it. Your concept of allocating
a new context on each open is already weird, but there have been other
examples of that before.
I'd suggest going to a syscall-based model with your own file system right
away, even if you don't use the spufs approach but something in the middle:
* You do a trivial nonmountable new file system with anonymous objects,
similar to eventpollfs, and hand out file descriptors to inodes in it,
for both the kvm and the vcpu objects.
* You replace the syscall you'd normally use to hand out a new kvm instance
with an ioctl on /dev/kvm, and don't allow any other operations on that
device.
This would be a much more consistent object model, compared with other
generic kernel functionality that is not bound to an actual device.
You still have all the flexibility of a loadable module without core
kernel changes for the development phase, and can easily switch to real
syscalls when merging it into mainline.
I really think that a small number of syscalls is where you should be
heading, whether you use a file system or not, but I understand that
ioctls are convenient for development.
Arnd <><
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH][RFC] KVM: prepare user interface for smp guests
[not found] ` <200610300101.11245.arnd-r2nGTMty4D4@public.gmane.org>
@ 2006-10-30 9:08 ` Avi Kivity
[not found] ` <4545C110.8080204-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
0 siblings, 1 reply; 4+ messages in thread
From: Avi Kivity @ 2006-10-30 9:08 UTC (permalink / raw)
To: Arnd Bergmann; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, linux-kernel
Arnd Bergmann wrote:
> Separating the objects into different file descriptors sounds like a
> good idea, but reusing an open dentry/inode with a new file and different
> file operations is a rather unusual way to do it.
Yes, it doesn't feel right.
> Your concept of allocating
> a new context on each open is already weird, but there have been other
> examples of that before.
>
Actually that seemed to me quite natural.
> I'd suggest going to a syscall-based model with your own file system right
> away, even if you don't use the spufs approach but something in the middle:
>
> * You do a trivial nonmountable new file system with anonymous objects,
> similar to eventpollfs, and hand out file descriptors to inodes in it,
> for both the kvm and the vcpu objects.
> * You replace the syscall you'd normally use to hand out a new kvm instance
> with an ioctl on /dev/kvm, and don't allow any other operations on that
> device.
>
> This would be a much more consistant object model, compared with other
> generic kernel functionality that is not bound to an actual device.
> You still have all the flexibility of a loadable module without core
> kernel changes for the development phase, and can easily switch to real
> syscalls when merging it into mainline.
>
I agree, that sounds like a good plan. I'll look into it.
BTW, what does lsof show for spufs users? I thought lsof /dev/kvm would
be a good way to look for virtual machines.
--
error compiling committee.c: too many arguments to function
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH][RFC] KVM: prepare user interface for smp guests
[not found] ` <4545C110.8080204-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2006-10-30 12:19 ` Arnd Bergmann
0 siblings, 0 replies; 4+ messages in thread
From: Arnd Bergmann @ 2006-10-30 12:19 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f, linux-kernel
On Monday 30 October 2006 10:08, Avi Kivity wrote:
> > Your concept of allocating
> > a new context on each open is already weird, but there have been other
> > examples of that before.
>
> Actually that seemed to me quite natural.
It's described in LDD2 and other books, but the traditional view is
still that one device node in /dev refers to an actual device or
at least something that acts like a device (e.g. /dev/null, dev/tty).
> BTW, what does lsof show for spufs users? I thought lsof /dev/kvm would
> be a good way to look for virtual machines.
It does what you expect. Since spufs is mounted, 'ls /spu/' shows you
the existing spu contexts, 'lsof /spu/*' shows you the tasks using those.
Arnd <><
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2006-10-30 12:19 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-29 13:31 [PATCH][RFC] KVM: prepare user interface for smp guests Avi Kivity
[not found] ` <4544AD24.4040801-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2006-10-30 0:01 ` Arnd Bergmann
[not found] ` <200610300101.11245.arnd-r2nGTMty4D4@public.gmane.org>
2006-10-30 9:08 ` Avi Kivity
[not found] ` <4545C110.8080204-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2006-10-30 12:19 ` Arnd Bergmann
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox