From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark McLoughlin Subject: [PATCH 1/5] vring: Replace mmap() interface with ioctl() Date: Fri, 13 Jun 2008 14:57:57 +0100 Message-ID: <1213365481-23460-2-git-send-email-markmc@redhat.com> References: <1213365481-23460-1-git-send-email-markmc@redhat.com> Cc: kvm@vger.kernel.org, Mark McLoughlin To: Anthony Liguori , Avi Kivity , Rusty Russell Return-path: Received: from mail03.svc.cra.dublin.eircom.net ([159.134.118.19]:33563 "HELO mail03.svc.cra.dublin.eircom.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with SMTP id S1756181AbYFMN6F (ORCPT ); Fri, 13 Jun 2008 09:58:05 -0400 In-Reply-To: <1213365481-23460-1-git-send-email-markmc@redhat.com> Sender: kvm-owner@vger.kernel.org List-ID: /dev/vring's mmap() interface is a strange creature. It serves as a way for userland to supply the address of the already allocated ring descriptors, but causes those pages to be re-mapped as a natural side effect of the mmap(). This is not an issue for lguest because it does the mmap() before even starting the guest. However, in the case of kvm, the guest allocates the ring and informs the host of its address. If we then mmap() it, we cause it to be remapped to new pages which the vring driver will then use. Now, KVM guests don't actually use the ring pages before informing the host of its address, so we could probably just invalidate the guest's shadow page table and have the new pfns picked up. That would be an odd requirement to impose on the guest ABI, though. Since the mmap() semantics are so strange, switch to using a single ioctl() for setting up the ring. 
(Against misc:dev_vring.patch and misc:ringfd-base-limit.patch) Signed-off-by: Mark McLoughlin --- drivers/char/vring.c | 61 +++++++++++++++++++++++------------------------- include/linux/vring.h | 10 ++++++- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/drivers/char/vring.c b/drivers/char/vring.c index 0b9bdf5..a66b890 100644 --- a/drivers/char/vring.c +++ b/drivers/char/vring.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -126,22 +127,21 @@ static int vring_release(struct inode *inode, struct file *filp) return 0; } -static int vring_mmap(struct file *filp, struct vm_area_struct *vma) +static long vring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - unsigned long size, num_descs; struct vring_info *vr = filp->private_data; + void __user *argp = (void __user *)arg; + struct vring_ioctl_info info; + unsigned long descs; int err; - /* We overload mmap's offset to hold the ring number. */ - num_descs = vma->vm_pgoff; + if (cmd != VRINGSETINFO) + return -ENOTTY; - /* Must be a power of two, and limit indices to a u16. */ - if (!is_power_of_2(num_descs) || num_descs > 65536) - return -EINVAL; + if (copy_from_user(&info, argp, sizeof(info))) + return -EFAULT; - /* mmap size must be what we expect for such a ring. */ - size = vma->vm_end - vma->vm_start; - if (size != ALIGN(vring_size(num_descs, PAGE_SIZE), PAGE_SIZE)) + if (!is_power_of_2(info.num_descs)) return -EINVAL; /* We only let them map this in one place. 
*/ @@ -151,9 +151,14 @@ static int vring_mmap(struct file *filp, struct vm_area_struct *vma) goto unlock; } - vring_init(&vr->ring, num_descs, (void *)vma->vm_start, PAGE_SIZE); + descs = info.descs; + vring_init(&vr->ring, info.num_descs, (void *)descs, PAGE_SIZE); - vr->mask = num_descs - 1; + vr->mask = info.num_descs - 1; + vr->base = info.base; + vr->limit = info.limit; + if (vr->limit == 0) + vr->limit = -1UL; err = 0; unlock: @@ -161,6 +166,16 @@ unlock: return err; } +#ifdef CONFIG_COMPAT +static long vring_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + return vring_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define vring_compat_ioctl NULL +#endif + static int vring_open(struct inode *in, struct file *filp) { struct vring_info *vr; @@ -176,32 +191,14 @@ static int vring_open(struct inode *in, struct file *filp) return 0; } -static int vring_ioctl(struct inode *in, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct vring_info *vr = filp->private_data; - - switch (cmd) { - case VRINGSETBASE: - vr->base = arg; - break; - case VRINGSETLIMIT: - vr->limit = arg; - break; - default: - return -ENOTTY; - } - return 0; -} - static const struct file_operations vring_fops = { .open = vring_open, .release = vring_release, - .mmap = vring_mmap, .read = vring_read, .write = vring_write, .poll = vring_poll, - .ioctl = vring_ioctl, + .unlocked_ioctl = vring_ioctl, + .compat_ioctl = vring_compat_ioctl, }; /** diff --git a/include/linux/vring.h b/include/linux/vring.h index 47c8848..de4125d 100644 --- a/include/linux/vring.h +++ b/include/linux/vring.h @@ -21,8 +21,14 @@ #include /* Ioctl defines. */ -#define VRINGSETBASE _IO(0xAD, 0) -#define VRINGSETLIMIT _IO(0xAD, 1) +#define VRINGSETINFO _IO(0xAD, 0) + +struct vring_ioctl_info { + __u16 num_descs; + __u64 descs; + __u64 base; + __u64 limit; +}; #ifdef __KERNEL__ -- 1.5.4.1