All of lore.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi@qumranet.com>
To: Ben-Ami Yassour <benami@il.ibm.com>
Cc: amit.shah@qumranet.com, kvm@vger.kernel.org, muli@il.ibm.com,
	weidong.han@intel.com, anthony@codemonkey.ws, "Kay,
	Allen M" <allen.m.kay@intel.com>
Subject: Re: [PATCH 4/4] KVM: Device assignemnt with VT-d
Date: Sat, 26 Jul 2008 12:02:06 +0300	[thread overview]
Message-ID: <488AE80E.7030502@qumranet.com> (raw)
In-Reply-To: <1216728835-19721-5-git-send-email-benami@il.ibm.com>

Ben-Ami Yassour wrote:
> From: Kay, Allen M <allen.m.kay@intel.com>
>
> This patch includes the functions to support VT-d for passthrough
> devices.
>
> [Ben: fixed memory pinning, cleanup]
>
> Signed-off-by: Kay, Allen M <allen.m.kay@intel.com>
> Signed-off-by: Weidong Han <weidong.han@intel.com>
> Signed-off-by: Ben-Ami Yassour <benami@il.ibm.com>
> ---
>  arch/x86/kvm/Makefile      |    2 +-
>  arch/x86/kvm/vtd.c         |  182 ++++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c         |   11 +++
>  include/asm-x86/kvm_host.h |    3 +
>  include/linux/kvm_host.h   |    6 ++
>  virt/kvm/kvm_main.c        |    8 ++-
>  6 files changed, 210 insertions(+), 2 deletions(-)
>  create mode 100644 arch/x86/kvm/vtd.c
>
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index d0e940b..5d9d079 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -11,7 +11,7 @@ endif
>  EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
>  
>  kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
> -	i8254.o
> +	i8254.o vtd.o
>  obj-$(CONFIG_KVM) += kvm.o
>  kvm-intel-objs = vmx.o
>  obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
> diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
> new file mode 100644
> index 0000000..7a3cf4e
> --- /dev/null
> +++ b/arch/x86/kvm/vtd.c
> @@ -0,0 +1,182 @@
> +/*
> + * Copyright (c) 2006, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + * Copyright (C) 2006-2008 Intel Corporation
> + * Author: Allen M. Kay <allen.m.kay@intel.com>
> + * Author: Weidong Han <weidong.han@intel.com>
> + */
> +
> +#include <linux/list.h>
> +#include <linux/kvm_host.h>
> +#include <linux/pci.h>
> +#include <linux/dmar.h>
> +#include <linux/intel-iommu.h>
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm);
> +
> +int kvm_iommu_map_pages(struct kvm *kvm,
> +			gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	int i, rc;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +
> +	if (!domain)
> +		return -EFAULT;
> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = gfn_to_pfn(kvm, gfn);
> +		if (!is_mmio_pfn(pfn)) {
> +			rc = intel_iommu_page_mapping(domain,
> +						      gfn << PAGE_SHIFT,
> +						      pfn << PAGE_SHIFT,
>   

This overflows on i386, where gfn and pfn are longs while gpas and hpas 
are u64s.

gfn_to_gpa() gets the first one right.  There should also be a 
pfn_to_phys(), but there isn't.

> +						      PAGE_SIZE,
> +						      DMA_PTE_READ |
> +						      DMA_PTE_WRITE);
> +			if (rc)
> +				kvm_release_pfn_clean(pfn);
>   

You're never actually returning rc, so any errors will be swallowed 
silently.

> +		} else {
> +			printk(KERN_DEBUG "kvm_iommu_map_page:"
> +			       "invalid pfn=%lx\n", pfn);
> +			return 0;
> +		}
> +
> +		gfn++;
> +	}
> +	return 0;
>   

Isn't this slow on large guests? (not a merge barrier).

> +}
> +
> +int kvm_iommu_map_guest(struct kvm *kvm,
> +			struct kvm_assigned_dev *assigned_dev)
> +{
> +	struct pci_dev *pdev = NULL;
> +
> +	printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n",
> +	       assigned_dev->host.busnr,
> +	       PCI_SLOT(assigned_dev->host.devfn),
> +	       PCI_FUNC(assigned_dev->host.devfn));
> +
> +	for_each_pci_dev(pdev) {
> +		if ((pdev->bus->number == assigned_dev->host.busnr) &&
> +		    (pdev->devfn == assigned_dev->host.devfn)) {
> +			break;
> +		}
> +	}
>   

Why not keep pdev in kvm_assigned_dev?  We've claimed it, so it's ours.

> +
> +	if (pdev == NULL) {
> +		if (kvm->arch.intel_iommu_domain) {
> +			intel_iommu_domain_exit(kvm->arch.intel_iommu_domain);
> +			kvm->arch.intel_iommu_domain = NULL;
> +		}
> +		return -ENODEV;
> +	}
> +
> +	kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev);
> +
> +	if (kvm_iommu_map_memslots(kvm)) {
> +		kvm_iommu_unmap_memslots(kvm);
> +		return -EFAULT;
>   

EFAULT is for the kernel touching an invalid userspace page.  You should 
simply propagate the original error.

> +	}
> +
> +	intel_iommu_detach_dev(kvm->arch.intel_iommu_domain,
> +			       pdev->bus->number, pdev->devfn);
> +
> +	if (intel_iommu_context_mapping(kvm->arch.intel_iommu_domain,
> +					pdev)) {
> +		printk(KERN_ERR "Domain context map for %s failed",
> +		       pci_name(pdev));
> +		return -EFAULT;
>   

Ditto.

> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_put_pages(struct kvm *kvm,
> +			       gfn_t base_gfn, unsigned long npages)
> +{
> +	gfn_t gfn = base_gfn;
> +	pfn_t pfn;
> +	struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
> +	int i;
> +
> +	if (!domain)
> +		return -EFAULT;
>   

Ditto.

> +
> +	for (i = 0; i < npages; i++) {
> +		pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
> +						     gfn << PAGE_SHIFT);
>   

Overflow.

> +		kvm_release_pfn_clean(pfn);
> +		gfn++;
> +	}
> +	return 0;
> +}
> +
> +static int kvm_iommu_unmap_memslots(struct kvm *kvm)
> +{
> +	int i, rc;
> +	for (i = 0; i < kvm->nmemslots; i++) {
> +		rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
> +					 kvm->memslots[i].npages);
> +		if (rc)
> +			return rc;
> +	}
> +	return 0;
> +}
> +
>   

Unmapping should be unfailable, since there's not way to recover from it.

I'm unhappy with this wanton taking-of-references, but as long as mmu 
notifiers are unmerged, I see no other way.


> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index d9aa931..e57c9e4 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -33,6 +33,7 @@
>  #include <linux/module.h>
>  #include <linux/mman.h>
>  #include <linux/highmem.h>
> +#include <linux/intel-iommu.h>
>  
>  #include <asm/uaccess.h>
>  #include <asm/msr.h>
> @@ -253,9 +254,17 @@ kvm_vm_ioctl_device_assignment(struct kvm *kvm,
>  	}
>  
>  
> +	if (intel_iommu_found()) {
> +		r = kvm_iommu_map_guest(kvm, assigned_dev);
> +		if (r)
> +			goto out_intr;
> +	}
> +
>  out:
>  	mutex_unlock(&kvm->lock);
>  	return r;
> +out_intr:
> +	free_irq(dev->irq, match);
>  out_list_del:
>  	list_del(&match->list);
>  	pci_release_regions(dev);
>   


The split between device assignment and irq assignment I suggested 
earlier would definitely help this function.



-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


  parent reply	other threads:[~2008-07-26  9:02 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-22 12:13 device assignemnt: updated patches Ben-Ami Yassour
2008-07-22 12:13 ` [PATCH 1/4] KVM: Add irq ack notifier list Ben-Ami Yassour
2008-07-22 12:13   ` [PATCH 2/4] KVM: pci device assignment Ben-Ami Yassour
2008-07-22 12:13     ` [PATCH 3/4] VT-d: changes to support KVM Ben-Ami Yassour
2008-07-22 12:13       ` [PATCH 4/4] KVM: Device assignemnt with VT-d Ben-Ami Yassour
2008-07-22 12:18         ` device assignment - userspace part Ben-Ami Yassour
2008-07-22 12:18           ` [PATCH 1/1] KVM/userspace: Support for assigning PCI devices to guest Ben-Ami Yassour
2008-07-26  9:02         ` Avi Kivity [this message]
2008-07-28  6:49         ` [PATCH 4/4] KVM: Device assignemnt with VT-d Han, Weidong
2008-07-28 16:34           ` Ben-Ami Yassour
2008-07-26  8:46     ` [PATCH 2/4] KVM: pci device assignment Avi Kivity
2008-07-28  7:27     ` Yang, Sheng
2008-07-28 16:41       ` Ben-Ami Yassour
2008-07-28  8:33     ` Amit Shah
2008-07-26  8:19   ` [PATCH 1/4] KVM: Add irq ack notifier list Avi Kivity
2008-07-26  9:05 ` device assignemnt: updated patches Avi Kivity
2008-07-26  9:24   ` Han, Weidong
2008-07-26  9:32     ` Avi Kivity
2008-07-26  9:48       ` Han, Weidong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=488AE80E.7030502@qumranet.com \
    --to=avi@qumranet.com \
    --cc=allen.m.kay@intel.com \
    --cc=amit.shah@qumranet.com \
    --cc=anthony@codemonkey.ws \
    --cc=benami@il.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=muli@il.ibm.com \
    --cc=weidong.han@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.