From mboxrd@z Thu Jan 1 00:00:00 1970 From: Anthony Liguori Subject: Re: [PATCH] KVM: PCIPT: VT-d support Date: Wed, 09 Jul 2008 10:49:42 -0500 Message-ID: <4874DE16.4080402@codemonkey.ws> References: <1215341549-29269-1-git-send-email-benami@il.ibm.com> <1215341549-29269-2-git-send-email-benami@il.ibm.com> <1215341549-29269-3-git-send-email-benami@il.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: amit.shah@qumranet.com, kvm@vger.kernel.org, muli@il.ibm.com, weidong.han@intel.com, "Kay, Allen M" To: Ben-Ami Yassour Return-path: Received: from el-out-1112.google.com ([209.85.162.183]:6435 "EHLO el-out-1112.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753492AbYGIPuI (ORCPT ); Wed, 9 Jul 2008 11:50:08 -0400 Received: by el-out-1112.google.com with SMTP id z25so463521ele.1 for ; Wed, 09 Jul 2008 08:50:07 -0700 (PDT) In-Reply-To: <1215341549-29269-3-git-send-email-benami@il.ibm.com> Sender: kvm-owner@vger.kernel.org List-ID: Ben-Ami Yassour wrote: > From: Kay, Allen M > > This patch includes the functions to support VT-d for passthrough > devices. > > [Ben: fixed memory pinning] > > Signed-off-by: Kay, Allen M > Signed-off-by: Weidong Han > Signed-off-by: Ben-Ami Yassour > --- > arch/x86/kvm/Makefile | 2 +- > arch/x86/kvm/vtd.c | 189 ++++++++++++++++++++++++++++++++++++++++++++ > include/asm-x86/kvm_host.h | 17 ++++ > include/asm-x86/kvm_para.h | 14 +++ > include/linux/kvm_host.h | 6 ++ > 5 files changed, 227 insertions(+), 1 deletions(-) > create mode 100644 arch/x86/kvm/vtd.c > > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile > index d0e940b..5d9d079 100644 > --- a/arch/x86/kvm/Makefile > +++ b/arch/x86/kvm/Makefile > @@ -11,7 +11,7 @@ endif > EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm > > kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ > - i8254.o > + i8254.o vtd.o > obj-$(CONFIG_KVM) += kvm.o > kvm-intel-objs = vmx.o > obj-$(CONFIG_KVM_INTEL) += kvm-intel.o > diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c > new file mode 100644 > index 0000000..5abeef1 > --- /dev/null > +++ b/arch/x86/kvm/vtd.c > @@ -0,0 +1,189 @@ > +/* > + * Copyright (c) 2006, Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License along with > + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple > + * Place - Suite 330, Boston, MA 02111-1307 USA. > + * > + * Copyright (C) 2006-2008 Intel Corporation > + * Author: Allen M. Kay > + * Author: Weidong Han > + */ > + > +#include > +#include > +#include > +#include > +#include > + > +static int kvm_iommu_unmap_memslots(struct kvm *kvm); > + > +int kvm_iommu_map_pages(struct kvm *kvm, > + gfn_t base_gfn, unsigned long npages) > +{ > + gfn_t gfn = base_gfn; > + pfn_t pfn; > + struct page *page; > + int i, rc; > + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; > + > + if (!domain) > + return -EFAULT; > + > + for (i = 0; i < npages; i++) { > + pfn = gfn_to_pfn(kvm, gfn); > + if (pfn_valid(pfn)) { > As I've mentioned before, this is wrong. We should add MMIO pages to the VT-d tables but at any rate, pfn_valid() doesn't work for checking if something is MMIO. > + rc = intel_iommu_page_mapping(domain, > + gfn << PAGE_SHIFT, > + pfn << PAGE_SHIFT, > + PAGE_SIZE, > + DMA_PTE_READ | > + DMA_PTE_WRITE); > + if (rc) { > + page = pfn_to_page(pfn); > + put_page(page); > This should be kvm_release_pfn_clean(). > + } > + } else { > + printk(KERN_DEBUG "kvm_iommu_map_page:" > + "invalid pfn=%lx\n", pfn); > + return 0; > + } > + gfn++; > + } > + return 0; > +} > + > +static int kvm_iommu_map_memslots(struct kvm *kvm) > +{ > + int i, rc; > + for (i = 0; i < kvm->nmemslots; i++) { > + rc = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, > + kvm->memslots[i].npages); > + if (rc) > + return rc; > + } > + return 0; > +} > + > +int kvm_iommu_map_guest(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev) > +{ > + struct pci_dev *pdev = NULL; > + > + printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", > + pci_pt_dev->host.busnr, > + PCI_SLOT(pci_pt_dev->host.devfn), > + PCI_FUNC(pci_pt_dev->host.devfn)); > + > + for_each_pci_dev(pdev) { > + if ((pdev->bus->number == pci_pt_dev->host.busnr) && > + (pdev->devfn == pci_pt_dev->host.devfn)) { > + break; > + } > + } > + > + if (pdev == NULL) { > + if (kvm->arch.intel_iommu_domain) { > + intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); > + kvm->arch.intel_iommu_domain = NULL; > + } > + return -ENODEV; > + } > + > + kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); > + > + if (kvm_iommu_map_memslots(kvm)) { > + kvm_iommu_unmap_memslots(kvm); > + return -EFAULT; > + } > + > + intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, > + pdev->bus->number, pdev->devfn); > + > + if (intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, > + pdev)) { > + printk(KERN_ERR "Domain context map for %s failed", > + pci_name(pdev)); > + return -EFAULT; > + } > + return 0; > +} > + > +static int kvm_iommu_put_pages(struct kvm *kvm, > + gfn_t base_gfn, unsigned long npages) > +{ > + gfn_t gfn = base_gfn; > + pfn_t pfn; > + struct page *page; > + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; > + int i; > + > + if (!domain) > + return -EFAULT; > + > + for (i = 0; i < npages; i++) { > + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, > + gfn << PAGE_SHIFT); > + > + if (pfn && pfn_valid(pfn)) { > + page = pfn_to_page(pfn); > + put_page(page); > + } > + gfn++; > + } > + return 0; > +} > + > +static int kvm_iommu_unmap_memslots(struct kvm *kvm) > +{ > + int i, rc; > + for (i = 0; i < kvm->nmemslots; i++) { > + rc = kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, > + kvm->memslots[i].npages); > + if (rc) > + return rc; > + } > + return 0; > +} > + > +int kvm_iommu_unmap_guest(struct kvm *kvm) > +{ > + struct kvm_pci_pt_dev_list *entry; > + struct pci_dev *pdev = NULL; > + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; > + > + if (!domain) > + return 0; > + > + list_for_each_entry(entry, &kvm->arch.pci_pt_dev_head, list) { > + printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", > + entry->pt_dev.host.busnr, > + PCI_SLOT(entry->pt_dev.host.devfn), > + PCI_FUNC(entry->pt_dev.host.devfn)); > + > + for_each_pci_dev(pdev) { > + if ((pdev->bus->number == entry->pt_dev.host.busnr) && > + (pdev->devfn == entry->pt_dev.host.devfn)) > + break; > + } > + > + if (pdev == NULL) > + return -ENODEV; > + > + /* detach kvm dmar domain */ > + intel_iommu_detach_dev(domain, > + pdev->bus->number, pdev->devfn); > + } > + kvm_iommu_unmap_memslots(kvm); > + intel_iommu_domain_exit(domain); > + return 0; > +} > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h > index 41332f6..9391e57 100644 > --- a/include/asm-x86/kvm_host.h > +++ b/include/asm-x86/kvm_host.h > @@ -308,6 +308,21 @@ struct kvm_mem_alias { > gfn_t target_gfn; > }; > > +struct kvm_pci_passthrough_dev_kernel { > + struct kvm_pci_pt_info guest; > + struct kvm_pci_pt_info host; > + struct pci_dev *dev; > +}; > + > +/* This list is to store the guest bus:device:function-irq and host > + * bus:device:function-irq mapping for assigned devices. > + */ > +struct kvm_pci_pt_dev_list { > + struct list_head list; > + struct kvm_pci_passthrough_dev_kernel pt_dev; > +}; > + > + > struct kvm_arch{ > int naliases; > struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; > @@ -320,6 +335,8 @@ struct kvm_arch{ > * Hash table of struct kvm_mmu_page. > */ > struct list_head active_mmu_pages; > + struct list_head pci_pt_dev_head; > + struct dmar_domain *intel_iommu_domain; > struct kvm_pic *vpic; > struct kvm_ioapic *vioapic; > struct kvm_pit *vpit; > diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h > index 76f3921..88153f4 100644 > --- a/include/asm-x86/kvm_para.h > +++ b/include/asm-x86/kvm_para.h > @@ -144,4 +144,18 @@ static inline unsigned int kvm_arch_para_features(void) > > #endif > > +/* Stores information for identifying host PCI devices assigned to the > + * guest: this is used in the host kernel and in the userspace. > + */ > +struct kvm_pci_pt_info { > + unsigned char busnr; > + unsigned int devfn; > + __u32 irq; > Userspace should not have to provide the IRQ. Just use the PCI device structure to find it. We should use different structures for host/guest devices. > +}; > + > +/* Mapping between host and guest PCI device */ > +struct kvm_pci_passthrough_dev { > + struct kvm_pci_pt_info guest; > + struct kvm_pci_pt_info host; > +}; > #endif > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index fc685c5..424534b 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -278,6 +278,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); > int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); > void kvm_vcpu_kick(struct kvm_vcpu *vcpu); > > +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, > + unsigned long npages); > +int kvm_iommu_map_guest(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev); > +int kvm_iommu_unmap_guest(struct kvm *kvm); > + > static inline void kvm_guest_enter(void) > { > account_system_vtime(current); > I'm not seeing these functions called anywhere? Am I missing something obvious or is part of your patch missing? Regards, Anthony Liguori