From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH 2/5] IOMMU: make page table deallocation preemptible Date: Wed, 11 Dec 2013 19:01:23 +0000 Message-ID: <52A8B683.4050705@citrix.com> References: <52A744B7020000780010BEF1@nat28.tlf.novell.com> <52A7456A020000780010BF23@nat28.tlf.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="===============9169397424353092369==" Return-path: Received: from mail6.bemta5.messagelabs.com ([195.245.231.135]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1Vqp2E-0008JJ-2W for xen-devel@lists.xenproject.org; Wed, 11 Dec 2013 19:01:26 +0000 In-Reply-To: <52A7456A020000780010BF23@nat28.tlf.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich , xen-devel Cc: George Dunlap , Keir Fraser , suravee.suthikulpanit@amd.com, xiantao.zhang@intel.com List-Id: xen-devel@lists.xenproject.org --===============9169397424353092369== Content-Type: multipart/alternative; boundary="------------080209080900090008050308" --------------080209080900090008050308 Content-Type: text/plain; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit On 10/12/2013 15:46, Jan Beulich wrote: > This too can take an arbitrary amount of time. > > In fact, the bulk of the work is being moved to a tasklet, as handling > the necessary preemption logic in line seems close to impossible given > that the teardown may also be invoked on error paths. > > Signed-off-by: Jan Beulich There is a lot of duplicated code here. I would suggest making most of this infrastructure common, and having a new iommu_op for "void free_io_page_table(struct page_info*);" ~Andrew > > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c > @@ -195,6 +195,8 @@ static int __init amd_iommu_setup_dom0_d > return 0; > } > > +static void deallocate_page_tables(unsigned long unused); > + > int __init amd_iov_detect(void) > { > INIT_LIST_HEAD(&amd_iommu_head); > @@ -215,6 +217,7 @@ int __init amd_iov_detect(void) > return -ENODEV; > } > > + tasklet_init(&iommu_pt_cleanup_tasklet, deallocate_page_tables, 0); > init_done = 1; > > if ( !amd_iommu_perdev_intremap ) > @@ -405,11 +408,21 @@ static int amd_iommu_assign_device(struc > return reassign_device(dom0, d, devfn, pdev); > } > > -static void deallocate_next_page_table(struct page_info* pg, int level) > +static void deallocate_next_page_table(struct page_info *pg, int level) > +{ > + spin_lock(&iommu_pt_cleanup_lock); > + PFN_ORDER(pg) = level; > + page_list_add_tail(pg, &iommu_pt_cleanup_list); > + spin_unlock(&iommu_pt_cleanup_lock); > +} > + > +static void deallocate_page_table(struct page_info *pg) > { > void *table_vaddr, *pde; > u64 next_table_maddr; > - int index, next_level; > + unsigned int index, level = PFN_ORDER(pg), next_level; > + > + PFN_ORDER(pg) = 0; > > if ( level <= 1 ) > { > @@ -439,6 +452,23 @@ static void deallocate_next_page_table(s > free_amd_iommu_pgtable(pg); > } > > +static void deallocate_page_tables(unsigned long unused) > +{ > + do { > + struct page_info *pg; > + > + spin_lock(&iommu_pt_cleanup_lock); > + pg = page_list_remove_head(&iommu_pt_cleanup_list); > + spin_unlock(&iommu_pt_cleanup_lock); > + if ( !pg ) > + return; > + deallocate_page_table(pg); > + } while ( !softirq_pending(smp_processor_id()) ); > + > + tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet, > + cpumask_cycle(smp_processor_id(), &cpu_online_map)); > +} > + > static void deallocate_iommu_page_tables(struct domain *d) > { > struct hvm_iommu *hd = domain_hvm_iommu(d); > @@ -451,6 +481,7 @@ static void deallocate_iommu_page_tables > { > deallocate_next_page_table(hd->root_table, hd->paging_mode); > hd->root_table = NULL; > + tasklet_schedule(&iommu_pt_cleanup_tasklet); > } > spin_unlock(&hd->mapping_lock); > } > --- a/xen/drivers/passthrough/iommu.c > +++ b/xen/drivers/passthrough/iommu.c > @@ -58,6 +58,10 @@ bool_t __read_mostly amd_iommu_perdev_in > > DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb); > > +DEFINE_SPINLOCK(iommu_pt_cleanup_lock); > +PAGE_LIST_HEAD(iommu_pt_cleanup_list); > +struct tasklet iommu_pt_cleanup_tasklet; > + > static struct keyhandler iommu_p2m_table = { > .diagnostic = 0, > .u.fn = iommu_dump_p2m_table, > --- a/xen/drivers/passthrough/vtd/iommu.c > +++ b/xen/drivers/passthrough/vtd/iommu.c > @@ -668,13 +668,24 @@ static void dma_pte_clear_one(struct dom > > static void iommu_free_pagetable(u64 pt_maddr, int level) > { > - int i; > - struct dma_pte *pt_vaddr, *pte; > - int next_level = level - 1; > + struct page_info *pg = maddr_to_page(pt_maddr); > > if ( pt_maddr == 0 ) > return; > > + spin_lock(&iommu_pt_cleanup_lock); > + PFN_ORDER(pg) = level; > + page_list_add_tail(pg, &iommu_pt_cleanup_list); > + spin_unlock(&iommu_pt_cleanup_lock); > +} > + > +static void iommu_free_page_table(struct page_info *pg) > +{ > + unsigned int i, next_level = PFN_ORDER(pg) - 1; > + u64 pt_maddr = page_to_maddr(pg); > + struct dma_pte *pt_vaddr, *pte; > + > + PFN_ORDER(pg) = 0; > pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr); > > for ( i = 0; i < PTE_NUM; i++ ) > @@ -694,6 +705,23 @@ static void iommu_free_pagetable(u64 pt_ > free_pgtable_maddr(pt_maddr); > } > > +static void iommu_free_pagetables(unsigned long unused) > +{ > + do { > + struct page_info *pg; > + > + spin_lock(&iommu_pt_cleanup_lock); > + pg = page_list_remove_head(&iommu_pt_cleanup_list); > + spin_unlock(&iommu_pt_cleanup_lock); > + if ( !pg ) > + return; > + iommu_free_page_table(pg); > + } while ( !softirq_pending(smp_processor_id()) ); > + > + tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet, > + cpumask_cycle(smp_processor_id(), &cpu_online_map)); > +} > + > static int iommu_set_root_entry(struct iommu *iommu) > { > u32 sts; > @@ -1704,6 +1732,8 @@ void iommu_domain_teardown(struct domain > iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw)); > hd->pgd_maddr = 0; > spin_unlock(&hd->mapping_lock); > + > + tasklet_schedule(&iommu_pt_cleanup_tasklet); > } > > static int intel_iommu_map_page( > @@ -2204,6 +2234,7 @@ int __init intel_vtd_setup(void) > if ( ret ) > goto error; > > + tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pagetables, 0); > register_keyhandler('V', &dump_iommu_info_keyhandler); > > return 0; > --- a/xen/include/xen/iommu.h > +++ b/xen/include/xen/iommu.h > @@ -151,4 +151,8 @@ int adjust_vtd_irq_affinities(void); > */ > DECLARE_PER_CPU(bool_t, iommu_dont_flush_iotlb); > > +extern struct spinlock iommu_pt_cleanup_lock; > +extern struct page_list_head iommu_pt_cleanup_list; > +extern struct tasklet iommu_pt_cleanup_tasklet; > + > #endif /* _IOMMU_H_ */ > > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel --------------080209080900090008050308 Content-Type: text/html; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit
On 10/12/2013 15:46, Jan Beulich wrote:
This too can take an arbitrary amount of time.

In fact, the bulk of the work is being moved to a tasklet, as handling
the necessary preemption logic in line seems close to impossible given
that the teardown may also be invoked on error paths.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

There is a lot of duplicated code here.  I would suggest making most of this infrastructure common, and having a new iommu_op for "void free_io_page_table(struct page_info*);"

~Andrew


--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -195,6 +195,8 @@ static int __init amd_iommu_setup_dom0_d
     return 0;
 }
 
+static void deallocate_page_tables(unsigned long unused);
+
 int __init amd_iov_detect(void)
 {
     INIT_LIST_HEAD(&amd_iommu_head);
@@ -215,6 +217,7 @@ int __init amd_iov_detect(void)
         return -ENODEV;
     }
 
+    tasklet_init(&iommu_pt_cleanup_tasklet, deallocate_page_tables, 0);
     init_done = 1;
 
     if ( !amd_iommu_perdev_intremap )
@@ -405,11 +408,21 @@ static int amd_iommu_assign_device(struc
     return reassign_device(dom0, d, devfn, pdev);
 }
 
-static void deallocate_next_page_table(struct page_info* pg, int level)
+static void deallocate_next_page_table(struct page_info *pg, int level)
+{
+    spin_lock(&iommu_pt_cleanup_lock);
+    PFN_ORDER(pg) = level;
+    page_list_add_tail(pg, &iommu_pt_cleanup_list);
+    spin_unlock(&iommu_pt_cleanup_lock);
+}
+
+static void deallocate_page_table(struct page_info *pg)
 {
     void *table_vaddr, *pde;
     u64 next_table_maddr;
-    int index, next_level;
+    unsigned int index, level = PFN_ORDER(pg), next_level;
+
+    PFN_ORDER(pg) = 0;
 
     if ( level <= 1 )
     {
@@ -439,6 +452,23 @@ static void deallocate_next_page_table(s
     free_amd_iommu_pgtable(pg);
 }
 
+static void deallocate_page_tables(unsigned long unused)
+{
+    do {
+        struct page_info *pg;
+
+        spin_lock(&iommu_pt_cleanup_lock);
+        pg = page_list_remove_head(&iommu_pt_cleanup_list);
+        spin_unlock(&iommu_pt_cleanup_lock);
+        if ( !pg )
+            return;
+        deallocate_page_table(pg);
+    } while ( !softirq_pending(smp_processor_id()) );
+
+    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
+                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
+}
+
 static void deallocate_iommu_page_tables(struct domain *d)
 {
     struct hvm_iommu *hd  = domain_hvm_iommu(d);
@@ -451,6 +481,7 @@ static void deallocate_iommu_page_tables
     {
         deallocate_next_page_table(hd->root_table, hd->paging_mode);
         hd->root_table = NULL;
+        tasklet_schedule(&iommu_pt_cleanup_tasklet);
     }
     spin_unlock(&hd->mapping_lock);
 }
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -58,6 +58,10 @@ bool_t __read_mostly amd_iommu_perdev_in
 
 DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+DEFINE_SPINLOCK(iommu_pt_cleanup_lock);
+PAGE_LIST_HEAD(iommu_pt_cleanup_list);
+struct tasklet iommu_pt_cleanup_tasklet;
+
 static struct keyhandler iommu_p2m_table = {
     .diagnostic = 0,
     .u.fn = iommu_dump_p2m_table,
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -668,13 +668,24 @@ static void dma_pte_clear_one(struct dom
 
 static void iommu_free_pagetable(u64 pt_maddr, int level)
 {
-    int i;
-    struct dma_pte *pt_vaddr, *pte;
-    int next_level = level - 1;
+    struct page_info *pg = maddr_to_page(pt_maddr);
 
     if ( pt_maddr == 0 )
         return;
 
+    spin_lock(&iommu_pt_cleanup_lock);
+    PFN_ORDER(pg) = level;
+    page_list_add_tail(pg, &iommu_pt_cleanup_list);
+    spin_unlock(&iommu_pt_cleanup_lock);
+}
+
+static void iommu_free_page_table(struct page_info *pg)
+{
+    unsigned int i, next_level = PFN_ORDER(pg) - 1;
+    u64 pt_maddr = page_to_maddr(pg);
+    struct dma_pte *pt_vaddr, *pte;
+
+    PFN_ORDER(pg) = 0;
     pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
 
     for ( i = 0; i < PTE_NUM; i++ )
@@ -694,6 +705,23 @@ static void iommu_free_pagetable(u64 pt_
     free_pgtable_maddr(pt_maddr);
 }
 
+static void iommu_free_pagetables(unsigned long unused)
+{
+    do {
+        struct page_info *pg;
+
+        spin_lock(&iommu_pt_cleanup_lock);
+        pg = page_list_remove_head(&iommu_pt_cleanup_list);
+        spin_unlock(&iommu_pt_cleanup_lock);
+        if ( !pg )
+            return;
+        iommu_free_page_table(pg);
+    } while ( !softirq_pending(smp_processor_id()) );
+
+    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
+                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
+}
+
 static int iommu_set_root_entry(struct iommu *iommu)
 {
     u32 sts;
@@ -1704,6 +1732,8 @@ void iommu_domain_teardown(struct domain
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
     spin_unlock(&hd->mapping_lock);
+
+    tasklet_schedule(&iommu_pt_cleanup_tasklet);
 }
 
 static int intel_iommu_map_page(
@@ -2204,6 +2234,7 @@ int __init intel_vtd_setup(void)
     if ( ret )
         goto error;
 
+    tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pagetables, 0);
     register_keyhandler('V', &dump_iommu_info_keyhandler);
 
     return 0;
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -151,4 +151,8 @@ int adjust_vtd_irq_affinities(void);
  */
 DECLARE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
 
+extern struct spinlock iommu_pt_cleanup_lock;
+extern struct page_list_head iommu_pt_cleanup_list;
+extern struct tasklet iommu_pt_cleanup_tasklet;
+
 #endif /* _IOMMU_H_ */




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

--------------080209080900090008050308-- --===============9169397424353092369== Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel --===============9169397424353092369==--