All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
	Keir Fraser <keir@xen.org>,
	xiantao.zhang@intel.com, suravee.suthikulpanit@amd.com
Subject: Re: [PATCH v2 2/5] IOMMU: make page table deallocation preemptible
Date: Fri, 13 Dec 2013 15:26:12 +0000	[thread overview]
Message-ID: <52AB2714.2050805@citrix.com> (raw)
In-Reply-To: <52AB2105020000780010D064@nat28.tlf.novell.com>

On 13/12/2013 14:00, Jan Beulich wrote:
> This too can take an arbitrary amount of time.
>
> In fact, the bulk of the work is being moved to a tasklet, as handling
> the necessary preemption logic in line seems close to impossible given
> that the teardown may also be invoked on error paths.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> v2: abstract out tasklet logic
>
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -405,11 +405,21 @@ static int amd_iommu_assign_device(struc
>      return reassign_device(dom0, d, devfn, pdev);
>  }
>  
> -static void deallocate_next_page_table(struct page_info* pg, int level)
> +static void deallocate_next_page_table(struct page_info *pg, int level)
> +{
> +    PFN_ORDER(pg) = level;
> +    spin_lock(&iommu_pt_cleanup_lock);
> +    page_list_add_tail(pg, &iommu_pt_cleanup_list);
> +    spin_unlock(&iommu_pt_cleanup_lock);
> +}
> +
> +static void deallocate_page_table(struct page_info *pg)
>  {
>      void *table_vaddr, *pde;
>      u64 next_table_maddr;
> -    int index, next_level;
> +    unsigned int index, level = PFN_ORDER(pg), next_level;
> +
> +    PFN_ORDER(pg) = 0;
>  
>      if ( level <= 1 )
>      {
> @@ -599,6 +609,7 @@ const struct iommu_ops amd_iommu_ops = {
>      .teardown = amd_iommu_domain_destroy,
>      .map_page = amd_iommu_map_page,
>      .unmap_page = amd_iommu_unmap_page,
> +    .free_page_table = deallocate_page_table,
>      .reassign_device = reassign_device,
>      .get_device_group_id = amd_iommu_group_id,
>      .update_ire_from_apic = amd_iommu_ioapic_update_ire,
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -58,6 +58,10 @@ bool_t __read_mostly amd_iommu_perdev_in
>  
>  DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>  
> +DEFINE_SPINLOCK(iommu_pt_cleanup_lock);
> +PAGE_LIST_HEAD(iommu_pt_cleanup_list);
> +static struct tasklet iommu_pt_cleanup_tasklet;
> +
>  static struct keyhandler iommu_p2m_table = {
>      .diagnostic = 0,
>      .u.fn = iommu_dump_p2m_table,
> @@ -235,6 +239,15 @@ int iommu_remove_device(struct pci_dev *
>      return hd->platform_ops->remove_device(pdev->devfn, pdev);
>  }
>  
> +static void iommu_teardown(struct domain *d)
> +{
> +    const struct hvm_iommu *hd = domain_hvm_iommu(d);
> +
> +    d->need_iommu = 0;
> +    hd->platform_ops->teardown(d);
> +    tasklet_schedule(&iommu_pt_cleanup_tasklet);
> +}
> +
>  /*
>   * If the device isn't owned by dom0, it means it already
>   * has been assigned to other domain, or it doesn't exist.
> @@ -309,10 +322,7 @@ static int assign_device(struct domain *
>  
>   done:
>      if ( !has_arch_pdevs(d) && need_iommu(d) )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>      spin_unlock(&pcidevs_lock);
>  
>      return rc;
> @@ -377,10 +387,7 @@ static int iommu_populate_page_table(str
>      if ( !rc )
>          iommu_iotlb_flush_all(d);
>      else if ( rc != -ERESTART )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>  
>      return rc;
>  }
> @@ -397,10 +404,7 @@ void iommu_domain_destroy(struct domain 
>          return;
>  
>      if ( need_iommu(d) )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>  
>      list_for_each_safe ( ioport_list, tmp, &hd->g2m_ioport_list )
>      {
> @@ -438,6 +442,23 @@ int iommu_unmap_page(struct domain *d, u
>      return hd->platform_ops->unmap_page(d, gfn);
>  }
>  
> +static void iommu_free_pagetables(unsigned long unused)
> +{
> +    do {
> +        struct page_info *pg;
> +
> +        spin_lock(&iommu_pt_cleanup_lock);
> +        pg = page_list_remove_head(&iommu_pt_cleanup_list);
> +        spin_unlock(&iommu_pt_cleanup_lock);
> +        if ( !pg )
> +            return;
> +        iommu_get_ops()->free_page_table(pg);
> +    } while ( !softirq_pending(smp_processor_id()) );
> +
> +    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
> +                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
> +}
> +
>  void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count)
>  {
>      struct hvm_iommu *hd = domain_hvm_iommu(d);
> @@ -500,10 +521,7 @@ int deassign_device(struct domain *d, u1
>      pdev->fault.count = 0;
>  
>      if ( !has_arch_pdevs(d) && need_iommu(d) )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>  
>      return ret;
>  }
> @@ -542,6 +560,7 @@ int __init iommu_setup(void)
>                 iommu_passthrough ? "Passthrough" :
>                 iommu_dom0_strict ? "Strict" : "Relaxed");
>          printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
> +        tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pagetables, 0);
>      }
>  
>      return rc;
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -668,13 +668,24 @@ static void dma_pte_clear_one(struct dom
>  
>  static void iommu_free_pagetable(u64 pt_maddr, int level)
>  {
> -    int i;
> -    struct dma_pte *pt_vaddr, *pte;
> -    int next_level = level - 1;
> +    struct page_info *pg = maddr_to_page(pt_maddr);
>  
>      if ( pt_maddr == 0 )
>          return;
>  
> +    PFN_ORDER(pg) = level;
> +    spin_lock(&iommu_pt_cleanup_lock);
> +    page_list_add_tail(pg, &iommu_pt_cleanup_list);
> +    spin_unlock(&iommu_pt_cleanup_lock);
> +}
> +
> +static void iommu_free_page_table(struct page_info *pg)
> +{
> +    unsigned int i, next_level = PFN_ORDER(pg) - 1;
> +    u64 pt_maddr = page_to_maddr(pg);
> +    struct dma_pte *pt_vaddr, *pte;
> +
> +    PFN_ORDER(pg) = 0;
>      pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
>  
>      for ( i = 0; i < PTE_NUM; i++ )
> @@ -2430,6 +2441,7 @@ const struct iommu_ops intel_iommu_ops =
>      .teardown = iommu_domain_teardown,
>      .map_page = intel_iommu_map_page,
>      .unmap_page = intel_iommu_unmap_page,
> +    .free_page_table = iommu_free_page_table,
>      .reassign_device = reassign_device_ownership,
>      .get_device_group_id = intel_iommu_group_id,
>      .update_ire_from_apic = io_apic_write_remap_rte,
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -88,6 +88,7 @@ bool_t pt_irq_need_timer(uint32_t flags)
>  
>  struct msi_desc;
>  struct msi_msg;
> +struct page_info;
>  
>  struct iommu_ops {
>      int (*init)(struct domain *d);
> @@ -100,6 +101,7 @@ struct iommu_ops {
>      int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
>                      unsigned int flags);
>      int (*unmap_page)(struct domain *d, unsigned long gfn);
> +    void (*free_page_table)(struct page_info *);
>      int (*reassign_device)(struct domain *s, struct domain *t,
>  			   u8 devfn, struct pci_dev *);
>      int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
> @@ -151,4 +153,7 @@ int adjust_vtd_irq_affinities(void);
>   */
>  DECLARE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>  
> +extern struct spinlock iommu_pt_cleanup_lock;
> +extern struct page_list_head iommu_pt_cleanup_list;
> +
>  #endif /* _IOMMU_H_ */
>
>

  reply	other threads:[~2013-12-13 15:26 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-10 15:43 [PATCH 0/5] XSA-77 follow-ups Jan Beulich
2013-12-10 15:45 ` [PATCH 1/5] IOMMU: make page table population preemptible Jan Beulich
2013-12-11 18:40   ` Andrew Cooper
2013-12-13  9:51     ` Jan Beulich
2013-12-13 12:18       ` Andrew Cooper
2013-12-13 12:34         ` Jan Beulich
2013-12-13 13:57           ` Andrew Cooper
2013-12-13 13:59     ` [PATCH v2 " Jan Beulich
2013-12-13 14:16       ` Tim Deegan
2013-12-13 15:09       ` Andrew Cooper
2013-12-13 15:41         ` Jan Beulich
2013-12-13 15:44           ` Andrew Cooper
2013-12-30 13:43             ` Zhang, Xiantao
2014-01-07 13:23               ` Jan Beulich
2013-12-20 13:06   ` [PATCH " Jan Beulich
2013-12-10 15:46 ` [PATCH 2/5] IOMMU: make page table deallocation preemptible Jan Beulich
2013-12-11 19:01   ` Andrew Cooper
2013-12-13  9:55     ` Jan Beulich
2013-12-13 14:00     ` [PATCH v2 " Jan Beulich
2013-12-13 15:26       ` Andrew Cooper [this message]
2014-01-07 14:51       ` Zhang, Xiantao
2013-12-10 15:47 ` [PATCH 3/5] x86/p2m: restrict auditing to debug builds Jan Beulich
2013-12-10 15:58   ` Andrew Cooper
2013-12-10 17:31   ` George Dunlap
2013-12-13 13:46   ` Tim Deegan
2013-12-10 15:48 ` [PATCH 4/5] HVM: prevent leaking heap data from hvm_save_one() Jan Beulich
2013-12-10 16:01   ` Andrew Cooper
2013-12-10 17:32   ` George Dunlap
2013-12-17  9:16   ` Ping: " Jan Beulich
2013-12-17 10:45     ` Andrew Cooper
2013-12-17 11:02       ` Jan Beulich
2013-12-10 15:48 ` [PATCH 5/5] x86/PV: don't commit debug register values early in arch_set_info_guest() Jan Beulich
2013-12-10 17:23   ` Andrew Cooper
2013-12-10 17:33   ` George Dunlap
2013-12-10 18:17     ` Keir Fraser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52AB2714.2050805@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=keir@xen.org \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=xen-devel@lists.xenproject.org \
    --cc=xiantao.zhang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.