From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>,
Keir Fraser <keir@xen.org>,
xiantao.zhang@intel.com, suravee.suthikulpanit@amd.com
Subject: Re: [PATCH v2 2/5] IOMMU: make page table deallocation preemptible
Date: Fri, 13 Dec 2013 15:26:12 +0000
Message-ID: <52AB2714.2050805@citrix.com>
In-Reply-To: <52AB2105020000780010D064@nat28.tlf.novell.com>
On 13/12/2013 14:00, Jan Beulich wrote:
> This too can take an arbitrary amount of time.
>
> In fact, the bulk of the work is being moved to a tasklet, as handling
> the necessary preemption logic in line seems close to impossible given
> that the teardown may also be invoked on error paths.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> ---
> v2: abstract out tasklet logic
>
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -405,11 +405,21 @@ static int amd_iommu_assign_device(struc
>     return reassign_device(dom0, d, devfn, pdev);
> }
>
> -static void deallocate_next_page_table(struct page_info* pg, int level)
> +static void deallocate_next_page_table(struct page_info *pg, int level)
> +{
> +    PFN_ORDER(pg) = level;
> +    spin_lock(&iommu_pt_cleanup_lock);
> +    page_list_add_tail(pg, &iommu_pt_cleanup_list);
> +    spin_unlock(&iommu_pt_cleanup_lock);
> +}
> +
> +static void deallocate_page_table(struct page_info *pg)
> {
>     void *table_vaddr, *pde;
>     u64 next_table_maddr;
> -    int index, next_level;
> +    unsigned int index, level = PFN_ORDER(pg), next_level;
> +
> +    PFN_ORDER(pg) = 0;
>
>     if ( level <= 1 )
>     {
> @@ -599,6 +609,7 @@ const struct iommu_ops amd_iommu_ops = {
>     .teardown = amd_iommu_domain_destroy,
>     .map_page = amd_iommu_map_page,
>     .unmap_page = amd_iommu_unmap_page,
> +    .free_page_table = deallocate_page_table,
>     .reassign_device = reassign_device,
>     .get_device_group_id = amd_iommu_group_id,
>     .update_ire_from_apic = amd_iommu_ioapic_update_ire,
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -58,6 +58,10 @@ bool_t __read_mostly amd_iommu_perdev_in
>
> DEFINE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>
> +DEFINE_SPINLOCK(iommu_pt_cleanup_lock);
> +PAGE_LIST_HEAD(iommu_pt_cleanup_list);
> +static struct tasklet iommu_pt_cleanup_tasklet;
> +
> static struct keyhandler iommu_p2m_table = {
>     .diagnostic = 0,
>     .u.fn = iommu_dump_p2m_table,
> @@ -235,6 +239,15 @@ int iommu_remove_device(struct pci_dev *
>     return hd->platform_ops->remove_device(pdev->devfn, pdev);
> }
>
> +static void iommu_teardown(struct domain *d)
> +{
> +    const struct hvm_iommu *hd = domain_hvm_iommu(d);
> +
> +    d->need_iommu = 0;
> +    hd->platform_ops->teardown(d);
> +    tasklet_schedule(&iommu_pt_cleanup_tasklet);
> +}
> +
> /*
> * If the device isn't owned by dom0, it means it already
> * has been assigned to other domain, or it doesn't exist.
> @@ -309,10 +322,7 @@ static int assign_device(struct domain *
>
>  done:
>     if ( !has_arch_pdevs(d) && need_iommu(d) )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>     spin_unlock(&pcidevs_lock);
>
>     return rc;
> @@ -377,10 +387,7 @@ static int iommu_populate_page_table(str
>     if ( !rc )
>         iommu_iotlb_flush_all(d);
>     else if ( rc != -ERESTART )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>
>     return rc;
> }
> @@ -397,10 +404,7 @@ void iommu_domain_destroy(struct domain
>         return;
>
>     if ( need_iommu(d) )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>
>     list_for_each_safe ( ioport_list, tmp, &hd->g2m_ioport_list )
>     {
> @@ -438,6 +442,23 @@ int iommu_unmap_page(struct domain *d, u
>     return hd->platform_ops->unmap_page(d, gfn);
> }
>
> +static void iommu_free_pagetables(unsigned long unused)
> +{
> +    do {
> +        struct page_info *pg;
> +
> +        spin_lock(&iommu_pt_cleanup_lock);
> +        pg = page_list_remove_head(&iommu_pt_cleanup_list);
> +        spin_unlock(&iommu_pt_cleanup_lock);
> +        if ( !pg )
> +            return;
> +        iommu_get_ops()->free_page_table(pg);
> +    } while ( !softirq_pending(smp_processor_id()) );
> +
> +    tasklet_schedule_on_cpu(&iommu_pt_cleanup_tasklet,
> +                            cpumask_cycle(smp_processor_id(), &cpu_online_map));
> +}
> +
> void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count)
> {
> struct hvm_iommu *hd = domain_hvm_iommu(d);
> @@ -500,10 +521,7 @@ int deassign_device(struct domain *d, u1
>     pdev->fault.count = 0;
>
>     if ( !has_arch_pdevs(d) && need_iommu(d) )
> -    {
> -        d->need_iommu = 0;
> -        hd->platform_ops->teardown(d);
> -    }
> +        iommu_teardown(d);
>
>     return ret;
> }
> @@ -542,6 +560,7 @@ int __init iommu_setup(void)
>                iommu_passthrough ? "Passthrough" :
>                iommu_dom0_strict ? "Strict" : "Relaxed");
>         printk("Interrupt remapping %sabled\n", iommu_intremap ? "en" : "dis");
> +        tasklet_init(&iommu_pt_cleanup_tasklet, iommu_free_pagetables, 0);
>     }
>
> return rc;
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -668,13 +668,24 @@ static void dma_pte_clear_one(struct dom
>
> static void iommu_free_pagetable(u64 pt_maddr, int level)
> {
> -    int i;
> -    struct dma_pte *pt_vaddr, *pte;
> -    int next_level = level - 1;
> +    struct page_info *pg = maddr_to_page(pt_maddr);
>
>     if ( pt_maddr == 0 )
>         return;
>
> +    PFN_ORDER(pg) = level;
> +    spin_lock(&iommu_pt_cleanup_lock);
> +    page_list_add_tail(pg, &iommu_pt_cleanup_list);
> +    spin_unlock(&iommu_pt_cleanup_lock);
> +}
> +
> +static void iommu_free_page_table(struct page_info *pg)
> +{
> +    unsigned int i, next_level = PFN_ORDER(pg) - 1;
> +    u64 pt_maddr = page_to_maddr(pg);
> +    struct dma_pte *pt_vaddr, *pte;
> +
> +    PFN_ORDER(pg) = 0;
>     pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
>
>     for ( i = 0; i < PTE_NUM; i++ )
> @@ -2430,6 +2441,7 @@ const struct iommu_ops intel_iommu_ops =
>     .teardown = iommu_domain_teardown,
>     .map_page = intel_iommu_map_page,
>     .unmap_page = intel_iommu_unmap_page,
> +    .free_page_table = iommu_free_page_table,
>     .reassign_device = reassign_device_ownership,
>     .get_device_group_id = intel_iommu_group_id,
>     .update_ire_from_apic = io_apic_write_remap_rte,
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -88,6 +88,7 @@ bool_t pt_irq_need_timer(uint32_t flags)
>
> struct msi_desc;
> struct msi_msg;
> +struct page_info;
>
> struct iommu_ops {
> int (*init)(struct domain *d);
> @@ -100,6 +101,7 @@ struct iommu_ops {
>     int (*map_page)(struct domain *d, unsigned long gfn, unsigned long mfn,
>                     unsigned int flags);
>     int (*unmap_page)(struct domain *d, unsigned long gfn);
> +    void (*free_page_table)(struct page_info *);
>     int (*reassign_device)(struct domain *s, struct domain *t,
>                            u8 devfn, struct pci_dev *);
>     int (*get_device_group_id)(u16 seg, u8 bus, u8 devfn);
> @@ -151,4 +153,7 @@ int adjust_vtd_irq_affinities(void);
> */
> DECLARE_PER_CPU(bool_t, iommu_dont_flush_iotlb);
>
> +extern struct spinlock iommu_pt_cleanup_lock;
> +extern struct page_list_head iommu_pt_cleanup_list;
> +
> #endif /* _IOMMU_H_ */
>
>
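As a side note for anyone skimming the diff: the scheme boils down to "stash the
table level in PFN_ORDER(), queue the page on a locked cleanup list, and let a
self-rescheduling tasklet drain it".  Below is a rough, standalone sketch of that
pattern in plain C.  It is not the Xen code: the pthread mutex, the hand-rolled
list and the fixed batch limit merely stand in for iommu_pt_cleanup_lock, Xen's
page lists and the softirq_pending() check, and every identifier in it is made
up for illustration.

/*
 * Standalone sketch of the deferral pattern the patch introduces.  All names
 * here are illustrative stand-ins, not real Xen symbols: a pthread mutex
 * replaces iommu_pt_cleanup_lock, a singly linked list replaces the page
 * list, and a fixed batch limit replaces the softirq_pending() check that
 * decides when to reschedule the tasklet.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct pt_page {                     /* stand-in for struct page_info */
    unsigned int level;              /* stand-in for PFN_ORDER(pg) */
    struct pt_page *next;
};

static pthread_mutex_t cleanup_lock = PTHREAD_MUTEX_INITIALIZER;
static struct pt_page *cleanup_list; /* stand-in for iommu_pt_cleanup_list */
static bool work_queued;             /* stand-in for tasklet_schedule() */

/* Producer: instead of freeing the table synchronously (an arbitrarily large
 * amount of work), record the level and queue the page for later. */
static void defer_free_page_table(struct pt_page *pg, unsigned int level)
{
    pg->level = level;
    pthread_mutex_lock(&cleanup_lock);
    pg->next = cleanup_list;
    cleanup_list = pg;
    pthread_mutex_unlock(&cleanup_lock);
    work_queued = true;
}

/* Consumer: drain the list, but only a bounded amount per invocation; if
 * anything is left, flag ourselves to run again so other work gets a chance
 * in between (softirqs / other CPUs in the real code). */
static void free_pagetables_worker(void)
{
    unsigned int batch = 0;

    do {
        struct pt_page *pg;

        pthread_mutex_lock(&cleanup_lock);
        pg = cleanup_list;
        if ( pg )
            cleanup_list = pg->next;
        pthread_mutex_unlock(&cleanup_lock);

        if ( !pg )
        {
            work_queued = false;
            return;
        }

        /* The real code calls iommu_get_ops()->free_page_table(pg) here,
         * which frees one table and queues its children the same way. */
        printf("freeing level-%u table\n", pg->level);
        free(pg);
    } while ( ++batch < 32 );        /* ~ !softirq_pending(cpu) */

    work_queued = true;              /* ~ tasklet_schedule_on_cpu(...) */
}

int main(void)
{
    for ( unsigned int i = 0; i < 100; i++ )
    {
        struct pt_page *pg = calloc(1, sizeof(*pg));

        if ( !pg )
            break;
        defer_free_page_table(pg, 1 + i % 3);
    }

    while ( work_queued )
        free_pagetables_worker();

    return 0;
}

The attraction of doing it this way, rather than threading -ERESTART handling
through every teardown and error path, is that everything after the enqueue
runs in tasklet context, and tasklet_schedule_on_cpu()/cpumask_cycle() lets
the real implementation hand the remaining work to another online CPU whenever
softirqs are pending locally.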