All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Durrant <pdurrant@amazon.com>
To: <xen-devel@lists.xenproject.org>
Cc: "Kevin Tian" <kevin.tian@intel.com>, "Wei Liu" <wl@xen.org>,
	"Andrew Cooper" <andrew.cooper3@citrix.com>,
	"Paul Durrant" <pdurrant@amazon.com>,
	"Jan Beulich" <jbeulich@suse.com>,
	"Roger Pau Monné" <roger.pau@citrix.com>
Subject: [Xen-devel] [PATCH v2] x86 / iommu: set up a scratch page in the quarantine domain
Date: Wed, 27 Nov 2019 17:11:43 +0000	[thread overview]
Message-ID: <20191127171143.27399-1-pdurrant@amazon.com> (raw)

This patch introduces a new iommu_op to facilitate a per-implementation
quarantine set up, and then further code for x86 implementations
(amd and vtd) to set up a read-only scratch page to serve as the source
for DMA reads whilst a device is assigned to dom_io. DMA writes will
continue to fault as before.

The reason for doing this is that some hardware may continue to re-try
DMA (despite FLR) in the event of an error, or even BME being cleared, and
will fail to deal with DMA read faults gracefully. Having a scratch page
mapped will allow pending DMA reads to complete and thus such buggy
hardware will eventually be quiesced.

NOTE: These modifications are restricted to x86 implementations only as
      the buggy h/w I am aware of is only used with Xen in an x86
      environment. ARM may require similar code but, since I am not
      aware of the need, this patch does not modify any ARM implementation.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Wei Liu <wl@xen.org>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>

v2:
 - Addressed comments from Jan

There is still the open question of whether use of a scratch page ought
to be gated on something, either are run-time or compile-time.
---
 xen/drivers/passthrough/amd/iommu_map.c       | 62 ++++++++++++++++
 xen/drivers/passthrough/amd/pci_amd_iommu.c   | 14 ++--
 xen/drivers/passthrough/iommu.c               | 20 +++++-
 xen/drivers/passthrough/vtd/iommu.c           | 72 +++++++++++++++----
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h |  3 +
 xen/include/xen/iommu.h                       |  1 +
 6 files changed, 148 insertions(+), 24 deletions(-)

diff --git a/xen/drivers/passthrough/amd/iommu_map.c b/xen/drivers/passthrough/amd/iommu_map.c
index cd5c7de7c5..54e1d132d9 100644
--- a/xen/drivers/passthrough/amd/iommu_map.c
+++ b/xen/drivers/passthrough/amd/iommu_map.c
@@ -560,6 +560,68 @@ int amd_iommu_reserve_domain_unity_map(struct domain *domain,
     return rt;
 }
 
+int __init amd_iommu_quarantine_init(struct domain *d)
+{
+    struct domain_iommu *hd = dom_iommu(d);
+    unsigned long max_gfn =
+        PFN_DOWN((1ul << DEFAULT_DOMAIN_ADDRESS_WIDTH) - 1);
+    unsigned int level = amd_iommu_get_paging_mode(max_gfn);
+    struct amd_iommu_pte *table;
+
+    if ( hd->arch.root_table )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&hd->arch.mapping_lock);
+
+    hd->arch.root_table = alloc_amd_iommu_pgtable();
+    if ( !hd->arch.root_table )
+        goto out;
+
+    table = __map_domain_page(hd->arch.root_table);
+    while ( level )
+    {
+        struct page_info *pg;
+        unsigned int i;
+
+        /*
+         * The pgtable allocator is fine for the leaf page, as well as
+         * page table pages, and the resulting allocations are always
+         * zeroed.
+         */
+        pg = alloc_amd_iommu_pgtable();
+        if ( !pg )
+            break;
+
+        for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
+        {
+            struct amd_iommu_pte *pde = &table[i];
+
+            /*
+             * PDEs are essentially a subset of PTEs, so this function
+             * is fine to use even at the leaf.
+             */
+            set_iommu_pde_present(pde, mfn_x(page_to_mfn(pg)), level - 1,
+                                  false, true);
+        }
+
+        unmap_domain_page(table);
+        table = __map_domain_page(pg);
+        level--;
+    }
+    unmap_domain_page(table);
+
+ out:
+    spin_unlock(&hd->arch.mapping_lock);
+
+    amd_iommu_flush_all_pages(d);
+
+    /* Pages leaked in failure case */
+    return level ? -ENOMEM : 0;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 75a0f1b4ab..4da6518773 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -95,10 +95,6 @@ static void amd_iommu_setup_domain_device(
     u8 bus = pdev->bus;
     const struct domain_iommu *hd = dom_iommu(domain);
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return;
-
     BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode ||
             !iommu->dev_table.buffer );
 
@@ -235,7 +231,7 @@ static int __must_check allocate_domain_resources(struct domain_iommu *hd)
     return rc;
 }
 
-static int get_paging_mode(unsigned long entries)
+int amd_iommu_get_paging_mode(unsigned long entries)
 {
     int level = 1;
 
@@ -257,7 +253,8 @@ static int amd_iommu_domain_init(struct domain *d)
 
     /* For pv and dom0, stick with get_paging_mode(max_page)
      * For HVM dom0, use 2 level page table at first */
-    hd->arch.paging_mode = is_hvm_domain(d) ? 2 : get_paging_mode(max_page);
+    hd->arch.paging_mode = is_hvm_domain(d) ?
+        2 : amd_iommu_get_paging_mode(max_page);
     return 0;
 }
 
@@ -290,10 +287,6 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
     int req_id;
     u8 bus = pdev->bus;
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return;
-
     BUG_ON ( iommu->dev_table.buffer == NULL );
     req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
     table = iommu->dev_table.buffer;
@@ -632,6 +625,7 @@ static void amd_dump_p2m_table(struct domain *d)
 static const struct iommu_ops __initconstrel _iommu_ops = {
     .init = amd_iommu_domain_init,
     .hwdom_init = amd_iommu_hwdom_init,
+    .quarantine_init = amd_iommu_quarantine_init,
     .add_device = amd_iommu_add_device,
     .remove_device = amd_iommu_remove_device,
     .assign_device  = amd_iommu_assign_device,
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 8cbe908fff..79f842e340 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -440,6 +440,23 @@ int iommu_iotlb_flush_all(struct domain *d, unsigned int flush_flags)
     return rc;
 }
 
+static int __init iommu_quarantine_init(void)
+{
+    const struct domain_iommu *hd = dom_iommu(dom_io);
+    int rc;
+
+    dom_io->options |= XEN_DOMCTL_CDF_iommu;
+
+    rc = iommu_domain_init(dom_io, 0);
+    if ( rc )
+        return rc;
+
+    if ( !hd->platform_ops->quarantine_init )
+        return 0;
+
+    return hd->platform_ops->quarantine_init(dom_io);
+}
+
 int __init iommu_setup(void)
 {
     int rc = -ENODEV;
@@ -473,8 +490,7 @@ int __init iommu_setup(void)
     }
     else
     {
-        dom_io->options |= XEN_DOMCTL_CDF_iommu;
-        if ( iommu_domain_init(dom_io, 0) )
+        if ( iommu_quarantine_init() )
             panic("Could not set up quarantine\n");
 
         printk(" - Dom0 mode: %s\n",
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 25ad649c34..1e502131d7 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1291,10 +1291,6 @@ int domain_context_mapping_one(
     int agaw, rc, ret;
     bool_t flush_dev_iotlb;
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return 0;
-
     ASSERT(pcidevs_locked());
     spin_lock(&iommu->lock);
     maddr = bus_to_context_maddr(iommu, bus);
@@ -1541,10 +1537,6 @@ int domain_context_unmap_one(
     int iommu_domid, rc, ret;
     bool_t flush_dev_iotlb;
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        return 0;
-
     ASSERT(pcidevs_locked());
     spin_lock(&iommu->lock);
 
@@ -1677,10 +1669,6 @@ static int domain_context_unmap(struct domain *domain, u8 devfn,
         goto out;
     }
 
-    /* dom_io is used as a sentinel for quarantined devices */
-    if ( domain == dom_io )
-        goto out;
-
     /*
      * if no other devices under the same iommu owned by this domain,
      * clear iommu in iommu_bitmap and clear domain_id in domid_bitmp
@@ -2683,9 +2671,69 @@ static void vtd_dump_p2m_table(struct domain *d)
     vtd_dump_p2m_table_level(hd->arch.pgd_maddr, agaw_to_level(hd->arch.agaw), 0, 0);
 }
 
+static int __init intel_iommu_quarantine_init(struct domain *d)
+{
+    struct domain_iommu *hd = dom_iommu(d);
+    struct dma_pte *parent;
+    unsigned int agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
+    unsigned int level = agaw_to_level(agaw);
+    int rc;
+
+    if ( hd->arch.pgd_maddr )
+    {
+        ASSERT_UNREACHABLE();
+        return 0;
+    }
+
+    spin_lock(&hd->arch.mapping_lock);
+
+    hd->arch.pgd_maddr = alloc_pgtable_maddr(1, hd->node);
+    if ( !hd->arch.pgd_maddr )
+        goto out;
+
+    parent = map_vtd_domain_page(hd->arch.pgd_maddr);
+    while ( level )
+    {
+        uint64_t maddr;
+        unsigned int offset;
+
+        /*
+         * The pgtable allocator is fine for the leaf page, as well as
+         * page table pages, and the resulting allocations are always
+         * zeroed.
+         */
+        maddr = alloc_pgtable_maddr(1, hd->node);
+        if ( !maddr )
+            break;
+
+        for ( offset = 0; offset < PTE_NUM; offset++ )
+        {
+            struct dma_pte *pte = &parent[offset];
+
+            dma_set_pte_addr(*pte, maddr);
+            dma_set_pte_readable(*pte);
+        }
+        iommu_flush_cache_page(parent, 1);
+
+        unmap_vtd_domain_page(parent);
+        parent = map_vtd_domain_page(maddr);
+        level--;
+    }
+    unmap_vtd_domain_page(parent);
+
+ out:
+    spin_unlock(&hd->arch.mapping_lock);
+
+    rc = iommu_flush_iotlb_all(d);
+
+    /* Pages leaked in failure case */
+    return level ? -ENOMEM : rc;
+}
+
 const struct iommu_ops __initconstrel intel_iommu_ops = {
     .init = intel_iommu_domain_init,
     .hwdom_init = intel_iommu_hwdom_init,
+    .quarantine_init = intel_iommu_quarantine_init,
     .add_device = intel_iommu_add_device,
     .enable_device = intel_iommu_enable_device,
     .remove_device = intel_iommu_remove_device,
diff --git a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
index 8ed9482791..664dfc93b9 100644
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -54,6 +54,9 @@ int amd_iommu_init_late(void);
 int amd_iommu_update_ivrs_mapping_acpi(void);
 int iov_adjust_irq_affinities(void);
 
+int amd_iommu_get_paging_mode(unsigned long entries);
+int amd_iommu_quarantine_init(struct domain *d);
+
 /* mapping functions */
 int __must_check amd_iommu_map_page(struct domain *d, dfn_t dfn,
                                     mfn_t mfn, unsigned int flags,
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 974bd3ffe8..6977ddbb97 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -211,6 +211,7 @@ typedef int iommu_grdm_t(xen_pfn_t start, xen_ulong_t nr, u32 id, void *ctxt);
 struct iommu_ops {
     int (*init)(struct domain *d);
     void (*hwdom_init)(struct domain *d);
+    int (*quarantine_init)(struct domain *d);
     int (*add_device)(u8 devfn, device_t *dev);
     int (*enable_device)(device_t *dev);
     int (*remove_device)(u8 devfn, device_t *dev);
-- 
2.20.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

             reply	other threads:[~2019-11-27 17:12 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27 17:11 Paul Durrant [this message]
2019-11-28 11:17 ` [Xen-devel] [PATCH v2] x86 / iommu: set up a scratch page in the quarantine domain Jan Beulich
2019-11-28 11:32   ` Jürgen Groß
2019-12-03  9:36     ` Jan Beulich
2019-12-10  7:16       ` Tian, Kevin
2019-12-10  8:05         ` Jan Beulich
2019-12-10  8:12           ` Jürgen Groß
2019-12-10  8:19             ` Durrant, Paul
2019-12-10  8:57             ` Jan Beulich
2019-12-10  9:07               ` Jürgen Groß
2019-12-10  9:16                 ` Durrant, Paul
2019-12-10  9:44                   ` Jan Beulich
2019-12-10 10:19                     ` Durrant, Paul

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191127171143.27399-1-pdurrant@amazon.com \
    --to=pdurrant@amazon.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=kevin.tian@intel.com \
    --cc=roger.pau@citrix.com \
    --cc=wl@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.