xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Chao Gao <chao.gao@intel.com>
To: xen-devel@lists.xen.org
Cc: Kevin Tian <kevin.tian@intel.com>,
	Jun Nakajima <jun.nakajima@intel.com>,
	George Dunlap <George.Dunlap@eu.citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Dario Faggioli <dario.faggioli@citrix.com>,
	Jan Beulich <jbeulich@suse.com>, Chao Gao <chao.gao@intel.com>
Subject: [PATCH v11 5/6] VT-d: introduce update_irte to update irte safely
Date: Wed, 29 Mar 2017 13:11:54 +0800	[thread overview]
Message-ID: <1490764315-7162-6-git-send-email-chao.gao@intel.com> (raw)
In-Reply-To: <1490764315-7162-1-git-send-email-chao.gao@intel.com>

We used structure assignment to update irte which was non-atomic when the
whole IRTE was to be updated. It is unsafe when an interrupt happens during the
update. Furthermore, no bug or warning would be reported when this happened.

This patch introduces two variants, atomic and non-atomic, to update
irte. Both variants will update IRTE if possible. If the caller requests an
atomic update but we can't meet it, we raise a bug.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
v11:
- Add two variant functions to update IRTE. Call the non-atomic one for init
and clear operations. Call the atomic one for other cases.
- Add a new field to indicate the remap_entry associated with msi_desc is
initialized or not.

v10:
- rename copy_irte_to_irt to update_irte
- remove copy_from_to_irt
- change commit message and add some comments to illustrate on which
condition update_irte() is safe.

 xen/arch/x86/msi.c                     |  1 +
 xen/drivers/passthrough/vtd/intremap.c | 78 ++++++++++++++++++++++++++++++++--
 xen/include/asm-x86/msi.h              |  1 +
 3 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 3374cd4..7ed1243 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -578,6 +578,7 @@ static struct msi_desc *alloc_msi_entry(unsigned int nr)
         entry[nr].dev = NULL;
         entry[nr].irq = -1;
         entry[nr].remap_index = -1;
+        entry[nr].remap_entry_initialized = false;
         entry[nr].pi_desc = NULL;
     }
 
diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
index b992f23..b7f3cf1 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -169,10 +169,64 @@ bool_t __init iommu_supports_eim(void)
     return 1;
 }
 
+static void update_irte(struct iremap_entry *entry,
+                        const struct iremap_entry *new_ire,
+                        bool atomic)
+{
+    if ( cpu_has_cx16 )
+    {
+        __uint128_t ret;
+        struct iremap_entry old_ire;
+
+        old_ire = *entry;
+        ret = cmpxchg16b(entry, &old_ire, new_ire);
+
+        /*
+         * In the above, we use cmpxchg16b to atomically update the 128-bit
+         * IRTE, and the hardware cannot update the IRTE behind us, so
+         * the return value of cmpxchg16b should be the same as old_ire.
+         * This ASSERT validates it.
+         */
+        ASSERT(ret == old_ire.val);
+    }
+    else
+    {
+        /*
+         * The following code will update irte atomically if possible.
+         * If the caller requests an atomic update but we can't meet it,
+         * a bug will be raised.
+         */
+        if ( entry->lo == new_ire->lo )
+            entry->hi = new_ire->hi;
+        else if ( entry->hi == new_ire->hi )
+            entry->lo = new_ire->lo;
+        else if ( !atomic )
+        {
+            entry->lo = new_ire->lo;
+            entry->hi = new_ire->hi;
+        }
+        else
+            BUG();
+    }
+}
+
+static inline void update_irte_non_atomic(struct iremap_entry *entry,
+                                          const struct iremap_entry *new_ire)
+{
+    update_irte(entry, new_ire, false);
+}
+
+static inline void update_irte_atomic(struct iremap_entry *entry,
+                                      const struct iremap_entry *new_ire)
+{
+    update_irte(entry, new_ire, true);
+}
+
+
 /* Mark specified intr remap entry as free */
 static void free_remap_entry(struct iommu *iommu, int index)
 {
-    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
+    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
 
     if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
@@ -183,7 +237,7 @@ static void free_remap_entry(struct iommu *iommu, int index)
     GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                      iremap_entries, iremap_entry);
 
-    memset(iremap_entry, 0, sizeof(*iremap_entry));
+    update_irte_non_atomic(iremap_entry, &new_ire);
     iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
     iommu_flush_iec_index(iommu, 0, index);
 
@@ -286,6 +340,7 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
     int index;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    bool init = false;
 
     remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
     spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -296,6 +351,7 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
         index = alloc_remap_entry(iommu, 1);
         if ( index < IREMAP_ENTRY_NR )
             apic_pin_2_ir_idx[apic][ioapic_pin] = index;
+        init = true;
     }
 
     if ( index > IREMAP_ENTRY_NR - 1 )
@@ -353,7 +409,11 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu,
         remap_rte->format = 1;    /* indicate remap format */
     }
 
-    *iremap_entry = new_ire;
+    if ( init )
+        update_irte_non_atomic(iremap_entry, &new_ire);
+    else
+        update_irte_atomic(iremap_entry, &new_ire);
+
     iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
     iommu_flush_iec_index(iommu, 0, index);
 
@@ -567,7 +627,10 @@ static int msi_msg_to_remap_entry(
     {
         /* Free specified unused IRTEs */
         for ( i = 0; i < nr; ++i )
+        {
             free_remap_entry(iommu, msi_desc->remap_index + i);
+            msi_desc[i].remap_entry_initialized = false;
+        }
         spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
         return 0;
     }
@@ -639,7 +702,14 @@ static int msi_msg_to_remap_entry(
     remap_rte->address_hi = 0;
     remap_rte->data = index - i;
 
-    *iremap_entry = new_ire;
+    if ( msi_desc->remap_entry_initialized )
+        update_irte_atomic(iremap_entry, &new_ire);
+    else
+    {
+        update_irte_non_atomic(iremap_entry, &new_ire);
+        msi_desc->remap_entry_initialized = true;
+    }
+
     iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
     iommu_flush_iec_index(iommu, 0, index);
 
diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
index fc9ab04..a0bd3af 100644
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -118,6 +118,7 @@ struct msi_desc {
 	struct msi_msg msg;		/* Last set MSI message */
 
 	int remap_index;		/* index in interrupt remapping table */
+	bool remap_entry_initialized;
 	const struct pi_desc *pi_desc;	/* pointer to posted descriptor */
 	uint8_t gvec;			/* guest vector. valid when pi_desc isn't NULL */
 };
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2017-03-29  5:11 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-29  5:11 [PATCH v11 0/6] VMX: Properly handle pi descriptor and per-cpu blocking list Chao Gao
2017-03-29  5:11 ` [PATCH v11 1/6] passthrough: don't migrate pirq when it is delivered through VT-d PI Chao Gao
2017-03-31  5:28   ` Tian, Kevin
2017-03-30 23:10     ` Chao Gao
2017-03-31 10:27       ` Jan Beulich
2017-03-31  9:31   ` Jan Beulich
2017-03-31  2:42     ` Chao Gao
2017-03-31 10:06       ` Jan Beulich
2017-03-31  3:27         ` Chao Gao
2017-03-31 10:38           ` Jan Beulich
2017-04-05  0:20             ` Chao Gao
2017-04-05  8:03               ` Jan Beulich
2017-03-29  5:11 ` [PATCH v11 2/6] VT-d: Introduce new fields in msi_desc to track binding with guest interrupt Chao Gao
2017-03-31  5:46   ` Tian, Kevin
2017-03-30 23:01     ` Chao Gao
2017-03-31  8:11       ` Jan Beulich
2017-03-31  1:13         ` Chao Gao
2017-03-31  9:48   ` Jan Beulich
2017-03-29  5:11 ` [PATCH v11 3/6] VT-d: Some cleanups Chao Gao
2017-03-29  5:11 ` [PATCH v11 4/6] VMX: Fixup PI descriptor when cpu is offline Chao Gao
2017-03-29  5:11 ` Chao Gao [this message]
2017-03-31  6:03   ` [PATCH v11 5/6] VT-d: introduce update_irte to update irte safely Tian, Kevin
2017-03-31 10:01   ` Jan Beulich
2017-04-04 19:12     ` Chao Gao
2017-04-05  7:40       ` Jan Beulich
2017-03-29  5:11 ` [PATCH v11 6/6] passthrough/io: Fall back to remapping interrupt when we can't use VT-d PI Chao Gao
2017-03-31  5:13 ` [PATCH v11 0/6] VMX: Properly handle pi descriptor and per-cpu blocking list Tian, Kevin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1490764315-7162-6-git-send-email-chao.gao@intel.com \
    --to=chao.gao@intel.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dario.faggioli@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=kevin.tian@intel.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).