All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86/vMSI: miscellaneous fixes
@ 2012-01-20 16:33 Jan Beulich
  2012-01-20 16:39 ` Andrew Cooper
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Jan Beulich @ 2012-01-20 16:33 UTC (permalink / raw)
  To: xen-devel@lists.xensource.com; +Cc: yuan.b.liu, Eddie Dong

[-- Attachment #1: Type: text/plain, Size: 6930 bytes --]

This addresses a number of problems in msixtbl_{read,write}():
- address alignment was not checked, allowing for memory corruption in
  the hypervisor (write case) or returning of hypervisor private data
  to the guest (read case)
- the interrupt mask bit was permitted to be written by the guest
  (while Xen's interrupt flow control routines need to control it)
- MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
  numbers (making it unobvious why they have these values, and making
  the latter non-portable)
- MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
  non-zero table offset); this was also affecting host MSI-X code
- struct msixtbl_entry's table_flags[] was one element larger than
  necessary due to improper open-coding of BITS_TO_LONGS()
- msixtbl_read() unconditionally accessed the physical table, even
  though the data was only needed in a quarter of all cases
- various calculations were done unnecessarily for both of the rather
  distinct code paths in msixtbl_read()

Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
chosen to be 3.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -165,7 +165,7 @@ struct msixtbl_entry
     struct pci_dev *pdev;
     unsigned long gtable;       /* gpa of msix table */
     unsigned long table_len;
-    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+    unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
 #define MAX_MSIX_ACC_ENTRIES 3
     struct { 
         uint32_t msi_ad[3];	/* Shadow of address low, high and data */
@@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
 static void __iomem *msixtbl_addr_to_virt(
     struct msixtbl_entry *entry, unsigned long addr)
 {
-    int idx, nr_page;
+    unsigned int idx, nr_page;
 
-    if ( !entry )
+    if ( !entry || !entry->pdev )
         return NULL;
 
     nr_page = (addr >> PAGE_SHIFT) -
               (entry->gtable >> PAGE_SHIFT);
 
-    if ( !entry->pdev )
-        return NULL;
-
     idx = entry->pdev->msix_table_idx[nr_page];
     if ( !idx )
         return NULL;
@@ -215,37 +212,34 @@ static int msixtbl_read(
     struct vcpu *v, unsigned long address,
     unsigned long len, unsigned long *pval)
 {
-    unsigned long offset, val;
+    unsigned long offset;
     struct msixtbl_entry *entry;
     void *virt;
-    int nr_entry, index;
+    unsigned int nr_entry, index;
     int r = X86EMUL_UNHANDLEABLE;
 
-    rcu_read_lock(&msixtbl_rcu_lock);
+    if ( len != 4 || (address & 3) )
+        return r;
 
-    if ( len != 4 )
-        goto out;
+    rcu_read_lock(&msixtbl_rcu_lock);
 
     entry = msixtbl_find_entry(v, address);
-    virt = msixtbl_addr_to_virt(entry, address);
-    if ( !virt )
-        goto out;
-
-    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
     offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
-    if ( nr_entry >= MAX_MSIX_ACC_ENTRIES && 
-         offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
-        goto out;
 
-    val = readl(virt);
     if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
     {
+        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+        if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
+            goto out;
         index = offset / sizeof(uint32_t);
         *pval = entry->gentries[nr_entry].msi_ad[index];
     }
     else 
     {
-        *pval = val;
+        virt = msixtbl_addr_to_virt(entry, address);
+        if ( !virt )
+            goto out;
+        *pval = readl(virt);
     }
     
     r = X86EMUL_OKAY;
@@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
     unsigned long offset;
     struct msixtbl_entry *entry;
     void *virt;
-    int nr_entry, index;
+    unsigned int nr_entry, index;
     int r = X86EMUL_UNHANDLEABLE;
 
-    rcu_read_lock(&msixtbl_rcu_lock);
+    if ( len != 4 || (address & 3) )
+        return r;
 
-    if ( len != 4 )
-        goto out;
+    rcu_read_lock(&msixtbl_rcu_lock);
 
     entry = msixtbl_find_entry(v, address);
     nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
@@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
     if ( !virt )
         goto out;
 
+    /* Do not allow the mask bit to be changed. */
+#if 0 /* XXX
+       * As the mask bit is the only defined bit in the word, and as the
+       * host MSI-X code doesn't preserve the other bits anyway, doing
+       * this is pointless. So for now just discard the write (also
+       * saving us from having to determine the matching irq_desc).
+       */
+    spin_lock_irqsave(&desc->lock, flags);
+    orig = readl(virt);
+    val &= ~PCI_MSIX_VECTOR_BITMASK;
+    val |= orig & PCI_MSIX_VECTOR_BITMASK;
     writel(val, virt);
-    r = X86EMUL_OKAY;
+    spin_unlock_irqrestore(&desc->lock, flags);
+#endif
 
+    r = X86EMUL_OKAY;
 out:
     rcu_read_unlock(&msixtbl_rcu_lock);
     return r;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
  */
 #define NR_HP_RESERVED_VECTORS 	20
 
-#define PCI_MSIX_ENTRY_SIZE			16
-#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
-#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
-#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
-#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
-
 #define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
 #define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
 #define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -11,6 +11,8 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <xen/irq.h>
+#include <xen/pci_regs.h>
+#include <xen/pfn.h>
 #include <asm/pci.h>
 
 /*
@@ -30,8 +32,10 @@
 #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
 
-#define MAX_MSIX_TABLE_ENTRIES  2048
-#define MAX_MSIX_TABLE_PAGES    8
+#define MAX_MSIX_TABLE_ENTRIES  (PCI_MSIX_FLAGS_QSIZE + 1)
+#define MAX_MSIX_TABLE_PAGES    PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
+                                       PCI_MSIX_ENTRY_SIZE + \
+                                       (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
 struct pci_dev_info {
     bool_t is_extfn;
     bool_t is_virtfn;
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -305,6 +305,12 @@
 #define PCI_MSIX_PBA		8
 #define  PCI_MSIX_BIRMASK	(7 << 0)
 
+#define PCI_MSIX_ENTRY_SIZE			16
+#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
+#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
+#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
+#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
+
 #define PCI_MSIX_VECTOR_BITMASK	(1 << 0)
 
 /* CompactPCI Hotswap Register */



[-- Attachment #2: x86-vmsi-fixes.patch --]
[-- Type: text/plain, Size: 6959 bytes --]

x86/vMSI: miscellaneous fixes

This addresses a number of problems in msixtbl_{read,write}():
- address alignment was not checked, allowing for memory corruption in
  the hypervisor (write case) or returning of hypervisor private data
  to the guest (read case)
- the interrupt mask bit was permitted to be written by the guest
  (while Xen's interrupt flow control routines need to control it)
- MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
  numbers (making it unobvious why they have these values, and making
  the latter non-portable)
- MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
  non-zero table offset); this was also affecting host MSI-X code
- struct msixtbl_entry's table_flags[] was one element larger than
  necessary due to improper open-coding of BITS_TO_LONGS()
- msixtbl_read() unconditionally accessed the physical table, even
  though the data was only needed in a quarter of all cases
- various calculations were done unnecessarily for both of the rather
  distinct code paths in msixtbl_read()

Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
chosen to be 3.

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -165,7 +165,7 @@ struct msixtbl_entry
     struct pci_dev *pdev;
     unsigned long gtable;       /* gpa of msix table */
     unsigned long table_len;
-    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+    unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
 #define MAX_MSIX_ACC_ENTRIES 3
     struct { 
         uint32_t msi_ad[3];	/* Shadow of address low, high and data */
@@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
 static void __iomem *msixtbl_addr_to_virt(
     struct msixtbl_entry *entry, unsigned long addr)
 {
-    int idx, nr_page;
+    unsigned int idx, nr_page;
 
-    if ( !entry )
+    if ( !entry || !entry->pdev )
         return NULL;
 
     nr_page = (addr >> PAGE_SHIFT) -
               (entry->gtable >> PAGE_SHIFT);
 
-    if ( !entry->pdev )
-        return NULL;
-
     idx = entry->pdev->msix_table_idx[nr_page];
     if ( !idx )
         return NULL;
@@ -215,37 +212,34 @@ static int msixtbl_read(
     struct vcpu *v, unsigned long address,
     unsigned long len, unsigned long *pval)
 {
-    unsigned long offset, val;
+    unsigned long offset;
     struct msixtbl_entry *entry;
     void *virt;
-    int nr_entry, index;
+    unsigned int nr_entry, index;
     int r = X86EMUL_UNHANDLEABLE;
 
-    rcu_read_lock(&msixtbl_rcu_lock);
+    if ( len != 4 || (address & 3) )
+        return r;
 
-    if ( len != 4 )
-        goto out;
+    rcu_read_lock(&msixtbl_rcu_lock);
 
     entry = msixtbl_find_entry(v, address);
-    virt = msixtbl_addr_to_virt(entry, address);
-    if ( !virt )
-        goto out;
-
-    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
     offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
-    if ( nr_entry >= MAX_MSIX_ACC_ENTRIES && 
-         offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
-        goto out;
 
-    val = readl(virt);
     if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
     {
+        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+        if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
+            goto out;
         index = offset / sizeof(uint32_t);
         *pval = entry->gentries[nr_entry].msi_ad[index];
     }
     else 
     {
-        *pval = val;
+        virt = msixtbl_addr_to_virt(entry, address);
+        if ( !virt )
+            goto out;
+        *pval = readl(virt);
     }
     
     r = X86EMUL_OKAY;
@@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
     unsigned long offset;
     struct msixtbl_entry *entry;
     void *virt;
-    int nr_entry, index;
+    unsigned int nr_entry, index;
     int r = X86EMUL_UNHANDLEABLE;
 
-    rcu_read_lock(&msixtbl_rcu_lock);
+    if ( len != 4 || (address & 3) )
+        return r;
 
-    if ( len != 4 )
-        goto out;
+    rcu_read_lock(&msixtbl_rcu_lock);
 
     entry = msixtbl_find_entry(v, address);
     nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
@@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
     if ( !virt )
         goto out;
 
+    /* Do not allow the mask bit to be changed. */
+#if 0 /* XXX
+       * As the mask bit is the only defined bit in the word, and as the
+       * host MSI-X code doesn't preserve the other bits anyway, doing
+       * this is pointless. So for now just discard the write (also
+       * saving us from having to determine the matching irq_desc).
+       */
+    spin_lock_irqsave(&desc->lock, flags);
+    orig = readl(virt);
+    val &= ~PCI_MSIX_VECTOR_BITMASK;
+    val |= orig & PCI_MSIX_VECTOR_BITMASK;
     writel(val, virt);
-    r = X86EMUL_OKAY;
+    spin_unlock_irqrestore(&desc->lock, flags);
+#endif
 
+    r = X86EMUL_OKAY;
 out:
     rcu_read_unlock(&msixtbl_rcu_lock);
     return r;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
  */
 #define NR_HP_RESERVED_VECTORS 	20
 
-#define PCI_MSIX_ENTRY_SIZE			16
-#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
-#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
-#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
-#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
-
 #define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
 #define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
 #define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -11,6 +11,8 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <xen/irq.h>
+#include <xen/pci_regs.h>
+#include <xen/pfn.h>
 #include <asm/pci.h>
 
 /*
@@ -30,8 +32,10 @@
 #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
 #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
 
-#define MAX_MSIX_TABLE_ENTRIES  2048
-#define MAX_MSIX_TABLE_PAGES    8
+#define MAX_MSIX_TABLE_ENTRIES  (PCI_MSIX_FLAGS_QSIZE + 1)
+#define MAX_MSIX_TABLE_PAGES    PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
+                                       PCI_MSIX_ENTRY_SIZE + \
+                                       (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
 struct pci_dev_info {
     bool_t is_extfn;
     bool_t is_virtfn;
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -305,6 +305,12 @@
 #define PCI_MSIX_PBA		8
 #define  PCI_MSIX_BIRMASK	(7 << 0)
 
+#define PCI_MSIX_ENTRY_SIZE			16
+#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
+#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
+#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
+#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
+
 #define PCI_MSIX_VECTOR_BITMASK	(1 << 0)
 
 /* CompactPCI Hotswap Register */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2012-02-03  5:38 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-20 16:33 [PATCH] x86/vMSI: miscellaneous fixes Jan Beulich
2012-01-20 16:39 ` Andrew Cooper
2012-01-20 16:44   ` Jan Beulich
2012-01-20 16:50     ` Andrew Cooper
2012-01-20 17:01       ` Jan Beulich
2012-01-20 17:03         ` Andrew Cooper
2012-01-20 16:42 ` Jan Beulich
2012-01-24 15:43 ` Andrew Cooper
2012-01-24 16:15   ` Jan Beulich
2012-02-03  5:38     ` Haitao Shan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.