* [PATCH] x86/vMSI: miscellaneous fixes
@ 2012-01-20 16:33 Jan Beulich
2012-01-20 16:39 ` Andrew Cooper
` (2 more replies)
0 siblings, 3 replies; 10+ messages in thread
From: Jan Beulich @ 2012-01-20 16:33 UTC (permalink / raw)
To: xen-devel@lists.xensource.com; +Cc: yuan.b.liu, Eddie Dong
[-- Attachment #1: Type: text/plain, Size: 6930 bytes --]
This addresses a number of problems in msixtbl_{read,write}():
- address alignment was not checked, allowing for memory corruption in
the hypervisor (write case) or returning of hypervisor private data
to the guest (read case)
- the interrupt mask bit was permitted to be written by the guest
(while Xen's interrupt flow control routines need to control it)
- MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
numbers (making it unobvious why they have these values, and making
the latter non-portable)
- MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
non-zero table offset); this was also affecting host MSI-X code
- struct msixtbl_entry's table_flags[] was one element larger than
necessary due to improper open-coding of BITS_TO_LONGS()
- msixtbl_read() unconditionally accessed the physical table, even
though the data was only needed in a quarter of all cases
- various calculations were done unnecessarily for both of the rather
distinct code paths in msixtbl_read()
Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
chosen to be 3.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -165,7 +165,7 @@ struct msixtbl_entry
struct pci_dev *pdev;
unsigned long gtable; /* gpa of msix table */
unsigned long table_len;
- unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+ unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
#define MAX_MSIX_ACC_ENTRIES 3
struct {
uint32_t msi_ad[3]; /* Shadow of address low, high and data */
@@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
static void __iomem *msixtbl_addr_to_virt(
struct msixtbl_entry *entry, unsigned long addr)
{
- int idx, nr_page;
+ unsigned int idx, nr_page;
- if ( !entry )
+ if ( !entry || !entry->pdev )
return NULL;
nr_page = (addr >> PAGE_SHIFT) -
(entry->gtable >> PAGE_SHIFT);
- if ( !entry->pdev )
- return NULL;
-
idx = entry->pdev->msix_table_idx[nr_page];
if ( !idx )
return NULL;
@@ -215,37 +212,34 @@ static int msixtbl_read(
struct vcpu *v, unsigned long address,
unsigned long len, unsigned long *pval)
{
- unsigned long offset, val;
+ unsigned long offset;
struct msixtbl_entry *entry;
void *virt;
- int nr_entry, index;
+ unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- rcu_read_lock(&msixtbl_rcu_lock);
+ if ( len != 4 || (address & 3) )
+ return r;
- if ( len != 4 )
- goto out;
+ rcu_read_lock(&msixtbl_rcu_lock);
entry = msixtbl_find_entry(v, address);
- virt = msixtbl_addr_to_virt(entry, address);
- if ( !virt )
- goto out;
-
- nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
- if ( nr_entry >= MAX_MSIX_ACC_ENTRIES &&
- offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
- goto out;
- val = readl(virt);
if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
+ nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+ if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
+ goto out;
index = offset / sizeof(uint32_t);
*pval = entry->gentries[nr_entry].msi_ad[index];
}
else
{
- *pval = val;
+ virt = msixtbl_addr_to_virt(entry, address);
+ if ( !virt )
+ goto out;
+ *pval = readl(virt);
}
r = X86EMUL_OKAY;
@@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
unsigned long offset;
struct msixtbl_entry *entry;
void *virt;
- int nr_entry, index;
+ unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- rcu_read_lock(&msixtbl_rcu_lock);
+ if ( len != 4 || (address & 3) )
+ return r;
- if ( len != 4 )
- goto out;
+ rcu_read_lock(&msixtbl_rcu_lock);
entry = msixtbl_find_entry(v, address);
nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
@@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
if ( !virt )
goto out;
+ /* Do not allow the mask bit to be changed. */
+#if 0 /* XXX
+ * As the mask bit is the only defined bit in the word, and as the
+ * host MSI-X code doesn't preserve the other bits anyway, doing
+ * this is pointless. So for now just discard the write (also
+ * saving us from having to determine the matching irq_desc).
+ */
+ spin_lock_irqsave(&desc->lock, flags);
+ orig = readl(virt);
+ val &= ~PCI_MSIX_VECTOR_BITMASK;
+ val |= orig & PCI_MSIX_VECTOR_BITMASK;
writel(val, virt);
- r = X86EMUL_OKAY;
+ spin_unlock_irqrestore(&desc->lock, flags);
+#endif
+ r = X86EMUL_OKAY;
out:
rcu_read_unlock(&msixtbl_rcu_lock);
return r;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
*/
#define NR_HP_RESERVED_VECTORS 20
-#define PCI_MSIX_ENTRY_SIZE 16
-#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
-#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
-#define PCI_MSIX_ENTRY_DATA_OFFSET 8
-#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
-
#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -11,6 +11,8 @@
#include <xen/list.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
+#include <xen/pci_regs.h>
+#include <xen/pfn.h>
#include <asm/pci.h>
/*
@@ -30,8 +32,10 @@
#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
#define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
-#define MAX_MSIX_TABLE_ENTRIES 2048
-#define MAX_MSIX_TABLE_PAGES 8
+#define MAX_MSIX_TABLE_ENTRIES (PCI_MSIX_FLAGS_QSIZE + 1)
+#define MAX_MSIX_TABLE_PAGES PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
+ PCI_MSIX_ENTRY_SIZE + \
+ (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
struct pci_dev_info {
bool_t is_extfn;
bool_t is_virtfn;
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -305,6 +305,12 @@
#define PCI_MSIX_PBA 8
#define PCI_MSIX_BIRMASK (7 << 0)
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
+#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
+#define PCI_MSIX_ENTRY_DATA_OFFSET 8
+#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
+
#define PCI_MSIX_VECTOR_BITMASK (1 << 0)
/* CompactPCI Hotswap Register */
[-- Attachment #2: x86-vmsi-fixes.patch --]
[-- Type: text/plain, Size: 6959 bytes --]
x86/vMSI: miscellaneous fixes
This addresses a number of problems in msixtbl_{read,write}():
- address alignment was not checked, allowing for memory corruption in
the hypervisor (write case) or returning of hypervisor private data
to the guest (read case)
- the interrupt mask bit was permitted to be written by the guest
(while Xen's interrupt flow control routines need to control it)
- MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
numbers (making it unobvious why they have these values, and making
the latter non-portable)
- MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
non-zero table offset); this was also affecting host MSI-X code
- struct msixtbl_entry's table_flags[] was one element larger than
necessary due to improper open-coding of BITS_TO_LONGS()
- msixtbl_read() unconditionally accessed the physical table, even
though the data was only needed in a quarter of all cases
- various calculations were done unnecessarily for both of the rather
distinct code paths in msixtbl_read()
Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
chosen to be 3.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -165,7 +165,7 @@ struct msixtbl_entry
struct pci_dev *pdev;
unsigned long gtable; /* gpa of msix table */
unsigned long table_len;
- unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+ unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
#define MAX_MSIX_ACC_ENTRIES 3
struct {
uint32_t msi_ad[3]; /* Shadow of address low, high and data */
@@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
static void __iomem *msixtbl_addr_to_virt(
struct msixtbl_entry *entry, unsigned long addr)
{
- int idx, nr_page;
+ unsigned int idx, nr_page;
- if ( !entry )
+ if ( !entry || !entry->pdev )
return NULL;
nr_page = (addr >> PAGE_SHIFT) -
(entry->gtable >> PAGE_SHIFT);
- if ( !entry->pdev )
- return NULL;
-
idx = entry->pdev->msix_table_idx[nr_page];
if ( !idx )
return NULL;
@@ -215,37 +212,34 @@ static int msixtbl_read(
struct vcpu *v, unsigned long address,
unsigned long len, unsigned long *pval)
{
- unsigned long offset, val;
+ unsigned long offset;
struct msixtbl_entry *entry;
void *virt;
- int nr_entry, index;
+ unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- rcu_read_lock(&msixtbl_rcu_lock);
+ if ( len != 4 || (address & 3) )
+ return r;
- if ( len != 4 )
- goto out;
+ rcu_read_lock(&msixtbl_rcu_lock);
entry = msixtbl_find_entry(v, address);
- virt = msixtbl_addr_to_virt(entry, address);
- if ( !virt )
- goto out;
-
- nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
- if ( nr_entry >= MAX_MSIX_ACC_ENTRIES &&
- offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
- goto out;
- val = readl(virt);
if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
{
+ nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+ if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
+ goto out;
index = offset / sizeof(uint32_t);
*pval = entry->gentries[nr_entry].msi_ad[index];
}
else
{
- *pval = val;
+ virt = msixtbl_addr_to_virt(entry, address);
+ if ( !virt )
+ goto out;
+ *pval = readl(virt);
}
r = X86EMUL_OKAY;
@@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
unsigned long offset;
struct msixtbl_entry *entry;
void *virt;
- int nr_entry, index;
+ unsigned int nr_entry, index;
int r = X86EMUL_UNHANDLEABLE;
- rcu_read_lock(&msixtbl_rcu_lock);
+ if ( len != 4 || (address & 3) )
+ return r;
- if ( len != 4 )
- goto out;
+ rcu_read_lock(&msixtbl_rcu_lock);
entry = msixtbl_find_entry(v, address);
nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
@@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
if ( !virt )
goto out;
+ /* Do not allow the mask bit to be changed. */
+#if 0 /* XXX
+ * As the mask bit is the only defined bit in the word, and as the
+ * host MSI-X code doesn't preserve the other bits anyway, doing
+ * this is pointless. So for now just discard the write (also
+ * saving us from having to determine the matching irq_desc).
+ */
+ spin_lock_irqsave(&desc->lock, flags);
+ orig = readl(virt);
+ val &= ~PCI_MSIX_VECTOR_BITMASK;
+ val |= orig & PCI_MSIX_VECTOR_BITMASK;
writel(val, virt);
- r = X86EMUL_OKAY;
+ spin_unlock_irqrestore(&desc->lock, flags);
+#endif
+ r = X86EMUL_OKAY;
out:
rcu_read_unlock(&msixtbl_rcu_lock);
return r;
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
*/
#define NR_HP_RESERVED_VECTORS 20
-#define PCI_MSIX_ENTRY_SIZE 16
-#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
-#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
-#define PCI_MSIX_ENTRY_DATA_OFFSET 8
-#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
-
#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
#define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
#define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -11,6 +11,8 @@
#include <xen/list.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
+#include <xen/pci_regs.h>
+#include <xen/pfn.h>
#include <asm/pci.h>
/*
@@ -30,8 +32,10 @@
#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
#define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
-#define MAX_MSIX_TABLE_ENTRIES 2048
-#define MAX_MSIX_TABLE_PAGES 8
+#define MAX_MSIX_TABLE_ENTRIES (PCI_MSIX_FLAGS_QSIZE + 1)
+#define MAX_MSIX_TABLE_PAGES PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
+ PCI_MSIX_ENTRY_SIZE + \
+ (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
struct pci_dev_info {
bool_t is_extfn;
bool_t is_virtfn;
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -305,6 +305,12 @@
#define PCI_MSIX_PBA 8
#define PCI_MSIX_BIRMASK (7 << 0)
+#define PCI_MSIX_ENTRY_SIZE 16
+#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
+#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
+#define PCI_MSIX_ENTRY_DATA_OFFSET 8
+#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
+
#define PCI_MSIX_VECTOR_BITMASK (1 << 0)
/* CompactPCI Hotswap Register */
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 16:33 [PATCH] x86/vMSI: miscellaneous fixes Jan Beulich
@ 2012-01-20 16:39 ` Andrew Cooper
2012-01-20 16:44 ` Jan Beulich
2012-01-20 16:42 ` Jan Beulich
2012-01-24 15:43 ` Andrew Cooper
2 siblings, 1 reply; 10+ messages in thread
From: Andrew Cooper @ 2012-01-20 16:39 UTC (permalink / raw)
To: Jan Beulich
Cc: yuan.b.liu@intel.com, xen-devel@lists.xensource.com, Eddie Dong
On 20/01/12 16:33, Jan Beulich wrote:
> This addresses a number of problems in msixtbl_{read,write}():
> - address alignment was not checked, allowing for memory corruption in
> the hypervisor (write case) or returning of hypervisor private data
> to the guest (read case)
> - the interrupt mask bit was permitted to be written by the guest
> (while Xen's interrupt flow control routines need to control it)
> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
> numbers (making it unobvious why they have these values, and making
> the latter non-portable)
> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
> non-zero table offset); this was also affecting host MSI-X code
> - struct msixtbl_entry's table_flags[] was one element larger than
> necessary due to improper open-coding of BITS_TO_LONGS()
> - msixtbl_read() unconditionally accessed the physical table, even
> though the data was only needed in a quarter of all cases
> - various calculations were done unnecessarily for both of the rather
> distinct code paths in msixtbl_read()
>
> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
> chosen to be 3.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -165,7 +165,7 @@ struct msixtbl_entry
> struct pci_dev *pdev;
> unsigned long gtable; /* gpa of msix table */
> unsigned long table_len;
> - unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
> + unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
> #define MAX_MSIX_ACC_ENTRIES 3
> struct {
> uint32_t msi_ad[3]; /* Shadow of address low, high and data */
> @@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
> static void __iomem *msixtbl_addr_to_virt(
> struct msixtbl_entry *entry, unsigned long addr)
> {
> - int idx, nr_page;
> + unsigned int idx, nr_page;
>
> - if ( !entry )
> + if ( !entry || !entry->pdev )
> return NULL;
>
> nr_page = (addr >> PAGE_SHIFT) -
> (entry->gtable >> PAGE_SHIFT);
>
> - if ( !entry->pdev )
> - return NULL;
> -
> idx = entry->pdev->msix_table_idx[nr_page];
> if ( !idx )
> return NULL;
> @@ -215,37 +212,34 @@ static int msixtbl_read(
> struct vcpu *v, unsigned long address,
> unsigned long len, unsigned long *pval)
> {
> - unsigned long offset, val;
> + unsigned long offset;
> struct msixtbl_entry *entry;
> void *virt;
> - int nr_entry, index;
> + unsigned int nr_entry, index;
> int r = X86EMUL_UNHANDLEABLE;
>
> - rcu_read_lock(&msixtbl_rcu_lock);
> + if ( len != 4 || (address & 3) )
> + return r;
>
> - if ( len != 4 )
> - goto out;
> + rcu_read_lock(&msixtbl_rcu_lock);
>
> entry = msixtbl_find_entry(v, address);
> - virt = msixtbl_addr_to_virt(entry, address);
> - if ( !virt )
> - goto out;
> -
> - nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
> - if ( nr_entry >= MAX_MSIX_ACC_ENTRIES &&
> - offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> - goto out;
>
> - val = readl(virt);
> if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> {
> + nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> + if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
> + goto out;
> index = offset / sizeof(uint32_t);
> *pval = entry->gentries[nr_entry].msi_ad[index];
> }
> else
> {
> - *pval = val;
> + virt = msixtbl_addr_to_virt(entry, address);
> + if ( !virt )
> + goto out;
> + *pval = readl(virt);
> }
>
> r = X86EMUL_OKAY;
> @@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
> unsigned long offset;
> struct msixtbl_entry *entry;
> void *virt;
> - int nr_entry, index;
> + unsigned int nr_entry, index;
> int r = X86EMUL_UNHANDLEABLE;
>
> - rcu_read_lock(&msixtbl_rcu_lock);
> + if ( len != 4 || (address & 3) )
> + return r;
>
> - if ( len != 4 )
> - goto out;
> + rcu_read_lock(&msixtbl_rcu_lock);
>
> entry = msixtbl_find_entry(v, address);
> nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
> if ( !virt )
> goto out;
>
> + /* Do not allow the mask bit to be changed. */
> +#if 0 /* XXX
You appear to still have some debugging in this patch.
~Andrew
> + * As the mask bit is the only defined bit in the word, and as the
> + * host MSI-X code doesn't preserve the other bits anyway, doing
> + * this is pointless. So for now just discard the write (also
> + * saving us from having to determine the matching irq_desc).
> + */
> + spin_lock_irqsave(&desc->lock, flags);
> + orig = readl(virt);
> + val &= ~PCI_MSIX_VECTOR_BITMASK;
> + val |= orig & PCI_MSIX_VECTOR_BITMASK;
> writel(val, virt);
> - r = X86EMUL_OKAY;
> + spin_unlock_irqrestore(&desc->lock, flags);
> +#endif
>
> + r = X86EMUL_OKAY;
> out:
> rcu_read_unlock(&msixtbl_rcu_lock);
> return r;
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
> */
> #define NR_HP_RESERVED_VECTORS 20
>
> -#define PCI_MSIX_ENTRY_SIZE 16
> -#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
> -#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
> -#define PCI_MSIX_ENTRY_DATA_OFFSET 8
> -#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
> -
> #define msi_control_reg(base) (base + PCI_MSI_FLAGS)
> #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
> #define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -11,6 +11,8 @@
> #include <xen/list.h>
> #include <xen/spinlock.h>
> #include <xen/irq.h>
> +#include <xen/pci_regs.h>
> +#include <xen/pfn.h>
> #include <asm/pci.h>
>
> /*
> @@ -30,8 +32,10 @@
> #define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
> #define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
>
> -#define MAX_MSIX_TABLE_ENTRIES 2048
> -#define MAX_MSIX_TABLE_PAGES 8
> +#define MAX_MSIX_TABLE_ENTRIES (PCI_MSIX_FLAGS_QSIZE + 1)
> +#define MAX_MSIX_TABLE_PAGES PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
> + PCI_MSIX_ENTRY_SIZE + \
> + (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
> struct pci_dev_info {
> bool_t is_extfn;
> bool_t is_virtfn;
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -305,6 +305,12 @@
> #define PCI_MSIX_PBA 8
> #define PCI_MSIX_BIRMASK (7 << 0)
>
> +#define PCI_MSIX_ENTRY_SIZE 16
> +#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
> +#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
> +#define PCI_MSIX_ENTRY_DATA_OFFSET 8
> +#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
> +
> #define PCI_MSIX_VECTOR_BITMASK (1 << 0)
>
> /* CompactPCI Hotswap Register */
>
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 16:33 [PATCH] x86/vMSI: miscellaneous fixes Jan Beulich
2012-01-20 16:39 ` Andrew Cooper
@ 2012-01-20 16:42 ` Jan Beulich
2012-01-24 15:43 ` Andrew Cooper
2 siblings, 0 replies; 10+ messages in thread
From: Jan Beulich @ 2012-01-20 16:42 UTC (permalink / raw)
To: xen-devel@lists.xensource.com; +Cc: Eddie Dong, yuan.b.liu, Ian Jackson
>>> On 20.01.12 at 17:33, "Jan Beulich" <JBeulich@suse.com> wrote:
> This addresses a number of problems in msixtbl_{read,write}():
> - address alignment was not checked, allowing for memory corruption in
> the hypervisor (write case) or returning of hypervisor private data
> to the guest (read case)
> - the interrupt mask bit was permitted to be written by the guest
> (while Xen's interrupt flow control routines need to control it)
> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
> numbers (making it unobvious why they have these values, and making
> the latter non-portable)
> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
> non-zero table offset); this was also affecting host MSI-X code
> - struct msixtbl_entry's table_flags[] was one element larger than
> necessary due to improper open-coding of BITS_TO_LONGS()
> - msixtbl_read() unconditionally accessed the physical table, even
> though the data was only needed in a quarter of all cases
> - various calculations were done unnecessarily for both of the rather
> distinct code paths in msixtbl_read()
I should note (as done in the earlier response to the 4.2 todo thread)
that I have no way of actually testing this, so I'm relying on others
here. (Intel? Or, Ian, would the stage tester find issues with passed
through MSI-X capable devices?)
Jan
> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
> chosen to be 3.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -165,7 +165,7 @@ struct msixtbl_entry
> struct pci_dev *pdev;
> unsigned long gtable; /* gpa of msix table */
> unsigned long table_len;
> - unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
> + unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
> #define MAX_MSIX_ACC_ENTRIES 3
> struct {
> uint32_t msi_ad[3]; /* Shadow of address low, high and data */
> @@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
> static void __iomem *msixtbl_addr_to_virt(
> struct msixtbl_entry *entry, unsigned long addr)
> {
> - int idx, nr_page;
> + unsigned int idx, nr_page;
>
> - if ( !entry )
> + if ( !entry || !entry->pdev )
> return NULL;
>
> nr_page = (addr >> PAGE_SHIFT) -
> (entry->gtable >> PAGE_SHIFT);
>
> - if ( !entry->pdev )
> - return NULL;
> -
> idx = entry->pdev->msix_table_idx[nr_page];
> if ( !idx )
> return NULL;
> @@ -215,37 +212,34 @@ static int msixtbl_read(
> struct vcpu *v, unsigned long address,
> unsigned long len, unsigned long *pval)
> {
> - unsigned long offset, val;
> + unsigned long offset;
> struct msixtbl_entry *entry;
> void *virt;
> - int nr_entry, index;
> + unsigned int nr_entry, index;
> int r = X86EMUL_UNHANDLEABLE;
>
> - rcu_read_lock(&msixtbl_rcu_lock);
> + if ( len != 4 || (address & 3) )
> + return r;
>
> - if ( len != 4 )
> - goto out;
> + rcu_read_lock(&msixtbl_rcu_lock);
>
> entry = msixtbl_find_entry(v, address);
> - virt = msixtbl_addr_to_virt(entry, address);
> - if ( !virt )
> - goto out;
> -
> - nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
> - if ( nr_entry >= MAX_MSIX_ACC_ENTRIES &&
> - offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> - goto out;
>
> - val = readl(virt);
> if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> {
> + nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> + if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
> + goto out;
> index = offset / sizeof(uint32_t);
> *pval = entry->gentries[nr_entry].msi_ad[index];
> }
> else
> {
> - *pval = val;
> + virt = msixtbl_addr_to_virt(entry, address);
> + if ( !virt )
> + goto out;
> + *pval = readl(virt);
> }
>
> r = X86EMUL_OKAY;
> @@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
> unsigned long offset;
> struct msixtbl_entry *entry;
> void *virt;
> - int nr_entry, index;
> + unsigned int nr_entry, index;
> int r = X86EMUL_UNHANDLEABLE;
>
> - rcu_read_lock(&msixtbl_rcu_lock);
> + if ( len != 4 || (address & 3) )
> + return r;
>
> - if ( len != 4 )
> - goto out;
> + rcu_read_lock(&msixtbl_rcu_lock);
>
> entry = msixtbl_find_entry(v, address);
> nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
> if ( !virt )
> goto out;
>
> + /* Do not allow the mask bit to be changed. */
> +#if 0 /* XXX
> + * As the mask bit is the only defined bit in the word, and as the
> + * host MSI-X code doesn't preserve the other bits anyway, doing
> + * this is pointless. So for now just discard the write (also
> + * saving us from having to determine the matching irq_desc).
> + */
> + spin_lock_irqsave(&desc->lock, flags);
> + orig = readl(virt);
> + val &= ~PCI_MSIX_VECTOR_BITMASK;
> + val |= orig & PCI_MSIX_VECTOR_BITMASK;
> writel(val, virt);
> - r = X86EMUL_OKAY;
> + spin_unlock_irqrestore(&desc->lock, flags);
> +#endif
>
> + r = X86EMUL_OKAY;
> out:
> rcu_read_unlock(&msixtbl_rcu_lock);
> return r;
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
> */
> #define NR_HP_RESERVED_VECTORS 20
>
> -#define PCI_MSIX_ENTRY_SIZE 16
> -#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
> -#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
> -#define PCI_MSIX_ENTRY_DATA_OFFSET 8
> -#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
> -
> #define msi_control_reg(base) (base + PCI_MSI_FLAGS)
> #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
> #define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -11,6 +11,8 @@
> #include <xen/list.h>
> #include <xen/spinlock.h>
> #include <xen/irq.h>
> +#include <xen/pci_regs.h>
> +#include <xen/pfn.h>
> #include <asm/pci.h>
>
> /*
> @@ -30,8 +32,10 @@
> #define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
> #define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
>
> -#define MAX_MSIX_TABLE_ENTRIES 2048
> -#define MAX_MSIX_TABLE_PAGES 8
> +#define MAX_MSIX_TABLE_ENTRIES (PCI_MSIX_FLAGS_QSIZE + 1)
> +#define MAX_MSIX_TABLE_PAGES PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
> + PCI_MSIX_ENTRY_SIZE + \
> + (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
> struct pci_dev_info {
> bool_t is_extfn;
> bool_t is_virtfn;
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -305,6 +305,12 @@
> #define PCI_MSIX_PBA 8
> #define PCI_MSIX_BIRMASK (7 << 0)
>
> +#define PCI_MSIX_ENTRY_SIZE 16
> +#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
> +#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
> +#define PCI_MSIX_ENTRY_DATA_OFFSET 8
> +#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
> +
> #define PCI_MSIX_VECTOR_BITMASK (1 << 0)
>
> /* CompactPCI Hotswap Register */
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 16:39 ` Andrew Cooper
@ 2012-01-20 16:44 ` Jan Beulich
2012-01-20 16:50 ` Andrew Cooper
0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2012-01-20 16:44 UTC (permalink / raw)
To: Andrew Cooper
Cc: yuan.b.liu@intel.com, xen-devel@lists.xensource.com, Eddie Dong
>>> On 20.01.12 at 17:39, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 20/01/12 16:33, Jan Beulich wrote:
>> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
>> if ( !virt )
>> goto out;
>>
>> + /* Do not allow the mask bit to be changed. */
>> +#if 0 /* XXX
>
> You appear to still have some debugging in this patch.
No - that's why the comment is there. I wanted to keep the unused
code to make clear what ought to be taking place here.
Jan
>> + * As the mask bit is the only defined bit in the word, and as the
>> + * host MSI-X code doesn't preserve the other bits anyway, doing
>> + * this is pointless. So for now just discard the write (also
>> + * saving us from having to determine the matching irq_desc).
>> + */
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 16:44 ` Jan Beulich
@ 2012-01-20 16:50 ` Andrew Cooper
2012-01-20 17:01 ` Jan Beulich
0 siblings, 1 reply; 10+ messages in thread
From: Andrew Cooper @ 2012-01-20 16:50 UTC (permalink / raw)
To: Jan Beulich
Cc: yuan.b.liu@intel.com, xen-devel@lists.xensource.com, Eddie Dong
On 20/01/12 16:44, Jan Beulich wrote:
>>>> On 20.01.12 at 17:39, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 20/01/12 16:33, Jan Beulich wrote:
>>> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
>>> if ( !virt )
>>> goto out;
>>>
>>> + /* Do not allow the mask bit to be changed. */
>>> +#if 0 /* XXX
>> You appear to still have some debugging in this patch.
> No - that's why the comment is there. I wanted to keep the unused
> code to make clear what ought to be taking place here.
>
> Jan
Ah - that makes a little more sense. That was going to be my next query
for clarification.
I have some hardware at my disposal which can do SRIOV and PCI
passthrough with MSI-X capable network cards.
Any specific things you want testing, other than just a basic
passthrough and verify the cards are working?
~Andrew
>>> + * As the mask bit is the only defined bit in the word, and as the
>>> + * host MSI-X code doesn't preserve the other bits anyway, doing
>>> + * this is pointless. So for now just discard the write (also
>>> + * saving us from having to determine the matching irq_desc).
>>> + */
>
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 16:50 ` Andrew Cooper
@ 2012-01-20 17:01 ` Jan Beulich
2012-01-20 17:03 ` Andrew Cooper
0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2012-01-20 17:01 UTC (permalink / raw)
To: Andrew Cooper
Cc: yuan.b.liu@intel.com, xen-devel@lists.xensource.com, Eddie Dong
>>> On 20.01.12 at 17:50, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> I have some hardware at my disposal which can do SRIOV and PCI
> passthrough with MSI-X capable network cards.
>
> Any specific things you want testing, other than just a basic
> passthrough and verify the cards are working?
No, nothing specific. Thanks for taking on this!
Jan
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 17:01 ` Jan Beulich
@ 2012-01-20 17:03 ` Andrew Cooper
0 siblings, 0 replies; 10+ messages in thread
From: Andrew Cooper @ 2012-01-20 17:03 UTC (permalink / raw)
To: Jan Beulich
Cc: yuan.b.liu@intel.com, xen-devel@lists.xensource.com, Eddie Dong
On 20/01/12 17:01, Jan Beulich wrote:
>>>> On 20.01.12 at 17:50, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> I have some hardware at my disposal which can do SRIOV and PCI
>> passthrough with MSI-X capable network cards.
>>
>> Any specific things you want testing, other than just a basic
>> passthrough and verify the cards are working?
> No, nothing specific. Thanks for taking on this!
>
> Jan
No problem. I will try to get the testing done early next week - I have
other hypervisior bugs I am currently working on right at the moment.
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-20 16:33 [PATCH] x86/vMSI: miscellaneous fixes Jan Beulich
2012-01-20 16:39 ` Andrew Cooper
2012-01-20 16:42 ` Jan Beulich
@ 2012-01-24 15:43 ` Andrew Cooper
2012-01-24 16:15 ` Jan Beulich
2 siblings, 1 reply; 10+ messages in thread
From: Andrew Cooper @ 2012-01-24 15:43 UTC (permalink / raw)
To: xen-devel, Jan Beulich
On 20/01/12 16:33, Jan Beulich wrote:
> This addresses a number of problems in msixtbl_{read,write}():
> - address alignment was not checked, allowing for memory corruption in
> the hypervisor (write case) or returning of hypervisor private data
> to the guest (read case)
> - the interrupt mask bit was permitted to be written by the guest
> (while Xen's interrupt flow control routines need to control it)
> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
> numbers (making it unobvious why they have these values, and making
> the latter non-portable)
> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
> non-zero table offset); this was also affecting host MSI-X code
> - struct msixtbl_entry's table_flags[] was one element larger than
> necessary due to improper open-coding of BITS_TO_LONGS()
> - msixtbl_read() unconditionally accessed the physical table, even
> though the data was only needed in a quarter of all cases
> - various calculations were done unnecessarily for both of the rather
> distinct code paths in msixtbl_read()
>
> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
> chosen to be 3.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested by backport to XenServer 6.0 (Xen-4.1.1) The only conflicts were
with the context around adding the two new header files, and <xen/pfn.h>
which is a new header file.
Tested-and-acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -165,7 +165,7 @@ struct msixtbl_entry
> struct pci_dev *pdev;
> unsigned long gtable; /* gpa of msix table */
> unsigned long table_len;
> - unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
> + unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
> #define MAX_MSIX_ACC_ENTRIES 3
> struct {
> uint32_t msi_ad[3]; /* Shadow of address low, high and data */
> @@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
> static void __iomem *msixtbl_addr_to_virt(
> struct msixtbl_entry *entry, unsigned long addr)
> {
> - int idx, nr_page;
> + unsigned int idx, nr_page;
>
> - if ( !entry )
> + if ( !entry || !entry->pdev )
> return NULL;
>
> nr_page = (addr >> PAGE_SHIFT) -
> (entry->gtable >> PAGE_SHIFT);
>
> - if ( !entry->pdev )
> - return NULL;
> -
> idx = entry->pdev->msix_table_idx[nr_page];
> if ( !idx )
> return NULL;
> @@ -215,37 +212,34 @@ static int msixtbl_read(
> struct vcpu *v, unsigned long address,
> unsigned long len, unsigned long *pval)
> {
> - unsigned long offset, val;
> + unsigned long offset;
> struct msixtbl_entry *entry;
> void *virt;
> - int nr_entry, index;
> + unsigned int nr_entry, index;
> int r = X86EMUL_UNHANDLEABLE;
>
> - rcu_read_lock(&msixtbl_rcu_lock);
> + if ( len != 4 || (address & 3) )
> + return r;
>
> - if ( len != 4 )
> - goto out;
> + rcu_read_lock(&msixtbl_rcu_lock);
>
> entry = msixtbl_find_entry(v, address);
> - virt = msixtbl_addr_to_virt(entry, address);
> - if ( !virt )
> - goto out;
> -
> - nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
> - if ( nr_entry >= MAX_MSIX_ACC_ENTRIES &&
> - offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> - goto out;
>
> - val = readl(virt);
> if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> {
> + nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> + if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
> + goto out;
> index = offset / sizeof(uint32_t);
> *pval = entry->gentries[nr_entry].msi_ad[index];
> }
> else
> {
> - *pval = val;
> + virt = msixtbl_addr_to_virt(entry, address);
> + if ( !virt )
> + goto out;
> + *pval = readl(virt);
> }
>
> r = X86EMUL_OKAY;
> @@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
> unsigned long offset;
> struct msixtbl_entry *entry;
> void *virt;
> - int nr_entry, index;
> + unsigned int nr_entry, index;
> int r = X86EMUL_UNHANDLEABLE;
>
> - rcu_read_lock(&msixtbl_rcu_lock);
> + if ( len != 4 || (address & 3) )
> + return r;
>
> - if ( len != 4 )
> - goto out;
> + rcu_read_lock(&msixtbl_rcu_lock);
>
> entry = msixtbl_find_entry(v, address);
> nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
> if ( !virt )
> goto out;
>
> + /* Do not allow the mask bit to be changed. */
> +#if 0 /* XXX
> + * As the mask bit is the only defined bit in the word, and as the
> + * host MSI-X code doesn't preserve the other bits anyway, doing
> + * this is pointless. So for now just discard the write (also
> + * saving us from having to determine the matching irq_desc).
> + */
> + spin_lock_irqsave(&desc->lock, flags);
> + orig = readl(virt);
> + val &= ~PCI_MSIX_VECTOR_BITMASK;
> + val |= orig & PCI_MSIX_VECTOR_BITMASK;
> writel(val, virt);
> - r = X86EMUL_OKAY;
> + spin_unlock_irqrestore(&desc->lock, flags);
> +#endif
>
> + r = X86EMUL_OKAY;
> out:
> rcu_read_unlock(&msixtbl_rcu_lock);
> return r;
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
> */
> #define NR_HP_RESERVED_VECTORS 20
>
> -#define PCI_MSIX_ENTRY_SIZE 16
> -#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
> -#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
> -#define PCI_MSIX_ENTRY_DATA_OFFSET 8
> -#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
> -
> #define msi_control_reg(base) (base + PCI_MSI_FLAGS)
> #define msi_lower_address_reg(base) (base + PCI_MSI_ADDRESS_LO)
> #define msi_upper_address_reg(base) (base + PCI_MSI_ADDRESS_HI)
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -11,6 +11,8 @@
> #include <xen/list.h>
> #include <xen/spinlock.h>
> #include <xen/irq.h>
> +#include <xen/pci_regs.h>
> +#include <xen/pfn.h>
> #include <asm/pci.h>
>
> /*
> @@ -30,8 +32,10 @@
> #define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
> #define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
>
> -#define MAX_MSIX_TABLE_ENTRIES 2048
> -#define MAX_MSIX_TABLE_PAGES 8
> +#define MAX_MSIX_TABLE_ENTRIES (PCI_MSIX_FLAGS_QSIZE + 1)
> +#define MAX_MSIX_TABLE_PAGES PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
> + PCI_MSIX_ENTRY_SIZE + \
> + (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
> struct pci_dev_info {
> bool_t is_extfn;
> bool_t is_virtfn;
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -305,6 +305,12 @@
> #define PCI_MSIX_PBA 8
> #define PCI_MSIX_BIRMASK (7 << 0)
>
> +#define PCI_MSIX_ENTRY_SIZE 16
> +#define PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET 0
> +#define PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET 4
> +#define PCI_MSIX_ENTRY_DATA_OFFSET 8
> +#define PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET 12
> +
> #define PCI_MSIX_VECTOR_BITMASK (1 << 0)
>
> /* CompactPCI Hotswap Register */
>
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-24 15:43 ` Andrew Cooper
@ 2012-01-24 16:15 ` Jan Beulich
2012-02-03 5:38 ` Haitao Shan
0 siblings, 1 reply; 10+ messages in thread
From: Jan Beulich @ 2012-01-24 16:15 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel
>>> On 24.01.12 at 16:43, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 20/01/12 16:33, Jan Beulich wrote:
>> This addresses a number of problems in msixtbl_{read,write}():
>> - address alignment was not checked, allowing for memory corruption in
>> the hypervisor (write case) or returning of hypervisor private data
>> to the guest (read case)
>> - the interrupt mask bit was permitted to be written by the guest
>> (while Xen's interrupt flow control routines need to control it)
>> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
>> numbers (making it unobvious why they have these values, and making
>> the latter non-portable)
>> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
>> non-zero table offset); this was also affecting host MSI-X code
>> - struct msixtbl_entry's table_flags[] was one element larger than
>> necessary due to improper open-coding of BITS_TO_LONGS()
>> - msixtbl_read() unconditionally accessed the physical table, even
>> though the data was only needed in a quarter of all cases
>> - various calculations were done unnecessarily for both of the rather
>> distinct code paths in msixtbl_read()
>>
>> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
>> chosen to be 3.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Tested by backport to XenServer 6.0 (Xen-4.1.1) The only conflicts were
> with the context around adding the two new header files, and <xen/pfn.h>
> which is a new header file.
>
> Tested-and-acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
Thanks a lot, Andrew! (Keir had already committed the patch.)
Jan
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] x86/vMSI: miscellaneous fixes
2012-01-24 16:15 ` Jan Beulich
@ 2012-02-03 5:38 ` Haitao Shan
0 siblings, 0 replies; 10+ messages in thread
From: Haitao Shan @ 2012-02-03 5:38 UTC (permalink / raw)
To: Jan Beulich; +Cc: Andrew Cooper, xen-devel, Dugger, Donald D
We've tested the commit using standard NIC and SR-IOV NIC. It works fine.
Shan Haitao
2012/1/25 Jan Beulich <JBeulich@suse.com>:
>>>> On 24.01.12 at 16:43, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 20/01/12 16:33, Jan Beulich wrote:
>>> This addresses a number of problems in msixtbl_{read,write}():
>>> - address alignment was not checked, allowing for memory corruption in
>>> the hypervisor (write case) or returning of hypervisor private data
>>> to the guest (read case)
>>> - the interrupt mask bit was permitted to be written by the guest
>>> (while Xen's interrupt flow control routines need to control it)
>>> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
>>> numbers (making it unobvious why they have these values, and making
>>> the latter non-portable)
>>> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
>>> non-zero table offset); this was also affecting host MSI-X code
>>> - struct msixtbl_entry's table_flags[] was one element larger than
>>> necessary due to improper open-coding of BITS_TO_LONGS()
>>> - msixtbl_read() unconditionally accessed the physical table, even
>>> though the data was only needed in a quarter of all cases
>>> - various calculations were done unnecessarily for both of the rather
>>> distinct code paths in msixtbl_read()
>>>
>>> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
>>> chosen to be 3.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>
>> Tested by backport to XenServer 6.0 (Xen-4.1.1) The only conflicts were
>> with the context around adding the two new header files, and <xen/pfn.h>
>> which is a new header file.
>>
>> Tested-and-acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> Thanks a lot, Andrew! (Keir had already committed the patch.)
>
> Jan
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2012-02-03 5:38 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-20 16:33 [PATCH] x86/vMSI: miscellaneous fixes Jan Beulich
2012-01-20 16:39 ` Andrew Cooper
2012-01-20 16:44 ` Jan Beulich
2012-01-20 16:50 ` Andrew Cooper
2012-01-20 17:01 ` Jan Beulich
2012-01-20 17:03 ` Andrew Cooper
2012-01-20 16:42 ` Jan Beulich
2012-01-24 15:43 ` Andrew Cooper
2012-01-24 16:15 ` Jan Beulich
2012-02-03 5:38 ` Haitao Shan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.