All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: "yuan.b.liu@intel.com" <yuan.b.liu@intel.com>,
	"xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
	Eddie Dong <eddie.dong@intel.com>
Subject: Re: [PATCH] x86/vMSI: miscellaneous fixes
Date: Fri, 20 Jan 2012 16:39:14 +0000	[thread overview]
Message-ID: <4F1998B2.1070501@citrix.com> (raw)
In-Reply-To: <4F19A577020000780006E115@nat28.tlf.novell.com>

On 20/01/12 16:33, Jan Beulich wrote:
> This addresses a number of problems in msixtbl_{read,write}():
> - address alignment was not checked, allowing for memory corruption in
>   the hypervisor (write case) or returning of hypervisor private data
>   to the guest (read case)
> - the interrupt mask bit was permitted to be written by the guest
>   (while Xen's interrupt flow control routines need to control it)
> - MAX_MSIX_TABLE_{ENTRIES,PAGES} were pointlessly defined to plain
>   numbers (making it unobvious why they have these values, and making
>   the latter non-portable)
> - MAX_MSIX_TABLE_PAGES was also off by one (failing to account for a
>   non-zero table offset); this was also affecting host MSI-X code
> - struct msixtbl_entry's table_flags[] was one element larger than
>   necessary due to improper open-coding of BITS_TO_LONGS()
> - msixtbl_read() unconditionally accessed the physical table, even
>   though the data was only needed in a quarter of all cases
> - various calculations were done unnecessarily for both of the rather
>   distinct code paths in msixtbl_read()
>
> Additionally it is unclear on what basis MAX_MSIX_ACC_ENTRIES was
> chosen to be 3.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -165,7 +165,7 @@ struct msixtbl_entry
>      struct pci_dev *pdev;
>      unsigned long gtable;       /* gpa of msix table */
>      unsigned long table_len;
> -    unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
> +    unsigned long table_flags[BITS_TO_LONGS(MAX_MSIX_TABLE_ENTRIES)];
>  #define MAX_MSIX_ACC_ENTRIES 3
>      struct { 
>          uint32_t msi_ad[3];	/* Shadow of address low, high and data */
> @@ -192,17 +192,14 @@ static struct msixtbl_entry *msixtbl_fin
>  static void __iomem *msixtbl_addr_to_virt(
>      struct msixtbl_entry *entry, unsigned long addr)
>  {
> -    int idx, nr_page;
> +    unsigned int idx, nr_page;
>  
> -    if ( !entry )
> +    if ( !entry || !entry->pdev )
>          return NULL;
>  
>      nr_page = (addr >> PAGE_SHIFT) -
>                (entry->gtable >> PAGE_SHIFT);
>  
> -    if ( !entry->pdev )
> -        return NULL;
> -
>      idx = entry->pdev->msix_table_idx[nr_page];
>      if ( !idx )
>          return NULL;
> @@ -215,37 +212,34 @@ static int msixtbl_read(
>      struct vcpu *v, unsigned long address,
>      unsigned long len, unsigned long *pval)
>  {
> -    unsigned long offset, val;
> +    unsigned long offset;
>      struct msixtbl_entry *entry;
>      void *virt;
> -    int nr_entry, index;
> +    unsigned int nr_entry, index;
>      int r = X86EMUL_UNHANDLEABLE;
>  
> -    rcu_read_lock(&msixtbl_rcu_lock);
> +    if ( len != 4 || (address & 3) )
> +        return r;
>  
> -    if ( len != 4 )
> -        goto out;
> +    rcu_read_lock(&msixtbl_rcu_lock);
>  
>      entry = msixtbl_find_entry(v, address);
> -    virt = msixtbl_addr_to_virt(entry, address);
> -    if ( !virt )
> -        goto out;
> -
> -    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
>      offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
> -    if ( nr_entry >= MAX_MSIX_ACC_ENTRIES && 
> -         offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
> -        goto out;
>  
> -    val = readl(virt);
>      if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
>      {
> +        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> +        if ( nr_entry >= MAX_MSIX_ACC_ENTRIES )
> +            goto out;
>          index = offset / sizeof(uint32_t);
>          *pval = entry->gentries[nr_entry].msi_ad[index];
>      }
>      else 
>      {
> -        *pval = val;
> +        virt = msixtbl_addr_to_virt(entry, address);
> +        if ( !virt )
> +            goto out;
> +        *pval = readl(virt);
>      }
>      
>      r = X86EMUL_OKAY;
> @@ -260,13 +254,13 @@ static int msixtbl_write(struct vcpu *v,
>      unsigned long offset;
>      struct msixtbl_entry *entry;
>      void *virt;
> -    int nr_entry, index;
> +    unsigned int nr_entry, index;
>      int r = X86EMUL_UNHANDLEABLE;
>  
> -    rcu_read_lock(&msixtbl_rcu_lock);
> +    if ( len != 4 || (address & 3) )
> +        return r;
>  
> -    if ( len != 4 )
> -        goto out;
> +    rcu_read_lock(&msixtbl_rcu_lock);
>  
>      entry = msixtbl_find_entry(v, address);
>      nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
> @@ -291,9 +285,22 @@ static int msixtbl_write(struct vcpu *v,
>      if ( !virt )
>          goto out;
>  
> +    /* Do not allow the mask bit to be changed. */
> +#if 0 /* XXX

You appear to still have some debugging in this patch.

~Andrew

> +       * As the mask bit is the only defined bit in the word, and as the
> +       * host MSI-X code doesn't preserve the other bits anyway, doing
> +       * this is pointless. So for now just discard the write (also
> +       * saving us from having to determine the matching irq_desc).
> +       */
> +    spin_lock_irqsave(&desc->lock, flags);
> +    orig = readl(virt);
> +    val &= ~PCI_MSIX_VECTOR_BITMASK;
> +    val |= orig & PCI_MSIX_VECTOR_BITMASK;
>      writel(val, virt);
> -    r = X86EMUL_OKAY;
> +    spin_unlock_irqrestore(&desc->lock, flags);
> +#endif
>  
> +    r = X86EMUL_OKAY;
>  out:
>      rcu_read_unlock(&msixtbl_rcu_lock);
>      return r;
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -122,12 +122,6 @@ int msi_free_irq(struct msi_desc *entry)
>   */
>  #define NR_HP_RESERVED_VECTORS 	20
>  
> -#define PCI_MSIX_ENTRY_SIZE			16
> -#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
> -#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
> -#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
> -#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
> -
>  #define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
>  #define msi_lower_address_reg(base)	(base + PCI_MSI_ADDRESS_LO)
>  #define msi_upper_address_reg(base)	(base + PCI_MSI_ADDRESS_HI)
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -11,6 +11,8 @@
>  #include <xen/list.h>
>  #include <xen/spinlock.h>
>  #include <xen/irq.h>
> +#include <xen/pci_regs.h>
> +#include <xen/pfn.h>
>  #include <asm/pci.h>
>  
>  /*
> @@ -30,8 +32,10 @@
>  #define PCI_BDF(b,d,f)  ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
>  #define PCI_BDF2(b,df)  ((((b) & 0xff) << 8) | ((df) & 0xff))
>  
> -#define MAX_MSIX_TABLE_ENTRIES  2048
> -#define MAX_MSIX_TABLE_PAGES    8
> +#define MAX_MSIX_TABLE_ENTRIES  (PCI_MSIX_FLAGS_QSIZE + 1)
> +#define MAX_MSIX_TABLE_PAGES    PFN_UP(MAX_MSIX_TABLE_ENTRIES * \
> +                                       PCI_MSIX_ENTRY_SIZE + \
> +                                       (~PCI_MSIX_BIRMASK & (PAGE_SIZE - 1)))
>  struct pci_dev_info {
>      bool_t is_extfn;
>      bool_t is_virtfn;
> --- a/xen/include/xen/pci_regs.h
> +++ b/xen/include/xen/pci_regs.h
> @@ -305,6 +305,12 @@
>  #define PCI_MSIX_PBA		8
>  #define  PCI_MSIX_BIRMASK	(7 << 0)
>  
> +#define PCI_MSIX_ENTRY_SIZE			16
> +#define  PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET	0
> +#define  PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET	4
> +#define  PCI_MSIX_ENTRY_DATA_OFFSET		8
> +#define  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET	12
> +
>  #define PCI_MSIX_VECTOR_BITMASK	(1 << 0)
>  
>  /* CompactPCI Hotswap Register */
>
>

-- 
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com

  reply	other threads:[~2012-01-20 16:39 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-20 16:33 [PATCH] x86/vMSI: miscellaneous fixes Jan Beulich
2012-01-20 16:39 ` Andrew Cooper [this message]
2012-01-20 16:44   ` Jan Beulich
2012-01-20 16:50     ` Andrew Cooper
2012-01-20 17:01       ` Jan Beulich
2012-01-20 17:03         ` Andrew Cooper
2012-01-20 16:42 ` Jan Beulich
2012-01-24 15:43 ` Andrew Cooper
2012-01-24 16:15   ` Jan Beulich
2012-02-03  5:38     ` Haitao Shan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F1998B2.1070501@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=eddie.dong@intel.com \
    --cc=xen-devel@lists.xensource.com \
    --cc=yuan.b.liu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.