From: Wei Wang <wei.wang2@amd.com>
To: Jan Beulich <JBeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>,
	Jeremy Fitzhardinge <jeremy@goop.org>,
	"xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Subject: Re: [PATCH V2] amd iommu: re-enable iommu msi if dom0 disabled it
Date: Thu, 21 Jun 2012 17:29:00 +0200
Message-ID: <4FE33DBC.4030104@amd.com>
In-Reply-To: <4FE20C49020000780008AE25@nat28.tlf.novell.com>

On 06/20/2012 05:45 PM, Jan Beulich wrote:
> Rather than submitting it for inclusion immediately, I'd rather see
> you first use it successfully. Below/attached what appears to
> work fine for me in contrived situations (i.e. hiding bridges with
> nothing connected, as I still don't have any AMD IOMMU system
> at hand).

Hi Jan,

The patch works for me. The IOMMU MSI capability is shown as enabled
with this patch.
Thanks,
Wei

00:00.2 Generic system peripheral [0806]: Advanced Micro Devices [AMD] 
Device 1419
         Subsystem: Advanced Micro Devices [AMD] Device 1419
         Flags: bus master, fast devsel, latency 0, IRQ 11
         Capabilities: [40] Secure device <?>
         Capabilities: [54] MSI: Enable+ Count=1/1 Maskable- 64bit+
         Capabilities: [64] HyperTransport: MSI Mapping Enable+ Fixed+


> Jan
>
> Note that due to ptwr_do_page_fault() being run first, there'll be a
> MEM_LOG() issued for each such attempt. If that's undesirable, the
> order of the calls would need to be swapped.
>
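Aside, a sketch only and not part of the patch: if the MEM_LOG() noise
were a concern, the swap mentioned above would amount to re-ordering the
two calls inside the same guard block, reusing the names from the
traps.c hunk below:

#ifdef __x86_64__
        /* Sketch: try the r/o MMIO fixup first, so ptwr_do_page_fault()
         * never runs, and never logs, for these accesses. */
        if ( IS_PRIV(d) && (regs->error_code & PFEC_page_present) &&
             mmio_ro_do_page_fault(v, addr, regs) )
            return EXCRET_fault_fixed;
#endif

        /* Writable-pagetable emulation becomes the fallback. */
        if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
             (paging_mode_enabled(d) ||
              (regs->error_code & PFEC_page_present)) &&
             ptwr_do_page_fault(v, addr, regs) )
            return EXCRET_fault_fixed;
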
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -1875,6 +1875,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
>               break;
>           case 1:
>               l1e_remove_flags(nl1e, _PAGE_RW);
> +            rc = 0;
>               break;
>           }
>           if ( page )
> @@ -5208,6 +5209,97 @@ int ptwr_do_page_fault(struct vcpu *v, u
>       return 0;
>   }
>
> +#ifdef __x86_64__
> +/*************************
> + * fault handling for read-only MMIO pages
> + */
> +
> +struct mmio_ro_emulate_ctxt {
> +    struct x86_emulate_ctxt ctxt;
> +    unsigned long cr2;
> +};
> +
> +static int mmio_ro_emulated_read(
> +    enum x86_segment seg,
> +    unsigned long offset,
> +    void *p_data,
> +    unsigned int bytes,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    return X86EMUL_UNHANDLEABLE;
> +}
> +
> +static int mmio_ro_emulated_write(
> +    enum x86_segment seg,
> +    unsigned long offset,
> +    void *p_data,
> +    unsigned int bytes,
> +    struct x86_emulate_ctxt *ctxt)
> +{
> +    struct mmio_ro_emulate_ctxt *mmio_ro_ctxt =
> +        container_of(ctxt, struct mmio_ro_emulate_ctxt, ctxt);
> +
> +    /* Only allow naturally-aligned stores at the original %cr2 address. */
> +    if ( ((bytes | offset) & (bytes - 1)) || offset != mmio_ro_ctxt->cr2 )
> +    {
> +        MEM_LOG("mmio_ro_emulate: bad access (cr2=%lx, addr=%lx, bytes=%u)",
> +                mmio_ro_ctxt->cr2, offset, bytes);
> +        return X86EMUL_UNHANDLEABLE;
> +    }
> +
> +    return X86EMUL_OKAY;
> +}
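
Not part of the patch, illustration only: the test above rejects
non-power-of-two sizes and misaligned offsets in a single expression.
A hypothetical standalone form of the same predicate:

/* Same check as in mmio_ro_emulated_write():
 *   bytes=4, offset=0xfc: (4|0xfc) & 3 == 0  -> accepted
 *   bytes=4, offset=0xfe: (4|0xfe) & 3 == 2  -> rejected (misaligned)
 *   bytes=3, offset=0xfc: (3|0xfc) & 2 != 0  -> rejected (size not a
 *                                               power of two)
 */
static inline int naturally_aligned(unsigned long offset, unsigned int bytes)
{
    return ((bytes | offset) & (bytes - 1)) == 0;
}
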
> +
> +static const struct x86_emulate_ops mmio_ro_emulate_ops = {
> +    .read       = mmio_ro_emulated_read,
> +    .insn_fetch = ptwr_emulated_read,
> +    .write      = mmio_ro_emulated_write,
> +};
> +
> +/* Check if guest is trying to modify a r/o MMIO page. */
> +int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
> +                          struct cpu_user_regs *regs)
> +{
> +    l1_pgentry_t      pte;
> +    unsigned long     mfn;
> +    unsigned int      addr_size = is_pv_32on64_domain(v->domain) ?
> +                                  32 : BITS_PER_LONG;
> +    struct mmio_ro_emulate_ctxt mmio_ro_ctxt = {
> +        .ctxt.regs = regs,
> +        .ctxt.addr_size = addr_size,
> +        .ctxt.sp_size = addr_size,
> +        .cr2 = addr
> +    };
> +    int rc;
> +
> +    /* Attempt to read the PTE that maps the VA being accessed. */
> +    guest_get_eff_l1e(v, addr, &pte);
> +
> +    /* We are looking only for read-only mappings of MMIO pages. */
> +    if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) )
> +        return 0;
> +
> +    mfn = l1e_get_pfn(pte);
> +    if ( mfn_valid(mfn) )
> +    {
> +        struct page_info *page = mfn_to_page(mfn);
> +        struct domain *owner = page_get_owner_and_reference(page);
> +
> +        if ( owner )
> +            put_page(page);
> +        if ( owner != dom_io )
> +            return 0;
> +    }
> +
> +    if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) )
> +        return 0;
> +
> +    rc = x86_emulate(&mmio_ro_ctxt.ctxt, &mmio_ro_emulate_ops);
> +
> +    return rc != X86EMUL_UNHANDLEABLE ? EXCRET_fault_fixed : 0;
> +}
> +#endif /* __x86_64__ */
> +
>   void free_xen_pagetable(void *v)
>   {
>       if ( system_state == SYS_STATE_early_boot )
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -1349,20 +1349,23 @@ static int fixup_page_fault(unsigned lon
>           return 0;
>       }
>
> -    if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
> -         guest_kernel_mode(v, regs) )
> -    {
> -        unsigned int mbs = PFEC_write_access;
> -        unsigned int mbz = PFEC_reserved_bit | PFEC_insn_fetch;
> -
> -        /* Do not check if access-protection fault since the page may
> -           legitimately be not present in shadow page tables */
> -        if ( !paging_mode_enabled(d) )
> -            mbs |= PFEC_page_present;
> -
> -        if ( ((regs->error_code & (mbs | mbz)) == mbs) &&
> +    if ( guest_kernel_mode(v, regs) &&
> +         !(regs->error_code & (PFEC_reserved_bit | PFEC_insn_fetch)) &&
> +         (regs->error_code & PFEC_write_access) )
> +    {
> +        if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
> +             /* Do not check if access-protection fault since the page may
> +                legitimately be not present in shadow page tables */
> +             (paging_mode_enabled(d) ||
> +              (regs->error_code & PFEC_page_present)) &&
>                ptwr_do_page_fault(v, addr, regs) )
>               return EXCRET_fault_fixed;
> +
> +#ifdef __x86_64__
> +        if ( IS_PRIV(d) && (regs->error_code & PFEC_page_present) &&
> +             mmio_ro_do_page_fault(v, addr, regs) )
> +            return EXCRET_fault_fixed;
> +#endif
>       }
>
>       /* For non-external shadowed guests, we fix up both their own
> @@ -1690,6 +1693,13 @@ static int pci_cfg_ok(struct domain *d,
>           return 0;
>
>       machine_bdf = (d->arch.pci_cf8 >> 8) & 0xFFFF;
> +    if ( write )
> +    {
> +        const unsigned long *ro_map = pci_get_ro_map(0);
> +
> +        if ( ro_map && test_bit(machine_bdf, ro_map) )
> +            return 0;
> +    }
>       start = d->arch.pci_cf8 & 0xFF;
>       end = start + size - 1;
>       if (xsm_pci_config_permission(d, machine_bdf, start, end, write))
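
Worked example, not part of the patch, using the IOMMU at 00:00.2 from
the lspci output above: a dom0 write aimed at that device's MSI
capability at config offset 0x54 goes through CF8/CFC as follows.

/* cf8 = enable | bus << 16 | dev << 11 | func << 8 | reg
 *     = 0x80000000 | (0 << 16) | (0 << 11) | (2 << 8) | 0x54
 *     = 0x80000254
 * machine_bdf = (0x80000254 >> 8) & 0xFFFF = 0x0002
 * With bit 0x0002 set in the segment's ro_map, pci_cfg_ok() now fails
 * the write, which is what keeps the IOMMU's MSI enable bit intact.
 */
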
> --- a/xen/arch/x86/x86_32/pci.c
> +++ b/xen/arch/x86/x86_32/pci.c
> @@ -6,6 +6,7 @@
>
>   #include <xen/spinlock.h>
>   #include <xen/pci.h>
> +#include <xen/init.h>
>   #include <asm/io.h>
>
>   #define PCI_CONF_ADDRESS(bus, dev, func, reg) \
> @@ -70,3 +71,7 @@ void pci_conf_write32(
>       BUG_ON((bus > 255) || (dev > 31) || (func > 7) || (reg > 255));
>       pci_conf_write(PCI_CONF_ADDRESS(bus, dev, func, reg), 0, 4, data);
>   }
> +
> +void __init arch_pci_ro_device(int seg, int bdf)
> +{
> +}
> --- a/xen/arch/x86/x86_64/mmconfig_64.c
> +++ b/xen/arch/x86/x86_64/mmconfig_64.c
> @@ -145,9 +145,30 @@ static void __iomem *mcfg_ioremap(const
>       return (void __iomem *) virt;
>   }
>
> +void arch_pci_ro_device(int seg, int bdf)
> +{
> +    unsigned int idx, bus = PCI_BUS(bdf);
> +
> +    for (idx = 0; idx < pci_mmcfg_config_num; ++idx) {
> +        const struct acpi_mcfg_allocation *cfg = pci_mmcfg_virt[idx].cfg;
> +        unsigned long mfn = (cfg->address >> PAGE_SHIFT) + bdf;
> +
> +        if (!pci_mmcfg_virt[idx].virt || cfg->pci_segment != seg ||
> +            cfg->start_bus_number > bus || cfg->end_bus_number < bus)
> +            continue;
> +
> +        if (rangeset_add_singleton(mmio_ro_ranges, mfn))
> +            printk(XENLOG_ERR
> +                   "%04x:%02x:%02x.%u: could not mark MCFG (mfn %#lx) read-only\n",
> +                   cfg->pci_segment, bus, PCI_SLOT(bdf), PCI_FUNC(bdf),
> +                   mfn);
> +    }
> +}
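
Illustration only, with a hypothetical MCFG base address: in ECAM each
BDF owns one 4KiB page of config space, hence the simple arithmetic
above.

/* Assuming cfg->address = 0xe0000000 and bdf = 0x0002 (i.e. 00:00.2):
 *   mfn = (0xe0000000 >> PAGE_SHIFT) + 0x0002 = 0xe0000 + 2 = 0xe0002
 * so exactly the ECAM page of 00:00.2 ends up in mmio_ro_ranges.
 */
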
> +
>   int pci_mmcfg_arch_enable(unsigned int idx)
>   {
>       const typeof(pci_mmcfg_config[0]) *cfg = pci_mmcfg_virt[idx].cfg;
> +    const unsigned long *ro_map = pci_get_ro_map(cfg->pci_segment);
>
>       if (pci_mmcfg_virt[idx].virt)
>           return 0;
> @@ -159,6 +180,16 @@ int pci_mmcfg_arch_enable(unsigned int i
>       }
>       printk(KERN_INFO "PCI: Using MCFG for segment %04x bus %02x-%02x\n",
>              cfg->pci_segment, cfg->start_bus_number, cfg->end_bus_number);
> +    if (ro_map) {
> +        unsigned int bdf = PCI_BDF(cfg->start_bus_number, 0, 0);
> +        unsigned int end = PCI_BDF(cfg->end_bus_number, -1, -1);
> +
> +        while ((bdf = find_next_bit(ro_map, end + 1, bdf)) <= end) {
> +            arch_pci_ro_device(cfg->pci_segment, bdf);
> +            if (bdf++ == end)
> +                break;
> +        }
> +    }
>       return 0;
>   }
>
> --- a/xen/drivers/passthrough/amd/iommu_detect.c
> +++ b/xen/drivers/passthrough/amd/iommu_detect.c
> @@ -153,6 +153,12 @@ int __init amd_iommu_detect_one_acpi(
>       if ( rt )
>           return -ENODEV;
>
> +    rt = pci_ro_device(iommu->seg, bus, PCI_DEVFN(dev, func));
> +    if ( rt )
> +        printk(XENLOG_ERR
> +               "Could not mark config space of %04x:%02x:%02x.%u read-only (%d)\n",
> +               iommu->seg, bus, dev, func, rt);
> +
>       list_add_tail(&iommu->list, &amd_iommu_head);
>
>       return 0;
> --- a/xen/drivers/passthrough/io.c
> +++ b/xen/drivers/passthrough/io.c
> @@ -593,11 +593,3 @@ void hvm_dpci_eoi(struct domain *d, unsi
>   unlock:
>       spin_unlock(&d->event_lock);
>   }
> -
> -static int __init setup_mmio_ro_ranges(void)
> -{
> -    mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
> -                                  RANGESETF_prettyprint_hex);
> -    return 0;
> -}
> -__initcall(setup_mmio_ro_ranges);
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -36,6 +36,7 @@
>   struct pci_seg {
>       struct list_head alldevs_list;
>       u16 nr;
> +    unsigned long *ro_map;
>       /* bus2bridge_lock protects bus2bridge array */
>       spinlock_t bus2bridge_lock;
>   #define MAX_BUSES 256
> @@ -106,6 +107,8 @@ void __init pt_pci_init(void)
>       radix_tree_init(&pci_segments);
>       if ( !alloc_pseg(0) )
>           panic("Could not initialize PCI segment 0\n");
> +    mmio_ro_ranges = rangeset_new(NULL, "r/o mmio ranges",
> +                                  RANGESETF_prettyprint_hex);
>   }
>
>   int __init pci_add_segment(u16 seg)
> @@ -113,6 +116,13 @@ int __init pci_add_segment(u16 seg)
>       return alloc_pseg(seg) ? 0 : -ENOMEM;
>   }
>
> +const unsigned long *pci_get_ro_map(u16 seg)
> +{
> +    struct pci_seg *pseg = get_pseg(seg);
> +
> +    return pseg ? pseg->ro_map : NULL;
> +}
> +
>   static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
>   {
>       struct pci_dev *pdev;
> @@ -198,6 +208,33 @@ static void free_pdev(struct pci_seg *ps
>       xfree(pdev);
>   }
>
> +int __init pci_ro_device(int seg, int bus, int devfn)
> +{
> +    struct pci_seg *pseg = alloc_pseg(seg);
> +    struct pci_dev *pdev;
> +
> +    if ( !pseg )
> +        return -ENOMEM;
> +    pdev = alloc_pdev(pseg, bus, devfn);
> +    if ( !pdev )
> +        return -ENOMEM;
> +
> +    if ( !pseg->ro_map )
> +    {
> +        size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);
> +
> +        pseg->ro_map = alloc_xenheap_pages(get_order_from_bytes(sz), 0);
> +        if ( !pseg->ro_map )
> +            return -ENOMEM;
> +        memset(pseg->ro_map, 0, sz);
> +    }
> +
> +    __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
> +    arch_pci_ro_device(seg, PCI_BDF2(bus, devfn));
> +
> +    return 0;
> +}
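
Arithmetic aside, not part of the patch: PCI_BDF(-1, -1, -1) is 0xffff,
so the per-segment ro_map covers all 0x10000 possible BDFs.

/* sz = BITS_TO_LONGS(0x10000) * sizeof(long)
 *    = (65536 / 64) * 8 = 8192 bytes on 64-bit builds,
 * so get_order_from_bytes(sz) == 1: a two-page xenheap allocation.
 */
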
> +
>   struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
>   {
>       struct pci_seg *pseg = get_pseg(seg);
> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -555,6 +555,8 @@ void memguard_unguard_stack(void *p);
>
>   int  ptwr_do_page_fault(struct vcpu *, unsigned long,
>                           struct cpu_user_regs *);
> +int  mmio_ro_do_page_fault(struct vcpu *, unsigned long,
> +                           struct cpu_user_regs *);
>
>   int audit_adjust_pgtables(struct domain *d, int dir, int noisy);
>
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -98,8 +98,11 @@ struct pci_dev *pci_lock_domain_pdev(
>   void setup_dom0_pci_devices(struct domain *, void (*)(struct pci_dev *));
>   void pci_release_devices(struct domain *d);
>   int pci_add_segment(u16 seg);
> +const unsigned long *pci_get_ro_map(u16 seg);
>   int pci_add_device(u16 seg, u8 bus, u8 devfn, const struct pci_dev_info *);
>   int pci_remove_device(u16 seg, u8 bus, u8 devfn);
> +int pci_ro_device(int seg, int bus, int devfn);
> +void arch_pci_ro_device(int seg, int bdf);
>   struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
>   struct pci_dev *pci_get_pdev_by_domain(
>       struct domain *, int seg, int bus, int devfn);
>
