From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH, RFC, resend] Re: granting access to MSI-X table and pending bit array Date: Fri, 13 Aug 2010 14:37:22 +0100 Message-ID: <4C6566B2020000780000FBD9@vpn.id2.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=__Part89A4AD82.0__=" Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: "xen-devel@lists.xensource.com" Cc: Konrad Rzeszutek Wilk List-Id: xen-devel@lists.xenproject.org This is a MIME message. If you are reading this text, you may want to consider changing to a mail reader or gateway that understands how to properly handle MIME multipart messages. --=__Part89A4AD82.0__= Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Content-Disposition: inline Below/attached is an untested and possibly not yet complete patch attempting to address the problem originally described on this thread (I can't really test this myself, as I don't have, with one exception, any MSI-X capable devices around, and the exceptional one doesn't have a driver making use of it). I had sent it once before; it has only been refreshed since then to build with xen-unstable as of c/s 21952, and I'm re-sending it mainly because there was no feedback so far, despite the original problem representing a security issue. It tracks MMIO MFNs required to only have read-only guest access (determined when the first MSI-X interrupt gets enabled on a device) in a global range set, and enforces the write protection as translations get established. The changes are made under the assumption that p2m_mmio_direct will only ever be used for order-0 pages. 
An open question is whether dealing with pv guests (including the IOMMU-less case) is necessary, as handling mappings a domain may already have in place at the time the first interrupt gets set up would require scanning all of the guest's page table pages. An alternative would be to determine and insert the address ranges earlier into mmio_ro_ranges, but that would require a hook in the PCI config space writes, which is particularly problematic in case MMCONFIG accesses are being used. A second alternative would be to require Dom0 to report all devices (or at least all MSI-X capable ones) regardless of whether they would be used by that domain, and do so after resources got determined/ assigned for them (i.e. a second notification later than the one currently happening from the PCI bus scan would be needed). (Attached is also a trivial prerequisite patch.) Jan --- 2010-08-12.orig/xen/arch/x86/mm.c 2010-08-12 17:36:43.000000000 = +0200 +++ 2010-08-12/xen/arch/x86/mm.c 2010-08-12 17:16:32.000000000 = +0200 @@ -824,7 +824,13 @@ get_page_from_l1e( return 0; } =20 - return 1; + if ( !(l1f & _PAGE_RW) || IS_PRIV(pg_owner) || + !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) + return 1; + dprintk(XENLOG_G_WARNING, + "d%d: Forcing read-only access to MFN %lx\n", + l1e_owner->domain_id, mfn); + return -1; } =20 if ( unlikely(real_pg_owner !=3D pg_owner) ) @@ -1180,9 +1186,15 @@ static int alloc_l1_table(struct page_in =20 for ( i =3D 0; i < L1_PAGETABLE_ENTRIES; i++ ) { - if ( is_guest_l1_slot(i) && - unlikely(!get_page_from_l1e(pl1e[i], d, d)) ) - goto fail; + if ( is_guest_l1_slot(i) ) + switch ( get_page_from_l1e(pl1e[i], d, d) ) + { + case 0: + goto fail; + case -1: + l1e_remove_flags(pl1e[i], _PAGE_RW); + break; + } =20 adjust_guest_l1e(pl1e[i], d); } @@ -1766,8 +1778,14 @@ static int mod_l1_entry(l1_pgentry_t *pl return rc; } =20 - if ( unlikely(!get_page_from_l1e(nl1e, pt_dom, pg_dom)) ) + switch ( get_page_from_l1e(nl1e, pt_dom, pg_dom) ) + { + case 0: return 0; 
+ case -1: + l1e_remove_flags(nl1e, _PAGE_RW); + break; + } =20 adjust_guest_l1e(nl1e, pt_dom); if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, = pt_vcpu, @@ -4992,8 +5010,9 @@ static int ptwr_emulated_update( =20 /* Check the new PTE. */ nl1e =3D l1e_from_intpte(val); - if ( unlikely(!get_page_from_l1e(nl1e, d, d)) ) + switch ( get_page_from_l1e(nl1e, d, d) ) { + case 0: if ( is_pv_32bit_domain(d) && (bytes =3D=3D 4) && (unaligned_addr = & 4) && !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) { @@ -5012,6 +5031,10 @@ static int ptwr_emulated_update( MEM_LOG("ptwr_emulate: could not get_page_from_l1e()"); return X86EMUL_UNHANDLEABLE; } + break; + case -1: + l1e_remove_flags(nl1e, _PAGE_RW); + break; } =20 adjust_guest_l1e(nl1e, d); --- 2010-08-12.orig/xen/arch/x86/mm/hap/p2m-ept.c 2010-08-12 = 17:36:43.000000000 +0200 +++ 2010-08-12/xen/arch/x86/mm/hap/p2m-ept.c 2010-08-12 17:16:32.0000000= 00 +0200 @@ -72,9 +72,13 @@ static void ept_p2m_type_to_flags(ept_en entry->r =3D entry->w =3D entry->x =3D 0; return; case p2m_ram_rw: - case p2m_mmio_direct: entry->r =3D entry->w =3D entry->x =3D 1; return; + case p2m_mmio_direct: + entry->r =3D entry->x =3D 1; + entry->w =3D !rangeset_contains_singleton(mmio_ro_ranges, + entry->mfn); + return; case p2m_ram_logdirty: case p2m_ram_ro: case p2m_ram_shared: @@ -716,6 +720,9 @@ static void ept_change_entry_type_global if ( ept_get_asr(d) =3D=3D 0 ) return; =20 + BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); + BUG_ON(ot !=3D nt && (ot =3D=3D p2m_mmio_direct || nt =3D=3D = p2m_mmio_direct)); + ept_change_entry_type_page(_mfn(ept_get_asr(d)), ept_get_wl(d), ot, = nt); =20 ept_sync_domain(d); --- 2010-08-12.orig/xen/arch/x86/mm/p2m.c 2010-08-12 17:36:43.0000000= 00 +0200 +++ 2010-08-12/xen/arch/x86/mm/p2m.c 2010-08-12 17:16:32.000000000 = +0200 @@ -72,7 +72,7 @@ boolean_param("hap_1gb", opt_hap_1gb); #define SUPERPAGE_PAGES (1UL << 9) #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))=3D=3D0) =20 -static 
unsigned long p2m_type_to_flags(p2m_type_t t)=20 +static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn) { unsigned long flags; #ifdef __x86_64__ @@ -101,7 +101,9 @@ static unsigned long p2m_type_to_flags(p case p2m_mmio_dm: return flags; case p2m_mmio_direct: - return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD; + if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) ) + flags |=3D _PAGE_RW; + return flags | P2M_BASE_FLAGS | _PAGE_PCD; case p2m_populate_on_demand: return flags; } @@ -1299,8 +1301,10 @@ p2m_set_entry(struct p2m_domain *p2m, un domain_crash(p2m->domain); goto out; } + ASSERT(!mfn_valid(mfn) || p2mt !=3D p2m_mmio_direct); l3e_content =3D mfn_valid(mfn)=20 - ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt) | = _PAGE_PSE) + ? l3e_from_pfn(mfn_x(mfn), + p2m_type_to_flags(p2mt, mfn) | _PAGE_PSE) : l3e_empty(); entry_content.l1 =3D l3e_content.l3; paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, @@ -1334,7 +1338,8 @@ p2m_set_entry(struct p2m_domain *p2m, un ASSERT(p2m_entry); =20 if ( mfn_valid(mfn) || (p2mt =3D=3D p2m_mmio_direct) ) - entry_content =3D l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p= 2mt)); + entry_content =3D l1e_from_pfn(mfn_x(mfn), + p2m_type_to_flags(p2mt, mfn)); else entry_content =3D l1e_empty(); =20 @@ -1358,9 +1363,11 @@ p2m_set_entry(struct p2m_domain *p2m, un goto out; } =20 + ASSERT(!mfn_valid(mfn) || p2mt !=3D p2m_mmio_direct); if ( mfn_valid(mfn) || p2m_is_magic(p2mt) ) l2e_content =3D l2e_from_pfn(mfn_x(mfn), - p2m_type_to_flags(p2mt) | = _PAGE_PSE); + p2m_type_to_flags(p2mt, mfn) | + _PAGE_PSE); else l2e_content =3D l2e_empty(); =20 @@ -2437,6 +2444,7 @@ void p2m_change_type_global(struct p2m_d #endif /* CONFIG_PAGING_LEVELS =3D=3D 4 */ =20 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); + BUG_ON(ot !=3D nt && (ot =3D=3D p2m_mmio_direct || nt =3D=3D = p2m_mmio_direct)); =20 if ( !paging_mode_translate(p2m->domain) ) return; @@ -2478,7 +2486,7 @@ void p2m_change_type_global(struct p2m_d continue; mfn =3D 
l3e_get_pfn(l3e[i3]); gfn =3D get_gpfn_from_mfn(mfn); - flags =3D p2m_type_to_flags(nt); + flags =3D p2m_type_to_flags(nt, _mfn(mfn)); l1e_content =3D l1e_from_pfn(mfn, flags | _PAGE_PSE); paging_write_p2m_entry(p2m->domain, gfn, (l1_pgentry_t *)&l3e[i3], @@ -2509,7 +2517,7 @@ void p2m_change_type_global(struct p2m_d #endif ) * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;= =20 - flags =3D p2m_type_to_flags(nt); + flags =3D p2m_type_to_flags(nt, _mfn(mfn)); l1e_content =3D l1e_from_pfn(mfn, flags | _PAGE_PSE); paging_write_p2m_entry(p2m->domain, gfn, (l1_pgentry_t *)&l2e[i2], @@ -2533,7 +2541,7 @@ void p2m_change_type_global(struct p2m_d ) * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES;= =20 /* create a new 1le entry with the new type */ - flags =3D p2m_type_to_flags(nt); + flags =3D p2m_type_to_flags(nt, _mfn(mfn)); l1e_content =3D l1e_from_pfn(mfn, flags); paging_write_p2m_entry(p2m->domain, gfn, &l1e[i1], l1mfn, l1e_content, 1); --- 2010-08-12.orig/xen/arch/x86/mm/shadow/multi.c 2010-08-12 = 17:36:43.000000000 +0200 +++ 2010-08-12/xen/arch/x86/mm/shadow/multi.c 2010-08-12 17:16:32.0000000= 00 +0200 @@ -653,7 +653,9 @@ _sh_propagate(struct vcpu *v,=20 } =20 /* Read-only memory */ - if ( p2m_is_readonly(p2mt) ) + if ( p2m_is_readonly(p2mt) || + (p2mt =3D=3D p2m_mmio_direct && + rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mfn))) = ) sflags &=3D ~_PAGE_RW; =20 // protect guest page tables @@ -1204,15 +1206,19 @@ static int shadow_set_l1e(struct vcpu *v /* About to install a new reference */ =20 if ( shadow_mode_refcounts(d) ) { TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF); - if ( shadow_get_page_from_l1e(new_sl1e, d, new_type) =3D=3D 0 = )=20 + switch ( shadow_get_page_from_l1e(new_sl1e, d, new_type) ) { + case 0: /* Doesn't look like a pagetable. 
*/ flags |=3D SHADOW_SET_ERROR; new_sl1e =3D shadow_l1e_empty(); - } - else - { + break; + case -1: + shadow_l1e_remove_flags(new_sl1e, _PAGE_RW); + /* fall through */ + default: shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d); + break; } } }=20 --- 2010-08-12.orig/xen/arch/x86/msi.c 2010-08-12 17:36:43.000000000 = +0200 +++ 2010-08-12/xen/arch/x86/msi.c 2010-08-12 18:09:43.000000000 = +0200 @@ -16,12 +16,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -520,6 +522,43 @@ static int msi_capability_init(struct pc return 0; } =20 +static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir) +{ + u8 limit; + u32 addr; + + switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) ) + { + case PCI_HEADER_TYPE_NORMAL: + limit =3D 6; + break; + case PCI_HEADER_TYPE_BRIDGE: + limit =3D 2; + break; + case PCI_HEADER_TYPE_CARDBUS: + limit =3D 1; + break; + default: + return 0; + } + + if ( bir >=3D limit ) + return 0; + addr =3D pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * = 4); + if ( (addr & PCI_BASE_ADDRESS_SPACE) =3D=3D PCI_BASE_ADDRESS_SPACE_IO = ) + return 0; + if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) =3D=3D PCI_BASE_ADDRESS_M= EM_TYPE_64 ) + { + addr &=3D ~PCI_BASE_ADDRESS_MEM_MASK; + if ( ++bir >=3D limit ) + return 0; + return addr | + ((u64)pci_conf_read32(bus, slot, func, + PCI_BASE_ADDRESS_0 + bir * 4) << = 32); + } + return addr & ~PCI_BASE_ADDRESS_MEM_MASK; +} + /** * msix_capability_init - configure device's MSI-X capability * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -532,7 +571,8 @@ static int msi_capability_init(struct pc **/ static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi, - struct msi_desc **desc) + struct msi_desc **desc, + unsigned int nr_entries) { struct msi_desc *entry; int pos; @@ -587,6 +627,69 @@ static int msix_capability_init(struct p =20 list_add_tail(&entry->list, &dev->msi_list); =20 
+ if ( !dev->msix_nr_entries ) + { + u64 pba_paddr; + u32 pba_offset; + + ASSERT(!dev->msix_used_entries); + WARN_ON(msi->table_base !=3D read_pci_mem_bar(bus, slot, func, = bir)); + + dev->msix_nr_entries =3D nr_entries; + dev->msix_table.first =3D PFN_DOWN(table_paddr); + dev->msix_table.last =3D PFN_DOWN(table_paddr + + nr_entries * PCI_MSIX_ENTRY_SIZE = - 1); + WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev->msix_table.fi= rst, + dev->msix_table.last)); + + pba_offset =3D pci_conf_read32(bus, slot, func, + msix_pba_offset_reg(pos)); + bir =3D (u8)(pba_offset & PCI_MSIX_BIRMASK); + pba_paddr =3D read_pci_mem_bar(bus, slot, func, bir); + WARN_ON(!pba_paddr); + pba_paddr +=3D pba_offset & ~PCI_MSIX_BIRMASK; + + dev->msix_pba.first =3D PFN_DOWN(pba_paddr); + dev->msix_pba.last =3D PFN_DOWN(pba_paddr + + BITS_TO_LONGS(nr_entries) - 1); + WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, dev->msix_pba.firs= t, + dev->msix_pba.last)); + + if ( rangeset_add_range(mmio_ro_ranges, dev->msix_table.first, + dev->msix_table.last) ) + WARN(); + if ( rangeset_add_range(mmio_ro_ranges, dev->msix_pba.first, + dev->msix_pba.last) ) + WARN(); +printk("MSIX%02x:%02x.%x: table@(%lx,%lx), pba@(%lx,%lx)\n", bus, slot, = func, + dev->msix_table.first, dev->msix_table.last, + dev->msix_pba.first, dev->msix_pba.last);//temp + + if ( dev->domain ) + p2m_change_entry_type_global(p2m_get_hostp2m(dev->domain), + p2m_mmio_direct, p2m_mmio_direct)= ; + if ( !dev->domain || !paging_mode_translate(dev->domain) ) + { + struct domain *d =3D dev->domain; + + if ( !d ) + for_each_domain(d) + if ( !paging_mode_translate(d) && + (iomem_access_permitted(d, dev->msix_table.first,= + dev->msix_table.last) || + iomem_access_permitted(d, dev->msix_pba.first, + dev->msix_pba.last)) ) + break; + if ( d ) + { + /* XXX How to deal with existing mappings? 
*/ + } + } + } + WARN_ON(dev->msix_nr_entries !=3D nr_entries); + WARN_ON(dev->msix_table.first !=3D (table_paddr >> PAGE_SHIFT)); + ++dev->msix_used_entries; + /* Mask interrupt here */ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); =20 @@ -707,7 +810,7 @@ static int __pci_enable_msix(struct msi_ return 0; } =20 - status =3D msix_capability_init(pdev, msi, desc); + status =3D msix_capability_init(pdev, msi, desc, nr_entries); return status; } =20 @@ -732,6 +835,16 @@ static void __pci_disable_msix(struct ms writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); =20 pci_conf_write16(bus, slot, func, msix_control_reg(pos), control); + + if ( !--dev->msix_used_entries ) + { + if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first, + dev->msix_table.last) ) + WARN(); + if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first, + dev->msix_pba.last) ) + WARN(); + } } =20 /* --- 2010-08-12.orig/xen/drivers/passthrough/io.c 2010-08-12 = 17:36:43.000000000 +0200 +++ 2010-08-12/xen/drivers/passthrough/io.c 2010-08-12 17:16:32.0000000= 00 +0200 @@ -26,6 +26,8 @@ #include #include =20 +struct rangeset *__read_mostly mmio_ro_ranges; + static void hvm_dirq_assist(unsigned long _d); =20 bool_t pt_irq_need_timer(uint32_t flags) @@ -565,3 +567,11 @@ void hvm_dpci_eoi(struct domain *d, unsi unlock: spin_unlock(&d->event_lock); } + +static int __init setup_mmio_ro_ranges(void) +{ + mmio_ro_ranges =3D rangeset_new(NULL, "r/o mmio ranges", + RANGESETF_prettyprint_hex); + return 0; +} +__initcall(setup_mmio_ro_ranges); --- 2010-08-12.orig/xen/include/xen/iommu.h 2010-08-12 17:36:43.0000000= 00 +0200 +++ 2010-08-12/xen/include/xen/iommu.h 2010-08-12 17:16:32.000000000 = +0200 @@ -31,6 +31,8 @@ extern bool_t force_iommu, iommu_verbose extern bool_t iommu_workaround_bios_bug, iommu_passthrough; extern bool_t iommu_snoop, iommu_qinval, iommu_intremap; =20 +extern struct rangeset *mmio_ro_ranges; + #define domain_hvm_iommu(d) 
(&d->arch.hvm_domain.hvm_iommu) =20 #define MAX_IOMMUS 32 --- 2010-08-12.orig/xen/include/xen/pci.h 2010-08-12 17:36:43.0000000= 00 +0200 +++ 2010-08-12/xen/include/xen/pci.h 2010-08-12 17:16:32.000000000 = +0200 @@ -45,6 +45,10 @@ struct pci_dev { struct list_head domain_list; =20 struct list_head msi_list; + unsigned int msix_nr_entries, msix_used_entries; + struct { + unsigned long first, last; + } msix_table, msix_pba; int msix_table_refcnt[MAX_MSIX_TABLE_PAGES]; int msix_table_idx[MAX_MSIX_TABLE_PAGES]; spinlock_t msix_table_lock; --=__Part89A4AD82.0__= Content-Type: text/plain; name="x86-msix-protect-table-and-pba.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="x86-msix-protect-table-and-pba.patch" Below/attached is an untested and possibly not yet complete patch=0Aattempt= ing to address the problem originally described on this thread=0A(can't = really test this myself as I don't have, with one exception,=0Aany MSI-X = capable devices around, and the exceptional one doesn't have=0Aa driver = making use of it). 
I had sent it once before, it only got=0Arefreshed = since then to build with xen-unstable as of c/s 21952, and=0AI'm re-sending= it mainly because there was no feedback so far, despite=0Athe original = problem representing a security issue.=0A=0AIt tracks MMIO MFNs required = to only have read-only guest access=0A(determined when the first MSI-X = interrupt gets enabled on a device)=0Ain a global range set, and enforces = the write protection as=0Atranslations get established.=0A=0AThe changes = are made under the assumption that p2m_mmio_direct will=0Aonly ever be = used for order 0 pages.=0A=0AAn open question is whether dealing with pv = guests (including the=0AIOMMU-less case) is necessary, as handling = mappings a domain may=0Aalready have in place at the time the first = interrupt gets set up=0Awould require scanning all of the guest's page = table pages.=0A=0AAn alternative would be to determine and insert the = address ranges=0Aearlier into mmio_ro_ranges, but that would require a = hook in the=0APCI config space writes, which is particularly problematic = in case=0AMMCONFIG accesses are being used.=0A=0AA second alternative = would be to require Dom0 to report all devices=0A(or at least all MSI-X = capable ones) regardless of whether they would=0Abe used by that domain, = and do so after resources got determined/=0Aassigned for them (i.e. 
a = second notification later than the one=0Acurrently happening from the PCI = bus scan would be needed).=0A=0A--- 2010-08-12.orig/xen/arch/x86/mm.c = 2010-08-12 17:36:43.000000000 +0200=0A+++ 2010-08-12/xen/arch/x86/mm.c = 2010-08-12 17:16:32.000000000 +0200=0A@@ -824,7 +824,13 @@ get_page_from_l1= e(=0A return 0;=0A }=0A =0A- return 1;=0A+ = if ( !(l1f & _PAGE_RW) || IS_PRIV(pg_owner) ||=0A+ !rangeset_= contains_singleton(mmio_ro_ranges, mfn) )=0A+ return 1;=0A+ = dprintk(XENLOG_G_WARNING,=0A+ "d%d: Forcing read-only = access to MFN %lx\n",=0A+ l1e_owner->domain_id, mfn);=0A+ = return -1;=0A }=0A =0A if ( unlikely(real_pg_owner !=3D = pg_owner) )=0A@@ -1180,9 +1186,15 @@ static int alloc_l1_table(struct = page_in=0A =0A for ( i =3D 0; i < L1_PAGETABLE_ENTRIES; i++ )=0A = {=0A- if ( is_guest_l1_slot(i) &&=0A- unlikely(!get_page= _from_l1e(pl1e[i], d, d)) )=0A- goto fail;=0A+ if ( = is_guest_l1_slot(i) )=0A+ switch ( get_page_from_l1e(pl1e[i], = d, d) )=0A+ {=0A+ case 0:=0A+ goto = fail;=0A+ case -1:=0A+ l1e_remove_flags(pl1e[i], = _PAGE_RW);=0A+ break;=0A+ }=0A =0A = adjust_guest_l1e(pl1e[i], d);=0A }=0A@@ -1766,8 +1778,14 @@ static int = mod_l1_entry(l1_pgentry_t *pl=0A return rc;=0A }=0A = =0A- if ( unlikely(!get_page_from_l1e(nl1e, pt_dom, pg_dom)) )=0A+ = switch ( get_page_from_l1e(nl1e, pt_dom, pg_dom) )=0A+ {=0A+ = case 0:=0A return 0;=0A+ case -1:=0A+ = l1e_remove_flags(nl1e, _PAGE_RW);=0A+ break;=0A+ }=0A = =0A adjust_guest_l1e(nl1e, pt_dom);=0A if ( unlikely(!= UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,=0A@@ -4992,8 +5010,9 = @@ static int ptwr_emulated_update(=0A =0A /* Check the new PTE. 
*/=0A = nl1e =3D l1e_from_intpte(val);=0A- if ( unlikely(!get_page_from_l1e(= nl1e, d, d)) )=0A+ switch ( get_page_from_l1e(nl1e, d, d) )=0A = {=0A+ case 0:=0A if ( is_pv_32bit_domain(d) && (bytes =3D=3D 4) = && (unaligned_addr & 4) &&=0A !do_cmpxchg && (l1e_get_flags(nl= 1e) & _PAGE_PRESENT) )=0A {=0A@@ -5012,6 +5031,10 @@ static int = ptwr_emulated_update(=0A MEM_LOG("ptwr_emulate: could not = get_page_from_l1e()");=0A return X86EMUL_UNHANDLEABLE;=0A = }=0A+ break;=0A+ case -1:=0A+ l1e_remove_flags(nl1e, = _PAGE_RW);=0A+ break;=0A }=0A =0A adjust_guest_l1e(nl1e, = d);=0A--- 2010-08-12.orig/xen/arch/x86/mm/hap/p2m-ept.c 2010-08-12 = 17:36:43.000000000 +0200=0A+++ 2010-08-12/xen/arch/x86/mm/hap/p2m-ept.c = 2010-08-12 17:16:32.000000000 +0200=0A@@ -72,9 +72,13 @@ static void = ept_p2m_type_to_flags(ept_en=0A entry->r =3D entry->w =3D = entry->x =3D 0;=0A return;=0A case p2m_ram_rw:=0A- = case p2m_mmio_direct:=0A entry->r =3D entry->w =3D entry->x = =3D 1;=0A return;=0A+ case p2m_mmio_direct:=0A+ = entry->r =3D entry->x =3D 1;=0A+ entry->w =3D !rangeset_conta= ins_singleton(mmio_ro_ranges,=0A+ = entry->mfn);=0A+ return;=0A case p2m_ram_logdi= rty:=0A case p2m_ram_ro:=0A case p2m_ram_shared:=0A@@ = -716,6 +720,9 @@ static void ept_change_entry_type_global=0A if ( = ept_get_asr(d) =3D=3D 0 )=0A return;=0A =0A+ BUG_ON(p2m_is_grant= (ot) || p2m_is_grant(nt));=0A+ BUG_ON(ot !=3D nt && (ot =3D=3D = p2m_mmio_direct || nt =3D=3D p2m_mmio_direct));=0A+=0A ept_change_entry= _type_page(_mfn(ept_get_asr(d)), ept_get_wl(d), ot, nt);=0A =0A = ept_sync_domain(d);=0A--- 2010-08-12.orig/xen/arch/x86/mm/p2m.c 2010-08-12 = 17:36:43.000000000 +0200=0A+++ 2010-08-12/xen/arch/x86/mm/p2m.c 2010-08-12 = 17:16:32.000000000 +0200=0A@@ -72,7 +72,7 @@ boolean_param("hap_1gb", = opt_hap_1gb);=0A #define SUPERPAGE_PAGES (1UL << 9)=0A #define superpage_al= igned(_x) (((_x)&(SUPERPAGE_PAGES-1))=3D=3D0)=0A =0A-static unsigned long = p2m_type_to_flags(p2m_type_t t) =0A+static unsigned long 
p2m_type_to_flags(= p2m_type_t t, mfn_t mfn)=0A {=0A unsigned long flags;=0A #ifdef = __x86_64__=0A@@ -101,7 +101,9 @@ static unsigned long p2m_type_to_flags(p= =0A case p2m_mmio_dm:=0A return flags;=0A case p2m_mmio_dir= ect:=0A- return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;=0A+ = if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )=0A+ = flags |=3D _PAGE_RW;=0A+ return flags | P2M_BASE_FLAGS | = _PAGE_PCD;=0A case p2m_populate_on_demand:=0A return flags;=0A = }=0A@@ -1299,8 +1301,10 @@ p2m_set_entry(struct p2m_domain *p2m, un=0A = domain_crash(p2m->domain);=0A goto out;=0A = }=0A+ ASSERT(!mfn_valid(mfn) || p2mt !=3D p2m_mmio_direct);=0A = l3e_content =3D mfn_valid(mfn) =0A- ? l3e_from_pfn(mfn_x(mfn)= , p2m_type_to_flags(p2mt) | _PAGE_PSE)=0A+ ? l3e_from_pfn(mfn_x(= mfn),=0A+ p2m_type_to_flags(p2mt, mfn) | = _PAGE_PSE)=0A : l3e_empty();=0A entry_content.l1 =3D = l3e_content.l3;=0A paging_write_p2m_entry(p2m->domain, gfn, = p2m_entry,=0A@@ -1334,7 +1338,8 @@ p2m_set_entry(struct p2m_domain *p2m, = un=0A ASSERT(p2m_entry);=0A =0A if ( mfn_valid(mfn)= || (p2mt =3D=3D p2m_mmio_direct) )=0A- entry_content =3D = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));=0A+ = entry_content =3D l1e_from_pfn(mfn_x(mfn),=0A+ = p2m_type_to_flags(p2mt, mfn));=0A else=0A = entry_content =3D l1e_empty();=0A =0A@@ -1358,9 +1363,11 @@ = p2m_set_entry(struct p2m_domain *p2m, un=0A goto out;=0A = }=0A =0A+ ASSERT(!mfn_valid(mfn) || p2mt !=3D p2m_mmio_dir= ect);=0A if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )=0A = l2e_content =3D l2e_from_pfn(mfn_x(mfn),=0A- = p2m_type_to_flags(p2mt) | _PAGE_PSE);=0A+ = p2m_type_to_flags(p2mt, mfn) |=0A+ = _PAGE_PSE);=0A else=0A l2e_content =3D = l2e_empty();=0A =0A@@ -2437,6 +2444,7 @@ void p2m_change_type_globa= l(struct p2m_d=0A #endif /* CONFIG_PAGING_LEVELS =3D=3D 4 */=0A =0A = BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));=0A+ BUG_ON(ot !=3D nt && = (ot =3D=3D p2m_mmio_direct || nt =3D=3D p2m_mmio_direct));=0A =0A if ( = 
!paging_mode_translate(p2m->domain) )=0A return;=0A@@ -2478,7 = +2486,7 @@ void p2m_change_type_global(struct p2m_d=0A = continue;=0A mfn =3D l3e_get_pfn(l3e[i3]);=0A = gfn =3D get_gpfn_from_mfn(mfn);=0A- flags =3D p2m_type_to= _flags(nt);=0A+ flags =3D p2m_type_to_flags(nt, _mfn(mfn));= =0A l1e_content =3D l1e_from_pfn(mfn, flags | _PAGE_PSE);= =0A paging_write_p2m_entry(p2m->domain, gfn,=0A = (l1_pgentry_t *)&l3e[i3],=0A@@ -2509,7 +2517,7 = @@ void p2m_change_type_global(struct p2m_d=0A #endif=0A = )=0A * L2_PAGETABLE_ENTRIES) * = L1_PAGETABLE_ENTRIES; =0A- flags =3D p2m_type_to_flags(n= t);=0A+ flags =3D p2m_type_to_flags(nt, _mfn(mfn));=0A = l1e_content =3D l1e_from_pfn(mfn, flags | _PAGE_PSE);=0A= paging_write_p2m_entry(p2m->domain, gfn,=0A = (l1_pgentry_t *)&l2e[i2],=0A@@ -2533,7 = +2541,7 @@ void p2m_change_type_global(struct p2m_d=0A = )=0A * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE= _ENTRIES; =0A /* create a new 1le entry with the new = type */=0A- flags =3D p2m_type_to_flags(nt);=0A+ = flags =3D p2m_type_to_flags(nt, _mfn(mfn));=0A = l1e_content =3D l1e_from_pfn(mfn, flags);=0A = paging_write_p2m_entry(p2m->domain, gfn, &l1e[i1],=0A = l1mfn, l1e_content, 1);=0A--- 2010-08-12.orig/xen/arc= h/x86/mm/shadow/multi.c 2010-08-12 17:36:43.000000000 +0200=0A+++ = 2010-08-12/xen/arch/x86/mm/shadow/multi.c 2010-08-12 17:16:32.0000000= 00 +0200=0A@@ -653,7 +653,9 @@ _sh_propagate(struct vcpu *v, =0A }=0A = =0A /* Read-only memory */=0A- if ( p2m_is_readonly(p2mt) )=0A+ = if ( p2m_is_readonly(p2mt) ||=0A+ (p2mt =3D=3D p2m_mmio_direct = &&=0A+ rangeset_contains_singleton(mmio_ro_ranges, mfn_x(target_mf= n))) )=0A sflags &=3D ~_PAGE_RW;=0A =0A // protect guest = page tables=0A@@ -1204,15 +1206,19 @@ static int shadow_set_l1e(struct = vcpu *v=0A /* About to install a new reference */ =0A = if ( shadow_mode_refcounts(d) ) {=0A TRACE_SHADOW_PATH_FLAG(T= RCE_SFLAG_SHADOW_L1_GET_REF);=0A- if ( shadow_get_page_from_l1e(= new_sl1e, d, new_type) =3D=3D 0 ) =0A+ switch ( 
shadow_get_page_= from_l1e(new_sl1e, d, new_type) )=0A {=0A+ case = 0:=0A /* Doesn't look like a pagetable. */=0A = flags |=3D SHADOW_SET_ERROR;=0A new_sl1e =3D shadow_l1e_= empty();=0A- }=0A- else=0A- {=0A+ = break;=0A+ case -1:=0A+ shadow_l1e_remove_= flags(new_sl1e, _PAGE_RW);=0A+ /* fall through */=0A+ = default:=0A shadow_vram_get_l1e(new_sl1e, sl1e, = sl1mfn, d);=0A+ break;=0A }=0A }=0A = } =0A--- 2010-08-12.orig/xen/arch/x86/msi.c 2010-08-12 17:36:43.0000000= 00 +0200=0A+++ 2010-08-12/xen/arch/x86/msi.c 2010-08-12 18:09:43.0000000= 00 +0200=0A@@ -16,12 +16,14 @@=0A #include =0A #include = =0A #include =0A+#include =0A = #include =0A #include =0A #include = =0A #include =0A #include =0A #include =0A+#include =0A #include =0A #include =0A #include =0A@@ -520,6 +522,43 @@ static int = msi_capability_init(struct pc=0A return 0;=0A }=0A =0A+static u64 = read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)=0A+{=0A+ u8 = limit;=0A+ u32 addr;=0A+=0A+ switch ( pci_conf_read8(bus, slot, = func, PCI_HEADER_TYPE) )=0A+ {=0A+ case PCI_HEADER_TYPE_NORMAL:=0A+ = limit =3D 6;=0A+ break;=0A+ case PCI_HEADER_TYPE_BRIDGE:=0A= + limit =3D 2;=0A+ break;=0A+ case PCI_HEADER_TYPE_CARDBUS= :=0A+ limit =3D 1;=0A+ break;=0A+ default:=0A+ = return 0;=0A+ }=0A+=0A+ if ( bir >=3D limit )=0A+ return = 0;=0A+ addr =3D pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + = bir * 4);=0A+ if ( (addr & PCI_BASE_ADDRESS_SPACE) =3D=3D PCI_BASE_ADDRE= SS_SPACE_IO )=0A+ return 0;=0A+ if ( (addr & PCI_BASE_ADDRESS_MEM= _TYPE_MASK) =3D=3D PCI_BASE_ADDRESS_MEM_TYPE_64 )=0A+ {=0A+ addr = &=3D ~PCI_BASE_ADDRESS_MEM_MASK;=0A+ if ( ++bir >=3D limit )=0A+ = return 0;=0A+ return addr |=0A+ ((u64)pci_conf= _read32(bus, slot, func,=0A+ PCI_BASE_A= DDRESS_0 + bir * 4) << 32);=0A+ }=0A+ return addr & ~PCI_BASE_ADDRESS= _MEM_MASK;=0A+}=0A+=0A /**=0A * msix_capability_init - configure device's = MSI-X capability=0A * @dev: pointer to the pci_dev data structure of = MSI-X device function=0A@@ -532,7 +571,8 @@ 
static int msi_capability_init(= struct pc=0A **/=0A static int msix_capability_init(struct pci_dev = *dev,=0A struct msi_info *msi,=0A- = struct msi_desc **desc)=0A+ = struct msi_desc **desc,=0A+ unsigned = int nr_entries)=0A {=0A struct msi_desc *entry;=0A int pos;=0A@@ = -587,6 +627,69 @@ static int msix_capability_init(struct p=0A =0A = list_add_tail(&entry->list, &dev->msi_list);=0A =0A+ if ( !dev->msix_nr_= entries )=0A+ {=0A+ u64 pba_paddr;=0A+ u32 pba_offset;=0A+= =0A+ ASSERT(!dev->msix_used_entries);=0A+ WARN_ON(msi->table_= base !=3D read_pci_mem_bar(bus, slot, func, bir));=0A+=0A+ = dev->msix_nr_entries =3D nr_entries;=0A+ dev->msix_table.first =3D = PFN_DOWN(table_paddr);=0A+ dev->msix_table.last =3D PFN_DOWN(table_p= addr +=0A+ nr_entries * PCI_MSIX_ENT= RY_SIZE - 1);=0A+ WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, = dev->msix_table.first,=0A+ = dev->msix_table.last));=0A+=0A+ pba_offset =3D pci_conf_read32(bus, = slot, func,=0A+ msix_pba_offset_reg(pos= ));=0A+ bir =3D (u8)(pba_offset & PCI_MSIX_BIRMASK);=0A+ = pba_paddr =3D read_pci_mem_bar(bus, slot, func, bir);=0A+ WARN_ON(!p= ba_paddr);=0A+ pba_paddr +=3D pba_offset & ~PCI_MSIX_BIRMASK;=0A+=0A= + dev->msix_pba.first =3D PFN_DOWN(pba_paddr);=0A+ dev->msix_= pba.last =3D PFN_DOWN(pba_paddr +=0A+ = BITS_TO_LONGS(nr_entries) - 1);=0A+ WARN_ON(rangeset_overlaps_range(= mmio_ro_ranges, dev->msix_pba.first,=0A+ = dev->msix_pba.last));=0A+=0A+ if ( rangeset_add_range(mmio_ro_r= anges, dev->msix_table.first,=0A+ dev->msix_= table.last) )=0A+ WARN();=0A+ if ( rangeset_add_range(mmi= o_ro_ranges, dev->msix_pba.first,=0A+ = dev->msix_pba.last) )=0A+ WARN();=0A+printk("MSIX%02x:%02x.%x: = table@(%lx,%lx), pba@(%lx,%lx)\n", bus, slot, func,=0A+ dev->msix_tab= le.first, dev->msix_table.last,=0A+ dev->msix_pba.first, dev->msix_pb= a.last);//temp=0A+=0A+ if ( dev->domain )=0A+ p2m_change_= entry_type_global(p2m_get_hostp2m(dev->domain),=0A+ = p2m_mmio_direct, p2m_mmio_direct);=0A+ if ( = !dev->domain || 
!paging_mode_translate(dev->domain) )=0A+ {=0A+ = struct domain *d =3D dev->domain;=0A+=0A+ if ( !d )=0A+ = for_each_domain(d)=0A+ if ( !paging_mode_t= ranslate(d) &&=0A+ (iomem_access_permitted(d, = dev->msix_table.first,=0A+ = dev->msix_table.last) ||=0A+ iomem_access_permitte= d(d, dev->msix_pba.first,=0A+ = dev->msix_pba.last)) )=0A+ break;=0A+ = if ( d )=0A+ {=0A+ /* XXX How to deal with = existing mappings? */=0A+ }=0A+ }=0A+ }=0A+ = WARN_ON(dev->msix_nr_entries !=3D nr_entries);=0A+ WARN_ON(dev->msix_tab= le.first !=3D (table_paddr >> PAGE_SHIFT));=0A+ ++dev->msix_used_entries= ;=0A+=0A /* Mask interrupt here */=0A writel(1, entry->mask_base + = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);=0A =0A@@ -707,7 +810,7 @@ static int = __pci_enable_msix(struct msi_=0A return 0;=0A }=0A =0A- = status =3D msix_capability_init(pdev, msi, desc);=0A+ status =3D = msix_capability_init(pdev, msi, desc, nr_entries);=0A return status;=0A= }=0A =0A@@ -732,6 +835,16 @@ static void __pci_disable_msix(struct ms=0A = writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);=0A =0A = pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);=0A+= =0A+ if ( !--dev->msix_used_entries )=0A+ {=0A+ if ( = rangeset_remove_range(mmio_ro_ranges, dev->msix_table.first,=0A+ = dev->msix_table.last) )=0A+ WARN();=0A+ = if ( rangeset_remove_range(mmio_ro_ranges, dev->msix_pba.first,=0A+ = dev->msix_pba.last) )=0A+ = WARN();=0A+ }=0A }=0A =0A /*=0A--- 2010-08-12.orig/xen/drivers/passthrou= gh/io.c 2010-08-12 17:36:43.000000000 +0200=0A+++ 2010-08-12/xen/drivers/pa= ssthrough/io.c 2010-08-12 17:16:32.000000000 +0200=0A@@ -26,6 +26,8 @@=0A = #include =0A #include =0A =0A+struct = rangeset *__read_mostly mmio_ro_ranges;=0A+=0A static void hvm_dirq_assist(= unsigned long _d);=0A =0A bool_t pt_irq_need_timer(uint32_t flags)=0A@@ = -565,3 +567,11 @@ void hvm_dpci_eoi(struct domain *d, unsi=0A unlock:=0A = spin_unlock(&d->event_lock);=0A }=0A+=0A+static int __init setup_mmio_ro_= 
ranges(void)=0A+{=0A+ mmio_ro_ranges =3D rangeset_new(NULL, "r/o mmio = ranges",=0A+ RANGESETF_prettyprint_hex);= =0A+ return 0;=0A+}=0A+__initcall(setup_mmio_ro_ranges);=0A--- = 2010-08-12.orig/xen/include/xen/iommu.h 2010-08-12 17:36:43.000000000 = +0200=0A+++ 2010-08-12/xen/include/xen/iommu.h 2010-08-12 17:16:32.0000000= 00 +0200=0A@@ -31,6 +31,8 @@ extern bool_t force_iommu, iommu_verbose=0A = extern bool_t iommu_workaround_bios_bug, iommu_passthrough;=0A extern = bool_t iommu_snoop, iommu_qinval, iommu_intremap;=0A =0A+extern struct = rangeset *mmio_ro_ranges;=0A+=0A #define domain_hvm_iommu(d) (&d->arch.= hvm_domain.hvm_iommu)=0A =0A #define MAX_IOMMUS 32=0A--- 2010-08-12.orig/xe= n/include/xen/pci.h 2010-08-12 17:36:43.000000000 +0200=0A+++ = 2010-08-12/xen/include/xen/pci.h 2010-08-12 17:16:32.000000000 = +0200=0A@@ -45,6 +45,10 @@ struct pci_dev {=0A struct list_head = domain_list;=0A =0A struct list_head msi_list;=0A+ unsigned int = msix_nr_entries, msix_used_entries;=0A+ struct {=0A+ unsigned = long first, last;=0A+ } msix_table, msix_pba;=0A int msix_table_refc= nt[MAX_MSIX_TABLE_PAGES];=0A int msix_table_idx[MAX_MSIX_TABLE_PAGES];= =0A spinlock_t msix_table_lock;=0A --=__Part89A4AD82.0__= Content-Type: text/plain; name="rangeset-overlaps.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="rangeset-overlaps.patch" --- 2010-06-15.orig/xen/common/rangeset.c 2009-11-05 10:13:22.0000000= 00 +0100=0A+++ 2010-06-15/xen/common/rangeset.c 2010-07-12 12:08:16.0000000= 00 +0200=0A@@ -251,6 +251,22 @@ int rangeset_contains_range(=0A return = contains;=0A }=0A =0A+int rangeset_overlaps_range(=0A+ struct rangeset = *r, unsigned long s, unsigned long e)=0A+{=0A+ struct range *x;=0A+ = int overlaps;=0A+=0A+ ASSERT(s <=3D e);=0A+=0A+ spin_lock(&r->lock);= =0A+ x =3D find_range(r, e);=0A+ overlaps =3D (x && (s <=3D = x->e));=0A+ spin_unlock(&r->lock);=0A+=0A+ return overlaps;=0A+}=0A+= =0A int rangeset_report_ranges(=0A struct 
rangeset *r, unsigned long = s, unsigned long e,=0A int (*cb)(unsigned long s, unsigned long e, = void *), void *ctxt)=0A--- 2010-06-15.orig/xen/include/xen/rangeset.h = 2009-11-05 10:13:22.000000000 +0100=0A+++ 2010-06-15/xen/include/xen/ranges= et.h 2010-07-12 12:09:55.000000000 +0200=0A@@ -53,6 +53,8 @@ int = __must_check rangeset_remove_range(=0A struct rangeset *r, unsigned = long s, unsigned long e);=0A int __must_check rangeset_contains_range(=0A = struct rangeset *r, unsigned long s, unsigned long e);=0A+int __must_che= ck rangeset_overlaps_range(=0A+ struct rangeset *r, unsigned long s, = unsigned long e);=0A int rangeset_report_ranges(=0A struct rangeset = *r, unsigned long s, unsigned long e,=0A int (*cb)(unsigned long s, = unsigned long e, void *), void *ctxt);=0A --=__Part89A4AD82.0__= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xensource.com http://lists.xensource.com/xen-devel --=__Part89A4AD82.0__=--