From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46091) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bwLxg-0001PS-6X for qemu-devel@nongnu.org; Tue, 18 Oct 2016 00:25:14 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bwLxd-000756-6a for qemu-devel@nongnu.org; Tue, 18 Oct 2016 00:25:12 -0400 Received: from ozlabs.org ([103.22.144.67]:41107) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1bwLxc-0006wv-Kh for qemu-devel@nongnu.org; Tue, 18 Oct 2016 00:25:09 -0400 Date: Tue, 18 Oct 2016 14:57:22 +1100 From: David Gibson Message-ID: <20161018035722.GE25390@umbus.fritz.box> References: <1476719064-9242-1-git-send-email-bd.aviv@gmail.com> <1476719064-9242-3-git-send-email-bd.aviv@gmail.com> MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha256; protocol="application/pgp-signature"; boundary="N6OI5UwltL9FF6Nb" Content-Disposition: inline In-Reply-To: <1476719064-9242-3-git-send-email-bd.aviv@gmail.com> Subject: Re: [Qemu-devel] [PATCH v4 RESEND 2/3] IOMMU: change iommu_op->translate's is_write to flags, add support to NO_FAIL flag mode List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: "Aviv B.D" Cc: qemu-devel@nongnu.org, Jan Kiszka , Alex Williamson , Peter Xu , "Michael S. Tsirkin" --N6OI5UwltL9FF6Nb Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Mon, Oct 17, 2016 at 06:44:23PM +0300, Aviv B.D wrote: > From: "Aviv Ben-David" >=20 > Supports translation trials without reporting error to guest on > translation failure. >=20 > Signed-off-by: Aviv Ben-David > --- > exec.c | 3 ++- > hw/i386/amd_iommu.c | 4 +-- > hw/i386/intel_iommu.c | 70 +++++++++++++++++++++++++++++++++------------= ------ > include/exec/memory.h | 6 +++-- > memory.c | 3 ++- > 5 files changed, 55 insertions(+), 31 deletions(-) >=20 > diff --git a/exec.c b/exec.c > index 374c364..266fa01 100644 > --- a/exec.c > +++ b/exec.c > @@ -432,7 +432,8 @@ MemoryRegion *address_space_translate(AddressSpace *a= s, hwaddr addr, > break; > } > =20 > - iotlb =3D mr->iommu_ops->translate(mr, addr, is_write); > + iotlb =3D mr->iommu_ops->translate(mr, addr, > + is_write ? IOMMU_WO : IOMMU_RO); > addr =3D ((iotlb.translated_addr & ~iotlb.addr_mask) > | (addr & iotlb.addr_mask)); > *plen =3D MIN(*plen, (addr | iotlb.addr_mask) - addr + 1); > diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c > index 47b79d9..1f0d76b 100644 > --- a/hw/i386/amd_iommu.c > +++ b/hw/i386/amd_iommu.c > @@ -988,7 +988,7 @@ static inline bool amdvi_is_interrupt_addr(hwaddr add= r) > } > =20 > static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, > - bool is_write) > + IOMMUAccessFlags flags) You've also updated the intel viommu implementation for the new notifier flags semantics, but none of the others (AMD and sPAPR). That needs to be done as well. > { > AMDVIAddressSpace *as =3D container_of(iommu, AMDVIAddressSpace, iom= mu); > AMDVIState *s =3D as->iommu_state; > @@ -1017,7 +1017,7 @@ static IOMMUTLBEntry amdvi_translate(MemoryRegion *= iommu, hwaddr addr, > return ret; > } > =20 > - amdvi_do_translate(as, addr, is_write, &ret); > + amdvi_do_translate(as, addr, flags & IOMMU_WO, &ret); > trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), > PCI_FUNC(as->devfn), addr, ret.translated_addr); > return ret; > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 69730cb..dcf45f0 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -364,7 +364,7 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUSta= te *s, uint16_t index) > /* Must not update F field now, should be done later */ > static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, > uint16_t source_id, hwaddr addr, > - VTDFaultReason fault, bool is_write) > + VTDFaultReason fault, IOMMUAccessFlags flags) > { > uint64_t hi =3D 0, lo; > hwaddr frcd_reg_addr =3D DMAR_FRCD_REG_OFFSET + (((uint64_t)index) <= < 4); > @@ -373,7 +373,7 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint1= 6_t index, > =20 > lo =3D VTD_FRCD_FI(addr); > hi =3D VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); > - if (!is_write) { > + if (!(flags =3D=3D IOMMU_WO || flags =3D=3D IOMMU_RW)) { > hi |=3D VTD_FRCD_T; > } > vtd_set_quad_raw(s, frcd_reg_addr, lo); > @@ -404,7 +404,7 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s= , uint16_t source_id) > /* Log and report an DMAR (address translation) fault to software */ > static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, > hwaddr addr, VTDFaultReason fault, > - bool is_write) > + IOMMUAccessFlags flags) > { > uint32_t fsts_reg =3D vtd_get_long_raw(s, DMAR_FSTS_REG); > =20 > @@ -415,7 +415,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s,= uint16_t source_id, > return; > } > VTD_DPRINTF(FLOG, "sid 0x%"PRIx16 ", fault %d, addr 0x%"PRIx64 > - ", is_write %d", source_id, fault, addr, is_write); > + ", flags %d", source_id, fault, addr, flags); > if (fsts_reg & VTD_FSTS_PFO) { > VTD_DPRINTF(FLOG, "new fault is not recorded due to " > "Primary Fault Overflow"); > @@ -433,7 +433,7 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s,= uint16_t source_id, > return; > } > =20 > - vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_writ= e); > + vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, flags); > =20 > if (fsts_reg & VTD_FSTS_PPF) { > VTD_DPRINTF(FLOG, "there are pending faults already, " > @@ -629,7 +629,8 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, ui= nt32_t level) > /* Given the @gpa, get relevant @slptep. @slpte_level will be the last l= evel > * of the translation, can be used for deciding the size of large page. > */ > -static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, bool is_w= rite, > +static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint64_t gpa, > + IOMMUAccessFlags flags, > uint64_t *slptep, uint32_t *slpte_level, > bool *reads, bool *writes) > { > @@ -649,7 +650,20 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uin= t64_t gpa, bool is_write, > } > =20 > /* FIXME: what is the Atomics request here? */ > - access_right_check =3D is_write ? VTD_SL_W : VTD_SL_R; > + switch (flags) { > + case IOMMU_WO: > + access_right_check =3D VTD_SL_W; > + break; > + case IOMMU_RO: > + access_right_check =3D VTD_SL_R; > + break; > + case IOMMU_RW: /* passthrow */ > + case IOMMU_NO_FAIL: > + access_right_check =3D VTD_SL_R | VTD_SL_W; > + break; I'm very confused by the semantics of IOMMU_NO_FAIL. At first I thought it was an additional flag that would be ORed with the access mode to trigger extra behaviour. But here it seems that you'd use it _instead_ of a normal access mode. I don't really see how that makes sense. > + default: > + assert(0); > + } > =20 > while (true) { > offset =3D vtd_gpa_level_offset(gpa, level); > @@ -671,8 +685,9 @@ static int vtd_gpa_to_slpte(VTDContextEntry *ce, uint= 64_t gpa, bool is_write, > if (!(slpte & access_right_check)) { > VTD_DPRINTF(GENERAL, "error: lack of %s permission for " > "gpa 0x%"PRIx64 " slpte 0x%"PRIx64, > - (is_write ? "write" : "read"), gpa, slpte); > - return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; > + (flags =3D=3D IOMMU_WO ? "write" : "read"), gpa,= slpte); > + return (flags =3D=3D IOMMU_RW || flags =3D=3D IOMMU_WO) ? > + -VTD_FR_WRITE : -VTD_FR_READ; > } > if (vtd_slpte_nonzero_rsvd(slpte, level)) { > VTD_DPRINTF(GENERAL, "error: non-zero reserved field in seco= nd " > @@ -789,11 +804,12 @@ static inline bool vtd_is_interrupt_addr(hwaddr add= r) > * > * @bus_num: The bus number > * @devfn: The devfn, which is the combined of device and function numb= er > - * @is_write: The access is a write operation > + * @flags: The access is a write operation > * @entry: IOMMUTLBEntry that contain the addr to be translated and resu= lt > */ > static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, > - uint8_t devfn, hwaddr addr, bool is_w= rite, > + uint8_t devfn, hwaddr addr, > + IOMMUAccessFlags flags, > IOMMUTLBEntry *entry) > { > IntelIOMMUState *s =3D vtd_as->iommu_state; > @@ -811,7 +827,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *v= td_as, PCIBus *bus, > =20 > /* Check if the request is in interrupt address range */ > if (vtd_is_interrupt_addr(addr)) { > - if (is_write) { > + if (flags =3D=3D IOMMU_WO || flags =3D=3D IOMMU_RW) { > /* FIXME: since we don't know the length of the access here,= we > * treat Non-DWORD length write requests without PASID as > * interrupt requests, too. Withoud interrupt remapping supp= ort, > @@ -827,7 +843,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *v= td_as, PCIBus *bus, > } else { > VTD_DPRINTF(GENERAL, "error: read request from interrupt add= ress " > "gpa 0x%"PRIx64, addr); > - vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, is_wr= ite); > + vtd_report_dmar_fault(s, source_id, addr, VTD_FR_READ, flags= ); > return; > } > } > @@ -856,12 +872,14 @@ static void vtd_do_iommu_translate(VTDAddressSpace = *vtd_as, PCIBus *bus, > is_fpd_set =3D ce.lo & VTD_CONTEXT_ENTRY_FPD; > if (ret_fr) { > ret_fr =3D -ret_fr; > - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { > - VTD_DPRINTF(FLOG, "fault processing is disabled for DMA " > + if (flags !=3D IOMMU_NO_FAIL) { > + if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { > + VTD_DPRINTF(FLOG, "fault processing is disabled for = DMA " > "requests through this context-entry " > "(with FPD Set)"); Um.. the descriptions seem to imply that NO_FAIL prevents reporting failures to the guest. But here it seems to be the opposite - you only transmit the fault if the flags are !=3D IOMMU_NO_FAIL. > - } else { > - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_wri= te); > + } else { > + vtd_report_dmar_fault(s, source_id, addr, ret_fr, fl= ags); > + } > } > return; > } > @@ -874,15 +892,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace = *vtd_as, PCIBus *bus, > cc_entry->context_cache_gen =3D s->context_cache_gen; > } > =20 > - ret_fr =3D vtd_gpa_to_slpte(&ce, addr, is_write, &slpte, &level, > + ret_fr =3D vtd_gpa_to_slpte(&ce, addr, flags, &slpte, &level, > &reads, &writes); > if (ret_fr) { > ret_fr =3D -ret_fr; > - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { > - VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requ= ests " > - "through this context-entry (with FPD Set)"); > - } else { > - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); > + if (flags !=3D IOMMU_NO_FAIL) { > + if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { > + VTD_DPRINTF(FLOG, "fault processing is disabled for DMA " > + "requests through this context-entry (with FPD S= et)"); > + } else { > + vtd_report_dmar_fault(s, source_id, addr, ret_fr, flags); > + } > } > return; > } > @@ -1944,7 +1964,7 @@ static void vtd_mem_write(void *opaque, hwaddr addr, > } > =20 > static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr add= r, > - bool is_write) > + IOMMUAccessFlags flags) > { > VTDAddressSpace *vtd_as =3D container_of(iommu, VTDAddressSpace, iom= mu); > IntelIOMMUState *s =3D vtd_as->iommu_state; > @@ -1966,7 +1986,7 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegi= on *iommu, hwaddr addr, > } > =20 > vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn, addr, > - is_write, &ret); > + flags, &ret); > VTD_DPRINTF(MMU, > "bus %"PRIu8 " slot %"PRIu8 " func %"PRIu8 " devfn %"PRI= u8 > " gpa 0x%"PRIx64 " hpa 0x%"PRIx64, pci_bus_num(vtd_as->b= us), > diff --git a/include/exec/memory.h b/include/exec/memory.h > index 10d7eac..0d4acb9 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -57,6 +57,7 @@ typedef enum { > IOMMU_RO =3D 1, > IOMMU_WO =3D 2, > IOMMU_RW =3D 3, > + IOMMU_NO_FAIL =3D 4, /* may not be present, don't repport error to = guest */ > } IOMMUAccessFlags; > =20 > struct IOMMUTLBEntry { > @@ -168,10 +169,11 @@ struct MemoryRegionOps { > }; > =20 > typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; > - > struct MemoryRegionIOMMUOps { > /* Return a TLB entry that contains a given address. */ > - IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is= _write); > + IOMMUTLBEntry (*translate)(MemoryRegion *iommu, > + hwaddr addr, > + IOMMUAccessFlags flags); > /* Returns minimum supported page size */ > uint64_t (*get_min_page_size)(MemoryRegion *iommu); > /* Called when IOMMU Notifier flag changed */ > diff --git a/memory.c b/memory.c > index 58f9269..dfbb9a0 100644 > --- a/memory.c > +++ b/memory.c > @@ -1563,7 +1563,8 @@ void memory_region_iommu_replay(MemoryRegion *mr, I= OMMUNotifier *n, > granularity =3D memory_region_iommu_get_min_page_size(mr); > =20 > for (addr =3D 0; addr < memory_region_size(mr); addr +=3D granularit= y) { > - iotlb =3D mr->iommu_ops->translate(mr, addr, is_write); > + iotlb =3D mr->iommu_ops->translate(mr, addr, > + is_write ? IOMMU_WO : IOMMU_RO); It probably makes more sense to pust the IOMMUAccessFlags parameter into memory_region_iommu_replay(). > if (iotlb.perm !=3D IOMMU_NONE) { > n->notify(n, &iotlb); > } --=20 David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson --N6OI5UwltL9FF6Nb Content-Type: application/pgp-signature; name="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAEBCAAGBQJYBZ2fAAoJEGw4ysog2bOSKE0P/iRFnZ046XqhWrru5FjqdLvl Y7e7IPtRHeFOsPYK1Ui+cwNuR8qnGiFvsmDwqfwRATJtLQ+3Rxx6r4liQQXgBOZA jvFwe4VUvZRSzgO7I+M92JOiePMakYp/Yd7CPS9/mYonryXZFYJ0APinnEz+UJbF On0NWxds/44D5XRTSGm29YOOVMWYSAWOmEpKIv+8j2/ZTyuOkYJWzzhTddvA+CYW JXq5rDa47nsC8wfiMx0ylo01vrqJeR64hGSO4YGT7amdBjakpaRXt3Pc/vDpJIKr 3t0oH2+KJkT2GtyJOzEW+EfTGC23bBXdjnb/K+Wu55Jr+vKG4Q1fjgSXT3FzO/cT 06gu9bOg5c84IhhAuC4XwGP9vBzscDeW33HUHBPKR9yMvc+YJEf+LWh7maK2pz5r RXW3kzCVw8IwHouSFFLVXOLQtNf0ES7VIB9s3xJyKbzRbfHvQqROKMzbqprDeL1n AuSxm4pLbxjhmBVmxtKipAUngT+In8MJRPo42Qaxfi+9Ijr6+SbR9L5/WNBqhfww ih045JTKWEZwmncGnUxkIMXhcJ08ScTpHwpM1Eiwq/I3aDRCnulVw3p3mW4W5xwX zJ7yQ6ISVh2rC8BNelBmjcBLMNTmze2yoqqy+wLja9jtko78wyDlN6GE3O39yHNC N1sVOcp2ruE0fZoiBU9J =2S1e -----END PGP SIGNATURE----- --N6OI5UwltL9FF6Nb--