From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:46883) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZZaIG-000746-CR for qemu-devel@nongnu.org; Wed, 09 Sep 2015 03:59:52 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZZaIB-0005ut-KV for qemu-devel@nongnu.org; Wed, 09 Sep 2015 03:59:48 -0400 Received: from mx1.redhat.com ([209.132.183.28]:37548) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZZaIB-0005ua-7Q for qemu-devel@nongnu.org; Wed, 09 Sep 2015 03:59:43 -0400 Date: Wed, 9 Sep 2015 10:59:39 +0300 From: "Michael S. Tsirkin" Message-ID: <20150909105813-mutt-send-email-mst@redhat.com> References: <1440458369-7384-1-git-send-email-davidkiarie4@gmail.com> <1440458369-7384-3-git-send-email-davidkiarie4@gmail.com> <55ED871C.8090704@gmail.com> <55EFE386.6080608@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <55EFE386.6080608@gmail.com> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [RFC 2/4] hw/i386: Introduce AMD IOMMU List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Valentine Sinitsyn Cc: David kiarie , Jan Kiszka , QEMU Developers On Wed, Sep 09, 2015 at 12:45:10PM +0500, Valentine Sinitsyn wrote: > On 09.09.2015 12:30, David kiarie wrote: > ...snip... 
>  > >>>+static void amd_iommu_cmdbuf_exec(AMDIOMMUState *s) > >>>+{ > >>>+ unsigned type; > >>>+ uint8_t cmd[IOMMU_COMMAND_SIZE]; > >>>+ > >>>+ IOMMU_DPRINTF(COMMAND, ""); > >>>+ memset(cmd, 0, IOMMU_COMMAND_SIZE); > >>>+ > >>>+ if(dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head, > >>>cmd, IOMMU_COMMAND_SIZE)){ > >>>+ IOMMU_DPRINTF(COMMAND, "error: fail to access memory at > >>>0x%"PRIx64 > >>>+ " + %"PRIu8, s->cmdbuf, s->cmdbuf_head); > >>>+ } > >>>+ > >>>+ type = cmd[CMDBUF_ID_BYTE] >> CMDBUF_ID_RSHIFT; > >>>+ > >>>+ switch(type){ > >>>+ case CMD_COMPLETION_WAIT: > >>>+ /* pretend to wait for command execution to complete */ > >>>+ IOMMU_DPRINTF(COMMAND, "completion wait requested"); > >>>+ amd_iommu_completion_wait(s, cmd); > >>>+ break; > >>>+ case CMD_INVAL_DEVTAB_ENTRY: > >>>+ /* Not implemented yet - should just clear device table > >>>contents */ > >>>+ IOMMU_DPRINTF(COMMAND, "device table entries invalidated"); > >> > >>Better make it clear that these commands are not implemented yet. An IOMMU > >>is hardly usable without invalidators. > > > >I think the "debug printf" here is misleading. AFAIK, the commands > >listed here (there are others which I haven't listed) relate to 'cache' > >which means these commands do nothing in the absence of cache. > > > >The reason why I advertise 'cache' is because the linux driver checks > >for cache and other optional features to determine the IOMMU version > >whereby version 2 of IOMMU is assumed to support these features. > Did you check how address translation is emulated in QEMU? I didn't :) But > you return IOMMUTLBEntry from your translation routine, and I'd rather ensure > it is not cached in some other layer. >  > Besides, omitting invalidation commands will be a problem on real hardware. > As IOMMU emulation is mainly useful for debugging, I'd reproduce this > behaviour in the emulator. 
I mean, if I forget to do CMD_INVAL_DEVTAB_ENTRY > in my guest code, things shouldn't work properly. That's hardly a priority. IOMMU is useful for other things besides debugging. > You can just copy all data structures to AMDIOMMUState and use these "cached" > copies, updated when you run invalidation commands. But this isn't a #1 > priority, you are right. >  > Can you share some test results for this code? > > > >> > >>>+ break; > >>>+ case CMD_INVAL_IOMMU_PAGES: > >>>+ /* invalidate IOMMU cache for a particular domain */ > >>>+ IOMMU_DPRINTF(COMMAND, "IOMMU pages invalidated"); > >>>+ break; > >>>+ case CMD_INVAL_IOTLB_PAGES: > >>>+ /* Only present if remote IOTLB present */ > >>>+ IOMMU_DPRINTF(COMMAND, "IOTLB pages invalidated"); > >>>+ break; > >>>+ case CMD_INVAL_INTR_TABLE: > >>>+ /* no support for interrupt remapping yet */ > >>>+ IOMMU_DPRINTF(COMMAND, "interrupt table invalidated"); > >>>+ break; > >>>+ default: > >>>+ IOMMU_DPRINTF(COMMAND, "unhandled command %d", type); > >>>+ break; > >>>+ } > >>>+ > >>>+} > >>>+ > >>>+static void amd_iommu_cmdbuf_run(AMDIOMMUState *s) > >>>+{ > >>>+ IOMMU_DPRINTF(COMMAND, ""); > >>>+ > >>>+ uint64_t *mmio_cmdbuf_head = (uint64_t*)s->mmior + MMIO_COMMAND_HEAD; > >>>+ > >>>+ if(!s->cmdbuf_enabled){ > >>>+ IOMMU_DPRINTF(COMMAND, "Command buffer not enabled"); > >>>+ return; > >>>+ } > >>>+ > >>>+ while(s->cmdbuf_head != s->cmdbuf_tail) { > >>>+ /* check if there is work to do. 
*/ > >>>+ IOMMU_DPRINTF(COMMAND, "COMMAND BUFFER head at %x COMMAND B= UFFER > >>>tail at %x", > >>>+ s->cmdbuf_head, s->cmdbuf_tail); > >>>+ amd_iommu_cmdbuf_exec(s); > >>>+ s->cmdbuf_head +=3D CMDBUF_ENTRY_SIZE; > >>>+ > >>>+ /* wrap head pointer */ > >>>+ if (s->cmdbuf_head >=3D s->cmdbuf_len * CMDBUF_ENTRY_SIZE) = { > >>>+ s->cmdbuf_head =3D 0; > >>>+ } > >>>+ } > >>>+ > >>>+ *mmio_cmdbuf_head =3D cpu_to_le64(s->cmdbuf_head); > >>>+} > >>>+ > >>>+/* System Software might never read from some of this fields but an= yways > >>>*/ > >>>+static uint64_t amd_iommu_mmio_read(void *opaque, hwaddr addr, unsi= gned > >>>size) > >>>+{ > >>>+ AMDIOMMUState *s =3D opaque; > >>>+ > >>>+ uint64_t val =3D -1; > >>>+ if(addr + size > MMIO_SIZE) { > >>>+ IOMMU_DPRINTF(MMIO, "error: addr outside region: max 0x%"PR= IX64 > >>>+ ", got 0x%"PRIx64 " %d", > >>>+ (uint64_t)MMIO_SIZE, addr, size); > >>>+ return (uint64_t) - 1; > >>>+ } > >>>+ > >>>+ switch(addr & ~0x07){ > >>>+ case MMIO_DEVICE_TABLE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_DEVICE_TABLE read addr 0x%"PR= Ix64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >> > >>What makes these cases different, except debug printf()s? Maybe leave > >>printf()s inside the switch, and factor out the rest. This applies to= other > >>similar cases as well. 
> >> > >> > >>>+ > >>>+ case MMIO_COMMAND_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_BASE read addr 0x%"PR= Ix64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_BASE read addr 0x%"PRIx= 64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_CONTROL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_CONTROL read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EXCL_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXCL_BASE read addr 0x%"PRIx6= 4 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EXCL_LIMIT: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXCL_LIMIT read addr 0x%"PRIx= 64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > 
>>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_HEAD read addr 0x%"PR= Ix64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_TAIL read addr 0x%"PR= Ix64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_HEAD read addr 0x%"PRIx= 64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_TAIL read addr 0x%"PRIx= 64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_STATUS: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_STATUS read addr 0x%"PRIx64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D 
amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_EXT_FEATURES: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXT_FEATURES read addr 0x%"PR= Ix64 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ if(size =3D=3D 2){ > >>>+ val =3D amd_iommu_readw(s, addr); > >>>+ } else if(size =3D=3D 4){ > >>>+ val =3D amd_iommu_readl(s, addr); > >>>+ } > >>>+ else if(size =3D=3D 8){ > >>>+ val =3D amd_iommu_readq(s, addr); > >>>+ } > >>>+ break; > >>>+ > >>>+ default: > >>>+ IOMMU_DPRINTF(MMIO, "UNHANDLED MMIO read addr 0x%"PRIx6= 4 > >>>+ ", size %d offset 0x%"PRIx64, addr, size, > >>>offset); > >>>+ } > >>>+ > >>>+ return val; > >>>+} > >>>+ > >>>+static void iommu_handle_control_write(AMDIOMMUState *s) > >>>+{ > >>>+ /* read whatever is already written incase software is writing = in > >>>chucks less than 8 bytes */ > >>>+ unsigned long control =3D amd_iommu_readq(s, MMIO_CONTROL); > >>>+ s->enabled =3D !!(control & MMIO_CONTROL_IOMMUEN); > >>>+ > >>>+ s->ats_enabled =3D !!(control & MMIO_CONTROL_HTTUNEN); > >>>+ s->evtlog_enabled =3D s->enabled && !!(control & > >>>MMIO_CONTROL_EVENTLOGEN); > >>>+ > >>>+ s->evtlog_intr =3D !!(control & MMIO_CONTROL_EVENTINTEN); > >>>+ s->completion_wait_intr =3D !!(control & MMIO_CONTROL_COMWAITIN= TEN); > >>>+ s->cmdbuf_enabled =3D s->enabled && !!(control & > >>>MMIO_CONTROL_CMDBUFLEN); > >>>+ > >>>+ /* update the flags depending on the control register */ > >>>+ if(s->cmdbuf_enabled) { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) |=3D MMIO_STATUS_CMDBU= F_RUN; > >>>+ } else { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) &=3D ~MMIO_STATUS_CMDB= UF_RUN; > >>>+ } > >>>+ if (s->evtlog_enabled) { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) |=3D MMIO_STATUS_EVTLO= G_RUN; > >>>+ } else { > >>>+ (*(uint64_t*)&s->mmior[MMIO_STATUS]) &=3D ~MMIO_STATUS_EVTL= OG_RUN; > >>>+ } > >>>+ > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_STATUS state 0x%"PRIx64, 
control); > >>>+ > >>>+ amd_iommu_cmdbuf_run(s); > >> > >>Does any write to control register result in draining command buffer?= To me, > >>it looks strange, and I'd expect IOMMU to execute commands only if I = change > >>tail pointer. > >> > >> > >>>+} > >>>+ > >>>+/* FIXME: something might go wrong if System Software writes in chu= nks > >>>+ * of one byte but linux writes in chunks of 4 bytes so currently i= t > >>>+ * works correctly with linux but will definitely be busted if soft= ware > >>>+ * reads/writes 8 bytes > >>>+ */ > >>>+static void amd_iommu_mmio_write(void *opaque, hwaddr addr, uint64_= t val, > >>>unsigned size) > >>>+{ > >>>+ > >>>+ AMDIOMMUState *s =3D opaque; > >>>+ unsigned long offset =3D addr & 0x07; > >>>+ > >>>+ if(addr + size > MMIO_SIZE) { > >>>+ IOMMU_DPRINTF(MMIO, "error: addr outside region: max 0x%"PR= Ix64 > >>>+ ", got 0x%"PRIx64 " %d", > >>>+ (uint64_t)MMIO_SIZE, addr, size); > >>>+ return; > >>>+ } > >>>+ > >>>+ switch(addr & ~0x07){ > >>>+ case MMIO_CONTROL: > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_CONTROL write addr 0x%"PRIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset 0x%"= PRIx64 > >>>, addr, size, val, offset); > >>>+ iommu_handle_control_write(s); > >>>+ break; > >>>+ > >>>+ case MMIO_DEVICE_TABLE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_DEVICE_TABLE write addr 0x%"P= RIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ /* set device table address > >>>+ * This also suffers from inability to tell whether sof= tware > >>>is done writing > >>>+ */ > 
>>>+ if(offset || (size =3D=3D 8)){ > >>>+ unsigned long device_base =3D amd_iommu_readq(s, > >>>MMIO_DEVICE_TABLE); > >>>+ s->devtab =3D (dma_addr_t)(device_base & > >>>MMIO_CMDBUF_BASE_MASK); > >>>+ /* set device table length */ > >>>+ s->devtab_len =3D ((device_base & MMIO_DEVTAB_SIZE_= MASK) + > >>>1) * (MMIO_DEVTAB_SIZE_UNIT / > >>>+ MMIO_DEVTAB_ENTRY_SIZE); > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_HEAD write addr 0x%"P= RIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ s->cmdbuf_head =3D val & MMIO_CMDBUF_HEAD_MASK; > >>>+ amd_iommu_cmdbuf_run(s); > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_BASE write addr 0x%"P= RIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ /* FIXME - make sure System Software has finished writi= ng > >>>incase it writes > >>>+ * in chucks less than 8 bytes in a robust way.As for n= ow, > >>>this hacks works > >>>+ * for the linux driver > >>>+ */ > >>>+ if(offset || (size =3D=3D 8)){ > >>>+ unsigned long command_base =3D amd_iommu_readq(s, > >>>MMIO_COMMAND_BASE); > >>>+ s->cmdbuf =3D (dma_addr_t)(command_base & > >>>MMIO_CMDBUF_BASE_MASK); > >>>+ s->cmdbuf_len =3D 1UL << (s->mmior[MMIO_CMDBUF_SIZE= _BYTE] & > >>>MMIO_CMDBUF_SIZE_MASK); > >>>+ s->cmdbuf_head =3D s->cmdbuf_tail =3D 0; > >>>+ } > >>>+ break; > >>>+ > >>>+ case MMIO_COMMAND_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_COMMAND_TAIL write 
addr 0x%"P= RIx64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ > >>>+ s->cmdbuf_tail =3D val & MMIO_CMDBUF_TAIL_MASK; > >>>+ amd_iommu_cmdbuf_run(s); > >>>+ break; > >>>+ > >>>+ case MMIO_EVENT_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_BASE write addr 0x%"PRI= x64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ s->evtlog =3D (val & MMIO_EVTLOG_BASE_MASK); > >>>+ s->evtlog_len =3D 1UL << > >>>(*(uint64_t*)&s->mmior[MMIO_EVTLOG_SIZE_BYTE] & MMIO_EVTLOG_SIZE_MAS= K); > >>>+ break; > >>>+ > >>>+ case MMIO_EXCL_LIMIT: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EXCL_LIMIT write addr 0x%"PRI= x64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ s->excl_limit =3D (val & MMIO_EXCL_LIMIT_MASK) | > >>>MMIO_EXCL_LIMIT_LOW; > >>>+ break; > >>>+ //TODO : Event handling fixes > >>>+ > >>>+ case MMIO_EVENT_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_HEAD write addr 0x%"PRI= x64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ > >>>+ 
case MMIO_EVENT_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_EVENT_TAIL write addr 0x%"PRI= x64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ /* PPR log base - unused for now */ > >>>+ case MMIO_PPR_BASE: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_PPR_BASE write addr 0x%"PRIx6= 4 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ /* PPR log head - also unused for now */ > >>>+ case MMIO_PPR_HEAD: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_PPR_HEAD write addr 0x%"PRIx6= 4 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ /* PPR log tail - unused for now */ > >>>+ case MMIO_PPR_TAIL: > >>>+ IOMMU_DPRINTF(MMIO, "MMIO_PPR_TAIL write addr 0x%"PRIx6= 4 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ if(size =3D=3D 2){ > >>>+ amd_iommu_writew(s, addr, val); > >>>+ } else if(size =3D=3D 4){ > >>>+ amd_iommu_writel(s, addr, val); > >>>+ } else if(size =3D=3D 8){ > >>>+ amd_iommu_writeq(s, addr, val); > >>>+ } > >>>+ break; > >>>+ > >>>+ default: > >>>+ IOMMU_DPRINTF(MMIO, "UNHANDLED MMIO write addr 0x%"PRIx= 64 > >>>+ ", size %d, val 0x%"PRIx64 ", offset > >>>0x%"PRIx64, addr, size, val, offset); > >>>+ ; > >>>+ } > >>>+ > >>>+} > >>>+ > 
>>>+static void amd_iommu_log_event(AMDIOMMUState *s, AMDIOMMUEvent *ev= t) > >>>+{ > >>>+ /* event logging not enabled */ > >>>+ if(!s->evtlog_enabled || *(uint64_t*)&s->mmior[MMIO_STATUS] | > >>>MMIO_STATUS_EVTLOG_OF){ > >>>+ return; > >>>+ } > >>>+ > >>>+ /* event log buffer full */ > >>>+ if(s->evtlog_tail >=3D s->evtlog_len) { > >>>+ *(uint64_t*)&s->mmior[MMIO_STATUS] |=3D MMIO_STATUS_EVTLOG_= OF; > >>>+ } > >>>+ > >>>+ if(dma_memory_write(&address_space_memory, s->evtlog_len + > >>>s->evtlog_tail, &evt, EVENT_LEN)){ > >>>+ //IOMMU_DPRINTF(ELOG, "error: fail to write at address 0%x"= PRIu8, > >>>s->evtlog + s->evtlog_tail); > >>>+ } > >>>+ > >>>+ s->evtlog_tail +=3D EVENT_LEN; > >>>+ *(uint64_t*)&s->mmior[MMIO_STATUS] |=3D MMIO_STATUS_EVTLOG_INT= R; > >>>+ /* Disable logging if Overflow ? */ > >> > >>No, the spec details event logging restart procedure. Make sure emula= ted > >>IOMMU behaves as described there. > >> > >> > >>>+} > >>>+ > >>>+static void amd_iommu_page_fault(AMDIOMMUState *s, int devfn, unsig= ned > >>>domid, dma_addr_t addr, int present, > >>>+ int is_write) > >>>+{ > >>>+ IOMMU_DPRINTF(ELOG, ""); > >>>+ > >>>+ AMDIOMMUEvent evt; > >>>+ unsigned info; > >>>+ uint16_t status; > >>>+ > >>>+ evt.devfn =3D cpu_to_le16(devfn); > >>>+ evt.reserved =3D 0; > >>>+ evt.domid =3D cpu_to_le16(domid); > >>>+ evt.addr =3D cpu_to_le16(addr); > >>>+ > >>>+ info =3D EVENT_IOPF; > >>>+ > >>>+ if(present) { > >>>+ info |=3D EVENT_IOPF_PR; > >>>+ } > >>>+ > >>>+ if (is_write) { > >>>+ info |=3D EVENT_IOPF_RW; > >>>+ } > >>>+ > >>>+ /* log a page fault */ > >>>+ amd_iommu_log_event(s, &evt); > >>>+ > >>>+ /* Abort the translation */ > >>>+ status =3D pci_get_word(s->dev.config + PCI_STATUS); > >>>+ pci_set_word(s->dev.config + PCI_STATUS, status | > >>>PCI_STATUS_SIG_TARGET_ABORT); > >>>+} > >>>+ > >>>+static inline uint64_t amd_iommu_get_perms(uint64_t entry) > >>>+{ > >>>+ return (entry &(DEV_PERM_READ | DEV_PERM_WRITE)) >> DEV_PERM_SH= IFT; > >>>+} > >>>+ > 
>>>+/* FIXME deduplication */ > >>>+AddressSpace *bridge_host_amd_iommu(PCIBus *bus, void *opaque, int = devfn) > >>>+{ > >>>+ AMDIOMMUState *s =3D opaque; > >>>+ AMDIOMMUAddressSpace **iommu_as; > >>>+ int bus_num =3D pci_bus_num(bus); > >>>+ > >>>+ /* just in case */ > >>>+ assert(0 <=3D bus_num && bus_num <=3D PCI_BUS_MAX); > >>>+ assert(0 <=3D devfn && devfn <=3D PCI_DEVFN_MAX); > >>>+ > >>>+ iommu_as =3D s->address_spaces[bus_num]; > >>>+ > >>>+ /* allocate memory during the first run */ > >>>+ if(!iommu_as) { > >>>+ iommu_as =3D > >>>g_malloc0(sizeof(AMDIOMMUAddressSpace*)*PCI_DEVFN_MAX); > >>>+ s->address_spaces[bus_num] =3D iommu_as; > >>>+ } > >>>+ > >>>+ /* set up IOMMU region */ > >>>+ if(!iommu_as[devfn]){ > >>>+ iommu_as[devfn] =3D g_malloc0(sizeof(AMDIOMMUAddressSpace))= ; > >>>+ iommu_as[devfn]->bus_num =3D (uint8_t)bus_num; > >>>+ iommu_as[devfn]->devfn =3D (uint8_t)devfn; > >>>+ iommu_as[devfn]->iommu_state =3D s; > >>>+ > >>>+ memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s), > >>>&s->iommu_ops, "amd-iommu", UINT64_MAX); > >>>+ address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iomm= u, > >>>"amd-iommu"); > >>>+ } > >>>+ return &iommu_as[devfn]->as; > >>>+} > >>>+ > >>>+/* validate a page table entry */ > >>>+static bool amd_iommu_validate_pte(uint64_t pte) > >>>+{ > >>>+ return pte & DEV_VALID && pte & DEV_TRANSLATION_VALID; > >> > >>I'd suggest parenthesis here. 
> >> > >> > >>>+} > >>>+ > >>>+/* get a device table entry given the device function */ > >>>+static bool amd_iommu_get_dte(AMDIOMMUState *s, int devfn, uint64_t > >>>*entry) > >>>+{ > >>>+ /* Could lead to out of range accesses */ > >>>+ assert(0 < devfn && devfn < PCI_DEVFN_MAX); > >>>+ > >>>+ uint32_t offset =3D devfn * DEVTAB_ENTRY_SIZE; > >>>+ if(dma_memory_read(&address_space_memory, s->devtab + offset, e= ntry, > >>>32)){ > >>>+ IOMMU_DPRINTF(MMU, "error: fail to access Device Entry devt= ab > >>>0x%"PRIx64 > >>>+ "offset 0x%"PRIx32, s->devtab, offset); > >>>+ return false; > >>>+ } > >>>+ > >>>+ if(!amd_iommu_validate_pte(entry[0])){ > >>>+ IOMMU_DPRINTF(MMU, > >>>+ "Pte entry at 0x%"PRIx64" is invalid", entry[= 0]); > >>>+ return false; > >>>+ } > >>>+ > >>>+ return true; > >>>+} > >>>+ > >>>+static uint64_t get_pte_translation_mode(uint64_t pte) > >>>+{ > >>>+ uint64_t level =3D (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; > >>>+ > >>>+ if(level =3D=3D 7){ > >>>+ /* reserved level - abort ? */ > >>>+ IOMMU_DPRINTF(MMU, > >>>+ "Reserved translation level (7) detected"); > >>>+ return 0; > >>>+ } > >>>+ > >>>+ return level; > >>>+} > >>>+ > >>>+/* TODO : Mark addresses as Accessed and Dirty */ > >>>+static void amd_iommu_do_translate(AMDIOMMUAddressSpace *as, hwaddr= addr, > >>>bool is_write, IOMMUTLBEntry *ret) > >>>+{ > >>>+ AMDIOMMUState *s =3D as->iommu_state; > >>>+ > >>>+ int present; > >>>+ dma_addr_t pte_addr; > >>>+ uint64_t entry[4], pte, pte_perms; > >>>+ unsigned level; > >>>+ unsigned perms; > >>>+ > >>>+ if(!amd_iommu_get_dte(s, as->devfn, entry)){ > >>>+ goto no_translation; > >>>+ } > >>>+ > >>>+ pte =3D entry[0]; > >>>+ > >>>+ /* > >>>+ * It's okay to check for either read or write permissions > >>>+ * even for memory maps, since we don't support R/W maps. > >>>+ */ > >>>+ perms =3D is_write ? 
IOMMU_PERM_WRITE : IOMMU_PERM_READ; > >>>+ > >>>+ if(!(level =3D get_pte_translation_mode(pte))){ > >>>+ goto no_translation; > >>>+ } > >>>+ > >>>+ while(level > 0){ > >>>+ /* > >>>+ * check permissions: the bitwise > >>>+ * implication perms -> entry_perms must be true. Pages mus= t be > >>>present > >>>+ * and permissions on all levels must be similar > >>>+ */ > >>>+ pte_perms =3D amd_iommu_get_perms(pte); > >>>+ present =3D pte & 1; > >>>+ if(!present || perms !=3D (perms & pte_perms)){ > >>>+ amd_iommu_page_fault(s, as->devfn, entry[1] & > >>>DEV_DOMID_ID_MASK, addr, present, > >>>+ !!(perms & IOMMU_PERM_WRITE)); > >>>+ return; > >>>+ } > >>>+ > >>>+ /* go to the next lower level */ > >>>+ pte_addr =3D pte & DEV_PT_ROOT_MASK; > >>>+ pte_addr +=3D ((addr >> ( 9 * level)) & 0xff8); > >> > >>Does this work for six level page tables? The highest level has diffe= rent > >>bit size there IIRC. > >> > >>>+ pte =3D ldq_phys(&address_space_memory, pte_addr); > >>>+ level =3D (pte >> DEV_MODE_RSHIFT) & DEV_MODE_MASK; > >>>+ } > >>>+ > >>>+ ret->iova =3D addr & IOMMU_PAGE_MASK_4K; > >>>+ ret->translated_addr =3D (pte & DEV_PT_ROOT_MASK) & IOMMU_PAGE_= MASK_4K; > >>>+ ret->addr_mask =3D ~IOMMU_PAGE_MASK_4K; > >>>+ ret->perm =3D IOMMU_RW; > >>>+ return; > >>>+ > >>>+no_translation: > >>>+ ret->iova =3D addr; > >>>+ ret->translated_addr =3D addr & IOMMU_PAGE_MASK_4K; > >>>+ ret->addr_mask =3D ~IOMMU_PAGE_MASK_4K; > >>>+ ret->perm =3D IOMMU_RW; > >>>+ return; > >> > >>Are you sure these transactions needs to be passed through rather tha= n > >>target-aborted? 
> >> > >>>+} > >>>+ > >>>+static IOMMUTLBEntry amd_iommu_translate(MemoryRegion *iommu, hwadd= r > >>>addr, bool is_write) > >>>+{ > >>>+ > >>>+ IOMMU_DPRINTF(GENERAL, ""); > >>>+ > >>>+ AMDIOMMUAddressSpace *as =3D container_of(iommu, AMDIOMMUAddres= sSpace, > >>>iommu); > >>>+ AMDIOMMUState *s =3D as->iommu_state; > >>>+ > >>>+ IOMMUTLBEntry ret =3D { > >>>+ .target_as =3D &address_space_memory, > >>>+ .iova =3D addr, > >>>+ .translated_addr =3D 0, > >>>+ .addr_mask =3D ~(hwaddr)0, > >>>+ .perm =3D IOMMU_NONE, > >>>+ }; > >>>+ > >>>+ if(!s->enabled){ > >>>+ /* IOMMU disabled - corresponds to iommu=3Doff not failure = to > >>>provide any parameter */ > >>>+ ret.iova =3D addr & IOMMU_PAGE_MASK_4K; > >>>+ ret.translated_addr =3D addr & IOMMU_PAGE_MASK_4K; > >>>+ ret.addr_mask =3D ~IOMMU_PAGE_MASK_4K; > >>>+ ret.perm =3D IOMMU_RW; > >>>+ return ret; > >>>+ } > >>>+ > >>>+ amd_iommu_do_translate(as, addr, is_write, &ret); > >>>+ > >>>+ IOMMU_DPRINTF(MMU, > >>>+ "devid: %02x:%02x.%x gpa 0x%"PRIx64 " hpa 0x%"PRI= x64, > >>>as->bus_num, > >>>+ PCI_SLOT(as->devfn), PCI_FUNC(as->devfn), addr, > >>>ret.translated_addr); > >>>+ > >>>+ return ret; > >>>+} > >>>+ > >>>+static const MemoryRegionOps mmio_mem_ops =3D { > >>>+ .read =3D amd_iommu_mmio_read, > >>>+ .write =3D amd_iommu_mmio_write, > >>>+ .endianness =3D DEVICE_LITTLE_ENDIAN, > >>>+ .valid =3D { > >>>+ .min_access_size =3D 1, > >>>+ .max_access_size =3D 8, > >>>+ .unaligned =3D false, > >>>+ }, > >>>+ > >>>+ .valid =3D { > >>>+ .min_access_size =3D 8, > >>>+ .max_access_size =3D 8, > >>>+ }, > >>>+}; > >>>+ > >>>+static void amd_iommu_set_misc_capab(uint32_t host_va, uint32_t hos= t_pa, > >>>uint32_t guest_va, uint32_t *reg) > >>>+{ > >>>+ *reg =3D 0; > >> > >>Redundant line. 
> >> > >> > >>>+ > >>>+ *reg |=3D MAX_VA_ADDR | MAX_PH_ADDR | MAX_GVA_ADDR; > >>>+} > >>>+ > >>>+/* FIXME: The similar code in reset and init could be combined into= a > >>>function */ > >>>+static void amd_iommu_reset(DeviceState *dev) > >>>+{ > >>>+ AMDIOMMUState *s =3D AMD_IOMMU_DEVICE(dev); > >>>+ > >>>+ uint32_t capab_header =3D CAPAB_FEATURES; > >>>+ uint32_t capab_base_addr_low =3D s->mmio.addr & ~(0xffff0000); > >>>+ uint32_t capab_base_addr_high =3D (s->mmio.addr & ~(0xffff)) >>= 16; > >>>+ uint32_t capab_range =3D 0xff000000; > >>>+ uint32_t capab_misc =3D 0x0; > >>>+ > >>>+ /* reset MMIO */ > >>>+ memset(s->mmior, 0, MMIO_SIZE); > >>>+ amd_iommu_writeq(s, MMIO_EXT_FEATURES, EXT_FEATURES); > >>>+ /* TODO :Disable event logging and reset all buffers */ > >>>+ amd_iommu_set_misc_capab(MAX_PH_ADDR, MAX_VA_ADDR, MAX_GVA_ADDR= , > >>>&capab_misc); > >>>+ > >>>+ /* reset IOMMU specific capabilities */ > >>>+ pci_set_long(s->dev.config + s->capab_offset, capab_header); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_BAR_LOW, > >>>capab_base_addr_low); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_BAR_HIGH, > >>>capab_base_addr_high); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_RANGE, > >>>capab_range); > >>>+ pci_set_long(s->dev.config + s->capab_offset + CAPAB_MISC, > >>>capab_misc); > >> > >> > >>Do we also need to reset MSI capability block? 
> >> > >> > >>>+ > >>>+ /* reset device ident */ > >>>+ pci_config_set_vendor_id(s->dev.config, PCI_VENDOR_ID_AMD); > >>>+ pci_config_set_device_id(s->dev.config, PCI_DEVICE_ID_RD890_IOM= MU); > >>>+ pci_config_set_class(s->dev.config, 0x0806); > >>>+ pci_config_set_prog_interface(s->dev.config, 00); > >>>+} > >>>+ > >>>+static void amd_iommu_write_capab(PCIDevice *dev, uint32_t addr, ui= nt32_t > >>>val, int len) > >>>+{ > >>>+ pci_default_write_config(dev, addr, val, len); > >>>+} > >>>+ > >>>+/* I honestly don't know how to reserve MMIO */ > >>>+static void amd_iommu_mmio_map(AMDIOMMUState *s, hwaddr addr) > >>>+{ > >>>+ if(s->mmio.addr =3D=3D addr){ > >>>+ return; > >>>+ } > >>>+ > >>>+// if(s->mmio.addr !=3D (hwaddr)-1) { > >>>+// memory_region_del_subregion(get_system_memory(), &s->mmio= ); > >>>+// } > >>>+ > >>>+ s->mmio.addr =3D addr; > >>>+ memory_region_add_subregion(get_system_memory(), addr, &s->mmio= ); > >>>+ > >>>+} > >>>+ > >>>+static int amd_iommu_pci_initfn(PCIDevice *dev) > >>>+{ > >>>+ AMDIOMMUState *s =3D container_of(dev, AMDIOMMUState, dev); > >>>+ uint32_t capab_header =3D CAPAB_FEATURES; > >>>+ uint32_t capab_base_addr_low =3D (s->mmio.addr & ~(0xffff0000))= ; > >>>+ uint32_t capab_base_addr_high =3D (s->mmio.addr & ~(0x0000ffff)= >> 16); > >>>+ uint32_t capab_range =3D 0xff000000; > >>>+ uint32_t capab_misc =3D 0x0; > >>>+ > >>>+ memset(s->mmior, 0, MMIO_SIZE); > >>>+ amd_iommu_set_misc_capab(MAX_PH_ADDR, MAX_VA_ADDR, MAX_GVA_ADDR= , > >>>&capab_misc); > >>>+ > >>>+ s->iommu_ops.translate =3D amd_iommu_translate; > >>>+ s->enabled =3D false; > >>>+ s->ats_enabled =3D false; > >>>+ s->cmdbuf_enabled =3D false; > >>>+ s->devtab_len =3D 0; > >>>+ s->cmdbuf_len =3D 0; > >>>+ s->cmdbuf_head =3D 0; > >>>+ s->cmdbuf_tail =3D 0; > >>>+ s->evtlog_head =3D 0; > >>>+ s->evtlog_tail =3D 0; > >>>+ s->excl_enabled =3D false; > >>>+ s->excl_allow =3D false; > >>>+ s->mmio_enabled =3D false; > >>>+ dev->config_write =3D amd_iommu_write_capab; > >>>+ > 
>>>+ size_t offset; > >>>+ /* Save offset for IVRS */ > >>>+ s->capab_offset =3D offset =3D pci_add_capability(dev, PCI_CAP_= ID_SEC, 0, > >>>CAPAB_SIZE); > >>>+ pci_set_long(s->dev.config + offset, capab_header); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_BAR_LOW, > >>>capab_base_addr_low); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_BAR_HIGH, > >>>capab_base_addr_high); > >>>+ pci_set_long(s->dev.config + offset + CAPAB_RANGE, capab_range)= ; > >>>+ pci_set_long(s->dev.config + offset + CAPAB_MISC, capab_misc); > >>>+ > >>>+ /* add msi and hypertransport capabilities */ > >>>+ pci_add_capability(dev, PCI_CAP_ID_MSI, 0, CAPAB_REG_SIZE); > >>>+ pci_add_capability(dev, PCI_CAP_ID_HT, 0, CAPAB_REG_SIZE); > >>>+ > >>>+ /* set up MMIO */ > >>>+ memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, > >>>"amd-iommu", MMIO_SIZE); > >>>+ amd_iommu_mmio_map(s, BUS_AMD_IOMMU_ADDR); > >>>+ > >>>+ /* reset device ident */ > >>>+ pci_config_set_vendor_id(dev->config, PCI_VENDOR_ID_AMD); > >>>+ pci_config_set_device_id(dev->config, PCI_DEVICE_ID_RD890_IOMMU= ); > >>>+ > >>>+ /* may not be necessary */ > >>>+ pci_config_set_interrupt_pin(dev->config, 0x1a); > >>>+ > >>>+ return 0; > >>>+} > >>>+ > >>>+static void amd_iommu_realize(PCIDevice *dev, Error **error) > >>>+{ > >>>+ amd_iommu_pci_initfn(dev); > >>>+} > >>>+ > >>>+static const VMStateDescription vmstate_amd_iommu =3D { > >>>+ .name =3D "amd-iommu", > >>>+ .fields =3D (VMStateField[]) { > >>>+ VMSTATE_PCI_DEVICE(dev, AMDIOMMUState), > >>>+ VMSTATE_END_OF_LIST() > >>>+ } > >>>+}; > >>>+ > >>>+static Property amd_iommu_properties[] =3D { > >>>+ DEFINE_PROP_UINT32("version", AMDIOMMUState, version, 2), > >>>+ DEFINE_PROP_END_OF_LIST(), > >>>+}; > >>>+ > >>>+/* not used for now but will be when cache is added */ > >>>+static void amd_iommu_uninit(PCIDevice *dev) > >>>+{ > >>>+ return; > >>>+} > >>>+ > >>>+static void amd_iommu_class_init(ObjectClass *klass, void* data) > >>>+{ > >>>+ DeviceClass *dc =3D 
DEVICE_CLASS(klass); > >>>+ PCIDeviceClass *pci =3D PCI_DEVICE_CLASS(klass); > >>>+ > >>>+ pci->realize =3D amd_iommu_realize; > >>>+ pci->exit =3D amd_iommu_uninit; > >>>+ > >>>+ dc->reset =3D amd_iommu_reset; > >>>+ dc->vmsd =3D &vmstate_amd_iommu; > >>>+ dc->props =3D amd_iommu_properties; > >>>+} > >>>+ > >>>+static const TypeInfo amd_iommu =3D { > >>>+ .name =3D TYPE_AMD_IOMMU_DEVICE, > >>>+ .parent =3D TYPE_PCI_DEVICE, > >>>+ .instance_size =3D sizeof(AMDIOMMUState), > >>>+ .class_init =3D amd_iommu_class_init > >>>+}; > >>>+ > >>>+static void amd_iommu_register_types(void) > >>>+{ > >>>+ type_register_static(&amd_iommu); > >>>+} > >>>+ > >>>+type_init(amd_iommu_register_types); > >>>diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h > >>>new file mode 100644 > >>>index 0000000..90309eb > >>>--- /dev/null > >>>+++ b/hw/i386/amd_iommu.h > >>>@@ -0,0 +1,298 @@ > >>>+/* > >>>+ * QEMU emulation of an AMD IOMMU (AMD-Vi) > >>>+ * > >>>+ * Copyright (C) 2011 Eduard-Gabriel Mateanu > >>>+ * Copyright (C) 2015 David Kiarie, > >>>+ * > >>>+ * This program is free software; you can redistribute it and/or mo= dify > >>>+ * it under the terms of the GNU General Public License as publishe= d by > >>>+ * the Free Software Foundation; either version 2 of the License, o= r > >>>+ * (at your option) any later version. > >>>+ > >>>+ * This program is distributed in the hope that it will be useful, > >>>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of > >>>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>>+ * GNU General Public License for more details. > >>>+ > >>>+ * You should have received a copy of the GNU General Public Licens= e > >>>along > >>>+ * with this program; if not, see . 
> >>>+ */ > >>>+ > >>>+#ifndef AMD_IOMMU_H_ > >>>+#define AMD_IOMMU_H_ > >>>+ > >>>+#include "hw/hw.h" > >>>+#include "hw/pci/pci.h" > >>>+#include "hw/sysbus.h" > >>>+#include "sysemu/dma.h" > >>>+#include "qemu/osdep.h" > >>>+#include "qemu/event_notifier.h" > >>>+ > >>>+/* Capability registers */ > >>>+#define CAPAB_HEADER 0x00 > >>>+#define CAPAB_REV_TYPE 0x02 > >>>+#define CAPAB_FLAGS 0x03 > >>>+#define CAPAB_BAR_LOW 0x04 > >>>+#define CAPAB_BAR_HIGH 0x08 > >>>+#define CAPAB_RANGE 0x0C > >>>+#define CAPAB_MISC 0x10 > >>>+#define CAPAB_MISC1 0x14 > >>>+ > >>>+#define CAPAB_SIZE 0x18 > >>>+#define CAPAB_REG_SIZE 0x04 > >>>+ > >>>+/* Capability header data */ > >>>+#define CAPAB_FLAT_EXT (1 << 28) > >>>+#define CAPAB_EFR_SUP (1 << 27) > >>>+#define CAPAB_FLAG_NPCACHE (1 << 26) > >>>+#define CAPAB_FLAG_HTTUNNEL (1 << 25) > >>>+#define CAPAB_FLAG_IOTLBSUP (1 << 24) > >>>+#define CAPAB_INIT_REV (1 << 19) > >>>+#define CAPAB_INIT_TYPE (3 << 16) > >>>+#define CAPAB_INIT_REV_TYPE (CAPAB_REV | CAPAB_TYPE) > >>>+#define CAPAB_INIT_FLAGS (CAPAB_FLAG_NPCACHE | > >>>CAPAB_FLAG_HTTUNNEL) > >>>+#define CAPAB_INIT_MISC ((64 << 15) | (48 << 8)) > >>>+#define CAPAB_BAR_MASK (~((1UL << 14) - 1)) > >>>+ > >>>+/* MMIO registers */ > >>>+#define MMIO_DEVICE_TABLE 0x0000 > >>>+#define MMIO_COMMAND_BASE 0x0008 > >>>+#define MMIO_EVENT_BASE 0x0010 > >>>+#define MMIO_CONTROL 0x0018 > >>>+#define MMIO_EXCL_BASE 0x0020 > >>>+#define MMIO_EXCL_LIMIT 0x0028 > >>>+#define MMIO_EXT_FEATURES 0x0030 > >>>+#define MMIO_COMMAND_HEAD 0x2000 > >>>+#define MMIO_COMMAND_TAIL 0x2008 > >>>+#define MMIO_EVENT_HEAD 0x2010 > >>>+#define MMIO_EVENT_TAIL 0x2018 > >>>+#define MMIO_STATUS 0x2020 > >>>+#define MMIO_PPR_BASE 0x0038 > >>>+#define MMIO_PPR_HEAD 0x2030 > >>>+#define MMIO_PPR_TAIL 0x2038 > >>>+ > >>>+#define MMIO_SIZE 0x4000 > >>>+ > >>>+#define MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) > >>>+#define MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & > >>>~MMIO_DEVTAB_SIZE_MASK) > >>>+#define 
MMIO_DEVTAB_ENTRY_SIZE 32 > >>>+#define MMIO_DEVTAB_SIZE_UNIT 4096 > >>>+ > >>>+#define MMIO_CMDBUF_SIZE_BYTE (MMIO_COMMAND_BASE + 7) > >>>+#define MMIO_CMDBUF_SIZE_MASK 0x0F > >>>+#define MMIO_CMDBUF_BASE_MASK MMIO_DEVTAB_BASE_MASK > >>>+#define MMIO_CMDBUF_DEFAULT_SIZE 8 > >>>+#define MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) > >>>+#define MMIO_CMDBUF_TAIL_MASK MMIO_EVTLOG_HEAD_MASK > >>>+ > >>>+#define MMIO_EVTLOG_SIZE_BYTE (MMIO_EVENT_BASE + 7) > >>>+#define MMIO_EVTLOG_SIZE_MASK MMIO_CMDBUF_SIZE_MASK > >>>+#define MMIO_EVTLOG_BASE_MASK MMIO_CMDBUF_BASE_MASK > >>>+#define MMIO_EVTLOG_DEFAULT_SIZE MMIO_CMDBUF_DEFAULT_SIZE > >>>+#define MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0F) > >>>+#define MMIO_EVTLOG_TAIL_MASK MMIO_EVTLOG_HEAD_MASK > >>>+ > >>>+#define MMIO_EXCL_BASE_MASK MMIO_DEVTAB_BASE_MASK > >>>+#define MMIO_EXCL_ENABLED_MASK (1ULL << 0) > >>>+#define MMIO_EXCL_ALLOW_MASK (1ULL << 1) > >>>+#define MMIO_EXCL_LIMIT_MASK MMIO_DEVTAB_BASE_MASK > >>>+#define MMIO_EXCL_LIMIT_LOW 0xFFF > >>>+ > >>>+#define MMIO_CONTROL_IOMMUEN (1ULL << 0) > >>>+#define MMIO_CONTROL_HTTUNEN (1ULL << 1) > >>>+#define MMIO_CONTROL_EVENTLOGEN (1ULL << 2) > >>>+#define MMIO_CONTROL_EVENTINTEN (1ULL << 3) > >>>+#define MMIO_CONTROL_COMWAITINTEN (1ULL << 4) > >>>+#define MMIO_CONTROL_CMDBUFLEN (1ULL << 12) > >>>+ > >>>+#define MMIO_STATUS_EVTLOG_OF (1ULL << 0) > >>>+#define MMIO_STATUS_EVTLOG_INTR (1ULL << 1) > >>>+#define MMIO_STATUS_COMWAIT_INTR (1ULL << 2) > >>>+#define MMIO_STATUS_EVTLOG_RUN (1ULL << 3) > >>>+#define MMIO_STATUS_CMDBUF_RUN (1ULL << 4) > >>>+ > >>>+#define CMDBUF_ID_BYTE 0x07 > >>>+#define CMDBUF_ID_RSHIFT 4 > >>>+#define CMDBUF_ENTRY_SIZE 0x10 > >>>+ > >>>+#define CMD_COMPLETION_WAIT 0x01 > >>>+#define CMD_INVAL_DEVTAB_ENTRY 0x02 > >>>+#define CMD_INVAL_IOMMU_PAGES 0x03 > >>>+#define CMD_INVAL_IOTLB_PAGES 0x04 > >>>+#define CMD_INVAL_INTR_TABLE 0x05 > >>>+ > >>>+#define DEVTAB_ENTRY_SIZE 32 > >>>+ > >>>+/* Device table entry bits 0:63 */ > 
>>>+#define DEV_VALID (1ULL << 0) > >>>+#define DEV_TRANSLATION_VALID (1ULL << 1) > >>>+#define DEV_MODE_MASK 0x7 > >>>+#define DEV_MODE_RSHIFT 9 > >>>+#define DEV_PT_ROOT_MASK 0xFFFFFFFFFF000 > >>>+#define DEV_PT_ROOT_RSHIFT 12 > >>>+#define DEV_PERM_SHIFT 61 > >>>+#define DEV_PERM_READ (1ULL << 61) > >>>+#define DEV_PERM_WRITE (1ULL << 62) > >>>+ > >>>+/* Device table entry bits 64:127 */ > >>>+#define DEV_DOMID_ID_MASK ((1ULL << 16) - 1) > >>>+#define DEV_IOTLB_SUPPORT (1ULL << 17) > >>>+#define DEV_SUPPRESS_PF (1ULL << 18) > >>>+#define DEV_SUPPRESS_ALL_PF (1ULL << 19) > >>>+#define DEV_IOCTL_MASK (~3) > >>>+#define DEV_IOCTL_RSHIFT 20 > >>>+#define DEV_IOCTL_DENY 0 > >>>+#define DEV_IOCTL_PASSTHROUGH 1 > >>>+#define DEV_IOCTL_TRANSLATE 2 > >>>+#define DEV_CACHE (1ULL << 37) > >>>+#define DEV_SNOOP_DISABLE (1ULL << 38) > >>>+#define DEV_EXCL (1ULL << 39) > >>>+ > >>>+/* Event codes and flags, as stored in the info field */ > >>>+#define EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 24) > >>>+#define EVENT_IOPF (0x2U << 24) > >>>+#define EVENT_IOPF_I (1U << 3) > >>>+#define EVENT_IOPF_PR (1U << 4) > >>>+#define EVENT_IOPF_RW (1U << 5) > >>>+#define EVENT_IOPF_PE (1U << 6) > >>>+#define EVENT_IOPF_RZ (1U << 7) > >>>+#define EVENT_IOPF_TR (1U << 8) > >>>+#define EVENT_DEV_TAB_HW_ERROR (0x3U << 24) > >>>+#define EVENT_PAGE_TAB_HW_ERROR (0x4U << 24) > >>>+#define EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 24) > >>>+#define EVENT_COMMAND_HW_ERROR (0x6U << 24) > >>>+#define EVENT_IOTLB_INV_TIMEOUT (0x7U << 24) > >>>+#define EVENT_INVALID_DEV_REQUEST (0x8U << 24) > >>>+ > >>>+#define EVENT_LEN 16 > >>>+ > >>>+#define IOMMU_PERM_READ (1 << 0) > >>>+#define IOMMU_PERM_WRITE (1 << 1) > >>>+#define IOMMU_PERM_RW (IOMMU_PERM_READ | IOMMU_PERM_W= RITE) > >>>+ > >>>+/* AMD RD890 Chipset */ > >>>+#define PCI_DEVICE_ID_RD890_IOMMU 0x20 > >>>+ > >>>+#define PCI_CAP_ID_SEC 0xf /* IOMMU capability heade= r > >>>register */ > >>>+#define PCI_CAP_ID_MMIO_LOW 0x0 /* MMIO base address low > 
>>>register */ > >>>+#define PCI_CAP_ID_MMIO_HIGH 0x0 /* MMIO base address high > >>>register */ > >>>+#define PCI_CAP_ID_RANGE 0x0 /* Device range register > >>>*/ > >>>+#define PCI_CAP_ID_MISC 0x0 /* miscellaneous Informat= ion > >>>register 0 */ > >>>+#define PCI_CAP_ID_MISC1 0x0 /* miscellaneous Informat= ion > >>>register 1 */ > >>>+ > >>>+#define FEATURE_PREFETCH (1ULL<<0) > >>>+#define FEATURE_PPR (1ULL<<1) > >>>+#define FEATURE_NX (1ULL<<3) > >>>+#define FEATURE_GT (1ULL<<4) > >>>+#define FEATURE_IA (1ULL<<6) > >>>+#define FEATURE_GA (1ULL<<7) > >>>+#define FEATURE_HE (1ULL<<8) > >>>+#define FEATURE_PC (1ULL<<9) > >>>+ > >>>+/* IOMMU paging mode */ > >>>+#define GATS_MODE (6ULL << 12) > >>>+#define HATS_MODE (6ULL << 10) > >>>+/* PCI SIG constants */ > >>>+#define PCI_BUS_MAX 256 > >>>+#define PCI_SLOT_MAX 32 > >>>+#define PCI_FUNC_MAX 8 > >>>+#define PCI_DEVFN_MAX 256 > >>>+ > >>>+/* extended feature support */ > >>>+#define EXT_FEATURES (FEATURE_PREFETCH | FEATURE_PPR | FEATURE_NX | > >>>FEATURE_GT | FEATURE_IA | FEATURE_GA | FEATURE_HE | GATS_MODE | HATS= _MODE ) > >>>+ > >>>+/* capabilities header */ > >>>+#define CAPAB_FEATURES (CAPAB_FLAT_EXT | CAPAB_FLAG_NPCACHE | > >>>CAPAB_FLAG_IOTLBSUP | PCI_CAP_ID_SEC | CAPAB_INIT_TYPE | CAPAB_FLAG_= HTTUNNEL > >>>| CAPAB_EFR_SUP) > >>>+ > >>>+/* command constants */ > >>>+#define COM_STORE_ADDRESS_MASK 0xffffffffffff8 > >>>+#define COM_COMPLETION_STORE_MASK 0x1 > >>>+#define COM_COMPLETION_INTR 0x2 > >>>+#define COM_COMPLETION_DATA_OFF 0x8 > >>>+#define IOMMU_COMMAND_SIZE 0x10 > >>>+ > >>>+/* IOMMU default address */ > >>>+#define BUS_AMD_IOMMU_ADDR 0xfeb00000 > >>>+ > >>>+/* page management constants */ > >>>+#define IOMMU_PAGE_SHIFT 12 > >>>+#define IOMMU_PAGE_SIZE (1ULL << IOMMU_PAGE_SHIFT) > >>>+ > >>>+#define IOMMU_PAGE_SHIFT_4K 12 > >>>+#define IOMMU_PAGE_MASK_4K (~((1ULL << IOMMU_PAGE_SHIFT_4K) - 1)) > >>>+#define IOMMU_PAGE_SHIFT_2M 21 > >>>+#define IOMMU_PAGE_MASK_2M (~((1ULL << IOMMU_PAGE_SHIFT_2M) 
-1)) > >>>+#define IOMMU_PAGE_SHIFT_1G 30 > >>>+#define IOMMU_PAGE_MASK_1G (~((1ULL << IOMMU_PAGE_SHIFT_1G) - 1)) > >>>+ > >>>+#define TYPE_AMD_IOMMU_DEVICE "amd-iommu" > >>>+#define AMD_IOMMU_DEVICE(obj)\ > >>>+ OBJECT_CHECK(AMDIOMMUState, (obj), TYPE_AMD_IOMMU_DEVICE) > >>>+ > >>>+typedef struct AMDIOMMUState AMDIOMMUState; > >>>+ > >>>+typedef struct AMDIOMMUAddressSpace{ > >>>+ uint8_t bus_num; /* bus number > >>>*/ > >>>+ uint8_t devfn; /* device function > >>>*/ > >>>+ AMDIOMMUState *iommu_state; /* IOMMU - one per machine > >>>*/ > >>>+ MemoryRegion iommu; /* Device's iommu region > >>>*/ > >>>+ AddressSpace as; /* device's corresponding address s= pace > >>>*/ > >>>+}AMDIOMMUAddressSpace; > >>>+ > >>>+struct AMDIOMMUState { > >>>+ PCIDevice dev; /* The PCI device itself */ > >>>+ > >>>+ uint32_t version; > >>>+ > >>>+ int capab_offset; /* capability offset pointer */ > >>>+ unsigned char *capab; /* capabilities registers */ > >>>+ > >>>+ bool enabled; /* IOMMU enabled */ > >>>+ bool ats_enabled; /* address translation enabled */ > >>>+ bool cmdbuf_enabled; /* command buffer enabled */ > >>>+ bool evtlog_enabled; /* event log enabled */ > >>>+ bool excl_enabled; > >>>+ > >>>+ dma_addr_t devtab; /* base address device table */ > >>>+ size_t devtab_len; /* device table length */ > >>>+ > >>>+ dma_addr_t cmdbuf; /* command buffer base address */ > >>>+ uint64_t cmdbuf_len; /* command buffer length */ > >>>+ unsigned cmdbuf_head; /* current IOMMU read position */ > >>>+ unsigned cmdbuf_tail; /* next Software write position */ > >>>+ int completion_wait_intr; > >>>+ > >>>+ dma_addr_t evtlog; /* base address event log */ > >>>+ size_t evtlog_intr; > >>>+ size_t evtlog_len; /* event log length */ > >>>+ size_t evtlog_head; /* event log head */ > >>>+ size_t evtlog_tail; /* event log tail */ > >>>+ > >>>+ /* unused for now */ > >>>+ dma_addr_t excl_base; /* base DVA - IOMMU exclusion rang= e */ > >>>+ dma_addr_t excl_limit; /* limit of IOMMU exclusion range = */ > 
>>>+ bool excl_allow; /* translate accesses to the exclu= sion > >>>range */ > >>>+ bool excl_enable; /* exclusion range enabled = */ > >>>+ > >>>+ MemoryRegion mmio; /* MMIO region */ > >>>+ unsigned char mmior[MMIO_SIZE]; > >>>+ bool mmio_enabled; > >>>+ > >>>+ /* IOMMU function */ > >>>+ MemoryRegionIOMMUOps iommu_ops; > >>>+ > >>>+ /* for each served device */ > >>>+ AMDIOMMUAddressSpace **address_spaces[PCI_BUS_MAX]; > >>>+}; > >>>+ > >>>+typedef struct AMDIOMMUEvent { > >>>+ uint16_t devfn; > >>>+ uint16_t reserved; > >>>+ uint16_t domid; > >>>+ uint16_t info; > >>>+ uint16_t addr; > >>>+} __attribute__((packed)) AMDIOMMUEvent; > >>>+ > >>>+AddressSpace *bridge_host_amd_iommu(PCIBus *bus, void *opaque, int > >>>devfn); > >>>+ > >>>+#endif > >>> > >> > >>Regards, > >>Valentine >=20 > --=20 > =D0=A1 =D1=83=D0=B2=D0=B0=D0=B6=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC, > =D0=A1=D0=B8=D0=BD=D0=B8=D1=86=D1=8B=D0=BD =D0=92=D0=B0=D0=BB=D0=B5=D0=BD= =D1=82=D0=B8=D0=BD