From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1Jfj43-0003Ct-I5 for qemu-devel@nongnu.org; Sat, 29 Mar 2008 17:58:15 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1Jfj43-0003CY-6j for qemu-devel@nongnu.org; Sat, 29 Mar 2008 17:58:15 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Jfj42-0003CR-L2 for qemu-devel@nongnu.org; Sat, 29 Mar 2008 17:58:14 -0400 Received: from e4.ny.us.ibm.com ([32.97.182.144]) by monty-python.gnu.org with esmtps (TLS-1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1Jfj42-000119-AL for qemu-devel@nongnu.org; Sat, 29 Mar 2008 17:58:14 -0400 Received: from d01relay02.pok.ibm.com (d01relay02.pok.ibm.com [9.56.227.234]) by e4.ny.us.ibm.com (8.13.8/8.13.8) with ESMTP id m2TLuA3r009831 for ; Sat, 29 Mar 2008 17:56:10 -0400 Received: from d01av01.pok.ibm.com (d01av01.pok.ibm.com [9.56.224.215]) by d01relay02.pok.ibm.com (8.13.8/8.13.8/NCO v8.7) with ESMTP id m2TLuABf249764 for ; Sat, 29 Mar 2008 17:56:10 -0400 Received: from d01av01.pok.ibm.com (loopback [127.0.0.1]) by d01av01.pok.ibm.com (8.12.11.20060308/8.13.3) with ESMTP id m2TLu9bk006449 for ; Sat, 29 Mar 2008 17:56:09 -0400 From: Anthony Liguori Date: Sat, 29 Mar 2008 16:55:56 -0500 Message-Id: <1206827760-4566-2-git-send-email-aliguori@us.ibm.com> In-Reply-To: <1206827760-4566-1-git-send-email-aliguori@us.ibm.com> References: <1206827760-4566-1-git-send-email-aliguori@us.ibm.com> Subject: [Qemu-devel] [PATCH 2/6] PCI DMA API Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: kvm-devel@lists.sourceforge.net, Marcelo Tosatti , Anthony Liguori , Aurelien Jarno This patch introduces a PCI DMA API and some generic code to support other DMA APIs. Two types are introduced: PhysIOVector and IOVector. 
A DMA API maps a PhysIOVector, which is composed of target_phys_addr_t, into an IOVector, which is composed of void *. This enables zero-copy IO to be performed without introducing assumptions of phys_ram_base. This API is at the PCI device level to enable support of per-device IOMMU remapping. Signed-off-by: Anthony Liguori diff --git a/Makefile.target b/Makefile.target index 5ac29a7..94f3e58 100644 --- a/Makefile.target +++ b/Makefile.target @@ -173,7 +173,7 @@ all: $(PROGS) ######################################################### # cpu emulator library LIBOBJS=exec.o kqemu.o translate-all.o cpu-exec.o\ - translate.o host-utils.o + translate.o host-utils.o iovector.o ifndef CONFIG_NO_DYNGEN_OP LIBOBJS+=op.o endif diff --git a/cpu-all.h b/cpu-all.h index 9e5d33b..23b0a11 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -24,6 +24,8 @@ #define WORDS_ALIGNED #endif +#include "iovector.h" + /* some important defines: * * WORDS_ALIGNED : if defined, the host cpu can only make word aligned @@ -835,6 +837,8 @@ void cpu_register_physical_memory(target_phys_addr_t start_addr, unsigned long size, unsigned long phys_offset); ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr); +IOVector *cpu_translate_physical_page_vector(PhysIOVector *phys); +void cpu_physical_page_vector_set_dirty(PhysIOVector *phys); ram_addr_t qemu_ram_alloc(unsigned int size); void qemu_ram_free(ram_addr_t addr); int cpu_register_io_memory(int io_index, diff --git a/exec.c b/exec.c index c25872d..4b4b1a9 100644 --- a/exec.c +++ b/exec.c @@ -2085,6 +2085,65 @@ ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr) return p->phys_offset; } +IOVector *cpu_translate_physical_page_vector(PhysIOVector *phys) +{ + unsigned int i; + IOVector *virt; + + /* QEMU represents guest physical memory as virtually contiguous so we + never should need more IOVector's than PhysIOVectors */ + + virt = qemu_malloc(sizeof(IOVector) + phys->num * sizeof(IOVectorElement)); + + virt->num = phys->num; + for (i = 0; 
i < phys->num; i++) { + ram_addr_t base_offset = 0; + ram_addr_t offset; + + /* we need to check that the guest is trying to DMA to somewhere they + shouldn't */ + for (offset = 0; offset < phys->sg[i].len; offset += TARGET_PAGE_SIZE){ + ram_addr_t phys_offset; + + /* DMA'ing to MMIO, just skip */ + phys_offset = cpu_get_physical_page_desc(phys->sg[i].base); + if ((phys_offset & ~TARGET_PAGE_MASK) != IO_MEM_RAM) { + fprintf(stderr, "dma'ing to non-RAM region\n"); + qemu_free(virt); + return NULL; + } + + phys_offset &= TARGET_PAGE_MASK; + phys_offset += phys->sg[i].base & ~TARGET_PAGE_MASK; + + if (offset == 0) + base_offset = phys_offset; + else if ((phys_offset - base_offset) != offset) { + fprintf(stderr, "bug: discontiguous guest memory?\n"); + qemu_free(virt); + return NULL; + } + } + + virt->sg[i].base = phys_ram_base + base_offset; + virt->sg[i].len = phys->sg[i].len; + } + + return virt; +} + +void cpu_physical_page_vector_set_dirty(PhysIOVector *phys) +{ + int i; + + for (i = 0; i < phys->num; i++) { + ram_addr_t offset; + for (offset = 0; offset < phys->sg[i].len; + offset += TARGET_PAGE_SIZE) + cpu_physical_memory_set_dirty(phys->sg[i].base + offset); + } +} + /* XXX: better than nothing */ ram_addr_t qemu_ram_alloc(unsigned int size) { diff --git a/hw/pci.c b/hw/pci.c index bc55989..99c206f 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -145,6 +145,20 @@ int pci_device_load(PCIDevice *s, QEMUFile *f) return 0; } +IOVector *pci_device_dma_map(PCIDevice *s, PhysIOVector *phys) +{ + return cpu_translate_physical_page_vector(phys); +} + +void pci_device_dma_unmap(PCIDevice *s, PhysIOVector *phys, IOVector *virt, + int write) +{ + /* mark memory as dirty if necessary */ + if (write) + cpu_physical_page_vector_set_dirty(phys); + qemu_free(virt); +} + /* -1 for devfn means auto assign */ PCIDevice *pci_register_device(PCIBus *bus, const char *name, int instance_size, int devfn, diff --git a/hw/pci.h b/hw/pci.h index e870987..b965919 100644 --- a/hw/pci.h +++ 
b/hw/pci.h @@ -81,6 +81,10 @@ void pci_default_write_config(PCIDevice *d, void pci_device_save(PCIDevice *s, QEMUFile *f); int pci_device_load(PCIDevice *s, QEMUFile *f); +IOVector *pci_device_dma_map(PCIDevice *s, PhysIOVector *phys); +void pci_device_dma_unmap(PCIDevice *s, PhysIOVector *phys, IOVector *virt, + int write); + typedef void (*pci_set_irq_fn)(qemu_irq *pic, int irq_num, int level); typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, diff --git a/iovector.c b/iovector.c new file mode 100644 index 0000000..432b483 --- /dev/null +++ b/iovector.c @@ -0,0 +1,121 @@ +/* + * IO Vectors + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "iovector.h" + +static size_t iovector_rw(void *buffer, size_t size, IOVector *iov, int read) +{ + uint8_t *ptr = buffer; + size_t offset = 0; + int i; + + for (i = 0; i < iov->num; i++) { + size_t len; + + len = MIN(iov->sg[i].len, size - offset); + + if (read) + memcpy(ptr + offset, iov->sg[i].base, len); + else + memcpy(iov->sg[i].base, ptr + offset, len); + + offset += len; + } + + return offset; +} + +size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size, + const IOVector *iov) +{ + IOVector *sg; + size_t len; + + if (offset) + sg = iovector_trim(iov, offset, size); + else + sg = (IOVector *)iov; + + len = iovector_rw(buffer, size, sg, 1); + + if (offset) + qemu_free(sg); + + return len; +} + +size_t memcpy_to_iovector(const void *buffer, size_t offset, size_t size, + IOVector *iov) +{ + IOVector *sg; + size_t len; + + if (offset) + sg = iovector_trim(iov, offset, size); + else + sg = iov; + + len = iovector_rw((void *)buffer, size, sg, 0); + + if (offset) + qemu_free(sg); + + return len; +} + +IOVector *iovector_trim(const IOVector 
*iov, size_t offset, size_t size) +{ + IOVector *ret; + size_t off, total_size; + int i; + + ret = qemu_malloc(sizeof(IOVector) + sizeof(IOVectorElement) * iov->num); + if (ret == NULL) + return NULL; + + total_size = 0; + ret->num = 0; + off = 0; + for (i = 0; i < iov->num; i++) { + if (off >= offset || offset < (off + iov->sg[i].len)) { + size_t fudge = 0; + if (off < offset) + fudge = offset - off; + + ret->sg[ret->num].base = iov->sg[i].base + fudge; + ret->sg[ret->num].len = MIN(iov->sg[i].len - fudge, + size - total_size); + total_size += ret->sg[ret->num].len; + ret->num++; + + if (total_size == size) + break; + } + + off += iov->sg[i].len; + } + + return ret; +} + +size_t iovector_size(const IOVector *iov) +{ + size_t size = 0; + int i; + + for (i = 0; i < iov->num; i++) + size += iov->sg[i].len; + + return size; +} diff --git a/iovector.h b/iovector.h new file mode 100644 index 0000000..042ea3a --- /dev/null +++ b/iovector.h @@ -0,0 +1,49 @@ +/* + * IO Vectors + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_IOVECTOR_H +#define _QEMU_IOVECTOR_H + +typedef struct IOVectorElement IOVectorElement; + +typedef struct PhysIOVectorElement PhysIOVectorElement; + +typedef struct IOVector +{ + int num; + struct IOVectorElement { + void *base; + size_t len; + } sg[0]; +} IOVector; + +typedef struct PhysIOVector +{ + int num; + struct PhysIOVectorElement { + target_phys_addr_t base; + size_t len; + } sg[0]; +} PhysIOVector; + +size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size, + const IOVector *iov); + +size_t memcpy_to_iovector(const void *buffer, size_t offset, size_t size, + IOVector *iov); + +IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size); + +size_t iovector_size(const IOVector *iov); + +#endif