* [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them @ 2015-01-22 8:36 Dan Aloni [not found] ` <1421915771-10376-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Dan Aloni @ 2015-01-22 8:36 UTC (permalink / raw) To: dev-VfR2kkLFssw While VFIO doesn't allow us to map complete BARs with MSI-X tables, it does allow us to map around them in PAGE_SIZE granularity. There might be adapters that provide their registers in the same BAR but on a different page. For example, Intel's NVME adapter, though not a network adapter, provides only one MMIO BAR that contains the MSI-X table. Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> CC: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> --- lib/librte_eal/linuxapp/eal/eal_pci.c | 5 +- lib/librte_eal/linuxapp/eal/eal_pci_init.h | 2 +- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 4 +- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 99 +++++++++++++++++++++++++++--- lib/librte_eal/linuxapp/eal/eal_vfio.h | 8 ++- 5 files changed, 101 insertions(+), 17 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index b5f54101e8aa..4a74a9372a15 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -118,13 +118,14 @@ pci_find_max_end_va(void) /* map a particular resource from a file */ void * -pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) { void *mapaddr; /* Map the PCI memory resource of device */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, offset); + MAP_SHARED | additional_flags, fd, offset); if (mapaddr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", __func__, fd, requested_addr, diff --git 
a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 1070eb88fe0a..0a0853d4c4df 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -66,7 +66,7 @@ extern void *pci_map_addr; void *pci_find_max_end_va(void); void *pci_map_resource(void *requested_addr, int fd, off_t offset, - size_t size); + size_t size, int additional_flags); /* map IGB_UIO resource prototype */ int pci_uio_map_resource(struct rte_pci_device *dev); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index e53f06b82430..eaa2e36f643e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -139,7 +139,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) if (pci_map_resource(uio_res->maps[i].addr, fd, (off_t)uio_res->maps[i].offset, - (size_t)uio_res->maps[i].size) + (size_t)uio_res->maps[i].size, 0) != uio_res->maps[i].addr) { RTE_LOG(ERR, EAL, "Cannot mmap device resource\n"); @@ -379,7 +379,7 @@ pci_uio_map_resource(struct rte_pci_device *dev) pci_map_addr = pci_find_max_end_va(); mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset, - (size_t)maps[j].size); + (size_t)maps[j].size, 0); if (mapaddr == MAP_FAILED) fail = 1; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 20e097727f80..f6542a1f1464 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -62,6 +62,9 @@ #ifdef VFIO_PRESENT +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + #define VFIO_DIR "/dev/vfio" #define VFIO_CONTAINER_PATH "/dev/vfio/vfio" #define VFIO_GROUP_FMT "/dev/vfio/%u" @@ -72,10 +75,12 @@ static struct vfio_config vfio_cfg; /* get PCI BAR number where MSI-X interrupts are */ static int -pci_vfio_get_msix_bar(int fd, int *msix_bar) +pci_vfio_get_msix_bar(int fd, int *msix_bar, 
uint32_t *msix_table_offset, + uint32_t *msix_table_size) { int ret; uint32_t reg; + uint16_t flags; uint8_t cap_id, cap_offset; /* read PCI capability pointer from config space */ @@ -134,7 +139,18 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar) return -1; } + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; + *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); return 0; } @@ -532,6 +548,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) int i, ret, msix_bar; struct mapped_pci_resource *vfio_res = NULL; struct pci_map *maps; + uint32_t msix_table_offset = 0; + uint32_t msix_table_size = 0; dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; @@ -657,9 +675,10 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } /* get MSI-X BAR, if any (we have to know where it is because we can't - * mmap it when using VFIO) */ + * easily mmap it when using VFIO) */ msix_bar = -1; - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar); + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, + &msix_table_offset, &msix_table_size); if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); close(vfio_dev_fd); @@ -702,6 +721,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev) for (i = 0; i < (int) vfio_res->nb_maps; i++) { struct vfio_region_info reg = { .argsz = sizeof(reg) }; void *bar_addr; + struct memreg { + uint32_t offset, size; + } memreg[2] = {}; reg.index = i; @@ -720,21 +742,78 @@ pci_vfio_map_resource(struct rte_pci_device *dev) if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) continue; - /* skip MSI-X BAR */ - if (i == msix_bar) - continue; + if (i == msix_bar) { + /* + * VFIO will not let us map the MSI-X table, + * 
but we can map around it. + */ + uint32_t table_start = msix_table_offset; + uint32_t table_end = table_start + msix_table_size; + table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; + table_start &= PAGE_MASK; + + if (table_start == 0 && table_end >= reg.size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); + continue; + } else { + memreg[0].offset = reg.offset; + memreg[0].size = table_start; + memreg[1].offset = table_end; + memreg[1].size = reg.size - table_end; + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR %d that contains the MSI-X " + "table. Trying offsets: " + "%04x:%04x, %04x:%04x\n", i, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } + } else { + memreg[0].offset = reg.offset; + memreg[0].size = reg.size; + } + /* try to figure out an address */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); - bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset, - reg.size); + bar_addr = pci_map_addr; pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); } else { - bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, - reg.size); + bar_addr = maps[i].addr; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + uint8_t *second_addr = + ((uint8_t *)bar_addr + memreg[1].offset); + map_addr = pci_map_resource((void *)second_addr, + vfio_dev_fd, memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || 
!map_addr) { + munmap(bar_addr, reg.size); + bar_addr = MAP_FAILED; + } } if (bar_addr == MAP_FAILED || diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 03e693e01bf0..72ec3f62a3d8 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -43,9 +43,13 @@ #include <linux/vfio.h> #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff #else -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE #endif #define VFIO_PRESENT -- 1.9.3 ^ permalink raw reply related [flat|nested] 8+ messages in thread
[parent not found: <1421915771-10376-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <1421915771-10376-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> @ 2015-01-28 14:06 ` Dan Aloni 2015-01-28 15:01 ` Burakov, Anatoly 1 sibling, 0 replies; 8+ messages in thread From: Dan Aloni @ 2015-01-28 14:06 UTC (permalink / raw) To: dev-VfR2kkLFssw On Thu, Jan 22, 2015 at 10:36:11AM +0200, Dan Aloni wrote: > While VFIO doesn't allow us to map complete BARs with MSI-X tables, > it does allow us to map around them in PAGE_SIZE granularity. There > might be adapters that provide their registers in the same BAR > but on a different page. For example, Intel's NVME adapter, though > not a network adapter, provides only one MMIO BAR that contains > the MSI-X table. > > Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> > CC: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> Has anyone reviewed this yet? I am asking because I am interested to know whether someone is aiming to integrate storage controllers support into DPDK, and this patch could be instrumental. -- Dan Aloni ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <1421915771-10376-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> 2015-01-28 14:06 ` Dan Aloni @ 2015-01-28 15:01 ` Burakov, Anatoly [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3ECFA-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> 1 sibling, 1 reply; 8+ messages in thread From: Burakov, Anatoly @ 2015-01-28 15:01 UTC (permalink / raw) To: Dan Aloni, dev-VfR2kkLFssw@public.gmane.org Hi Dan Apologies for not looking at it earlier. > While VFIO doesn't allow us to map complete BARs with MSI-X tables, > it does allow us to map around them in PAGE_SIZE granularity. There > might be adapters that provide their registers in the same BAR > but on a different page. For example, Intel's NVME adapter, though > not a network adapter, provides only one MMIO BAR that contains > the MSI-X table. > > Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> > CC: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> > --- > lib/librte_eal/linuxapp/eal/eal_pci.c | 5 +- > lib/librte_eal/linuxapp/eal/eal_pci_init.h | 2 +- > lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 4 +- > lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 99 > +++++++++++++++++++++++++++--- > lib/librte_eal/linuxapp/eal/eal_vfio.h | 8 ++- > 5 files changed, 101 insertions(+), 17 deletions(-) > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c > b/lib/librte_eal/linuxapp/eal/eal_pci.c > index b5f54101e8aa..4a74a9372a15 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c > @@ -118,13 +118,14 @@ pci_find_max_end_va(void) > > /* map a particular resource from a file */ > void * > -pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) > +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, > + int additional_flags) > { > void *mapaddr; > > /* Map the PCI memory resource of device */ > 
mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, > - MAP_SHARED, fd, offset); > + MAP_SHARED | additional_flags, fd, offset); > if (mapaddr == MAP_FAILED) { > RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, > 0x%lx): %s (%p)\n", > __func__, fd, requested_addr, > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h > b/lib/librte_eal/linuxapp/eal/eal_pci_init.h > index 1070eb88fe0a..0a0853d4c4df 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h > @@ -66,7 +66,7 @@ extern void *pci_map_addr; > void *pci_find_max_end_va(void); > > void *pci_map_resource(void *requested_addr, int fd, off_t offset, > - size_t size); > + size_t size, int additional_flags); > > /* map IGB_UIO resource prototype */ > int pci_uio_map_resource(struct rte_pci_device *dev); > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > index e53f06b82430..eaa2e36f643e 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c > @@ -139,7 +139,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) > > if (pci_map_resource(uio_res->maps[i].addr, fd, > (off_t)uio_res->maps[i].offset, > - (size_t)uio_res->maps[i].size) > + (size_t)uio_res->maps[i].size, 0) > != uio_res->maps[i].addr) { > RTE_LOG(ERR, EAL, > "Cannot mmap device resource\n"); > @@ -379,7 +379,7 @@ pci_uio_map_resource(struct rte_pci_device *dev) > pci_map_addr = > pci_find_max_end_va(); > > mapaddr = > pci_map_resource(pci_map_addr, fd, (off_t)offset, > - (size_t)maps[j].size); > + (size_t)maps[j].size, 0); > if (mapaddr == MAP_FAILED) > fail = 1; > > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > index 20e097727f80..f6542a1f1464 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c > @@ -62,6 +62,9 @@ > > #ifdef VFIO_PRESENT > > +#define PAGE_SIZE 
(sysconf(_SC_PAGESIZE)) > +#define PAGE_MASK (~(PAGE_SIZE - 1)) > + > #define VFIO_DIR "/dev/vfio" > #define VFIO_CONTAINER_PATH "/dev/vfio/vfio" > #define VFIO_GROUP_FMT "/dev/vfio/%u" > @@ -72,10 +75,12 @@ static struct vfio_config vfio_cfg; > > /* get PCI BAR number where MSI-X interrupts are */ > static int > -pci_vfio_get_msix_bar(int fd, int *msix_bar) > +pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, > + uint32_t *msix_table_size) > { > int ret; > uint32_t reg; > + uint16_t flags; > uint8_t cap_id, cap_offset; > > /* read PCI capability pointer from config space */ > @@ -134,7 +139,18 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar) > return -1; > } > > + ret = pread64(fd, &flags, sizeof(flags), > + > VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + > + cap_offset + 2); > + if (ret != sizeof(flags)) { > + RTE_LOG(ERR, EAL, "Cannot read table flags > from PCI config " > + "space!\n"); > + return -1; > + } > + > *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; > + *msix_table_offset = reg & > RTE_PCI_MSIX_TABLE_OFFSET; > + *msix_table_size = 16 * (1 + (flags & > RTE_PCI_MSIX_FLAGS_QSIZE)); > > return 0; > } > @@ -532,6 +548,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > int i, ret, msix_bar; > struct mapped_pci_resource *vfio_res = NULL; > struct pci_map *maps; > + uint32_t msix_table_offset = 0; > + uint32_t msix_table_size = 0; > > dev->intr_handle.fd = -1; > dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; > @@ -657,9 +675,10 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > } > > /* get MSI-X BAR, if any (we have to know where it is because we > can't > - * mmap it when using VFIO) */ > + * easily mmap it when using VFIO) */ > msix_bar = -1; > - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar); > + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, > + &msix_table_offset, &msix_table_size); > if (ret < 0) { > RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", > pci_addr); > close(vfio_dev_fd); > @@ -702,6 
+721,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev) > for (i = 0; i < (int) vfio_res->nb_maps; i++) { > struct vfio_region_info reg = { .argsz = sizeof(reg) }; > void *bar_addr; > + struct memreg { > + uint32_t offset, size; > + } memreg[2] = {}; > > reg.index = i; > > @@ -720,21 +742,78 @@ pci_vfio_map_resource(struct rte_pci_device > *dev) > if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) > continue; > > - /* skip MSI-X BAR */ > - if (i == msix_bar) > - continue; > + if (i == msix_bar) { > + /* > + * VFIO will not let us map the MSI-X table, > + * but we can map around it. > + */ > + uint32_t table_start = msix_table_offset; > + uint32_t table_end = table_start + msix_table_size; > + table_end = (table_end + ~PAGE_MASK) & > PAGE_MASK; > + table_start &= PAGE_MASK; > + > + if (table_start == 0 && table_end >= reg.size) { > + /* Cannot map this BAR */ > + RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", > i); > + continue; > + } else { > + memreg[0].offset = reg.offset; > + memreg[0].size = table_start; > + memreg[1].offset = table_end; > + memreg[1].size = reg.size - table_end; > + > + RTE_LOG(DEBUG, EAL, > + "Trying to map BAR %d that contains > the MSI-X " > + "table. 
Trying offsets: " > + "%04x:%04x, %04x:%04x\n", i, > + memreg[0].offset, memreg[0].size, > + memreg[1].offset, memreg[1].size); > + } > + } else { > + memreg[0].offset = reg.offset; > + memreg[0].size = reg.size; > + } > > + /* try to figure out an address */ > if (internal_config.process_type == RTE_PROC_PRIMARY) { > /* try mapping somewhere close to the end of > hugepages */ > if (pci_map_addr == NULL) > pci_map_addr = pci_find_max_end_va(); > > - bar_addr = pci_map_resource(pci_map_addr, > vfio_dev_fd, reg.offset, > - reg.size); > + bar_addr = pci_map_addr; > pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) > reg.size); > } else { > - bar_addr = pci_map_resource(maps[i].addr, > vfio_dev_fd, reg.offset, > - reg.size); > + bar_addr = maps[i].addr; > + } > + > + /* reserve the address using an inaccessible mapping */ > + bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | > + MAP_ANONYMOUS, -1, 0); > + if (bar_addr != MAP_FAILED) { > + void *map_addr = NULL; > + if (memreg[0].size) { > + /* actual map of first part */ > + map_addr = pci_map_resource(bar_addr, > vfio_dev_fd, > + memreg[0].offset, > + memreg[0].size, > + MAP_FIXED); > + } > + > + /* if there's a second part, try to map it */ > + if (map_addr != MAP_FAILED > + && memreg[1].offset && memreg[1].size) { > + uint8_t *second_addr = > + ((uint8_t *)bar_addr + > memreg[1].offset); Nitpicking, but probably better to use void* and RTE_PTR_ADD here. 
> + map_addr = pci_map_resource((void > *)second_addr, > + vfio_dev_fd, > memreg[1].offset, > + memreg[1].size, > + MAP_FIXED); > + } > + > + if (map_addr == MAP_FAILED || !map_addr) { > + munmap(bar_addr, reg.size); > + bar_addr = MAP_FAILED; > + } > } > > if (bar_addr == MAP_FAILED || > diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h > b/lib/librte_eal/linuxapp/eal/eal_vfio.h > index 03e693e01bf0..72ec3f62a3d8 100644 > --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h > +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h > @@ -43,9 +43,13 @@ > #include <linux/vfio.h> > > #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) > -#define RTE_PCI_MSIX_TABLE_BIR 0x7 > +#define RTE_PCI_MSIX_TABLE_BIR 0x7 > +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 > +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff > #else > -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR > +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR > +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET > +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE > #endif > > #define VFIO_PRESENT > -- > 1.9.3 Otherwise, no issues from me. Thanks, Anatoly ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <C6ECDF3AB251BE4894318F4E4512369780C3ECFA-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3ECFA-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2015-01-28 22:04 ` Dan Aloni 2015-01-28 22:04 ` [PATCH v2] " Dan Aloni 1 sibling, 0 replies; 8+ messages in thread From: Dan Aloni @ 2015-01-28 22:04 UTC (permalink / raw) To: Burakov, Anatoly; +Cc: dev-VfR2kkLFssw@public.gmane.org On Wed, Jan 28, 2015 at 03:01:38PM +0000, Burakov, Anatoly wrote: > Hi Dan > > Apologies for not looking at it earlier. No problem, we are all quite busy :) > > + if (map_addr != MAP_FAILED > > + && memreg[1].offset && memreg[1].size) { > > + uint8_t *second_addr = > > + ((uint8_t *)bar_addr + > > memreg[1].offset); > > Nitpicking, but probably better to use void* and RTE_PTR_ADD here. Nitpicking very justified. New patch coming your way. -- Dan Aloni ^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3ECFA-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> 2015-01-28 22:04 ` Dan Aloni @ 2015-01-28 22:04 ` Dan Aloni [not found] ` <1422482693-14158-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> 1 sibling, 1 reply; 8+ messages in thread From: Dan Aloni @ 2015-01-28 22:04 UTC (permalink / raw) To: dev-VfR2kkLFssw While VFIO doesn't allow us to map complete BARs with MSI-X tables, it does allow us to map around them in PAGE_SIZE granularity. There might be adapters that provide their registers in the same BAR but on a different page. For example, Intel's NVME adapter, though not a network adapter, provides only one MMIO BAR that contains the MSI-X table. Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> Signed-off-by: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> --- lib/librte_eal/linuxapp/eal/eal_pci.c | 5 +- lib/librte_eal/linuxapp/eal/eal_pci_init.h | 2 +- lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 4 +- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 98 +++++++++++++++++++++++++++--- lib/librte_eal/linuxapp/eal/eal_vfio.h | 8 ++- 5 files changed, 100 insertions(+), 17 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index b5f54101e8aa..4a74a9372a15 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -118,13 +118,14 @@ pci_find_max_end_va(void) /* map a particular resource from a file */ void * -pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size) +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) { void *mapaddr; /* Map the PCI memory resource of device */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, offset); + MAP_SHARED | additional_flags, fd, offset); if (mapaddr == 
MAP_FAILED) { RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", __func__, fd, requested_addr, diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h index 1070eb88fe0a..0a0853d4c4df 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -66,7 +66,7 @@ extern void *pci_map_addr; void *pci_find_max_end_va(void); void *pci_map_resource(void *requested_addr, int fd, off_t offset, - size_t size); + size_t size, int additional_flags); /* map IGB_UIO resource prototype */ int pci_uio_map_resource(struct rte_pci_device *dev); diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index e53f06b82430..eaa2e36f643e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -139,7 +139,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev) if (pci_map_resource(uio_res->maps[i].addr, fd, (off_t)uio_res->maps[i].offset, - (size_t)uio_res->maps[i].size) + (size_t)uio_res->maps[i].size, 0) != uio_res->maps[i].addr) { RTE_LOG(ERR, EAL, "Cannot mmap device resource\n"); @@ -379,7 +379,7 @@ pci_uio_map_resource(struct rte_pci_device *dev) pci_map_addr = pci_find_max_end_va(); mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset, - (size_t)maps[j].size); + (size_t)maps[j].size, 0); if (mapaddr == MAP_FAILED) fail = 1; diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 20e097727f80..c8df91c0f800 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -62,6 +62,9 @@ #ifdef VFIO_PRESENT +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + #define VFIO_DIR "/dev/vfio" #define VFIO_CONTAINER_PATH "/dev/vfio/vfio" #define VFIO_GROUP_FMT "/dev/vfio/%u" @@ -72,10 +75,12 @@ static struct vfio_config vfio_cfg; /* get PCI BAR number where MSI-X 
interrupts are */ static int -pci_vfio_get_msix_bar(int fd, int *msix_bar) +pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, + uint32_t *msix_table_size) { int ret; uint32_t reg; + uint16_t flags; uint8_t cap_id, cap_offset; /* read PCI capability pointer from config space */ @@ -134,7 +139,18 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar) return -1; } + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; + *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); return 0; } @@ -532,6 +548,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) int i, ret, msix_bar; struct mapped_pci_resource *vfio_res = NULL; struct pci_map *maps; + uint32_t msix_table_offset = 0; + uint32_t msix_table_size = 0; dev->intr_handle.fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; @@ -657,9 +675,10 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } /* get MSI-X BAR, if any (we have to know where it is because we can't - * mmap it when using VFIO) */ + * easily mmap it when using VFIO) */ msix_bar = -1; - ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar); + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, + &msix_table_offset, &msix_table_size); if (ret < 0) { RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); close(vfio_dev_fd); @@ -702,6 +721,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev) for (i = 0; i < (int) vfio_res->nb_maps; i++) { struct vfio_region_info reg = { .argsz = sizeof(reg) }; void *bar_addr; + struct memreg { + uint32_t offset, size; + } memreg[2] = {}; reg.index = i; @@ -720,21 +742,77 @@ pci_vfio_map_resource(struct rte_pci_device *dev) if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) continue; - /* skip 
MSI-X BAR */ - if (i == msix_bar) - continue; + if (i == msix_bar) { + /* + * VFIO will not let us map the MSI-X table, + * but we can map around it. + */ + uint32_t table_start = msix_table_offset; + uint32_t table_end = table_start + msix_table_size; + table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; + table_start &= PAGE_MASK; + + if (table_start == 0 && table_end >= reg.size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); + continue; + } else { + memreg[0].offset = reg.offset; + memreg[0].size = table_start; + memreg[1].offset = table_end; + memreg[1].size = reg.size - table_end; + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR %d that contains the MSI-X " + "table. Trying offsets: " + "%04x:%04x, %04x:%04x\n", i, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } + } else { + memreg[0].offset = reg.offset; + memreg[0].size = reg.size; + } + /* try to figure out an address */ if (internal_config.process_type == RTE_PROC_PRIMARY) { /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); - bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset, - reg.size); + bar_addr = pci_map_addr; pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); } else { - bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset, - reg.size); + bar_addr = maps[i].addr; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset); + map_addr = 
pci_map_resource(second_addr, + vfio_dev_fd, memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || !map_addr) { + munmap(bar_addr, reg.size); + bar_addr = MAP_FAILED; + } } if (bar_addr == MAP_FAILED || diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 03e693e01bf0..72ec3f62a3d8 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -43,9 +43,13 @@ #include <linux/vfio.h> #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) -#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff #else -#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE #endif #define VFIO_PRESENT -- 1.9.3 ^ permalink raw reply related [flat|nested] 8+ messages in thread
[parent not found: <1422482693-14158-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org>]
* Re: [PATCH v2] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <1422482693-14158-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> @ 2015-01-29 10:22 ` Burakov, Anatoly [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3F021-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> 0 siblings, 1 reply; 8+ messages in thread From: Burakov, Anatoly @ 2015-01-29 10:22 UTC (permalink / raw) To: Dan Aloni, dev-VfR2kkLFssw@public.gmane.org Hi Dan, > > While VFIO doesn't allow us to map complete BARs with MSI-X tables, > it does allow us to map around them in PAGE_SIZE granularity. There > might be adapters that provide their registers in the same BAR > but on a different page. For example, Intel's NVME adapter, though > not a network adapter, provides only one MMIO BAR that contains > the MSI-X table. > > Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> > Signed-off-by: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> Why is it signed-off by me? :-) Otherwise, Acked-by: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> ^ permalink raw reply [flat|nested] 8+ messages in thread
[parent not found: <C6ECDF3AB251BE4894318F4E4512369780C3F021-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org>]
* Re: [PATCH v2] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3F021-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> @ 2015-01-29 10:25 ` Dan Aloni 2015-02-23 20:58 ` Thomas Monjalon 1 sibling, 0 replies; 8+ messages in thread From: Dan Aloni @ 2015-01-29 10:25 UTC (permalink / raw) To: Burakov, Anatoly; +Cc: dev-VfR2kkLFssw@public.gmane.org On Thu, Jan 29, 2015 at 10:22:58AM +0000, Burakov, Anatoly wrote: > Hi Dan, > > > > > While VFIO doesn't allow us to map complete BARs with MSI-X tables, > > it does allow us to map around them in PAGE_SIZE granularity. There > > might be adapters that provide their registers in the same BAR > > but on a different page. For example, Intel's NVME adapter, though > > not a network adapter, provides only one MMIO BAR that contains > > the MSI-X table. > > > > Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> > > Signed-off-by: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> > > Why is it signed-off by me? :-) The change you have requested got in, it makes you an author too :) -- Dan Aloni ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2] eal/linux: allow to map BARs with MSI-X tables, around them [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3F021-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> 2015-01-29 10:25 ` Dan Aloni @ 2015-02-23 20:58 ` Thomas Monjalon 1 sibling, 0 replies; 8+ messages in thread From: Thomas Monjalon @ 2015-02-23 20:58 UTC (permalink / raw) To: Burakov, Anatoly, Dan Aloni; +Cc: dev-VfR2kkLFssw > > While VFIO doesn't allow us to map complete BARs with MSI-X tables, > > it does allow us to map around them in PAGE_SIZE granularity. There > > might be adapters that provide their registers in the same BAR > > but on a different page. For example, Intel's NVME adapter, though > > not a network adapter, provides only one MMIO BAR that contains > > the MSI-X table. > > > > Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> > > Signed-off-by: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> [...] > Acked-by: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> Applied, thanks Note: EAL Linux VFIO has no official maintainer. ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2015-02-23 20:58 UTC | newest] Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2015-01-22 8:36 [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them Dan Aloni [not found] ` <1421915771-10376-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> 2015-01-28 14:06 ` Dan Aloni 2015-01-28 15:01 ` Burakov, Anatoly [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3ECFA-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> 2015-01-28 22:04 ` Dan Aloni 2015-01-28 22:04 ` [PATCH v2] " Dan Aloni [not found] ` <1422482693-14158-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org> 2015-01-29 10:22 ` Burakov, Anatoly [not found] ` <C6ECDF3AB251BE4894318F4E4512369780C3F021-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org> 2015-01-29 10:25 ` Dan Aloni 2015-02-23 20:58 ` Thomas Monjalon
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).