All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org>
To: dev-VfR2kkLFssw@public.gmane.org
Subject: [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them
Date: Thu, 22 Jan 2015 10:36:11 +0200	[thread overview]
Message-ID: <1421915771-10376-1-git-send-email-dan@kernelim.com> (raw)

While VFIO doesn't allow us to map complete BARs that contain MSI-X
tables, it does allow us to map around the tables at PAGE_SIZE
granularity. There might be adapters that provide their registers in
the same BAR as the MSI-X table but on a different page. For example,
Intel's NVMe adapter, though not a network adapter, provides only one
MMIO BAR, and that BAR also contains the MSI-X table.

Signed-off-by: Dan Aloni <dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org>
CC: Anatoly Burakov <anatoly.burakov-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 lib/librte_eal/linuxapp/eal/eal_pci.c      |  5 +-
 lib/librte_eal/linuxapp/eal/eal_pci_init.h |  2 +-
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c  |  4 +-
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 99 +++++++++++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/eal_vfio.h     |  8 ++-
 5 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index b5f54101e8aa..4a74a9372a15 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -118,13 +118,14 @@ pci_find_max_end_va(void)
 
 /* map a particular resource from a file */
 void *
-pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size)
+pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+		 int additional_flags)
 {
 	void *mapaddr;
 
 	/* Map the PCI memory resource of device */
 	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
-			MAP_SHARED, fd, offset);
+			MAP_SHARED | additional_flags, fd, offset);
 	if (mapaddr == MAP_FAILED) {
 		RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
 			__func__, fd, requested_addr,
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index 1070eb88fe0a..0a0853d4c4df 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -66,7 +66,7 @@ extern void *pci_map_addr;
 void *pci_find_max_end_va(void);
 
 void *pci_map_resource(void *requested_addr, int fd, off_t offset,
-		size_t size);
+	       size_t size, int additional_flags);
 
 /* map IGB_UIO resource prototype */
 int pci_uio_map_resource(struct rte_pci_device *dev);
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index e53f06b82430..eaa2e36f643e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -139,7 +139,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
 
 			if (pci_map_resource(uio_res->maps[i].addr, fd,
 					     (off_t)uio_res->maps[i].offset,
-					     (size_t)uio_res->maps[i].size)
+					     (size_t)uio_res->maps[i].size, 0)
 			    != uio_res->maps[i].addr) {
 				RTE_LOG(ERR, EAL,
 					"Cannot mmap device resource\n");
@@ -379,7 +379,7 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 					pci_map_addr = pci_find_max_end_va();
 
 				mapaddr = pci_map_resource(pci_map_addr, fd, (off_t)offset,
-						(size_t)maps[j].size);
+						(size_t)maps[j].size, 0);
 				if (mapaddr == MAP_FAILED)
 					fail = 1;
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 20e097727f80..f6542a1f1464 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -62,6 +62,9 @@
 
 #ifdef VFIO_PRESENT
 
+#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
+#define PAGE_MASK   (~(PAGE_SIZE - 1))
+
 #define VFIO_DIR "/dev/vfio"
 #define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
 #define VFIO_GROUP_FMT "/dev/vfio/%u"
@@ -72,10 +75,12 @@ static struct vfio_config vfio_cfg;
 
 /* get PCI BAR number where MSI-X interrupts are */
 static int
-pci_vfio_get_msix_bar(int fd, int *msix_bar)
+pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
+		      uint32_t *msix_table_size)
 {
 	int ret;
 	uint32_t reg;
+	uint16_t flags;
 	uint8_t cap_id, cap_offset;
 
 	/* read PCI capability pointer from config space */
@@ -134,7 +139,18 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar)
 				return -1;
 			}
 
+			ret = pread64(fd, &flags, sizeof(flags),
+					VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+					cap_offset + 2);
+			if (ret != sizeof(flags)) {
+				RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config "
+						"space!\n");
+				return -1;
+			}
+
 			*msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR;
+			*msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET;
+			*msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE));
 
 			return 0;
 		}
@@ -532,6 +548,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 	int i, ret, msix_bar;
 	struct mapped_pci_resource *vfio_res = NULL;
 	struct pci_map *maps;
+	uint32_t msix_table_offset = 0;
+	uint32_t msix_table_size = 0;
 
 	dev->intr_handle.fd = -1;
 	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
@@ -657,9 +675,10 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 	}
 
 	/* get MSI-X BAR, if any (we have to know where it is because we can't
-	 * mmap it when using VFIO) */
+	 * easily mmap it when using VFIO) */
 	msix_bar = -1;
-	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar);
+	ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar,
+				    &msix_table_offset, &msix_table_size);
 	if (ret < 0) {
 		RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n", pci_addr);
 		close(vfio_dev_fd);
@@ -702,6 +721,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
 		struct vfio_region_info reg = { .argsz = sizeof(reg) };
 		void *bar_addr;
+		struct memreg {
+			uint32_t offset, size;
+		} memreg[2] = {};
 
 		reg.index = i;
 
@@ -720,21 +742,78 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 		if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
 			continue;
 
-		/* skip MSI-X BAR */
-		if (i == msix_bar)
-			continue;
+		if (i == msix_bar) {
+			/*
+			 * VFIO will not let us map the MSI-X table,
+			 * but we can map around it.
+			 */
+			uint32_t table_start = msix_table_offset;
+			uint32_t table_end = table_start + msix_table_size;
+			table_end = (table_end + ~PAGE_MASK) & PAGE_MASK;
+			table_start &= PAGE_MASK;
+
+			if (table_start == 0  &&  table_end >= reg.size) {
+				/* Cannot map this BAR */
+				RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i);
+				continue;
+			} else {
+				memreg[0].offset = reg.offset;
+				memreg[0].size = table_start;
+				memreg[1].offset = table_end;
+				memreg[1].size = reg.size - table_end;
+
+				RTE_LOG(DEBUG, EAL,
+					"Trying to map BAR %d that contains the MSI-X "
+					"table. Trying offsets: "
+					"%04x:%04x, %04x:%04x\n", i,
+					memreg[0].offset, memreg[0].size,
+					memreg[1].offset, memreg[1].size);
+			}
+		} else {
+			memreg[0].offset = reg.offset;
+			memreg[0].size = reg.size;
+		}
 
+		/* try to figure out an address */
 		if (internal_config.process_type == RTE_PROC_PRIMARY) {
 			/* try mapping somewhere close to the end of hugepages */
 			if (pci_map_addr == NULL)
 				pci_map_addr = pci_find_max_end_va();
 
-			bar_addr = pci_map_resource(pci_map_addr, vfio_dev_fd, reg.offset,
-					reg.size);
+			bar_addr = pci_map_addr;
 			pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
 		} else {
-			bar_addr = pci_map_resource(maps[i].addr, vfio_dev_fd, reg.offset,
-					reg.size);
+			bar_addr = maps[i].addr;
+		}
+
+		/* reserve the address using an inaccessible mapping */
+		bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE |
+				MAP_ANONYMOUS, -1, 0);
+		if (bar_addr != MAP_FAILED) {
+			void *map_addr = NULL;
+			if (memreg[0].size) {
+				/* actual map of first part */
+				map_addr = pci_map_resource(bar_addr, vfio_dev_fd,
+							    memreg[0].offset,
+							    memreg[0].size,
+							    MAP_FIXED);
+			}
+
+			/* if there's a second part, try to map it */
+			if (map_addr != MAP_FAILED
+			    &&  memreg[1].offset  &&  memreg[1].size) {
+				uint8_t *second_addr =
+					((uint8_t *)bar_addr + memreg[1].offset);
+				map_addr = pci_map_resource((void *)second_addr,
+							    vfio_dev_fd, memreg[1].offset,
+							    memreg[1].size,
+							    MAP_FIXED);
+			}
+
+			if (map_addr == MAP_FAILED  ||  !map_addr) {
+				munmap(bar_addr, reg.size);
+				bar_addr = MAP_FAILED;
+			}
 		}
 
 		if (bar_addr == MAP_FAILED ||
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 03e693e01bf0..72ec3f62a3d8 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -43,9 +43,13 @@
 #include <linux/vfio.h>
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
-#define RTE_PCI_MSIX_TABLE_BIR 0x7
+#define RTE_PCI_MSIX_TABLE_BIR    0x7
+#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8
+#define RTE_PCI_MSIX_FLAGS_QSIZE  0x07ff
 #else
-#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR
+#define RTE_PCI_MSIX_TABLE_BIR    PCI_MSIX_TABLE_BIR
+#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET
+#define RTE_PCI_MSIX_FLAGS_QSIZE  PCI_MSIX_FLAGS_QSIZE
 #endif
 
 #define VFIO_PRESENT
-- 
1.9.3

             reply	other threads:[~2015-01-22  8:36 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-22  8:36 Dan Aloni [this message]
     [not found] ` <1421915771-10376-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org>
2015-01-28 14:06   ` [PATCH] eal/linux: allow to map BARs with MSI-X tables, around them Dan Aloni
2015-01-28 15:01   ` Burakov, Anatoly
     [not found]     ` <C6ECDF3AB251BE4894318F4E4512369780C3ECFA-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2015-01-28 22:04       ` Dan Aloni
2015-01-28 22:04       ` [PATCH v2] " Dan Aloni
     [not found]         ` <1422482693-14158-1-git-send-email-dan-HWkDggknmVpWk0Htik3J/w@public.gmane.org>
2015-01-29 10:22           ` Burakov, Anatoly
     [not found]             ` <C6ECDF3AB251BE4894318F4E4512369780C3F021-kPTMFJFq+rHjxeytcECX8bfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2015-01-29 10:25               ` Dan Aloni
2015-02-23 20:58               ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1421915771-10376-1-git-send-email-dan@kernelim.com \
    --to=dan-hwkdggknmvpwk0htik3j/w@public.gmane.org \
    --cc=dev-VfR2kkLFssw@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.