From: Ben Walker <benjamin.walker@intel.com>
To: dev@dpdk.org
Cc: Ben Walker <benjamin.walker@intel.com>
Subject: [PATCH v3] eal: Support running as unprivileged user
Date: Tue, 31 Jan 2017 10:40:54 -0700 [thread overview]
Message-ID: <20170131174054.15324-1-benjamin.walker@intel.com> (raw)
In-Reply-To: <20170103225600.51310-1-benjamin.walker@intel.com>
In Linux kernel 4.0 and newer, the ability for unprivileged
users to obtain physical page frame numbers from
/proc/self/pagemap was removed. When an IOMMU is present,
simply choose our own DMA addresses instead.
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
---
lib/librte_eal/common/eal_private.h | 12 +++++
lib/librte_eal/linuxapp/eal/eal_memory.c | 75 +++++++++++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_pci.c | 6 ++-
3 files changed, 71 insertions(+), 22 deletions(-)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 9e7d8f6..8b2d323 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -34,6 +34,7 @@
#ifndef _EAL_PRIVATE_H_
#define _EAL_PRIVATE_H_
+#include <stdbool.h>
#include <stdio.h>
#include <rte_pci.h>
@@ -301,4 +302,15 @@ int rte_eal_hugepage_init(void);
*/
int rte_eal_hugepage_attach(void);
+/**
+ * Returns true if the system is able to obtain
+ * physical addresses. Returns false if using DMA
+ * addresses through an IOMMU.
+ *
+ * Drivers based on uio will not load unless physical
+ * addresses are obtainable. It is only possible to get
+ * physical addresses when running as a privileged user.
+ */
+bool rte_eal_using_phys_addrs(void);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb2..cbb99bd 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@@ -122,26 +123,24 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void)
{
- int fd = open("/proc/self/pagemap", O_RDONLY);
+ uint64_t tmp;
+ phys_addr_t physaddr;
- if (fd < 0) {
+ physaddr = rte_mem_virt2phy(&tmp);
+ if (physaddr == RTE_BAD_PHYS_ADDR) {
RTE_LOG(ERR, EAL,
- "Cannot open /proc/self/pagemap: %s. "
- "virt2phys address translation will not work\n",
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
strerror(errno));
- return;
+ phys_addrs_available = false;
}
-
- /* Is readable */
- close(fd);
- proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +189,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!proc_pagemap_readable)
+ if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@@ -229,6 +228,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_PHYS_ADDR;
+
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -242,7 +244,7 @@ rte_mem_virt2phy(const void *virtaddr)
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
- unsigned i;
+ unsigned int i;
phys_addr_t addr;
for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -255,6 +257,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
+{
+ unsigned int i;
+ static phys_addr_t addr = 0;
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr;
+ addr += hugepg_tbl[i].size;
+ }
+ return 0;
+}
+
+/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
* two processes mapping data to the same virtual address
@@ -951,7 +969,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@@ -1043,11 +1061,22 @@ rte_eal_hugepage_init(void)
continue;
}
- /* find physical addresses and sockets for each hugepage */
- if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
+ if (phys_addrs_available) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1318,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@@ -1426,3 +1455,9 @@ rte_eal_hugepage_attach(void)
close(fd_hugepage);
return -1;
}
+
+bool
+rte_eal_using_phys_addrs(void)
+{
+ return phys_addrs_available;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index e2fc219..61d55b9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -99,8 +99,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ }
break;
default:
RTE_LOG(DEBUG, EAL,
--
2.9.3
next prev parent reply other threads:[~2017-01-31 17:41 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-01-03 22:30 [PATCH] eal: Support running as unprivileged user Ben Walker
2017-01-03 22:56 ` [PATCH v2] " Ben Walker
2017-01-18 17:05 ` Sergio Gonzalez Monroy
2017-01-31 17:40 ` Ben Walker [this message]
2017-01-31 17:44 ` [PATCH v4] " Ben Walker
2017-02-17 14:59 ` Sergio Gonzalez Monroy
2017-03-09 16:10 ` Thomas Monjalon
2017-02-17 19:28 ` Stephen Hemminger
2017-03-09 16:11 ` Thomas Monjalon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170131174054.15324-1-benjamin.walker@intel.com \
--to=benjamin.walker@intel.com \
--cc=dev@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.