public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
From: Alex Vainman <alexonlists-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: roland <rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org>
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH] libibverbs: Add huge page support to ibv_madvise_range()
Date: Sun, 29 Nov 2009 19:08:08 +0200	[thread overview]
Message-ID: <4B12AA78.7090401@gmail.com> (raw)


ibv_reg_mr() fails to register a memory region that is backed by huge pages
rather than pages of the default size. This happens because ibv_madvise_range()
aligns the memory region to the default system page size before calling
madvise(), which then fails with EINVAL. madvise() fails because it expects the
start and end addresses of the memory range to be huge page aligned.
This patch handles the issue as follows:
1. ibv_fork_init() gets the kernel's default huge page size in addition
   to the default page size.
2. ibv_madvise_range() first tries aligning the user's memory range to the
   default page size; if madvise() fails with EINVAL, it aligns the user's
   memory range to the huge page size and tries madvise() again.

Signed-off-by: Alex Vaynman <alexv-smomgflXvOZWk0Htik3J/w@public.gmane.org>
---
 src/memory.c |   69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/src/memory.c b/src/memory.c
index 550015a..73db083 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -40,6 +40,9 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <string.h>
 
 #include "ibverbs.h"
 
@@ -54,6 +57,8 @@
 #define MADV_DOFORK	11
 #endif
 
+#define MEMINFO_SIZE	2048
+
 struct ibv_mem_node {
 	enum {
 		IBV_RED,
@@ -68,8 +73,51 @@ struct ibv_mem_node {
 static struct ibv_mem_node *mm_root;
 static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int page_size;
+static int huge_page_size;
 static int too_late;
 
+/* Get the kernel default huge page size: the number following "Hugepagesize:"
+ * in /proc/meminfo multiplied by 1024. Returns a negative value on failure
+ * (open()/read() result, or -1 with errno = EINVAL when the label is absent). */
+static int get_huge_page_size()
+{
+	int fd;
+	char buf[MEMINFO_SIZE];
+	int mem_file_len;
+	char *p_hpage_val = NULL;
+	char *end_pointer = NULL;
+	char file_name[] = "/proc/meminfo";
+	const char label[] = "Hugepagesize:";
+	int ret_val = 0;
+
+	fd = open(file_name, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	mem_file_len = read(fd, buf, sizeof(buf) - 1);	/* keep room for '\0' */
+
+	close(fd);
+	if (mem_file_len < 0)
+		return mem_file_len;
+
+	buf[mem_file_len] = '\0';
+
+	p_hpage_val = strstr(buf, label);
+	if (!p_hpage_val) {
+		errno = EINVAL;
+		return -1;
+	}
+	p_hpage_val += strlen(label);	/* step past the label text */
+	/* Clear errno so a strtol() failure can be told apart below. */
+	errno = 0;
+	ret_val = strtol(p_hpage_val, &end_pointer, 0);
+	/* NOTE(review): end_pointer is filled in but never examined. */
+	if (errno != 0)
+		return -1;
+	/* Scale by 1024 (assumes meminfo reports kB -- TODO confirm). */
+	return ret_val * 1024;
+}
+
 int ibv_fork_init(void)
 {
 	void *tmp;
@@ -85,6 +133,8 @@ int ibv_fork_init(void)
 	if (page_size < 0)
 		return errno;
 
+	huge_page_size = get_huge_page_size();
+
 	if (posix_memalign(&tmp, page_size, page_size))
 		return ENOMEM;
 
@@ -554,7 +604,8 @@ static struct ibv_mem_node *prepare_to_roll_back(struct ibv_mem_node *node,
 	return node;
 }
 
-static int ibv_madvise_range(void *base, size_t size, int advice)
+static int ibv_madvise_range_helper(void *base, size_t size, int advice,
+				    int page_size)
 {
 	uintptr_t start, end;
 	struct ibv_mem_node *node, *tmp;
@@ -646,6 +697,22 @@ out:
 	return ret;
 }
 
+static int ibv_madvise_range(void *base, size_t size, int advice)
+{
+	int ret_val = 0;
+
+	ret_val = ibv_madvise_range_helper(base, size, advice, page_size);
+
+	/*
+	 * The call above used page_size. Memory backed by huge pages must be
+	 * aligned to a huge page boundary for madvise() to succeed: retry.
+	 */
+	if (ret_val == -1 && errno == EINVAL && huge_page_size > 0)
+		ret_val = ibv_madvise_range_helper(base, size, advice, huge_page_size);
+
+	return ret_val;
+}
+
 int ibv_dontfork_range(void *base, size_t size)
 {
 	if (mm_root)
-- 
1.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

             reply	other threads:[~2009-11-29 17:08 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-29 17:08 Alex Vainman [this message]
     [not found] ` <4B12AA78.7090401-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2009-12-08 14:03   ` [PATCH] libibverbs: Add huge page support to ibv_madvise_range() Alex Vainman
     [not found]     ` <4B1E5CA9.3090707-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2010-01-12  9:26       ` Alex Vainman
2010-01-12 14:25   ` Eli Cohen
2010-01-14 15:12     ` Alex Vainman
2010-01-15 18:59   ` Roland Dreier
     [not found]     ` <ada8wbzi490.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-17  9:30       ` Alex Vainman
     [not found]         ` <4B52D8A8.7060804-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2010-01-17 17:19           ` Roland Dreier
     [not found]             ` <adak4vghcoo.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-01-18 12:53               ` Alex Vainman
     [not found]                 ` <4B5459E3.2040902-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2010-02-17 14:52                   ` Chuck Hartley
2010-04-22  7:35       ` Alex Vainman
     [not found]         ` <4BCFFC48.4060401-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2010-05-06 20:51           ` Roland Dreier
     [not found]             ` <adazl0c92kp.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-05-13 14:04               ` Alex Vainman
     [not found]                 ` <4BEC06DB.30505-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2010-05-13 15:50                   ` Roland Dreier
     [not found]                     ` <adapr0zsswc.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
2010-05-14  0:04                       ` Pradeep Satyanarayana
     [not found]                         ` <4BEC937F.5000808-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
2010-05-18  5:29                           ` Pradeep Satyanarayana

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B12AA78.7090401@gmail.com \
    --to=alexonlists-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=alexv-smomgflXvOZWk0Htik3J/w@public.gmane.org \
    --cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=rolandd-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox