All of lore.kernel.org
 help / color / mirror / Atom feed
From: Prateek <kprateek283@gmail.com>
To: io-uring@vger.kernel.org
Cc: Prateek <kprateek283@gmail.com>
Subject: [PATCH] setup: dynamically detect default huge page size
Date: Sat, 20 Jun 2026 17:06:09 +0530	[thread overview]
Message-ID: <20260620113609.123575-1-kprateek283@gmail.com> (raw)

    Replace the hardcoded 2MB huge page size with a value detected at
    runtime by parsing the Hugepagesize field of /proc/meminfo. This fixes
    no-mmap allocation failures on systems whose default huge page size is
    not 2MB, such as ARM64 kernels built with 64KB base pages (512MB huge
    pages) or x86 systems configured with 1GB default huge pages.

    - Parses /proc/meminfo from a fixed 4KB stack buffer, with no heap
      allocation.
    - Uses raw syscalls and manual byte-by-byte matching to maintain
      strict compatibility with CONFIG_NOLIBC builds (avoiding strstr).
    - Drops the MAP_HUGE_2MB mmap flag to allow the kernel to correctly
      apply the system's default huge page size.
    - Falls back to 2MB if /proc/meminfo cannot be read or contains no
      non-zero Hugepagesize value.

Signed-off-by: Prateek <kprateek283@gmail.com>
---
 src/setup.c | 84 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 16 deletions(-)

diff --git a/src/setup.c b/src/setup.c
index ea6f11fd..46e20e0b 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -220,15 +220,67 @@ __cold int io_uring_ring_dontfork(struct io_uring *ring)
 	return 0;
 }
 
-#ifndef MAP_HUGE_SHIFT
-#define MAP_HUGE_SHIFT	26
-#endif
-#ifndef MAP_HUGE_2MB
-#define MAP_HUGE_2MB	(21U << MAP_HUGE_SHIFT)
-#endif
 
-/* FIXME */
-static size_t huge_page_size = 2 * 1024 * 1024;
+static size_t get_huge_page_size(void) /* default huge page size, in bytes */
+{
+	static size_t hps; /* cached result; stays 0 until the first call stores it */
+	size_t ret = 2 * 1024 * 1024; /* fallback: 2MB, kept unless parsing succeeds */
+	char buf[4096];
+	char *p, *end;
+	unsigned long val;
+	ssize_t n;
+	int fd;
+
+	if (hps) /* already computed by an earlier call */
+		return hps;
+
+	fd = __sys_open("/proc/meminfo", O_RDONLY, 0);
+	if (fd < 0)
+		goto out;
+
+	n = __sys_read(fd, buf, sizeof(buf) - 1); /* one read; keep a byte for the NUL */
+	__sys_close(fd);
+	if (n <= 0)
+		goto out;
+	buf[n] = '\0';
+
+	/*
+	 * Scan line-by-line for a line starting with "Hugepagesize:", by hand:
+	 * strstr() and memcmp() are not available in CONFIG_NOLIBC builds.
+	 */
+	p = buf;
+	end = buf + n; /* only the bytes returned by the single read are scanned */
+	while (p < end) {
+		/* Does the current line start with "Hugepagesize:" (13 chars)? */
+		if (p + 13 <= end &&
+		    p[0]  == 'H' && p[1]  == 'u' && p[2]  == 'g' &&
+		    p[3]  == 'e' && p[4]  == 'p' && p[5]  == 'a' &&
+		    p[6]  == 'g' && p[7]  == 'e' && p[8]  == 's' &&
+		    p[9]  == 'i' && p[10] == 'z' && p[11] == 'e' &&
+		    p[12] == ':') {
+			p += 13;
+			while (p < end && (*p == ' ' || *p == '\t')) /* skip blanks before the number */
+				p++;
+			val = 0;
+			while (p < end && *p >= '0' && *p <= '9') { /* accumulate decimal digits */
+				val = val * 10 + (*p - '0');
+				p++;
+			}
+			if (val) /* a zero or missing number keeps the fallback */
+				ret = val * 1024; /* kB -> bytes */
+			break;
+		}
+		/* No match: move to the character after the next '\n' */
+		while (p < end && *p != '\n')
+			p++;
+		if (p < end)
+			p++;
+	}
+out:
+	hps = ret; /* remember the answer (fallback included) for later calls */
+	return hps;
+}
+
 
 #define KRING_SIZE	64
 
@@ -261,13 +313,13 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
 	mem_used = (mem_used + page_size - 1) & ~(page_size - 1);
 
 	/*
-	 * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
-	 * huge page by itself, so the SQ entries won't fit in the same huge
-	 * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
+	 * A maxed-out number of CQ entries with IORING_SETUP_CQE32 can fill a
+	 * single huge page by itself, so the SQ entries won't fit in the same
+	 * huge page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
 	 * but check that too to future-proof (e.g. against different huge page
 	 * sizes). Bail out early so we don't overrun.
 	 */
-	if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
+	if (!buf && (sqes_mem > get_huge_page_size() || ring_mem > get_huge_page_size()))
 		return -ENOMEM;
 
 	if (buf) {
@@ -279,8 +331,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
 		if (sqes_mem <= page_size)
 			buf_size = page_size;
 		else {
-			buf_size = huge_page_size;
-			map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
+			buf_size = get_huge_page_size();
+			map_hugetlb = MAP_HUGETLB;
 		}
 		sqes_size = buf_size;
 		ptr = __sys_mmap(NULL, sqes_size, PROT_READ|PROT_WRITE,
@@ -302,8 +354,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
 		if (ring_mem <= page_size)
 			buf_size = page_size;
 		else {
-			buf_size = huge_page_size;
-			map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
+			buf_size = get_huge_page_size();
+			map_hugetlb = MAP_HUGETLB;
 		}
 		ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
 					MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
-- 
2.43.0


             reply	other threads:[~2026-06-20 11:37 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-20 11:36 Prateek [this message]
2026-06-22 16:49 ` [PATCH] setup: dynamically detect default huge page size Gabriel Krisman Bertazi
2026-06-23 11:09   ` Prateek
2026-06-23 15:11     ` Gabriel Krisman Bertazi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260620113609.123575-1-kprateek283@gmail.com \
    --to=kprateek283@gmail.com \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.