From: Prateek <kprateek283@gmail.com>
To: io-uring@vger.kernel.org
Cc: Prateek <kprateek283@gmail.com>
Subject: [PATCH] setup: dynamically detect default huge page size
Date: Sat, 20 Jun 2026 17:06:09 +0530 [thread overview]
Message-ID: <20260620113609.123575-1-kprateek283@gmail.com> (raw)
Replace the hardcoded 2MB huge page size with dynamic detection by
parsing the Hugepagesize field of /proc/meminfo. This fixes no-mmap
allocation failures on architectures whose default huge page size is
not 2MB (such as ARM64, which often uses 512MB) and on x86 systems
configured for 1GB pages.
- Safely parses /proc/meminfo without allocating memory.
- Uses raw syscalls and manual byte-by-byte matching to maintain
strict compatibility with CONFIG_NOLIBC builds (avoiding strstr and
memcmp).
- Drops the MAP_HUGE_2MB mmap flag to allow the kernel to correctly
apply the system's default huge page size.
- Falls back safely to 2MB if /proc/meminfo is unreadable.
Signed-off-by: Prateek <kprateek283@gmail.com>
---
src/setup.c | 84 +++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 68 insertions(+), 16 deletions(-)
diff --git a/src/setup.c b/src/setup.c
index ea6f11fd..46e20e0b 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -220,15 +220,67 @@ __cold int io_uring_ring_dontfork(struct io_uring *ring)
return 0;
}
-#ifndef MAP_HUGE_SHIFT
-#define MAP_HUGE_SHIFT 26
-#endif
-#ifndef MAP_HUGE_2MB
-#define MAP_HUGE_2MB (21U << MAP_HUGE_SHIFT)
-#endif
-/* FIXME */
-static size_t huge_page_size = 2 * 1024 * 1024;
+static size_t get_huge_page_size(void) /* "Hugepagesize:" from /proc/meminfo, in bytes */
+{
+ static size_t hps; /* cached result; zero means "not probed yet" */
+ size_t ret = 2 * 1024 * 1024; /* fallback when detection fails: 2MB */
+ char buf[4096];
+ char *p, *end;
+ unsigned long val;
+ ssize_t n;
+ int fd;
+
+ if (hps)
+ return hps;
+
+ fd = __sys_open("/proc/meminfo", O_RDONLY, 0);
+ if (fd < 0)
+ goto out;
+
+ n = __sys_read(fd, buf, sizeof(buf) - 1); /* single read; one byte kept for the NUL */
+ __sys_close(fd);
+ if (n <= 0)
+ goto out;
+ buf[n] = '\0';
+
+ /*
+ * Scan line-by-line for "Hugepagesize:" (value in kB). We avoid strstr()
+ * and memcmp() because they are not available in CONFIG_NOLIBC builds.
+ */
+ p = buf;
+ end = buf + n; /* one past the last byte read */
+ while (p < end) {
+ /* p is at the start of a line: does it begin with "Hugepagesize:" (13 chars)? */
+ if (p + 13 <= end &&
+ p[0] == 'H' && p[1] == 'u' && p[2] == 'g' &&
+ p[3] == 'e' && p[4] == 'p' && p[5] == 'a' &&
+ p[6] == 'g' && p[7] == 'e' && p[8] == 's' &&
+ p[9] == 'i' && p[10] == 'z' && p[11] == 'e' &&
+ p[12] == ':') {
+ p += 13;
+ while (p < end && (*p == ' ' || *p == '\t')) /* skip padding before the number */
+ p++;
+ val = 0;
+ while (p < end && *p >= '0' && *p <= '9') { /* accumulate the decimal digits */
+ val = val * 10 + (*p - '0');
+ p++;
+ }
+ if (val) /* no digits, or a zero value: keep the 2MB fallback */
+ ret = val * 1024; /* kB -> bytes; the unit suffix itself is not checked */
+ break; /* only the first matching line is used */
+ }
+ /* No match: advance to the start of the next line */
+ while (p < end && *p != '\n')
+ p++;
+ if (p < end)
+ p++;
+ }
+out:
+ hps = ret; /* cache the outcome (fallback included) for later calls */
+ return hps;
+}
+
#define KRING_SIZE 64
@@ -261,13 +313,13 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
mem_used = (mem_used + page_size - 1) & ~(page_size - 1);
/*
- * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
- * huge page by itself, so the SQ entries won't fit in the same huge
- * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
+ * A maxed-out number of CQ entries with IORING_SETUP_CQE32 can fill a
+ * single huge page by itself, so the SQ entries won't fit in the same
+ * huge page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
* but check that too to future-proof (e.g. against different huge page
* sizes). Bail out early so we don't overrun.
*/
- if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
+ if (!buf && (sqes_mem > get_huge_page_size() || ring_mem > get_huge_page_size()))
return -ENOMEM;
if (buf) {
@@ -279,8 +331,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
if (sqes_mem <= page_size)
buf_size = page_size;
else {
- buf_size = huge_page_size;
- map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
+ buf_size = get_huge_page_size();
+ map_hugetlb = MAP_HUGETLB;
}
sqes_size = buf_size;
ptr = __sys_mmap(NULL, sqes_size, PROT_READ|PROT_WRITE,
@@ -302,8 +354,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
if (ring_mem <= page_size)
buf_size = page_size;
else {
- buf_size = huge_page_size;
- map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
+ buf_size = get_huge_page_size();
+ map_hugetlb = MAP_HUGETLB;
}
ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
--
2.43.0
next reply other threads:[~2026-06-20 11:37 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-20 11:36 Prateek [this message]
2026-06-22 16:49 ` [PATCH] setup: dynamically detect default huge page size Gabriel Krisman Bertazi
2026-06-23 11:09 ` Prateek
2026-06-23 15:11 ` Gabriel Krisman Bertazi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260620113609.123575-1-kprateek283@gmail.com \
--to=kprateek283@gmail.com \
--cc=io-uring@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox