All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] setup: dynamically detect default huge page size
@ 2026-06-20 11:36 Prateek
  2026-06-22 16:49 ` Gabriel Krisman Bertazi
  0 siblings, 1 reply; 4+ messages in thread
From: Prateek @ 2026-06-20 11:36 UTC (permalink / raw)
  To: io-uring; +Cc: Prateek

    Replace the hardcoded 2MB huge page size with dynamic detection by
    parsing /proc/meminfo. This fixes no-mmap allocation failures on
    architectures with a different default huge page size (such as ARM64,
    which often uses 512MB) and on x86 systems configured for 1GB pages.

    - Safely parses /proc/meminfo without allocating memory.
    - Uses raw syscalls and manual byte-by-byte matching to maintain
      strict compatibility with CONFIG_NOLIBC builds (avoiding strstr).
    - Drops the MAP_HUGE_2MB mmap flag to allow the kernel to correctly
      apply the system's default huge page size.
    - Falls back safely to 2MB if /proc/meminfo is unreadable.

Signed-off-by: Prateek <kprateek283@gmail.com>
---
 src/setup.c | 84 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 16 deletions(-)

diff --git a/src/setup.c b/src/setup.c
index ea6f11fd..46e20e0b 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -220,15 +220,67 @@ __cold int io_uring_ring_dontfork(struct io_uring *ring)
 	return 0;
 }
 
-#ifndef MAP_HUGE_SHIFT
-#define MAP_HUGE_SHIFT	26
-#endif
-#ifndef MAP_HUGE_2MB
-#define MAP_HUGE_2MB	(21U << MAP_HUGE_SHIFT)
-#endif
 
-/* FIXME */
-static size_t huge_page_size = 2 * 1024 * 1024;
+static size_t get_huge_page_size(void)	/* /proc/meminfo Hugepagesize, bytes */
+{
+	static size_t hps; /* cached result; 0 means not probed yet */
+	size_t ret = 2 * 1024 * 1024; /* fallback: 2MB, kept on any failure */
+	char buf[4096]; /* only the first 4095 bytes of the file are scanned */
+	char *p, *end;
+	unsigned long val;
+	ssize_t n;
+	int fd;
+
+	if (hps) /* already probed: reuse the cached value */
+		return hps;
+
+	fd = __sys_open("/proc/meminfo", O_RDONLY, 0);
+	if (fd < 0)
+		goto out;
+
+	n = __sys_read(fd, buf, sizeof(buf) - 1); /* one read; room for NUL */
+	__sys_close(fd);
+	if (n <= 0)
+		goto out;
+	buf[n] = '\0';
+
+	/*
+	 * Scan line-by-line for "Hugepagesize:". We avoid strstr() and
+	 * memcmp() because they are not available in CONFIG_NOLIBC builds.
+	 */
+	p = buf;
+	end = buf + n;
+	while (p < end) {
+		/* Check if this line starts with "Hugepagesize:" (13 chars) */
+		if (p + 13 <= end &&
+		    p[0]  == 'H' && p[1]  == 'u' && p[2]  == 'g' &&
+		    p[3]  == 'e' && p[4]  == 'p' && p[5]  == 'a' &&
+		    p[6]  == 'g' && p[7]  == 'e' && p[8]  == 's' &&
+		    p[9]  == 'i' && p[10] == 'z' && p[11] == 'e' &&
+		    p[12] == ':') {
+			p += 13;
+			while (p < end && (*p == ' ' || *p == '\t'))
+				p++;
+			val = 0;
+			while (p < end && *p >= '0' && *p <= '9') {
+				val = val * 10 + (*p - '0'); /* no overflow check */
+				p++;
+			}
+			if (val) /* zero or no digits: keep the fallback */
+				ret = val * 1024; /* kB -> bytes */
+			break; /* first matching line decides; stop scanning */
+		}
+		/* Advance to next line */
+		while (p < end && *p != '\n')
+			p++;
+		if (p < end)
+			p++;
+	}
+out:
+	hps = ret; /* cache even the fallback so later calls skip the probe */
+	return hps;
+}
+
 
 #define KRING_SIZE	64
 
@@ -261,13 +313,13 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
 	mem_used = (mem_used + page_size - 1) & ~(page_size - 1);
 
 	/*
-	 * A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
-	 * huge page by itself, so the SQ entries won't fit in the same huge
-	 * page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
+	 * A maxed-out number of CQ entries with IORING_SETUP_CQE32 can fill a
+	 * single huge page by itself, so the SQ entries won't fit in the same
+	 * huge page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
 	 * but check that too to future-proof (e.g. against different huge page
 	 * sizes). Bail out early so we don't overrun.
 	 */
-	if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
+	if (!buf && (sqes_mem > get_huge_page_size() || ring_mem > get_huge_page_size()))
 		return -ENOMEM;
 
 	if (buf) {
@@ -279,8 +331,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
 		if (sqes_mem <= page_size)
 			buf_size = page_size;
 		else {
-			buf_size = huge_page_size;
-			map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
+			buf_size = get_huge_page_size();
+			map_hugetlb = MAP_HUGETLB;
 		}
 		sqes_size = buf_size;
 		ptr = __sys_mmap(NULL, sqes_size, PROT_READ|PROT_WRITE,
@@ -302,8 +354,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
 		if (ring_mem <= page_size)
 			buf_size = page_size;
 		else {
-			buf_size = huge_page_size;
-			map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
+			buf_size = get_huge_page_size();
+			map_hugetlb = MAP_HUGETLB;
 		}
 		ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
 					MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-23 15:11 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-06-20 11:36 [PATCH] setup: dynamically detect default huge page size Prateek
2026-06-22 16:49 ` Gabriel Krisman Bertazi
2026-06-23 11:09   ` Prateek
2026-06-23 15:11     ` Gabriel Krisman Bertazi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.