All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tools/mm: Add madvise tool
@ 2025-09-04 17:57 kirill
  2025-09-04 19:07 ` Liam R. Howlett
                   ` (7 more replies)
  0 siblings, 8 replies; 24+ messages in thread
From: kirill @ 2025-09-04 17:57 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Liam R . Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, linux-mm
  Cc: Usama Arif, Kiryl Shutsemau

From: Kiryl Shutsemau <kas@kernel.org>

Add a simple tool that allows to issue an advice on a process or a file.

It can be useful to experiment with effects of an advice on a workload
without modifying the workload itself.

Only supports advices available for process_madvise().

Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
---
 tools/mm/.gitignore |   4 +-
 tools/mm/Makefile   |   2 +-
 tools/mm/madvise.c  | 170 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 174 insertions(+), 2 deletions(-)
 create mode 100644 tools/mm/madvise.c

diff --git a/tools/mm/.gitignore b/tools/mm/.gitignore
index 922879f93fc8..b713fcf4a2e0 100644
--- a/tools/mm/.gitignore
+++ b/tools/mm/.gitignore
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
-slabinfo
+madvise
 page-types
 page_owner_sort
+slabinfo
+thp_swap_allocator_test
diff --git a/tools/mm/Makefile b/tools/mm/Makefile
index f5725b5c23aa..db315a48adcd 100644
--- a/tools/mm/Makefile
+++ b/tools/mm/Makefile
@@ -3,7 +3,7 @@
 #
 include ../scripts/Makefile.include
 
-BUILD_TARGETS=page-types slabinfo page_owner_sort thp_swap_allocator_test
+BUILD_TARGETS= madvise page-types page_owner_sort slabinfo thp_swap_allocator_test
 INSTALL_TARGETS = $(BUILD_TARGETS) thpmaps
 
 LIB_DIR = ../lib/api
diff --git a/tools/mm/madvise.c b/tools/mm/madvise.c
new file mode 100644
index 000000000000..038b3e1076ea
--- /dev/null
+++ b/tools/mm/madvise.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+static void usage(void)
+{
+	printf("madvise TARGET ADVICE START END\n\n");
+	printf("Arguments:\n");
+	printf("\t<TARGET>\n");
+	printf("\t\tA process ID or a file to give the advice to.\n\n");
+	printf("\t\tUse \"./\" prefix if the file name is all digits.\n\n");
+	printf("\t<ADVICE>\n");
+	printf("\t\tcold\t\t- Deactivate a given range of pages\n");
+	printf("\t\tcollapse\t- Collapse pages in a given range into THPs\n");
+	printf("\t\tpageout\t\t- Reclaim a given range of pages\n");
+	printf("\t\twillneed\t- The specified data will be accessed in the near future\n");
+	printf("\n\t\tSee madvise(2) for more details.\n\n");
+	printf("\t<START>/<END>\n");
+	printf("\t\tStart and end addressed for the advice. Must be page-aligned.\n\n");
+	printf("\t\tFor PID case, it is addresses in the target process address space.\n\n");
+	printf("\t\tFor file case, it is offsets in the file.\n\n");
+}
+
+static void error(const char *fmt, ...)
+{
+	if (fmt) {
+		va_list argp;
+
+		va_start(argp, fmt);
+		vfprintf(stderr, fmt, argp);
+		va_end(argp);
+		printf("\n");
+	}
+
+	usage();
+	exit(-1);
+}
+
+#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+static unsigned long read_pmd_pagesize(void)
+{
+	int fd;
+	char buf[20];
+	ssize_t num_read;
+
+	fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	num_read = read(fd, buf, 19);
+	if (num_read < 1) {
+		close(fd);
+		return 0;
+	}
+	buf[num_read] = '\0';
+	close(fd);
+
+	return strtoul(buf, NULL, 10);
+}
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(SYS_pidfd_open, pid, flags);
+}
+
+int main(int argc, const char *argv[])
+{
+	unsigned long pid, start, end, page_size;
+	int advice;
+	char *err;
+	int fd;
+
+	if (argc != 5)
+		error(NULL);
+
+	pid = strtoul(argv[1], &err, 10);
+	if (*err || err == argv[1] ||
+	    pid > INT_MAX || (pid_t)pid <= 0) {
+		// Not a PID, assume argv[1] is a file name
+		pid = 0;
+	}
+
+	if (pid) {
+		fd = pidfd_open(pid, 0);
+		if (fd < 0)
+			perror("pidfd_open()"), exit(-1);
+	} else {
+		fd = open(argv[1], O_RDWR);
+		if (fd < 0)
+			perror("open"), exit(-1);
+	}
+
+	if (!strcmp(argv[2], "cold"))
+		advice = MADV_COLD;
+	else if (!strcmp(argv[2], "collapse"))
+		advice = MADV_COLLAPSE;
+	else if (!strcmp(argv[2], "pageout"))
+		advice = MADV_PAGEOUT;
+	else if (!strcmp(argv[2], "willneed"))
+		advice = MADV_WILLNEED;
+	else
+		error("Unknown advice: %s\n", argv[2]);
+
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	start = strtoul(argv[3], &err, 0);
+	if (*err || err == argv[3])
+		error("Cannot parse start address\n");
+	if (start % page_size)
+		error("Start address is not aligned to page size\n");
+	end = strtoul(argv[4], &err, 0);
+	if (*err || err == argv[4])
+		error("Cannot parse end address\n");
+	if (end % page_size)
+		error("End address is not aligned to page size\n");
+
+	if (pid) {
+		struct iovec vec = {
+			.iov_base = (void *)start,
+			.iov_len = end - start,
+		};
+		ssize_t ret;
+
+		ret = process_madvise(fd, &vec, 1, advice, 0);
+		if (ret < 0)
+			perror("process_madvise"), exit(-1);
+
+		if ((unsigned long)ret != end - start)
+			printf("Partial advice occurred. Stopped at %#lx\n", start + ret);
+	} else {
+		unsigned long addr, hpage_pmd_size;
+		void *p;
+		int ret;
+
+		hpage_pmd_size = read_pmd_pagesize();
+		if (!hpage_pmd_size) {
+			printf("Reading PMD pagesize failed");
+			exit(-1);
+		}
+
+		// Allocate virtual address space to align the target mmap to PMD size
+		// Some advices require this.
+		p = mmap(NULL, end - start + hpage_pmd_size, PROT_NONE,
+			 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+		if (p == MAP_FAILED)
+			perror("mmap0"), exit(-1);
+		addr = (unsigned long)p;
+		addr += hpage_pmd_size - 1;
+		addr &= ~(hpage_pmd_size - 1);
+
+		p = mmap((void *)addr, end - start,
+			 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED | MAP_POPULATE, fd, start);
+		if (p == MAP_FAILED)
+			perror("mmap"), exit(-1);
+
+		ret = madvise(p, end - start, advice);
+		if (ret)
+			perror("madvise"), exit(-1);
+	}
+
+	return 0;
+}
-- 
2.50.1



^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2025-09-10 11:35 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-04 17:57 [PATCH] tools/mm: Add madvise tool kirill
2025-09-04 19:07 ` Liam R. Howlett
2025-09-05  3:26 ` wang lian
2025-09-05 10:28   ` Kiryl Shutsemau
2025-09-08  1:09     ` wang lian
2025-09-05  9:17 ` Michal Hocko
2025-09-05 10:16   ` Lorenzo Stoakes
2025-09-05 10:56     ` Michal Hocko
2025-09-05 11:02       ` Lorenzo Stoakes
2025-09-05 10:21   ` Kiryl Shutsemau
2025-09-05 11:02     ` Michal Hocko
2025-09-05 10:19 ` David Hildenbrand
2025-09-05 10:24   ` Kiryl Shutsemau
2025-09-05 10:33     ` David Hildenbrand
2025-09-05 10:35       ` Kiryl Shutsemau
2025-09-05 10:37 ` Lorenzo Stoakes
2025-09-05 13:35   ` Kiryl Shutsemau
2025-09-05 16:30   ` Kiryl Shutsemau
2025-09-05 10:45 ` Usama Arif
2025-09-05 10:59 ` Mike Rapoport
2025-09-05 13:36   ` Kiryl Shutsemau
2025-09-06  8:00     ` Mike Rapoport
2025-09-08 10:04       ` Kiryl Shutsemau
2025-09-10 11:34 ` [ANNOUNCE] mm-tools: Random tools MM-related tools Kiryl Shutsemau

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.