Linux Test Project
 help / color / mirror / Atom feed
From: Jan Polensky <japo@linux.ibm.com>
To: ltp@lists.linux.it
Subject: [LTP] [PATCH v2 1/1] thp04: Fix PTRACE mode for CONFIG_PROC_MEM_FORCE_PTRACE=y
Date: Fri, 12 Jun 2026 19:17:07 +0200	[thread overview]
Message-ID: <20260612171712.324175-1-japo@linux.ibm.com> (raw)

The PTRACE mode was failing on s390 systems with CONFIG_PROC_MEM_FORCE_PTRACE=y
because writes to /proc/pid/mem require the tracee to be in a stopped state
(PTRACE_MODE_ATTACH). The previous implementation called PTRACE_CONT before
attempting the writes, so the tracee was already running and every write
returned 0.

Fix this by implementing a write-stop-continue cycle:
- Parent writes to /proc/pid/mem while tracee is stopped
- Parent calls PTRACE_CONT to let tracee run one iteration
- Tracee executes madvise() calls and checks for pollution
- Tracee calls raise(SIGSTOP) to stop itself
- Parent waits for SIGSTOP and repeats

This ensures writes always happen while the tracee is stopped, as required
by the kernel's /proc/pid/mem implementation.

Tested on s390x (kernel 7.1.0-rc7) with CONFIG_PROC_MEM_FORCE_PTRACE=y:
- Test now passes with TPASS result
- No more "short write return value 0" errors

Signed-off-by: Jan Polensky <japo@linux.ibm.com>
---

Link: https://lore.kernel.org/all/20260526150813.201280-1-japo@linux.ibm.com/

Changes since v1:
- detect proc_mem.force_override / kernel config instead of relying only on a probe write
- fix kernel parameter naming per review
- address feedback from Cyril Hrubis in previous thread


 testcases/kernel/mem/thp/thp04.c | 367 +++++++++++++++++++++++++------
 1 file changed, 303 insertions(+), 64 deletions(-)

diff --git a/testcases/kernel/mem/thp/thp04.c b/testcases/kernel/mem/thp/thp04.c
index 16d766c349b7..82a2d98479a9 100644
--- a/testcases/kernel/mem/thp/thp04.c
+++ b/testcases/kernel/mem/thp/thp04.c
@@ -21,27 +21,58 @@
  * On old kernel such as 4.9, it has fixed the Dirty Cow bug but a similar check
  * in huge_memory.c was forgotten.  As a result, remote memory writes to ro regions
  * of memory backed by transparent huge pages cause an infinite loop in the kernel.
- * While in this state the process is stil SIGKILLable, but little else works.
+ * While in this state the process is still SIGKILLable, but little else works.
  * It is also a regression test about kernel
  * commit 8310d48b125d("huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp").
+ *
+ * Test Modes:
+ *
+ * PROC_MEM_ALWAYS: Direct writes to /proc/self/mem (default on most systems)
+ *   - Child process writes to its own memory via /proc/self/mem
+ *   - Sequential execution: each write is followed by the madvise() calls
+ *
+ * PROC_MEM_PTRACE: Ptrace-based writes to /proc/pid/mem (CONFIG_PROC_MEM_FORCE_PTRACE=y)
+ *   - Parent writes to tracee's memory via /proc/pid/mem
+ *   - Write-stop-continue cycle: tracee must be STOPPED for writes to succeed
+ *   - Alternating execution: parent writes → tracee runs madvise → tracee stops → repeat
+ *   - Required because /proc/pid/mem writes need PTRACE_MODE_ATTACH (stopped state)
  */

-#include "tst_test.h"
-#include "lapi/mmap.h"
+#include <signal.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#include "tst_checkpoint.h"
 #include "tst_fuzzy_sync.h"
+#include "tst_kconfig.h"
+#include "tst_test.h"

-static char *write_thp, *read_thp;
-static int *write_ptr, *read_ptr;
-static size_t thp_size;
-static int writefd = -1, readfd = -1;
-static struct tst_fzsync_pair fzsync_pair;
+enum proc_mem_mode {
+	PROC_MEM_ALWAYS,
+	PROC_MEM_PTRACE,
+	PROC_MEM_NEVER,
+};

-static void *alloc_zero_page(void *baseaddr)
+struct child_state {
+	char *write_thp;
+	char *read_thp;
+	int *write_ptr;
+	int *read_ptr;
+	size_t thp_size;
+	int writefd;
+	int readfd;
+	struct tst_fzsync_pair fzsync_pair;
+};
+
+static pid_t tracee_pid;
+static enum proc_mem_mode proc_mem_mode = PROC_MEM_ALWAYS;
+static struct child_state *child;
+
+static void *alloc_zero_page(void *baseaddr, size_t thp_size)
 {
 	int i;
 	void *ret;

-	/* Find aligned chunk of address space. MAP_HUGETLB doesn't work. */
 	for (i = 0; i < 16; i++, baseaddr += thp_size) {
 		ret = mmap(baseaddr, thp_size, PROT_READ,
 			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -67,94 +98,300 @@ static void *alloc_zero_page(void *baseaddr)
 	}

 	tst_brk(TBROK, "Cannot map huge zero page near the specified address");
-	return NULL;	/* Silence compiler warning */
+	return NULL;
 }

-static void setup(void)
+static void child_cleanup(void)
+{
+	if (!child)
+		return;
+
+	tst_fzsync_pair_cleanup(&child->fzsync_pair);
+
+	if (child->readfd >= 0)
+		SAFE_CLOSE(child->readfd);
+
+	if (child->writefd >= 0)
+		SAFE_CLOSE(child->writefd);
+
+	if (child->read_thp)
+		SAFE_MUNMAP(child->read_thp, child->thp_size);
+
+	if (child->write_thp)
+		SAFE_MUNMAP(child->write_thp, child->thp_size);
+}
+
+static void child_setup(void)
 {
 	size_t i;

-	thp_size = tst_get_hugepage_size();
+	child->thp_size = tst_get_hugepage_size();

-	if (!thp_size)
+	if (!child->thp_size)
 		tst_brk(TCONF, "Kernel does not support huge pages");

-	write_thp = alloc_zero_page((void *)thp_size);
+	child->write_thp = alloc_zero_page((void *)child->thp_size,
+		child->thp_size);

-	for (i = 0; i < thp_size; i++) {
-		if (write_thp[i])
+	for (i = 0; i < child->thp_size; i++) {
+		if (child->write_thp[i])
 			tst_brk(TCONF, "Huge zero page is pre-polluted");
 	}

-	/* leave a hole between read and write THP to prevent merge */
-	read_thp = alloc_zero_page(write_thp + 2 * thp_size);
-	write_ptr = (int *)(write_thp + thp_size - sizeof(int));
-	read_ptr = (int *)(read_thp + thp_size - sizeof(int));
-	writefd = SAFE_OPEN("/proc/self/mem", O_RDWR);
-	readfd = SAFE_OPEN("/proc/self/mem", O_RDWR);
+	child->read_thp = alloc_zero_page(child->write_thp + 2 * child->thp_size,
+		child->thp_size);
+	/* write_ptr points to last int in write_thp page */
+	child->write_ptr = (int *)(child->write_thp + child->thp_size - sizeof(int));
+	/* read_ptr points to last int in read_thp page */
+	child->read_ptr = (int *)(child->read_thp + child->thp_size - sizeof(int));

-	fzsync_pair.exec_loops = 100000;
-	tst_fzsync_pair_init(&fzsync_pair);
-}
-
-static void *thread_run(void *arg)
-{
-	int c;
-
-	while (tst_fzsync_run_b(&fzsync_pair)) {
-		tst_fzsync_start_race_b(&fzsync_pair);
-		madvise(write_thp, thp_size, MADV_DONTNEED);
-		memcpy(&c, write_ptr, sizeof(c));
-		SAFE_LSEEK(readfd, (off_t)write_ptr, SEEK_SET);
-		SAFE_READ(1, readfd, &c, sizeof(int));
-		tst_fzsync_end_race_b(&fzsync_pair);
-		/* Wait for dirty page handling before next madvise() */
-		usleep(10);
+	/* In ptrace mode, parent opens /proc/<pid>/mem, not child */
+	if (proc_mem_mode == PROC_MEM_ALWAYS) {
+		child->writefd = SAFE_OPEN("/proc/self/mem", O_RDWR);
+	} else {
+		child->writefd = -1;  /* Parent will open /proc/<tracee_pid>/mem */
 	}

-	return arg;
+	child->readfd = SAFE_OPEN("/proc/self/mem", O_RDWR);
+	child->fzsync_pair.exec_loops = 1000;
+	tst_fzsync_pair_init(&child->fzsync_pair);
 }

-static void run(void)
+
+static void child_run(void)
 {
 	int c = 0xdeadbeef;
+	int i;

-	tst_fzsync_pair_reset(&fzsync_pair, thread_run);
+	if (!child) {
+		tst_brk(TBROK, "child struct is NULL in child_run()");
+		return;
+	}

-	while (tst_fzsync_run_a(&fzsync_pair)) {
-		/* Write into the main huge page */
-		tst_fzsync_start_race_a(&fzsync_pair);
-		SAFE_LSEEK(writefd, (off_t)write_ptr, SEEK_SET);
-		madvise(write_thp, thp_size, MADV_DONTNEED);
-		SAFE_WRITE(SAFE_WRITE_ALL, writefd, &c, sizeof(int));
-		tst_fzsync_end_race_a(&fzsync_pair);
+	/* In PROC_MEM_ALWAYS mode, child does writes itself */
+	if (proc_mem_mode == PROC_MEM_ALWAYS) {

-		/* Check the other huge zero page for pollution */
-		madvise(read_thp, thp_size, MADV_DONTNEED);
+		/* Simplified test loop without thread */
+		for (i = 0; i < 1000; i++) {
+			/* Write via /proc/self/mem */
+			SAFE_LSEEK(child->writefd, (off_t)child->write_ptr, SEEK_SET);
+			SAFE_WRITE(SAFE_WRITE_ALL, child->writefd, &c, sizeof(int));

-		if (*read_ptr != 0) {
-			tst_res(TFAIL, "Huge zero page was polluted");
-			return;
+			/* Call madvise on write page */
+			madvise(child->write_thp, child->thp_size, MADV_DONTNEED);
+
+			/* Call madvise on read page */
+			madvise(child->read_thp, child->thp_size, MADV_DONTNEED);
+
+			/* Check if read page was polluted */
+			if (*child->read_ptr != 0) {
+				tst_res(TFAIL, "Huge zero page was polluted");
+				return;
+			}
+
+			usleep(100);
 		}
+	} else {
+		/* In PROC_MEM_PTRACE mode: single iteration per continue, then stop */
+
+		/* Loop 1000 times, but parent controls via PTRACE_CONT */
+		for (i = 0; i < 1000; i++) {
+			madvise(child->write_thp, child->thp_size, MADV_DONTNEED);
+			madvise(child->read_thp, child->thp_size, MADV_DONTNEED);
+
+			if (*child->read_ptr != 0) {
+				tst_res(TFAIL, "Huge zero page was polluted");
+				return;
+			}
+
+			/* Stop self to let parent write next iteration */
+			raise(SIGSTOP);
+		}
+
 	}

 	tst_res(TPASS, "Huge zero page is still clean");
 }

+static void tracee_main(void)
+{
+	/* child struct is already mapped by parent before fork */
+	child->writefd = -1;
+	child->readfd = -1;
+
+	child_setup();
+
+	TST_CHECKPOINT_WAKE(0);
+
+	/* Parent will PTRACE_CONT us when ready - no checkpoint needed */
+
+	child_run();
+	child_cleanup();
+}
+
+static void setup_ptrace_tracee(void)
+{
+	int status;
+
+	/* Map child struct BEFORE fork so both parent and child can access it */
+	child = SAFE_MMAP(NULL, sizeof(*child), PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	memset(child, 0, sizeof(*child));
+	child->writefd = -1;
+	child->readfd = -1;
+
+	tracee_pid = SAFE_FORK();
+	if (!tracee_pid) {
+		tracee_main();
+		exit(0);
+	}
+
+	TST_CHECKPOINT_WAIT(0);
+
+	SAFE_PTRACE(PTRACE_SEIZE, tracee_pid, NULL, NULL);
+	SAFE_PTRACE(PTRACE_INTERRUPT, tracee_pid, NULL, NULL);
+	SAFE_WAITPID(tracee_pid, &status, 0);
+
+	if (!WIFSTOPPED(status))
+		tst_brk(TBROK, "Ptrace seize did not stop tracee: %s",
+			tst_strstatus(status));
+
+	/* Tracee stays stopped - parent_run() will PTRACE_CONT when ready */
+}
+
+static void setup(void)
+{
+	int test_val = 0;
+	int explicit_mode = 0;
+
+	static struct tst_kcmdline_var params[] = {
+		TST_KCMDLINE_INIT("proc_mem.force_override"),
+	};
+
+	tst_kcmdline_parse(params, ARRAY_SIZE(params));
+
+	if (params[0].found) {
+		explicit_mode = 1;
+
+		if (!strcmp(params[0].value, "always")) {
+			proc_mem_mode = PROC_MEM_ALWAYS;
+		} else if (!strcmp(params[0].value, "ptrace")) {
+			proc_mem_mode = PROC_MEM_PTRACE;
+		} else {
+			proc_mem_mode = PROC_MEM_NEVER;
+			tst_brk(TCONF,
+				"Writes to /proc/self/mem disabled on kernel cmdline");
+		}
+	}
+
+	/* First try without ptrace to detect PROC_MEM_ALWAYS mode */
+	child = SAFE_MMAP(NULL, sizeof(*child), PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	memset(child, 0, sizeof(*child));
+	child->writefd = -1;
+	child->readfd = -1;
+
+	proc_mem_mode = PROC_MEM_ALWAYS;
+	child_setup();
+
+	TEST(lseek(child->writefd, (off_t)child->write_ptr, SEEK_SET));
+	if (TST_RET == -1)
+		tst_brk(TBROK | TTERRNO, "lseek on /proc/self/mem failed");
+
+	TEST(write(child->writefd, &test_val, sizeof(test_val)));
+
+	if (TST_RET == sizeof(test_val)) {
+		proc_mem_mode = PROC_MEM_ALWAYS;
+		return;
+	}
+
+	if (TST_RET == -1 && TST_ERR != EIO)
+		tst_brk(TBROK | TTERRNO, "test write to /proc/self/mem failed");
+
+	/* /proc/self/mem write failed, cleanup and try ptrace mode */
+	child_cleanup();
+	SAFE_MUNMAP(child, sizeof(*child));
+	child = NULL;
+
+	if (explicit_mode && proc_mem_mode == PROC_MEM_ALWAYS)
+		tst_brk(TCONF,
+			"Writes to /proc/self/mem disabled despite always mode");
+
+	if (!explicit_mode || proc_mem_mode == PROC_MEM_PTRACE) {
+		proc_mem_mode = PROC_MEM_PTRACE;
+		setup_ptrace_tracee();
+		return;
+	}
+
+	tst_brk(TCONF, "Writes to /proc/self/mem disabled in kernel policy");
+}
+
+static void parent_run(void)
+{
+	char path[64];
+	int writefd;
+	int c = 0xdeadbeef;
+	int i;
+	int status;
+
+	snprintf(path, sizeof(path), "/proc/%d/mem", tracee_pid);
+	writefd = SAFE_OPEN(path, O_RDWR);
+
+	/* Write-stop-continue cycle: tracee must be stopped for writes */
+	for (i = 0; i < 1000; i++) {
+		/* Write to /proc/pid/mem while tracee is stopped */
+		SAFE_LSEEK(writefd, (off_t)child->write_ptr, SEEK_SET);
+		SAFE_WRITE(SAFE_WRITE_ALL, writefd, &c, sizeof(int));
+
+		/* Let tracee run one iteration (madvise + check) */
+		SAFE_PTRACE(PTRACE_CONT, tracee_pid, NULL, NULL);
+
+		/* Wait for tracee to stop itself with raise(SIGSTOP) */
+		SAFE_WAITPID(tracee_pid, &status, 0);
+
+		if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP)
+			tst_brk(TBROK, "Tracee did not stop with SIGSTOP: %s",
+				tst_strstatus(status));
+	}
+
+	SAFE_CLOSE(writefd);
+}
+
+static void run(void)
+{
+	int status;
+
+	if (proc_mem_mode == PROC_MEM_ALWAYS) {
+		child_run();
+		return;
+	}
+
+	/* In ptrace mode: write-stop-continue cycle */
+	parent_run();
+
+	/* After 1000 iterations, let tracee exit cleanly */
+	SAFE_PTRACE(PTRACE_CONT, tracee_pid, NULL, NULL);
+	SAFE_WAITPID(tracee_pid, &status, 0);
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+		tst_brk(TBROK, "Tracee exited unexpectedly: %s",
+			tst_strstatus(status));
+}
+
 static void cleanup(void)
 {
-	tst_fzsync_pair_cleanup(&fzsync_pair);
+	int status;

-	if (readfd >= 0)
-		SAFE_CLOSE(readfd);
+	if (tracee_pid > 0) {
+		/* Tracee may have already exited - don't fail if it's gone */
+		if (kill(tracee_pid, SIGKILL) == 0)
+			SAFE_WAITPID(tracee_pid, &status, 0);
+	}

-	if (writefd >= 0)
-		SAFE_CLOSE(writefd);
+	child_cleanup();

-	if (read_thp)
-		SAFE_MUNMAP(read_thp, thp_size);
-	if (write_thp)
-		SAFE_MUNMAP(write_thp, thp_size);
+	if (child)
+		SAFE_MUNMAP(child, sizeof(*child));
 }

 static struct tst_test test = {
@@ -162,6 +399,8 @@ static struct tst_test test = {
 	.setup = setup,
 	.cleanup = cleanup,
 	.runtime = 150,
+	.forks_child = 1,
+	.needs_checkpoints = 1,
 	.tags = (const struct tst_tag[]) {
 		{"linux-git", "a8f97366452e"},
 		{"linux-git", "8310d48b125d"},
--
2.54.0


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

             reply	other threads:[~2026-06-12 17:17 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-12 17:17 Jan Polensky [this message]
2026-06-12 19:21 ` [LTP] thp04: Fix PTRACE mode for CONFIG_PROC_MEM_FORCE_PTRACE=y linuxtestproject.agent

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260612171712.324175-1-japo@linux.ibm.com \
    --to=japo@linux.ibm.com \
    --cc=ltp@lists.linux.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.