From: Jan Polensky <japo@linux.ibm.com>
To: ltp@lists.linux.it
Subject: [LTP] [PATCH v2 1/1] thp04: Fix PTRACE mode for CONFIG_PROC_MEM_FORCE_PTRACE=y
Date: Fri, 12 Jun 2026 19:17:07 +0200 [thread overview]
Message-ID: <20260612171712.324175-1-japo@linux.ibm.com> (raw)
The PTRACE mode failed on s390 systems with CONFIG_PROC_MEM_FORCE_PTRACE=y
because writes to /proc/pid/mem require the tracee to be in a stopped state
(PTRACE_MODE_ATTACH). The previous implementation called PTRACE_CONT before
attempting the writes, so the tracee was already running and the writes returned 0.
Fixed by implementing a write-stop-continue cycle:
- Parent writes to /proc/pid/mem while tracee is stopped
- Parent calls PTRACE_CONT to let tracee run one iteration
- Tracee executes madvise() calls and checks for pollution
- Tracee calls raise(SIGSTOP) to stop itself
- Parent waits for SIGSTOP and repeats
This ensures writes always happen while the tracee is stopped, as required
by the kernel's /proc/pid/mem implementation.
Tested on s390x (kernel 7.1.0-rc7) with CONFIG_PROC_MEM_FORCE_PTRACE=y:
- Test now passes with TPASS result
- No more "short write return value 0" errors
Signed-off-by: Jan Polensky <japo@linux.ibm.com>
---
Link: https://lore.kernel.org/all/20260526150813.201280-1-japo@linux.ibm.com/
Changes since v1:
- detect proc_mem.force_override / kernel config instead of relying only on a probe write
- fix kernel parameter naming per review
- address feedback from Cyril Hrubis in previous thread
testcases/kernel/mem/thp/thp04.c | 367 +++++++++++++++++++++++++------
1 file changed, 303 insertions(+), 64 deletions(-)
diff --git a/testcases/kernel/mem/thp/thp04.c b/testcases/kernel/mem/thp/thp04.c
index 16d766c349b7..82a2d98479a9 100644
--- a/testcases/kernel/mem/thp/thp04.c
+++ b/testcases/kernel/mem/thp/thp04.c
@@ -21,27 +21,58 @@
* On old kernel such as 4.9, it has fixed the Dirty Cow bug but a similar check
* in huge_memory.c was forgotten. As a result, remote memory writes to ro regions
* of memory backed by transparent huge pages cause an infinite loop in the kernel.
- * While in this state the process is stil SIGKILLable, but little else works.
+ * While in this state the process is still SIGKILLable, but little else works.
* It is also a regression test about kernel
* commit 8310d48b125d("huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp").
+ *
+ * Test Modes:
+ *
+ * PROC_MEM_ALWAYS: Direct writes to /proc/self/mem (default on most systems)
+ * - Child process writes to its own memory via /proc/self/mem
+ * - Sequential loop: each write is followed by the madvise() calls (no racing thread)
+ *
+ * PROC_MEM_PTRACE: Ptrace-based writes to /proc/pid/mem (CONFIG_PROC_MEM_FORCE_PTRACE=y)
+ * - Parent writes to tracee's memory via /proc/pid/mem
+ * - Write-stop-continue cycle: tracee must be STOPPED for writes to succeed
+ * - Alternating execution: parent writes → tracee runs madvise → tracee stops → repeat
+ * - Required because /proc/pid/mem writes need PTRACE_MODE_ATTACH (stopped state)
*/
-#include "tst_test.h"
-#include "lapi/mmap.h"
+#include <signal.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#include "tst_checkpoint.h"
#include "tst_fuzzy_sync.h"
+#include "tst_kconfig.h"
+#include "tst_test.h"
-static char *write_thp, *read_thp;
-static int *write_ptr, *read_ptr;
-static size_t thp_size;
-static int writefd = -1, readfd = -1;
-static struct tst_fzsync_pair fzsync_pair;
+/*
+ * How this test gets its write into the read-only huge zero page.
+ * setup() picks one value and stores it in the proc_mem_mode variable.
+ */
+enum proc_mem_mode {
+ PROC_MEM_ALWAYS, /* the test process writes through its own /proc/self/mem */
+ PROC_MEM_PTRACE, /* a ptracing parent writes through /proc/<pid>/mem */
+ PROC_MEM_NEVER, /* writes disabled on the kernel cmdline; setup() ends with TCONF */
+};
-static void *alloc_zero_page(void *baseaddr)
+/*
+ * Resources acquired by child_setup() and released by child_cleanup().
+ * The struct is placed in a MAP_SHARED anonymous mapping; in ptrace mode it
+ * is mapped before fork() so the parent can read what the tracee stored.
+ */
+struct child_state {
+ char *write_thp; /* PROT_READ mapping that the /proc/<pid>/mem writes target */
+ char *read_thp; /* second PROT_READ mapping, checked for pollution */
+ int *write_ptr; /* last int inside write_thp */
+ int *read_ptr; /* last int inside read_thp */
+ size_t thp_size; /* value returned by tst_get_hugepage_size() */
+ int writefd; /* /proc/self/mem fd in PROC_MEM_ALWAYS mode, otherwise -1 */
+ int readfd; /* /proc/self/mem fd opened by child_setup(); -1 when not open */
+ struct tst_fzsync_pair fzsync_pair; /* initialised in child_setup(), torn down in child_cleanup() */
+};
+
+/* PID returned by SAFE_FORK() in setup_ptrace_tracee(); 0 until then and inside the tracee */
+static pid_t tracee_pid;
+/* Mode selected by setup(); child_setup(), child_run() and run() branch on it */
+static enum proc_mem_mode proc_mem_mode = PROC_MEM_ALWAYS;
+/* Shared state block; mapped in setup()/setup_ptrace_tracee(), NULL after setup() unmaps it */
+static struct child_state *child;
+
+static void *alloc_zero_page(void *baseaddr, size_t thp_size)
{
int i;
void *ret;
- /* Find aligned chunk of address space. MAP_HUGETLB doesn't work. */
for (i = 0; i < 16; i++, baseaddr += thp_size) {
ret = mmap(baseaddr, thp_size, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -67,94 +98,300 @@ static void *alloc_zero_page(void *baseaddr)
}
tst_brk(TBROK, "Cannot map huge zero page near the specified address");
- return NULL; /* Silence compiler warning */
+ return NULL;
}
-static void setup(void)
+/*
+ * Release everything child_setup() acquired: the fuzzy-sync pair, both
+ * /proc/self/mem descriptors and both huge-page mappings.
+ *
+ * Does nothing when the state block is not mapped. The mapping pointers are
+ * cleared once unmapped so that a later call on the same (shared) state
+ * block does not munmap() the same addresses a second time - the block is
+ * MAP_SHARED, so a call made by the tracee is followed by the one made from
+ * cleanup() in the parent.
+ */
+static void child_cleanup(void)
+{
+	if (!child)
+		return;
+
+	tst_fzsync_pair_cleanup(&child->fzsync_pair);
+
+	/* NOTE(review): relies on SAFE_CLOSE() resetting the fd to -1 - confirm */
+	if (child->readfd >= 0)
+		SAFE_CLOSE(child->readfd);
+
+	if (child->writefd >= 0)
+		SAFE_CLOSE(child->writefd);
+
+	if (child->read_thp) {
+		SAFE_MUNMAP(child->read_thp, child->thp_size);
+		child->read_thp = NULL;
+	}
+
+	if (child->write_thp) {
+		SAFE_MUNMAP(child->write_thp, child->thp_size);
+		child->write_thp = NULL;
+	}
+}
+
+/*
+ * Fill in *child: query the huge page size, map the write and read regions
+ * with alloc_zero_page(), derive write_ptr/read_ptr and open /proc/self/mem.
+ * Ends the test with TCONF when huge pages are unsupported or the write
+ * region already contains non-zero bytes.
+ * The caller must have mapped *child and set proc_mem_mode beforehand.
+ */
+static void child_setup(void)
{
size_t i;
- thp_size = tst_get_hugepage_size();
+ child->thp_size = tst_get_hugepage_size();
- if (!thp_size)
+ if (!child->thp_size)
tst_brk(TCONF, "Kernel does not support huge pages");
- write_thp = alloc_zero_page((void *)thp_size);
+ child->write_thp = alloc_zero_page((void *)child->thp_size,
+ child->thp_size);
- for (i = 0; i < thp_size; i++) {
- if (write_thp[i])
+ for (i = 0; i < child->thp_size; i++) {
+ if (child->write_thp[i])
tst_brk(TCONF, "Huge zero page is pre-polluted");
}
- /* leave a hole between read and write THP to prevent merge */
- read_thp = alloc_zero_page(write_thp + 2 * thp_size);
- write_ptr = (int *)(write_thp + thp_size - sizeof(int));
- read_ptr = (int *)(read_thp + thp_size - sizeof(int));
- writefd = SAFE_OPEN("/proc/self/mem", O_RDWR);
- readfd = SAFE_OPEN("/proc/self/mem", O_RDWR);
+ /* read region is requested two huge pages above the write region, leaving a hole */
+ child->read_thp = alloc_zero_page(child->write_thp + 2 * child->thp_size,
+ child->thp_size);
+ /* write_ptr points to last int in write_thp page */
+ child->write_ptr = (int *)(child->write_thp + child->thp_size - sizeof(int));
+ /* read_ptr points to last int in read_thp page */
+ child->read_ptr = (int *)(child->read_thp + child->thp_size - sizeof(int));
- fzsync_pair.exec_loops = 100000;
- tst_fzsync_pair_init(&fzsync_pair);
-}
-
-static void *thread_run(void *arg)
-{
- int c;
-
- while (tst_fzsync_run_b(&fzsync_pair)) {
- tst_fzsync_start_race_b(&fzsync_pair);
- madvise(write_thp, thp_size, MADV_DONTNEED);
- memcpy(&c, write_ptr, sizeof(c));
- SAFE_LSEEK(readfd, (off_t)write_ptr, SEEK_SET);
- SAFE_READ(1, readfd, &c, sizeof(int));
- tst_fzsync_end_race_b(&fzsync_pair);
- /* Wait for dirty page handling before next madvise() */
- usleep(10);
+ /* In ptrace mode, parent opens /proc/<pid>/mem, not child */
+ if (proc_mem_mode == PROC_MEM_ALWAYS) {
+ child->writefd = SAFE_OPEN("/proc/self/mem", O_RDWR);
+ } else {
+ child->writefd = -1; /* Parent will open /proc/<tracee_pid>/mem */
}
- return arg;
+ child->readfd = SAFE_OPEN("/proc/self/mem", O_RDWR);
+ /* NOTE(review): the added code never starts a fuzzy-sync race with this pair - confirm it is still needed */
+ child->fzsync_pair.exec_loops = 1000;
+ tst_fzsync_pair_init(&child->fzsync_pair);
}
-static void run(void)
+
+/*
+ * Test loop executed by the process that owns the huge-page mappings.
+ *
+ * PROC_MEM_ALWAYS: 1000 times write 0xdeadbeef to write_ptr through
+ * /proc/self/mem, drop both regions with MADV_DONTNEED and check read_ptr.
+ * Otherwise (ptrace mode): 1000 times drop both regions, check read_ptr and
+ * raise SIGSTOP; the writes come from parent_run().
+ *
+ * Reports TFAIL and returns as soon as *read_ptr is non-zero, TPASS after
+ * all iterations.
+ */
+static void child_run(void)
{
int c = 0xdeadbeef;
+ int i;
- tst_fzsync_pair_reset(&fzsync_pair, thread_run);
+ if (!child) {
+ tst_brk(TBROK, "child struct is NULL in child_run()");
+ return;
+ }
- while (tst_fzsync_run_a(&fzsync_pair)) {
- /* Write into the main huge page */
- tst_fzsync_start_race_a(&fzsync_pair);
- SAFE_LSEEK(writefd, (off_t)write_ptr, SEEK_SET);
- madvise(write_thp, thp_size, MADV_DONTNEED);
- SAFE_WRITE(SAFE_WRITE_ALL, writefd, &c, sizeof(int));
- tst_fzsync_end_race_a(&fzsync_pair);
+ /* In PROC_MEM_ALWAYS mode, child does writes itself */
+ if (proc_mem_mode == PROC_MEM_ALWAYS) {
- /* Check the other huge zero page for pollution */
- madvise(read_thp, thp_size, MADV_DONTNEED);
+ /* Simplified test loop without thread */
+ for (i = 0; i < 1000; i++) {
+ /* Write via /proc/self/mem */
+ SAFE_LSEEK(child->writefd, (off_t)child->write_ptr, SEEK_SET);
+ SAFE_WRITE(SAFE_WRITE_ALL, child->writefd, &c, sizeof(int));
- if (*read_ptr != 0) {
- tst_res(TFAIL, "Huge zero page was polluted");
- return;
+ /* Call madvise on write page */
+ madvise(child->write_thp, child->thp_size, MADV_DONTNEED);
+
+ /* Call madvise on read page */
+ madvise(child->read_thp, child->thp_size, MADV_DONTNEED);
+
+ /* Check if read page was polluted */
+ if (*child->read_ptr != 0) {
+ tst_res(TFAIL, "Huge zero page was polluted");
+ return;
+ }
+
+ usleep(100);
}
+ } else {
+ /* In PROC_MEM_PTRACE mode: single iteration per continue, then stop */
+
+ /* Loop 1000 times, but parent controls via PTRACE_CONT */
+ /* The count has to equal the number of iterations in parent_run() */
+ for (i = 0; i < 1000; i++) {
+ madvise(child->write_thp, child->thp_size, MADV_DONTNEED);
+ madvise(child->read_thp, child->thp_size, MADV_DONTNEED);
+
+ if (*child->read_ptr != 0) {
+ tst_res(TFAIL, "Huge zero page was polluted");
+ /* NOTE(review): returning here lets the tracee exit while parent_run() still waits for a SIGSTOP stop, which it reports as TBROK - confirm intended */
+ return;
+ }
+
+ /* Stop self to let parent write next iteration */
+ raise(SIGSTOP);
+ }
+
}
tst_res(TPASS, "Huge zero page is still clean");
}
+/*
+ * Body of the forked tracee: build its own mappings with child_setup(),
+ * wake the parent on checkpoint 0, run the test loop and release everything.
+ */
+static void tracee_main(void)
+{
+ /* child struct is already mapped by parent before fork */
+ child->writefd = -1;
+ child->readfd = -1;
+
+ child_setup();
+
+ TST_CHECKPOINT_WAKE(0);
+
+ /* Parent will PTRACE_CONT us when ready - no checkpoint needed */
+ /*
+ * NOTE(review): nothing between the wake-up above and child_run() blocks
+ * the tracee, so it can start (and reach raise(SIGSTOP)) before the parent
+ * has issued PTRACE_SEIZE - confirm this ordering is safe.
+ */
+
+ child_run();
+ child_cleanup();
+}
+
+/*
+ * Fork the tracee and attach to it for PROC_MEM_PTRACE mode.
+ *
+ * The state block is mapped MAP_SHARED before the fork so that what
+ * child_setup() stores in the tracee is visible to the parent. The parent
+ * waits on checkpoint 0 (woken by tracee_main() after child_setup()), then
+ * issues PTRACE_SEIZE followed by PTRACE_INTERRUPT and waits for the stop.
+ * Ends the test with TBROK if the wait status is not a stop.
+ */
+static void setup_ptrace_tracee(void)
+{
+ int status;
+
+ /* Map child struct BEFORE fork so both parent and child can access it */
+ child = SAFE_MMAP(NULL, sizeof(*child), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ memset(child, 0, sizeof(*child));
+ child->writefd = -1;
+ child->readfd = -1;
+
+ tracee_pid = SAFE_FORK();
+ if (!tracee_pid) {
+ tracee_main();
+ exit(0);
+ }
+
+ TST_CHECKPOINT_WAIT(0);
+
+ /* NOTE(review): the tracee keeps running after the checkpoint, so the interrupt lands at an arbitrary point of child_run() - confirm */
+ SAFE_PTRACE(PTRACE_SEIZE, tracee_pid, NULL, NULL);
+ SAFE_PTRACE(PTRACE_INTERRUPT, tracee_pid, NULL, NULL);
+ SAFE_WAITPID(tracee_pid, &status, 0);
+
+ if (!WIFSTOPPED(status))
+ tst_brk(TBROK, "Ptrace seize did not stop tracee: %s",
+ tst_strstatus(status));
+
+ /* Tracee stays stopped - parent_run() will PTRACE_CONT when ready */
+}
+
+/*
+ * Decide how the test writes to the read-only huge zero page.
+ *
+ * A proc_mem.force_override= value found on the kernel command line is kept
+ * in requested_mode. A probe write through /proc/self/mem then decides: if
+ * it succeeds the test runs in PROC_MEM_ALWAYS mode with the state built
+ * for the probe; otherwise that state is torn down and a ptraced child is
+ * set up (PROC_MEM_PTRACE).
+ *
+ * Ends the test with TCONF when the command line names neither "always" nor
+ * "ptrace", or names "always" although the probe write is refused, and with
+ * TBROK on an lseek() failure or a write() error other than EIO.
+ */
+static void setup(void)
+{
+	int test_val = 0;
+	int explicit_mode = 0;
+	/*
+	 * Mode named on the kernel command line. Kept apart from proc_mem_mode,
+	 * which has to be PROC_MEM_ALWAYS while the probe runs; otherwise an
+	 * explicit "ptrace" would be mistaken for "always" after the probe.
+	 */
+	enum proc_mem_mode requested_mode = PROC_MEM_ALWAYS;
+
+	static struct tst_kcmdline_var params[] = {
+		TST_KCMDLINE_INIT("proc_mem.force_override"),
+	};
+
+	tst_kcmdline_parse(params, ARRAY_SIZE(params));
+
+	if (params[0].found) {
+		explicit_mode = 1;
+
+		if (!strcmp(params[0].value, "always")) {
+			requested_mode = PROC_MEM_ALWAYS;
+		} else if (!strcmp(params[0].value, "ptrace")) {
+			requested_mode = PROC_MEM_PTRACE;
+		} else {
+			proc_mem_mode = PROC_MEM_NEVER;
+			tst_brk(TCONF,
+				"Writes to /proc/self/mem disabled on kernel cmdline");
+		}
+	}
+
+	/* First try without ptrace to detect PROC_MEM_ALWAYS mode */
+	child = SAFE_MMAP(NULL, sizeof(*child), PROT_READ | PROT_WRITE,
+			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	memset(child, 0, sizeof(*child));
+	child->writefd = -1;
+	child->readfd = -1;
+
+	/* child_setup() opens writefd only in PROC_MEM_ALWAYS mode */
+	proc_mem_mode = PROC_MEM_ALWAYS;
+	child_setup();
+
+	TEST(lseek(child->writefd, (off_t)child->write_ptr, SEEK_SET));
+	if (TST_RET == -1)
+		tst_brk(TBROK | TTERRNO, "lseek on /proc/self/mem failed");
+
+	TEST(write(child->writefd, &test_val, sizeof(test_val)));
+
+	/* Probe accepted: keep the state built above and run without ptrace */
+	if (TST_RET == sizeof(test_val))
+		return;
+
+	if (TST_RET == -1 && TST_ERR != EIO)
+		tst_brk(TBROK | TTERRNO, "test write to /proc/self/mem failed");
+
+	/* /proc/self/mem write failed, cleanup and try ptrace mode */
+	child_cleanup();
+	SAFE_MUNMAP(child, sizeof(*child));
+	child = NULL;
+
+	if (explicit_mode && requested_mode == PROC_MEM_ALWAYS)
+		tst_brk(TCONF,
+			"Writes to /proc/self/mem disabled despite always mode");
+
+	/* No override on the command line, or an explicit "ptrace" */
+	proc_mem_mode = PROC_MEM_PTRACE;
+	setup_ptrace_tracee();
+}
+
+/*
+ * Tracer side of PROC_MEM_PTRACE mode.
+ *
+ * Opens /proc/<tracee_pid>/mem and, 1000 times, writes 0xdeadbeef at the
+ * address stored in child->write_ptr, resumes the tracee with PTRACE_CONT
+ * and waits until it reports a SIGSTOP stop. Any other wait status ends
+ * the test with TBROK. Expects the tracee to be stopped on entry.
+ */
+static void parent_run(void)
+{
+ char path[64];
+ int writefd;
+ int c = 0xdeadbeef;
+ int i;
+ int status;
+
+ snprintf(path, sizeof(path), "/proc/%d/mem", tracee_pid);
+ writefd = SAFE_OPEN(path, O_RDWR);
+
+ /* Write-stop-continue cycle: tracee must be stopped for writes */
+ /* The count has to equal the ptrace-mode loop count in child_run() */
+ for (i = 0; i < 1000; i++) {
+ /* Write to /proc/pid/mem while tracee is stopped */
+ SAFE_LSEEK(writefd, (off_t)child->write_ptr, SEEK_SET);
+ SAFE_WRITE(SAFE_WRITE_ALL, writefd, &c, sizeof(int));
+
+ /* Let tracee run one iteration (madvise + check) */
+ SAFE_PTRACE(PTRACE_CONT, tracee_pid, NULL, NULL);
+
+ /* Wait for tracee to stop itself with raise(SIGSTOP) */
+ SAFE_WAITPID(tracee_pid, &status, 0);
+
+ if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP)
+ tst_brk(TBROK, "Tracee did not stop with SIGSTOP: %s",
+ tst_strstatus(status));
+ }
+
+ SAFE_CLOSE(writefd);
+}
+
+/*
+ * Test entry point. In PROC_MEM_ALWAYS mode the calling process runs
+ * child_run() itself. Otherwise it drives the tracee through parent_run(),
+ * resumes it one last time and requires a clean exit (status 0); anything
+ * else ends the test with TBROK.
+ */
+static void run(void)
+{
+ int status;
+
+ if (proc_mem_mode == PROC_MEM_ALWAYS) {
+ child_run();
+ return;
+ }
+
+ /* In ptrace mode: write-stop-continue cycle */
+ parent_run();
+
+ /* After 1000 iterations, let tracee exit cleanly */
+ SAFE_PTRACE(PTRACE_CONT, tracee_pid, NULL, NULL);
+ SAFE_WAITPID(tracee_pid, &status, 0);
+
+ /* NOTE(review): the tracee is forked once in setup() and reaped here, so a second call of run() would find no tracee - confirm */
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ tst_brk(TBROK, "Tracee exited unexpectedly: %s",
+ tst_strstatus(status));
+}
+
+/*
+ * Test teardown. When a tracee was forked it is killed and reaped; the file
+ * descriptors and huge-page addresses recorded in *child then belong to the
+ * tracee's process and are gone with it, so child_cleanup() must not be
+ * applied to them from this process. Without a tracee the resources are
+ * this process's own and child_cleanup() releases them. The shared state
+ * block itself is unmapped last.
+ */
static void cleanup(void)
{
- tst_fzsync_pair_cleanup(&fzsync_pair);
+	int status;
- if (readfd >= 0)
- SAFE_CLOSE(readfd);
+	if (tracee_pid > 0) {
+		/* Tracee may have already exited - don't fail if it's gone */
+		if (kill(tracee_pid, SIGKILL) == 0)
+			SAFE_WAITPID(tracee_pid, &status, 0);
+	} else {
+		child_cleanup();
+	}
- if (writefd >= 0)
- SAFE_CLOSE(writefd);
- if (read_thp)
- SAFE_MUNMAP(read_thp, thp_size);
- if (write_thp)
- SAFE_MUNMAP(write_thp, thp_size);
+	if (child)
+		SAFE_MUNMAP(child, sizeof(*child));
}
static struct tst_test test = {
@@ -162,6 +399,8 @@ static struct tst_test test = {
.setup = setup,
.cleanup = cleanup,
.runtime = 150,
+ .forks_child = 1,
+ .needs_checkpoints = 1,
.tags = (const struct tst_tag[]) {
{"linux-git", "a8f97366452e"},
{"linux-git", "8310d48b125d"},
--
2.54.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
next reply other threads:[~2026-06-12 17:17 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-12 17:17 Jan Polensky [this message]
2026-06-12 19:21 ` [LTP] thp04: Fix PTRACE mode for CONFIG_PROC_MEM_FORCE_PTRACE=y linuxtestproject.agent
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260612171712.324175-1-japo@linux.ibm.com \
--to=japo@linux.ibm.com \
--cc=ltp@lists.linux.it \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox