Linux Kernel Selftest development
 help / color / mirror / Atom feed
From: Mike Rapoport <rppt@kernel.org>
To: "Kiryl Shutsemau (Meta)" <kas@kernel.org>
Cc: akpm@linux-foundation.org, peterx@redhat.com, david@kernel.org,
	ljs@kernel.org, surenb@google.com, vbabka@kernel.org,
	Liam.Howlett@oracle.com, ziy@nvidia.com, corbet@lwn.net,
	skhan@linuxfoundation.org, seanjc@google.com,
	pbonzini@redhat.com, jthoughton@google.com, aarcange@redhat.com,
	sj@kernel.org, usama.arif@linux.dev, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	linux-kselftest@vger.kernel.org, kvm@vger.kernel.org,
	kernel-team@meta.com
Subject: Re: [PATCH v2 13/14] selftests/mm: add userfaultfd RWP tests
Date: Wed, 13 May 2026 09:06:17 +0300	[thread overview]
Message-ID: <agQU2c2b3VqpYRdi@kernel.org> (raw)
In-Reply-To: <e097db49bd0ada5f3c22f9c98c548c3b8ca24ba7.1778254670.git.kas@kernel.org>

On Fri, May 08, 2026 at 04:55:25PM +0100, Kiryl Shutsemau (Meta) wrote:
> Coverage for UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT:
> 
>   rwp-async          async mode — touch pages, verify permissions are
>                      auto-restored without a message
>   rwp-sync           sync mode — access blocks, handler resolves via
>                      UFFDIO_RWPROTECT
>   rwp-pagemap        PAGEMAP_SCAN reports still-cold pages via
>                      inverted PAGE_IS_ACCESSED
>   rwp-mprotect       RWP survives mprotect(PROT_NONE) ->
>                      mprotect(PROT_READ|PROT_WRITE) round-trip
>   rwp-gup            GUP walks through a protnone RWP PTE (pipe
>                      write/read drives the GUP path)
>   rwp-async-toggle   UFFDIO_SET_MODE flips between sync and async
>                      without re-registering
>   rwp-close          closing the uffd restores page permissions
>   rwp-fork           RWP survives fork() with EVENT_FORK; child's
>                      PTEs keep the uffd bit
>   rwp-fork-pin       RWP survives fork() on an RO-longterm-pinned
>                      anon page (forces copy_present_page()); child
>                      read auto-resolves and clears the bit, proving
>                      PAGE_NONE was in place
>   rwp-wp-exclusive   register with MODE_WP|MODE_RWP returns -EINVAL
> 
> All tests run against anon, shmem, shmem-private, hugetlb, and
> hugetlb-private memory, except rwp-fork-pin which is anon-only —
> copy_present_page() is the private-anon pinned-exclusive fork path.
> 
> Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
> Assisted-by: Claude:claude-opus-4-6
> ---
>  tools/testing/selftests/mm/uffd-unit-tests.c | 774 +++++++++++++++++++
>  1 file changed, 774 insertions(+)
> 
> diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
> index 6f5e404a446c..a35fb677e4cc 100644
> --- a/tools/testing/selftests/mm/uffd-unit-tests.c
> +++ b/tools/testing/selftests/mm/uffd-unit-tests.c
> @@ -7,6 +7,7 @@
>  
>  #include "uffd-common.h"
>  
> +#include <linux/fs.h>
>  #include "../../../../mm/gup_test.h"
>  
>  #ifdef __NR_userfaultfd
> @@ -167,6 +168,23 @@ static int test_uffd_api(bool use_dev)
>  		goto out;
>  	}
>  
> +	/* Verify returned fd-level ioctls bitmask */
> +	{
> +		uint64_t expected_ioctls =

can be const uint64_t and declared at the top of the function to avoid
extra indentation here.

> +			BIT_ULL(_UFFDIO_REGISTER) |
> +			BIT_ULL(_UFFDIO_UNREGISTER) |
> +			BIT_ULL(_UFFDIO_API) |
> +			BIT_ULL(_UFFDIO_SET_MODE);
> +
> +		if ((uffdio_api.ioctls & expected_ioctls) != expected_ioctls) {
> +			uffd_test_fail("UFFDIO_API missing expected ioctls: "
> +				       "got=0x%"PRIx64", expected=0x%"PRIx64,
> +				       (uint64_t)uffdio_api.ioctls,
> +				       expected_ioctls);
> +			goto out;
> +		}
> +	}
> +
>  	/* Test double requests of UFFDIO_API with a random feature set */
>  	uffdio_api.features = BIT_ULL(0);
>  	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {

...

> +static void uffd_rwp_pagemap_test(uffd_global_test_opts_t *gopts,
> +					  uffd_test_args_t *args)
> +{
> +	unsigned long nr_pages = gopts->nr_pages;
> +	unsigned long page_size = gopts->page_size;
> +	unsigned long p;
> +	struct page_region regions[16];
> +	struct pm_scan_arg pm_arg;
> +	int pagemap_fd;
> +	long ret;

...

> +	/*
> +	 * PAGE_IS_ACCESSED is set once the uffd-wp bit has been cleared
> +	 * (access happened, or the user resolved). Invert it to select
> +	 * still-protected (cold) pages.
> +	 */
> +	memset(&pm_arg, 0, sizeof(pm_arg));
> +	pm_arg.size = sizeof(pm_arg);
> +	pm_arg.start = (uint64_t)gopts->area_dst;
> +	pm_arg.end = (uint64_t)gopts->area_dst + nr_pages * page_size;
> +	pm_arg.vec = (uint64_t)regions;
> +	pm_arg.vec_len = 16;

ARRAY_SIZE(regions)?

> +	pm_arg.category_mask = PAGE_IS_ACCESSED;
> +	pm_arg.category_inverted = PAGE_IS_ACCESSED;
> +	pm_arg.return_mask = PAGE_IS_ACCESSED;
> +
> +}
> +
> +/*
> + * Test that RWP protection survives a mprotect(PROT_NONE) ->
> + * mprotect(PROT_READ|PROT_WRITE) round-trip. The uffd-wp bit on a
> + * VM_UFFD_RWP VMA must continue to carry PROT_NONE semantics after
> + * mprotect() changes the base protection; otherwise accesses would
> + * silently succeed and the pagemap bit would stick without a fault
> + * ever clearing it.
> + */
> +static void uffd_rwp_mprotect_test(uffd_global_test_opts_t *gopts,
> +				   uffd_test_args_t *args)
> +{
> +	unsigned long nr_pages = gopts->nr_pages;
> +	unsigned long page_size = gopts->page_size;
> +	unsigned long p;
> +	struct page_region regions[16];
> +	struct pm_scan_arg pm_arg;
> +	int pagemap_fd;
> +	long ret;

...

> +	memset(&pm_arg, 0, sizeof(pm_arg));
> +	pm_arg.size = sizeof(pm_arg);
> +	pm_arg.start = (uint64_t)gopts->area_dst;
> +	pm_arg.end = (uint64_t)gopts->area_dst + nr_pages * page_size;
> +	pm_arg.vec = (uint64_t)regions;
> +	pm_arg.vec_len = 16;

ARRAY_SIZE(regions)?

> +	pm_arg.category_mask = PAGE_IS_ACCESSED;
> +	pm_arg.category_inverted = PAGE_IS_ACCESSED;
> +	pm_arg.return_mask = PAGE_IS_ACCESSED;
> +
> +	ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &pm_arg);
> +	close(pagemap_fd);
> +
> +	if (ret < 0) {
> +		uffd_test_fail("PAGEMAP_SCAN failed: %s", strerror(errno));
> +		return;
> +	}
> +	if (ret != 0) {
> +		uffd_test_fail("expected no cold pages after mprotect()+touch, got %ld regions",
> +			       ret);
> +		return;
> +	}
> +
> +	uffd_test_pass();
> +}
> +
> +/*
> + * Test that GUP resolves through protnone PTEs (async mode).
> + * RW-protect pages, then use a pipe to exercise GUP on the RW-protected
> + * memory. write() from RW-protected pages triggers GUP which must fault
> + * through the protnone PTE.
> + */
> +static void uffd_rwp_gup_test(uffd_global_test_opts_t *gopts,
> +				     uffd_test_args_t *args)
> +{
> +	unsigned long page_size = gopts->page_size;
> +	char *buf;
> +	int pipefd[2];
> +
> +	buf = malloc(page_size);
> +	if (!buf)
> +		err("malloc");
> +
> +	/* Populate first page with known content */
> +	memset(gopts->area_dst, 0xCD, page_size);
> +
> +	if (uffd_register_rwp(gopts->uffd, gopts->area_dst, page_size))
> +		err("register failure");
> +
> +	rwprotect_range(gopts->uffd, (uint64_t)gopts->area_dst, page_size, true);
> +
> +	if (pipe(pipefd))
> +		err("pipe");
> +
> +	/*
> +	 * write() from the RW-protected page into the pipe. This triggers
> +	 * GUP on the protnone PTE; in async mode the kernel auto-restores
> +	 * permissions and GUP succeeds. One byte is enough to exercise
> +	 * the GUP path and avoids any concern about pipe buffer sizing on
> +	 * large-page archs.
> +	 */
> +	if (write(pipefd[1], gopts->area_dst, 1) != 1) {
> +		uffd_test_fail("write from RW-protected page failed: %s",
> +			       strerror(errno));
> +		goto out;
> +	}

Sashiko (https://sashiko.dev/#/patchset/cover.1778254670.git.kas%40kernel.org?part=13):

	Could this write() implementation be bypassing the intended test
	logic?
	... the write() call here will trigger standard hardware page
	faults during copy_from_user() rather than the intended
	get_user_pages() code path.

It also suggests to use vmsplice().

> +
> +	if (read(pipefd[0], buf, 1) != 1) {
> +		uffd_test_fail("read from pipe failed");
> +		goto out;
> +	}
> +
> +	if (buf[0] != (char)0xCD) {
> +		uffd_test_fail("content mismatch: got 0x%02x, expected 0xCD",
> +			       (unsigned char)buf[0]);
> +		goto out;
> +	}
> +
> +	uffd_test_pass();
> +out:
> +	close(pipefd[0]);
> +	close(pipefd[1]);
> +	free(buf);
> +}
> +
> +/*
> + * Test runtime toggle between async and sync modes.
> + * Start in async mode (detection), flip to sync (eviction), verify faults
> + * block, resolve them, flip back to async.
> + */
> +static void uffd_rwp_async_toggle_test(uffd_global_test_opts_t *gopts,
> +					      uffd_test_args_t *args)
> +{
> +	unsigned long nr_pages = gopts->nr_pages;
> +	unsigned long page_size = gopts->page_size;
> +	struct uffd_args uargs = { };
> +	pthread_t uffd_mon;
> +	bool started = false;
> +	char c = '\0';
> +	unsigned long p;
> +
> +	uargs.gopts = gopts;
> +	uargs.handle_fault = uffd_handle_rwp_fault;
> +
> +	/* Populate */
> +	for (p = 0; p < nr_pages; p++)
> +		memset(gopts->area_dst + p * page_size, p % 255 + 1, page_size);
> +
> +	if (uffd_register_rwp(gopts->uffd, gopts->area_dst,
> +			  nr_pages * page_size))
> +		err("register failure");
> +
> +	/* Phase 1: async detection — RW-protect, access first half */
> +	rwprotect_range(gopts->uffd, (uint64_t)gopts->area_dst,
> +			 nr_pages * page_size, true);
> +
> +	for (p = 0; p < nr_pages / 2; p++) {
> +		volatile char *page = gopts->area_dst + p * page_size;
> +		(void)*page;  /* auto-resolves in async mode */
> +	}
> +
> +	/* Phase 2: flip to sync for eviction */
> +	set_async_mode(gopts->uffd, false);
> +
> +	/* Start handler — will receive faults for cold pages */
> +	if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &uargs))
> +		err("uffd_poll_thread create");
> +	started = true;
> +
> +	/* Access second half (cold pages) — should trigger sync faults */
> +	for (p = nr_pages / 2; p < nr_pages; p++) {
> +		unsigned char *page = (unsigned char *)gopts->area_dst +
> +				      p * page_size;
> +		if (page[0] != (p % 255 + 1)) {
> +			uffd_test_fail("page %lu content mismatch", p);
> +			goto out;
> +		}
> +	}
> +
> +	/*
> +	 * Stop the handler before reading minor_faults: the last fault
> +	 * resolution rwprotect_range()s before incrementing the counter,
> +	 * so the main thread can race ahead of the increment. Stopping
> +	 * here also makes Phase 3 a clean async-only test -- with the
> +	 * handler still running it would silently resolve any sync fault
> +	 * the kernel erroneously delivers, masking a regression.
> +	 */
> +	if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
> +		err("pipe write");
> +	if (pthread_join(uffd_mon, NULL))
> +		err("join() failed");
> +	started = false;

I think 'started' is misleading, would "running_sync_test" better?

> +
> +	if (uargs.minor_faults == 0) {
> +		uffd_test_fail("expected sync faults, got 0");
> +		goto out;
> +	}

And it seems here we can just return and then started is not needed at
all.

> +
> +	/* Phase 3: flip back to async */
> +	set_async_mode(gopts->uffd, true);
> +
> +	/* RW-protect and access again — should auto-resolve */
> +	rwprotect_range(gopts->uffd, (uint64_t)gopts->area_dst,
> +			 nr_pages * page_size, true);
> +
> +	for (p = 0; p < nr_pages; p++) {
> +		volatile char *page = gopts->area_dst + p * page_size;
> +		(void)*page;
> +	}
> +
> +	uffd_test_pass();
> +out:
> +	if (started) {
> +		if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c))
> +			err("pipe write");
> +		if (pthread_join(uffd_mon, NULL))
> +			err("join() failed");
> +	}
> +}

-- 
Sincerely yours,
Mike.

  reply	other threads:[~2026-05-13  6:06 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-08 15:55 [PATCH v2 00/14] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 01/14] mm: decouple protnone helpers from CONFIG_NUMA_BALANCING Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 02/14] mm: rename uffd-wp PTE bit macros to uffd Kiryl Shutsemau (Meta)
2026-05-08 23:52   ` SeongJae Park
2026-05-08 15:55 ` [PATCH v2 03/14] mm: rename uffd-wp PTE accessors " Kiryl Shutsemau (Meta)
2026-05-08 15:55 ` [PATCH v2 04/14] mm: add VM_UFFD_RWP VMA flag Kiryl Shutsemau (Meta)
2026-05-12 16:48   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 05/14] mm: add MM_CP_UFFD_RWP change_protection() flag Kiryl Shutsemau (Meta)
2026-05-12 16:45   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 06/14] mm: preserve RWP marker across PTE rewrites Kiryl Shutsemau (Meta)
2026-05-12 16:59   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 07/14] mm: handle VM_UFFD_RWP in khugepaged, rmap, and GUP Kiryl Shutsemau (Meta)
2026-05-12 17:00   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 08/14] userfaultfd: add UFFDIO_REGISTER_MODE_RWP and UFFDIO_RWPROTECT plumbing Kiryl Shutsemau (Meta)
2026-05-12 17:20   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 09/14] mm/userfaultfd: add RWP fault delivery and expose UFFDIO_REGISTER_MODE_RWP Kiryl Shutsemau (Meta)
2026-05-12 17:29   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 10/14] mm/pagemap: add PAGE_IS_ACCESSED for RWP tracking Kiryl Shutsemau (Meta)
2026-05-12 17:41   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 11/14] userfaultfd: add UFFD_FEATURE_RWP_ASYNC for async fault resolution Kiryl Shutsemau (Meta)
2026-05-12 18:05   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 12/14] userfaultfd: add UFFDIO_SET_MODE for runtime sync/async toggle Kiryl Shutsemau (Meta)
2026-05-12 18:11   ` Mike Rapoport
2026-05-08 15:55 ` [PATCH v2 13/14] selftests/mm: add userfaultfd RWP tests Kiryl Shutsemau (Meta)
2026-05-13  6:06   ` Mike Rapoport [this message]
2026-05-08 15:55 ` [PATCH v2 14/14] Documentation/userfaultfd: document RWP working set tracking Kiryl Shutsemau (Meta)
2026-05-13  6:26   ` Mike Rapoport
2026-05-08 17:32 ` [PATCH v2 00/14] userfaultfd: working set tracking for VM guest memory Andrew Morton
2026-05-08 22:48   ` Kiryl Shutsemau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=agQU2c2b3VqpYRdi@kernel.org \
    --to=rppt@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=jthoughton@google.com \
    --cc=kas@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=seanjc@google.com \
    --cc=sj@kernel.org \
    --cc=skhan@linuxfoundation.org \
    --cc=surenb@google.com \
    --cc=usama.arif@linux.dev \
    --cc=vbabka@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox