All of lore.kernel.org
 help / color / mirror / Atom feed
From: Youngjun Park <her0gyugyu@gmail.com>
To: akpm@linux-foundation.org
Cc: chrisl@kernel.org, youngjun.park@lge.com, linux-mm@kvack.org,
	cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	kasong@tencent.com, hannes@cmpxchg.org, mhocko@kernel.org,
	roman.gushchin@linux.dev, shakeel.butt@linux.dev,
	muchun.song@linux.dev, shikemeng@huaweicloud.com,
	nphamcs@gmail.com, baoquan.he@linux.dev, baohua@kernel.org,
	yosry@kernel.org, gunho.lee@lge.com, taejoon.song@lge.com,
	hyungjun.cho@lge.com, mkoutny@suse.com, baver.bae@lge.com,
	matia.kim@lge.com
Subject: [PATCH v9 6/6] selftests/cgroup: add a swap tier routing test
Date: Sun, 21 Jun 2026 03:16:31 +0900	[thread overview]
Message-ID: <20260620181635.299364-7-youngjun.park@lge.com> (raw)
In-Reply-To: <20260620181635.299364-1-youngjun.park@lge.com>

Add a selftest for the per-cgroup swap tier control file,
memory.swap.tiers.max. It checks the default mask, toggling a tier,
rejection of invalid input, and that recreating a tier resets the mask.
It also checks that a cgroup's pages are swapped out only to an allowed
tier, including across the parent and child hierarchy. The routing check
uses two zram devices placed in different tiers.

Assisted-by: Claude:claude-opus-4-8
Signed-off-by: Youngjun Park <youngjun.park@lge.com>
---
 tools/testing/selftests/cgroup/.gitignore     |   1 +
 tools/testing/selftests/cgroup/Makefile       |   2 +
 tools/testing/selftests/cgroup/config         |   2 +
 .../selftests/cgroup/test_swap_tiers.c        | 500 ++++++++++++++++++
 4 files changed, 505 insertions(+)
 create mode 100644 tools/testing/selftests/cgroup/test_swap_tiers.c

diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 952e4448bf07..77b8e6c3e592 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -8,5 +8,6 @@ test_kill
 test_kmem
 test_memcontrol
 test_pids
+test_swap_tiers
 test_zswap
 wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index e01584c2189a..a98e3c414cd5 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -16,6 +16,7 @@ TEST_GEN_PROGS += test_kill
 TEST_GEN_PROGS += test_kmem
 TEST_GEN_PROGS += test_memcontrol
 TEST_GEN_PROGS += test_pids
+TEST_GEN_PROGS += test_swap_tiers
 TEST_GEN_PROGS += test_zswap
 
 LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
@@ -32,4 +33,5 @@ $(OUTPUT)/test_kill: $(LIBCGROUP_O)
 $(OUTPUT)/test_kmem: $(LIBCGROUP_O)
 $(OUTPUT)/test_memcontrol: $(LIBCGROUP_O)
 $(OUTPUT)/test_pids: $(LIBCGROUP_O)
+$(OUTPUT)/test_swap_tiers: $(LIBCGROUP_O)
 $(OUTPUT)/test_zswap: $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
index 39f979690dd3..6086bb5bba97 100644
--- a/tools/testing/selftests/cgroup/config
+++ b/tools/testing/selftests/cgroup/config
@@ -4,3 +4,5 @@ CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_MEMCG=y
 CONFIG_PAGE_COUNTER=y
+CONFIG_SWAP=y
+CONFIG_ZRAM=y
diff --git a/tools/testing/selftests/cgroup/test_swap_tiers.c b/tools/testing/selftests/cgroup/test_swap_tiers.c
new file mode 100644
index 000000000000..24420c1ef398
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_swap_tiers.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/swap.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+#include "cgroup_util.h"
+
+#ifndef MADV_PAGEOUT
+#define MADV_PAGEOUT 21
+#endif
+
+#define TIERS_PATH "/sys/kernel/mm/swap/tiers"
+#define TIERS_MAX "memory.swap.tiers.max"
+
+/*
+ * Write one command string to the global swap tiers control file.
+ * Returns 0 on success, or a negative errno value when the file cannot be
+ * opened or the write fails.
+ */
+static int tiers_write(const char *cmd)
+{
+	int err = 0;
+	int tiers_fd = open(TIERS_PATH, O_WRONLY);
+
+	if (tiers_fd < 0)
+		return -errno;
+
+	/* Record the failure before close() has a chance to change errno. */
+	if (write(tiers_fd, cmd, strlen(cmd)) < 0)
+		err = -errno;
+
+	close(tiers_fd);
+	return err;
+}
+
+/*
+ * Count the tiers currently listed in the swap tiers control file.
+ * A line is counted when it parses as a name followed by three integers;
+ * every other line is ignored.  Returns -1 if the file cannot be opened or
+ * read.
+ */
+static int tier_count(void)
+{
+	char text[4096];
+	char *cursor, *row;
+	ssize_t len;
+	int nr_tiers = 0;
+	int fd = open(TIERS_PATH, O_RDONLY);
+
+	if (fd < 0)
+		return -1;
+	len = read(fd, text, sizeof(text) - 1);
+	close(fd);
+	if (len < 0)
+		return -1;
+	text[len] = '\0';
+
+	row = strtok_r(text, "\n", &cursor);
+	while (row) {
+		char tier[64];
+		int a, b, c;
+
+		if (sscanf(row, "%63s %d %d %d", tier, &a, &b, &c) == 4)
+			nr_tiers++;
+		row = strtok_r(NULL, "\n", &cursor);
+	}
+	return nr_tiers;
+}
+
+/*
+ * Look up @dev in /proc/swaps and return the value of its fourth field,
+ * which callers treat as the amount of swap in use.  Returns -1 when the
+ * file cannot be opened or no entry for @dev is found.
+ */
+static long swap_used_kb(const char *dev)
+{
+	char row[256];
+	long result = -1;
+	FILE *swaps = fopen("/proc/swaps", "r");
+
+	if (!swaps)
+		return -1;
+
+	while (fgets(row, sizeof(row), swaps)) {
+		char path[128], kind[64];
+		long total, in_use, priority;
+		int fields;
+
+		/* The priority field is optional for a match; four fields suffice. */
+		fields = sscanf(row, "%127s %63s %ld %ld %ld",
+				path, kind, &total, &in_use, &priority);
+		if (fields < 4 || strcmp(path, dev))
+			continue;
+
+		result = in_use;
+		break;
+	}
+
+	fclose(swaps);
+	return result;
+}
+
+/*
+ * Count the entries in /proc/swaps, not counting its first line.
+ * Returns -1 if the file cannot be opened.
+ */
+static int swap_active_count(void)
+{
+	char row[256];
+	int entries = 0;
+	FILE *swaps = fopen("/proc/swaps", "r");
+
+	if (!swaps)
+		return -1;
+
+	/* The first line is the header; without it there is nothing to count. */
+	if (!fgets(row, sizeof(row), swaps))
+		goto done;
+	while (fgets(row, sizeof(row), swaps))
+		entries++;
+done:
+	fclose(swaps);
+	return entries;
+}
+
+/* Defined further down; needed here to undo a hot-add when sizing fails. */
+static void zram_remove(int idx);
+
+/*
+ * Hot-add a zram device and set its disksize to @size bytes.
+ *
+ * The value read from the hot_add control file is used as the index of the
+ * new device.  Returns that index on success, or -1 on failure.  When the
+ * device was created but could not be sized, it is removed again so that a
+ * failed call leaves no stray zram device behind.
+ */
+static int zram_add(long size)
+{
+	char path[128], val[64], *end;
+	ssize_t n;
+	long parsed;
+	int idx, fd;
+
+	fd = open("/sys/class/zram-control/hot_add", O_RDONLY);
+	if (fd < 0)
+		return -1;
+	n = read(fd, val, sizeof(val) - 1);
+	close(fd);
+	if (n <= 0)
+		return -1;
+	val[n] = '\0';
+
+	/*
+	 * Reject anything that is not a non-negative index.  With atoi() an
+	 * unparsable reply would silently become 0 and the code below would
+	 * then resize zram0.
+	 */
+	errno = 0;
+	parsed = strtol(val, &end, 10);
+	idx = (int)parsed;
+	if (errno || end == val || parsed < 0 || idx != parsed)
+		return -1;
+
+	snprintf(path, sizeof(path), "/sys/block/zram%d/disksize", idx);
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
+		goto err_remove;
+	snprintf(val, sizeof(val), "%ld", size);
+	n = write(fd, val, strlen(val));
+	close(fd);
+	if (n < 0)
+		goto err_remove;
+	return idx;
+
+err_remove:
+	/* The device exists but is unusable; do not leak it to the caller. */
+	zram_remove(idx);
+	return -1;
+}
+
+/*
+ * Ask zram-control to hot-remove device @idx.  This is best-effort cleanup:
+ * neither a failed open nor a failed write is reported to the caller.
+ */
+static void zram_remove(int idx)
+{
+	char num[16];
+	ssize_t written;
+	int ctl = open("/sys/class/zram-control/hot_remove", O_WRONLY);
+
+	if (ctl < 0)
+		return;
+
+	snprintf(num, sizeof(num), "%d", idx);
+	written = write(ctl, num, strlen(num));
+	(void)written;	/* ignore: best-effort cleanup */
+	close(ctl);
+}
+
+/*
+ * Format @dev with mkswap and enable it as swap at priority @prio.
+ * Returns -1 if mkswap does not exit successfully, otherwise the result of
+ * swapon().
+ */
+static int swap_setup(const char *dev, int prio)
+{
+	char mkswap_cmd[128];
+	int flags = SWAP_FLAG_PREFER | (prio & SWAP_FLAG_PRIO_MASK);
+
+	snprintf(mkswap_cmd, sizeof(mkswap_cmd), "mkswap %s >/dev/null 2>&1", dev);
+	if (system(mkswap_cmd) != 0)
+		return -1;
+
+	return swapon(dev, flags);
+}
+
+/* A freshly created cgroup must report every tier as "max". */
+static int test_default(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = cg_name(root, "swaptier_default");
+
+	if (!cg)
+		return ret;
+	if (cg_create(cg))
+		goto cleanup;
+
+	/* Both tiers have to read back as "max" for the test to pass. */
+	if (cg_read_strstr(cg, TIERS_MAX, "fast max"))
+		goto cleanup;
+	if (cg_read_strstr(cg, TIERS_MAX, "slow max"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+	return ret;
+}
+
+/* Disabling a tier and enabling it again must each be visible on read-back. */
+static int test_toggle(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = cg_name(root, "swaptier_toggle");
+
+	if (!cg)
+		return ret;
+	if (cg_create(cg))
+		goto cleanup;
+
+	/* Each step only runs when the previous one succeeded. */
+	if (cg_write(cg, TIERS_MAX, "fast 0") ||
+	    cg_read_strstr(cg, TIERS_MAX, "fast 0") ||
+	    cg_write(cg, TIERS_MAX, "fast max") ||
+	    cg_read_strstr(cg, TIERS_MAX, "fast max"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+	return ret;
+}
+
+/* Writes naming an unknown tier or carrying a bad value must both fail. */
+static int test_invalid(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = cg_name(root, "swaptier_invalid");
+
+	if (!cg)
+		return ret;
+	if (cg_create(cg))
+		goto cleanup;
+
+	/*
+	 * A write that succeeds means bad input was accepted.  The second
+	 * write is only attempted once the first has been rejected.
+	 */
+	if (cg_write(cg, TIERS_MAX, "nosuchtier 0") &&
+	    cg_write(cg, TIERS_MAX, "fast bogus"))
+		ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+	return ret;
+}
+
+/*
+ * Removing a tier and adding it back under the same name must make it
+ * allowed again for a cgroup that had disabled it.
+ */
+static int test_recreate(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = cg_name(root, "swaptier_recreate");
+
+	if (!cg)
+		return ret;
+	if (cg_create(cg))
+		goto cleanup;
+
+	/* Start from a cgroup that has 'fast' disabled and confirms it. */
+	if (cg_write(cg, TIERS_MAX, "fast 0") ||
+	    cg_read_strstr(cg, TIERS_MAX, "fast 0"))
+		goto cleanup;
+
+	/* Drop the tier, then add it back with the same name. */
+	if (tiers_write("-fast"))
+		goto cleanup;
+	if (tiers_write("+fast:10"))
+		goto cleanup;
+
+	if (!cg_read_strstr(cg, TIERS_MAX, "fast max"))
+		ret = KSFT_PASS;
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+	return ret;
+}
+
+/*
+ * Child body: map @arg bytes of anonymous memory, touch every page, ask the
+ * kernel to page the range out, then sleep until a signal arrives.
+ * Returns -1 if the mapping or the madvise() call fails.
+ */
+static int swapout_child(const char *cgroup, void *arg)
+{
+	size_t len = (size_t)arg;
+	size_t off = 0;
+	int step;
+	char *region;
+
+	region = mmap(NULL, len, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (region == MAP_FAILED)
+		return -1;
+
+	/* Write one byte per page so that every page is actually populated. */
+	step = sysconf(_SC_PAGE_SIZE);
+	while (off < len) {
+		region[off] = 'x';
+		off += step;
+	}
+
+	if (madvise(region, len, MADV_PAGEOUT) != 0)
+		return -1;
+
+	/* Hold the swap entries while the parent inspects /proc/swaps. */
+	pause();
+	return 0;
+}
+
+/*
+ * Run one routing check for @cg, which the caller has already configured so
+ * that the 'fast' tier must not be used.
+ *
+ * Two 128MB zram swap devices are created, one per tier, and a child placed
+ * in @cg pages out 64MB of anonymous memory.  The case passes when the slow
+ * device shows usage and the fast device shows none.
+ *
+ * Returns KSFT_SKIP when /proc/swaps is unreadable or already lists swap
+ * devices, or when the zram swap devices cannot be set up; KSFT_PASS or
+ * KSFT_FAIL otherwise.
+ */
+static int run_routing_case(const char *cg)
+{
+	/* Empty until the matching zram device exists. */
+	char fast_dev[32] = "", slow_dev[32] = "";
+	int zfast = -1, zslow = -1;
+	int fast_on = 0, slow_on = 0;
+	long used_fast, used_slow;
+	int ret = KSFT_SKIP;
+	pid_t pid = -1;
+	int i;
+
+	/* Only our devices must be present, so usage is unambiguous. */
+	if (swap_active_count() != 0)
+		return KSFT_SKIP;
+
+	/*
+	 * Name each device as soon as it exists, so the cleanup path never
+	 * works with an unset name when the second hot-add fails.
+	 */
+	zfast = zram_add(MB(128));
+	if (zfast < 0)
+		goto out;
+	snprintf(fast_dev, sizeof(fast_dev), "/dev/zram%d", zfast);
+
+	zslow = zram_add(MB(128));
+	if (zslow < 0)
+		goto out;
+	snprintf(slow_dev, sizeof(slow_dev), "/dev/zram%d", zslow);
+
+	/* prio 10 -> 'fast' tier [10, MAX]; prio 0 -> 'slow' tier [-1, 9]. */
+	if (swap_setup(fast_dev, 10))
+		goto out;
+	fast_on = 1;
+	if (swap_setup(slow_dev, 0))
+		goto out;
+	slow_on = 1;
+
+	/* The environment is ready: anything going wrong now is a failure. */
+	ret = KSFT_FAIL;
+
+	pid = cg_run_nowait(cg, swapout_child, (void *)MB(64));
+	if (pid < 0)
+		goto out;
+
+	for (i = 0; i < 50; i++) {		/* up to ~5s for pageout */
+		if (swap_used_kb(slow_dev) > 0)
+			break;
+		usleep(100000);
+	}
+
+	used_fast = swap_used_kb(fast_dev);
+	used_slow = swap_used_kb(slow_dev);
+	if (used_slow > 0 && used_fast == 0)
+		ret = KSFT_PASS;
+	else
+		ksft_print_msg("routing[%s]: fast=%ldKB slow=%ldKB (want fast=0, slow>0)\n",
+			       cg, used_fast, used_slow);
+out:
+	/* Reap the child first so it no longer holds swap entries. */
+	if (pid > 0) {
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+	}
+	/* Only swapoff what was actually enabled; always drop created devices. */
+	if (fast_on)
+		swapoff(fast_dev);
+	if (zfast >= 0)
+		zram_remove(zfast);
+	if (slow_on)
+		swapoff(slow_dev);
+	if (zslow >= 0)
+		zram_remove(zslow);
+	return ret;
+}
+
+/*
+ * With the high-priority 'fast' tier disabled in a cgroup, its pages must be
+ * swapped only to the 'slow' tier's device and the fast device must remain
+ * unused.
+ */
+static int test_routing(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg = cg_name(root, "swaptier_routing");
+
+	if (!cg)
+		return ret;
+	if (cg_create(cg))
+		goto cleanup;
+
+	/* The routing case only runs once 'fast' has been switched off. */
+	if (cg_write(cg, TIERS_MAX, "fast 0") == 0)
+		ret = run_routing_case(cg);
+cleanup:
+	cg_destroy(cg);
+	free(cg);
+	return ret;
+}
+
+/*
+ * Create cgroup @name under @root and enable the memory controller for its
+ * children.  Returns the allocated cgroup path, which the caller destroys
+ * and frees, or NULL if any step fails.
+ */
+static char *make_parent(const char *root, const char *name)
+{
+	char *cg = cg_name(root, name);
+
+	if (!cg)
+		return NULL;
+
+	if (cg_create(cg) == 0 &&
+	    cg_write(cg, "cgroup.subtree_control", "+memory") == 0)
+		return cg;
+
+	/* Setup failed part-way: tear down whatever was created. */
+	cg_destroy(cg);
+	free(cg);
+	return NULL;
+}
+
+/*
+ * The effective mask is the intersection of the parent's and the child's, so
+ * a tier disabled by the parent remains disabled for the child even when the
+ * child re-enables it.  Here the parent disables 'fast' and the child writes
+ * 'fast max'; the child must still swap only to slow.
+ */
+static int test_routing_parent_wins(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *child = NULL;
+	char *parent = make_parent(root, "swaptier_pwins");
+
+	if (!parent)
+		return ret;
+	if (cg_write(parent, TIERS_MAX, "fast 0"))
+		goto cleanup;
+
+	child = cg_name(parent, "child");
+	if (!child || cg_create(child))
+		goto cleanup;
+
+	/* The child tries to re-enable 'fast'; routing must not change. */
+	if (cg_write(child, TIERS_MAX, "fast max") == 0)
+		ret = run_routing_case(child);
+cleanup:
+	if (child) {
+		cg_destroy(child);
+		free(child);
+	}
+	cg_destroy(parent);
+	free(parent);
+	return ret;
+}
+
+/*
+ * A child may be stricter than its parent: the parent keeps every tier
+ * enabled while the child disables 'fast' itself, so the child must swap
+ * only to slow.
+ */
+static int test_routing_child_restricts(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *child = NULL;
+	char *parent = make_parent(root, "swaptier_crestr");
+
+	if (!parent)
+		return ret;
+
+	child = cg_name(parent, "child");
+	if (!child || cg_create(child))
+		goto cleanup;
+
+	/* Only the child restricts; the parent's mask is left untouched. */
+	if (cg_write(child, TIERS_MAX, "fast 0") == 0)
+		ret = run_routing_case(child);
+cleanup:
+	if (child) {
+		cg_destroy(child);
+		free(child);
+	}
+	cg_destroy(parent);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Remove every tier still listed in the control file, so that the tiers are
+ * left empty even when a test failed part-way.  Errors are not reported.
+ */
+static void tiers_clear(void)
+{
+	char text[4096];
+	char *cursor, *row;
+	ssize_t len;
+	int fd = open(TIERS_PATH, O_RDONLY);
+
+	if (fd < 0)
+		return;
+	len = read(fd, text, sizeof(text) - 1);
+	close(fd);
+	if (len < 0)
+		return;
+	text[len] = '\0';
+
+	row = strtok_r(text, "\n", &cursor);
+	while (row) {
+		char tier[64], removal[80];
+		int a, b, c;
+
+		/* Only lines shaped like a tier entry produce a "-name" command. */
+		if (sscanf(row, "%63s %d %d %d", tier, &a, &b, &c) == 4) {
+			snprintf(removal, sizeof(removal), "-%s", tier);
+			tiers_write(removal);
+		}
+		row = strtok_r(NULL, "\n", &cursor);
+	}
+}
+
+/*
+ * Entry point: check the prerequisites (root, cgroup v2 with the memory
+ * controller, the swap tiers interface with no tiers configured), create
+ * the 'slow' and 'fast' tiers, run the seven tests, and remove the tiers
+ * again before reporting.
+ */
+int main(void)
+{
+	char root[PATH_MAX];
+	int rc;
+
+	ksft_print_header();
+	ksft_set_plan(7);
+
+	if (geteuid() != 0)
+		ksft_exit_skip("test requires root\n");
+	if (cg_find_unified_root(root, sizeof(root), NULL))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+		ksft_exit_skip("memory controller isn't available\n");
+	/* Enable the memory controller for children only if it is not already. */
+	if (cg_read_strstr(root, "cgroup.subtree_control", "memory") &&
+	    cg_write(root, "cgroup.subtree_control", "+memory"))
+		ksft_exit_skip("failed to enable memory controller\n");
+	if (access(TIERS_PATH, F_OK))
+		ksft_exit_skip("swap tiers interface not present\n");
+	if (tier_count() != 0)
+		ksft_exit_skip("swap tiers already configured; run on a clean system\n");
+
+	/* Two tiers: fast = [10, MAX], slow = [-1, 9]. */
+	if (tiers_write("+slow:-1 +fast:10"))
+		ksft_exit_skip("failed to configure swap tiers\n");
+
+	/* Interface tests: each one reports a plain pass or fail. */
+	rc = test_default(root);
+	ksft_test_result(rc == KSFT_PASS, "default mask is max\n");
+	rc = test_toggle(root);
+	ksft_test_result(rc == KSFT_PASS, "enable/disable tier\n");
+	rc = test_invalid(root);
+	ksft_test_result(rc == KSFT_PASS, "invalid input rejected\n");
+	rc = test_recreate(root);
+	ksft_test_result(rc == KSFT_PASS,
+			 "recreated tier resets cgroup mask\n");
+
+	/* Routing tests may also skip, so their result code is passed through. */
+	rc = test_routing(root);
+	ksft_test_result_code(rc, "swapout honors tier mask", NULL);
+	rc = test_routing_parent_wins(root);
+	ksft_test_result_code(rc,
+			      "child cannot re-enable a parent-disabled tier", NULL);
+	rc = test_routing_child_restricts(root);
+	ksft_test_result_code(rc,
+			      "child can restrict tiers below its parent", NULL);
+
+	tiers_clear();
+
+	ksft_finished();
+}
-- 
2.48.1


  parent reply	other threads:[~2026-06-20 18:17 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-20 18:16 [PATCH v9 0/6] mm/swap, memcg: Introduce swap tiers for cgroup based swap control Youngjun Park
2026-06-20 18:16 ` [PATCH v9 1/6] mm: swap: introduce swap tier infrastructure Youngjun Park
2026-06-20 18:16 ` [PATCH v9 2/6] mm: swap: associate swap devices with tiers Youngjun Park
2026-06-20 18:16 ` [PATCH v9 3/6] mm: memcontrol: add interface for swap tier selection Youngjun Park
2026-06-22  5:03   ` Youngjun Park
2026-06-22 21:21   ` Yosry Ahmed
2026-06-22 22:10     ` Joshua Hahn
2026-06-22 22:26       ` Yosry Ahmed
2026-06-22 23:19         ` Joshua Hahn
2026-06-22 23:46           ` Yosry Ahmed
2026-06-23  0:40             ` Joshua Hahn
2026-06-20 18:16 ` [PATCH v9 4/6] mm: swap: filter swap allocation by memcg tier mask Youngjun Park
2026-06-20 18:16 ` [PATCH v9 5/6] selftests/mm: add a swap tier configuration test Youngjun Park
2026-06-20 18:16 ` Youngjun Park [this message]
2026-06-22 21:23 ` [PATCH v9 0/6] mm/swap, memcg: Introduce swap tiers for cgroup based swap control Yosry Ahmed
2026-06-23  1:29   ` Youngjun Park

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260620181635.299364-7-youngjun.park@lge.com \
    --to=her0gyugyu@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=baohua@kernel.org \
    --cc=baoquan.he@linux.dev \
    --cc=baver.bae@lge.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chrisl@kernel.org \
    --cc=gunho.lee@lge.com \
    --cc=hannes@cmpxchg.org \
    --cc=hyungjun.cho@lge.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=matia.kim@lge.com \
    --cc=mhocko@kernel.org \
    --cc=mkoutny@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=nphamcs@gmail.com \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=shikemeng@huaweicloud.com \
    --cc=taejoon.song@lge.com \
    --cc=yosry@kernel.org \
    --cc=youngjun.park@lge.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.