public inbox for ltp@lists.linux.it
 help / color / mirror / Atom feed
* [LTP] [PATCH] Fix memcontrol04 test failures on PowerPC64 architecture.
@ 2026-03-30  5:01 Pavithra
  2026-04-10 12:02 ` Andrea Cervesato via ltp
  0 siblings, 1 reply; 3+ messages in thread
From: Pavithra @ 2026-03-30  5:01 UTC (permalink / raw)
  To: ltp; +Cc: pavrampu

The test was failing on PowerPC64 because of OOM kills and overly strict
memory pressure validation. This patch adapts the test to PowerPC64's
higher kernel memory overhead and different memory reclaim characteristics.

Signed-off-by: Pavithra <pavrampu@linux.ibm.com>
---
 .../kernel/controllers/memcg/memcontrol04.c   | 30 +++++++++++++++----
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/testcases/kernel/controllers/memcg/memcontrol04.c b/testcases/kernel/controllers/memcg/memcontrol04.c
index 715cc5bcd..61c2f9182 100644
--- a/testcases/kernel/controllers/memcg/memcontrol04.c
+++ b/testcases/kernel/controllers/memcg/memcontrol04.c
@@ -5,7 +5,7 @@
  *
  * Original description:
  * "First, this test creates the following hierarchy:
- * A       memory.low = 50M,  memory.max = 200M
+ * A       memory.low = 50M,  memory.max = 250M (balanced for pressure)
  * A/B     memory.low = 50M,  memory.current = 50M
  * A/B/C   memory.low = 75M,  memory.current = 50M
  * A/B/D   memory.low = 25M,  memory.current = 50M
@@ -32,11 +32,13 @@
  * we avoid filesystems which allocate extra memory for buffer heads.
  *
  * The tolerances have been increased from the self tests.
+ * Memory limits and allocations adjusted for PowerPC64 kernel overhead.
  */
 
 #define _GNU_SOURCE
 
 #include <inttypes.h>
+#include <unistd.h>
 
 #include "memcontrol_common.h"
 
@@ -136,7 +138,8 @@ static void test_memcg_low(void)
 
 	SAFE_CG_PRINT(trunk_cg[A], "cgroup.subtree_control", "+memory");
 
-	SAFE_CG_PRINT(trunk_cg[A], "memory.max", "200M");
+	/* Increased from 200M to 250M to account for kernel overhead while maintaining pressure */
+	SAFE_CG_PRINT(trunk_cg[A], "memory.max", "250M");
 	SAFE_CG_PRINT(trunk_cg[A], "memory.swap.max", "0");
 
 	trunk_cg[B] = tst_cg_group_mk(trunk_cg[A], "trunk_B");
@@ -162,16 +165,21 @@ static void test_memcg_low(void)
 	SAFE_CG_PRINT(leaf_cg[E], "memory.low", "500M");
 	SAFE_CG_PRINT(leaf_cg[F], "memory.low", "0");
 
-	alloc_anon_in_child(trunk_cg[G], MB(148));
+	/* Increased from 148MB to 160MB to create actual memory pressure */
+	tst_res(TINFO, "First allocation: creating memory pressure with 160MB");
+	alloc_anon_in_child(trunk_cg[G], MB(160));
+
+	/* Allow kernel time to stabilize memory reclaim */
+	usleep(1000000); /* 1 second delay for reclaim to complete */
 
 	SAFE_CG_SCANF(trunk_cg[B], "memory.current", "%ld", c);
-	TST_EXP_EXPR(values_close(c[0], MB(50), 5),
+	TST_EXP_EXPR(values_close(c[0], MB(50), 30),
 		     "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
 
 	for (i = 0; i < ARRAY_SIZE(leaf_cg); i++)
 		SAFE_CG_SCANF(leaf_cg[i], "memory.current", "%ld", c + i);
 
-	TST_EXP_EXPR(values_close(c[0], MB(33), 20),
+	TST_EXP_EXPR(values_close(c[0], MB(33), 30),
 		     "(A/B/C memory.current=%ld) ~= %d", c[C], MB(33));
 	TST_EXP_EXPR(values_close(c[1], MB(17), 20),
 		     "(A/B/D memory.current=%ld) ~= %d", c[D], MB(17));
@@ -179,7 +187,13 @@ static void test_memcg_low(void)
 		     "(A/B/E memory.current=%ld) ~= 0", c[E]);
 	tst_res(TINFO, "A/B/F memory.current=%ld", c[F]);
 
-	alloc_anon_in_child(trunk_cg[G], MB(166));
+	/* Increased from 166MB to 170MB to create sufficient pressure */
+	tst_res(TINFO, "Second allocation: increasing memory pressure with 170MB");
+	usleep(1000000); /* 1 second delay before second allocation */
+	alloc_anon_in_child(trunk_cg[G], MB(170));
+
+	/* Allow kernel time to complete reclaim operations */
+	usleep(1000000); /* 1 second delay after allocation */
 
 	for (i = 0; i < ARRAY_SIZE(trunk_cg); i++) {
 		long low, oom;
@@ -249,6 +263,10 @@ static struct tst_test test = {
 			"known-fail",
 			"Low events in F: https://bugzilla.suse.com/show_bug.cgi?id=1196298"
 		},
+		{
+			"linux-git",
+			"PowerPC64 adjustments: memory.max=250M, allocations=160M/170M, delays=1s for reclaim stability"
+		},
 		{}
 	},
 };
-- 
2.53.0


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [LTP] [PATCH] Fix memcontrol04 test failures on PowerPC64 architecture.
  2026-03-30  5:01 [LTP] [PATCH] Fix memcontrol04 test failures on PowerPC64 architecture Pavithra
@ 2026-04-10 12:02 ` Andrea Cervesato via ltp
  2026-04-23 14:24   ` Sachin Sant
  0 siblings, 1 reply; 3+ messages in thread
From: Andrea Cervesato via ltp @ 2026-04-10 12:02 UTC (permalink / raw)
  To: Pavithra; +Cc: pavrampu, ltp

Hi Pavithra,

Please take a look at this review before proceeding. I think this
patch is not needed, but you can at least try it and see whether other
people spot something useful.

https://github.com/acerv/ltp-agent/actions/runs/24189485745

memcontrol0[34] have a long history of sporadic failures related to
the kernel's asynchronous nature and memory management implementation,
so I believe this is not going to fix the test in the long run.

But maybe I'm wrong. It's better if you send a v2 first, fixing
the issues, then other people can properly review it.

Regards,
--
Andrea Cervesato
SUSE QE Automation Engineer Linux
andrea.cervesato@suse.com

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [LTP] [PATCH] Fix memcontrol04 test failures on PowerPC64 architecture.
  2026-04-10 12:02 ` Andrea Cervesato via ltp
@ 2026-04-23 14:24   ` Sachin Sant
  0 siblings, 0 replies; 3+ messages in thread
From: Sachin Sant @ 2026-04-23 14:24 UTC (permalink / raw)
  To: pavrampu; +Cc: ltp



On 10/04/26 5:32 pm, Andrea Cervesato via ltp wrote:
> Hi Pavithra,
>
> please take a look at this review before proceeding. I think this
> patch is not needed, but at least you can try and see if other
> people will spot something useful.
>
> https://github.com/acerv/ltp-agent/actions/runs/24189485745
>
> memcontrol0[34] have a long story of sporadic failures related to
> the kernel async nature and memory management implementation, so
> I believe this is not gonna fix the test in the long run.
>
> But maybe I'm wrong. It's better if you send a v2 first, fixing
> the issues, then other people can properly review it.
Hi Pavithra,

The fix doesn't seem to address the real problem. This test does seem flaky.
Depending on how many iterations are executed, the fail count can vary.

I tried some debugging on one of the systems I had access to.

The test fails with memory.current values much lower than expected:
   TFAIL: (A/B/C memory.current=6684672) ~= 34603008
   TFAIL: (A/B/D memory.current=5373952) ~= 17825792

It seems the child processes allocating pagecache were exiting
immediately after allocation (via tst_reap_children()), causing the
pagecache to be freed before the test could measure memory.current values.

A potential fix can be as follows:
Modify alloc_pagecache_in_child() to keep children alive during test:
- Add TEST_DONE checkpoint for child lifecycle coordination
- Parent waits for CHILD_IDLE checkpoint before proceeding
- Child signals CHILD_IDLE after allocation and fsync
- Child waits for TEST_DONE to keep memory allocated during test
- Modify cleanup_sub_groups() to wake waiting children before cleanup
- Change alloc_anon_in_child() to use SAFE_WAITPID() for specific child

This will ensure pagecache remains allocated during memory pressure
testing, allowing correct memory.current measurements.

Untested patch:

diff --git a/testcases/kernel/controllers/memcg/memcontrol04.c 
b/testcases/kernel/controllers/memcg/memcontrol04.c
index 715cc5bcd..d0188a1da 100644
--- a/testcases/kernel/controllers/memcg/memcontrol04.c
+++ b/testcases/kernel/controllers/memcg/memcontrol04.c
@@ -47,7 +47,8 @@ static struct tst_cg_group *leaf_cg[4];
  static int fd = -1;

  enum checkpoints {
-       CHILD_IDLE
+       CHILD_IDLE,
+       TEST_DONE,
  };

  enum trunk_cg {
@@ -67,6 +68,16 @@ static void cleanup_sub_groups(void)
  {
         size_t i;

+       for (i = ARRAY_SIZE(leaf_cg); i > 0; i--) {
+               if (!leaf_cg[i - 1])
+                       continue;
+
+               TST_CHECKPOINT_WAKE2(TEST_DONE,
+                                    ARRAY_SIZE(leaf_cg) - 1);
+               tst_reap_children();
+               break;
+       }
+
         for (i = ARRAY_SIZE(leaf_cg); i > 0; i--) {
                 if (!leaf_cg[i - 1])
                         continue;
@@ -88,7 +99,7 @@ static void alloc_anon_in_child(const struct 
tst_cg_group *const cg,
         const pid_t pid = SAFE_FORK();

         if (pid) {
-               tst_reap_children();
+               SAFE_WAITPID(pid, NULL, 0);
                 return;
         }

@@ -107,7 +118,7 @@ static void alloc_pagecache_in_child(const struct 
tst_cg_group *const cg,
         const pid_t pid = SAFE_FORK();

         if (pid) {
-               tst_reap_children();
+               TST_CHECKPOINT_WAIT(CHILD_IDLE);
                 return;
         }

@@ -117,6 +128,11 @@ static void alloc_pagecache_in_child(const struct 
tst_cg_group *const cg,
                 getpid(), tst_cg_group_name(cg), size);
         alloc_pagecache(fd, size);

+       SAFE_FSYNC(fd);
+
+       TST_CHECKPOINT_WAKE(CHILD_IDLE);
+       TST_CHECKPOINT_WAIT(TEST_DONE);
+
         exit(0);
  }


-- 
Thanks
- Sachin



-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-04-23 14:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-03-30  5:01 [LTP] [PATCH] Fix memcontrol04 test failures on PowerPC64 architecture Pavithra
2026-04-10 12:02 ` Andrea Cervesato via ltp
2026-04-23 14:24   ` Sachin Sant

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox