All of lore.kernel.org
 help / color / mirror / Atom feed
* [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
@ 2025-05-05 10:53 Martin Doucha
  2025-05-06  6:59 ` Li Wang via ltp
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Martin Doucha @ 2025-05-05 10:53 UTC (permalink / raw)
  To: ltp

The first trunk_G allocation has 2MB safety margin to avoid triggering
OOM killer. However, on systems with 64K pagesize, this may not be enough.
Account for process size as reported by cgroup memory stats before
allocating memory in child processes.

Signed-off-by: Martin Doucha <mdoucha@suse.cz>
---
 .../kernel/controllers/memcg/memcontrol03.c   | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/testcases/kernel/controllers/memcg/memcontrol03.c b/testcases/kernel/controllers/memcg/memcontrol03.c
index b5bbb9954..d2e489ad6 100644
--- a/testcases/kernel/controllers/memcg/memcontrol03.c
+++ b/testcases/kernel/controllers/memcg/memcontrol03.c
@@ -94,17 +94,23 @@ static void cleanup_sub_groups(void)
 }
 
 static void alloc_anon_in_child(const struct tst_cg_group *const cg,
-				const size_t size, const int expect_oom)
+	size_t size, const int expect_oom)
 {
 	int status;
 	const pid_t pid = SAFE_FORK();
+	size_t cgmem;
 
 	if (!pid) {
 		SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
+		SAFE_CG_SCANF(cg, "memory.current", "%zu", &cgmem);
+		size = size > cgmem ? size - cgmem : 0;
 
 		tst_res(TINFO, "Child %d in %s: Allocating anon: %"PRIdPTR,
 		getpid(), tst_cg_group_name(cg), size);
-		alloc_anon(size);
+
+		if (size)
+			alloc_anon(size);
+
 		exit(0);
 	}
 
@@ -128,9 +134,10 @@ static void alloc_anon_in_child(const struct tst_cg_group *const cg,
 }
 
 static void alloc_pagecache_in_child(const struct tst_cg_group *const cg,
-				     const size_t size)
+	size_t size)
 {
 	const pid_t pid = SAFE_FORK();
+	size_t cgmem;
 
 	if (pid) {
 		TST_CHECKPOINT_WAIT(CHILD_IDLE);
@@ -138,10 +145,15 @@ static void alloc_pagecache_in_child(const struct tst_cg_group *const cg,
 	}
 
 	SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
+	SAFE_CG_SCANF(cg, "memory.current", "%zu", &cgmem);
+	size = size > cgmem ? size - cgmem : 0;
 
 	tst_res(TINFO, "Child %d in %s: Allocating pagecache: %"PRIdPTR,
 		getpid(), tst_cg_group_name(cg), size);
-	alloc_pagecache(fd, size);
+
+	if (size)
+		alloc_pagecache(fd, size);
+
 	SAFE_FSYNC(fd);
 
 	TST_CHECKPOINT_WAKE(CHILD_IDLE);
-- 
2.49.0


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-05 10:53 [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Martin Doucha
@ 2025-05-06  6:59 ` Li Wang via ltp
  2025-05-07 14:23 ` Cyril Hrubis
  2025-09-03  9:19 ` [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Cyril Hrubis
  2 siblings, 0 replies; 16+ messages in thread
From: Li Wang via ltp @ 2025-05-06  6:59 UTC (permalink / raw)
  To: Martin Doucha; +Cc: ltp

On Mon, May 5, 2025 at 6:53 PM Martin Doucha <mdoucha@suse.cz> wrote:

> The first trunk_G allocation has 2MB safety margin to avoid triggering
> OOM killer. However, on systems with 64K pagesize, this may not be enough.
> Account for process size as reported by cgroup memory stats before
> allocating memory in child processes.
>
> Signed-off-by: Martin Doucha <mdoucha@suse.cz>
>

Reviewed-by: Li Wang <liwang@redhat.com>

---
>  .../kernel/controllers/memcg/memcontrol03.c   | 20 +++++++++++++++----
>  1 file changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/testcases/kernel/controllers/memcg/memcontrol03.c
> b/testcases/kernel/controllers/memcg/memcontrol03.c
> index b5bbb9954..d2e489ad6 100644
> --- a/testcases/kernel/controllers/memcg/memcontrol03.c
> +++ b/testcases/kernel/controllers/memcg/memcontrol03.c
> @@ -94,17 +94,23 @@ static void cleanup_sub_groups(void)
>  }
>
>  static void alloc_anon_in_child(const struct tst_cg_group *const cg,
> -                               const size_t size, const int expect_oom)
> +       size_t size, const int expect_oom)
>  {
>         int status;
>         const pid_t pid = SAFE_FORK();
> +       size_t cgmem;
>
>         if (!pid) {
>                 SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +               SAFE_CG_SCANF(cg, "memory.current", "%zu", &cgmem);
> +               size = size > cgmem ? size - cgmem : 0;
>
>                 tst_res(TINFO, "Child %d in %s: Allocating anon: %"PRIdPTR,
>                 getpid(), tst_cg_group_name(cg), size);
> -               alloc_anon(size);
> +
> +               if (size)
> +                       alloc_anon(size);
> +
>                 exit(0);
>         }
>
> @@ -128,9 +134,10 @@ static void alloc_anon_in_child(const struct
> tst_cg_group *const cg,
>  }
>
>  static void alloc_pagecache_in_child(const struct tst_cg_group *const cg,
> -                                    const size_t size)
> +       size_t size)
>  {
>         const pid_t pid = SAFE_FORK();
> +       size_t cgmem;
>
>         if (pid) {
>                 TST_CHECKPOINT_WAIT(CHILD_IDLE);
> @@ -138,10 +145,15 @@ static void alloc_pagecache_in_child(const struct
> tst_cg_group *const cg,
>         }
>
>         SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +       SAFE_CG_SCANF(cg, "memory.current", "%zu", &cgmem);
> +       size = size > cgmem ? size - cgmem : 0;
>
>         tst_res(TINFO, "Child %d in %s: Allocating pagecache: %"PRIdPTR,
>                 getpid(), tst_cg_group_name(cg), size);
> -       alloc_pagecache(fd, size);
> +
> +       if (size)
> +               alloc_pagecache(fd, size);
> +
>         SAFE_FSYNC(fd);
>
>         TST_CHECKPOINT_WAKE(CHILD_IDLE);
> --
> 2.49.0
>
>
> --
> Mailing list info: https://lists.linux.it/listinfo/ltp
>
>

-- 
Regards,
Li Wang

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-05 10:53 [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Martin Doucha
  2025-05-06  6:59 ` Li Wang via ltp
@ 2025-05-07 14:23 ` Cyril Hrubis
  2025-05-07 15:36   ` Martin Doucha
  2025-09-03  9:19 ` [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Cyril Hrubis
  2 siblings, 1 reply; 16+ messages in thread
From: Cyril Hrubis @ 2025-05-07 14:23 UTC (permalink / raw)
  To: Martin Doucha; +Cc: ltp

Hi!
> The first trunk_G allocation has 2MB safety margin to avoid triggering
> OOM killer. However, on systems with 64K pagesize, this may not be enough.
> Account for process size as reported by cgroup memory stats before
> allocating memory in child processes.

Is there a reason to keep the 2MB safety after this patch?

Or can we do:

diff --git a/testcases/kernel/controllers/memcg/memcontrol03.c b/testcases/kernel/controllers/memcg/memcontrol03.c
index b5bbb9954..e7f126880 100644
--- a/testcases/kernel/controllers/memcg/memcontrol03.c
+++ b/testcases/kernel/controllers/memcg/memcontrol03.c
@@ -200,7 +200,7 @@ static void test_memcg_min(void)
                sleep(1);
        }

-       alloc_anon_in_child(trunk_cg[G], MB(148), 0);
+       alloc_anon_in_child(trunk_cg[G], MB(150), 0);

        SAFE_CG_SCANF(trunk_cg[B], "memory.current", "%ld", c);
        TST_EXP_EXPR(values_close(c[0], MB(50), 5),

> --- a/testcases/kernel/controllers/memcg/memcontrol03.c
> +++ b/testcases/kernel/controllers/memcg/memcontrol03.c
> @@ -94,17 +94,23 @@ static void cleanup_sub_groups(void)
>  }
>  
>  static void alloc_anon_in_child(const struct tst_cg_group *const cg,
> -				const size_t size, const int expect_oom)
> +	size_t size, const int expect_oom)
>  {
>  	int status;
>  	const pid_t pid = SAFE_FORK();
> +	size_t cgmem;
>  
>  	if (!pid) {
>  		SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +		SAFE_CG_SCANF(cg, "memory.current", "%zu", &cgmem);
> +		size = size > cgmem ? size - cgmem : 0;

Here we depend on the fact that process memory has been properly
accounted for when it starts running its code. Are you sure that we can
rely on this or does this just happen to work?

-- 
Cyril Hrubis
chrubis@suse.cz

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-07 14:23 ` Cyril Hrubis
@ 2025-05-07 15:36   ` Martin Doucha
  2025-05-09  9:21     ` Cyril Hrubis
  0 siblings, 1 reply; 16+ messages in thread
From: Martin Doucha @ 2025-05-07 15:36 UTC (permalink / raw)
  To: Cyril Hrubis; +Cc: ltp

On 07. 05. 25 16:23, Cyril Hrubis wrote:
> Hi!
>> The first trunk_G allocation has 2MB safety margin to avoid triggering
>> OOM killer. However, on systems with 64K pagesize, this may not be enough.
>> Account for process size as reported by cgroup memory stats before
>> allocating memory in child processes.
> 
> Is there a reason to keep the 2MB safety after this patch?

I'd say there's no reason to remove it. On x86_64, the patch will 
increase the safety margin by only 256KB and that memory is already 
allocated to the cgroup. If we remove the safety margin, any additional 
buffer allocation in glibc may trigger OOM.

> Or can we do:
> 
> diff --git a/testcases/kernel/controllers/memcg/memcontrol03.c b/testcases/kernel/controllers/memcg/memcontrol03.c
> index b5bbb9954..e7f126880 100644
> --- a/testcases/kernel/controllers/memcg/memcontrol03.c
> +++ b/testcases/kernel/controllers/memcg/memcontrol03.c
> @@ -200,7 +200,7 @@ static void test_memcg_min(void)
>                  sleep(1);
>          }
> 
> -       alloc_anon_in_child(trunk_cg[G], MB(148), 0);
> +       alloc_anon_in_child(trunk_cg[G], MB(150), 0);
> 
>          SAFE_CG_SCANF(trunk_cg[B], "memory.current", "%ld", c);
>          TST_EXP_EXPR(values_close(c[0], MB(50), 5),
> 
>> --- a/testcases/kernel/controllers/memcg/memcontrol03.c
>> +++ b/testcases/kernel/controllers/memcg/memcontrol03.c
>> @@ -94,17 +94,23 @@ static void cleanup_sub_groups(void)
>>   }
>>   
>>   static void alloc_anon_in_child(const struct tst_cg_group *const cg,
>> -				const size_t size, const int expect_oom)
>> +	size_t size, const int expect_oom)
>>   {
>>   	int status;
>>   	const pid_t pid = SAFE_FORK();
>> +	size_t cgmem;
>>   
>>   	if (!pid) {
>>   		SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
>> +		SAFE_CG_SCANF(cg, "memory.current", "%zu", &cgmem);
>> +		size = size > cgmem ? size - cgmem : 0;
> 
> Here we depend on the fact that process memory has been properly
> accounted for when it starts running its code. Are you sure that we can
> rely on this or does this just happen to work?

Actually, my commit message is slightly misleading because the existing 
process memory does not get migrated to the new cgroup. But the cgroup 
itself may already have non-zero memory usage even when empty, likely 
for internal kernel structures. Any new allocations of kernel structures 
should also be finished when the process migration completes. So unless 
the migration behavior changes in the near future, we can rely on this.

This sentence in the commit message:
"Account for process size as reported by cgroup memory stats before..."
should be changed to:
"Account for existing cgroup memory usage before..."

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-07 15:36   ` Martin Doucha
@ 2025-05-09  9:21     ` Cyril Hrubis
  2025-05-09  9:40       ` Martin Doucha
  0 siblings, 1 reply; 16+ messages in thread
From: Cyril Hrubis @ 2025-05-09  9:21 UTC (permalink / raw)
  To: Martin Doucha; +Cc: ltp

Hi!
> > Here we depend on the fact that process memory has been properly
> > accounted for when it starts running its code. Are you sure that we can
> > rely on this or does this just happen to work?
> 
> Actually, my commit message is slightly misleading because the existing 
> process memory does not get migrated to the new cgroup. But the cgroup 
> itself may already have non-zero memory usage even when empty, likely 
> for internal kernel structures. Any new allocations of kernel structures 
> should also be finished when the process migration completes. So unless 
> the migration behavior changes in the near future, we can rely on this.

I suppose that the cgroup is charged for the memory it needs to track
the resources, that makes sense. I wonder if we can read that once at
the start of the test when we create the cgroups and use that value
later on.

> This sentence in the commit message:
> "Account for process size as reported by cgroup memory stats before..."
> should be changed to:
> "Account for existing cgroup memory usage before..."

That sounds better. I suppose that we can get this merged with this
change.

Reviewed-by: Cyril Hrubis <chrubis@suse.cz>

-- 
Cyril Hrubis
chrubis@suse.cz

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-09  9:21     ` Cyril Hrubis
@ 2025-05-09  9:40       ` Martin Doucha
  2025-05-09 10:01         ` Cyril Hrubis
  0 siblings, 1 reply; 16+ messages in thread
From: Martin Doucha @ 2025-05-09  9:40 UTC (permalink / raw)
  To: Cyril Hrubis; +Cc: ltp

On 09. 05. 25 11:21, Cyril Hrubis wrote:
> Hi!
>>> Here we depend on the fact that process memory has been properly
>>> accounted for when it starts running its code. Are you sure that we can
>>> rely on this or does this just happen to work?
>>
>> Actually, my commit message is slightly misleading because the existing
>> process memory does not get migrated to the new cgroup. But the cgroup
>> itself may already have non-zero memory usage even when empty, likely
>> for internal kernel structures. Any new allocations of kernel structures
>> should also be finished when the process migration completes. So unless
>> the migration behavior changes in the near future, we can rely on this.
> 
> I suppose that the cgroup is charged for the memory it needs to track
> the resources, that makes sense. I wonder if we can read that once at
> the start of the test when we create the cgroups and use that value
> later on.

Unfortunately, we can't. I've tested this and memory.current can change 
a lot during the first process migration.

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-09  9:40       ` Martin Doucha
@ 2025-05-09 10:01         ` Cyril Hrubis
  2025-05-09 10:11           ` Martin Doucha
       [not found]           ` <qbca5sxzfw53o6nku5ulu2dl2xygxqghgsuerjjjfoea62bacs@a5qm6cl7hhnu>
  0 siblings, 2 replies; 16+ messages in thread
From: Cyril Hrubis @ 2025-05-09 10:01 UTC (permalink / raw)
  To: Martin Doucha; +Cc: Michal Koutný, ltp

Hi!
> >>> Here we depend on the fact that process memory has been properly
> >>> accounted for when it starts running its code. Are you sure that we can
> >>> rely on this or does this just happen to work?
> >>
> >> Actually, my commit message is slightly misleading because the existing
> >> process memory does not get migrated to the new cgroup. But the cgroup
> >> itself may already have non-zero memory usage even when empty, likely
> >> for internal kernel structures. Any new allocations of kernel structures
> >> should also be finished when the process migration completes. So unless
> >> the migration behavior changes in the near future, we can rely on this.
> > 
> > I suppose that the cgroup is charged for the memory it needs to track
> > the resources, that makes sense. I wonder if we can read that once at
> > the start of the test when we create the cgroups and use that value
> > later on.
> 
> Unfortunately, we can't. I've tested this and memory.current can change 
> a lot during the first process migration.

That does sound strange. @Michal any idea what happens here?

-- 
Cyril Hrubis
chrubis@suse.cz

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-09 10:01         ` Cyril Hrubis
@ 2025-05-09 10:11           ` Martin Doucha
  2025-05-20 15:29             ` Martin Doucha
       [not found]           ` <qbca5sxzfw53o6nku5ulu2dl2xygxqghgsuerjjjfoea62bacs@a5qm6cl7hhnu>
  1 sibling, 1 reply; 16+ messages in thread
From: Martin Doucha @ 2025-05-09 10:11 UTC (permalink / raw)
  To: Cyril Hrubis; +Cc: Michal Koutný, ltp

On 09. 05. 25 12:01, Cyril Hrubis wrote:
> Hi!
>>>>> Here we depend on the fact that process memory has been properly
>>>>> accounted for when it starts running its code. Are you sure that we can
>>>>> rely on this or does this just happen to work?
>>>>
>>>> Actually, my commit message is slightly misleading because the existing
>>>> process memory does not get migrated to the new cgroup. But the cgroup
>>>> itself may already have non-zero memory usage even when empty, likely
>>>> for internal kernel structures. Any new allocations of kernel structures
>>>> should also be finished when the process migration completes. So unless
>>>> the migration behavior changes in the near future, we can rely on this.
>>>
>>> I suppose that the cgroup is charged for the memory it needs to track
>>> the resources, that makes sense. I wonder if we can read that once at
>>> the start of the test when we create the cgroups and use that value
>>> later on.
>>
>> Unfortunately, we can't. I've tested this and memory.current can change
>> a lot during the first process migration.
> 
> That does sound strange. @Michal any idea what happens here?

My guess is that the kernel structure allocation is just lazy. The 
cgroup memory counter usually starts at zero. Then it allocates 
structures on the first process migration and keeps them until the 
cgroup gets destroyed.

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
       [not found]           ` <qbca5sxzfw53o6nku5ulu2dl2xygxqghgsuerjjjfoea62bacs@a5qm6cl7hhnu>
@ 2025-05-09 14:41             ` Martin Doucha
  2025-05-20 17:57               ` ALOK TIWARI via ltp
       [not found]               ` <6msduqbs42k7lnysck7oxoqyicbo6yzktstxdjan5ktpv4qzrx@s2xpicy3johi>
  0 siblings, 2 replies; 16+ messages in thread
From: Martin Doucha @ 2025-05-09 14:41 UTC (permalink / raw)
  To: Michal Koutný, Cyril Hrubis; +Cc: ltp

On 09. 05. 25 16:11, Michal Koutný wrote:
> On Fri, May 09, 2025 at 12:01:47PM +0200, Cyril Hrubis <chrubis@suse.cz> wrote:
>>> Unfortunately, we can't. I've tested this and memory.current can change
>>> a lot during the first process migration.
>>
>> That does sound strange. @Michal any idea what happens here?
> 
> [Process migrates itself (echo 0 >$target_cg/cgroup.procs) or] it's
> otherwise active during the migration?
> 
> (Also, the apparent increase of memory.current may be amplified because
> of MEMCG_CHARGE_BATCH even with initially small allocation.)

The process migrates itself:
SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());

We're dealing with an issue where the test has 2MB safety margin from 
triggering OOM but immediately after the process migrates itself into 
the cgroup on PPC64LE, memory.current will be ~4MB and the process will 
randomly trigger OOM anyway. So we're increasing the safety margin by 
whatever memory.current says immediately after the migration.

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-09 10:11           ` Martin Doucha
@ 2025-05-20 15:29             ` Martin Doucha
  0 siblings, 0 replies; 16+ messages in thread
From: Martin Doucha @ 2025-05-20 15:29 UTC (permalink / raw)
  To: Cyril Hrubis; +Cc: Michal Koutný, ltp

Hi,
are you waiting for any additional info or resubmission? I'd like to get 
this fix merged into the new release.

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-09 14:41             ` Martin Doucha
@ 2025-05-20 17:57               ` ALOK TIWARI via ltp
  2025-05-26 13:52                 ` Martin Doucha
       [not found]               ` <6msduqbs42k7lnysck7oxoqyicbo6yzktstxdjan5ktpv4qzrx@s2xpicy3johi>
  1 sibling, 1 reply; 16+ messages in thread
From: ALOK TIWARI via ltp @ 2025-05-20 17:57 UTC (permalink / raw)
  To: Martin Doucha, Michal Koutný, Cyril Hrubis; +Cc: ltp



On 09-05-2025 20:11, Martin Doucha wrote:
> On 09. 05. 25 16:11, Michal Koutný wrote:
>> On Fri, May 09, 2025 at 12:01:47PM +0200, Cyril Hrubis 
>> <chrubis@suse.cz> wrote:
>>>> Unfortunately, we can't. I've tested this and memory.current can change
>>>> a lot during the first process migration.
>>>
>>> That does sound strange. @Michal any idea what happens here?
>>
>> [Process migrates itself (echo 0 >$target_cg/cgroup.procs) or] it's
>> otherwise active during the migration?
>>
>> (Also, the apparent increase of memory.current may be amplified because
>> of MEMCG_CHARGE_BATCH even with initially small allocation.)
> 
> The process migrates itself:
> SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
> 
> We're dealing with an issue where the test has 2MB safety margin from 
> triggering OOM but immediately after the process migrates itself into 
> the cgroup on PPC64LE, memory.current will be ~4MB and the process will 
> randomly trigger OOM anyway. So we're increasing the safety margin by 
> whatever memory.current says immediately after the migration.
> 


Error log without this commit:
===============================================================
I was seeing error on 64K image aarch64 (failure can occur randomly):
tst_test.c:1875: TINFO: === Testing on ext4 ===
tst_test.c:1209: TINFO: Formatting /dev/loop0 with ext4 opts='' extra 
opts=''
mke2fs 1.47.1 (20-May-2024)
tst_test.c:1221: TINFO: Mounting /dev/loop0 to 
/tmpdir/ltp-Cw5kgjUp5v/LTP_memW9rz73/mntdir fstyp=ext4 flags=0
memcontrol03.c:142: TINFO: Child 28192 in leaf_C: Allocating pagecache: 
52428800
memcontrol03.c:142: TINFO: Child 28193 in leaf_D: Allocating pagecache: 
52428800
memcontrol03.c:142: TINFO: Child 28194 in leaf_F: Allocating pagecache: 
52428800
memcontrol03.c:105: TINFO: Child 28195 in trunk_G: Allocating anon: 
155189248
memcontrol03.c:119: TPASS: Child 28195 exited
memcontrol03.c:206: TPASS: Expect: (A/B memory.current=49217536) ~= 52428800
memcontrol03.c:212: TFAIL: Expect: (A/B/C memory.current=21168128) ~= 
34603008
memcontrol03.c:214: TPASS: Expect: (A/B/D memory.current=25624576) ~= 
17825792
memcontrol03.c:216: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol03.c:105: TINFO: Child 28196 in trunk_G: Allocating anon: 
178257920
memcontrol03.c:114: TPASS: Child 28196 killed by OOM
memcontrol03.c:222: TPASS: Expect: (A/B memory.current=49217536) ~= 52428800

Summary:
passed   34
failed   1
broken   0
skipped  0
warnings 0
<<<execution_status>>>

LTP test PASSED with commit:
===============================================================
here my observation for arrch64 64K page Image with this commit:

tst_test.c:1875: TINFO: === Testing on ext4 ===
tst_test.c:1209: TINFO: Formatting /dev/loop0 with ext4 opts='' extra 
opts=''
mke2fs 1.47.1 (20-May-2024)
tst_test.c:1221: TINFO: Mounting /dev/loop0 to /tmp/LTP_mem5Qmtgc/mntdir 
fstyp=ext4 flags=0
memcontrol03.c:151: TINFO: Child 28367 in leaf_C: Allocating pagecache: 
48234496
memcontrol03.c:151: TINFO: Child 28368 in leaf_D: Allocating pagecache: 
48234496
memcontrol03.c:151: TINFO: Child 28369 in leaf_F: Allocating pagecache: 
48234496
memcontrol03.c:108: TINFO: Child 28370 in trunk_G: Allocating anon: 
150994944
memcontrol03.c:125: TPASS: Child 28370 exited
memcontrol03.c:218: TPASS: Expect: (A/B memory.current=54132736) ~= 52428800
memcontrol03.c:224: TPASS: Expect: (A/B/C memory.current=21299200) ~= 
34603008
memcontrol03.c:226: TPASS: Expect: (A/B/D memory.current=25690112) ~= 
17825792
memcontrol03.c:228: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol03.c:108: TINFO: Child 28371 in trunk_G: Allocating anon: 
173998080
memcontrol03.c:120: TPASS: Child 28371 killed by OOM
memcontrol03.c:234: TPASS: Expect: (A/B memory.current=49479680) ~= 52428800

Summary:
passed   35
failed   0
broken   0
skipped  0
warnings 0
--------------------------------

Is there any case where this LTP test depends on the upstream commit 
1bc542c6a0d ('mm/vmscan: wake up flushers conditionally to avoid cgroup 
OOM')?


Thanks,
Alok


-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-20 17:57               ` ALOK TIWARI via ltp
@ 2025-05-26 13:52                 ` Martin Doucha
  0 siblings, 0 replies; 16+ messages in thread
From: Martin Doucha @ 2025-05-26 13:52 UTC (permalink / raw)
  To: ALOK TIWARI, Michal Koutný, Cyril Hrubis; +Cc: ltp

Hi!

On 20. 05. 25 19:57, ALOK TIWARI wrote:
> Error log without this commit:
> ===============================================================
> I was seeing error on 64K image aarch64 (failure can occur randomly):
> <snip>
> memcontrol03.c:212: TFAIL: Expect: (A/B/C memory.current=21168128) ~= 
> 34603008
> <snip>
> 
> LTP test PASSED with commit:
> ===============================================================
> here my observation for arrch64 64K page Image with this commit:
> 
> <snip>
> memcontrol03.c:224: TPASS: Expect: (A/B/C memory.current=21299200) ~= 
> 34603008
> <snip>

This failure is unrelated to the patch and happens randomly.

> Is there any case where this LTP test depends on the upstream commit 
> 1bc542c6a0d ('mm/vmscan: wake up flushers conditionally to avoid cgroup 
> OOM')?

The test predates the kernel commit by several years. And while the 
mm/vmscan change might help prevent the OOM this patch is trying to fix, 
the OOM still looks valid to me and should be dealt with in the test code.

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [TEST PATCH] memcontrol: Wait for draining of remote stocks when charging
       [not found]               ` <6msduqbs42k7lnysck7oxoqyicbo6yzktstxdjan5ktpv4qzrx@s2xpicy3johi>
@ 2025-05-28 12:04                 ` Martin Doucha
  2025-05-30 11:44                 ` Martin Doucha
  2025-07-03  9:36                 ` Martin Doucha
  2 siblings, 0 replies; 16+ messages in thread
From: Martin Doucha @ 2025-05-28 12:04 UTC (permalink / raw)
  To: Michal Koutný; +Cc: ltp

On 28. 05. 25 13:40, Michal Koutný wrote:
> Hello.
> 
> So I tried looking into the behavior and I've come up with a theory that
> I describe in the commit message above. I don't have a reproducer for
> this at hand (namely a 64k pages machine). Would you be able to test
> this if there was a test kernel in OBS?

If you build it for PPC64LE, we can test it for you. Don't forget to 
enable the Publish flag in the IBS repo.

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [TEST PATCH] memcontrol: Wait for draining of remote stocks when charging
       [not found]               ` <6msduqbs42k7lnysck7oxoqyicbo6yzktstxdjan5ktpv4qzrx@s2xpicy3johi>
  2025-05-28 12:04                 ` [LTP] [TEST PATCH] memcontrol: Wait for draining of remote stocks when charging Martin Doucha
@ 2025-05-30 11:44                 ` Martin Doucha
  2025-07-03  9:36                 ` Martin Doucha
  2 siblings, 0 replies; 16+ messages in thread
From: Martin Doucha @ 2025-05-30 11:44 UTC (permalink / raw)
  To: Michal Koutný; +Cc: ltp

On 28. 05. 25 13:40, Michal Koutný wrote:
> Hello.
> 
> So I tried looking into the behavior and I've come up with a theory that
> I describe in the commit message above. I don't have a reproducer for
> this at hand (namely a 64k pages machine). Would you be able to test
> this if there was a test kernel in OBS?

I ran a few dozen tests with the provided PPC64LE kernel and could not 
reproduce any unexpected OOM issues in memcontrol03. I guess the patch 
helps. Full results (see also Next & previous tab in each job):
https://openqa.opensuse.org/tests/overview?distri=opensuse&version=Tumbleweed&build=debug_memcontrol

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [TEST PATCH] memcontrol: Wait for draining of remote stocks when charging
       [not found]               ` <6msduqbs42k7lnysck7oxoqyicbo6yzktstxdjan5ktpv4qzrx@s2xpicy3johi>
  2025-05-28 12:04                 ` [LTP] [TEST PATCH] memcontrol: Wait for draining of remote stocks when charging Martin Doucha
  2025-05-30 11:44                 ` Martin Doucha
@ 2025-07-03  9:36                 ` Martin Doucha
  2 siblings, 0 replies; 16+ messages in thread
From: Martin Doucha @ 2025-07-03  9:36 UTC (permalink / raw)
  To: Michal Koutný; +Cc: ltp

On 28. 05. 25 13:40, Michal Koutný wrote:
> Hello.
> 
> So I tried looking into the behavior and I've come up with a theory that
> I describe in the commit message above. I don't have a reproducer for
> this at hand (namely a 64k pages machine). Would you be able to test
> this if there was a test kernel in OBS?
> 
> Thanks,
> Michal

Hello,
what is the conclusion with your kernel patch? Is the OOM issue going to 
be fixed in the kernel, or should we proceed with fixing the test using 
my patch?

-- 
Martin Doucha   mdoucha@suse.cz
SW Quality Engineer
SUSE LINUX, s.r.o.
CORSO IIa
Krizikova 148/34
186 00 Prague 8
Czech Republic

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation
  2025-05-05 10:53 [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Martin Doucha
  2025-05-06  6:59 ` Li Wang via ltp
  2025-05-07 14:23 ` Cyril Hrubis
@ 2025-09-03  9:19 ` Cyril Hrubis
  2 siblings, 0 replies; 16+ messages in thread
From: Cyril Hrubis @ 2025-09-03  9:19 UTC (permalink / raw)
  To: Martin Doucha; +Cc: ltp

Hi!
Applied, thanks.

-- 
Cyril Hrubis
chrubis@suse.cz

-- 
Mailing list info: https://lists.linux.it/listinfo/ltp

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2025-09-03  9:19 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-05 10:53 [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Martin Doucha
2025-05-06  6:59 ` Li Wang via ltp
2025-05-07 14:23 ` Cyril Hrubis
2025-05-07 15:36   ` Martin Doucha
2025-05-09  9:21     ` Cyril Hrubis
2025-05-09  9:40       ` Martin Doucha
2025-05-09 10:01         ` Cyril Hrubis
2025-05-09 10:11           ` Martin Doucha
2025-05-20 15:29             ` Martin Doucha
     [not found]           ` <qbca5sxzfw53o6nku5ulu2dl2xygxqghgsuerjjjfoea62bacs@a5qm6cl7hhnu>
2025-05-09 14:41             ` Martin Doucha
2025-05-20 17:57               ` ALOK TIWARI via ltp
2025-05-26 13:52                 ` Martin Doucha
     [not found]               ` <6msduqbs42k7lnysck7oxoqyicbo6yzktstxdjan5ktpv4qzrx@s2xpicy3johi>
2025-05-28 12:04                 ` [LTP] [TEST PATCH] memcontrol: Wait for draining of remote stocks when charging Martin Doucha
2025-05-30 11:44                 ` Martin Doucha
2025-07-03  9:36                 ` Martin Doucha
2025-09-03  9:19 ` [LTP] [PATCH] memcontrol03: Account for process size in cgroup allocation Cyril Hrubis

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.