[LTP] [PATCH 1/4] memcg_process

public inbox for ltp@lists.linux.it
 help / color / mirror / Atom feed

* [LTP] [PATCH 1/4] memcg_process_stress: cleanup
@ 2016-04-22 15:23 Stanislav Kholmanskikh
  2016-04-22 15:23 ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Stanislav Kholmanskikh
  2016-05-11 14:16 ` [LTP] [PATCH 1/4] memcg_process_stress: cleanup Cyril Hrubis
  0 siblings, 2 replies; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-04-22 15:23 UTC (permalink / raw)
  To: ltp

Plus added simple error handling for sigaction() calls.

Signed-off-by: Stanislav Kholmanskikh <stanislav.kholmanskikh@oracle.com>
---
 .../memcg/stress/memcg_process_stress.c            |   35 +++++++++----------
 1 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c b/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c
index 92627fe..c2569bb 100644
--- a/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c
+++ b/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c
@@ -28,16 +28,14 @@
 #include <string.h>
 #include <unistd.h>
 
-int flag_exit;
-int flag_ready;
-
-int interval;
-unsigned long memsize;
-
-char **pages;
-int nr_page;
-
-void touch_memory()
+static volatile int flag_exit;
+static volatile int flag_ready;
+static int interval;
+static unsigned long memsize;
+static char **pages;
+static int nr_page;
+
+static void touch_memory(void)
 {
 	int i;
 
@@ -45,30 +43,30 @@ void touch_memory()
 		pages[i][0] = 0xef;
 }
 
-void sigusr_handler(int __attribute__ ((unused)) signo)
+static void sigusr_handler(int __attribute__ ((unused)) signo)
 {
 	int i;
 	int pagesize;
 
-	pagesize = getpagesize();
+	pagesize = sysconf(_SC_PAGE_SIZE);
 
 	nr_page = ceil((double)memsize / pagesize);
 
 	pages = calloc(nr_page, sizeof(char *));
 	if (pages == NULL)
-		errx(1, "calloc failed");
+		err(1, "calloc");
 
 	for (i = 0; i < nr_page; i++) {
 		pages[i] = mmap(NULL, pagesize, PROT_WRITE | PROT_READ,
 				MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
 		if (pages[i] == MAP_FAILED)
-			err(1, "map failed\n");
+			err(1, "mmap");
 	}
 
 	flag_ready = 1;
 }
 
-void sigint_handler(int __attribute__ ((unused)) signo)
+static void sigint_handler(int __attribute__ ((unused)) signo)
 {
 	flag_exit = 1;
 }
@@ -91,14 +89,15 @@ int main(int argc, char *argv[])
 	if (interval <= 0)
 		interval = 1;
 
-	/* TODO (garrcoop): add error handling. */
 	memset(&sigint_action, 0, sizeof(sigint_action));
 	sigint_action.sa_handler = &sigint_handler;
-	sigaction(SIGINT, &sigint_action, NULL);
+	if (sigaction(SIGINT, &sigint_action, NULL))
+		err(1, "sigaction(SIGINT)");
 
 	memset(&sigusr_action, 0, sizeof(sigusr_action));
 	sigusr_action.sa_handler = &sigusr_handler;
-	sigaction(SIGUSR1, &sigusr_action, NULL);
+	if (sigaction(SIGUSR1, &sigusr_action, NULL))
+		err(1, "sigaction(SIGUSR1)");
 
 	while (!flag_exit) {
 		sleep(interval);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler
  2016-04-22 15:23 [LTP] [PATCH 1/4] memcg_process_stress: cleanup Stanislav Kholmanskikh
@ 2016-04-22 15:23 ` Stanislav Kholmanskikh
  2016-04-22 15:23   ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Stanislav Kholmanskikh
  2016-05-11 14:39   ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Cyril Hrubis
  2016-05-11 14:16 ` [LTP] [PATCH 1/4] memcg_process_stress: cleanup Cyril Hrubis
  1 sibling, 2 replies; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-04-22 15:23 UTC (permalink / raw)
  To: ltp

Allocating memory in a signal handler is considered bad practice, so
do the allocation from the main loop instead.

Signed-off-by: Stanislav Kholmanskikh <stanislav.kholmanskikh@oracle.com>
---
 .../memcg/stress/memcg_process_stress.c            |   32 +++++++++++++-------
 1 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c b/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c
index c2569bb..9c93f0d 100644
--- a/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c
+++ b/testcases/kernel/controllers/memcg/stress/memcg_process_stress.c
@@ -35,22 +35,14 @@ static unsigned long memsize;
 static char **pages;
 static int nr_page;
 
-static void touch_memory(void)
-{
-	int i;
-
-	for (i = 0; i < nr_page; i++)
-		pages[i][0] = 0xef;
-}
-
-static void sigusr_handler(int __attribute__ ((unused)) signo)
+static void alloc_memory(void)
 {
 	int i;
 	int pagesize;
 
 	pagesize = sysconf(_SC_PAGE_SIZE);
 
-	nr_page = ceil((double)memsize / pagesize);
+	nr_page = memsize / pagesize;
 
 	pages = calloc(nr_page, sizeof(char *));
 	if (pages == NULL)
@@ -62,7 +54,18 @@ static void sigusr_handler(int __attribute__ ((unused)) signo)
 		if (pages[i] == MAP_FAILED)
 			err(1, "mmap");
 	}
+}
 
+static void touch_memory(void)
+{
+	int i;
+
+	for (i = 0; i < nr_page; i++)
+		pages[i][0] = 0xef;
+}
+
+static void sigusr_handler(int __attribute__ ((unused)) signo)
+{
 	flag_ready = 1;
 }
 
@@ -76,6 +79,7 @@ int main(int argc, char *argv[])
 	char *end;
 	struct sigaction sigint_action;
 	struct sigaction sigusr_action;
+	int allocated = 0;
 
 	if (argc != 3)
 		errx(1, "wrong argument num");
@@ -102,8 +106,14 @@ int main(int argc, char *argv[])
 	while (!flag_exit) {
 		sleep(interval);
 
-		if (flag_ready)
+		if (flag_ready) {
+			if (!allocated) {
+				alloc_memory();
+				allocated = 1;
+			}
+
 			touch_memory();
+		}
 	}
 
 	return 0;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite
  2016-04-22 15:23 ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Stanislav Kholmanskikh
@ 2016-04-22 15:23   ` Stanislav Kholmanskikh
  2016-04-22 15:23     ` [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes Stanislav Kholmanskikh
  2016-05-11 15:01     ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Cyril Hrubis
  2016-05-11 14:39   ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Cyril Hrubis
  1 sibling, 2 replies; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-04-22 15:23 UTC (permalink / raw)
  To: ltp

 * Use the LTP API for shell test cases

 * Verify command exit codes by using ROD

 * Now we send the termination signal to all the processes,
   and only then we do 'wait'. With this scheme SIGINT
   does not make the test execute for "much longer time",
   so the SIGKILL workaround implemented in 9b9a6bb10258bc
   is not required anymore.

Signed-off-by: Stanislav Kholmanskikh <stanislav.kholmanskikh@oracle.com>
---
 .../controllers/memcg/stress/memcg_stress_test.sh  |  104 +++++++++++++-------
 1 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
index dc610f1..0d541e5 100755
--- a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
+++ b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
@@ -1,4 +1,4 @@
-#! /bin/sh
+#!/bin/sh
 
 ################################################################################
 ##                                                                            ##
@@ -25,37 +25,57 @@
 ##                                                                            ##
 ################################################################################
 
-cd $LTPROOT/testcases/bin
-export TCID="memcg_stress_test"
-export TST_TOTAL=2
-export TST_COUNT=0
+TCID=memcg_stress_test
+TST_TOTAL=2
+. test.sh
 
 if [ "x$(grep -w memory /proc/cgroups | cut -f4)" != "x1" ]; then
-        echo "WARNING:";
-        echo "Either Kernel does not support for memory resource controller or feature not enabled";
-        echo "Skipping all memcgroup testcases....";
-        exit 0
+	tst_brkm TCONF "Kernel does not support the memory resource controller"
 fi
 
 RUN_TIME=$(( 60 * 60 ))
 
+children=""
+nr_children=0
+memcg_path=/dev/memcg
+memcg_created=0
+
 cleanup()
 {
-	if [ -e /dev/memcg ]; then
-		umount /dev/memcg 2>/dev/null
-		rmdir /dev/memcg 2>/dev/null
+	for child in $children; do
+		kill -s KILL $child 2> /dev/null
+	done
+	wait
+
+	if [ "$memcg_created" -ne 0 ]; then
+		for i in $(seq 0 $(( $nr_children - 1 ))); do
+			rmdir "$memcg_path/$i" 2> /dev/null
+		done
+		umount "$memcg_path"
+		rmdir "$memcg_path"
 	fi
 }
+TST_CLEANUP=cleanup
 
+do_unmount()
+{
+	ROD umount "$memcg_path"
+	ROD rmdir "$memcg_path"
+	memcg_created=0
+}
 
 do_mount()
 {
-	cleanup;
-
-	mkdir /dev/memcg 2> /dev/null
-	mount -t cgroup -omemory memcg /dev/memcg
+	ROD mkdir "$memcg_path"
+	memcg_created=1
+	ROD mount -t cgroup -omemory memcg "$memcg_path"
 }
 
+is_int()
+{
+	[ "$1" -eq "$1" ] 2> /dev/null
+	return $?
+}
 
 # Run the stress test
 #
@@ -65,33 +85,41 @@ do_mount()
 # $4 - How long does this test run ? in second
 run_stress()
 {
-	do_mount;
+	nr_children=0
+	children=""
+
+	do_mount
 
-	for i in $(seq 0 $(($1-1)))
-	do
-		mkdir /dev/memcg/$i 2> /dev/null
+	for i in $(seq 0 $(( $1 - 1 ))); do
+		ROD mkdir "$memcg_path/$i"
 		./memcg_process_stress $2 $3 &
-		eval pid$i=$!
+		child=$!
 
-		eval echo \$pid$i > /dev/memcg/$i/tasks
+		nr_children=$(( $nr_children + 1 ))
+		children="$children $child"
+
+		ROD echo $child \> "$memcg_path/$i/tasks"
 	done
 
-	for i in $(seq 0 $(($1-1)))
-	do
-		eval /bin/kill -s SIGUSR1 \$pid$i 2> /dev/null
+	for child in $children; do
+		ROD /bin/kill -s SIGUSR1 $child
 	done
 
 	sleep $4
 
-	for i in $(seq 0 $(($1-1)))
-	do
-		eval /bin/kill -s SIGKILL \$pid$i 2> /dev/null
-		eval wait \$pid$i
+	for child in $children; do
+		ROD /bin/kill -s SIGINT $child
+	done
+
+	for child in $children; do
+		ROD wait $child
+	done
 
-		rmdir /dev/memcg/$i 2> /dev/null
+	for i in $(seq 0 $(( $nr_children - 1 ))); do
+		ROD rmdir "$memcg_path/$i"
 	done
 
-	cleanup;
+	do_unmount
 }
 
 testcase_1()
@@ -108,19 +136,23 @@ testcase_2()
 	tst_resm TPASS "stress test 2 passed"
 }
 
-echo 3 > /proc/sys/vm/drop_caches
+ROD echo 3 \> /proc/sys/vm/drop_caches
 sleep 2
+
 mem_free=`cat /proc/meminfo | grep MemFree | awk '{ print $2 }'`
+is_int "$mem_free" || tst_brkm TBROK "Unable to determine mem_free"
+
 swap_free=`cat /proc/meminfo | grep SwapFree | awk '{ print $2 }'`
+is_int "$swap_free" || tst_brkm TBROK "Unable to determine swap_free"
 
 mem=$(( $mem_free + $swap_free / 2 ))
-mem=$(( mem / 1024 ))
+mem=$(( $mem / 1024 ))
+[ "$mem" -gt 0 ] || tst_brkm TBROK "mem is negative: $mem"
 
 date
-export TST_COUNT=$(( $TST_COUNT + 1 ))
 testcase_1
-export TST_COUNT=$(( $TST_COUNT + 1 ))
+date
 testcase_2
 date
 
-exit 0
+tst_exit
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-04-22 15:23   ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Stanislav Kholmanskikh
@ 2016-04-22 15:23     ` Stanislav Kholmanskikh
  2016-05-12 13:42       ` Cyril Hrubis
  2016-05-11 15:01     ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Cyril Hrubis
  1 sibling, 1 reply; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-04-22 15:23 UTC (permalink / raw)
  To: ltp

If we run this test case on a system, where the amount of
physical RAM is significantly more than the amount of swap
space (for example, 128G vs 1G), one of the memcg_process_stress
processes will invoke the OOM-killer, and the test will fail
(in the least bad case).

The key here is to find out a more generic formula instead of

mem=$(( $mem_free + $swap_free / 2 ))

Per my understanding, this test case does not require all the
memory of the system.

Given that, (CommitLimit - Committed_AS) with overcommit_memory == 1
looks to be a suitable formula, since the kernel lets us allocate this
amount of memory and there will still be some memory left for other tasks.

Signed-off-by: Stanislav Kholmanskikh <stanislav.kholmanskikh@oracle.com>
---
 .../controllers/memcg/stress/memcg_stress_test.sh  |   28 ++++++++++++++++---
 1 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
index 0d541e5..75baab4 100755
--- a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
+++ b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
@@ -39,6 +39,8 @@ children=""
 nr_children=0
 memcg_path=/dev/memcg
 memcg_created=0
+overcommit_path=/proc/sys/vm/overcommit_memory
+overcommit=""
 
 cleanup()
 {
@@ -54,6 +56,10 @@ cleanup()
 		umount "$memcg_path"
 		rmdir "$memcg_path"
 	fi
+
+	if is_int "$overcommit"; then
+		echo "$overcommit" > "$overcommit_path"
+	fi
 }
 TST_CLEANUP=cleanup
 
@@ -139,20 +145,32 @@ testcase_2()
 ROD echo 3 \> /proc/sys/vm/drop_caches
 sleep 2
 
-mem_free=`cat /proc/meminfo | grep MemFree | awk '{ print $2 }'`
-is_int "$mem_free" || tst_brkm TBROK "Unable to determine mem_free"
+# We enable the "always overcommit" memory policy and allocate
+# less than CommitLimit bytes. Given that no other memory-consuming
+# process should be running, this gives us a more-or-less strong
+# guarantee that our allocations will result in an OOM situation.
+
+overcommit=$(cat "$overcommit_path")
+is_int "$overcommit" || tst_brkm TBROK "Unable to determine overcommit"
 
-swap_free=`cat /proc/meminfo | grep SwapFree | awk '{ print $2 }'`
-is_int "$swap_free" || tst_brkm TBROK "Unable to determine swap_free"
+commit_limit=$(cat /proc/meminfo | grep CommitLimit | awk '{ print $2 }')
+is_int "$commit_limit" || tst_brkm TBROK "Unable to determine commit_limit"
 
-mem=$(( $mem_free + $swap_free / 2 ))
+committed_as=$(cat /proc/meminfo | grep Committed_AS | awk '{ print $2 }')
+is_int "$committed_as" || tst_brkm TBROK "Unable to determine committed_as"
+
+mem=$(( $commit_limit - $committed_as ))
 mem=$(( $mem / 1024 ))
 [ "$mem" -gt 0 ] || tst_brkm TBROK "mem is negative: $mem"
 
+ROD echo 1 \> "$overcommit_path"
+
 date
 testcase_1
 date
 testcase_2
 date
 
+ROD echo "$overcommit" \> "$overcommit_path"
+
 tst_exit
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 1/4] memcg_process_stress: cleanup
  2016-04-22 15:23 [LTP] [PATCH 1/4] memcg_process_stress: cleanup Stanislav Kholmanskikh
  2016-04-22 15:23 ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Stanislav Kholmanskikh
@ 2016-05-11 14:16 ` Cyril Hrubis
  1 sibling, 0 replies; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-11 14:16 UTC (permalink / raw)
  To: ltp

Hi!
> -void sigusr_handler(int __attribute__ ((unused)) signo)
> +static void sigusr_handler(int __attribute__ ((unused)) signo)
>  {
>  	int i;
>  	int pagesize;
>  
> -	pagesize = getpagesize();
> +	pagesize = sysconf(_SC_PAGE_SIZE);

Well, sysconf(), unlike getpagesize(), can fail, and even if that's unlikely
with _SC_PAGE_SIZE, this change may generate warnings in static analysis
of the code.

I would not bother and would leave the code with getpagesize() here.


Otherwise it's fine.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler
  2016-04-22 15:23 ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Stanislav Kholmanskikh
  2016-04-22 15:23   ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Stanislav Kholmanskikh
@ 2016-05-11 14:39   ` Cyril Hrubis
  2016-05-12 11:09     ` Stanislav Kholmanskikh
  1 sibling, 1 reply; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-11 14:39 UTC (permalink / raw)
  To: ltp

Hi!
> -static void sigusr_handler(int __attribute__ ((unused)) signo)
> +static void alloc_memory(void)
>  {
>  	int i;
>  	int pagesize;
>  
>  	pagesize = sysconf(_SC_PAGE_SIZE);
>  
> -	nr_page = ceil((double)memsize / pagesize);
> +	nr_page = memsize / pagesize;

This will allocate one page fewer than the previous code whenever
memsize % pagesize != 0.

ceil((double)memsize/pagesize) == (memsize + pagesize - 1)/pagesize

in case you want to avoid floating point.

>  	pages = calloc(nr_page, sizeof(char *));
>  	if (pages == NULL)
> @@ -62,7 +54,18 @@ static void sigusr_handler(int __attribute__ ((unused)) signo)
>  		if (pages[i] == MAP_FAILED)
>  			err(1, "mmap");
>  	}
> +}
>  
> +static void touch_memory(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < nr_page; i++)
> +		pages[i][0] = 0xef;
> +}
> +
> +static void sigusr_handler(int __attribute__ ((unused)) signo)
> +{
>  	flag_ready = 1;
>  }
>  
> @@ -76,6 +79,7 @@ int main(int argc, char *argv[])
>  	char *end;
>  	struct sigaction sigint_action;
>  	struct sigaction sigusr_action;
> +	int allocated = 0;
>  
>  	if (argc != 3)
>  		errx(1, "wrong argument num");
> @@ -102,8 +106,14 @@ int main(int argc, char *argv[])
>  	while (!flag_exit) {
>  		sleep(interval);
>  
> -		if (flag_ready)
> +		if (flag_ready) {
> +			if (!allocated) {
> +				alloc_memory();
> +				allocated = 1;
> +			}
> +
>  			touch_memory();
> +		}
>  	}


We can do even better if we change the sleep() for pause().

>  	return 0;

Otherwise it looks good.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite
  2016-04-22 15:23   ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Stanislav Kholmanskikh
  2016-04-22 15:23     ` [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes Stanislav Kholmanskikh
@ 2016-05-11 15:01     ` Cyril Hrubis
  1 sibling, 0 replies; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-11 15:01 UTC (permalink / raw)
  To: ltp

Hi!
> -cd $LTPROOT/testcases/bin
> -export TCID="memcg_stress_test"
> -export TST_TOTAL=2
> -export TST_COUNT=0
> +TCID=memcg_stress_test
> +TST_TOTAL=2
> +. test.sh
>  
>  if [ "x$(grep -w memory /proc/cgroups | cut -f4)" != "x1" ]; then
> -        echo "WARNING:";
> -        echo "Either Kernel does not support for memory resource controller or feature not enabled";
> -        echo "Skipping all memcgroup testcases....";
> -        exit 0
> +	tst_brkm TCONF "Kernel does not support the memory resource controller"
>  fi
>  
>  RUN_TIME=$(( 60 * 60 ))
>  
> +children=""
> +nr_children=0
> +memcg_path=/dev/memcg
> +memcg_created=0
> +
>  cleanup()
>  {
> -	if [ -e /dev/memcg ]; then
> -		umount /dev/memcg 2>/dev/null
> -		rmdir /dev/memcg 2>/dev/null
> +	for child in $children; do
> +		kill -s KILL $child 2> /dev/null
> +	done
> +	wait
> +
> +	if [ "$memcg_created" -ne 0 ]; then
> +		for i in $(seq 0 $(( $nr_children - 1 ))); do
> +			rmdir "$memcg_path/$i" 2> /dev/null
> +		done
> +		umount "$memcg_path"
> +		rmdir "$memcg_path"
>  	fi
>  }
> +TST_CLEANUP=cleanup
>  
> +do_unmount()
> +{
> +	ROD umount "$memcg_path"
> +	ROD rmdir "$memcg_path"
> +	memcg_created=0
> +}
>  
>  do_mount()
>  {
> -	cleanup;
> -
> -	mkdir /dev/memcg 2> /dev/null
> -	mount -t cgroup -omemory memcg /dev/memcg
> +	ROD mkdir "$memcg_path"
> +	memcg_created=1
> +	ROD mount -t cgroup -omemory memcg "$memcg_path"
>  }
>  
> +is_int()
> +{
> +	[ "$1" -eq "$1" ] 2> /dev/null
> +	return $?
> +}
>  
>  # Run the stress test
>  #
> @@ -65,33 +85,41 @@ do_mount()
>  # $4 - How long does this test run ? in second
>  run_stress()
>  {
> -	do_mount;
> +	nr_children=0
> +	children=""
> +
> +	do_mount
>  
> -	for i in $(seq 0 $(($1-1)))
> -	do
> -		mkdir /dev/memcg/$i 2> /dev/null
> +	for i in $(seq 0 $(( $1 - 1 ))); do
> +		ROD mkdir "$memcg_path/$i"
>  		./memcg_process_stress $2 $3 &

We should drop the ./ here since the binary is in $PATH

> -		eval pid$i=$!
> +		child=$!
>  
> -		eval echo \$pid$i > /dev/memcg/$i/tasks
> +		nr_children=$(( $nr_children + 1 ))
> +		children="$children $child"
> +
> +		ROD echo $child \> "$memcg_path/$i/tasks"
>  	done
>  
> -	for i in $(seq 0 $(($1-1)))
> -	do
> -		eval /bin/kill -s SIGUSR1 \$pid$i 2> /dev/null
> +	for child in $children; do
> +		ROD /bin/kill -s SIGUSR1 $child

Here drop the /bin/ and remove the SIG from the signal name.

>  	done
>  
>  	sleep $4
>  
> -	for i in $(seq 0 $(($1-1)))
> -	do
> -		eval /bin/kill -s SIGKILL \$pid$i 2> /dev/null
> -		eval wait \$pid$i
> +	for child in $children; do
> +		ROD /bin/kill -s SIGINT $child

Here as well.

> +	done
> +
> +	for child in $children; do
> +		ROD wait $child
> +	done
>  
> -		rmdir /dev/memcg/$i 2> /dev/null
> +	for i in $(seq 0 $(( $nr_children - 1 ))); do
> +		ROD rmdir "$memcg_path/$i"
>  	done
>  
> -	cleanup;
> +	do_unmount
>  }
>  
>  testcase_1()
> @@ -108,19 +136,23 @@ testcase_2()
>  	tst_resm TPASS "stress test 2 passed"
>  }
>  
> -echo 3 > /proc/sys/vm/drop_caches
> +ROD echo 3 \> /proc/sys/vm/drop_caches
>  sleep 2
> +
>  mem_free=`cat /proc/meminfo | grep MemFree | awk '{ print $2 }'`

mem_free=$(awk '/MemFree/ {print $2}' /proc/meminfo)

> +is_int "$mem_free" || tst_brkm TBROK "Unable to determine mem_free"
> +
>  swap_free=`cat /proc/meminfo | grep SwapFree | awk '{ print $2 }'`

Here as well.

> +is_int "$swap_free" || tst_brkm TBROK "Unable to determine swap_free"
>  
>  mem=$(( $mem_free + $swap_free / 2 ))
> -mem=$(( mem / 1024 ))
> +mem=$(( $mem / 1024 ))
> +[ "$mem" -gt 0 ] || tst_brkm TBROK "mem is negative: $mem"
>  
>  date
> -export TST_COUNT=$(( $TST_COUNT + 1 ))
>  testcase_1
> -export TST_COUNT=$(( $TST_COUNT + 1 ))
> +date
>  testcase_2
>  date
>  
> -exit 0
> +tst_exit

The rest looks good.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler
  2016-05-11 14:39   ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Cyril Hrubis
@ 2016-05-12 11:09     ` Stanislav Kholmanskikh
  2016-05-12 11:26       ` Cyril Hrubis
  0 siblings, 1 reply; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-05-12 11:09 UTC (permalink / raw)
  To: ltp

Hi!

On 05/11/2016 05:39 PM, Cyril Hrubis wrote:
> Hi!
>> -static void sigusr_handler(int __attribute__ ((unused)) signo)
>> +static void alloc_memory(void)
>>   {
>>   	int i;
>>   	int pagesize;
>>
>>   	pagesize = sysconf(_SC_PAGE_SIZE);
>>
>> -	nr_page = ceil((double)memsize / pagesize);
>> +	nr_page = memsize / pagesize;
>
> This will cause to allocate one less page than the previous code in case
> that memsize % pagesize != 0.
>
> ceil((double)memsize/pagesize) == (memsize + pagesize - 1)/pagesize
>
> In case that you want to avoid floating point.
>
>>   	pages = calloc(nr_page, sizeof(char *));
>>   	if (pages == NULL)
>> @@ -62,7 +54,18 @@ static void sigusr_handler(int __attribute__ ((unused)) signo)
>>   		if (pages[i] == MAP_FAILED)
>>   			err(1, "mmap");
>>   	}
>> +}
>>
>> +static void touch_memory(void)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < nr_page; i++)
>> +		pages[i][0] = 0xef;
>> +}
>> +
>> +static void sigusr_handler(int __attribute__ ((unused)) signo)
>> +{
>>   	flag_ready = 1;
>>   }
>>
>> @@ -76,6 +79,7 @@ int main(int argc, char *argv[])
>>   	char *end;
>>   	struct sigaction sigint_action;
>>   	struct sigaction sigusr_action;
>> +	int allocated = 0;
>>
>>   	if (argc != 3)
>>   		errx(1, "wrong argument num");
>> @@ -102,8 +106,14 @@ int main(int argc, char *argv[])
>>   	while (!flag_exit) {
>>   		sleep(interval);
>>
>> -		if (flag_ready)
>> +		if (flag_ready) {
>> +			if (!allocated) {
>> +				alloc_memory();
>> +				allocated = 1;
>> +			}
>> +
>>   			touch_memory();
>> +		}
>>   	}
>
>
> We can do even better if we change the sleep() for pause().

Thank you for the review. I agree with all other comments to this 
series, except this one.

I'd keep sleep() here, because it allows the process to perform a series 
of touch_memory() invocations before it receives the final SIGINT signal.



>
>>   	return 0;
>
> Otherwise it looks good.
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler
  2016-05-12 11:09     ` Stanislav Kholmanskikh
@ 2016-05-12 11:26       ` Cyril Hrubis
  0 siblings, 0 replies; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-12 11:26 UTC (permalink / raw)
  To: ltp

Hi!
> >>   	while (!flag_exit) {
> >>   		sleep(interval);
> >>
> >> -		if (flag_ready)
> >> +		if (flag_ready) {
> >> +			if (!allocated) {
> >> +				alloc_memory();
> >> +				allocated = 1;
> >> +			}
> >> +
> >>   			touch_memory();
> >> +		}
> >>   	}
> >
> >
> > We can do even better if we change the sleep() for pause().
> 
> Thank you for the review. I agree with all other comments to this 
> series, except this one.
> 
> I'd keep sleep() here, because it allows the process to perform a series 
> of touch_memory() invocations before it receives the final SIGINT signal.

You are right, I've missed that yesterday.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-04-22 15:23     ` [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes Stanislav Kholmanskikh
@ 2016-05-12 13:42       ` Cyril Hrubis
  2016-05-17 12:52         ` Stanislav Kholmanskikh
  0 siblings, 1 reply; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-12 13:42 UTC (permalink / raw)
  To: ltp

Hi!
> Given that, (CommitLimit - Committed_AS) with overcommit_memory == 1,
> looks to be a suitable formula, since the kernel lets allocate this
> amount of memory and there will be some memory for other tasks.

I wonder what the original purpose of the test is. It looks to me like
the whole point is to attach processes to a number of memory cgroups and
then stress it by allocating memory while forcing heavy swapping. Hence
the $mem_free + $swap_free/2, which is obviously wrong.

So for the new formula, CommitLimit seems to be calculated as
SWAP + RAM * overcommit_ratio, which will default to SWAP + RAM/2 in most
cases. Wouldn't that cause too much swap thrashing if you have
SWAP == 2 * RAM? Wouldn't something as simple as 0.8 * RAM be better
for the purpose of the test? Or something like 0.9 * mem_free - 50MB after
the caches have been dropped?

Also, I wonder if it makes sense to run the test if the machine has no swap
configured. Maybe we should TCONF right away in that case.

> Signed-off-by: Stanislav Kholmanskikh <stanislav.kholmanskikh@oracle.com>
> ---
>  .../controllers/memcg/stress/memcg_stress_test.sh  |   28 ++++++++++++++++---
>  1 files changed, 23 insertions(+), 5 deletions(-)
> 
> diff --git a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
> index 0d541e5..75baab4 100755
> --- a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
> +++ b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
> @@ -39,6 +39,8 @@ children=""
>  nr_children=0
>  memcg_path=/dev/memcg
>  memcg_created=0
> +overcommit_path=/proc/sys/vm/overcommit_memory
> +overcommit=""
>  
>  cleanup()
>  {
> @@ -54,6 +56,10 @@ cleanup()
>  		umount "$memcg_path"
>  		rmdir "$memcg_path"
>  	fi
> +
> +	if is_int "$overcommit"; then
> +		echo "$overcommit" > "$overcommit_path"
> +	fi
>  }
>  TST_CLEANUP=cleanup
>  
> @@ -139,20 +145,32 @@ testcase_2()
>  ROD echo 3 \> /proc/sys/vm/drop_caches
>  sleep 2
>  
> -mem_free=`cat /proc/meminfo | grep MemFree | awk '{ print $2 }'`
> -is_int "$mem_free" || tst_brkm TBROK "Unable to determine mem_free"
> +# We enable the "always overcommit" memory policy and allocate
> +# less than CommitLimit bytes. Given that no other memory-consuming
> +# process should be running, this gives us a more-or-less strong
> +# guarantee that our allocations will result in an OOM situation.
                                        ^
					missing not?

I've just looked at the newer patch, which has "not" in the sentence...

> +overcommit=$(cat "$overcommit_path")
> +is_int "$overcommit" || tst_brkm TBROK "Unable to determine overcommit"
>  
> -swap_free=`cat /proc/meminfo | grep SwapFree | awk '{ print $2 }'`
> -is_int "$swap_free" || tst_brkm TBROK "Unable to determine swap_free"
> +commit_limit=$(cat /proc/meminfo | grep CommitLimit | awk '{ print $2 }')
> +is_int "$commit_limit" || tst_brkm TBROK "Unable to determine commit_limit"
>  
> -mem=$(( $mem_free + $swap_free / 2 ))
> +committed_as=$(cat /proc/meminfo | grep Committed_AS | awk '{ print $2 }')
> +is_int "$committed_as" || tst_brkm TBROK "Unable to determine committed_as"
> +
> +mem=$(( $commit_limit - $committed_as ))
>  mem=$(( $mem / 1024 ))
>  [ "$mem" -gt 0 ] || tst_brkm TBROK "mem is negative: $mem"
>  
> +ROD echo 1 \> "$overcommit_path"
> +
>  date
>  testcase_1
>  date
>  testcase_2
>  date
>  
> +ROD echo "$overcommit" \> "$overcommit_path"

Shouldn't this be done in the cleanup so that it's restored if the test
is aborted somewhere in the middle?

Something like:

cleanup()
{
...
	if [ -n "$overcommit" ]; then
		echo "$overcommit" \> "$overcommit_path"
	fi
...


-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-12 13:42       ` Cyril Hrubis
@ 2016-05-17 12:52         ` Stanislav Kholmanskikh
  2016-05-17 13:02           ` Stanislav Kholmanskikh
  2016-05-18 14:39           ` Cyril Hrubis
  0 siblings, 2 replies; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-05-17 12:52 UTC (permalink / raw)
  To: ltp

Hi,

On 05/12/2016 04:42 PM, Cyril Hrubis wrote:
> Hi!
>> Given that, (CommitLimit - Committed_AS) with overcommit_memory == 1,
>> looks to be a suitable formula, since the kernel lets allocate this
>> amount of memory and there will be some memory for other tasks.
>
> I wonder what the original purpose of the test is, it looks to me like
> the whole point is to attach processes to number of memory cgroups and
> then stress it by allocating memory while forcing heavy swapping. Hence
> the $mem_free + $swap_free/2 which is wrong obviously.
>
> So for the new formula the CommitLimit seems to be calculated as
> SWAP + RAM * overcommit_ratio which will default to SWAP + RAM/2 in most
> of the cases. Wouldn't that cause too much swap thrashing in case that you
> have SWAP == 2 * RAM? Shouldn't be something simple as 0.8 * RAM better
> for the purpose of the test? Or something as 0.9 * mem_free - 50MB after
> caches has been dropped?

I'm afraid that const * mem_free might not be the best formula, since if 
swap is small we may get an OOM here. At least this is what I get in an 
ldom with 128 GB of memory and < 1 GB of swap.

There is an idea. If we set memory.limit_in_bytes of a cgroup to a value 
less than the amount of memory.usage_in_bytes, then activities of 
processes of this cgroup will involve swapping.

So what do you think about this scheme:

mem = RAM * overcommit_ratio - CommitLimit
overcommit_memory = 1

plus this run_test():

# $1 - Number of cgroups
# $2 - Allocated how much memory in one process? in MB
# $3 - The interval to touch memory in a process
# $4 - How long does this test run ? in second
run_stress()
{
         nr_children=0
         children=""

         do_mount

         for i in $(seq 0 $(( $1 - 1 ))); do
                 ROD mkdir "$memcg_path/$i"
                 memcg_process_stress $2 $3 &
                 child=$!

                 nr_children=$(( $nr_children + 1 ))
                 children="$children $child"

                 ROD echo $child \> "$memcg_path/$i/tasks"
                 ROD echo $(( $2 * 1024 * 1024 )) \> 
"$memcg_path/$i/memory.limit_in_bytes"
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
         done
<cut>

?

Since the total amount of memory consumed by memcg_process_stress is 
always greater than the amount of memory it tries to mmap(), we are 
guaranteed that swapping will be happening for each of the control groups.


>
> Also I wonder if it makes sense to run the test if machine has no swap
> configured. Maybe we should TCONF right away in that case.

With the above scheme we just need to make sure that there is enough 
swap space to hold text (and some others) segments of all 
memcg_process_stress processes. I think something like 2 mb per process 
should be enough.

>
>> Signed-off-by: Stanislav Kholmanskikh <stanislav.kholmanskikh@oracle.com>
>> ---
>>   .../controllers/memcg/stress/memcg_stress_test.sh  |   28 ++++++++++++++++---
>>   1 files changed, 23 insertions(+), 5 deletions(-)
>>
>> diff --git a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
>> index 0d541e5..75baab4 100755
>> --- a/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
>> +++ b/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
>> @@ -39,6 +39,8 @@ children=""
>>   nr_children=0
>>   memcg_path=/dev/memcg
>>   memcg_created=0
>> +overcommit_path=/proc/sys/vm/overcommit_memory
>> +overcommit=""
>>
>>   cleanup()
>>   {
>> @@ -54,6 +56,10 @@ cleanup()
>>   		umount "$memcg_path"
>>   		rmdir "$memcg_path"
>>   	fi
>> +
>> +	if is_int "$overcommit"; then
>> +		echo "$overcommit" > "$overcommit_path"
>> +	fi
>>   }
>>   TST_CLEANUP=cleanup
>>
>> @@ -139,20 +145,32 @@ testcase_2()
>>   ROD echo 3 \> /proc/sys/vm/drop_caches
>>   sleep 2
>>
>> -mem_free=`cat /proc/meminfo | grep MemFree | awk '{ print $2 }'`
>> -is_int "$mem_free" || tst_brkm TBROK "Unable to determine mem_free"
>> +# We enable the "always overcommit" memory policy and allocate
>> +# less than CommitLimit bytes. Given that no other memory-consuming
>> +# process should be running, this gives us a more-or-less strong
>> +# guarantee that our allocations will result in an OOM situation.
>                                          ^
> 					missing not?
>
> I've just looked into newer patch that has not in the sentence...


You are correct, there was a missing 'not'. I fixed it in the newer 
patch, but didn't note it in the v1-v2 changes.


>
>> +overcommit=$(cat "$overcommit_path")
>> +is_int "$overcommit" || tst_brkm TBROK "Unable to determine overcommit"
>>
>> -swap_free=`cat /proc/meminfo | grep SwapFree | awk '{ print $2 }'`
>> -is_int "$swap_free" || tst_brkm TBROK "Unable to determine swap_free"
>> +commit_limit=$(cat /proc/meminfo | grep CommitLimit | awk '{ print $2 }')
>> +is_int "$commit_limit" || tst_brkm TBROK "Unable to determine commit_limit"
>>
>> -mem=$(( $mem_free + $swap_free / 2 ))
>> +committed_as=$(cat /proc/meminfo | grep Committed_AS | awk '{ print $2 }')
>> +is_int "$committed_as" || tst_brkm TBROK "Unable to determine committed_as"
>> +
>> +mem=$(( $commit_limit - $committed_as ))
>>   mem=$(( $mem / 1024 ))
>>   [ "$mem" -gt 0 ] || tst_brkm TBROK "mem is negative: $mem"
>>
>> +ROD echo 1 \> "$overcommit_path"
>> +
>>   date
>>   testcase_1
>>   date
>>   testcase_2
>>   date
>>
>> +ROD echo "$overcommit" \> "$overcommit_path"
>
> Shouldn't this be done in the cleanup so that it's restored if the test
> is aborted somewhere in the middle?

Right. It's already in cleanup(), so this call here seems redundant.


>
> Something as:
>
> cleanup()
> {
> ...
> 	if [ -n "$overcommit" ]; then
> 		echo "$overcommit" > "$overcommit_path"
> 	fi
> ...
>
>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-17 12:52         ` Stanislav Kholmanskikh
@ 2016-05-17 13:02           ` Stanislav Kholmanskikh
  2016-05-18 14:39           ` Cyril Hrubis
  1 sibling, 0 replies; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-05-17 13:02 UTC (permalink / raw)
  To: ltp



On 05/17/2016 03:52 PM, Stanislav Kholmanskikh wrote:
> Hi,
>
> On 05/12/2016 04:42 PM, Cyril Hrubis wrote:
>> Hi!
>>> Given that, (CommitLimit - Committed_AS) with overcommit_memory == 1,
>>> looks to be a suitable formula, since the kernel lets allocate this
>>> amount of memory and there will be some memory for other tasks.
>>
>> I wonder what the original purpose of the test is, it looks to me like
>> the whole point is to attach processes to number of memory cgroups and
>> then stress it by allocating memory while forcing heavy swapping. Hence
>> the $mem_free + $swap_free/2 which is wrong obviously.
>>
>> So for the new formula the CommitLimit seems to be calculated as
>> SWAP + RAM * overcommit_ratio which will default to SWAP + RAM/2 in most
>> of the cases. Wouldn't that cause too much swap trashing in case that you
>> have SWAP == 2 * RAM? Shouldn't be something simple as 0.8 * RAM better
>> for the purpose of the test? Or something as 0.9 * mem_free - 50MB after
>> caches has been dropped?
>
> I'm afraid that const * mem_free couldn't be the best formula, since if
> swap is small we may get an OOM here. At least this is what I get in a
> ldom with 128g memory, and < 1gb swap.
>
> There is an idea. If we set memory.limit_in_bytes of a cgroup to a value
> less than the amount of memory.usage_in_bytes, then activities of
> processes of this cgroup will involve swapping.
>
> So what do you think about this scheme:
>
> mem = RAM * overcommit_ratio - CommitLimit

Sorry, I meant Committed_AS here.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-17 12:52         ` Stanislav Kholmanskikh
  2016-05-17 13:02           ` Stanislav Kholmanskikh
@ 2016-05-18 14:39           ` Cyril Hrubis
  2016-05-18 17:29             ` Stanislav Kholmanskikh
  2016-05-19  9:17             ` Michal Hocko
  1 sibling, 2 replies; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-18 14:39 UTC (permalink / raw)
  To: ltp

Hi!
> There is an idea. If we set memory.limit_in_bytes of a cgroup to a value 
> less than the amount of memory.usage_in_bytes, then activities of 
> processes of this cgroup will involve swapping.
> 
> So what do you think about this scheme:
> 
> mem = RAM * overcommit_ratio - Committed_AS
> overcommit_memory = 1

I do not understand why we choose exactly this number.

RAM * overcommit_ratio is something like half of RAM on a usual system,
right?  Then we subtract Committed_AS, which is the amount of memory the
system would have needed to back up all allocations, so we may easily end
up with a negative number if more than half of the RAM was requested by
running programs.

On my notebook I have 6.6GB Committed_AS and RAM * overcommit_ratio = 2GB;
the CommitLimit is 10GB since I have 4GB RAM and 8GB swap.

I guess that when we decide to create the pressure inside of the memory
cgroup, instead of stressing the whole system, we may as well choose
small enough amount of memory, something as (RAM - 250Mb)/10 and be done
with it.

[CCing Michal Hocko]

Michal is there a way to figure out how much memory should be allocated
and faulted on a system in order to cause some amount of pages to be
swapped back and forth?

What this testcase does is create process(es) that allocate memory
and read/write it concurrently to stress the system a bit, but the
estimate of how much memory it should use is wrong and it causes OOM
when the amount of swap is much less than the amount of RAM.

The original testcase[1] drops caches then runs child(s) to allocate
(in sum) MemFree + SwapFree/2 memory. Each child runs in its own memory
cgroup but the amount of memory was chosen so that the whole system
would be under memory pressure. Does such a test even make sense to you?

[1]:
https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-18 14:39           ` Cyril Hrubis
@ 2016-05-18 17:29             ` Stanislav Kholmanskikh
  2016-05-19 13:38               ` Cyril Hrubis
  2016-05-19  9:17             ` Michal Hocko
  1 sibling, 1 reply; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-05-18 17:29 UTC (permalink / raw)
  To: ltp



On 05/18/2016 05:39 PM, Cyril Hrubis wrote:
> Hi!
>> There is an idea. If we set memory.limit_in_bytes of a cgroup to a value
>> less than the amount of memory.usage_in_bytes, then activities of
>> processes of this cgroup will involve swapping.
>>
>> So what do you think about this scheme:
>>
>> mem = RAM * overcommit_ratio - Committed_AS
>> overcommit_memory = 1
>
> I do not understand why we choose exactly this number.
>
> RAM * overcommit_ratio is something as half of RAM on usuall system,
> right?  Then we substract Committed_AS, which is amount of memory system
> would have needed to back up all allocations so we may easily end up
> with a negative number if more than half of the RAM was requested by
> running programs.
>
> On my notebook I have 6.6Gb Commited_AS and RAM * overcommit_ratio = 2Gb
> the CommitLimit is 10Gb since I have 4GB RAM and 8GB Swap.
>

I run LTP mostly on systems with no load, and somehow missed the 
obvious fact that Committed_AS could be large. Sorry if I misled you.

> I guess that when we decide to create the pressure inside of the memory
> cgroup, instead of stressing the whole system, we may as well choose
> small enough amount of memory, something as (RAM - 250Mb)/10 and be done
> with it.

Yes, this should work as well.

I don't know how we could stress the whole system without the risk of 
hitting an OOM. One idea which comes to my mind is to use a 
top-level control group with a significant amount of memory assigned to 
it (like mem_free / 2).

Could you, please, have a look at the attachment? I was playing with 
this patch today and was able to stress my system without OOM.

However, this scheme may result in a very intensive swapping if mem_free 
is large (given that there is enough swap).



>
> [CCing Michal Hocko]
>
> Michal is there a way to figure out how much memory should be allocated
> and faulted on a system in order to cause some amount of pages to be
> swapped back and forth?
>
> What this testcase does it to create a process(es) that allocate memory
> and read/write it concurently to stress the system a bit but the
> estimate on how much memory it should use is wrong and it causes OOM
> when the amount of swap is much less than amount of RAM.
>
> The original testcase[1] drops caches then runs child(s) to allocate
> (in sum) MemFree + SwapFree/2 memory. Each child runs in its own memory
> cgroup but the amount of memory was choosen so that the whole system
> would be under memory pressure. Does such test even make sense to you?
>
> [1]:
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-draft.patch
Type: text/x-patch
Size: 3646 bytes
Desc: not available
URL: <http://lists.linux.it/pipermail/ltp/attachments/20160518/8bcafce2/attachment-0001.bin>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-18 14:39           ` Cyril Hrubis
  2016-05-18 17:29             ` Stanislav Kholmanskikh
@ 2016-05-19  9:17             ` Michal Hocko
  2016-05-19 12:56               ` Cyril Hrubis
  1 sibling, 1 reply; 21+ messages in thread
From: Michal Hocko @ 2016-05-19  9:17 UTC (permalink / raw)
  To: ltp

On Wed 18-05-16 16:39:18, Cyril Hrubis wrote:
[...]
> Michal is there a way to figure out how much memory should be allocated
> and faulted on a system in order to cause some amount of pages to be
> swapped back and forth?

That depends on how the memory is used. If it is largely anonymous then
you will get swap out when you hit the watermarks.
 
> What this testcase does it to create a process(es) that allocate memory
> and read/write it concurently to stress the system a bit but the
> estimate on how much memory it should use is wrong and it causes OOM
> when the amount of swap is much less than amount of RAM.

Are we talking about memcg or the global here?
 
> The original testcase[1] drops caches then runs child(s) to allocate
> (in sum) MemFree + SwapFree/2 memory. Each child runs in its own memory
> cgroup but the amount of memory was choosen so that the whole system
> would be under memory pressure. Does such test even make sense to you?

Well, it depends. It makes sense to see how the global memory pressure
gets distributed into two memcgs which are the source of that pressure
but I am not really sure how you want to evaluate good vs. bad case as
the load will be quite timing sensitive. So the main question would be,
what do you expect the test will tell you?
 
> [1]:
> https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/controllers/memcg/stress/memcg_stress_test.sh
> 
> -- 
> Cyril Hrubis
> chrubis@suse.cz

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-19  9:17             ` Michal Hocko
@ 2016-05-19 12:56               ` Cyril Hrubis
  2016-05-19 19:21                 ` Michal Hocko
  0 siblings, 1 reply; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-19 12:56 UTC (permalink / raw)
  To: ltp

Hi!
> > Michal is there a way to figure out how much memory should be allocated
> > and faulted on a system in order to cause some amount of pages to be
> > swapped back and forth?
> 
> that depends on how the memory is used. If it is largerly anonymous then
> you will get swap out when you hit watermarks.

The test processes do mmap() with MAP_PRIVATE|MAP_ANONYMOUS so 99% of
the consumed memory is anonymous.

If I understand this right, watermarks are set to a few megabytes so
MemFree must get low enough. Isn't there a chance to hit OOM if we try
to allocate a little less than MemFree anyway (assuming that we dropped
caches before we measured MemFree)? There may always be some background
daemon forking on the system while the test runs, which may get us close
enough to an out-of-memory condition.

Or did I miss something?

> > What this testcase does it to create a process(es) that allocate memory
> > and read/write it concurently to stress the system a bit but the
> > estimate on how much memory it should use is wrong and it causes OOM
> > when the amount of swap is much less than amount of RAM.
> 
> Are we talking about memcg or the global here?

The test is trying to cause pressure at the global level while the test
process(es) allocate memory in separate cgroups. No idea how sensible
this approach is.

> > The original testcase[1] drops caches then runs child(s) to allocate
> > (in sum) MemFree + SwapFree/2 memory. Each child runs in its own memory
> > cgroup but the amount of memory was choosen so that the whole system
> > would be under memory pressure. Does such test even make sense to you?
> 
> Well, it depends. It makes sense to see how the global memory pressure
> gets distributed into two memcgs which are the source of that pressure
> but I am not really sure how you want to evaluate good vs. bad case as
> the load will be quite timing sensitive. So the main question would be,
> what do you expect the test will tell you?

This is a stress test, not a benchmark, so the test generates pressure
for an hour and then it reports success if the machine survived it.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-18 17:29             ` Stanislav Kholmanskikh
@ 2016-05-19 13:38               ` Cyril Hrubis
  2016-05-23 11:12                 ` Stanislav Kholmanskikh
  0 siblings, 1 reply; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-19 13:38 UTC (permalink / raw)
  To: ltp

Hi!
> > I guess that when we decide to create the pressure inside of the memory
> > cgroup, instead of stressing the whole system, we may as well choose
> > small enough amount of memory, something as (RAM - 250Mb)/10 and be done
> > with it.
> 
> Yes, this should work as well.
> 
> I don't know how we could stress the whole system without the risk of 
> hitting an OOM. One idea which comes to my mind is about using a 
> top-level control group with a significant amount of memory assigned to 
> it (like mem_free / 2).
> 
> Could you, please, have a look at the attachment? I was playing with 
> this patch today and was able to stress my system without OOM.
> 
> However, this scheme may result in a very intensive swapping if mem_free 
> is large (given that there is enough swap).

Then we should place an upper limit on the amount of used swap as well.

Something like MIN(MemFree/2, SwapFree, 1GB) to be used for the limit, and
abort the test if this number is too small?

Apart from that, the root memory cgroup should be better named, something
like ltp_stress_root, so that it's clear where it came from...

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-19 12:56               ` Cyril Hrubis
@ 2016-05-19 19:21                 ` Michal Hocko
  2016-05-24 16:21                   ` Cyril Hrubis
  0 siblings, 1 reply; 21+ messages in thread
From: Michal Hocko @ 2016-05-19 19:21 UTC (permalink / raw)
  To: ltp

On Thu 19-05-16 14:56:43, Cyril Hrubis wrote:
> Hi!
> > > Michal is there a way to figure out how much memory should be allocated
> > > and faulted on a system in order to cause some amount of pages to be
> > > swapped back and forth?
> > 
> > that depends on how the memory is used. If it is largerly anonymous then
> > you will get swap out when you hit watermarks.
> 
> The test processes do mmap() with MAP_PRIVATE|MAP_ANONYMOUS so 99% of
> the consumed memory is anonymous.
> 
> If I understand this right watermarks are set to a few megabytes so
> MemFree must get low enough. Isn't there a chance to hit OOM if we try
> to allocate little less than MemFree anway (assuming that we dropped
> caches before we measured MemFree)? There always may be some background
> daemon forking on the system while the test runs which may get use close
> enough to out of memory condition.

As long as there is a reclaimable memory (aka swap space for the
anonymous memory) we shouldn't go OOM.

> Or did I miss something?
> 
> > > What this testcase does it to create a process(es) that allocate memory
> > > and read/write it concurently to stress the system a bit but the
> > > estimate on how much memory it should use is wrong and it causes OOM
> > > when the amount of swap is much less than amount of RAM.
> > 
> > Are we talking about memcg or the global here?
> 
> The test is trying to cause pressure at the global level while the test
> process(es) allocate memory in separate cgroups. No idea how sensible
> this approach is.

Dunno. I fail to see what would be the role of the memcg then.

> > > The original testcase[1] drops caches then runs child(s) to allocate
> > > (in sum) MemFree + SwapFree/2 memory. Each child runs in its own memory
> > > cgroup but the amount of memory was choosen so that the whole system
> > > would be under memory pressure. Does such test even make sense to you?
> > 
> > Well, it depends. It makes sense to see how the global memory pressure
> > gets distributed into two memcgs which are the source of that pressure
> > but I am not really sure how you want to evaluate good vs. bad case as
> > the load will be quite timing sensitive. So the main question would be,
> > what do you expect the test will tell you?
> 
> This is a stress test not a benchmark, so the test genereates pressure
> for an hour and then it reports success if the machine outlived it.

But then why to bother with memcg configuration? I could understand if
you compared the same load with and without memcg in the game but other
than that it sounds like a random bashing of the system.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-19 13:38               ` Cyril Hrubis
@ 2016-05-23 11:12                 ` Stanislav Kholmanskikh
  2016-05-24 16:46                   ` Cyril Hrubis
  0 siblings, 1 reply; 21+ messages in thread
From: Stanislav Kholmanskikh @ 2016-05-23 11:12 UTC (permalink / raw)
  To: ltp



On 05/19/2016 04:38 PM, Cyril Hrubis wrote:
> Hi!
>>> I guess that when we decide to create the pressure inside of the memory
>>> cgroup, instead of stressing the whole system, we may as well choose
>>> small enough amount of memory, something as (RAM - 250Mb)/10 and be done
>>> with it.
>>
>> Yes, this should work as well.
>>
>> I don't know how we could stress the whole system without the risk of
>> hitting an OOM. One idea which comes to my mind is about using a
>> top-level control group with a significant amount of memory assigned to
>> it (like mem_free / 2).
>>
>> Could you, please, have a look at the attachment? I was playing with
>> this patch today and was able to stress my system without OOM.
>>
>> However, this scheme may result in a very intensive swapping if mem_free
>> is large (given that there is enough swap).
>
> Then we should place upper limit on the amount of used swap as well.
>
> Something as MIN(MemFree/2, SwapFree, 1GB) to be used for the limit and
> abort the test if this number is to small?

Now that the weekend is over, I believe that there is no need for an 
extra control group if we let the tests allocate MemTotal bytes of 
memory. There will always be swapping, and its intensity will not depend 
on the amount of swap space configured, and it will not be "too much" 
unless there are some mm-using processes running on the system.

I believe the attached patch introduces the minimal set of changes to 
the original test case idea (as we understand it), but lets the test 
case run in different environments.

What do you think about it? Did I miss something?

Thanks.


>
> Apart from that the root memory cgroup should be better named something
> as ltp_stres_root so that it's clear where it came from...
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-memcg_stress_test.sh-allocate-MemTotal.patch
Type: text/x-patch
Size: 2386 bytes
Desc: not available
URL: <http://lists.linux.it/pipermail/ltp/attachments/20160523/004c8d30/attachment.bin>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-19 19:21                 ` Michal Hocko
@ 2016-05-24 16:21                   ` Cyril Hrubis
  0 siblings, 0 replies; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-24 16:21 UTC (permalink / raw)
  To: ltp

Hi!
> > The test is trying to cause pressure at the global level while the test
> > process(es) allocate memory in separate cgroups. No idea how sensible
> > this approach is.
> 
> Dunno. I fail to see what would be the role of the memcg then.

So do I.

> > > > The original testcase[1] drops caches then runs child(s) to allocate
> > > > (in sum) MemFree + SwapFree/2 memory. Each child runs in its own memory
> > > > cgroup but the amount of memory was choosen so that the whole system
> > > > would be under memory pressure. Does such test even make sense to you?
> > > 
> > > Well, it depends. It makes sense to see how the global memory pressure
> > > gets distributed into two memcgs which are the source of that pressure
> > > but I am not really sure how you want to evaluate good vs. bad case as
> > > the load will be quite timing sensitive. So the main question would be,
> > > what do you expect the test will tell you?
> > 
> > This is a stress test not a benchmark, so the test genereates pressure
> > for an hour and then it reports success if the machine outlived it.
> 
> But then why to bother with memcg configuration? I could understand if
> you compared the same load with and without memcg in the game but other
> than that it sounds like a random bashing of the system.

Well that is the reason I've CCed you as well, since the test was not
making much sense to me but I can always miss something.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes
  2016-05-23 11:12                 ` Stanislav Kholmanskikh
@ 2016-05-24 16:46                   ` Cyril Hrubis
  0 siblings, 0 replies; 21+ messages in thread
From: Cyril Hrubis @ 2016-05-24 16:46 UTC (permalink / raw)
  To: ltp

Hi!
> Now, after weekends are gone, I believe that there is no need in an 
> extra control group, if we let the tests allocate MemTotal bytes of 
> memory. There will always be swapping, and its intensity will not depend 
> on the amount of swap space configured, and it will not be "too much" 
> unless there are some mm-using processes running on the system.
> 
> I believe the attached patch introduces the minimal set of changes to 
> the original test case idea (as we understand it), but lets the test 
> case runs in different environments.
> 
> What do you think about it? Did I miss something?

This sounds reasonable enough to me.

I wonder if the test has anything to do with memory cgroups though,
maybe we should rename it to memory_stress and get rid of the cgroup
bits in it.

-- 
Cyril Hrubis
chrubis@suse.cz

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-05-24 16:46 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-22 15:23 [LTP] [PATCH 1/4] memcg_process_stress: cleanup Stanislav Kholmanskikh
2016-04-22 15:23 ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Stanislav Kholmanskikh
2016-04-22 15:23   ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Stanislav Kholmanskikh
2016-04-22 15:23     ` [LTP] [RFC PATCH 4/4] memcg_stress_test.sh: allocate less than CommitLimit bytes Stanislav Kholmanskikh
2016-05-12 13:42       ` Cyril Hrubis
2016-05-17 12:52         ` Stanislav Kholmanskikh
2016-05-17 13:02           ` Stanislav Kholmanskikh
2016-05-18 14:39           ` Cyril Hrubis
2016-05-18 17:29             ` Stanislav Kholmanskikh
2016-05-19 13:38               ` Cyril Hrubis
2016-05-23 11:12                 ` Stanislav Kholmanskikh
2016-05-24 16:46                   ` Cyril Hrubis
2016-05-19  9:17             ` Michal Hocko
2016-05-19 12:56               ` Cyril Hrubis
2016-05-19 19:21                 ` Michal Hocko
2016-05-24 16:21                   ` Cyril Hrubis
2016-05-11 15:01     ` [LTP] [PATCH 3/4] memcg_stress_test.sh: rewrite Cyril Hrubis
2016-05-11 14:39   ` [LTP] [PATCH 2/4] memcg_process_stress: allocate memory not in the signal handler Cyril Hrubis
2016-05-12 11:09     ` Stanislav Kholmanskikh
2016-05-12 11:26       ` Cyril Hrubis
2016-05-11 14:16 ` [LTP] [PATCH 1/4] memcg_process_stress: cleanup Cyril Hrubis

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox