[LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine

All of lore.kernel.org
 help / color / mirror / Atom feed

* [LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine
@ 2021-01-27  3:18 Leo Yu-Chi Liang
  2021-01-27  9:52 ` Richard Palethorpe
  2021-01-27 10:37 ` Richard Palethorpe
  0 siblings, 2 replies; 3+ messages in thread
From: Leo Yu-Chi Liang @ 2021-01-27  3:18 UTC (permalink / raw)
  To: ltp

The fuzzy sync library uses a spin-waiting mechanism to implement thread
barrier behavior, which causes this test to be time-consuming
on a single core machine.

Fix this by adding sched_yield in the spin waiting loop, so that the
thread yields cpu as soon as it enters the waiting loop.

Signed-off-by: Leo Yu-Chi Liang <ycliang@andestech.com>
---
 include/tst_fuzzy_sync.h | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h
index 4141f5c64..37f3d06f2 100644
--- a/include/tst_fuzzy_sync.h
+++ b/include/tst_fuzzy_sync.h
@@ -59,12 +59,15 @@
  * @sa tst_fzsync_pair
  */
 
-#include <sys/time.h>
-#include <time.h>
 #include <math.h>
-#include <stdlib.h>
 #include <pthread.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <time.h>
 #include "tst_atomic.h"
+#include "tst_cpu.h"
 #include "tst_timer.h"
 #include "tst_safe_pthread.h"
 
@@ -180,6 +183,15 @@ struct tst_fzsync_pair {
 	int exec_loop;
 	/** Internal; The second thread or 0 */
 	pthread_t thread_b;
+	/** 
+	 * Internal; The flag indicates single core machines or not
+	 * 
+	 * If running on single core machines, it would take considerable
+	 * amount of time to run fuzzy sync library.
+	 * Thus call sched_yield to give up cpu to decrease the test time.
+	 */
+	bool yield_in_wait;
+
 };
 
 #define CHK(param, low, hi, def) do {					      \
@@ -206,6 +218,7 @@ static void tst_fzsync_pair_init(struct tst_fzsync_pair *pair)
 	CHK(max_dev_ratio, 0, 1, 0.1);
 	CHK(exec_time_p, 0, 1, 0.5);
 	CHK(exec_loops, 20, INT_MAX, 3000000);
+	CHK(yield_in_wait, 0, 1, (tst_ncpus() <= 1));
 }
 #undef CHK
 
@@ -550,7 +563,8 @@ static void tst_fzsync_pair_update(struct tst_fzsync_pair *pair)
  */
 static inline void tst_fzsync_pair_wait(int *our_cntr,
 					int *other_cntr,
-					int *spins)
+					int *spins,
+					bool yield_in_wait)
 {
 	if (tst_atomic_inc(other_cntr) == INT_MAX) {
 		/*
@@ -564,6 +578,8 @@ static inline void tst_fzsync_pair_wait(int *our_cntr,
 		       && tst_atomic_load(our_cntr) < INT_MAX) {
 			if (spins)
 				(*spins)++;
+			if(yield_in_wait)
+				sched_yield();
 		}
 
 		tst_atomic_store(0, other_cntr);
@@ -581,6 +597,8 @@ static inline void tst_fzsync_pair_wait(int *our_cntr,
 		while (tst_atomic_load(our_cntr) < tst_atomic_load(other_cntr)) {
 			if (spins)
 				(*spins)++;
+			if(yield_in_wait)
+				sched_yield();
 		}
 	}
 }
@@ -593,7 +611,7 @@ static inline void tst_fzsync_pair_wait(int *our_cntr,
  */
 static inline void tst_fzsync_wait_a(struct tst_fzsync_pair *pair)
 {
-	tst_fzsync_pair_wait(&pair->a_cntr, &pair->b_cntr, NULL);
+	tst_fzsync_pair_wait(&pair->a_cntr, &pair->b_cntr, NULL, pair->yield_in_wait);
 }
 
 /**
@@ -604,7 +622,7 @@ static inline void tst_fzsync_wait_a(struct tst_fzsync_pair *pair)
  */
 static inline void tst_fzsync_wait_b(struct tst_fzsync_pair *pair)
 {
-	tst_fzsync_pair_wait(&pair->b_cntr, &pair->a_cntr, NULL);
+	tst_fzsync_pair_wait(&pair->b_cntr, &pair->a_cntr, NULL, pair->yield_in_wait);
 }
 
 /**
@@ -709,7 +727,7 @@ static inline void tst_fzsync_start_race_a(struct tst_fzsync_pair *pair)
 static inline void tst_fzsync_end_race_a(struct tst_fzsync_pair *pair)
 {
 	tst_fzsync_time(&pair->a_end);
-	tst_fzsync_pair_wait(&pair->a_cntr, &pair->b_cntr, &pair->spins);
+	tst_fzsync_pair_wait(&pair->a_cntr, &pair->b_cntr, &pair->spins, pair->yield_in_wait);
 }
 
 /**
@@ -740,7 +758,7 @@ static inline void tst_fzsync_start_race_b(struct tst_fzsync_pair *pair)
 static inline void tst_fzsync_end_race_b(struct tst_fzsync_pair *pair)
 {
 	tst_fzsync_time(&pair->b_end);
-	tst_fzsync_pair_wait(&pair->b_cntr, &pair->a_cntr, &pair->spins);
+	tst_fzsync_pair_wait(&pair->b_cntr, &pair->a_cntr, &pair->spins, pair->yield_in_wait);
 }
 
 /**
-- 
2.17.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine
  2021-01-27  3:18 [LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine Leo Yu-Chi Liang
@ 2021-01-27  9:52 ` Richard Palethorpe
  2021-01-27 10:37 ` Richard Palethorpe
  1 sibling, 0 replies; 3+ messages in thread
From: Richard Palethorpe @ 2021-01-27  9:52 UTC (permalink / raw)
  To: ltp

Hello Leo,

Leo Yu-Chi Liang <ycliang@andestech.com> writes:

> Fuzzy sync library uses spin waiting mechanism to implement thread
> barrier behavior, which would cause this test to be time-consuming
> on single core machine.
>
> Fix this by adding sched_yield in the spin waiting loop, so that the
> thread yields cpu as soon as it enters the waiting loop.

Thanks for this, a full review will take some time, but I have some
minor comments already.

>
> Signed-off-by: Leo Yu-Chi Liang <ycliang@andestech.com>
> ---
>  include/tst_fuzzy_sync.h | 34 ++++++++++++++++++++++++++--------
>  1 file changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/include/tst_fuzzy_sync.h b/include/tst_fuzzy_sync.h
> index 4141f5c64..37f3d06f2 100644
> --- a/include/tst_fuzzy_sync.h
> +++ b/include/tst_fuzzy_sync.h
> @@ -59,12 +59,15 @@
>   * @sa tst_fzsync_pair
>   */
>  
> -#include <sys/time.h>
> -#include <time.h>
>  #include <math.h>
> -#include <stdlib.h>

Please don't sort existing includes. It will appear in git-blame that you
added these. :-)

>  #include <pthread.h>
> +#include <sched.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <sys/time.h>
> +#include <time.h>
>  #include "tst_atomic.h"
> +#include "tst_cpu.h"
>  #include "tst_timer.h"
>  #include "tst_safe_pthread.h"
>  
> @@ -180,6 +183,15 @@ struct tst_fzsync_pair {
>  	int exec_loop;
>  	/** Internal; The second thread or 0 */
>  	pthread_t thread_b;
> +	/** 
> +	 * Internal; The flag indicates single core machines or not

Actually I think the user can set this. It does not have to be internal.

> +	 * 
> +	 * If running on single core machines, it would take considerable
> +	 * amount of time to run fuzzy sync library.
> +	 * Thus call sched_yield to give up cpu to decrease the test time.
> +	 */
> +	bool yield_in_wait;
> +
>  };
>  
>  #define CHK(param, low, hi, def) do {					      \
> @@ -206,6 +218,7 @@ static void tst_fzsync_pair_init(struct tst_fzsync_pair *pair)
>  	CHK(max_dev_ratio, 0, 1, 0.1);
>  	CHK(exec_time_p, 0, 1, 0.5);
>  	CHK(exec_loops, 20, INT_MAX, 3000000);
> +	CHK(yield_in_wait, 0, 1, (tst_ncpus() <= 1));

The CHK macro will override the user if they set yield_in_wait to
zero. This only affects single core machines, where it makes it impossible
for the user to disable yield (at least before calling init, which is the
"correct way").

It still allows the user to enable yield on multicore which is good.

To avoid confusion I think it should be noted that users can force
enable yield on multicore, but they can not disable it on single core.

-- 
Thank you,
Richard.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine
  2021-01-27  3:18 [LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine Leo Yu-Chi Liang
  2021-01-27  9:52 ` Richard Palethorpe
@ 2021-01-27 10:37 ` Richard Palethorpe
  1 sibling, 0 replies; 3+ messages in thread
From: Richard Palethorpe @ 2021-01-27 10:37 UTC (permalink / raw)
  To: ltp

Hello Leo,

Leo Yu-Chi Liang <ycliang@andestech.com> writes:

> +	/**

Trailing whitespace

> +	 * Internal; The flag indicates single core machines or not
> +	 *

Same as above

> +	 * If running on single core machines, it would take considerable
> +	 * amount of time to run fuzzy sync library.
> +	 * Thus call sched_yield to give up cpu to decrease the test time.
> +	 */
> +	bool yield_in_wait;

Actually, it appears the CHK macro is not compatible with bool; it
produces compiler warnings. You can either just change this to 'int
yield_in_wait:1;' or don't use the CHK macro.


> +
>  };
>  
>  #define CHK(param, low, hi, def) do {					      \
> @@ -206,6 +218,7 @@ static void tst_fzsync_pair_init(struct tst_fzsync_pair *pair)
>  	CHK(max_dev_ratio, 0, 1, 0.1);
>  	CHK(exec_time_p, 0, 1, 0.5);
>  	CHK(exec_loops, 20, INT_MAX, 3000000);
> +	CHK(yield_in_wait, 0, 1, (tst_ncpus() <= 1));
>  }
>  #undef CHK
>  
> @@ -550,7 +563,8 @@ static void tst_fzsync_pair_update(struct tst_fzsync_pair *pair)
>   */
>  static inline void tst_fzsync_pair_wait(int *our_cntr,
>  					int *other_cntr,
> -					int *spins)
> +					int *spins,
> +					bool yield_in_wait)
>  {
>  	if (tst_atomic_inc(other_cntr) == INT_MAX) {
>  		/*
> @@ -564,6 +578,8 @@ static inline void tst_fzsync_pair_wait(int *our_cntr,
>  		       && tst_atomic_load(our_cntr) < INT_MAX) {
>  			if (spins)
>  				(*spins)++;
> +			if(yield_in_wait)
> +				sched_yield();
>  		}
>  
>  		tst_atomic_store(0, other_cntr);
> @@ -581,6 +597,8 @@ static inline void tst_fzsync_pair_wait(int *our_cntr,
>  		while (tst_atomic_load(our_cntr) < tst_atomic_load(other_cntr)) {
>  			if (spins)
>  				(*spins)++;
> +			if(yield_in_wait)
> +				sched_yield();

After disassembling this, it appears the compiler does not move the
yield branch outside the loop. The spins branch is optimised out because
it is a compile time constant when NULL.

This might not matter, but it will need testing on a lot of
platforms. OTOH we could manually move the branch outside of the loop.

-- 
Thank you,
Richard.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-01-27 10:37 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-27  3:18 [LTP] [PATCH v2 1/1] fzsync: Add sched_yield for single core machine Leo Yu-Chi Liang
2021-01-27  9:52 ` Richard Palethorpe
2021-01-27 10:37 ` Richard Palethorpe

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.