[PATCH] md/raid6 algorithms: scale test duration for speedier boots

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] md/raid6 algorithms: scale test duration for speedier boots
@ 2024-08-05 17:08 Colin Ian King
  2024-08-06  3:20 ` kernel test robot
  2024-08-06  4:01 ` kernel test robot
  0 siblings, 2 replies; 8+ messages in thread
From: Colin Ian King @ 2024-08-05 17:08 UTC (permalink / raw)
  To: Andrew Morton, Masahiro Yamada, rppt; +Cc: linux-kernel

Instead of using jiffies and waiting for jiffies to wrap before
measuring use the higher precision local_time for benchmarking.
Measure 2500 loops, which works out to be accurate enough for
benchmarking the raid algo data rates. Also add division by zero
checking in case timing measurements are bogus.

Speeds up raid benchmarkingm, reduces calibration time from
48,000 usecs to 4000 usecs on a i9-19200.

Signed-off-by: Colin Ian King <colin.king@intel.com>
---
 lib/raid6/algos.c | 56 ++++++++++++++++++++---------------------------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index cd2e88ee1..5d974ae8d 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,8 @@
 #else
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/sched/clock.h>
+
 /* In .bss so it's zeroed */
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
 EXPORT_SYMBOL(raid6_empty_zero_page);
@@ -155,12 +157,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
 	void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
 {
-	unsigned long perf, bestgenperf, j0, j1;
+	unsigned long perf;
+	const unsigned long max_perf = 2500;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
+	const u64 ns_per_mb = 1000000000 >> 20;
+	u64 n, ns, t, ns_best = ~0ULL;
 
-	for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->priority >= best->priority) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -170,26 +175,20 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				break;
 			}
 
-			perf = 0;
-
 			preempt_disable();
-			j0 = jiffies;
-			while ((j1 = jiffies) == j0)
-				cpu_relax();
-			while (time_before(jiffies,
-					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+			t = local_clock();
+			for (perf = 0; perf < max_perf; perf++)
 				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
-				perf++;
-			}
+			ns = local_clock() - t;
 			preempt_enable();
 
-			if (perf > bestgenperf) {
-				bestgenperf = perf;
+			if (ns < ns_best) {
+				ns_best = ns;
 				best = *algo;
 			}
-			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-				(perf * HZ * (disks-2)) >>
-				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+			n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+			pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name,
+				(ns > 0) ? n / ns : 0, ns);
 		}
 	}
 
@@ -206,31 +205,24 @@ static inline const struct raid6_calls *raid6_choose_gen(
 		goto out;
 	}
 
-	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		best->name,
-		(bestgenperf * HZ * (disks - 2)) >>
-		(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+	n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+	pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
+		best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
 
 	if (best->xor_syndrome) {
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-				   j1 + (1 << RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			best->xor_syndrome(disks, start, stop,
 					   PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			(perf * HZ * (disks - 2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n",
+			(ns > 0) ? n / ns : 0, ns);
 	}
-
 out:
 	return best;
 }
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
  2024-08-05 17:08 [PATCH] md/raid6 algorithms: scale test duration for speedier boots Colin Ian King
@ 2024-08-06  3:20 ` kernel test robot
  2024-08-06  4:01 ` kernel test robot
  1 sibling, 0 replies; 8+ messages in thread
From: kernel test robot @ 2024-08-06  3:20 UTC (permalink / raw)
  To: Colin Ian King, Andrew Morton, Masahiro Yamada, rppt
  Cc: oe-kbuild-all, Linux Memory Management List, linux-kernel

Hi Colin,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-nonmm-unstable]
[also build test ERROR on akpm-mm/mm-everything linus/master v6.11-rc2 next-20240805]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Colin-Ian-King/md-raid6-algorithms-scale-test-duration-for-speedier-boots/20240806-012444
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link:    https://lore.kernel.org/r/20240805170816.6025-1-colin.king%40intel.com
patch subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
config: parisc-defconfig (https://download.01.org/0day-ci/archive/20240806/202408061040.0kdEdUBV-lkp@intel.com/config)
compiler: hppa-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240806/202408061040.0kdEdUBV-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408061040.0kdEdUBV-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

WARNING: modpost: missing MODULE_DESCRIPTION() in lib/asn1_decoder.o
>> ERROR: modpost: "__udivdi3" [lib/raid6/raid6_pq.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
  2024-08-05 17:08 [PATCH] md/raid6 algorithms: scale test duration for speedier boots Colin Ian King
  2024-08-06  3:20 ` kernel test robot
@ 2024-08-06  4:01 ` kernel test robot
  2024-08-06 21:28   ` King, Colin
  1 sibling, 1 reply; 8+ messages in thread
From: kernel test robot @ 2024-08-06  4:01 UTC (permalink / raw)
  To: Colin Ian King, Andrew Morton, Masahiro Yamada, rppt
  Cc: oe-kbuild-all, Linux Memory Management List, linux-kernel

Hi Colin,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-nonmm-unstable]
[also build test ERROR on akpm-mm/mm-everything linus/master v6.11-rc2 next-20240805]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Colin-Ian-King/md-raid6-algorithms-scale-test-duration-for-speedier-boots/20240806-012444
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link:    https://lore.kernel.org/r/20240805170816.6025-1-colin.king%40intel.com
patch subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
config: i386-buildonly-randconfig-002-20240806 (https://download.01.org/0day-ci/archive/20240806/202408061134.O5o8RpVV-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240806/202408061134.O5o8RpVV-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408061134.O5o8RpVV-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: lib/raid6/algos.o: in function `raid6_choose_gen.constprop.0':
>> algos.c:(.text+0xe6): undefined reference to `__udivdi3'
>> ld: algos.c:(.text+0x163): undefined reference to `__udivdi3'
   ld: algos.c:(.text+0x1fd): undefined reference to `__udivdi3'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
  2024-08-06  4:01 ` kernel test robot
@ 2024-08-06 21:28   ` King, Colin
  0 siblings, 0 replies; 8+ messages in thread
From: King, Colin @ 2024-08-06 21:28 UTC (permalink / raw)
  To: lkp, Andrew Morton, Masahiro Yamada, rppt@kernel.org
  Cc: oe-kbuild-all@lists.linux.dev, Linux Memory Management List,
	linux-kernel@vger.kernel.org

Hi,

Two issues with the patch, firstly the 64 bit divisions should be using div64_u64() and also on i386 testing in a VM the selection of the raid6 avx2* algorithm is sometimes incorrect because of a difference in benchmark speed rates on avx2x2 and avx2x1 - so I think this patch needs to be reverted/removed for now until I can figure out why it's not fully stable.

Colin

-----Original Message-----
From: lkp <lkp@intel.com> 
Sent: Tuesday, August 6, 2024 5:01 AM
To: King, Colin <colin.king@intel.com>; Andrew Morton <akpm@linux-foundation.org>; Masahiro Yamada <masahiroy@kernel.org>; rppt@kernel.org
Cc: oe-kbuild-all@lists.linux.dev; Linux Memory Management List <linux-mm@kvack.org>; linux-kernel@vger.kernel.org
Subject: Re: [PATCH] md/raid6 algorithms: scale test duration for speedier boots

Hi Colin,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-nonmm-unstable] [also build test ERROR on akpm-mm/mm-everything linus/master v6.11-rc2 next-20240805] [If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Colin-Ian-King/md-raid6-algorithms-scale-test-duration-for-speedier-boots/20240806-012444
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link:    https://lore.kernel.org/r/20240805170816.6025-1-colin.king%40intel.com
patch subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
config: i386-buildonly-randconfig-002-20240806 (https://download.01.org/0day-ci/archive/20240806/202408061134.O5o8RpVV-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240806/202408061134.O5o8RpVV-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: 
| https://lore.kernel.org/oe-kbuild-all/202408061134.O5o8RpVV-lkp@intel.
| com/

All errors (new ones prefixed by >>):

   ld: lib/raid6/algos.o: in function `raid6_choose_gen.constprop.0':
>> algos.c:(.text+0xe6): undefined reference to `__udivdi3'
>> ld: algos.c:(.text+0x163): undefined reference to `__udivdi3'
   ld: algos.c:(.text+0x1fd): undefined reference to `__udivdi3'

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] md/raid6 algorithms: scale test duration for speedier boots
@ 2025-04-07 14:31 Colin Ian King
  2025-04-07 18:35 ` kernel test robot
  2025-04-08  2:58 ` kernel test robot
  0 siblings, 2 replies; 8+ messages in thread
From: Colin Ian King @ 2025-04-07 14:31 UTC (permalink / raw)
  To: Andrew Morton, Song Liu; +Cc: linux-kernel

Instead of using jiffies (and waiting for jiffies to wrap before
benchmarking the algorithms) instead use the higher precision local_time
for benchmarking. This patch performs 2,500 iterations of the benchmark
measurements which works out to be accurate enough for benchmarking the
raid algorithm data rates. Also add division by zero checking in case
timing measurements are bogus.

Measuring 100 re-boots on Intel(R) Core(TM) Ultra 9 285K with
improves raid64 benchmarking loop from ~68000 usecs to ~5300 usec.

This patch has been in use in Clear Linux for ~2 years w/o issues.

Signed-off-by: Colin Ian King <colin.king@intel.com>
---
 lib/raid6/algos.c | 53 ++++++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 31 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index cd2e88ee1f14..b846635542bc 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,8 @@
 #else
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/sched/clock.h>
+
 /* In .bss so it's zeroed */
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
 EXPORT_SYMBOL(raid6_empty_zero_page);
@@ -155,12 +157,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
 	void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
 {
-	unsigned long perf, bestgenperf, j0, j1;
+	unsigned long perf;
+	const unsigned long max_perf = 2500;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
+	const u64 ns_per_mb = 1000000000 >> 20;
+	u64 n, ns, t, ns_best = ~0ULL;
 
-	for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->priority >= best->priority) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -170,26 +175,20 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				break;
 			}
 
-			perf = 0;
-
 			preempt_disable();
-			j0 = jiffies;
-			while ((j1 = jiffies) == j0)
-				cpu_relax();
-			while (time_before(jiffies,
-					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+			t = local_clock();
+			for (perf = 0; perf < max_perf; perf++) {
 				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
-				perf++;
 			}
+			ns = local_clock() - t;
 			preempt_enable();
 
-			if (perf > bestgenperf) {
-				bestgenperf = perf;
+			if (ns < ns_best) {
+				ns_best = ns;
 				best = *algo;
 			}
-			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-				(perf * HZ * (disks-2)) >>
-				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+			n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+			pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns);
 		}
 	}
 
@@ -206,31 +205,23 @@ static inline const struct raid6_calls *raid6_choose_gen(
 		goto out;
 	}
 
-	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		best->name,
-		(bestgenperf * HZ * (disks - 2)) >>
-		(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+	n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+	pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
+		best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
 
 	if (best->xor_syndrome) {
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-				   j1 + (1 << RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			best->xor_syndrome(disks, start, stop,
 					   PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			(perf * HZ * (disks - 2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? n / ns : 0, ns);
 	}
-
 out:
 	return best;
 }
-- 
2.49.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
  2025-04-07 14:31 Colin Ian King
@ 2025-04-07 18:35 ` kernel test robot
  2025-04-08  2:58 ` kernel test robot
  1 sibling, 0 replies; 8+ messages in thread
From: kernel test robot @ 2025-04-07 18:35 UTC (permalink / raw)
  To: Colin Ian King, Andrew Morton, Song Liu
  Cc: oe-kbuild-all, Linux Memory Management List, linux-kernel

Hi Colin,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-nonmm-unstable]
[also build test ERROR on akpm-mm/mm-everything linus/master v6.15-rc1 next-20250407]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Colin-Ian-King/md-raid6-algorithms-scale-test-duration-for-speedier-boots/20250407-225435
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link:    https://lore.kernel.org/r/20250407143105.60-1-colin.king%40intel.com
patch subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
config: i386-buildonly-randconfig-002-20250407 (https://download.01.org/0day-ci/archive/20250408/202504080240.huURqAeM-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250408/202504080240.huURqAeM-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202504080240.huURqAeM-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: lib/raid6/algos.o: in function `raid6_choose_gen':
>> lib/raid6/algos.c:191: undefined reference to `__udivdi3'
>> ld: lib/raid6/algos.c:209: undefined reference to `__udivdi3'
   ld: lib/raid6/algos.c:223: undefined reference to `__udivdi3'


vim +191 lib/raid6/algos.c

   156	
   157	static inline const struct raid6_calls *raid6_choose_gen(
   158		void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
   159	{
   160		unsigned long perf;
   161		const unsigned long max_perf = 2500;
   162		int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
   163		const struct raid6_calls *const *algo;
   164		const struct raid6_calls *best;
   165		const u64 ns_per_mb = 1000000000 >> 20;
   166		u64 n, ns, t, ns_best = ~0ULL;
   167	
   168		for (best = NULL, algo = raid6_algos; *algo; algo++) {
   169			if (!best || (*algo)->priority >= best->priority) {
   170				if ((*algo)->valid && !(*algo)->valid())
   171					continue;
   172	
   173				if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
   174					best = *algo;
   175					break;
   176				}
   177	
   178				preempt_disable();
   179				t = local_clock();
   180				for (perf = 0; perf < max_perf; perf++) {
   181					(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
   182				}
   183				ns = local_clock() - t;
   184				preempt_enable();
   185	
   186				if (ns < ns_best) {
   187					ns_best = ns;
   188					best = *algo;
   189				}
   190				n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
 > 191				pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns);
   192			}
   193		}
   194	
   195		if (!best) {
   196			pr_err("raid6: Yikes! No algorithm found!\n");
   197			goto out;
   198		}
   199	
   200		raid6_call = *best;
   201	
   202		if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
   203			pr_info("raid6: skipped pq benchmark and selected %s\n",
   204				best->name);
   205			goto out;
   206		}
   207	
   208		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
 > 209		pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
   210			best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
   211	
   212		if (best->xor_syndrome) {
   213			preempt_disable();
   214			t = local_clock();
   215			for (perf = 0; perf < max_perf; perf++) {
   216				best->xor_syndrome(disks, start, stop,
   217						   PAGE_SIZE, *dptrs);
   218			}
   219			ns = local_clock() - t;
   220			preempt_enable();
   221	
   222			n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
   223			pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? n / ns : 0, ns);
   224		}
   225	out:
   226		return best;
   227	}
   228	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
  2025-04-07 14:31 Colin Ian King
  2025-04-07 18:35 ` kernel test robot
@ 2025-04-08  2:58 ` kernel test robot
  1 sibling, 0 replies; 8+ messages in thread
From: kernel test robot @ 2025-04-08  2:58 UTC (permalink / raw)
  To: Colin Ian King, Andrew Morton, Song Liu
  Cc: oe-kbuild-all, Linux Memory Management List, linux-kernel

Hi Colin,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-nonmm-unstable]
[also build test ERROR on akpm-mm/mm-everything linus/master v6.15-rc1 next-20250407]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Colin-Ian-King/md-raid6-algorithms-scale-test-duration-for-speedier-boots/20250407-225435
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link:    https://lore.kernel.org/r/20250407143105.60-1-colin.king%40intel.com
patch subject: [PATCH] md/raid6 algorithms: scale test duration for speedier boots
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20250408/202504081059.IWn9WLkl-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250408/202504081059.IWn9WLkl-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202504081059.IWn9WLkl-lkp@intel.com/

All errors (new ones prefixed by >>):

   m68k-linux-ld: lib/raid6/algos.o: in function `raid6_choose_gen.constprop.0':
>> algos.c:(.text.unlikely+0xa8): undefined reference to `__udivdi3'
>> m68k-linux-ld: algos.c:(.text.unlikely+0x122): undefined reference to `__udivdi3'
   m68k-linux-ld: algos.c:(.text.unlikely+0x1be): undefined reference to `__udivdi3'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH] md/raid6 algorithms: scale test duration for speedier boots
@ 2021-09-09 13:59 Colin King
  0 siblings, 0 replies; 8+ messages in thread
From: Colin King @ 2021-09-09 13:59 UTC (permalink / raw)
  To: Neil Brown; +Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

The original code runs for a set run time based on the duration of
2^RAID6_TIME_JIFFIES_LG2. The default kernel value for
RAID6_TIME_JIFFIES_LG2 is 4, however, emperical testing shows that a
value of 3.5 is the sweet spot for getting consistent benchmarking
results and speeding up the run time of the benchmarking.

To achieve 2^3.5 we use the following:
   2^3.5 = 2^4 / 2^0.5
         = 2^4 / sqrt(2)
         = 2^4 * 0.707106781

Too keep this as integer math that is as accurate as required and avoiding
overflow, this becomes:
         = 2^4 * 181 / 256
         = (2^4 * 181) >> 8

We also need to scale down perf by the same factor, however, to
get a good approximate integer result without an overflow we scale
by 2^4.0 * sqrt(2) =
         = 2 ^ 4 * 1.41421356237
         = 2 ^ 4 * 1448 / 1024
         = (2 ^ 4 * 1448) >> 10

This has been tested on 2 AWS instances, a small t2 and a medium m3
with 30 boot tests each and compared to the same instances booted 30
times on an umodified kernel. In all results, we get the same
algorithms being selected and a 100% consistent result over the 30
boots, showing that this optimised jiffy timing scaling does not break
the original functionality.

On the t2.small we see a saving of ~0.126 seconds and t3.medium a saving of
~0.18 seconds.

Tested on a 4 CPU VM on an 8 thread Xeon server; seeing a saving of ~0.35
seconds (average over 50 boots).

The testing included double checking the algorithm chosen by the optimized
selection and seeing the same as pre-optimised version.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 lib/raid6/algos.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 6d5e5000fdd7..5d5b04632168 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -152,6 +152,10 @@ static inline const struct raid6_calls *raid6_choose_gen(
 
 	for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->prefer >= best->prefer) {
+			/* 2 ^ (RAID6_TIME_JIFFIES_LG2 - 0.5) */
+			const unsigned long raid6_time_jiffies =
+				((1 << RAID6_TIME_JIFFIES_LG2) * 181) >> 8;
+
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
 
@@ -167,7 +171,7 @@ static inline const struct raid6_calls *raid6_choose_gen(
 			while ((j1 = jiffies) == j0)
 				cpu_relax();
 			while (time_before(jiffies,
-					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+					    j1 + raid6_time_jiffies)) {
 				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
 				perf++;
 			}
@@ -178,8 +182,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				best = *algo;
 			}
 			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-				(perf * HZ * (disks-2)) >>
-				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+				(((perf * HZ * (disks-2)) >>
+				 (20 - 16 + RAID6_TIME_JIFFIES_LG2)) * 1448) >> 10);
 
 			if (!(*algo)->xor_syndrome)
 				continue;
@@ -191,7 +195,7 @@ static inline const struct raid6_calls *raid6_choose_gen(
 			while ((j1 = jiffies) == j0)
 				cpu_relax();
 			while (time_before(jiffies,
-					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+					    j1 + raid6_time_jiffies)) {
 				(*algo)->xor_syndrome(disks, start, stop,
 						      PAGE_SIZE, *dptrs);
 				perf++;
@@ -202,8 +206,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				bestxorperf = perf;
 
 			pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
-				(perf * HZ * (disks-2)) >>
-				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+				(((perf * HZ * (disks-2)) >>
+				 (20 - 16 + RAID6_TIME_JIFFIES_LG2 + 1)) * 1448) >> 10);
 		}
 	}
 
@@ -215,8 +219,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				(20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2));
 			if (best->xor_syndrome)
 				pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-					(bestxorperf * HZ * (disks-2)) >>
-					(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+					(((bestxorperf * HZ * (disks-2)) >>
+					 (20 - 16 + RAID6_TIME_JIFFIES_LG2 + 1)) * 1448) >> 10);
 		} else
 			pr_info("raid6: skip pq benchmark and using algorithm %s\n",
 				best->name);
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-04-08  2:59 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-05 17:08 [PATCH] md/raid6 algorithms: scale test duration for speedier boots Colin Ian King
2024-08-06  3:20 ` kernel test robot
2024-08-06  4:01 ` kernel test robot
2024-08-06 21:28   ` King, Colin
  -- strict thread matches above, loose matches on Subject: below --
2025-04-07 14:31 Colin Ian King
2025-04-07 18:35 ` kernel test robot
2025-04-08  2:58 ` kernel test robot
2021-09-09 13:59 Colin King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox