[PATCH i-g-t v2 1/5] lib/intel_compute: add support for stoppable loop

Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

From: "Zbigniew Kempczyński" <zbigniew.kempczynski@intel.com>
To: igt-dev@lists.freedesktop.org
Cc: "Zbigniew Kempczyński" <zbigniew.kempczynski@intel.com>,
	"Francois Dugast" <francois.dugast@intel.com>,
	"Priyanka Dandamudi" <priyanka.dandamudi@intel.com>
Subject: [PATCH i-g-t v2 1/5] lib/intel_compute: add support for stoppable loop
Date: Fri,  4 Apr 2025 14:31:36 +0200	[thread overview]
Message-ID: <20250404123140.260143-2-zbigniew.kempczynski@intel.com> (raw)
In-Reply-To: <20250404123140.260143-1-zbigniew.kempczynski@intel.com>

Current loop used for long running job in wmtp case has a drawback
in which we tweak number of loops.

Lets add loop which allows to be stopped from cpu write to first
input data dword. This requires to use volatile for input buffer
and uc.uc send to avoid checking cache instead of direct memory
read.

Before submitting short (compute square) job I've added 1 second
delay to allow other processes to just start many long running
jobs (loops) to make gpu really busy. Previously submission
long / short would complete before another process started same
long / short pair so concurency was more random.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Cc: Priyanka Dandamudi <priyanka.dandamudi@intel.com>
---
 lib/intel_compute.c | 39 ++++++++++++++++++++++++++++++++-------
 lib/intel_compute.h |  2 ++
 opencl/loop.cl      |  9 +++++++++
 3 files changed, 43 insertions(+), 7 deletions(-)
 create mode 100644 opencl/loop.cl

diff --git a/lib/intel_compute.c b/lib/intel_compute.c
index 28149db53e..50e134c8a5 100644
--- a/lib/intel_compute.c
+++ b/lib/intel_compute.c
@@ -46,7 +46,7 @@
 #define OFFSET_STATE_SIP			0xFFFF0000
 
 #define USER_FENCE_VALUE			0xdeadbeefdeadbeefull
-
+#define MAGIC_LOOP_STOP			0x12341234
 /*
  * TGP  - ThreadGroup Preemption
  * WMTP - Walker Mid Thread Preemption
@@ -1874,6 +1874,8 @@ bool xe_run_intel_compute_kernel_on_engine(int fd,
  * @short_kernel_size: size of @short_kernel
  * @sip_kernel: WMTP sip kernel which does save restore during preemption
  * @sip_kernel_size: size of @sip_kernel
+ * @loop_kernel: loop kernel binary stoppable by cpu write
+ * @loop_kernel_size: size of @loop_kernel
  */
 static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel,
 					unsigned int long_kernel_size,
@@ -1881,6 +1883,8 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
 					unsigned int short_kernel_size,
 					const unsigned char *sip_kernel,
 					unsigned int sip_kernel_size,
+					const unsigned char *loop_kernel,
+					unsigned int loop_kernel_size,
 					struct drm_xe_engine_class_instance *eci,
 					bool threadgroup_preemption)
 {
@@ -1975,7 +1979,10 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
 	bo_sync_short->sync = 0;
 	sync_short.addr = ADDR_SYNC2;
 
-	bo_dict_long[0].size = ALIGN(long_kernel_size, 0x1000);
+	if (loop_kernel)
+		bo_dict_long[0].size = ALIGN(loop_kernel_size, 0x1000);
+	else
+		bo_dict_long[0].size = ALIGN(long_kernel_size, 0x1000);
 	bo_dict_short[0].size = ALIGN(short_kernel_size, 0x1000);
 
 	bo_dict_long[10].size = ALIGN(sip_kernel_size, 0x1000);
@@ -1984,7 +1991,10 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
 	bo_execenv_bind(&execenv_long, bo_dict_long, XE2_BO_PREEMPT_DICT_ENTRIES);
 	bo_execenv_bind(&execenv_short, bo_dict_short, XE2_BO_PREEMPT_DICT_ENTRIES);
 
-	memcpy(bo_dict_long[0].data, long_kernel, long_kernel_size);
+	if (loop_kernel)
+		memcpy(bo_dict_long[0].data, loop_kernel, loop_kernel_size);
+	else
+		memcpy(bo_dict_long[0].data, long_kernel, long_kernel_size);
 	memcpy(bo_dict_short[0].data, short_kernel, short_kernel_size);
 
 	memcpy(bo_dict_long[10].data, sip_kernel, sip_kernel_size);
@@ -2024,13 +2034,22 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
 				    OFFSET_INDIRECT_DATA_START, OFFSET_KERNEL, OFFSET_STATE_SIP, false);
 
 	xe_exec_sync(fd, execenv_long.exec_queue, ADDR_BATCH, &sync_long, 1);
+
+	/* Wait until multiple LR jobs will start to occupy gpu */
+	if (loop_kernel)
+		sleep(1);
+
 	xe_exec_sync(fd, execenv_short.exec_queue, ADDR_BATCH, &sync_short, 1);
 
 	xe_wait_ufence(fd, &bo_sync_short->sync, USER_FENCE_VALUE, execenv_short.exec_queue,
 		       INT64_MAX);
+
 	/* Check that the long kernel has not completed yet */
 	igt_assert_neq(0, __xe_wait_ufence(fd, &bo_sync_long->sync, USER_FENCE_VALUE,
 					   execenv_long.exec_queue, &timeout_short));
+	if (loop_kernel)
+		((int *)bo_dict_long[4].data)[0] = MAGIC_LOOP_STOP;
+
 	xe_wait_ufence(fd, &bo_sync_long->sync, USER_FENCE_VALUE, execenv_long.exec_queue,
 		       INT64_MAX);
 
@@ -2040,7 +2059,7 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
 	munmap(bo_sync_short, bo_size_short);
 	gem_close(fd, bo_short);
 
-	for (int i = 0; i < SIZE_DATA; i++) {
+	for (int i = loop_kernel ? 1 : 0; i < SIZE_DATA; i++) {
 		float input = input_data[i];
 		float output = output_data[i];
 		float expected_output = input * input;
@@ -2067,9 +2086,11 @@ static void xe2lpg_compute_preempt_exec(int fd, const unsigned char *long_kernel
 			 */
 			igt_assert(f1 > long_kernel_loop_count);
 		} else {
-			if (f1 != long_kernel_loop_count)
-				igt_debug("[%4d] f1: %f != %u\n", i, f1, long_kernel_loop_count);
-			igt_assert(f1 == long_kernel_loop_count);
+			if (!loop_kernel) {
+				if (f1 != long_kernel_loop_count)
+					igt_debug("[%4d] f1: %f != %u\n", i, f1, long_kernel_loop_count);
+				igt_assert(f1 == long_kernel_loop_count);
+			}
 		}
 	}
 
@@ -2088,6 +2109,8 @@ static const struct {
 			     unsigned int short_kernel_size,
 			     const unsigned char *sip_kernel,
 			     unsigned int sip_kernel_size,
+			     const unsigned char *loop_kernel,
+			     unsigned int loop_kernel_size,
 			     struct drm_xe_engine_class_instance *eci,
 			     bool threadgroup_preemption);
 	uint32_t compat;
@@ -2149,6 +2172,8 @@ static bool __run_intel_compute_kernel_preempt(int fd,
 							  kernels->kernel, kernels->size,
 							  kernels->sip_kernel,
 							  kernels->sip_kernel_size,
+							  kernels->loop_kernel,
+							  kernels->loop_kernel_size,
 							  eci,
 							  threadgroup_preemption);
 
diff --git a/lib/intel_compute.h b/lib/intel_compute.h
index dc0fe2ec20..8310536a96 100644
--- a/lib/intel_compute.h
+++ b/lib/intel_compute.h
@@ -31,6 +31,8 @@ struct intel_compute_kernels {
 	const unsigned char *sip_kernel;
 	unsigned int long_kernel_size;
 	const unsigned char *long_kernel;
+	unsigned int loop_kernel_size;
+	const unsigned char *loop_kernel;
 };
 
 /**
diff --git a/opencl/loop.cl b/opencl/loop.cl
new file mode 100644
index 0000000000..7fd2c13368
--- /dev/null
+++ b/opencl/loop.cl
@@ -0,0 +1,9 @@
+__kernel void loop(volatile __global int *input,
+		   __global int *output,
+		   unsigned int count)
+{
+	while (1) {
+		if (input[0] == 0x12341234)
+			break;
+	}
+}
-- 
2.34.1

next prev parent reply	other threads:[~2025-04-04 12:31 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-04 12:31 [PATCH i-g-t v2 0/5] Replace loop in WMTP tests Zbigniew Kempczyński
2025-04-04 12:31 ` Zbigniew Kempczyński [this message]
2025-04-04 13:18   ` [PATCH i-g-t v2 1/5] lib/intel_compute: add support for stoppable loop Francois Dugast
2025-04-04 12:31 ` [PATCH i-g-t v2 2/5] lib/intel_compute_square_kernels: use stoppable loop for LNL/BMG Zbigniew Kempczyński
2025-04-04 12:31 ` [PATCH i-g-t v2 3/5] lib/intel_compute_square_kernel: add loop shader binary for PTL Zbigniew Kempczyński
2025-04-04 12:31 ` [PATCH i-g-t v2 4/5] tests/xe_compute_preempt: adjust number of children according to ram size Zbigniew Kempczyński
2025-04-07  8:49   ` Dandamudi, Priyanka
2025-04-07 19:01     ` Zbigniew Kempczyński
2025-04-08 12:12       ` Kamil Konieczny
2025-04-09  8:47     ` Dandamudi, Priyanka
2025-04-04 12:31 ` [PATCH i-g-t v2 5/5] tests/xe_compute_preempt: consume all ram for wmtp Zbigniew Kempczyński
2025-04-04 14:34 ` ✓ Xe.CI.BAT: success for Replace loop in WMTP tests (rev2) Patchwork
2025-04-04 14:55 ` ✓ i915.CI.BAT: " Patchwork
2025-04-04 17:35 ` ✗ i915.CI.Full: failure " Patchwork

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:28149db53 dfblob:50e134c8a dfblob:dc0fe2ec2 dfblob:8310536a9
dfblob:7fd2c1336 )
 OR (
bs:"[PATCH i-g-t v2 1/5] lib/intel_compute: add support for stoppable loop" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250404123140.260143-2-zbigniew.kempczynski@intel.com \
    --to=zbigniew.kempczynski@intel.com \
    --cc=francois.dugast@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    --cc=priyanka.dandamudi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox