Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Jagmeet Randhawa <jagmeet.randhawa@intel.com>
Cc: igt-dev@lists.freedesktop.org
Subject: [igt-dev] [PATCH] lib/gpgpu_fill: Add support for Xe2 platforms
Date: Tue,  7 Nov 2023 12:59:32 -0800	[thread overview]
Message-ID: <20231107205932.701351-2-jagmeet.randhawa@intel.com> (raw)
In-Reply-To: <20231107205932.701351-1-jagmeet.randhawa@intel.com>

Add xe2lpg_gpgpu_fillfunc so that gpgpu_fill can run on Xe2.
On Xe2 there are a few changes to GPU command instruction lengths.

There is also no 'Media Block Write' message, so the 'Typed 2D Block
Store' message has to be used in the shader instead.

The shader was compiled using the following command:

iga64 -p=2 -Wall -Xprint-ldst -Xauto-deps --assemble
xe2lpg_gpgpu_kernel.asm | od -A n -v -t x4 |sed -e 's/ / 0x/g'
| sed -e 's/^/\t{/' | sed -e 's/([0-9]|[a-f]|[A-F]) /\1, /g'
| sed -e 's/$/ },/g' | sed -e 's/\t /\t/g'

Signed-off-by: Christoph Manszewski <christoph.manszewski@intel.com>
Signed-off-by: Jagmeet Randhawa <jagmeet.randhawa@intel.com>
Reviewed-by: Dominik Grzegorzek <dominik.grzegorzek@intel.com>
---
 lib/gpgpu_fill.c                              | 23 ++++++++
 lib/gpgpu_fill.h                              |  6 ++
 lib/gpu_cmds.c                                | 58 +++++++++++++++----
 .../shaders/gpgpu/xe2lpg_gpgpu_kernel.asm     | 13 +++++
 lib/intel_batchbuffer.c                       |  4 +-
 5 files changed, 93 insertions(+), 11 deletions(-)
 create mode 100644 lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index eed821872..1270c2b22 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -124,6 +124,18 @@ static const uint32_t xehpc_gpgpu_kernel[][4] = {
 	{ 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
 };
 
+static const uint32_t xe2lpg_gpgpu_kernel[][4] = {
+	{ 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
+	{ 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
+	{ 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
+	{ 0x00100061, 0x04054220, 0x00000000, 0x00000000 },
+	{ 0x00041a61, 0x04550220, 0x00220205, 0x00000000 },
+	{ 0x00000061, 0x04754550, 0x00000000, 0x000f000f },
+	{ 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
+	{ 0x00132031, 0x00000000, 0xd00e0494, 0x04000000 },
+	{ 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
+};
+
 /*
  * This sets up the gpgpu pipeline,
  *
@@ -398,3 +410,14 @@ void xehpc_gpgpu_fillfunc(int i915,
 			      xehpc_gpgpu_kernel,
 			      sizeof(xehpc_gpgpu_kernel));
 }
+
+void xe2lpg_gpgpu_fillfunc(int i915,
+			   struct intel_buf *buf,
+			   unsigned int x, unsigned int y,
+			   unsigned int width, unsigned int height,
+			   uint8_t color)
+{
+	__xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
+			      xe2lpg_gpgpu_kernel,
+			      sizeof(xe2lpg_gpgpu_kernel));
+}
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index f81cd0b53..c3b47c10a 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -75,4 +75,10 @@ xehpc_gpgpu_fillfunc(int i915,
 		     unsigned int width, unsigned int height,
 		     uint8_t color);
 
+void xe2lpg_gpgpu_fillfunc(int i915,
+			   struct intel_buf *buf,
+			   unsigned int x, unsigned int y,
+			   unsigned int width, unsigned int height,
+			   uint8_t color);
+
 #endif /* GPGPU_FILL_H */
diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index f19f93b28..49ba364f9 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -328,18 +328,41 @@ fill_binding_table(struct intel_bb *ibb, struct intel_buf *buf)
 	binding_table = intel_bb_ptr(ibb);
 	intel_bb_ptr_add(ibb, 64);
 
-	if (intel_graphics_ver(devid) >= IP_VER(12, 50))
+	if (intel_graphics_ver(devid) >= IP_VER(20, 0)) {
+		/*
+		 * Up until now, SURFACEFORMAT_R8_UNORM was used regardless of the 'bpp' value.
+		 * For bpp 32 this results in a surface that is 4x narrower than expected. However
+		 * it worked, because the 'Media Block Read/Write' message assumes the surface width
+		 * is always in units of dwords.
+		 *
+		 * Since Xe2 the Media Block Write message got replaced with 'Typed 2D Block
+		 * Load/Store Message' which correctly interprets the surface format.
+		 */
+		if (buf->bpp == 32)
+			binding_table[0] = xehp_fill_surface_state(ibb, buf,
+								      SURFACEFORMAT_R8G8B8A8_UNORM,
+								      1);
+		else if (buf->bpp == 8)
+			binding_table[0] = xehp_fill_surface_state(ibb, buf,
+								      SURFACEFORMAT_R8_UNORM,
+								      1);
+		else
+			igt_assert_f(false,
+				     "Surface state for bpp = %u not implemented",
+				     buf->bpp);
+	} else if (intel_graphics_ver(devid) >= IP_VER(12, 50)) {
 		binding_table[0] = xehp_fill_surface_state(ibb, buf,
 							   SURFACEFORMAT_R8_UNORM, 1);
-	else if (intel_graphics_ver(devid) >= IP_VER(9, 0))
+	} else if (intel_graphics_ver(devid) >= IP_VER(9, 0)) {
 		binding_table[0] = gen9_fill_surface_state(ibb, buf,
 							   SURFACEFORMAT_R8_UNORM, 1);
-	else if (intel_graphics_ver(devid) >= IP_VER(8, 0))
+	} else if (intel_graphics_ver(devid) >= IP_VER(8, 0)) {
 		binding_table[0] = gen8_fill_surface_state(ibb, buf,
 							   SURFACEFORMAT_R8_UNORM, 1);
-	else
+	} else {
 		binding_table[0] = gen7_fill_surface_state(ibb, buf,
 							   SURFACEFORMAT_R8_UNORM, 1);
+	}
 
 	return binding_table_offset;
 }
@@ -959,8 +982,14 @@ xehp_emit_cfe_state(struct intel_bb *ibb, uint32_t threads)
 void
 xehp_emit_state_compute_mode(struct intel_bb *ibb)
 {
-	intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE);
+
+	uint32_t dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0);
+
+	intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE | dword_length);
 	intel_bb_out(ibb, 0);
+
+	if (dword_length)
+		intel_bb_out(ibb, 0);
 }
 
 void
@@ -976,6 +1005,8 @@ xehp_emit_state_binding_table_pool_alloc(struct intel_bb *ibb)
 void
 xehp_emit_state_base_address(struct intel_bb *ibb)
 {
+	uint32_t tmp;
+
 	intel_bb_out(ibb, GEN8_STATE_BASE_ADDRESS | 0x14);            //dw0
 
 	/* general */
@@ -983,7 +1014,8 @@ xehp_emit_state_base_address(struct intel_bb *ibb)
 	intel_bb_out(ibb, 0);
 
 	/* stateless data port */
-	intel_bb_out(ibb, 0 | BASE_ADDRESS_MODIFY);                   //dw3
+	tmp = intel_graphics_ver(ibb->devid) == IP_VER(20, 0) ? 0 : BASE_ADDRESS_MODIFY;
+	intel_bb_out(ibb, 0 | tmp);                  //dw3
 
 	/* surface */
 	intel_bb_emit_reloc(ibb, ibb->handle, I915_GEM_DOMAIN_SAMPLER, //dw4-dw5
@@ -1008,7 +1040,10 @@ xehp_emit_state_base_address(struct intel_bb *ibb)
 	/* dynamic state buffer size */
 	intel_bb_out(ibb, 1 << 12 | 1);                             //dw13
 	/* indirect object buffer size */
-	intel_bb_out(ibb, 0xfffff000 | 1);                          //dw14
+	if (intel_graphics_ver(ibb->devid) == IP_VER(20, 0))	    //dw14
+		intel_bb_out(ibb, 0);
+	else
+		intel_bb_out(ibb, 0xfffff000 | 1);
 	/* intruction buffer size */
 	intel_bb_out(ibb, 1 << 12 | 1);                             //dw15
 
@@ -1030,7 +1065,7 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
 		       struct xehp_interface_descriptor_data *pidd,
 		       uint8_t color)
 {
-	uint32_t x_dim, y_dim, mask;
+	uint32_t x_dim, y_dim, mask, dword_length;
 
 	/*
 	 * Simply do SIMD16 based dispatch, so every thread uses
@@ -1052,7 +1087,8 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
 	else
 		mask = (1 << mask) - 1;
 
-	intel_bb_out(ibb, XEHP_COMPUTE_WALKER | 0x25);
+	dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0) ? 0x26 : 0x25;
+	intel_bb_out(ibb, XEHP_COMPUTE_WALKER | dword_length);
 
 	intel_bb_out(ibb, 0); /* debug object */		//dw1
 	intel_bb_out(ibb, 0); /* indirect data length */	//dw2
@@ -1091,8 +1127,10 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
 	intel_bb_out(ibb, 0);					//dw16
 	intel_bb_out(ibb, 0);					//dw17
 
+	if (intel_graphics_ver(ibb->devid) >= IP_VER(20, 0))	//Xe2:dw18
+		intel_bb_out(ibb, 0);
 	/* Interface descriptor data */
-	for (int i = 0; i < 8; i++) {			       //dw18-25
+	for (int i = 0; i < 8; i++) {			       //dw18-25 (Xe2:dw19-26)
 		intel_bb_out(ibb, ((uint32_t *) pidd)[i]);
 	}
 
diff --git a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
new file mode 100644
index 000000000..e2ecc71f5
--- /dev/null
+++ b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
@@ -0,0 +1,13 @@
+L0:
+         mov (4|M0)               r1.0<1>:ub    r1.0<0;1,0>:ub                        // Load r1.0-3 with color byte
+         shl (1|M0)               r2.0<1>:ud    r0.1<0;1,0>:ud    0x4:ud              // Load r2.0-3 with tg id X << 4
+         mov (1|M0)               r2.1<1>:ud    r0.6<0;1,0>:ud                        // Load r2.4-7 with tg id Y
+
+         // payload setup
+         mov (16|M0)              r4.0<1>:ud    0x0:ud                                // Zero out register R4
+         mov (2|M0)               r4.5<1>:ud    r2.0<2;2,1>:ud                        // Store X and Y block start (160:191 and 192:223)
+         mov (1|M0)               r4.14<1>:w    0xF:w                                 // Store X and Y block size (224:231 and 232:239)
+         mov (16|M0)              r5.0<1>:ud    r1.0<0;1,0>:ud                        // Load r5-r6 with color byte
+
+         send.tgm (16|M0)         null     r4    null:0    0x0    0x64000007          // Send TypedStore2DBlock to tgm port
+         send.gtwy (8|M0)         null    r80    null:0    0x0    0x02000000 {EOT}
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index df82ef5f5..fe767d8c2 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -755,7 +755,9 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
 {
 	igt_fillfunc_t fill = NULL;
 
-	if (IS_METEORLAKE(devid))
+	if (intel_graphics_ver(devid) >= IP_VER(20, 0))
+		fill = xe2lpg_gpgpu_fillfunc;
+	else if (IS_METEORLAKE(devid))
 		fill = xehp_gpgpu_fillfunc;
 	else if (intel_graphics_ver(devid) >= IP_VER(12, 60))
 		fill = xehpc_gpgpu_fillfunc;
-- 
2.25.1

  reply	other threads:[~2023-11-07 20:59 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-07 20:59 [igt-dev] [PATCH v4] lib/gpgpu_fill: Add support for Xe2 platforms Jagmeet Randhawa
2023-11-07 20:59 ` Jagmeet Randhawa [this message]
2023-11-07 22:05 ` [igt-dev] ✗ Fi.CI.BAT: failure for " Patchwork
2023-11-07 22:18 ` [igt-dev] ✗ CI.xeBAT: " Patchwork
2023-11-15 21:01 ` [igt-dev] ✗ Fi.CI.BAT: failure for lib/gpgpu_fill: Add support for Xe2 platforms (rev2) Patchwork
2023-11-16 11:19   ` Kamil Konieczny
2023-11-21 12:29     ` Illipilli, TejasreeX
2023-11-15 21:31 ` [igt-dev] ✓ CI.xeBAT: success " Patchwork
2023-11-20 22:27 ` [igt-dev] ✗ Fi.CI.BAT: failure for lib/gpgpu_fill: Add support for Xe2 platforms (rev3) Patchwork
2023-11-20 23:49 ` [igt-dev] ✗ CI.xeBAT: " Patchwork
2023-11-21 22:03 ` [igt-dev] ✓ Fi.CI.BAT: success for lib/gpgpu_fill: Add support for Xe2 platforms (rev4) Patchwork
2023-11-21 22:04 ` [igt-dev] ✓ CI.xeBAT: " Patchwork
2023-11-22 16:54 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2023-11-23  9:27   ` Kamil Konieczny
2023-11-23 10:35 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2023-11-23 22:48 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2023-11-25  6:44 ` Patchwork
2023-11-25  7:42 ` Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231107205932.701351-2-jagmeet.randhawa@intel.com \
    --to=jagmeet.randhawa@intel.com \
    --cc=igt-dev@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox