Igt-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform
@ 2023-12-04 12:28 janga.rahul.kumar
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses janga.rahul.kumar
                   ` (4 more replies)
  0 siblings, 5 replies; 8+ messages in thread
From: janga.rahul.kumar @ 2023-12-04 12:28 UTC (permalink / raw)
  To: igt-dev, ramadevi.gandi, janga.rahul.kumar

From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>

Extend compute square test to run on LNL.

v2: Reuse address variables. (Zbigniew)

Janga Rahul Kumar (4):
  lib/intel_compute: Rename compute state addresses
  lib/intel_compute_square_kernels: Add xe2lpg compute sqaure kernel
  lib/intel_compute: Add XE2 compute implementation
  tests/xe_compute: Update documentation regarding test requirements

 lib/intel_compute.c                | 235 +++++++++++++++++++++++++++--
 lib/intel_compute_square_kernels.c |  36 +++++
 tests/intel/xe_compute.c           |   2 +-
 3 files changed, 258 insertions(+), 15 deletions(-)

-- 
2.25.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses
  2023-12-04 12:28 [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform janga.rahul.kumar
@ 2023-12-04 12:28 ` janga.rahul.kumar
  2023-12-04 13:50   ` Zbigniew Kempczyński
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 2/4] lib/intel_compute_square_kernels: Add xe2lpg compute sqaure kernel janga.rahul.kumar
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 8+ messages in thread
From: janga.rahul.kumar @ 2023-12-04 12:28 UTC (permalink / raw)
  To: igt-dev, ramadevi.gandi, janga.rahul.kumar

From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>

Rename platform specific state base and binding offset address variable
names to generic ones as they can be reused for most of the platforms.

Cc: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
---
 lib/intel_compute.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/lib/intel_compute.c b/lib/intel_compute.c
index dd921bf46..c8dc42338 100644
--- a/lib/intel_compute.c
+++ b/lib/intel_compute.c
@@ -37,9 +37,9 @@
 #define OFFSET_INDIRECT_DATA_START	0xFFFD0000
 #define OFFSET_KERNEL			0xFFFE0000
 
-#define XEHP_ADDR_GENERAL_STATE_BASE		0x80000000UL
-#define XEHP_ADDR_INSTRUCTION_STATE_BASE	0x90000000UL
-#define XEHP_OFFSET_BINDING_TABLE		0x1000
+#define ADDR_GENERAL_STATE_BASE		0x80000000UL
+#define ADDR_INSTRUCTION_STATE_BASE	0x90000000UL
+#define OFFSET_BINDING_TABLE		0x1000
 
 struct bo_dict_entry {
 	uint64_t addr;
@@ -867,7 +867,7 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
 {
 #define XEHP_BO_DICT_ENTRIES 9
 	struct bo_dict_entry bo_dict[XEHP_BO_DICT_ENTRIES] = {
-		{ .addr = XEHP_ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
+		{ .addr = ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
 		  .name = "instr state base"},
 		{ .addr = ADDR_DYNAMIC_STATE_BASE,
 		  .size = 0x100000,
@@ -875,16 +875,16 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
 		{ .addr = ADDR_SURFACE_STATE_BASE,
 		  .size = 0x1000,
 		  .name = "surface state base"},
-		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
+		{ .addr = ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
 		  .size =  0x1000,
 		  .name = "indirect object base"},
 		{ .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT,
 		  .name = "addr input"},
 		{ .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT,
 		  .name = "addr output" },
-		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE, .size = 0x100000,
+		{ .addr = ADDR_GENERAL_STATE_BASE, .size = 0x100000,
 		  .name = "general state base" },
-		{ .addr = ADDR_SURFACE_STATE_BASE + XEHP_OFFSET_BINDING_TABLE,
+		{ .addr = ADDR_SURFACE_STATE_BASE + OFFSET_BINDING_TABLE,
 		  .size = 0x1000,
 		  .name = "binding table" },
 		{ .addr = ADDR_BATCH, .size = SIZE_BATCH,
@@ -912,10 +912,10 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
 		((float *)dinput)[i] = rand() / (float)RAND_MAX;
 
 	xehp_compute_exec_compute(bo_dict[8].data,
-				  XEHP_ADDR_GENERAL_STATE_BASE,
+				  ADDR_GENERAL_STATE_BASE,
 				  ADDR_SURFACE_STATE_BASE,
 				  ADDR_DYNAMIC_STATE_BASE,
-				  XEHP_ADDR_INSTRUCTION_STATE_BASE,
+				  ADDR_INSTRUCTION_STATE_BASE,
 				  OFFSET_INDIRECT_DATA_START,
 				  OFFSET_KERNEL);
 
@@ -1081,16 +1081,16 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
 {
 #define XEHPC_BO_DICT_ENTRIES 6
 	struct bo_dict_entry bo_dict[XEHPC_BO_DICT_ENTRIES] = {
-		{ .addr = XEHP_ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
+		{ .addr = ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
 		  .name = "instr state base"},
-		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
+		{ .addr = ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
 		  .size =  0x10000,
 		  .name = "indirect object base"},
 		{ .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT,
 		  .name = "addr input"},
 		{ .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT,
 		  .name = "addr output" },
-		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE, .size = 0x10000,
+		{ .addr = ADDR_GENERAL_STATE_BASE, .size = 0x10000,
 		  .name = "general state base" },
 		{ .addr = ADDR_BATCH, .size = SIZE_BATCH,
 		  .name = "batch" },
@@ -1114,10 +1114,10 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
 		((float *)dinput)[i] = rand() / (float)RAND_MAX;
 
 	xehpc_compute_exec_compute(bo_dict[5].data,
-				   XEHP_ADDR_GENERAL_STATE_BASE,
+				   ADDR_GENERAL_STATE_BASE,
 				   ADDR_SURFACE_STATE_BASE,
 				   ADDR_DYNAMIC_STATE_BASE,
-				   XEHP_ADDR_INSTRUCTION_STATE_BASE,
+				   ADDR_INSTRUCTION_STATE_BASE,
 				   OFFSET_INDIRECT_DATA_START,
 				   OFFSET_KERNEL);
 
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] [PATCH i-g-t v2 2/4] lib/intel_compute_square_kernels: Add xe2lpg compute sqaure kernel
  2023-12-04 12:28 [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform janga.rahul.kumar
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses janga.rahul.kumar
@ 2023-12-04 12:28 ` janga.rahul.kumar
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 3/4] lib/intel_compute: Add XE2 compute implementation janga.rahul.kumar
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 8+ messages in thread
From: janga.rahul.kumar @ 2023-12-04 12:28 UTC (permalink / raw)
  To: igt-dev, ramadevi.gandi, janga.rahul.kumar

From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>

Add xe2lpg compute square kernel created using iga64.

Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
---
 lib/intel_compute_square_kernels.c | 36 ++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/lib/intel_compute_square_kernels.c b/lib/intel_compute_square_kernels.c
index 3d5b1ad47..682fdfcf6 100644
--- a/lib/intel_compute_square_kernels.c
+++ b/lib/intel_compute_square_kernels.c
@@ -183,6 +183,37 @@ static const unsigned char xehpc_kernel_square_bin[] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 };
 
+static const unsigned char xe2lpg_kernel_square_bin[] = { /* xe2lpg compute square kernel binary, created using iga64 */
+	0x65, 0x00, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f, 0x04, 0x00, 0x00, 0x02,
+	0xc0, 0xff, 0xff, 0xff, 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f,
+	0x04, 0x7f, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x31, 0x22, 0x03, 0x80,
+	0x00, 0x00, 0x0c, 0x05, 0x8f, 0x7f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6,
+	0x01, 0x00, 0x00, 0x00, 0x00, 0x42, 0x01, 0x00, 0x00, 0x00, 0x00, 0x20,
+	0x04, 0x00, 0x00, 0x00, 0x66, 0x09, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
+	0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40, 0x41, 0x22, 0x03, 0x80,
+	0x60, 0x06, 0x01, 0x20, 0x54, 0x05, 0x00, 0x01, 0x14, 0x00, 0x00, 0x00,
+	0x53, 0x80, 0x00, 0x80, 0x60, 0x06, 0x05, 0x02, 0x54, 0x05, 0x00, 0x06,
+	0x14, 0x00, 0x00, 0x00, 0x52, 0x19, 0x14, 0x00, 0x60, 0x06, 0x04, 0x06,
+	0x04, 0x02, 0x0e, 0x01, 0x04, 0x01, 0x04, 0x04, 0x70, 0x19, 0x14, 0x00,
+	0x20, 0x02, 0x01, 0x00, 0x04, 0x06, 0x10, 0x52, 0x44, 0x05, 0x00, 0x00,
+	0x2e, 0x00, 0x14, 0x14, 0x00, 0xc0, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00,
+	0x98, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0xb4, 0x14, 0x06, 0x10, 0x00,
+	0x61, 0x00, 0x08, 0xb4, 0x16, 0x07, 0x10, 0x00, 0x69, 0x1a, 0x10, 0x00,
+	0x70, 0x86, 0x05, 0x18, 0x04, 0x14, 0x20, 0x05, 0x02, 0x00, 0x02, 0x00,
+	0x69, 0x1a, 0x10, 0x02, 0x70, 0x86, 0x05, 0x1a, 0x04, 0x16, 0x20, 0x05,
+	0x02, 0x00, 0x02, 0x00, 0x40, 0x1a, 0x00, 0x38, 0x08, 0x18, 0x10, 0x05,
+	0x40, 0x1a, 0x08, 0x38, 0x0a, 0x1a, 0x10, 0x05, 0x31, 0x23, 0x17, 0x00,
+	0x00, 0x00, 0x14, 0x0c, 0x24, 0x08, 0x00, 0xfb, 0x00, 0x00, 0x00, 0x00,
+	0x40, 0x00, 0x00, 0x38, 0x10, 0x18, 0x30, 0x05, 0x40, 0x00, 0x08, 0x38,
+	0x12, 0x1a, 0x30, 0x05, 0x41, 0x83, 0x20, 0x20, 0x0e, 0x0c, 0x00, 0x0c,
+	0x31, 0x24, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x10, 0x08, 0xfb,
+	0x14, 0x0e, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x42, 0x01, 0x00,
+	0x00, 0x00, 0x00, 0x20, 0x10, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x14, 0x00,
+	0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+	0x61, 0x00, 0x10, 0x28, 0x7f, 0x00, 0x10, 0x00, 0x31, 0x20, 0x02, 0x80,
+	0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00
+};
+
 const struct intel_compute_kernels intel_compute_square_kernels[] = {
 	{
 		.ip_ver = IP_VER(12, 0),
@@ -204,5 +235,10 @@ const struct intel_compute_kernels intel_compute_square_kernels[] = {
 		.size = sizeof(xehpc_kernel_square_bin),
 		.kernel = xehpc_kernel_square_bin,
 	},
+	{
+		.ip_ver = IP_VER(20, 04),
+		.size = sizeof(xe2lpg_kernel_square_bin),
+		.kernel = xe2lpg_kernel_square_bin,
+	},
 	{}
 };
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] [PATCH i-g-t v2 3/4] lib/intel_compute: Add XE2 compute implementation
  2023-12-04 12:28 [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform janga.rahul.kumar
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses janga.rahul.kumar
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 2/4] lib/intel_compute_square_kernels: Add xe2lpg compute sqaure kernel janga.rahul.kumar
@ 2023-12-04 12:28 ` janga.rahul.kumar
  2023-12-04 13:51   ` Zbigniew Kempczyński
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 4/4] tests/xe_compute: Update documentation regarding test requirements janga.rahul.kumar
  2023-12-04 16:54 ` [igt-dev] ✗ Fi.CI.BUILD: failure for Extend compute square test to Xe2 platform (rev2) Patchwork
  4 siblings, 1 reply; 8+ messages in thread
From: janga.rahul.kumar @ 2023-12-04 12:28 UTC (permalink / raw)
  To: igt-dev, ramadevi.gandi, janga.rahul.kumar

From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>

Add compute pipeline and walker instructions for xe2lpg.

v2: Reuse address variables. (Zbigniew)

Cc: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
---
 lib/intel_compute.c | 207 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 207 insertions(+)

diff --git a/lib/intel_compute.c b/lib/intel_compute.c
index c8dc42338..65a85d335 100644
--- a/lib/intel_compute.c
+++ b/lib/intel_compute.c
@@ -41,6 +41,8 @@
 #define ADDR_INSTRUCTION_STATE_BASE	0x90000000UL
 #define OFFSET_BINDING_TABLE		0x1000
 
+#define XE2_ADDR_STATE_CONTEXT_DATA_BASE	0x900000UL
+
 struct bo_dict_entry {
 	uint64_t addr;
 	uint32_t size;
@@ -1137,6 +1139,206 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
 	bo_execenv_destroy(&execenv);
 }
 
+static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch, /* batch buffer filled dword by dword below */
+					uint64_t addr_general_state_base,
+					uint64_t addr_surface_state_base,
+					uint64_t addr_dynamic_state_base,
+					uint64_t addr_instruction_state_base,
+					uint64_t addr_state_contect_data_base, /* NOTE(review): "contect" reads like a typo of "context" */
+					uint64_t offset_indirect_data_start,
+					uint64_t kernel_start_pointer)
+{
+	int b = 0; /* index of the next dword to write into addr_bo_buffer_batch */
+
+	igt_debug("general   state base: %lx\n", addr_general_state_base);
+	igt_debug("surface   state base: %lx\n", addr_surface_state_base);
+	igt_debug("dynamic   state base: %lx\n", addr_dynamic_state_base);
+	igt_debug("instruct   base addr: %lx\n", addr_instruction_state_base);
+	igt_debug("bindless   base addr: %lx\n", addr_surface_state_base); /* prints the surface state base again under the "bindless" label */
+	igt_debug("state context data base addr: %lx\n", addr_state_contect_data_base);
+	igt_debug("offset indirect addr: %lx\n", offset_indirect_data_start);
+	igt_debug("kernel start pointer: %lx\n", kernel_start_pointer);
+
+	addr_bo_buffer_batch[b++] = GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
+				    PIPELINE_SELECT_GPGPU;
+
+	addr_bo_buffer_batch[b++] = XEHP_STATE_COMPUTE_MODE | 0x1;
+	addr_bo_buffer_batch[b++] = 0xE0004000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+
+#define XE2_STATE_CONTEXT_DATA_BASE_ADDRESS ((3 << 29) | (0 << 27) | (1 << 24) | (11 << 16) | (1 << 0))
+	addr_bo_buffer_batch[b++] = XE2_STATE_CONTEXT_DATA_BASE_ADDRESS;
+	// The 64-bit address is emitted as two dwords: low 32 bits first, then high 32 bits
+	addr_bo_buffer_batch[b++] = addr_state_contect_data_base & 0xFFFFFFFF; // low 32 bits
+	addr_bo_buffer_batch[b++] = (addr_state_contect_data_base >> 32) & 0xFFFFFFFF; // high 32 bits
+
+	addr_bo_buffer_batch[b++] = XEHP_CFE_STATE | 0x4;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x03808800;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = STATE_BASE_ADDRESS | 0x14;
+	addr_bo_buffer_batch[b++] = (addr_general_state_base & 0xffffffff) | 0x21; /* low 32 bits OR'ed with 0x21 */
+	addr_bo_buffer_batch[b++] = addr_general_state_base >> 32; /* high 32 bits */
+	addr_bo_buffer_batch[b++] = 0x0002C000;
+	addr_bo_buffer_batch[b++] = (addr_surface_state_base & 0xffffffff) | 0x21;
+	addr_bo_buffer_batch[b++] = addr_surface_state_base >> 32;
+	addr_bo_buffer_batch[b++] = (addr_dynamic_state_base & 0xffffffff) | 0x21;
+	addr_bo_buffer_batch[b++] = addr_dynamic_state_base >> 32;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = (addr_instruction_state_base & 0xffffffff) | 0x21;
+	addr_bo_buffer_batch[b++] = addr_instruction_state_base >> 32;
+	addr_bo_buffer_batch[b++] = 0xfffff001;
+	addr_bo_buffer_batch[b++] = 0x00010001;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0xfffff001;
+	addr_bo_buffer_batch[b++] = (addr_surface_state_base & 0xffffffff) | 0x21; /* surface state base is written a second time here */
+	addr_bo_buffer_batch[b++] = addr_surface_state_base >> 32;
+	addr_bo_buffer_batch[b++] = 0x00007fbe;
+	addr_bo_buffer_batch[b++] = 0x00000021;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+
+	addr_bo_buffer_batch[b++] = GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | 2;
+	addr_bo_buffer_batch[b++] = (addr_surface_state_base & 0xffffffff) | 0x2; /* note: OR'ed with 0x2 here, not 0x21 */
+	addr_bo_buffer_batch[b++] = addr_surface_state_base >> 32;
+	addr_bo_buffer_batch[b++] = 0x001ff000;
+
+	addr_bo_buffer_batch[b++] = XEHP_COMPUTE_WALKER | 0x26;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000040;
+	addr_bo_buffer_batch[b++] = offset_indirect_data_start; /* 64-bit value stored into a 32-bit dword */
+	addr_bo_buffer_batch[b++] = 0xbe040000;
+	addr_bo_buffer_batch[b++] = 0xffffffff;
+	addr_bo_buffer_batch[b++] = 0x000003ff;
+	addr_bo_buffer_batch[b++] = 0x00000002;
+	addr_bo_buffer_batch[b++] = 0x00000001;
+	addr_bo_buffer_batch[b++] = 0x00000001;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+
+	addr_bo_buffer_batch[b++] = kernel_start_pointer; /* 64-bit value stored into a 32-bit dword */
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x0c000020;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00001047;
+	addr_bo_buffer_batch[b++] = ADDR_BATCH; /* ADDR_BATCH as stored in one 32-bit dword */
+	addr_bo_buffer_batch[b++] = ADDR_BATCH >> 32; /* ADDR_BATCH bits 32 and up */
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000400;
+	addr_bo_buffer_batch[b++] = 0x00000001;
+	addr_bo_buffer_batch[b++] = 0x00000001;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+	addr_bo_buffer_batch[b++] = 0x00000000;
+
+	addr_bo_buffer_batch[b++] = MI_BATCH_BUFFER_END; /* last dword written by this function */
+}
+
+/**
+ * xe2lpg_compute_exec - run a pipeline compatible with XE2
+ *
+ * @fd: file descriptor of the opened DRM device
+ * @kernel: GPU Kernel binary to be executed
+ * @size: size of @kernel in bytes; the kernel BO is sized to ALIGN(@size, 0x1000).
+ */
+static void xe2lpg_compute_exec(int fd, const unsigned char *kernel,
+				unsigned int size)
+{
+#define XE2_BO_DICT_ENTRIES 10
+	struct bo_dict_entry bo_dict[XE2_BO_DICT_ENTRIES] = {
+		{ .addr = ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
+		  .name = "instr state base"},
+		{ .addr = ADDR_DYNAMIC_STATE_BASE,
+		  .size = 0x100000,
+		  .name = "dynamic state base"},
+		{ .addr = ADDR_SURFACE_STATE_BASE,
+		  .size = 0x1000,
+		  .name = "surface state base"},
+		{ .addr = ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
+		  .size =  0x1000,
+		  .name = "indirect object base"},
+		{ .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT,
+		  .name = "addr input"},
+		{ .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT,
+		  .name = "addr output" },
+		{ .addr = ADDR_GENERAL_STATE_BASE, .size = 0x100000,
+		  .name = "general state base" },
+		{ .addr = ADDR_SURFACE_STATE_BASE + OFFSET_BINDING_TABLE,
+		  .size = 0x1000,
+		  .name = "binding table" },
+		{ .addr = ADDR_BATCH,
+		  .size = SIZE_BATCH,
+		  .name = "batch" },
+		{ .addr = XE2_ADDR_STATE_CONTEXT_DATA_BASE,
+		  .size = 0x10000,
+		  .name = "state context data base"},
+	};
+
+	struct bo_execenv execenv;
+	float *dinput;
+
+	bo_execenv_create(fd, &execenv);
+
+	/* Entry 0 holds the kernel; its size is only known at run time */
+	bo_dict[0].size = ALIGN(size, 0x1000);
+
+	bo_execenv_bind(&execenv, bo_dict, XE2_BO_DICT_ENTRIES);
+
+	memcpy(bo_dict[0].data, kernel, size);
+	create_dynamic_state(bo_dict[1].data, OFFSET_KERNEL);
+	xehp_create_surface_state(bo_dict[2].data, ADDR_INPUT, ADDR_OUTPUT);
+	xehp_create_indirect_data(bo_dict[3].data, ADDR_INPUT, ADDR_OUTPUT);
+	xehp_create_surface_state(bo_dict[7].data, ADDR_INPUT, ADDR_OUTPUT);
+
+	dinput = (float *)bo_dict[4].data;
+	srand(time(NULL));
+
+	for (int i = 0; i < SIZE_DATA; i++)
+		((float *)dinput)[i] = rand() / (float)RAND_MAX;
+
+	xe2lpg_compute_exec_compute(bo_dict[8].data,
+				    ADDR_GENERAL_STATE_BASE,
+				    ADDR_SURFACE_STATE_BASE,
+				    ADDR_DYNAMIC_STATE_BASE,
+				    ADDR_INSTRUCTION_STATE_BASE,
+				    XE2_ADDR_STATE_CONTEXT_DATA_BASE,
+				    OFFSET_INDIRECT_DATA_START,
+				    OFFSET_KERNEL);
+
+	bo_execenv_exec(&execenv, ADDR_BATCH);
+
+	for (int i = 0; i < SIZE_DATA; i++) {
+		float f1, f2;
+
+		f1 = ((float *) bo_dict[5].data)[i];
+		f2 = ((float *) bo_dict[4].data)[i];
+
+		if (f1 != f2 * f2)
+			igt_debug("[%4d] f1: %f != %f\n", i, f1, f2 * f2);
+		igt_assert(f1 == f2 * f2);
+	}
+
+	bo_execenv_unbind(&execenv, bo_dict, XE2_BO_DICT_ENTRIES); /* same count as the bind above */
+	bo_execenv_destroy(&execenv);
+}
+
 /*
  * Compatibility flags.
  *
@@ -1175,6 +1377,11 @@ static const struct {
 		.compute_exec = xehpc_compute_exec,
 		.compat = COMPAT_DRIVER_XE,
 	},
+	{
+		.ip_ver = IP_VER(20, 04),
+		.compute_exec = xe2lpg_compute_exec,
+		.compat = COMPAT_DRIVER_XE,
+	},
 };
 
 bool run_intel_compute_kernel(int fd)
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [igt-dev] [PATCH i-g-t v2 4/4] tests/xe_compute: Update documentation regarding test requirements
  2023-12-04 12:28 [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform janga.rahul.kumar
                   ` (2 preceding siblings ...)
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 3/4] lib/intel_compute: Add XE2 compute implementation janga.rahul.kumar
@ 2023-12-04 12:28 ` janga.rahul.kumar
  2023-12-04 16:54 ` [igt-dev] ✗ Fi.CI.BUILD: failure for Extend compute square test to Xe2 platform (rev2) Patchwork
  4 siblings, 0 replies; 8+ messages in thread
From: janga.rahul.kumar @ 2023-12-04 12:28 UTC (permalink / raw)
  To: igt-dev, ramadevi.gandi, janga.rahul.kumar

From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>

Test support for LNL is added. Update documentation to
reflect those changes.

Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
---
 tests/intel/xe_compute.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/intel/xe_compute.c b/tests/intel/xe_compute.c
index 35ba8b346..1db72785b 100644
--- a/tests/intel/xe_compute.c
+++ b/tests/intel/xe_compute.c
@@ -18,7 +18,7 @@
 
 /**
  * SUBTEST: compute-square
- * GPU requirement: TGL, PVC
+ * GPU requirement: TGL, PVC, LNL
  * Description:
  *	Run an openCL Kernel that returns output[i] = input[i] * input[i],
  *	for an input dataset..
-- 
2.25.1

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses janga.rahul.kumar
@ 2023-12-04 13:50   ` Zbigniew Kempczyński
  0 siblings, 0 replies; 8+ messages in thread
From: Zbigniew Kempczyński @ 2023-12-04 13:50 UTC (permalink / raw)
  To: janga.rahul.kumar; +Cc: igt-dev, ramadevi.gandi

On Mon, Dec 04, 2023 at 05:58:22PM +0530, janga.rahul.kumar@intel.com wrote:
> From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
> 
> Rename platform specific state base and binding offset address variable
> names to generic ones as they can be resued for most of the platforms.
> 
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>

Yes, that I was asking about:

Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>

--
Zbigniew

> ---
>  lib/intel_compute.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
> index dd921bf46..c8dc42338 100644
> --- a/lib/intel_compute.c
> +++ b/lib/intel_compute.c
> @@ -37,9 +37,9 @@
>  #define OFFSET_INDIRECT_DATA_START	0xFFFD0000
>  #define OFFSET_KERNEL			0xFFFE0000
>  
> -#define XEHP_ADDR_GENERAL_STATE_BASE		0x80000000UL
> -#define XEHP_ADDR_INSTRUCTION_STATE_BASE	0x90000000UL
> -#define XEHP_OFFSET_BINDING_TABLE		0x1000
> +#define ADDR_GENERAL_STATE_BASE		0x80000000UL
> +#define ADDR_INSTRUCTION_STATE_BASE	0x90000000UL
> +#define OFFSET_BINDING_TABLE		0x1000
>  
>  struct bo_dict_entry {
>  	uint64_t addr;
> @@ -867,7 +867,7 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
>  {
>  #define XEHP_BO_DICT_ENTRIES 9
>  	struct bo_dict_entry bo_dict[XEHP_BO_DICT_ENTRIES] = {
> -		{ .addr = XEHP_ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
> +		{ .addr = ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
>  		  .name = "instr state base"},
>  		{ .addr = ADDR_DYNAMIC_STATE_BASE,
>  		  .size = 0x100000,
> @@ -875,16 +875,16 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
>  		{ .addr = ADDR_SURFACE_STATE_BASE,
>  		  .size = 0x1000,
>  		  .name = "surface state base"},
> -		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
> +		{ .addr = ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
>  		  .size =  0x1000,
>  		  .name = "indirect object base"},
>  		{ .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT,
>  		  .name = "addr input"},
>  		{ .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT,
>  		  .name = "addr output" },
> -		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE, .size = 0x100000,
> +		{ .addr = ADDR_GENERAL_STATE_BASE, .size = 0x100000,
>  		  .name = "general state base" },
> -		{ .addr = ADDR_SURFACE_STATE_BASE + XEHP_OFFSET_BINDING_TABLE,
> +		{ .addr = ADDR_SURFACE_STATE_BASE + OFFSET_BINDING_TABLE,
>  		  .size = 0x1000,
>  		  .name = "binding table" },
>  		{ .addr = ADDR_BATCH, .size = SIZE_BATCH,
> @@ -912,10 +912,10 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
>  		((float *)dinput)[i] = rand() / (float)RAND_MAX;
>  
>  	xehp_compute_exec_compute(bo_dict[8].data,
> -				  XEHP_ADDR_GENERAL_STATE_BASE,
> +				  ADDR_GENERAL_STATE_BASE,
>  				  ADDR_SURFACE_STATE_BASE,
>  				  ADDR_DYNAMIC_STATE_BASE,
> -				  XEHP_ADDR_INSTRUCTION_STATE_BASE,
> +				  ADDR_INSTRUCTION_STATE_BASE,
>  				  OFFSET_INDIRECT_DATA_START,
>  				  OFFSET_KERNEL);
>  
> @@ -1081,16 +1081,16 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
>  {
>  #define XEHPC_BO_DICT_ENTRIES 6
>  	struct bo_dict_entry bo_dict[XEHPC_BO_DICT_ENTRIES] = {
> -		{ .addr = XEHP_ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
> +		{ .addr = ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
>  		  .name = "instr state base"},
> -		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
> +		{ .addr = ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
>  		  .size =  0x10000,
>  		  .name = "indirect object base"},
>  		{ .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT,
>  		  .name = "addr input"},
>  		{ .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT,
>  		  .name = "addr output" },
> -		{ .addr = XEHP_ADDR_GENERAL_STATE_BASE, .size = 0x10000,
> +		{ .addr = ADDR_GENERAL_STATE_BASE, .size = 0x10000,
>  		  .name = "general state base" },
>  		{ .addr = ADDR_BATCH, .size = SIZE_BATCH,
>  		  .name = "batch" },
> @@ -1114,10 +1114,10 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
>  		((float *)dinput)[i] = rand() / (float)RAND_MAX;
>  
>  	xehpc_compute_exec_compute(bo_dict[5].data,
> -				   XEHP_ADDR_GENERAL_STATE_BASE,
> +				   ADDR_GENERAL_STATE_BASE,
>  				   ADDR_SURFACE_STATE_BASE,
>  				   ADDR_DYNAMIC_STATE_BASE,
> -				   XEHP_ADDR_INSTRUCTION_STATE_BASE,
> +				   ADDR_INSTRUCTION_STATE_BASE,
>  				   OFFSET_INDIRECT_DATA_START,
>  				   OFFSET_KERNEL);
>  
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [igt-dev] [PATCH i-g-t v2 3/4] lib/intel_compute: Add XE2 compute implementation
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 3/4] lib/intel_compute: Add XE2 compute implementation janga.rahul.kumar
@ 2023-12-04 13:51   ` Zbigniew Kempczyński
  0 siblings, 0 replies; 8+ messages in thread
From: Zbigniew Kempczyński @ 2023-12-04 13:51 UTC (permalink / raw)
  To: janga.rahul.kumar; +Cc: igt-dev, ramadevi.gandi

On Mon, Dec 04, 2023 at 05:58:24PM +0530, janga.rahul.kumar@intel.com wrote:
> From: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
> 
> Add compute pipeline and walker instructions for xe2lpg.
> 
> v2: Reuse address variables. (Zbigniew)
> 
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
> Signed-off-by: Janga Rahul Kumar <janga.rahul.kumar@intel.com>
> ---
>  lib/intel_compute.c | 207 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 207 insertions(+)
> 
> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
> index c8dc42338..65a85d335 100644
> --- a/lib/intel_compute.c
> +++ b/lib/intel_compute.c
> @@ -41,6 +41,8 @@
>  #define ADDR_INSTRUCTION_STATE_BASE	0x90000000UL
>  #define OFFSET_BINDING_TABLE		0x1000
>  
> +#define XE2_ADDR_STATE_CONTEXT_DATA_BASE	0x900000UL
> +
>  struct bo_dict_entry {
>  	uint64_t addr;
>  	uint32_t size;
> @@ -1137,6 +1139,206 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
>  	bo_execenv_destroy(&execenv);
>  }
>  
> +static void xe2lpg_compute_exec_compute(uint32_t *addr_bo_buffer_batch,
> +					uint64_t addr_general_state_base,
> +					uint64_t addr_surface_state_base,
> +					uint64_t addr_dynamic_state_base,
> +					uint64_t addr_instruction_state_base,
> +					uint64_t addr_state_contect_data_base,
> +					uint64_t offset_indirect_data_start,
> +					uint64_t kernel_start_pointer)
> +{
> +	int b = 0;
> +
> +	igt_debug("general   state base: %lx\n", addr_general_state_base);
> +	igt_debug("surface   state base: %lx\n", addr_surface_state_base);
> +	igt_debug("dynamic   state base: %lx\n", addr_dynamic_state_base);
> +	igt_debug("instruct   base addr: %lx\n", addr_instruction_state_base);
> +	igt_debug("bindless   base addr: %lx\n", addr_surface_state_base);
> +	igt_debug("state context data base addr: %lx\n", addr_state_contect_data_base);
> +	igt_debug("offset indirect addr: %lx\n", offset_indirect_data_start);
> +	igt_debug("kernel start pointer: %lx\n", kernel_start_pointer);
> +
> +	addr_bo_buffer_batch[b++] = GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
> +				    PIPELINE_SELECT_GPGPU;
> +
> +	addr_bo_buffer_batch[b++] = XEHP_STATE_COMPUTE_MODE | 0x1;
> +	addr_bo_buffer_batch[b++] = 0xE0004000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +
> +#define XE2_STATE_CONTEXT_DATA_BASE_ADDRESS ((3 << 29) | (0 << 27) | (1 << 24) | (11 << 16) | (1 << 0))
> +	addr_bo_buffer_batch[b++] = XE2_STATE_CONTEXT_DATA_BASE_ADDRESS;
> +	// Split into low and high 32 bits
> +	addr_bo_buffer_batch[b++] = addr_state_contect_data_base & 0xFFFFFFFF; // Mask the low 32 bits ;
> +	addr_bo_buffer_batch[b++] = (addr_state_contect_data_base >> 32) & 0xFFFFFFFF;
> +
> +	addr_bo_buffer_batch[b++] = XEHP_CFE_STATE | 0x4;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x03808800;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = STATE_BASE_ADDRESS | 0x14;
> +	addr_bo_buffer_batch[b++] = (addr_general_state_base & 0xffffffff) | 0x21;
> +	addr_bo_buffer_batch[b++] = addr_general_state_base >> 32;
> +	addr_bo_buffer_batch[b++] = 0x0002C000;
> +	addr_bo_buffer_batch[b++] = (addr_surface_state_base & 0xffffffff) | 0x21;
> +	addr_bo_buffer_batch[b++] = addr_surface_state_base >> 32;
> +	addr_bo_buffer_batch[b++] = (addr_dynamic_state_base & 0xffffffff) | 0x21;
> +	addr_bo_buffer_batch[b++] = addr_dynamic_state_base >> 32;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = (addr_instruction_state_base & 0xffffffff) | 0x21;
> +	addr_bo_buffer_batch[b++] = addr_instruction_state_base >> 32;
> +	addr_bo_buffer_batch[b++] = 0xfffff001;
> +	addr_bo_buffer_batch[b++] = 0x00010001;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0xfffff001;
> +	addr_bo_buffer_batch[b++] = (addr_surface_state_base & 0xffffffff) | 0x21;
> +	addr_bo_buffer_batch[b++] = addr_surface_state_base >> 32;
> +	addr_bo_buffer_batch[b++] = 0x00007fbe;
> +	addr_bo_buffer_batch[b++] = 0x00000021;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +
> +	addr_bo_buffer_batch[b++] = GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | 2;
> +	addr_bo_buffer_batch[b++] = (addr_surface_state_base & 0xffffffff) | 0x2;
> +	addr_bo_buffer_batch[b++] = addr_surface_state_base >> 32;
> +	addr_bo_buffer_batch[b++] = 0x001ff000;
> +
> +	addr_bo_buffer_batch[b++] = XEHP_COMPUTE_WALKER | 0x26;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000040;
> +	addr_bo_buffer_batch[b++] = offset_indirect_data_start;
> +	addr_bo_buffer_batch[b++] = 0xbe040000;
> +	addr_bo_buffer_batch[b++] = 0xffffffff;
> +	addr_bo_buffer_batch[b++] = 0x000003ff;
> +	addr_bo_buffer_batch[b++] = 0x00000002;
> +	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +
> +	addr_bo_buffer_batch[b++] = kernel_start_pointer;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x0c000020;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00001047;
> +	addr_bo_buffer_batch[b++] = ADDR_BATCH;
> +	addr_bo_buffer_batch[b++] = ADDR_BATCH >> 32;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000400;
> +	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = 0x00000001;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +	addr_bo_buffer_batch[b++] = 0x00000000;
> +
> +	addr_bo_buffer_batch[b++] = MI_BATCH_BUFFER_END;
> +}
> +
> +/**
> + * xe2lpg_compute_exec - run a pipeline compatible with XE2
> + *
> + * @fd: file descriptor of the opened DRM device
> + * @kernel: GPU Kernel binary to be executed
> + * @size: size of @kernel.
> + */
> +static void xe2lpg_compute_exec(int fd, const unsigned char *kernel,
> +				unsigned int size)
> +{
> +#define XE2_BO_DICT_ENTRIES 10
> +	struct bo_dict_entry bo_dict[XE2_BO_DICT_ENTRIES] = {
> +		{ .addr = ADDR_INSTRUCTION_STATE_BASE + OFFSET_KERNEL,
> +		  .name = "instr state base"},
> +		{ .addr = ADDR_DYNAMIC_STATE_BASE,
> +		  .size = 0x100000,
> +		  .name = "dynamic state base"},
> +		{ .addr = ADDR_SURFACE_STATE_BASE,
> +		  .size = 0x1000,
> +		  .name = "surface state base"},
> +		{ .addr = ADDR_GENERAL_STATE_BASE + OFFSET_INDIRECT_DATA_START,
> +		  .size =  0x1000,
> +		  .name = "indirect object base"},
> +		{ .addr = ADDR_INPUT, .size = SIZE_BUFFER_INPUT,
> +		  .name = "addr input"},
> +		{ .addr = ADDR_OUTPUT, .size = SIZE_BUFFER_OUTPUT,
> +		  .name = "addr output" },
> +		{ .addr = ADDR_GENERAL_STATE_BASE, .size = 0x100000,
> +		  .name = "general state base" },
> +		{ .addr = ADDR_SURFACE_STATE_BASE + OFFSET_BINDING_TABLE,
> +		  .size = 0x1000,
> +		  .name = "binding table" },
> +		{ .addr = ADDR_BATCH,
> +		  .size = SIZE_BATCH,
> +		  .name = "batch" },
> +		{ .addr = XE2_ADDR_STATE_CONTEXT_DATA_BASE,
> +		  .size = 0x10000,
> +		  .name = "state context data base"},
> +	};
> +
> +	struct bo_execenv execenv;
> +	float *dinput;
> +
> +	bo_execenv_create(fd, &execenv);
> +
> +	/* Sets Kernel size */
> +	bo_dict[0].size = ALIGN(size, 0x1000);
> +
> +	bo_execenv_bind(&execenv, bo_dict, XE2_BO_DICT_ENTRIES);
> +
> +	memcpy(bo_dict[0].data, kernel, size);
> +	create_dynamic_state(bo_dict[1].data, OFFSET_KERNEL);
> +	xehp_create_surface_state(bo_dict[2].data, ADDR_INPUT, ADDR_OUTPUT);
> +	xehp_create_indirect_data(bo_dict[3].data, ADDR_INPUT, ADDR_OUTPUT);
> +	xehp_create_surface_state(bo_dict[7].data, ADDR_INPUT, ADDR_OUTPUT);
> +
> +	dinput = (float *)bo_dict[4].data;
> +	srand(time(NULL));
> +
> +	for (int i = 0; i < SIZE_DATA; i++)
> +		((float *)dinput)[i] = rand() / (float)RAND_MAX;
> +
> +	xe2lpg_compute_exec_compute(bo_dict[8].data,
> +				  ADDR_GENERAL_STATE_BASE,
> +				  ADDR_SURFACE_STATE_BASE,
> +				  ADDR_DYNAMIC_STATE_BASE,
> +				  ADDR_INSTRUCTION_STATE_BASE,
> +				  XE2_ADDR_STATE_CONTEXT_DATA_BASE,
> +				  OFFSET_INDIRECT_DATA_START,
> +				  OFFSET_KERNEL);
> +
> +	bo_execenv_exec(&execenv, ADDR_BATCH);
> +
> +	for (int i = 0; i < SIZE_DATA; i++) {
> +		float f1, f2;
> +
> +		f1 = ((float *) bo_dict[5].data)[i];
> +		f2 = ((float *) bo_dict[4].data)[i];
> +
> +		if (f1 != f2 * f2)
> +			igt_debug("[%4d] f1: %f != %f\n", i, f1, f2 * f2);
> +		igt_assert(f1 == f2 * f2);
> +	}
> +
> +	bo_execenv_unbind(&execenv, bo_dict, XEHPC_BO_DICT_ENTRIES);
> +	bo_execenv_destroy(&execenv);
> +}
> +
>  /*
>   * Compatibility flags.
>   *
> @@ -1175,6 +1377,11 @@ static const struct {
>  		.compute_exec = xehpc_compute_exec,
>  		.compat = COMPAT_DRIVER_XE,
>  	},
> +	{
> +		.ip_ver = IP_VER(20, 04),
> +		.compute_exec = xe2lpg_compute_exec,
> +		.compat = COMPAT_DRIVER_XE,
> +	},
>  };
>  
>  bool run_intel_compute_kernel(int fd)
> -- 
> 2.25.1
> 

Good job! Looks good to me:

Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
--
Zbigniew

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [igt-dev] ✗ Fi.CI.BUILD: failure for Extend compute square test to Xe2 platform (rev2)
  2023-12-04 12:28 [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform janga.rahul.kumar
                   ` (3 preceding siblings ...)
  2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 4/4] tests/xe_compute: Update documentation regarding test requirements janga.rahul.kumar
@ 2023-12-04 16:54 ` Patchwork
  4 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2023-12-04 16:54 UTC (permalink / raw)
  To: Kumar, Janga Rahul; +Cc: igt-dev

== Series Details ==

Series: Extend compute square test to Xe2 platform (rev2)
URL   : https://patchwork.freedesktop.org/series/127184/
State : failure

== Summary ==

Applying: lib/intel_compute: Rename compute state addresses
Using index info to reconstruct a base tree...
M	lib/intel_compute.c
Falling back to patching base and 3-way merge...
Auto-merging lib/intel_compute.c
CONFLICT (content): Merge conflict in lib/intel_compute.c
Patch failed at 0001 lib/intel_compute: Rename compute state addresses
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2023-12-04 16:54 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-04 12:28 [igt-dev] [PATCH i-g-t v2 0/4] Extend compute square test to Xe2 platform janga.rahul.kumar
2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 1/4] lib/intel_compute: Rename compute state addresses janga.rahul.kumar
2023-12-04 13:50   ` Zbigniew Kempczyński
2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 2/4] lib/intel_compute_square_kernels: Add xe2lpg compute sqaure kernel janga.rahul.kumar
2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 3/4] lib/intel_compute: Add XE2 compute implementation janga.rahul.kumar
2023-12-04 13:51   ` Zbigniew Kempczyński
2023-12-04 12:28 ` [igt-dev] [PATCH i-g-t v2 4/4] tests/xe_compute: Update documentation regarding test requirements janga.rahul.kumar
2023-12-04 16:54 ` [igt-dev] ✗ Fi.CI.BUILD: failure for Extend compute square test to Xe2 platform (rev2) Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox