From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mgamail.intel.com (mgamail.intel.com [192.55.52.115]) by gabe.freedesktop.org (Postfix) with ESMTPS id 5EBD410E22C for ; Mon, 8 Jan 2024 11:30:25 +0000 (UTC) From: =?UTF-8?q?Zbigniew=20Kempczy=C5=84ski?= To: igt-dev@lists.freedesktop.org Subject: [PATCH i-g-t 1/3] lib/rendercopy: Add render-copy xe2 implementation Date: Mon, 8 Jan 2024 12:30:10 +0100 Message-Id: <20240108113012.382557-2-zbigniew.kempczynski@intel.com> In-Reply-To: <20240108113012.382557-1-zbigniew.kempczynski@intel.com> References: <20240108113012.382557-1-zbigniew.kempczynski@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: igt-dev-bounces@lists.freedesktop.org Sender: "igt-dev" List-ID: Due to the small differences between the xe2 and the previous 3D pipeline, I decided to adapt the gen9 render-copy function to handle xe2 instead of introducing a new one. Xe2 uses large GRFs (512-bit), where coordinates occupy only 2 GRF registers (instead of 4 on 256-bit GRFs). This requires adapting the shader's data preparation for the sampler/render target write. 
Signed-off-by: Zbigniew Kempczyński Cc: Juha-Pekka Heikkila --- lib/genxe2_render.h | 14 ++++++ lib/i915/shaders/ps/gen20_render_copy.asm | 8 +++ lib/rendercopy.h | 4 ++ lib/rendercopy_gen9.c | 60 +++++++++++++++++++++-- 4 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 lib/genxe2_render.h create mode 100644 lib/i915/shaders/ps/gen20_render_copy.asm diff --git a/lib/genxe2_render.h b/lib/genxe2_render.h new file mode 100644 index 0000000000..3db7a84894 --- /dev/null +++ b/lib/genxe2_render.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef GENXE2_RENDER_H +#define GENXE2_RENDER_H + +#define GENXE2_3DSTATE_DRAWING_RECTANGLE_FAST GEN4_3D(3, 0, 0) + +/* 3DSTATE_PS dword6 */ +# define GENXE_KERNEL0_PACKING_POLICY 24 +# define GENXE_KERNEL0_POLY_PACK16_FIXED 3 + +#endif diff --git a/lib/i915/shaders/ps/gen20_render_copy.asm b/lib/i915/shaders/ps/gen20_render_copy.asm new file mode 100644 index 0000000000..330417966d --- /dev/null +++ b/lib/i915/shaders/ps/gen20_render_copy.asm @@ -0,0 +1,8 @@ +L0: +(W) mad (16|M0) acc0.0<1>:f r6.3<0;0>:f r1.0<1;1>:f r6.0<0>:f +(W) mad (16|M0) r113.0<1>:f acc0.0<1;1>:f r1.0<1;1>:f r6.1<0>:f +(W) mad (16|M0) acc0.0<1>:f r6.7<0;0>:f r1.0<1;1>:f r6.4<0>:f +(W) mad (16|M0) r114.0<1>:f acc0.0<1;1>:f r2.0<1;1>:f r6.5<0>:f +(W) send.smpl (16|M0) r12 r113 null:0 0x0 0x04420001 {F@1,$0} // wr:2+0, rd:4; simd16 sample:u+v+r+ai+mlod using sampler index 0 +(W) send.rc (16|M0) null r12 null:0 0x0 0x08031400 {EOT,$0} // wr:4+0, rd:0; full-precision render target write SIMD16; last render target to surface 0 +L96: diff --git a/lib/rendercopy.h b/lib/rendercopy.h index 0d81d27f83..1a97a72573 100644 --- a/lib/rendercopy.h +++ b/lib/rendercopy.h @@ -43,6 +43,10 @@ void gen12p71_render_copyfunc(struct intel_bb *ibb, struct intel_buf *src, uint32_t src_x, uint32_t src_y, uint32_t width, uint32_t height, struct intel_buf *dst, uint32_t dst_x, uint32_t dst_y); +void 
genxe2_render_copyfunc(struct intel_bb *ibb, + struct intel_buf *src, uint32_t src_x, uint32_t src_y, + uint32_t width, uint32_t height, + struct intel_buf *dst, uint32_t dst_x, uint32_t dst_y); void gen12_render_copyfunc(struct intel_bb *ibb, struct intel_buf *src, uint32_t src_x, uint32_t src_y, uint32_t width, uint32_t height, diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c index 363bc6c1b2..f0efadeb50 100644 --- a/lib/rendercopy_gen9.c +++ b/lib/rendercopy_gen9.c @@ -22,6 +22,7 @@ #include "intel_mocs.h" #include "rendercopy.h" #include "gen9_render.h" +#include "genxe2_render.h" #include "intel_reg.h" #include "igt_aux.h" #include "intel_chipset.h" @@ -136,6 +137,15 @@ static const uint32_t gen12p71_render_copy[][4] = { { 0x80041131, 0x00000004, 0x50007144, 0x00c40000 }, }; +static const uint32_t xe2_render_copy[][4] = { + { 0x8010005b, 0x200002a0, 0x020a0634, 0x06040105 }, + { 0x8010005b, 0x710402a8, 0x020a2001, 0x06140105 }, + { 0x8010005b, 0x200002a0, 0x020a0674, 0x06440105 }, + { 0x8010005b, 0x720402a8, 0x020a2001, 0x06540205 }, + { 0x80122031, 0x0c240000, 0x20027114, 0x00800000 }, + { 0x8010c031, 0x00000004, 0x58000c24, 0x00c40000 }, +}; + /* Mostly copy+paste from gen6, except height, width, pitch moved */ static uint32_t gen9_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst, @@ -545,7 +555,10 @@ gen9_emit_state_base_address(struct intel_bb *ibb) { /* WaBindlessSurfaceStateModifyEnable:skl,bxt */ /* The length has to be one less if we dont modify bindless state */ - intel_bb_out(ibb, GEN4_STATE_BASE_ADDRESS | (19 - 1 - 2)); + if (AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) + intel_bb_out(ibb, GEN4_STATE_BASE_ADDRESS | 20); + else + intel_bb_out(ibb, GEN4_STATE_BASE_ADDRESS | (19 - 1 - 2)); /* general */ intel_bb_out(ibb, 0 | BASE_ADDRESS_MODIFY); @@ -586,6 +599,13 @@ gen9_emit_state_base_address(struct intel_bb *ibb) { intel_bb_out(ibb, 0); intel_bb_out(ibb, 0); intel_bb_out(ibb, 0); + + if 
(AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) { + /* Bindless sampler */ + intel_bb_out(ibb, 0); + intel_bb_out(ibb, 0); + intel_bb_out(ibb, 0); + } } static void @@ -753,7 +773,10 @@ gen9_emit_ds(struct intel_bb *ibb) { static void gen8_emit_wm_hz_op(struct intel_bb *ibb) { - intel_bb_out(ibb, GEN8_3DSTATE_WM_HZ_OP | (5-2)); + if (AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) + intel_bb_out(ibb, GEN8_3DSTATE_WM_HZ_OP | (6-2)); + else + intel_bb_out(ibb, GEN8_3DSTATE_WM_HZ_OP | (5-2)); intel_bb_out(ibb, 0); intel_bb_out(ibb, 0); intel_bb_out(ibb, 0); @@ -852,7 +875,11 @@ gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel, bool fast_clear) { intel_bb_out(ibb, (max_threads - 1) << GEN8_3DSTATE_PS_MAX_THREADS_SHIFT | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE | (fast_clear ? GEN8_3DSTATE_FAST_CLEAR_ENABLE : 0)); - intel_bb_out(ibb, 6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); + if (AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) + intel_bb_out(ibb, 6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT | + GENXE_KERNEL0_POLY_PACK16_FIXED << GENXE_KERNEL0_PACKING_POLICY); + else + intel_bb_out(ibb, 6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); intel_bb_out(ibb, 0); // kernel 1 intel_bb_out(ibb, 0); /* kernel 1 hi */ intel_bb_out(ibb, 0); // kernel 2 @@ -862,7 +889,11 @@ gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel, bool fast_clear) { intel_bb_out(ibb, GEN8_PS_BLEND_HAS_WRITEABLE_RT); intel_bb_out(ibb, GEN8_3DSTATE_PS_EXTRA | (2 - 2)); - intel_bb_out(ibb, GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE); + + if (AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) + intel_bb_out(ibb, GEN8_PSX_PIXEL_SHADER_VALID); + else + intel_bb_out(ibb, GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE); } static void @@ -903,6 +934,9 @@ gen9_emit_depth(struct intel_bb *ibb) static void gen7_emit_clear(struct intel_bb *ibb) { + if (AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) + return; + intel_bb_out(ibb, GEN7_3DSTATE_CLEAR_PARAMS | (3-2)); intel_bb_out(ibb, 0); 
intel_bb_out(ibb, 1); // clear valid @@ -911,7 +945,10 @@ gen7_emit_clear(struct intel_bb *ibb) { static void gen6_emit_drawing_rectangle(struct intel_bb *ibb, const struct intel_buf *dst) { - intel_bb_out(ibb, GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + if (AT_LEAST_GEN(intel_get_drm_devid(ibb->fd), 20)) + intel_bb_out(ibb, GENXE2_3DSTATE_DRAWING_RECTANGLE_FAST | (4 - 2)); + else + intel_bb_out(ibb, GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); intel_bb_out(ibb, 0); intel_bb_out(ibb, (intel_buf_height(dst) - 1) << 16 | (intel_buf_width(dst) - 1)); intel_bb_out(ibb, 0); @@ -1220,6 +1257,19 @@ void gen12p71_render_copyfunc(struct intel_bb *ibb, sizeof(gen12p71_render_copy)); } +void genxe2_render_copyfunc(struct intel_bb *ibb, + struct intel_buf *src, uint32_t src_x, uint32_t src_y, + uint32_t width, uint32_t height, + struct intel_buf *dst, uint32_t dst_x, uint32_t dst_y) +{ + _gen9_render_op(ibb, src, src_x, src_y, + width, height, dst, dst_x, dst_y, + NULL, + NULL, + xe2_render_copy, + sizeof(xe2_render_copy)); +} + void mtl_render_copyfunc(struct intel_bb *ibb, struct intel_buf *src, unsigned int src_x, unsigned int src_y, -- 2.34.1