From: Ramalingam C <ramalingam.c@intel.com>
To: dri-devel <dri-devel@lists.freedesktop.org>,
intel-gfx <intel-gfx@lists.freedesktop.org>
Cc: CQ Tang <cq.tang@intel.com>, Matthew Auld <matthew.auld@intel.com>
Subject: [Intel-gfx] [PATCH v5 12/19] drm/i915/gt: Clear compress metadata for Xe_HP platforms
Date: Tue, 1 Feb 2022 16:11:25 +0530 [thread overview]
Message-ID: <20220201104132.3050-13-ramalingam.c@intel.com> (raw)
In-Reply-To: <20220201104132.3050-1-ramalingam.c@intel.com>
From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Xe-HP and newer devices support Flat CCS, which reserves a portion of
the device memory to store compression metadata. While clearing a
device memory buffer object we also need to clear the associated
CCS buffer.
Flat CCS memory can not be directly accessed by S/W.
The address of the CCS buffer associated with a main BO is calculated
automatically by the device itself. KMD/UMD can only access this buffer
indirectly, using the XY_CTRL_SURF_COPY_BLT cmd via the address of the
device memory buffer.
v2: Fixed issues with platform naming [Lucas]
v3: Rebased [Ram]
Used the round_up funcs [Bob]
Cc: CQ Tang <cq.tang@intel.com>
Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com>
Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
---
drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 14 +++
drivers/gpu/drm/i915/gt/intel_migrate.c | 114 ++++++++++++++++++-
2 files changed, 125 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index f8253012d166..07bf5a1753bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -203,6 +203,20 @@
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
+/*
+ * XY_CTRL_SURF_COPY_BLT: blitter command used to copy CCS (compression
+ * metadata) data, plus the MI_FLUSH_DW bits used around it.
+ */
+/* Dwords emitted per XY_CTRL_SURF_COPY_BLT / per MI_FLUSH_DW command */
+#define XY_CTRL_SURF_INSTR_SIZE 5
+#define MI_FLUSH_DW_SIZE 3
+/* NOTE(review): the trailing 3 is presumably the dword length field (5 - 2) - confirm */
+#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3)
+/* Header dword fields: source/destination access type and transfer size */
+#define SRC_ACCESS_TYPE_SHIFT 21
+#define DST_ACCESS_TYPE_SHIFT 20
+#define CCS_SIZE_SHIFT 8
+/* Position of the MOCS field in the dwords carrying the upper address bits */
+#define XY_CTRL_SURF_MOCS_SHIFT 25
+/* CCS is transferred in blocks of 256 bytes, at most 1024 blocks per command */
+#define NUM_CCS_BYTES_PER_BLOCK 256
+#define NUM_CCS_BLKS_PER_XFER 1024
+/* Values for the source/destination access type fields */
+#define INDIRECT_ACCESS 0
+#define DIRECT_ACCESS 1
+/* Flag bits OR'ed into the MI_FLUSH_DW header dword */
+#define MI_FLUSH_LLC BIT(9)
+#define MI_FLUSH_CCS BIT(16)
+
+
#define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2))
#define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22)
#define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index cac791155244..3e1cf224cdf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -16,6 +16,8 @@ struct insert_pte_data {
};
#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
+/*
+ * GET_CCS_SIZE() - @size divided by NUM_CCS_BYTES_PER_BLOCK, rounded up, on
+ * platforms with Flat CCS (HAS_FLAT_CCS()); 0 on all other platforms.
+ * NOTE(review): callers use the result as a count of CCS blocks - confirm
+ * that this is the intended unit.
+ */
+#define GET_CCS_SIZE(i915, size) (HAS_FLAT_CCS(i915) ? \
+ DIV_ROUND_UP(size, NUM_CCS_BYTES_PER_BLOCK) : 0)
static bool engine_supports_migration(struct intel_engine_cs *engine)
{
@@ -594,19 +596,105 @@ intel_context_migrate_copy(struct intel_context *ce,
return err;
}
+/*
+ * i915_flush_dw - emit one MI_FLUSH_DW command
+ * @cmd: location in the command stream to write to
+ * @dst: 64-bit address placed in the two dwords following the header
+ * @flags: bits OR'ed into the MI_FLUSH_DW header dword
+ *
+ * Writes three dwords. @dst is emitted unmodified, so any low address
+ * bits must already be as the caller wants them.
+ *
+ * Returns the position just past the emitted command.
+ */
+static inline u32 *i915_flush_dw(u32 *cmd, u64 dst, u32 flags)
+{
+	cmd[0] = MI_FLUSH_DW | flags;
+	cmd[1] = lower_32_bits(dst);
+	cmd[2] = upper_32_bits(dst);
+
+	return cmd + 3;
+}
+
+/*
+ * calc_ctrl_surf_instr_size - ring space needed for the CCS copy of a chunk
+ * @i915: device, used to check for Flat CCS support
+ * @size: size of the main-memory chunk, as handed to GET_CCS_SIZE()
+ *
+ * Returns the number of dwords needed for the XY_CTRL_SURF_COPY_BLT
+ * commands plus the MI_FLUSH_DW before and after them, or 0 when
+ * GET_CCS_SIZE() reports nothing to transfer.
+ */
+static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
+{
+	u32 blocks = GET_CCS_SIZE(i915, size);
+	u32 copies;
+
+	if (!blocks)
+		return 0;
+
+	/*
+	 * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte blocks, and
+	 * one command can transfer up to 1024 blocks.
+	 */
+	copies = DIV_ROUND_UP(blocks, NUM_CCS_BLKS_PER_XFER);
+
+	/* One flush before and one after the XY_CTRL_SURF_COPY_BLT commands. */
+	return copies * XY_CTRL_SURF_INSTR_SIZE + 2 * MI_FLUSH_DW_SIZE;
+}
+
+/*
+ * _i915_ctrl_surf_copy_blt - emit XY_CTRL_SURF_COPY_BLT instruction(s)
+ * @cmd: location in the command stream to write to
+ * @src_addr: source address
+ * @dst_addr: destination address
+ * @src_mem_access: access type for the source (DIRECT_ACCESS/INDIRECT_ACCESS)
+ * @dst_mem_access: access type for the destination
+ * @src_mocs: MOCS value for the source
+ * @dst_mocs: MOCS value for the destination
+ * @num_ccs_blocks: number of 256 byte CCS blocks to copy (u16, so at most
+ *                  65535); callers are expected to pass at least 1
+ *
+ * The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS data in
+ * and out of the CCS region.
+ *
+ * One instruction copies at most 1024 blocks of 256 bytes, i.e. 256 KB of
+ * CCS, which represents 256 * 256 KB = 64 MB of LMEM. Larger copies are
+ * split into several instructions in the same batch buffer, each one
+ * starting 64 MB further into both surfaces.
+ *
+ * Every instruction takes XY_CTRL_SURF_INSTR_SIZE dwords.
+ *
+ * Returns the position just past the last emitted instruction.
+ */
+static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
+				     u8 src_mem_access, u8 dst_mem_access,
+				     int src_mocs, int dst_mocs,
+				     u16 num_ccs_blocks)
+{
+	int i = num_ccs_blocks;
+
+	do {
+		/*
+		 * Blocks handled by this instruction: a full transfer
+		 * while more than that remain, the remainder otherwise.
+		 * Clamp rather than mask the remaining count: masking
+		 * would turn e.g. 1025 remaining blocks into a 1 block
+		 * transfer while the addresses still advance by 64 MB.
+		 */
+		int blks = i > NUM_CCS_BLKS_PER_XFER ? NUM_CCS_BLKS_PER_XFER : i;
+
+		/*
+		 * The size field holds the block count minus one and
+		 * takes values in the range 0 - 1023, hence the mask.
+		 */
+		*cmd++ = ((XY_CTRL_SURF_COPY_BLT) |
+			  (src_mem_access << SRC_ACCESS_TYPE_SHIFT) |
+			  (dst_mem_access << DST_ACCESS_TYPE_SHIFT) |
+			  (((blks - 1) & 1023) << CCS_SIZE_SHIFT));
+		*cmd++ = lower_32_bits(src_addr);
+		/* Only 16 upper address bits are kept; MOCS shares the dword. */
+		*cmd++ = ((upper_32_bits(src_addr) & 0xFFFF) |
+			  (src_mocs << XY_CTRL_SURF_MOCS_SHIFT));
+		*cmd++ = lower_32_bits(dst_addr);
+		*cmd++ = ((upper_32_bits(dst_addr) & 0xFFFF) |
+			  (dst_mocs << XY_CTRL_SURF_MOCS_SHIFT));
+
+		/* Step both surfaces past what a full transfer covers. */
+		src_addr += SZ_64M;
+		dst_addr += SZ_64M;
+		i -= NUM_CCS_BLKS_PER_XFER;
+	} while (i > 0);
+
+	return cmd;
+}
+
static int emit_clear(struct i915_request *rq,
u64 offset,
int size,
- u32 value)
+ u32 value,
+ bool is_lmem)
{
+ struct drm_i915_private *i915 = rq->engine->i915;
const int ver = GRAPHICS_VER(rq->engine->i915);
+ u32 num_ccs_blks, ccs_ring_size;
u32 *cs;
GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
offset += (u64)rq->engine->instance << 32;
- cs = intel_ring_begin(rq, ver >= 8 ? 8 : 6);
+ /* Clear flat ccs only when value is 0 */
+ ccs_ring_size = (is_lmem && !value) ?
+ calc_ctrl_surf_instr_size(i915, size)
+ : 0;
+
+ cs = intel_ring_begin(rq, round_up(ver >= 8 ? 8 + ccs_ring_size : 6, 2));
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -629,6 +717,26 @@ static int emit_clear(struct i915_request *rq,
*cs++ = value;
}
+ if (is_lmem && HAS_FLAT_CCS(i915) && !value) {
+ num_ccs_blks = GET_CCS_SIZE(i915, size);
+
+ /*
+ * Flat CCS surface can only be accessed via
+ * XY_CTRL_SURF_COPY_BLT CMD and using indirect
+ * mapping of associated LMEM.
+ * We can clear ccs surface by writing all 0s,
+ * so we will flush the previously cleared buffer
+ * and use it as a source.
+ */
+ cs = i915_flush_dw(cs, offset, MI_FLUSH_LLC | MI_FLUSH_CCS);
+ cs = _i915_ctrl_surf_copy_blt(cs, offset, offset,
+ DIRECT_ACCESS, INDIRECT_ACCESS,
+ 1, 1, num_ccs_blks);
+ cs = i915_flush_dw(cs, offset, MI_FLUSH_LLC | MI_FLUSH_CCS);
+
+ if (ccs_ring_size & 1)
+ *cs++ = MI_NOOP;
+ }
intel_ring_advance(rq, cs);
return 0;
}
@@ -694,7 +802,7 @@ intel_context_migrate_clear(struct intel_context *ce,
if (err)
goto out_rq;
- err = emit_clear(rq, offset, len, value);
+ err = emit_clear(rq, offset, len, value, is_lmem);
/* Arbitration is re-enabled between requests. */
out_rq:
--
2.20.1
next prev parent reply other threads:[~2022-02-01 10:42 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-02-01 10:41 [Intel-gfx] [PATCH v5 00/19] drm/i915/dg2: Enabling 64k page size and flat ccs Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 01/19] drm/i915: add needs_compact_pt flag Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 02/19] drm/i915: enforce min GTT alignment for discrete cards Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 03/19] drm/i915: support 64K GTT pages " Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 04/19] drm/i915: add gtt misalignment test Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 05/19] drm/i915/gtt: allow overriding the pt alignment Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 06/19] drm/i915/gtt: add xehpsdv_ppgtt_insert_entry Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 07/19] drm/i915/migrate: add acceleration support for DG2 Ramalingam C
2022-02-01 10:49 ` Matthew Auld
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 08/19] drm/i915/uapi: document behaviour for DG2 64K support Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 09/19] Doc/gpu/rfc/i915: i915 DG2 64k pagesize uAPI Ramalingam C
2022-02-18 5:39 ` Lucas De Marchi
2022-02-18 8:20 ` Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 10/19] drm/i915/xehpsdv: Add has_flat_ccs to device info Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 11/19] drm/i915/lmem: Enable lmem for platforms with Flat CCS Ramalingam C
2022-02-18 10:08 ` Lucas De Marchi
2022-02-18 10:17 ` Lucas De Marchi
2022-02-01 10:41 ` Ramalingam C [this message]
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 13/19] drm/i915: Introduce new Tile 4 format Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 14/19] drm/i915/dg2: Tile 4 plane format support Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 15/19] drm/i915/dg2: Add DG2 unified compression Ramalingam C
2022-02-12 1:17 ` Nanley Chery
2022-02-15 14:53 ` Juha-Pekka Heikkila
2022-02-17 17:15 ` Chery, Nanley G
2022-03-18 17:39 ` Imre Deak
2022-03-23 23:40 ` Chery, Nanley G
2022-03-24 14:19 ` Imre Deak
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 16/19] uapi/drm/dg2: Introduce format modifier for DG2 clear color Ramalingam C
2022-02-12 1:19 ` Nanley Chery
2022-02-15 14:55 ` Juha-Pekka Heikkila
2022-02-15 15:02 ` Chery, Nanley G
2022-02-15 16:15 ` Juha-Pekka Heikkila
2022-02-15 16:44 ` Chery, Nanley G
2022-02-15 17:31 ` Juha-Pekka Heikkila
2022-02-15 18:24 ` Chery, Nanley G
2022-02-15 19:34 ` Juha-Pekka Heikkila
2022-03-21 13:20 ` Imre Deak
2022-03-23 23:42 ` Chery, Nanley G
2022-03-24 14:45 ` Imre Deak
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 17/19] drm/i915/dg2: Flat CCS Support Ramalingam C
2022-03-24 16:16 ` Imre Deak
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 18/19] drm/i915/Flat-CCS: Document on Flat-CCS memory compression Ramalingam C
2022-02-01 10:41 ` [Intel-gfx] [PATCH v5 19/19] Doc/gpu/rfc/i915: i915 DG2 flat-CCS uAPI Ramalingam C
2022-02-01 12:45 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/dg2: Enabling 64k page size and flat ccs (rev5) Patchwork
2022-02-01 12:47 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2022-02-01 13:15 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2022-02-18 19:04 ` [Intel-gfx] [PATCH v5 00/19] drm/i915/dg2: Enabling 64k page size and flat ccs Ramalingam C
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220201104132.3050-13-ramalingam.c@intel.com \
--to=ramalingam.c@intel.com \
--cc=cq.tang@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=matthew.auld@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox