Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Auld <matthew.auld@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: Filip Hazubski <filip.hazubski@intel.com>,
	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>,
	Lucas De Marchi <lucas.demarchi@intel.com>,
	Carl Zhang <carl.zhang@intel.com>, Effie Yu <effie.yu@intel.com>,
	Matt Roper <matthew.d.roper@intel.com>
Subject: [Intel-xe] [RFC 1/5] drm/xe/uapi: Add support for cache and coherency mode
Date: Tue, 29 Aug 2023 17:28:42 +0100	[thread overview]
Message-ID: <20230829162840.73444-8-matthew.auld@intel.com> (raw)
In-Reply-To: <20230829162840.73444-7-matthew.auld@intel.com>

From: Pallavi Mishra <pallavi.mishra@intel.com>

Allow userspace to specify the CPU caching mode to use for system memory
in addition to coherency modes during object creation. Modify gem create
handler and introduce xe_bo_create_user to replace xe_bo_create. In a
later patch we will support setting the pat_index as part of vm_bind,
where the expectation is that the coherency mode extracted from the
pat_index must match the one set at object creation.

Co-authored-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Cc: Filip Hazubski <filip.hazubski@intel.com>
Cc: Carl Zhang <carl.zhang@intel.com>
Cc: Effie Yu <effie.yu@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       | 95 +++++++++++++++++++++++++++-----
 drivers/gpu/drm/xe/xe_bo.h       |  3 +-
 drivers/gpu/drm/xe/xe_bo_types.h | 10 ++++
 drivers/gpu/drm/xe/xe_dma_buf.c  |  5 +-
 include/uapi/drm/xe_drm.h        | 47 +++++++++++++++-
 5 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 1ab682d61e3c..f60090fe8cd2 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -339,6 +339,15 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 					   PAGE_SIZE);
 
+	if (bo->smem_caching) {
+		if (bo->smem_caching == XE_GEM_CACHING_WB)
+			caching = ttm_cached;
+		else if (bo->smem_caching == XE_GEM_CACHING_WC)
+			caching = ttm_write_combined;
+		else if (bo->smem_caching == XE_GEM_CACHING_UC)
+			caching = ttm_uncached;
+	}
+
 	/*
 	 * Display scanout is always non-coherent with the CPU cache.
 	 *
@@ -1183,9 +1192,10 @@ void xe_bo_free(struct xe_bo *bo)
 	kfree(bo);
 }
 
-struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 				    struct xe_tile *tile, struct dma_resv *resv,
 				    struct ttm_lru_bulk_move *bulk, size_t size,
+				    u16 smem_caching, u16 coh_mode,
 				    enum ttm_bo_type type, u32 flags)
 {
 	struct ttm_operation_ctx ctx = {
@@ -1223,6 +1233,8 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
+	bo->smem_caching = smem_caching;
+	bo->coh_mode = coh_mode;
 	bo->ttm.base.funcs = &xe_gem_object_funcs;
 	bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
 	bo->props.preferred_gt = XE_BO_PROPS_INVALID;
@@ -1306,10 +1318,11 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 }
 
 struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
-			  struct xe_tile *tile, struct xe_vm *vm,
-			  size_t size, u64 start, u64 end,
-			  enum ttm_bo_type type, u32 flags)
+__xe_bo_create_locked(struct xe_device *xe,
+		      struct xe_tile *tile, struct xe_vm *vm,
+		      size_t size, u64 start, u64 end,
+		      u16 smem_caching, u16 coh_mode,
+		      enum ttm_bo_type type, u32 flags)
 {
 	struct xe_bo *bo = NULL;
 	int err;
@@ -1330,10 +1343,11 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}
 
-	bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
+	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
 				   vm && !xe_vm_in_fault_mode(vm) &&
 				   flags & XE_BO_CREATE_USER_BIT ?
 				   &vm->lru_bulk_move : NULL, size,
+				   smem_caching, coh_mode,
 				   type, flags);
 	if (IS_ERR(bo))
 		return bo;
@@ -1367,11 +1381,31 @@ xe_bo_create_locked_range(struct xe_device *xe,
 	return ERR_PTR(err);
 }
 
+struct xe_bo *
+xe_bo_create_locked_range(struct xe_device *xe,
+			  struct xe_tile *tile, struct xe_vm *vm,
+			  size_t size, u64 start, u64 end,
+			  enum ttm_bo_type type, u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, 0, type, flags);
+}
+
 struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
 				  struct xe_vm *vm, size_t size,
 				  enum ttm_bo_type type, u32 flags)
 {
-	return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags);
+	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, 0, type, flags);
+}
+
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
+				struct xe_vm *vm, size_t size,
+				enum ttm_bo_type type,
+				u16 smem_caching, u16 coh_mode,
+				u32 flags)
+{
+	return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
+				     smem_caching, coh_mode, type,
+				     flags | XE_BO_CREATE_USER_BIT);
 }
 
 struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
@@ -1754,11 +1788,11 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 	struct ww_acquire_ctx ww;
 	struct xe_vm *vm = NULL;
 	struct xe_bo *bo;
-	unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
+	unsigned int bo_flags = 0;
 	u32 handle;
 	int err;
 
-	if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
+	if (XE_IOCTL_DBG(xe, args->extensions) ||
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
@@ -1811,8 +1845,38 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
 	}
 
-	bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
-			  bo_flags);
+	if (XE_IOCTL_DBG(xe, args->coh_mode > XE_GEM_COHERENCY_2WAY))
+		return -EINVAL;
+
+	if (XE_IOCTL_DBG(xe, args->smem_caching > XE_GEM_CACHING_UC))
+		return -EINVAL;
+
+	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+		if (XE_IOCTL_DBG(xe, !args->coh_mode))
+			return -EINVAL;
+
+		if (XE_IOCTL_DBG(xe, !args->smem_caching))
+			return -EINVAL;
+
+		if (XE_IOCTL_DBG(xe, !IS_DGFX(xe) &&
+				 bo_flags & XE_BO_SCANOUT_BIT &&
+				 args->smem_caching == XE_GEM_CACHING_WB))
+			return -EINVAL;
+
+		if (args->coh_mode == XE_GEM_COHERENCY_NONE) {
+			if (XE_IOCTL_DBG(xe, args->smem_caching == XE_GEM_CACHING_WB))
+				return -EINVAL;
+		} else if (args->coh_mode == XE_GEM_COHERENCY_2WAY) {
+			if (XE_IOCTL_DBG(xe, args->smem_caching != XE_GEM_CACHING_WB))
+				return -EINVAL;
+		}
+	} else if (XE_IOCTL_DBG(xe, args->smem_caching)) {
+		return -EINVAL;
+	}
+
+	bo = xe_bo_create_user(xe, NULL, vm, args->size,
+			       ttm_bo_type_device,
+			       args->smem_caching, args->coh_mode, bo_flags);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto out_vm;
@@ -2093,10 +2157,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
 			   page_size);
 
-	bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
-			  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-			  XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
-			  XE_BO_NEEDS_CPU_ACCESS);
+	bo = xe_bo_create_user(xe, NULL, NULL, args->size, ttm_bo_type_device,
+			       XE_GEM_CACHING_WC, XE_GEM_COHERENCY_NONE,
+			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
+			       XE_BO_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 0823dda0f31b..2311ef3ffaf1 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -81,9 +81,10 @@ struct sg_table;
 struct xe_bo *xe_bo_alloc(void);
 void xe_bo_free(struct xe_bo *bo);
 
-struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
+struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 				    struct xe_tile *tile, struct dma_resv *resv,
 				    struct ttm_lru_bulk_move *bulk, size_t size,
+				    u16 smem_caching, u16 coh_mode,
 				    enum ttm_bo_type type, u32 flags);
 struct xe_bo *
 xe_bo_create_locked_range(struct xe_device *xe,
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index f6ee920303af..a98ba5bed499 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -68,6 +68,16 @@ struct xe_bo {
 	struct llist_node freed;
 	/** @created: Whether the bo has passed initial creation */
 	bool created;
+	/**
+	 * @coh_mode: Coherency setting. Currently only used for userspace
+	 * objects.
+	 */
+	u16 coh_mode;
+	/**
+	 * @smem_caching: Caching mode for smem. Currently only used for
+	 * userspace objects.
+	 */
+	u16 smem_caching;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 975dee1f770f..8ba7daf011bc 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -199,8 +199,9 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 	int ret;
 
 	dma_resv_lock(resv, NULL);
-	bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
-				   ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+	bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+				    0, 0, /* Will require 1way or 2way for vm_bind */
+				    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
 	if (IS_ERR(bo)) {
 		ret = PTR_ERR(bo);
 		goto error;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 86f16d50e9cc..64bc66d4b550 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -461,8 +461,51 @@ struct drm_xe_gem_create {
 	 */
 	__u32 handle;
 
-	/** @pad: MBZ */
-	__u32 pad;
+	/**
+	 * @coh_mode: The coherency mode for this object. This will limit the
+	 * possible @smem_caching values.
+	 *
+	 * Supported values:
+	 *
+	 * XE_GEM_COHERENCY_NONE: GPU access is assumed to be not coherent with
+	 * CPU. CPU caches are not snooped.
+	 *
+	 * XE_GEM_COHERENCY_1WAY: GPU access is coherent with CPU (CPU caches
+	 * are snooped) until GPU acquires. The acquire by the GPU is not
+	 * tracked by CPU caches.
+	 *
+	 * XE_GEM_COHERENCY_2WAY: Fully coherent between GPU and CPU. Fully
+	 * tracked by CPU caches. Both CPU and GPU caches are snooped.
+	 */
+#define XE_GEM_COHERENCY_NONE                  1
+#define XE_GEM_COHERENCY_1WAY                  2
+#define XE_GEM_COHERENCY_2WAY                  3
+	__u16 coh_mode;
+
+	/**
+	 * @smem_caching: The CPU caching mode to select for system memory.
+	 *
+	 * Supported values:
+	 *
+	 * XE_GEM_CACHING_WB: Allocate the pages with write-back caching.  On
+	 * iGPU this can't be used for scanout surfaces. The @coh_mode must
+	 * either be XE_GEM_COHERENCY_1WAY or XE_GEM_COHERENCY_2WAY.
+	 *
+	 * XE_GEM_CACHING_WC: Allocate the pages as write-combined. This is
+	 * uncached. The @coh_mode must be XE_GEM_COHERENCY_NONE or
+	 * XE_GEM_COHERENCY_1WAY. Scanout surfaces should likely use this on
+	 * igpu.
+	 *
+	 * XE_GEM_CACHING_UC: Allocate the pages as uncached. The @coh_mode must
+	 * be XE_GEM_COHERENCY_NONE or XE_GEM_COHERENCY_1WAY. Scanout surfaces
+	 * are permitted to use this.
+	 *
+	 * MUST be left as zero for VRAM-only objects.
+	 */
+#define XE_GEM_CACHING_WB                      1
+#define XE_GEM_CACHING_WC                      2
+#define XE_GEM_CACHING_UC                      3
+	__u16 smem_caching;
 
 	/** @reserved: Reserved */
 	__u64 reserved[2];
-- 
2.41.0


  reply	other threads:[~2023-08-29 16:33 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-29 16:28 [Intel-xe] [RFC 0/5] PAT and cache coherency support Matthew Auld
2023-08-29 16:28 ` Matthew Auld [this message]
2023-08-29 18:09   ` [Intel-xe] [RFC 1/5] drm/xe/uapi: Add support for cache and coherency mode Matt Roper
2023-08-30 11:13     ` Matthew Auld
2023-09-04 20:00   ` Souza, Jose
2023-09-05  9:04     ` Matthew Auld
2023-09-05 15:30       ` Souza, Jose
2023-08-29 16:28 ` [Intel-xe] [RFC 2/5] drm/xe: fix has_llc on rkl Matthew Auld
2023-08-29 18:46   ` Matt Roper
2023-08-30  1:20   ` Mishra, Pallavi
2023-08-29 16:28 ` [Intel-xe] [RFC 3/5] drm/xe: move pat_table into device info Matthew Auld
2023-08-29 19:14   ` Matt Roper
2023-08-29 21:49   ` Lucas De Marchi
2023-08-29 22:20     ` Matt Roper
2023-08-30  9:34       ` Matthew Auld
2023-08-30  9:43     ` Matthew Auld
2023-08-30  5:14   ` Mishra, Pallavi
2023-09-05 20:50   ` Souza, Jose
2023-08-29 16:28 ` [Intel-xe] [RFC 4/5] drm/xe/pat: annotate pat_index with coherency mode Matthew Auld
2023-08-29 21:08   ` Matt Roper
2023-08-30  9:32     ` Matthew Auld
2023-08-30 19:40       ` Matt Roper
2023-08-29 22:02   ` Lucas De Marchi
2023-08-29 16:28 ` [Intel-xe] [RFC 5/5] drm/xe/uapi: support pat_index selection with vm_bind Matthew Auld
2023-08-29 21:36   ` Matt Roper
2023-08-30  6:38     ` Thomas Hellström
2023-08-30 19:28       ` Matt Roper
2023-08-30 11:28     ` Matthew Auld
2023-08-30 15:27       ` Zhang, Carl
2023-08-30 16:02         ` Matthew Auld
2023-08-31  8:24           ` Zhang, Carl
2023-08-31 10:44             ` Matthew Auld
2023-09-01  9:34               ` Zhang, Carl
2023-09-04  9:23                 ` Matthew Auld
2023-09-05  9:12                   ` Zhang, Carl
2023-09-05  9:46                     ` Matthew Auld
2023-09-05 13:50                       ` Zhang, Carl
2023-09-05 14:07                         ` Matthew Auld
2023-09-04 20:21   ` Souza, Jose
2023-09-05  9:08     ` Matthew Auld
2023-09-07 18:56   ` Souza, Jose
2023-09-08  6:51     ` Matthew Auld
2023-09-13 15:35       ` Souza, Jose
2023-09-13 15:50         ` Matthew Auld
2023-08-29 16:40 ` [Intel-xe] ✗ CI.Patch_applied: failure for PAT and cache coherency support Patchwork
2023-09-04 20:25 ` [Intel-xe] [RFC 0/5] " Souza, Jose
2023-09-05  9:16   ` Matthew Auld

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230829162840.73444-8-matthew.auld@intel.com \
    --to=matthew.auld@intel.com \
    --cc=carl.zhang@intel.com \
    --cc=effie.yu@intel.com \
    --cc=filip.hazubski@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=lucas.demarchi@intel.com \
    --cc=matthew.d.roper@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox