From: "Souza, Jose" <jose.souza@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
"Mishra, Pallavi" <pallavi.mishra@intel.com>,
"Auld, Matthew" <matthew.auld@intel.com>
Subject: Re: [Intel-xe] [RFC 1/5] drm/xe/uapi: Add support for cache and coherency mode
Date: Mon, 4 Sep 2023 20:00:06 +0000 [thread overview]
Message-ID: <04082fd1ccc1ae60ad3d481a616af26d48a04bfb.camel@intel.com> (raw)
In-Reply-To: <20230829162840.73444-8-matthew.auld@intel.com>
On Tue, 2023-08-29 at 17:28 +0100, Matthew Auld wrote:
> From: Pallavi Mishra <pallavi.mishra@intel.com>
>
> Allow userspace to specify the CPU caching mode to use for system memory
> in addition to coherency modes during object creation. Modify gem create
> handler and introduce xe_bo_create_user to replace xe_bo_create. In a
> later patch we will support setting the pat_index as part of vm_bind,
> where expectation is that the coherency mode extracted from the
> pat_index must match the one set at object creation.
>
> Co-authored-by: Matthew Auld <matthew.auld@intel.com>
> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Cc: José Roberto de Souza <jose.souza@intel.com>
> Cc: Filip Hazubski <filip.hazubski@intel.com>
> Cc: Carl Zhang <carl.zhang@intel.com>
> Cc: Effie Yu <effie.yu@intel.com>
> ---
> drivers/gpu/drm/xe/xe_bo.c | 95 +++++++++++++++++++++++++++-----
> drivers/gpu/drm/xe/xe_bo.h | 3 +-
> drivers/gpu/drm/xe/xe_bo_types.h | 10 ++++
> drivers/gpu/drm/xe/xe_dma_buf.c | 5 +-
> include/uapi/drm/xe_drm.h | 47 +++++++++++++++-
> 5 files changed, 140 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 1ab682d61e3c..f60090fe8cd2 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -339,6 +339,15 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
> extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
> PAGE_SIZE);
>
> + if (bo->smem_caching) {
> + if (bo->smem_caching == XE_GEM_CACHING_WB)
> + caching = ttm_cached;
> + else if (bo->smem_caching == XE_GEM_CACHING_WC)
> + caching = ttm_write_combined;
> + else if (bo->smem_caching == XE_GEM_CACHING_UC)
> + caching = ttm_uncached;
> + }
Why not use a switch/case here?
> +
> /*
> * Display scanout is always non-coherent with the CPU cache.
> *
> @@ -1183,9 +1192,10 @@ void xe_bo_free(struct xe_bo *bo)
> kfree(bo);
> }
>
> -struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> +struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> struct xe_tile *tile, struct dma_resv *resv,
> struct ttm_lru_bulk_move *bulk, size_t size,
> + u16 smem_caching, u16 coh_mode,
> enum ttm_bo_type type, u32 flags)
> {
> struct ttm_operation_ctx ctx = {
> @@ -1223,6 +1233,8 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> bo->tile = tile;
> bo->size = size;
> bo->flags = flags;
> + bo->smem_caching = smem_caching;
> + bo->coh_mode = coh_mode;
> bo->ttm.base.funcs = &xe_gem_object_funcs;
> bo->props.preferred_mem_class = XE_BO_PROPS_INVALID;
> bo->props.preferred_gt = XE_BO_PROPS_INVALID;
> @@ -1306,10 +1318,11 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
> }
>
> struct xe_bo *
> -xe_bo_create_locked_range(struct xe_device *xe,
> - struct xe_tile *tile, struct xe_vm *vm,
> - size_t size, u64 start, u64 end,
> - enum ttm_bo_type type, u32 flags)
> +__xe_bo_create_locked(struct xe_device *xe,
> + struct xe_tile *tile, struct xe_vm *vm,
> + size_t size, u64 start, u64 end,
> + u16 smem_caching, u16 coh_mode,
> + enum ttm_bo_type type, u32 flags)
> {
> struct xe_bo *bo = NULL;
> int err;
> @@ -1330,10 +1343,11 @@ xe_bo_create_locked_range(struct xe_device *xe,
> }
> }
>
> - bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
> + bo = ___xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
> vm && !xe_vm_in_fault_mode(vm) &&
> flags & XE_BO_CREATE_USER_BIT ?
> &vm->lru_bulk_move : NULL, size,
> + smem_caching, coh_mode,
> type, flags);
> if (IS_ERR(bo))
> return bo;
> @@ -1367,11 +1381,31 @@ xe_bo_create_locked_range(struct xe_device *xe,
> return ERR_PTR(err);
> }
>
> +struct xe_bo *
> +xe_bo_create_locked_range(struct xe_device *xe,
> + struct xe_tile *tile, struct xe_vm *vm,
> + size_t size, u64 start, u64 end,
> + enum ttm_bo_type type, u32 flags)
> +{
> + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, 0, type, flags);
> +}
> +
> struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
> struct xe_vm *vm, size_t size,
> enum ttm_bo_type type, u32 flags)
> {
> - return xe_bo_create_locked_range(xe, tile, vm, size, 0, ~0ULL, type, flags);
> + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, 0, type, flags);
> +}
> +
> +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
> + struct xe_vm *vm, size_t size,
> + enum ttm_bo_type type,
> + u16 smem_caching, u16 coh_mode,
> + u32 flags)
> +{
> + return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
> + smem_caching, coh_mode, type,
> + flags | XE_BO_CREATE_USER_BIT);
> }
>
> struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
> @@ -1754,11 +1788,11 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
> struct ww_acquire_ctx ww;
> struct xe_vm *vm = NULL;
> struct xe_bo *bo;
> - unsigned int bo_flags = XE_BO_CREATE_USER_BIT;
> + unsigned int bo_flags;
> u32 handle;
> int err;
>
> - if (XE_IOCTL_DBG(xe, args->extensions) || XE_IOCTL_DBG(xe, args->pad) ||
> + if (XE_IOCTL_DBG(xe, args->extensions) ||
> XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> return -EINVAL;
>
> @@ -1811,8 +1845,38 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
> bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
> }
>
> - bo = xe_bo_create(xe, NULL, vm, args->size, ttm_bo_type_device,
> - bo_flags);
> + if (XE_IOCTL_DBG(xe, args->coh_mode > 2))
This will not allow XE_GEM_COHERENCY_2WAY; also, it would be better to use the XE_GEM_COHERENCY_* defines instead of a magic number.
> + return -EINVAL;
> +
> + if (XE_IOCTL_DBG(xe, args->smem_caching > 2))
> + return -EINVAL;
Same here; this will not allow XE_GEM_CACHING_UC.
> +
> + if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
> + if (XE_IOCTL_DBG(xe, !args->coh_mode))
> + return -EINVAL;
> +
> + if (XE_IOCTL_DBG(xe, !args->smem_caching))
> + return -EINVAL;
> +
> + if (XE_IOCTL_DBG(xe, !IS_DGFX(xe) &&
> + bo_flags & XE_BO_SCANOUT_BIT &&
> + args->smem_caching == XE_GEM_CACHING_WB))
> + return -EINVAL;
> +
> + if (args->coh_mode == XE_GEM_COHERENCY_NONE) {
> + if (XE_IOCTL_DBG(xe, args->smem_caching == XE_GEM_CACHING_WB))
> + return -EINVAL;
> + } else if (args->coh_mode == XE_GEM_COHERENCY_2WAY) {
> + if (XE_IOCTL_DBG(xe, args->smem_caching != XE_GEM_CACHING_WB))
> + return -EINVAL;
> + }
> + } else if (XE_IOCTL_DBG(xe, args->smem_caching)) {
> + return -EINVAL;
> + }
> +
> + bo = xe_bo_create_user(xe, NULL, vm, args->size,
> + args->smem_caching, args->coh_mode,
> + ttm_bo_type_device, bo_flags);
> if (IS_ERR(bo)) {
> err = PTR_ERR(bo);
> goto out_vm;
> @@ -2093,10 +2157,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
> args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
> page_size);
>
> - bo = xe_bo_create(xe, NULL, NULL, args->size, ttm_bo_type_device,
> - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> - XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
> - XE_BO_NEEDS_CPU_ACCESS);
> + bo = xe_bo_create_user(xe, NULL, NULL, args->size, ttm_bo_type_device,
> + XE_GEM_CACHING_WC, XE_GEM_COHERENCY_NONE,
> + XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> + XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
> + XE_BO_NEEDS_CPU_ACCESS);
> if (IS_ERR(bo))
> return PTR_ERR(bo);
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 0823dda0f31b..2311ef3ffaf1 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -81,9 +81,10 @@ struct sg_table;
> struct xe_bo *xe_bo_alloc(void);
> void xe_bo_free(struct xe_bo *bo);
>
> -struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> +struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> struct xe_tile *tile, struct dma_resv *resv,
> struct ttm_lru_bulk_move *bulk, size_t size,
> + u16 smem_caching, u16 coh_mode,
> enum ttm_bo_type type, u32 flags);
> struct xe_bo *
> xe_bo_create_locked_range(struct xe_device *xe,
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index f6ee920303af..a98ba5bed499 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -68,6 +68,16 @@ struct xe_bo {
> struct llist_node freed;
> /** @created: Whether the bo has passed initial creation */
> bool created;
> + /**
> + * @coh_mode: Coherency setting. Currently only used for userspace
> + * objects.
> + */
> + u16 coh_mode;
> + /**
> + * @smem_caching: Caching mode for smem. Currently only used for
> + * userspace objects.
> + */
> + u16 smem_caching;
> };
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
> index 975dee1f770f..8ba7daf011bc 100644
> --- a/drivers/gpu/drm/xe/xe_dma_buf.c
> +++ b/drivers/gpu/drm/xe/xe_dma_buf.c
> @@ -199,8 +199,9 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
> int ret;
>
> dma_resv_lock(resv, NULL);
> - bo = __xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
> - ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
> + bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
> + 0, 0, /* Will require 1way or 2way for vm_bind */
> + ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
> if (IS_ERR(bo)) {
> ret = PTR_ERR(bo);
> goto error;
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 86f16d50e9cc..64bc66d4b550 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -461,8 +461,51 @@ struct drm_xe_gem_create {
> */
> __u32 handle;
>
> - /** @pad: MBZ */
> - __u32 pad;
> + /**
> + * @coh_mode: The coherency mode for this object. This will limit the
> + * possible @smem_caching values.
> + *
> + * Supported values:
> + *
> + * XE_GEM_COHERENCY_NONE: GPU access is assumed to be not coherent with
> + * CPU. CPU caches are not snooped.
> + *
> + * XE_GEM_COHERENCY_1WAY: GPU access is coherent with CPU (CPU caches
> + * are snooped) until GPU acquires. The acquire by the GPU is not
> + * tracked by CPU caches.
> + *
> + * XE_GEM_COHERENCY_2WAY: Fully coherent between GPU and CPU. Fully
> + * tracked by CPU caches. Both CPU and GPU caches are snooped.
> + */
> +#define XE_GEM_COHERENCY_NONE 1
> +#define XE_GEM_COHERENCY_1WAY 2
> +#define XE_GEM_COHERENCY_2WAY 3
> + __u16 coh_mode;
Why is coh_mode necessary in the create uAPI?
It is not actually used in this patch, and by the end of the series it is only used to check whether the coh_mode of the pat_index in the vm_bind is equal to the
coh_mode set at gem create.
I guess it is not allowed to bind the same bo at two different addresses with different coh_mode, is that right?
If so, we could still remove it from the gem create uAPI, set coh_mode in gem_bo on the first bind of the bo, and then check for equality in subsequent
binds.
> +
> + /**
> + * @smem_caching: The CPU caching mode to select for system memory.
> + *
> + * Supported values:
> + *
> + * XE_GEM_CACHING_WB: Allocate the pages with write-back caching. On
> + * iGPU this can't be used for scanout surfaces. The @coh_mode must
> + * either be XE_GEM_COHERENCY_1WAY or XE_GEM_COHERENCY_2WAY.
> + *
> + * XE_GEM_CACHING_WC: Allocate the pages as write-combined. This is
> + * uncached. The @coh_mode must be XE_GEM_COHERENCY_NONE or
> + * XE_GEM_COHERENCY_1WAY. Scanout surfaces should likely use this on
> + * igpu.
> + *
> + * XE_GEM_CACHING_UC: Allocate the pages as uncached. The @coh_mode must
> + * be XE_GEM_COHERENCY_NONE or XE_GEM_COHERENCY_1WAY. Scanout surfaces
> + * are permitted to use this.
> + *
> + * MUST be left as zero for VRAM-only objects.
> + */
> +#define XE_GEM_CACHING_WB 1
> +#define XE_GEM_CACHING_WC 2
> +#define XE_GEM_CACHING_UC 3
> + __u16 smem_caching;
>
> /** @reserved: Reserved */
> __u64 reserved[2];
next prev parent reply other threads:[~2023-09-04 20:00 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-29 16:28 [Intel-xe] [RFC 0/5] PAT and cache coherency support Matthew Auld
2023-08-29 16:28 ` [Intel-xe] [RFC 1/5] drm/xe/uapi: Add support for cache and coherency mode Matthew Auld
2023-08-29 18:09 ` Matt Roper
2023-08-30 11:13 ` Matthew Auld
2023-09-04 20:00 ` Souza, Jose [this message]
2023-09-05 9:04 ` Matthew Auld
2023-09-05 15:30 ` Souza, Jose
2023-08-29 16:28 ` [Intel-xe] [RFC 2/5] drm/xe: fix has_llc on rkl Matthew Auld
2023-08-29 18:46 ` Matt Roper
2023-08-30 1:20 ` Mishra, Pallavi
2023-08-29 16:28 ` [Intel-xe] [RFC 3/5] drm/xe: move pat_table into device info Matthew Auld
2023-08-29 19:14 ` Matt Roper
2023-08-29 21:49 ` Lucas De Marchi
2023-08-29 22:20 ` Matt Roper
2023-08-30 9:34 ` Matthew Auld
2023-08-30 9:43 ` Matthew Auld
2023-08-30 5:14 ` Mishra, Pallavi
2023-09-05 20:50 ` Souza, Jose
2023-08-29 16:28 ` [Intel-xe] [RFC 4/5] drm/xe/pat: annotate pat_index with coherency mode Matthew Auld
2023-08-29 21:08 ` Matt Roper
2023-08-30 9:32 ` Matthew Auld
2023-08-30 19:40 ` Matt Roper
2023-08-29 22:02 ` Lucas De Marchi
2023-08-29 16:28 ` [Intel-xe] [RFC 5/5] drm/xe/uapi: support pat_index selection with vm_bind Matthew Auld
2023-08-29 21:36 ` Matt Roper
2023-08-30 6:38 ` Thomas Hellström
2023-08-30 19:28 ` Matt Roper
2023-08-30 11:28 ` Matthew Auld
2023-08-30 15:27 ` Zhang, Carl
2023-08-30 16:02 ` Matthew Auld
2023-08-31 8:24 ` Zhang, Carl
2023-08-31 10:44 ` Matthew Auld
2023-09-01 9:34 ` Zhang, Carl
2023-09-04 9:23 ` Matthew Auld
2023-09-05 9:12 ` Zhang, Carl
2023-09-05 9:46 ` Matthew Auld
2023-09-05 13:50 ` Zhang, Carl
2023-09-05 14:07 ` Matthew Auld
2023-09-04 20:21 ` Souza, Jose
2023-09-05 9:08 ` Matthew Auld
2023-09-07 18:56 ` Souza, Jose
2023-09-08 6:51 ` Matthew Auld
2023-09-13 15:35 ` Souza, Jose
2023-09-13 15:50 ` Matthew Auld
2023-08-29 16:40 ` [Intel-xe] ✗ CI.Patch_applied: failure for PAT and cache coherency support Patchwork
2023-09-04 20:25 ` [Intel-xe] [RFC 0/5] " Souza, Jose
2023-09-05 9:16 ` Matthew Auld
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=04082fd1ccc1ae60ad3d481a616af26d48a04bfb.camel@intel.com \
--to=jose.souza@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.auld@intel.com \
--cc=pallavi.mishra@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox