* [PATCH] drm/xe: CLOS Based Cache Reservation support.
@ 2023-12-20 1:27 Pallavi Mishra
2024-01-03 1:56 ` Welty, Brian
0 siblings, 1 reply; 3+ messages in thread
From: Pallavi Mishra @ 2023-12-20 1:27 UTC (permalink / raw)
To: intel-xe
Xe API supports an optional extension for allowing Apps to reserve
portions of the GPU Caches for exclusive use. This allows the App
to separate latency/bandwidth sensitive workloads from all other
workloads.
PVC and XE2 expose control over each Cache through the Class of Service (CLOS)
feature. CLOS allows XEKMD to define which portions of a cache may be
used for a given allocation through a set of Waymask controls grouped
into multiple sets.
For each CLOS set, and supported cache, there is a Waymask to configure
the Ways in that cache that may be used to cache memory requests for that
CLOS.
TODO:
- Need to add sysfs for setting limits
- Need to handle BO host cache type
Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
---
drivers/gpu/drm/xe/Makefile | 1 +
.../gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 +
drivers/gpu/drm/xe/xe_clos.c | 268 ++++++++++++++++++
drivers/gpu/drm/xe/xe_clos.h | 36 +++
drivers/gpu/drm/xe/xe_device.c | 15 +
drivers/gpu/drm/xe/xe_device_types.h | 15 +
drivers/gpu/drm/xe/xe_pat.c | 45 +++
drivers/gpu/drm/xe/xe_pat.h | 10 +
drivers/gpu/drm/xe/xe_pci.c | 4 +
drivers/gpu/drm/xe/xe_vm.c | 11 +
include/uapi/drm/xe_drm.h | 71 +++++
11 files changed, 477 insertions(+)
create mode 100644 drivers/gpu/drm/xe/xe_clos.c
create mode 100644 drivers/gpu/drm/xe/xe_clos.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index df8601d6a59f..bc60708ba23c 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -64,6 +64,7 @@ $(uses_generated_oob): $(generated_oob)
xe-y += xe_bb.o \
xe_bo.o \
xe_bo_evict.o \
+ xe_clos.o \
xe_debugfs.o \
xe_devcoredump.o \
xe_device.o \
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index 5d2a77b52db4..27226c1efc5a 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -104,6 +104,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
(xe)->info.graphics_verx100 <= (last*100 + 99))
#define IS_MOBILE(xe) (xe && 0)
#define HAS_LLC(xe) (!IS_DGFX((xe)))
+#define HAS_CLOS(xe) ((xe)->info.has_clos == true)
#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
diff --git a/drivers/gpu/drm/xe/xe_clos.c b/drivers/gpu/drm/xe/xe_clos.c
new file mode 100644
index 000000000000..da56e52d25de
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_clos.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "regs/xe_gt_regs.h"
+#include "xe_clos.h"
+
+#include <drm/xe_drm.h>
+
+static void clos_update_ways(struct xe_gt *gt, u8 clos_index, u32 mask)
+{
+
+ DRM_DEBUG("clos index = %d mask = 0x%x", clos_index, mask);
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(clos_index), mask);
+}
+
+static void update_l3cache_masks(struct xe_device *xe)
+{
+ u8 start_bits = 0;
+ int i;
+
+ for (i = 0; i < NUM_CLOS; i++) {
+ struct xe_gt *gt;
+ u32 mask = 0;
+ int j;
+
+ if (xe->cache_resv.ways[i]) {
+ // Assign contiguous span of ways
+ u8 ways = xe->cache_resv.ways[i];
+ mask = GENMASK(start_bits + ways - 1, start_bits);
+
+ DRM_DEBUG("start_bits = %d ways = %d mask= 0x%x\n",
+ start_bits, ways, mask);
+ start_bits += ways;
+ }
+ for_each_gt(gt, xe, j)
+ clos_update_ways(gt, i, mask);
+ }
+}
+
+#define MAX_L3WAYS 32
+void init_device_clos(struct xe_device *xe)
+{
+ int i;
+
+ if (!(xe->info.has_clos))
+ return;
+
+ mutex_init(&xe->cache_resv.clos_mutex);
+ // CLOS1 and CLOS2 available for Reservation
+ xe->cache_resv.free_clos_mask = 0x6;
+
+ if (GRAPHICS_VER(xe) >= 20)
+ xe->cache_resv.free_clos_mask = 0xe;
+
+ // Shared set uses CLOS0 and initially gets all Ways
+ xe->cache_resv.ways[0] = MAX_L3WAYS;
+
+ for (i = 1; i < NUM_CLOS; i++)
+ xe->cache_resv.ways[i] = 0;
+
+ update_l3cache_masks(xe);
+}
+
+void uninit_device_clos(struct xe_device *xe)
+{
+ if (!(xe->info.has_clos))
+ return;
+
+ mutex_destroy(&xe->cache_resv.clos_mutex);
+}
+
+void init_client_clos(struct xe_file *file)
+{
+ if (!(file->xe->info.has_clos))
+ return;
+
+ file->clos_resv.clos_mask = 0; // No CLOS reserved yet
+ file->clos_resv.l3_rsvd_ways = 0;
+}
+
+void uninit_client_clos(struct xe_file *file)
+{
+ if (!(file->xe->info.has_clos))
+ return;
+
+ while (file->clos_resv.clos_mask) {
+ u16 clos_index = ffs(file->clos_resv.clos_mask) - 1;
+
+ DRM_DEBUG("uninit release mask = 0x%x clos= %d\n",
+ file->clos_resv.clos_mask, clos_index);
+ free_clos(file, clos_index);
+ file->clos_resv.clos_mask &= ~(1 << clos_index);
+ }
+}
+
+#define L3_GLOBAL_RESERVATION_LIMIT 16
+#define L3_CLIENT_RESERVATION_LIMIT 8
+static int reserve_l3cache_ways(struct xe_file *file,
+ u16 clos_index, u16 *num_ways)
+{
+ struct xe_device *xe = file->xe;
+ u8 global_limit = L3_GLOBAL_RESERVATION_LIMIT -
+ (MAX_L3WAYS - xe->cache_resv.ways[0]);
+ u8 client_limit = L3_CLIENT_RESERVATION_LIMIT -
+ file->clos_resv.l3_rsvd_ways;
+ u8 limit = min(global_limit, client_limit);
+
+ if (limit == 0)
+ return -ENOSPC;
+
+ if (*num_ways > limit) {
+ *num_ways = limit;
+ return -EAGAIN;
+ }
+
+ file->clos_resv.l3_rsvd_ways += *num_ways;
+
+ xe->cache_resv.ways[0] -= *num_ways;
+ xe->cache_resv.ways[clos_index] = *num_ways;
+
+ update_l3cache_masks(xe);
+
+ return 0;
+}
+
+static int
+free_l3cache_ways(struct xe_file *file, u16 clos_index)
+{
+ struct xe_device *xe = file->xe;
+
+ if (xe->cache_resv.ways[clos_index]) {
+ u8 num_ways = xe->cache_resv.ways[clos_index];
+
+ file->clos_resv.l3_rsvd_ways -= num_ways;
+
+ xe->cache_resv.ways[0] += num_ways;
+ xe->cache_resv.ways[clos_index] -= num_ways;
+
+ update_l3cache_masks(xe);
+ }
+
+ return 0;
+}
+
+static bool
+clos_is_reserved(struct xe_file *file, u16 clos_index)
+{
+ return file->clos_resv.clos_mask & (1 << clos_index);
+}
+
+int reserve_cache_ways(struct xe_file *file, u16 cache_level,
+ u16 clos_index, u16 *num_ways)
+{
+ struct xe_device *xe = file->xe;
+ int ret = 0;
+
+ if (cache_level != 3)
+ return -EINVAL;
+
+ if ((clos_index >= NUM_CLOS) || !clos_is_reserved(file, clos_index))
+ return -EPERM;
+
+ mutex_lock(&xe->cache_resv.clos_mutex);
+
+ if (*num_ways)
+ ret = reserve_l3cache_ways(file, clos_index, num_ways);
+ else
+ ret = free_l3cache_ways(file, clos_index);
+
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+ return ret;
+}
+
+int reserve_clos(struct xe_file *file, u16 *clos_index)
+{
+ struct xe_device *xe = file->xe;
+
+ mutex_lock(&xe->cache_resv.clos_mutex);
+
+ if (xe->cache_resv.free_clos_mask) {
+ u16 clos = ffs(xe->cache_resv.free_clos_mask) - 1;
+
+ file->clos_resv.clos_mask |= (1 << clos);
+ xe->cache_resv.free_clos_mask &= ~(1 << clos);
+
+ *clos_index = clos;
+ xe->cache_resv.clos_index = clos;
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+
+ return 0;
+ }
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+
+ return -ENOSPC;
+}
+
+int free_clos(struct xe_file *file, u16 clos_index)
+{
+ struct xe_device *xe = file->xe;
+
+ mutex_lock(&xe->cache_resv.clos_mutex);
+
+ if (clos_is_reserved(file, clos_index)) {
+
+ free_l3cache_ways(file, clos_index);
+
+ file->clos_resv.clos_mask &= ~(1 << clos_index);
+ xe->cache_resv.free_clos_mask |= (1 << clos_index);
+
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+
+ return 0;
+ }
+
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+ return -EPERM;
+}
+
+int xe_clos_reserve_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_file *file_priv = file->driver_priv;
+ struct xe_device *xe = file_priv->xe;
+ struct drm_xe_clos_reserve *clos = data;
+
+ if (!HAS_CLOS(xe))
+ return -EOPNOTSUPP;
+
+ return reserve_clos(file_priv, &clos->clos_index);
+}
+
+int xe_clos_free_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_file *file_priv = file->driver_priv;
+ struct xe_device *xe = file_priv->xe;
+ struct drm_xe_clos_free *clos = data;
+
+ if (!HAS_CLOS(xe))
+ return -EOPNOTSUPP;
+
+ return free_clos(file_priv, clos->clos_index);
+}
+
+int xe_cache_reserve_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct xe_file *file_priv = file->driver_priv;
+ struct xe_device *xe = file_priv->xe;
+ struct drm_xe_cache_reserve *cache_reserve = data;
+
+ if (!HAS_CLOS(xe))
+ return -EOPNOTSUPP;
+
+ return reserve_cache_ways(file_priv,
+ cache_reserve->cache_level,
+ cache_reserve->clos_index,
+ &cache_reserve->num_ways);
+}
+
+
diff --git a/drivers/gpu/drm/xe/xe_clos.h b/drivers/gpu/drm/xe/xe_clos.h
new file mode 100644
index 000000000000..41384028e670
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_clos.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef INTEL_CLOS_H
+#define INTEL_CLOS_H
+
+#include <linux/types.h>
+
+struct xe_device;
+struct xe_file;
+
+struct drm_device;
+struct drm_file;
+
+void init_device_clos(struct xe_device *xe);
+void uninit_device_clos(struct xe_device *xe);
+
+void init_client_clos(struct xe_file *file);
+void uninit_client_clos(struct xe_file *file);
+
+int reserve_clos(struct xe_file *file, u16 *clos_index);
+int free_clos(struct xe_file *file, u16 clos_index);
+int reserve_cache_ways(struct xe_file *file, u16 cache_level,
+ u16 clos_index, u16 *num_ways);
+
+int xe_clos_reserve_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_clos_free_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int xe_cache_reserve_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
+
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 86867d42d532..f4287db66ff7 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -43,6 +43,7 @@
#include "xe_vm.h"
#include "xe_wait_user_fence.h"
#include "xe_hwmon.h"
+#include "xe_clos.h"
#ifdef CONFIG_LOCKDEP
struct lockdep_map xe_device_mem_access_lockdep_map = {
@@ -82,6 +83,7 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
spin_unlock(&xe->clients.lock);
file->driver_priv = xef;
+ init_client_clos(xef);
return 0;
}
@@ -101,6 +103,9 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
xe_exec_queue_kill(q);
xe_exec_queue_put(q);
}
+
+ uninit_client_clos(xef);
+
mutex_unlock(&xef->exec_queue.lock);
xa_destroy(&xef->exec_queue.xa);
mutex_destroy(&xef->exec_queue.lock);
@@ -138,6 +143,12 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_CLOS_RESERVE, xe_clos_reserve_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_CLOS_FREE, xe_clos_free_ioctl,
+ DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_CACHE_RESERVE, xe_cache_reserve_ioctl,
+ DRM_RENDER_ALLOW),
};
static const struct file_operations xe_driver_fops = {
@@ -538,6 +549,8 @@ int xe_device_probe(struct xe_device *xe)
xe_hwmon_register(xe);
+ init_device_clos(xe);
+
err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
if (err)
return err;
@@ -570,6 +583,8 @@ void xe_device_remove(struct xe_device *xe)
xe_heci_gsc_fini(xe);
+ uninit_device_clos(xe);
+
xe_irq_shutdown(xe);
}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 71f23ac365e6..1f823c3136c4 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -281,6 +281,8 @@ struct xe_device {
u8 has_heci_gscfi:1;
/** @skip_guc_pc: Skip GuC based PM feature init */
u8 skip_guc_pc:1;
+ /** @has_clos: device supports clos reservation */
+ u8 has_clos:1;
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
struct {
@@ -440,6 +442,14 @@ struct xe_device {
/** @needs_flr_on_fini: requests function-reset on fini */
bool needs_flr_on_fini;
+#define NUM_CLOS 4
+ struct cache_reservation {
+ u32 free_clos_mask; // Mask of CLOS sets that have not been reserved
+ struct mutex clos_mutex;
+ u8 ways[NUM_CLOS];
+ u8 clos_index;
+ } cache_resv;
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -545,6 +555,11 @@ struct xe_file {
/** @client: drm client */
struct xe_drm_client *client;
+
+ struct clos_reservation {
+ u32 clos_mask; // Mask of CLOS sets reserved by client
+ u8 l3_rsvd_ways; // Number of L3 Ways reserved by client, across all CLOS
+ } clos_resv;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 1ff6bc79e7d4..f1a6175930f7 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -148,6 +148,51 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
return xe->pat.table[pat_index].coh_mode;
}
+int xe_pat_index_clos_check(struct xe_device *xe, u16 pat_index, u16 clos_index)
+{
+ int err = 0;
+
+ WARN_ON(pat_index >= xe->pat.n_entries);
+
+ if (GRAPHICS_VER(xe) >= 20) {
+ switch (clos_index) {
+ case 1:
+ if (!(pat_index >= 20 && pat_index < 24))
+ err = -EINVAL;
+ break;
+ case 2:
+ if (!(pat_index >= 24 && pat_index < 28))
+ err = -EINVAL;
+ break;
+ case 3:
+ if (!(pat_index >= 28 && pat_index <= 31))
+ err = -EINVAL;
+ break;
+ default:
+ drm_err(&xe->drm, "Unsupported CLOS value\n");
+ err = -EINVAL;
+ }
+ }
+
+ if (xe->info.platform == XE_PVC) {
+ switch (clos_index) {
+ case 1:
+ if (!(pat_index == 4 || pat_index == 5))
+ err = -EINVAL;
+ break;
+ case 2:
+ if (!(pat_index == 6 || pat_index == 7))
+ err = -EINVAL;
+ break;
+ default:
+ drm_err(&xe->drm, "Unsupported CLOS value\n");
+ err = -EINVAL;
+ }
+ }
+
+ return err;
+}
+
static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index fa0dfbe525cd..fa4dbfd89496 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -58,4 +58,14 @@ void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
*/
u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
+/**
+ * xe_pat_index_clos_check - check whether clos has been reserved for
+ * chosen pat_index.
+ * @xe: xe device
+ * @pat_index: The pat_index to query
+ * @clos_index: clos index to compare
+ */
+int xe_pat_index_clos_check(struct xe_device *xe, u16 pat_index, u16 clos_index);
+
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 1f997353a78f..5896a1dc46d1 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -60,6 +60,7 @@ struct xe_device_desc {
u8 require_force_probe:1;
u8 is_dgfx:1;
+ u8 has_clos:1;
u8 has_display:1;
u8 has_heci_gscfi:1;
u8 has_llc:1;
@@ -319,6 +320,7 @@ static const struct xe_device_desc pvc_desc = {
.graphics = &graphics_xehpc,
DGFX_FEATURES,
PLATFORM(XE_PVC),
+ .has_clos = true,
.has_display = false,
.has_heci_gscfi = 1,
.require_force_probe = true,
@@ -333,6 +335,7 @@ static const struct xe_device_desc mtl_desc = {
static const struct xe_device_desc lnl_desc = {
PLATFORM(XE_LUNARLAKE),
+ .has_clos = true,
.require_force_probe = true,
};
@@ -548,6 +551,7 @@ static int xe_info_init_early(struct xe_device *xe,
subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
xe->info.is_dgfx = desc->is_dgfx;
+ xe->info.has_clos = desc->has_clos;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_llc = desc->has_llc;
xe->info.has_mmio_ext = desc->has_mmio_ext;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 1ca917b8315c..1841f2af74c1 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -2800,6 +2800,17 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
err = -EINVAL;
goto free_bind_ops;
}
+
+ /* check whether Clos has been reserved for chosen pat */
+ if ((GRAPHICS_VER(xe) >= 20 && (pat_index > 19)) || (xe->info.platform == XE_PVC && (pat_index > 3))) {
+ mutex_lock(&xe->cache_resv.clos_mutex);
+ err = xe_pat_index_clos_check(xe, pat_index, xe->cache_resv.clos_index);
+ if (err) {
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+ goto free_bind_ops;
+ }
+ mutex_unlock(&xe->cache_resv.clos_mutex);
+ }
}
return 0;
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 9fa3ae324731..47d9d23e0bb0 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -100,6 +100,10 @@ extern "C" {
#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
#define DRM_XE_EXEC 0x09
#define DRM_XE_WAIT_USER_FENCE 0x0a
+#define DRM_XE_CLOS_RESERVE 0x0b
+#define DRM_XE_CLOS_FREE 0x0c
+#define DRM_XE_CACHE_RESERVE 0x0d
+
/* Must be kept compact -- no holes */
#define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
@@ -113,6 +117,10 @@ extern "C" {
#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
#define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
#define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+#define DRM_IOCTL_XE_CLOS_RESERVE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CLOS_RESERVE, struct drm_xe_clos_reserve)
+#define DRM_IOCTL_XE_CLOS_FREE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CLOS_FREE, struct drm_xe_clos_free)
+#define DRM_IOCTL_XE_CACHE_RESERVE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CACHE_RESERVE, struct drm_xe_cache_reserve)
+
/**
* DOC: Xe IOCTL Extensions
@@ -1340,6 +1348,69 @@ struct drm_xe_wait_user_fence {
__u64 reserved[2];
};
+/**
+ * struct drm_xe_clos_reserve
+ *
+ * Allows clients to request reservation of one free CLOS, to use in subsequent
+ * Cache Reservations.
+ *
+ */
+struct drm_xe_clos_reserve {
+ /** @clos_index: clos index for reservation */
+ __u16 clos_index;
+
+ /** @pad: MBZ */
+ __u16 pad16;
+};
+
+/**
+ * struct drm_xe_clos_free
+ *
+ * Free off a previously reserved CLOS set. Any corresponding Cache Reservations
+ * that are active for the CLOS are automatically dropped and returned to the
+ * Shared set.
+ *
+ * The clos_index indicates the CLOS set which is being released and must
+ * correspond to a CLOS index previously reserved.
+ *
+ */
+struct drm_xe_clos_free {
+ /** @clos_index: free clos index */
+ __u16 clos_index;
+
+ /** @pad16: MBZ */
+ __u16 pad16;
+};
+
+/**
+ * struct drm_xe_cache_reserve
+ *
+ * Allows clients to request, or release, reservation of one or more cache ways,
+ * within a previously reserved CLOS set.
+ *
+ * If num_ways = 0, KMD will drop any existing Reservation for the specified
+ * clos_index and cache_level. The requested clos_index and cache_level Waymasks
+ * will then track the Shared set once again.
+ *
+ * Otherwise, the requested number of Ways will be removed from the Shared set
+ * for the requested cache level, and assigned to the Cache and CLOS specified
+ * by cache_level/clos_index.
+ *
+ */
+struct drm_xe_cache_reserve {
+ /** @clos_index: reserved clos index */
+ __u16 clos_index;
+
+ /** @cache_level: level of cache */
+ __u16 cache_level; /* e.g. 3 for L3 */
+
+ /** @num_ways: cache ways */
+ __u16 num_ways;
+
+ /** @pad16: MBZ */
+ __u16 pad16;
+};
+
#if defined(__cplusplus)
}
#endif
--
2.25.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/xe: CLOS Based Cache Reservation support.
2023-12-20 1:27 [PATCH] drm/xe: CLOS Based Cache Reservation support Pallavi Mishra
@ 2024-01-03 1:56 ` Welty, Brian
2024-01-03 17:38 ` Mishra, Pallavi
0 siblings, 1 reply; 3+ messages in thread
From: Welty, Brian @ 2024-01-03 1:56 UTC (permalink / raw)
To: Pallavi Mishra, intel-xe
On 12/19/2023 5:27 PM, Pallavi Mishra wrote:
> Xe API supports an optional extension for allowing Apps to reserve
> portions of the GPU Caches for exclusive use. This allows the App
> to separate latency/bandwidth sensitive workloads from all other
> workloads.
>
> PVC and XE2 expose control over each Cache through the Class of Service (CLOS)
> feature. CLOS allows XEKMD to define which portions of a cache may be
> used for a given allocation through a set of Waymask controls grouped
> into multiple sets.
>
> For each CLOS set, and supported cache, there is a Waymask to configure
> the Ways in that cache that may be used to cache memory requests for that
> CLOS.
>
> TODO:
> - Need to add sysfs for setting limits
> - Need to handle BO host cache type
>
> Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
> ---
> drivers/gpu/drm/xe/Makefile | 1 +
> .../gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 +
> drivers/gpu/drm/xe/xe_clos.c | 268 ++++++++++++++++++
> drivers/gpu/drm/xe/xe_clos.h | 36 +++
> drivers/gpu/drm/xe/xe_device.c | 15 +
> drivers/gpu/drm/xe/xe_device_types.h | 15 +
> drivers/gpu/drm/xe/xe_pat.c | 45 +++
> drivers/gpu/drm/xe/xe_pat.h | 10 +
> drivers/gpu/drm/xe/xe_pci.c | 4 +
> drivers/gpu/drm/xe/xe_vm.c | 11 +
> include/uapi/drm/xe_drm.h | 71 +++++
> 11 files changed, 477 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_clos.c
> create mode 100644 drivers/gpu/drm/xe/xe_clos.h
>
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index df8601d6a59f..bc60708ba23c 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -64,6 +64,7 @@ $(uses_generated_oob): $(generated_oob)
> xe-y += xe_bb.o \
> xe_bo.o \
> xe_bo_evict.o \
> + xe_clos.o \
> xe_debugfs.o \
> xe_devcoredump.o \
> xe_device.o \
> diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> index 5d2a77b52db4..27226c1efc5a 100644
> --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> @@ -104,6 +104,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
> (xe)->info.graphics_verx100 <= (last*100 + 99))
> #define IS_MOBILE(xe) (xe && 0)
> #define HAS_LLC(xe) (!IS_DGFX((xe)))
> +#define HAS_CLOS(xe) ((xe)->info.has_clos == true)
Do we need this define? If so, I think belongs in different header.
But looks like convention is to just do:
if (xe->info.has_clos)
where used.
>
> #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
>
> diff --git a/drivers/gpu/drm/xe/xe_clos.c b/drivers/gpu/drm/xe/xe_clos.c
> new file mode 100644
> index 000000000000..da56e52d25de
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_clos.c
> @@ -0,0 +1,268 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#include "i915_drv.h"
> +#include "xe_device.h"
> +#include "xe_gt.h"
> +#include "xe_gt_mcr.h"
> +#include "regs/xe_gt_regs.h"
> +#include "xe_clos.h"
> +
> +#include <drm/xe_drm.h>
> +
> +static void clos_update_ways(struct xe_gt *gt, u8 clos_index, u32 mask)
> +{
> +
> + DRM_DEBUG("clos index = %d mask = 0x%x", clos_index, mask);
For here and elsewhere in patch, I believe convention is to use drm_dbg().
> + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(clos_index), mask);
> +}
> +
> +static void update_l3cache_masks(struct xe_device *xe)
> +{
> + u8 start_bits = 0;
> + int i;
> +
> + for (i = 0; i < NUM_CLOS; i++) {
> + struct xe_gt *gt;
> + u32 mask = 0;
> + int j;
> +
> + if (xe->cache_resv.ways[i]) {
> + // Assign contiguous span of ways
> + u8 ways = xe->cache_resv.ways[i];
> + mask = GENMASK(start_bits + ways - 1, start_bits);
> +
> + DRM_DEBUG("start_bits = %d ways = %d mask= 0x%x\n",
> + start_bits, ways, mask);
> + start_bits += ways;
> + }
> + for_each_gt(gt, xe, j)
> + clos_update_ways(gt, i, mask);
> + }
> +}
> +
> +#define MAX_L3WAYS 32
> +void init_device_clos(struct xe_device *xe)
> +{
> + int i;
> +
> + if (!(xe->info.has_clos))
> + return;
> +
> + mutex_init(&xe->cache_resv.clos_mutex);
> + // CLOS1 and CLOS2 available for Reservation
> + xe->cache_resv.free_clos_mask = 0x6;
> +
> + if (GRAPHICS_VER(xe) >= 20)
> + xe->cache_resv.free_clos_mask = 0xe;
> +
> + // Shared set uses CLOS0 and initially gets all Ways
> + xe->cache_resv.ways[0] = MAX_L3WAYS;
> +
> + for (i = 1; i < 3; i++)
> + xe->cache_resv.ways[i] = 0;
struct xe_device was allocated with kzalloc(). So don't really need to
initialize these to zero.
> +
> + update_l3cache_masks(xe);
> +}
> +
> +void uninit_device_clos(struct xe_device *xe)
> +{
> + if (!(xe->info.has_clos))
> + return;
> +
> + mutex_destroy(&xe->cache_resv.clos_mutex);
> +}
> +
> +void init_client_clos(struct xe_file *file)
> +{
> + if (!(file->xe->info.has_clos))
> + return;
> +
> + file->clos_resv.clos_mask = 0; // No CLOS reserved yet
> + file->clos_resv.l3_rsvd_ways = 0;
> +}
> +
> +void uninit_client_clos(struct xe_file *file)
> +{
> + if (!(file->xe->info.has_clos))
> + return;
> +
> + while (file->clos_resv.clos_mask) {
> + u16 clos_index = ffs(file->clos_resv.clos_mask) - 1;
I think a little cleaner to replace the while loop with for_each_set_bit()
> +
> + DRM_DEBUG("uninit release mask = 0x%x clos= %d\n",
> + file->clos_resv.clos_mask, clos_index);
> + free_clos(file, clos_index);
> + file->clos_resv.clos_mask &= ~(1 << clos_index);
> + }
> +}
> +
> +#define L3_GLOBAL_RESERVATION_LIMIT 16
> +#define L3_CLIENT_RESERVATION_LIMIT 8
> +static int reserve_l3cache_ways(struct xe_file *file,
> + u16 clos_index, u16 *num_ways)
> +{
> + struct xe_device *xe = file->xe;
> + u8 global_limit = L3_GLOBAL_RESERVATION_LIMIT -
> + (MAX_L3WAYS - xe->cache_resv.ways[0]);
> + u8 client_limit = L3_CLIENT_RESERVATION_LIMIT -
> + file->clos_resv.l3_rsvd_ways;
> + u8 limit = min(global_limit, client_limit);
> +
> + if (limit == 0)
> + return -ENOSPC;
> +
> + if (*num_ways > limit) {
> + *num_ways = limit;
> + return -EAGAIN;
> + }
> +
> + file->clos_resv.l3_rsvd_ways += *num_ways;
> +
> + xe->cache_resv.ways[0] -= *num_ways;
> + xe->cache_resv.ways[clos_index] = *num_ways;
> +
> + update_l3cache_masks(xe);
> +
> + return 0;
> +}
> +
> +static int
> +free_l3cache_ways(struct xe_file *file, u16 clos_index)
> +{
> + struct xe_device *xe = file->xe;
> +
> + if (xe->cache_resv.ways[clos_index]) {
> + u8 num_ways = xe->cache_resv.ways[clos_index];
> +
> + file->clos_resv.l3_rsvd_ways -= num_ways;
> +
> + xe->cache_resv.ways[0] += num_ways;
> + xe->cache_resv.ways[clos_index] -= num_ways;
> +
> + update_l3cache_masks(xe);
> + }
> +
> + return 0;
> +}
> +
> +static bool
> +clos_is_reserved(struct xe_file *file, u16 clos_index)
> +{
> + return file->clos_resv.clos_mask & (1 << clos_index);
> +}
> +
> +int reserve_cache_ways(struct xe_file *file, u16 cache_level,
> + u16 clos_index, u16 *num_ways)
Aren't this function and the 2 below only used in this file so can be
static?
> +{
> + struct xe_device *xe = file->xe;
> + int ret = 0;
> +
> + if (cache_level != 3)
> + return -EINVAL;
> +
> + if ((clos_index >= NUM_CLOS) || !clos_is_reserved(file, clos_index))
> + return -EPERM;
> +
> + mutex_lock(&xe->cache_resv.clos_mutex);
> +
> + if (*num_ways)
> + ret = reserve_l3cache_ways(file, clos_index, num_ways);
> + else
> + ret = free_l3cache_ways(file, clos_index);
> +
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> + return ret;
> +}
> +
> +int reserve_clos(struct xe_file *file, u16 *clos_index)
> +{
> + struct xe_device *xe = file->xe;
> +
> + mutex_lock(&xe->cache_resv.clos_mutex);
> +
> + if (xe->cache_resv.free_clos_mask) {
> + u16 clos = ffs(xe->cache_resv.free_clos_mask) - 1;
> +
> + file->clos_resv.clos_mask |= (1 << clos);
> + xe->cache_resv.free_clos_mask &= ~(1 << clos);
> +
> + *clos_index = clos;
> + xe->cache_resv.clos_index = clos;
indentation looks off here.
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> +
> + return 0;
> + }
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> +
> + return -ENOSPC;
> +}
> +
> +int free_clos(struct xe_file *file, u16 clos_index)
> +{
> + struct xe_device *xe = file->xe;
> +
> + mutex_lock(&xe->cache_resv.clos_mutex);
> +
> + if (clos_is_reserved(file, clos_index)) {
> + struct xe_device *xe = file->xe;
> +
> + free_l3cache_ways(file, clos_index);
> +
> + file->clos_resv.clos_mask &= ~(1 << clos_index);
> + xe->cache_resv.free_clos_mask |= (1 << clos_index);
> +
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> +
> + return 0;
> + }
> +
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> + return -EPERM;
> +}
> +
> +int xe_clos_reserve_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct xe_file *file_priv = file->driver_priv;
> + struct xe_device *xe = file_priv->xe;
> + struct drm_xe_clos_reserve *clos = data;
> +
> + if (!HAS_CLOS(xe))
> + return -EOPNOTSUPP;
> +
> + return reserve_clos(file_priv, &clos->clos_index);
> +}
> +
> +int xe_clos_free_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct xe_file *file_priv = file->driver_priv;
> + struct xe_device *xe = file_priv->xe;
> + struct drm_xe_clos_free *clos = data;
> +
> + if (!HAS_CLOS(xe))
> + return -EOPNOTSUPP;
> +
> + return free_clos(file_priv, clos->clos_index);
> +}
> +
> +int xe_cache_reserve_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct xe_file *file_priv = file->driver_priv;
> + struct xe_device *xe = file_priv->xe;
> + struct drm_xe_cache_reserve *cache_reserve = data;
> +
> + if (!HAS_CLOS(xe))
> + return -EOPNOTSUPP;
> +
> + return reserve_cache_ways(file_priv,
> + cache_reserve->cache_level,
> + cache_reserve->clos_index,
> + &cache_reserve->num_ways);
> +}
> +
> +
> diff --git a/drivers/gpu/drm/xe/xe_clos.h b/drivers/gpu/drm/xe/xe_clos.h
> new file mode 100644
> index 000000000000..41384028e670
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_clos.h
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +#ifndef INTEL_CLOS_H
> +#define INTEL_CLOS_H
> +
> +#include <linux/types.h>
> +
> +struct xe_device;
> +struct xe_file;
> +
> +struct drm_device;
> +struct drm_file;
> +
> +void init_device_clos(struct xe_device *xe);
> +void uninit_device_clos(struct xe_device *xe);
> +
> +void init_client_clos(struct xe_file *file);
> +void uninit_client_clos(struct xe_file *file);
> +
> +int reserve_clos(struct xe_file *file, u16 *clos_index);
> +int free_clos(struct xe_file *file, u16 clos_index);
> +int reserve_cache_ways(struct xe_file *file, u16 cache_level,
> + u16 clos_index, u16 *num_ways);
> +
> +int xe_clos_reserve_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
> +int xe_clos_free_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
> +int xe_cache_reserve_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
> +
> +#endif
> +
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 86867d42d532..f4287db66ff7 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -43,6 +43,7 @@
> #include "xe_vm.h"
> #include "xe_wait_user_fence.h"
> #include "xe_hwmon.h"
> +#include "xe_clos.h"
>
> #ifdef CONFIG_LOCKDEP
> struct lockdep_map xe_device_mem_access_lockdep_map = {
> @@ -82,6 +83,7 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> spin_unlock(&xe->clients.lock);
>
> file->driver_priv = xef;
> + init_client_clos(xef);
> return 0;
> }
>
> @@ -101,6 +103,9 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file)
> xe_exec_queue_kill(q);
> xe_exec_queue_put(q);
> }
> +
> + uninit_client_clos(xef);
> +
> mutex_unlock(&xef->exec_queue.lock);
> xa_destroy(&xef->exec_queue.xa);
> mutex_destroy(&xef->exec_queue.lock);
> @@ -138,6 +143,12 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
> DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
> DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(XE_CLOS_RESERVE, xe_clos_reserve_ioctl,
> + DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(XE_CLOS_FREE, xe_clos_free_ioctl,
> + DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(XE_CACHE_RESERVE, xe_cache_reserve_ioctl,
> + DRM_RENDER_ALLOW),
> };
>
> static const struct file_operations xe_driver_fops = {
> @@ -538,6 +549,8 @@ int xe_device_probe(struct xe_device *xe)
>
> xe_hwmon_register(xe);
>
> + init_device_clos(xe);
> +
> err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
> if (err)
> return err;
> @@ -570,6 +583,8 @@ void xe_device_remove(struct xe_device *xe)
>
> xe_heci_gsc_fini(xe);
>
> + uninit_device_clos(xe);
> +
> xe_irq_shutdown(xe);
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 71f23ac365e6..1f823c3136c4 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -281,6 +281,8 @@ struct xe_device {
> u8 has_heci_gscfi:1;
> /** @skip_guc_pc: Skip GuC based PM feature init */
> u8 skip_guc_pc:1;
> + /** @has_clos: device supports clos reservation */
> + u8 has_clos:1;
>
> #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> struct {
> @@ -440,6 +442,14 @@ struct xe_device {
> /** @needs_flr_on_fini: requests function-reset on fini */
> bool needs_flr_on_fini;
>
> +#define NUM_CLOS 4
> + struct cache_reservation {
> + u32 free_clos_mask; // Mask of CLOS sets that have not been reserved
> + struct mutex clos_mutex;
> + u8 ways[NUM_CLOS];
> + u8 clos_index;
indentation is off here?
> + } cache_resv;
> +
> /* private: */
>
> #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> @@ -545,6 +555,11 @@ struct xe_file {
>
> /** @client: drm client */
> struct xe_drm_client *client;
> +
> + struct clos_reservation {
> + u32 clos_mask; // Mask of CLOS sets reserved by client
> + u8 l3_rsvd_ways; // Number of L3 Ways reserved by client, across all CLOS
> + } clos_resv;
> };
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
> index 1ff6bc79e7d4..f1a6175930f7 100644
> --- a/drivers/gpu/drm/xe/xe_pat.c
> +++ b/drivers/gpu/drm/xe/xe_pat.c
> @@ -148,6 +148,51 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
> return xe->pat.table[pat_index].coh_mode;
> }
>
> +int xe_pat_index_clos_check(struct xe_device *xe, u16 pat_index, u16 clos_index)
> +{
> + WARN_ON(pat_index >= xe->pat.n_entries);
> +
> + int err = 0;
> +
> + if (GRAPHICS_VER(xe) >= 20) {
I don't know. Below is a bit messy with all the magic pat_index numbers.
Perhaps instead define a clos_pat_mask bitmask, for each index?
Then is a simple check here if pat_index is set in the appropriate
bitmask. What do you think?
Perhaps then, in addition, we could get rid of xe.info.has_clos and can just
test for the bitmasks being non-zero with a macro.
> + switch (clos_index) {
> + case 1:
> + if (!(pat_index >= 20 && pat_index < 24))
> + err = -EINVAL;
> + break;
> + case 2:
> + if (!(pat_index >= 24 && pat_index < 28))
> + err = -EINVAL;
> + break;
> + case 3:
> + if (!(pat_index >= 28 && pat_index <= 31))
> + err = -EINVAL;
> + break;
> + default:
> + drm_err(&xe->drm, "Unsupported CLOS value\n");
> + err = -EINVAL;
> + }
> + }
> +
> + if (xe->info.platform == XE_PVC) {
> + switch (clos_index) {
> + case 1:
> + if (!(pat_index == 4 || pat_index == 5))
> + err = -EINVAL;
> + break;
> + case 2:
> + if (!(pat_index == 6 || pat_index == 7))
> + err = -EINVAL;
> + break;
> + default:
> + drm_err(&xe->drm, "Unsupported CLOS value\n");
> + err = -EINVAL;
> + }
> + }
> +
> + return err;
> +}
> +
> static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
> int n_entries)
> {
> diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
> index fa0dfbe525cd..fa4dbfd89496 100644
> --- a/drivers/gpu/drm/xe/xe_pat.h
> +++ b/drivers/gpu/drm/xe/xe_pat.h
> @@ -58,4 +58,14 @@ void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
> */
> u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
>
> +/**
> + * xe_pat_index_clos_check - check whether clos has been reserved for
> + * chosen pat_index.
> + * @xe: xe device
> + * @pat_index: The pat_index to query
> + * @clos_index: clos index to compare
> + */
> +int xe_pat_index_clos_check(struct xe_device *xe, u16 pat_index, u16 clos_index);
> +
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 1f997353a78f..5896a1dc46d1 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -60,6 +60,7 @@ struct xe_device_desc {
> u8 require_force_probe:1;
> u8 is_dgfx:1;
>
> + u8 has_clos:1;
> u8 has_display:1;
> u8 has_heci_gscfi:1;
> u8 has_llc:1;
> @@ -319,6 +320,7 @@ static const struct xe_device_desc pvc_desc = {
> .graphics = &graphics_xehpc,
> DGFX_FEATURES,
> PLATFORM(XE_PVC),
> + .has_clos = true,
> .has_display = false,
> .has_heci_gscfi = 1,
> .require_force_probe = true,
> @@ -333,6 +335,7 @@ static const struct xe_device_desc mtl_desc = {
>
> static const struct xe_device_desc lnl_desc = {
> PLATFORM(XE_LUNARLAKE),
> + .has_clos = true,
> .require_force_probe = true,
> };
>
> @@ -548,6 +551,7 @@ static int xe_info_init_early(struct xe_device *xe,
> subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
>
> xe->info.is_dgfx = desc->is_dgfx;
> + xe->info.has_clos = desc->has_clos;
> xe->info.has_heci_gscfi = desc->has_heci_gscfi;
> xe->info.has_llc = desc->has_llc;
> xe->info.has_mmio_ext = desc->has_mmio_ext;
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 1ca917b8315c..1841f2af74c1 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2800,6 +2800,17 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
> err = -EINVAL;
> goto free_bind_ops;
> }
> +
> + /* check whether Clos has been reserved for chosen pat */
> + if ((GRAPHICS_VER(xe) >= 20 && (pat_index > 19)) || (xe->info.platform == XE_PVC && (pat_index > 3))) {
> + mutex_lock(&xe->cache_resv.clos_mutex);
> + err = xe_pat_index_clos_check(xe, pat_index, xe->cache_resv.clos_index);
> + if (err) {
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> + goto free_bind_ops;
> + }
indentation seems off here?
> + mutex_unlock(&xe->cache_resv.clos_mutex);
> + }
> }
>
> return 0;
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 9fa3ae324731..47d9d23e0bb0 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -100,6 +100,10 @@ extern "C" {
> #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
> #define DRM_XE_EXEC 0x09
> #define DRM_XE_WAIT_USER_FENCE 0x0a
> +#define DRM_XE_CLOS_RESERVE 0x0b
> +#define DRM_XE_CLOS_FREE 0x0c
> +#define DRM_XE_CACHE_RESERVE 0x0d
> +
> /* Must be kept compact -- no holes */
>
> #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
> @@ -113,6 +117,10 @@ extern "C" {
> #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
> #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
> #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
> +#define DRM_IOCTL_XE_CLOS_RESERVE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CLOS_RESERVE, struct drm_xe_clos_reserve)
> +#define DRM_IOCTL_XE_CLOS_FREE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CLOS_FREE, struct drm_xe_clos_free)
> +#define DRM_IOCTL_XE_CACHE_RESERVE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CACHE_RESERVE, struct drm_xe_cache_reserve)
These 3 above are all related... can they have the same prefix?
For the last one, maybe DRM_IOCTL_XE_CLOS_SET_WAYS? Or feel free to
use another name, but it seems best if it shares a common prefix.
> +
>
> /**
> * DOC: Xe IOCTL Extensions
> @@ -1340,6 +1348,69 @@ struct drm_xe_wait_user_fence {
> __u64 reserved[2];
> };
>
> +/**
> + * struct drm_xe_clos_reserve
> + *
> + * Allows clients to request reservation of one free CLOS, to use in subsequent
> + * Cache Reservations.
> + *
> + */
> +struct drm_xe_clos_reserve {
> + /** @clos_index: clos index for reservation */
> + __u16 clos_index;
> +
> + /** @pad: MBZ */
> + __u16 pad16;
> +};
> +
> +/**
> + * struct drm_xe_clos_free
> + *
> + * Free off a previously reserved CLOS set. Any corresponding Cache Reservations
> + * that are active for the CLOS are automatically dropped and returned to the
> + * Shared set.
> + *
> + * The clos_index indicates the CLOS set which is being released and must
> + * correspond to a CLOS index previously reserved.
> + *
> + */
> +struct drm_xe_clos_free {
> + /** clos_index: free clos index */
> + __u16 clos_index;
> +
> + /** @pad: MBZ */
> + __u16 pad16;
> +};
> +
> +/**
> + * struct drm_xe_cache_reserve
> + *
> + * Allows clients to request, or release, reservation of one or more cache ways,
> + * within a previously reserved CLOS set.
> + *
> + * If num_ways = 0, KMD will drop any existing Reservation for the specified
> + * clos_index and cache_level. The requested clos_index and cache_level Waymasks
> + * will then track the Shared set once again.
> + *
> + * Otherwise, the requested number of Ways will be removed from the Shared set
> + * for the requested cache level, and assigned to the Cache and CLOS specified
> + * by cache_level/clos_index.
> + *
> + */
> +struct drm_xe_cache_reserve {
> + /** @clos_index: reserved clos index */
> + __u16 clos_index;
> +
> + /** @cache_level: level of cache */
> + __u16 cache_level; /* e.g. 3 for L3 */
> +
> + /** @num_ways: cache ways */
> + __u16 num_ways;
> +
> + /** @pad: MBZ */
> + __u16 pad16;
> +};
> +
> #if defined(__cplusplus)
> }
> #endif
^ permalink raw reply [flat|nested] 3+ messages in thread
* RE: [PATCH] drm/xe: CLOS Based Cache Reservation support.
2024-01-03 1:56 ` Welty, Brian
@ 2024-01-03 17:38 ` Mishra, Pallavi
0 siblings, 0 replies; 3+ messages in thread
From: Mishra, Pallavi @ 2024-01-03 17:38 UTC (permalink / raw)
To: Welty, Brian, intel-xe@lists.freedesktop.org
> -----Original Message-----
> From: Welty, Brian <brian.welty@intel.com>
> Sent: Tuesday, January 2, 2024 5:57 PM
> To: Mishra, Pallavi <pallavi.mishra@intel.com>; intel-xe@lists.freedesktop.org
> Cc: Vishwanathapura, Niranjana <niranjana.vishwanathapura@intel.com>
> Subject: Re: [PATCH] drm/xe: CLOS Based Cache Reservation support.
>
>
> On 12/19/2023 5:27 PM, Pallavi Mishra wrote:
> > Xe API supports an optional extension for allowing Apps to reserve
> > portions of the GPU Caches for exclusive use. This allows the App to
> > separate latency/bandwidth sensitive workloads from all other
> > workloads.
> >
> > PVC and XE2 expose control over each Cache through the Class of
> > Service (CLOS) feature. CLOS allows XEKMD to define which portions of
> > a cache may be used for a given allocation through a set of Waymask
> > controls grouped into multiple sets.
> >
> > For each CLOS set, and supported cache, there is a Waymask to
> > configure the Ways in that cache that may be used to cache memory
> > requests for that CLOS.
> >
> > TODO:
> > - Need to add sysfs for setting limits
> > - Need to handle BO host cache type
> >
> > Signed-off-by: Pallavi Mishra <pallavi.mishra@intel.com>
> > ---
> > drivers/gpu/drm/xe/Makefile | 1 +
> > .../gpu/drm/xe/compat-i915-headers/i915_drv.h | 1 +
> > drivers/gpu/drm/xe/xe_clos.c | 268 ++++++++++++++++++
> > drivers/gpu/drm/xe/xe_clos.h | 36 +++
> > drivers/gpu/drm/xe/xe_device.c | 15 +
> > drivers/gpu/drm/xe/xe_device_types.h | 15 +
> > drivers/gpu/drm/xe/xe_pat.c | 45 +++
> > drivers/gpu/drm/xe/xe_pat.h | 10 +
> > drivers/gpu/drm/xe/xe_pci.c | 4 +
> > drivers/gpu/drm/xe/xe_vm.c | 11 +
> > include/uapi/drm/xe_drm.h | 71 +++++
> > 11 files changed, 477 insertions(+)
> > create mode 100644 drivers/gpu/drm/xe/xe_clos.c
> > create mode 100644 drivers/gpu/drm/xe/xe_clos.h
> >
> > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > index df8601d6a59f..bc60708ba23c 100644
> > --- a/drivers/gpu/drm/xe/Makefile
> > +++ b/drivers/gpu/drm/xe/Makefile
> > @@ -64,6 +64,7 @@ $(uses_generated_oob): $(generated_oob)
> > xe-y += xe_bb.o \
> > xe_bo.o \
> > xe_bo_evict.o \
> > + xe_clos.o \
> > xe_debugfs.o \
> > xe_devcoredump.o \
> > xe_device.o \
> > diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> > b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> > index 5d2a77b52db4..27226c1efc5a 100644
> > --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> > +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
> > @@ -104,6 +104,7 @@ static inline struct drm_i915_private
> *kdev_to_i915(struct device *kdev)
> > (xe)->info.graphics_verx100 <= (last*100 + 99))
> > #define IS_MOBILE(xe) (xe && 0)
> > #define HAS_LLC(xe) (!IS_DGFX((xe)))
> > +#define HAS_CLOS(xe) ((xe)->info.has_clos == true)
>
> Do we need this define? If so, I think belongs in different header.
> But looks like convention is to just do:
> if (xe->info.has_clos)
> where used.
Yes. Will stick to xe->info.has_clos and remove this.
>
> >
> > #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
> >
> > diff --git a/drivers/gpu/drm/xe/xe_clos.c
> > b/drivers/gpu/drm/xe/xe_clos.c new file mode 100644 index
> > 000000000000..da56e52d25de
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_clos.c
> > @@ -0,0 +1,268 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2020 Intel Corporation */
> > +
> > +#include "i915_drv.h"
> > +#include "xe_device.h"
> > +#include "xe_gt.h"
> > +#include "xe_gt_mcr.h"
> > +#include "regs/xe_gt_regs.h"
> > +#include "xe_clos.h"
> > +
> > +#include <drm/xe_drm.h>
> > +
> > +static void clos_update_ways(struct xe_gt *gt, u8 clos_index, u32
> > +mask) {
> > +
> > + DRM_DEBUG("clos index = %d mask = 0x%x", clos_index, mask);
>
> For here and elsewhere in patch, I believe convention is to use drm_dbg().
Ok.
>
> > + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(clos_index),
> mask);
> > +}
> > +
> > +static void update_l3cache_masks(struct xe_device *xe) {
> > + u8 start_bits = 0;
> > + int i;
> > +
> > + for (i = 0; i < NUM_CLOS; i++) {
> > + struct xe_gt *gt;
> > + u32 mask = 0;
> > + int j;
> > +
> > + if (xe->cache_resv.ways[i]) {
> > + // Assign contiguous span of ways
> > + u8 ways = xe->cache_resv.ways[i];
> > + mask = GENMASK(start_bits + ways - 1, start_bits);
> > +
> > + DRM_DEBUG("start_bits = %d ways = %d mask=
> 0x%x\n",
> > + start_bits, ways, mask);
> > + start_bits += ways;
> > + }
> > + for_each_gt(gt, xe, j)
> > + clos_update_ways(gt, i, mask);
> > + }
> > +}
> > +
> > +#define MAX_L3WAYS 32
> > +void init_device_clos(struct xe_device *xe) {
> > + int i;
> > +
> > + if (!(xe->info.has_clos))
> > + return;
> > +
> > + mutex_init(&xe->cache_resv.clos_mutex);
> > + // CLOS1 and CLOS2 available for Reservation
> > + xe->cache_resv.free_clos_mask = 0x6;
> > +
> > + if (GRAPHICS_VER(xe) >= 20)
> > + xe->cache_resv.free_clos_mask = 0xe;
> > +
> > + // Shared set uses CLOS0 and initially gets all Ways
> > + xe->cache_resv.ways[0] = MAX_L3WAYS;
> > +
> > + for (i = 1; i < 3; i++)
> > + xe->cache_resv.ways[i] = 0;
>
> struct xe_device was allocated with kzalloc(). So don't really need to initialize
> these to zero.
Will remove.
>
> > +
> > + update_l3cache_masks(xe);
> > +}
> > +
> > +void uninit_device_clos(struct xe_device *xe) {
> > + if (!(xe->info.has_clos))
> > + return;
> > +
> > + mutex_destroy(&xe->cache_resv.clos_mutex);
> > +}
> > +
> > +void init_client_clos(struct xe_file *file) {
> > + if (!(file->xe->info.has_clos))
> > + return;
> > +
> > + file->clos_resv.clos_mask = 0; // No CLOS reserved yet
> > + file->clos_resv.l3_rsvd_ways = 0;
> > +}
> > +
> > +void uninit_client_clos(struct xe_file *file) {
> > + if (!(file->xe->info.has_clos))
> > + return;
> > +
> > + while (file->clos_resv.clos_mask) {
> > + u16 clos_index = ffs(file->clos_resv.clos_mask) - 1;
>
> I think a little cleaner to replace the while loop with for_each_set_bit()
>
> > +
> > + DRM_DEBUG("uninit release mask = 0x%x clos= %d\n",
> > + file->clos_resv.clos_mask, clos_index);
> > + free_clos(file, clos_index);
> > + file->clos_resv.clos_mask &= ~(1 << clos_index);
> > + }
> > +}
> > +
> > +#define L3_GLOBAL_RESERVATION_LIMIT 16 #define
> > +L3_CLIENT_RESERVATION_LIMIT 8 static int reserve_l3cache_ways(struct
> > +xe_file *file,
> > + u16 clos_index, u16 *num_ways)
> > +{
> > + struct xe_device *xe = file->xe;
> > + u8 global_limit = L3_GLOBAL_RESERVATION_LIMIT -
> > + (MAX_L3WAYS - xe->cache_resv.ways[0]);
> > + u8 client_limit = L3_CLIENT_RESERVATION_LIMIT -
> > + file->clos_resv.l3_rsvd_ways;
> > + u8 limit = min(global_limit, client_limit);
> > +
> > + if (limit == 0)
> > + return -ENOSPC;
> > +
> > + if (*num_ways > limit) {
> > + *num_ways = limit;
> > + return -EAGAIN;
> > + }
> > +
> > + file->clos_resv.l3_rsvd_ways += *num_ways;
> > +
> > + xe->cache_resv.ways[0] -= *num_ways;
> > + xe->cache_resv.ways[clos_index] = *num_ways;
> > +
> > + update_l3cache_masks(xe);
> > +
> > + return 0;
> > +}
> > +
> > +static int
> > +free_l3cache_ways(struct xe_file *file, u16 clos_index) {
> > + struct xe_device *xe = file->xe;
> > +
> > + if (xe->cache_resv.ways[clos_index]) {
> > + u8 num_ways = xe->cache_resv.ways[clos_index];
> > +
> > + file->clos_resv.l3_rsvd_ways -= num_ways;
> > +
> > + xe->cache_resv.ways[0] += num_ways;
> > + xe->cache_resv.ways[clos_index] -= num_ways;
> > +
> > + update_l3cache_masks(xe);
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static bool
> > +clos_is_reserved(struct xe_file *file, u16 clos_index) {
> > + return file->clos_resv.clos_mask & (1 << clos_index); }
> > +
> > +int reserve_cache_ways(struct xe_file *file, u16 cache_level,
> > + u16 clos_index, u16 *num_ways)
>
> Aren't this function and the 2 below only used in this file so can be static?
Yes they are used in this file only. I can make them static.
>
> > +{
> > + struct xe_device *xe = file->xe;
> > + int ret = 0;
> > +
> > + if (cache_level != 3)
> > + return -EINVAL;
> > +
> > + if ((clos_index >= NUM_CLOS) || !clos_is_reserved(file, clos_index))
> > + return -EPERM;
> > +
> > + mutex_lock(&xe->cache_resv.clos_mutex);
> > +
> > + if (*num_ways)
> > + ret = reserve_l3cache_ways(file, clos_index, num_ways);
> > + else
> > + ret = free_l3cache_ways(file, clos_index);
> > +
> > + mutex_unlock(&xe->cache_resv.clos_mutex);
> > + return ret;
> > +}
> > +
> > +int reserve_clos(struct xe_file *file, u16 *clos_index) {
> > + struct xe_device *xe = file->xe;
> > +
> > + mutex_lock(&xe->cache_resv.clos_mutex);
> > +
> > + if (xe->cache_resv.free_clos_mask) {
> > + u16 clos = ffs(xe->cache_resv.free_clos_mask) - 1;
> > +
> > + file->clos_resv.clos_mask |= (1 << clos);
> > + xe->cache_resv.free_clos_mask &= ~(1 << clos);
> > +
> > + *clos_index = clos;
> > + xe->cache_resv.clos_index = clos;
>
> indentation looks off here.
>
> > + mutex_unlock(&xe->cache_resv.clos_mutex);
> > +
> > + return 0;
> > + }
> > + mutex_unlock(&xe->cache_resv.clos_mutex);
> > +
> > + return -ENOSPC;
> > +}
> > +
> > +int free_clos(struct xe_file *file, u16 clos_index) {
> > + struct xe_device *xe = file->xe;
> > +
> > + mutex_lock(&xe->cache_resv.clos_mutex);
> > +
> > + if (clos_is_reserved(file, clos_index)) {
> > + struct xe_device *xe = file->xe;
> > +
> > + free_l3cache_ways(file, clos_index);
> > +
> > + file->clos_resv.clos_mask &= ~(1 << clos_index);
> > + xe->cache_resv.free_clos_mask |= (1 << clos_index);
> > +
> > + mutex_unlock(&xe->cache_resv.clos_mutex);
> > +
> > + return 0;
> > + }
> > +
> > + mutex_unlock(&xe->cache_resv.clos_mutex);
> > + return -EPERM;
> > +}
> > +
> > +int xe_clos_reserve_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *file)
> > +{
> > + struct xe_file *file_priv = file->driver_priv;
> > + struct xe_device *xe = file_priv->xe;
> > + struct drm_xe_clos_reserve *clos = data;
> > +
> > + if (!HAS_CLOS(xe))
> > + return -EOPNOTSUPP;
> > +
> > + return reserve_clos(file_priv, &clos->clos_index); }
> > +
> > +int xe_clos_free_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *file)
> > +{
> > + struct xe_file *file_priv = file->driver_priv;
> > + struct xe_device *xe = file_priv->xe;
> > + struct drm_xe_clos_free *clos = data;
> > +
> > + if (!HAS_CLOS(xe))
> > + return -EOPNOTSUPP;
> > +
> > + return free_clos(file_priv, clos->clos_index); }
> > +
> > +int xe_cache_reserve_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *file)
> > +{
> > + struct xe_file *file_priv = file->driver_priv;
> > + struct xe_device *xe = file_priv->xe;
> > + struct drm_xe_cache_reserve *cache_reserve = data;
> > +
> > + if (!HAS_CLOS(xe))
> > + return -EOPNOTSUPP;
> > +
> > + return reserve_cache_ways(file_priv,
> > + cache_reserve->cache_level,
> > + cache_reserve->clos_index,
> > + &cache_reserve->num_ways);
> > +}
> > +
> > +
> > diff --git a/drivers/gpu/drm/xe/xe_clos.h
> > b/drivers/gpu/drm/xe/xe_clos.h new file mode 100644 index
> > 000000000000..41384028e670
> > --- /dev/null
> > +++ b/drivers/gpu/drm/xe/xe_clos.h
> > @@ -0,0 +1,36 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > + * Copyright © 2020 Intel Corporation */
> > +
> > +#ifndef INTEL_CLOS_H
> > +#define INTEL_CLOS_H
> > +
> > +#include <linux/types.h>
> > +
> > +struct xe_device;
> > +struct xe_file;
> > +
> > +struct drm_device;
> > +struct drm_file;
> > +
> > +void init_device_clos(struct xe_device *xe); void
> > +uninit_device_clos(struct xe_device *xe);
> > +
> > +void init_client_clos(struct xe_file *file); void
> > +uninit_client_clos(struct xe_file *file);
> > +
> > +int reserve_clos(struct xe_file *file, u16 *clos_index); int
> > +free_clos(struct xe_file *file, u16 clos_index); int
> > +reserve_cache_ways(struct xe_file *file, u16 cache_level,
> > + u16 clos_index, u16 *num_ways);
> > +
> > +int xe_clos_reserve_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *file);
> > +int xe_clos_free_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *file);
> > +int xe_cache_reserve_ioctl(struct drm_device *dev, void *data,
> > + struct drm_file *file);
> > +
> > +#endif
> > +
> > diff --git a/drivers/gpu/drm/xe/xe_device.c
> > b/drivers/gpu/drm/xe/xe_device.c index 86867d42d532..f4287db66ff7
> > 100644
> > --- a/drivers/gpu/drm/xe/xe_device.c
> > +++ b/drivers/gpu/drm/xe/xe_device.c
> > @@ -43,6 +43,7 @@
> > #include "xe_vm.h"
> > #include "xe_wait_user_fence.h"
> > #include "xe_hwmon.h"
> > +#include "xe_clos.h"
> >
> > #ifdef CONFIG_LOCKDEP
> > struct lockdep_map xe_device_mem_access_lockdep_map = { @@ -82,6
> > +83,7 @@ static int xe_file_open(struct drm_device *dev, struct drm_file
> *file)
> > spin_unlock(&xe->clients.lock);
> >
> > file->driver_priv = xef;
> > + init_client_clos(xef);
> > return 0;
> > }
> >
> > @@ -101,6 +103,9 @@ static void xe_file_close(struct drm_device *dev,
> struct drm_file *file)
> > xe_exec_queue_kill(q);
> > xe_exec_queue_put(q);
> > }
> > +
> > + uninit_client_clos(xef);
> > +
> > mutex_unlock(&xef->exec_queue.lock);
> > xa_destroy(&xef->exec_queue.xa);
> > mutex_destroy(&xef->exec_queue.lock);
> > @@ -138,6 +143,12 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
> > DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE,
> xe_wait_user_fence_ioctl,
> > DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(XE_CLOS_RESERVE, xe_clos_reserve_ioctl,
> > + DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(XE_CLOS_FREE, xe_clos_free_ioctl,
> > + DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(XE_CACHE_RESERVE, xe_cache_reserve_ioctl,
> > + DRM_RENDER_ALLOW),
> > };
> >
> > static const struct file_operations xe_driver_fops = { @@ -538,6
> > +549,8 @@ int xe_device_probe(struct xe_device *xe)
> >
> > xe_hwmon_register(xe);
> >
> > + init_device_clos(xe);
> > +
> > err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
> > if (err)
> > return err;
> > @@ -570,6 +583,8 @@ void xe_device_remove(struct xe_device *xe)
> >
> > xe_heci_gsc_fini(xe);
> >
> > + uninit_device_clos(xe);
> > +
> > xe_irq_shutdown(xe);
> > }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h
> > b/drivers/gpu/drm/xe/xe_device_types.h
> > index 71f23ac365e6..1f823c3136c4 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -281,6 +281,8 @@ struct xe_device {
> > u8 has_heci_gscfi:1;
> > /** @skip_guc_pc: Skip GuC based PM feature init */
> > u8 skip_guc_pc:1;
> > + /** @has_clos: device supports clos reservation */
> > + u8 has_clos:1;
> >
> > #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> > struct {
> > @@ -440,6 +442,14 @@ struct xe_device {
> > /** @needs_flr_on_fini: requests function-reset on fini */
> > bool needs_flr_on_fini;
> >
> > +#define NUM_CLOS 4
> > + struct cache_reservation {
> > + u32 free_clos_mask; // Mask of CLOS sets that have not been
> reserved
> > + struct mutex clos_mutex;
> > + u8 ways[NUM_CLOS];
> > + u8 clos_index;
> indentation is off here?
>
> > + } cache_resv;
> > +
> > /* private: */
> >
> > #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -545,6 +555,11 @@ struct
> > xe_file {
> >
> > /** @client: drm client */
> > struct xe_drm_client *client;
> > +
> > + struct clos_reservation {
> > + u32 clos_mask; // Mask of CLOS sets reserved by client
> > + u8 l3_rsvd_ways; // Number of L3 Ways reserved by client,
> across all CLOS
> > + } clos_resv;
> > };
> >
> > #endif
> > diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
> > index 1ff6bc79e7d4..f1a6175930f7 100644
> > --- a/drivers/gpu/drm/xe/xe_pat.c
> > +++ b/drivers/gpu/drm/xe/xe_pat.c
> > @@ -148,6 +148,51 @@ u16 xe_pat_index_get_coh_mode(struct xe_device
> *xe, u16 pat_index)
> > return xe->pat.table[pat_index].coh_mode;
> > }
> >
> > +int xe_pat_index_clos_check(struct xe_device *xe, u16 pat_index, u16
> > +clos_index) {
> > + WARN_ON(pat_index >= xe->pat.n_entries);
> > +
> > + int err = 0;
> > +
> > + if (GRAPHICS_VER(xe) >= 20) {
>
> I don't know. Below is a bit messy with all the magic pat_index numbers.
> Perhaps instead define a clos_pat_mask bitmask, for each index?
> Then is a simple check here if pat_index is set in the appropriate
> bitmask. What do you think?
Ok. Will change this implementation.
> Perhaps then, in addition, could get rid of xe.info.has_clos and can just test for
> the bitmasks being non-zero with a macro.
>
> > + switch (clos_index) {
> > + case 1:
> > + if (!(pat_index >= 20 && pat_index < 24))
> > + err = -EINVAL;
> > + break;
> > + case 2:
> > + if (!(pat_index >= 24 && pat_index < 28))
> > + err = -EINVAL;
> > + break;
> > + case 3:
> > + if (!(pat_index >= 28 && pat_index <= 31))
> > + err = -EINVAL;
> > + break;
> > + default:
> > + drm_err(&xe->drm, "Unsupported CLOS value\n");
> > + err = -EINVAL;
> > + }
> > + }
> > +
> > + if (xe->info.platform == XE_PVC) {
> > + switch (clos_index) {
> > + case 1:
> > + if (!(pat_index == 4 || pat_index == 5))
> > + err = -EINVAL;
> > + break;
> > + case 2:
> > + if (!(pat_index == 6 || pat_index == 7))
> > + err = -EINVAL;
> > + break;
> > + default:
> > + drm_err(&xe->drm, "Unsupported CLOS value\n");
> > + err = -EINVAL;
> > + }
> > + }
> > +
> > + return err;
> > +}
> > +
> > static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
> table[],
> > int n_entries)
> > {
> > diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
> > index fa0dfbe525cd..fa4dbfd89496 100644
> > --- a/drivers/gpu/drm/xe/xe_pat.h
> > +++ b/drivers/gpu/drm/xe/xe_pat.h
> > @@ -58,4 +58,14 @@ void xe_pat_dump(struct xe_gt *gt, struct
> drm_printer *p);
> > */
> > u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index);
> >
> > +/**
> > + * xe_pat_index_clos_check - check whether clos has been reserved for
> > + * chosen pat_index.
> > + * @xe: xe device
> > + * @pat_index: The pat_index to query
> > + * @clos_index: clos index to compare */ int
> > +xe_pat_index_clos_check(struct xe_device *xe, u16 pat_index, u16
> > +clos_index);
> > +
> > +
> > #endif
> > diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> > index 1f997353a78f..5896a1dc46d1 100644
> > --- a/drivers/gpu/drm/xe/xe_pci.c
> > +++ b/drivers/gpu/drm/xe/xe_pci.c
> > @@ -60,6 +60,7 @@ struct xe_device_desc {
> > u8 require_force_probe:1;
> > u8 is_dgfx:1;
> >
> > + u8 has_clos:1;
> > u8 has_display:1;
> > u8 has_heci_gscfi:1;
> > u8 has_llc:1;
> > @@ -319,6 +320,7 @@ static const struct xe_device_desc pvc_desc = {
> > .graphics = &graphics_xehpc,
> > DGFX_FEATURES,
> > PLATFORM(XE_PVC),
> > + .has_clos = true,
> > .has_display = false,
> > .has_heci_gscfi = 1,
> > .require_force_probe = true,
> > @@ -333,6 +335,7 @@ static const struct xe_device_desc mtl_desc = {
> >
> > static const struct xe_device_desc lnl_desc = {
> > PLATFORM(XE_LUNARLAKE),
> > + .has_clos = true,
> > .require_force_probe = true,
> > };
> >
> > @@ -548,6 +551,7 @@ static int xe_info_init_early(struct xe_device *xe,
> > subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
> >
> > xe->info.is_dgfx = desc->is_dgfx;
> > + xe->info.has_clos = desc->has_clos;
> > xe->info.has_heci_gscfi = desc->has_heci_gscfi;
> > xe->info.has_llc = desc->has_llc;
> > xe->info.has_mmio_ext = desc->has_mmio_ext; diff --git
> > a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index
> > 1ca917b8315c..1841f2af74c1 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -2800,6 +2800,17 @@ static int vm_bind_ioctl_check_args(struct
> xe_device *xe,
> > err = -EINVAL;
> > goto free_bind_ops;
> > }
> > +
> > + /* check whether Clos has been reserved for chosen pat */
> > + if ((GRAPHICS_VER(xe) >= 20 && (pat_index > 19)) || (xe-
> >info.platform == XE_PVC && (pat_index > 3))) {
> > + mutex_lock(&xe->cache_resv.clos_mutex);
> > + err = xe_pat_index_clos_check(xe, pat_index, xe-
> >cache_resv.clos_index);
> > + if (err) {
> > + mutex_unlock(&xe-
> >cache_resv.clos_mutex);
> > + goto free_bind_ops;
> > + }
>
> indentation seems off here?
Will fix.
>
> > + mutex_unlock(&xe->cache_resv.clos_mutex);
> > + }
> > }
> >
> > return 0;
> > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > index 9fa3ae324731..47d9d23e0bb0 100644
> > --- a/include/uapi/drm/xe_drm.h
> > +++ b/include/uapi/drm/xe_drm.h
> > @@ -100,6 +100,10 @@ extern "C" {
> > #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
> > #define DRM_XE_EXEC 0x09
> > #define DRM_XE_WAIT_USER_FENCE 0x0a
> > +#define DRM_XE_CLOS_RESERVE 0x0b
> > +#define DRM_XE_CLOS_FREE 0x0c
> > +#define DRM_XE_CACHE_RESERVE 0x0d
> > +
> > /* Must be kept compact -- no holes */
> >
> > #define DRM_IOCTL_XE_DEVICE_QUERY
> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY,
> struct drm_xe_device_query)
> > @@ -113,6 +117,10 @@ extern "C" {
> > #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
> DRM_IOWR(DRM_COMMAND_BASE +
> DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct
> drm_xe_exec_queue_get_property)
> > #define DRM_IOCTL_XE_EXEC
> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct
> drm_xe_exec)
> > #define DRM_IOCTL_XE_WAIT_USER_FENCE
> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE,
> struct drm_xe_wait_user_fence)
> > +#define DRM_IOCTL_XE_CLOS_RESERVE
> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CLOS_RESERVE,
> struct drm_xe_clos_reserve)
> > +#define DRM_IOCTL_XE_CLOS_FREE
> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CLOS_FREE, struct
> drm_xe_clos_free)
> > +#define DRM_IOCTL_XE_CACHE_RESERVE
> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_CACHE_RESERVE,
> struct drm_xe_cache_reserve)
>
> These 3 above are all related.... can they have same prefix?
Yeah, I can keep the same prefix for the 3rd one as well.
Thanks,
Pallavi
> For the last one, maybe DRM_IOCTL_XE_CLOS_SET_WAYS? Or feel free to
> use other name, but seems best if shares common prefix.
>
>
> > +
> >
> > /**
> > * DOC: Xe IOCTL Extensions
> > @@ -1340,6 +1348,69 @@ struct drm_xe_wait_user_fence {
> > __u64 reserved[2];
> > };
> >
> > +/**
> > + * struct drm_xe_clos_reserve
> > + *
> > + * Allows clients to request reservation of one free CLOS, to use in
> > +subsequent
> > + * Cache Reservations.
> > + *
> > + */
> > +struct drm_xe_clos_reserve {
> > + /** @clos_index: clos index for reservation */
> > + __u16 clos_index;
> > +
> > + /** @pad: MBZ */
> > + __u16 pad16;
> > +};
> > +
> > +/**
> > + * struct drm_xe_clos_free
> > + *
> > + * Free off a previously reserved CLOS set. Any corresponding Cache
> > +Reservations
> > + * that are active for the CLOS are automatically dropped and
> > +returned to the
> > + * Shared set.
> > + *
> > + * The clos_index indicates the CLOS set which is being released and
> > +must
> > + * correspond to a CLOS index previously reserved.
> > + *
> > + */
> > +struct drm_xe_clos_free {
> > + /** clos_index: free clos index */
> > + __u16 clos_index;
> > +
> > + /** @pad: MBZ */
> > + __u16 pad16;
> > +};
> > +
> > +/**
> > + * struct drm_xe_cache_reserve
> > + *
> > + * Allows clients to request, or release, reservation of one or more
> > +cache ways,
> > + * within a previously reserved CLOS set.
> > + *
> > + * If num_ways = 0, KMD will drop any existing Reservation for the
> > +specified
> > + * clos_index and cache_level. The requested clos_index and
> > +cache_level Waymasks
> > + * will then track the Shared set once again.
> > + *
> > + * Otherwise, the requested number of Ways will be removed from the
> > +Shared set
> > + * for the requested cache level, and assigned to the Cache and CLOS
> > +specified
> > + * by cache_level/clos_index.
> > + *
> > + */
> > +struct drm_xe_cache_reserve {
> > + /** @clos_index: reserved clos index */
> > + __u16 clos_index;
> > +
> > + /** @cache_level: level of cache */
> > + __u16 cache_level; /* e.g. 3 for L3 */
> > +
> > + /** @num_ways: cache ways */
> > + __u16 num_ways;
> > +
> > + /** @pad: MBZ */
> > + __u16 pad16;
> > +};
> > +
> > #if defined(__cplusplus)
> > }
> > #endif
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-01-03 17:39 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-12-20 1:27 [PATCH] drm/xe: CLOS Based Cache Reservation support Pallavi Mishra
2024-01-03 1:56 ` Welty, Brian
2024-01-03 17:38 ` Mishra, Pallavi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox