From: Francois Dugast <francois.dugast@intel.com>
To: intel-xe@lists.freedesktop.org
Cc: matthew.brost@intel.com, Francois Dugast <francois.dugast@intel.com>
Subject: [PATCH v9 01/11] drm/xe/hw_engine_group: Introduce xe_hw_engine_group
Date: Fri, 9 Aug 2024 17:51:26 +0200 [thread overview]
Message-ID: <20240809155156.1955925-2-francois.dugast@intel.com> (raw)
In-Reply-To: <20240809155156.1955925-1-francois.dugast@intel.com>
A xe_hw_engine_group is a group of hw engines. Two hw engines belong to
the same xe_hw_engine_group if one hw engine cannot make progress while
the other is stuck on a page fault.
Typically, hw engines of the same group share some resources such as EUs,
but this really depends on the hardware configuration of the platforms.
The simple engines partitioning proposed here might be too conservative
but is intended to work for existing platforms. It can be optimized later
if more sets of independent engines are identified.
The hw engine groups are intended to be used in the context of faulting
long-running jobs submissions.
v2: Move to own files, improve error handling (Matt Brost)
v3: Fix build issue reported by CI, improve commit message (Matt Roper)
v4: Fix kernel doc
v5: Add switch case for XE_ENGINE_CLASS_OTHER
Signed-off-by: Francois Dugast <francois.dugast@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/xe_hw_engine.c | 4 +
drivers/gpu/drm/xe/xe_hw_engine_group.c | 102 ++++++++++++++++++
drivers/gpu/drm/xe/xe_hw_engine_group.h | 16 +++
drivers/gpu/drm/xe/xe_hw_engine_group_types.h | 51 +++++++++
drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 +
6 files changed, 176 insertions(+)
create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.c
create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group.h
create mode 100644 drivers/gpu/drm/xe/xe_hw_engine_group_types.h
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1ff9602a52f6..b67ace7ed204 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -69,6 +69,7 @@ xe-y += xe_bb.o \
xe_heci_gsc.o \
xe_hw_engine.o \
xe_hw_engine_class_sysfs.o \
+ xe_hw_engine_group.o \
xe_hw_fence.o \
xe_huc.o \
xe_huc_debugfs.o \
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 402dfa748e16..2503a93ad457 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -23,6 +23,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_mcr.h"
#include "xe_gt_topology.h"
+#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
@@ -790,6 +791,9 @@ int xe_hw_engines_init(struct xe_gt *gt)
}
hw_engine_setup_logical_mapping(gt);
+ err = xe_hw_engine_setup_groups(gt);
+ if (err)
+ return err;
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
new file mode 100644
index 000000000000..1d109c08c7a6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_hw_engine_group.h"
+
+static void
+hw_engine_group_free(struct drm_device *drm, void *arg)
+{
+ struct xe_hw_engine_group *group = arg;
+
+ kfree(group);
+}
+
+static struct xe_hw_engine_group *
+hw_engine_group_alloc(struct xe_device *xe)
+{
+ struct xe_hw_engine_group *group;
+ int err;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ init_rwsem(&group->mode_sem);
+ INIT_LIST_HEAD(&group->exec_queue_list);
+
+ err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
+ if (err)
+ return ERR_PTR(err);
+
+ return group;
+}
+
+/**
+ * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt
+ * @gt: The gt for which groups are setup
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_hw_engine_setup_groups(struct xe_gt *gt)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+
+ group_rcs_ccs = hw_engine_group_alloc(xe);
+ if (IS_ERR(group_rcs_ccs)) {
+ err = PTR_ERR(group_rcs_ccs);
+ goto err_group_rcs_ccs;
+ }
+
+ group_bcs = hw_engine_group_alloc(xe);
+ if (IS_ERR(group_bcs)) {
+ err = PTR_ERR(group_bcs);
+ goto err_group_bcs;
+ }
+
+ group_vcs_vecs = hw_engine_group_alloc(xe);
+ if (IS_ERR(group_vcs_vecs)) {
+ err = PTR_ERR(group_vcs_vecs);
+ goto err_group_vcs_vecs;
+ }
+
+ for_each_hw_engine(hwe, gt, id) {
+ switch (hwe->class) {
+ case XE_ENGINE_CLASS_COPY:
+ hwe->hw_engine_group = group_bcs;
+ break;
+ case XE_ENGINE_CLASS_RENDER:
+ case XE_ENGINE_CLASS_COMPUTE:
+ hwe->hw_engine_group = group_rcs_ccs;
+ break;
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ hwe->hw_engine_group = group_vcs_vecs;
+ break;
+ case XE_ENGINE_CLASS_OTHER:
+ break;
+ default:
+ drm_warn(&xe->drm, "NOT POSSIBLE");
+ }
+ }
+
+ return 0;
+
+err_group_vcs_vecs:
+ kfree(group_vcs_vecs);
+err_group_bcs:
+ kfree(group_bcs);
+err_group_rcs_ccs:
+ kfree(group_rcs_ccs);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.h b/drivers/gpu/drm/xe/xe_hw_engine_group.h
new file mode 100644
index 000000000000..c2648f87f7ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_GROUP_H_
+#define _XE_HW_ENGINE_GROUP_H_
+
+#include "xe_hw_engine_group_types.h"
+
+struct drm_device;
+struct xe_gt;
+
+int xe_hw_engine_setup_groups(struct xe_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group_types.h b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
new file mode 100644
index 000000000000..92b6e0712c03
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_HW_ENGINE_GROUP_TYPES_H_
+#define _XE_HW_ENGINE_GROUP_TYPES_H_
+
+#include "xe_force_wake_types.h"
+#include "xe_lrc_types.h"
+#include "xe_reg_sr_types.h"
+
+/**
+ * enum xe_hw_engine_group_execution_mode - possible execution modes of a hw
+ * engine group
+ *
+ * @EXEC_MODE_LR: execution in long-running mode
+ * @EXEC_MODE_DMA_FENCE: execution in dma fence mode
+ */
+enum xe_hw_engine_group_execution_mode {
+ EXEC_MODE_LR,
+ EXEC_MODE_DMA_FENCE,
+};
+
+/**
+ * struct xe_hw_engine_group - Hardware engine group
+ *
+ * hw engines belong to the same group if they share hardware resources in a way
+ * that prevents them from making progress when one is stuck on a page fault.
+ */
+struct xe_hw_engine_group {
+ /**
+ * @exec_queue_list: list of exec queues attached to this
+ * xe_hw_engine_group
+ */
+ struct list_head exec_queue_list;
+ /** @resume_work: worker to resume faulting LR exec queues */
+ struct work_struct resume_work;
+ /** @resume_wq: workqueue to resume faulting LR exec queues */
+ struct workqueue_struct *resume_wq;
+ /**
+ * @mode_sem: used to protect this group's hardware resources and ensure
+ * mutual exclusion between execution only in faulting LR mode and
+ * execution only in DMA_FENCE mode
+ */
+ struct rw_semaphore mode_sem;
+ /** @cur_mode: current execution mode of this hw engine group */
+ enum xe_hw_engine_group_execution_mode cur_mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 70e6434f150d..39f24012d0f4 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -150,6 +150,8 @@ struct xe_hw_engine {
struct xe_hw_engine_class_intf *eclass;
/** @oa_unit: oa unit for this hw engine */
struct xe_oa_unit *oa_unit;
+ /** @hw_engine_group: the group of hw engines this one belongs to */
+ struct xe_hw_engine_group *hw_engine_group;
};
/**
--
2.43.0
next prev parent reply other threads:[~2024-08-09 15:52 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-09 15:51 [PATCH v9 00/11] Parallel submission of dma fence jobs and LR jobs with shared hardware resources Francois Dugast
2024-08-09 15:51 ` Francois Dugast [this message]
2024-08-09 15:51 ` [PATCH v9 02/11] drm/xe/guc_submit: Make suspend_wait interruptible Francois Dugast
2024-08-09 15:51 ` [PATCH v9 03/11] 'drm/xe/hw_engine_group: Register hw engine group's exec queues Francois Dugast
2024-08-09 15:51 ` [PATCH v9 04/11] drm/xe/hw_engine_group: Add helper to suspend faulting LR jobs Francois Dugast
2024-08-09 15:51 ` [PATCH v9 05/11] drm/xe/exec_queue: Remove duplicated code Francois Dugast
2024-08-09 15:51 ` [PATCH v9 06/11] drm/xe/exec_queue: Prepare last fence for hw engine group resume context Francois Dugast
2024-08-09 15:51 ` [PATCH v9 07/11] drm/xe/hw_engine_group: Add helper to wait for dma fence jobs Francois Dugast
2024-08-09 15:51 ` [PATCH v9 08/11] drm/xe/hw_engine_group: Ensure safe transition between execution modes Francois Dugast
2024-08-09 15:51 ` [PATCH v9 09/11] drm/xe/exec: Switch hw engine group execution mode upon job submission Francois Dugast
2024-08-09 19:20 ` Matthew Brost
2024-08-09 15:51 ` [PATCH v9 10/11] drm/xe/vm: Remove restriction that all VMs must be faulting if one is Francois Dugast
2024-08-09 15:51 ` [PATCH v9 11/11] drm/xe/device: Remove unused xe_device::usm::num_vm_in_* Francois Dugast
2024-08-09 15:57 ` ✓ CI.Patch_applied: success for Parallel submission of dma fence jobs and LR jobs with shared hardware resources (rev9) Patchwork
2024-08-09 15:58 ` ✗ CI.checkpatch: warning " Patchwork
2024-08-09 15:59 ` ✓ CI.KUnit: success " Patchwork
2024-08-09 16:10 ` ✓ CI.Build: " Patchwork
2024-08-09 16:13 ` ✓ CI.Hooks: " Patchwork
2024-08-09 16:14 ` ✓ CI.checksparse: " Patchwork
2024-08-09 16:34 ` ✓ CI.BAT: " Patchwork
2024-08-09 18:02 ` ✗ CI.FULL: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240809155156.1955925-2-francois.dugast@intel.com \
--to=francois.dugast@intel.com \
--cc=intel-xe@lists.freedesktop.org \
--cc=matthew.brost@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox