From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Subject: [Intel-gfx] [RFC PATCH 29/42] drm/i915/guc: Implement BB boundary preemption for multi-lrc
Date: Tue, 20 Jul 2021 13:57:49 -0700 [thread overview]
Message-ID: <20210720205802.39610-30-matthew.brost@intel.com> (raw)
In-Reply-To: <20210720205802.39610-1-matthew.brost@intel.com>
For some users of multi-lrc, e.g. split frame, it isn't safe to preempt
mid BB. To safely enable preemption at the BB boundary, a handshake
between to parent and child is needed. This is implemented via custom
emit_bb_start & emit_fini_breadcrumb functions and enabled via by
default if a context is configured by set parallel extension.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/i915/gt/intel_context.c | 2 +-
drivers/gpu/drm/i915/gt/intel_context_types.h | 3 +
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 285 +++++++++++++++++-
4 files changed, 289 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 7395894724f8..35f5d28c7465 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -639,7 +639,7 @@ void intel_context_bind_parent_child(struct intel_context *parent,
GEM_BUG_ON(intel_context_is_child(child));
GEM_BUG_ON(intel_context_is_parent(child));
- parent->guc_number_children++;
+ child->guc_child_index = parent->guc_number_children++;
list_add_tail(&child->guc_child_link,
&parent->guc_child_list);
child->parent = parent;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 5912eb291bad..4886e107c1f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -222,6 +222,9 @@ struct intel_context {
/* Number of children if parent */
u8 guc_number_children;
+ /* Child index if child */
+ u8 guc_child_index;
+
u8 parent_page; /* if set, page num reserved for parent context */
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 820cb6b5d2d0..11d64746dbf1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -181,7 +181,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
- u32 reserved[30];
+ u32 reserved[36];
} __packed;
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index dde6aab1db85..b9c43ba823b4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
@@ -460,10 +461,14 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
/*
* When using multi-lrc submission an extra page in the context state is
- * reserved for the process descriptor and work queue.
+ * reserved for the process descriptor, work queue, and preempt BB boundary
+ * handshake between the parent + childlren contexts.
*
* The layout of this page is below:
* 0 guc_process_desc
+ * + sizeof(struct guc_process_desc) child go
+ * + CACHELINE_BYTES child join ...
+ * + CACHELINE_BYTES ...
* ... unused
* PAGE_SIZE / 2 work queue start
* ... work queue
@@ -2166,6 +2171,30 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
return __guc_action_deregister_context(guc, guc_id, loop);
}
+static inline void clear_children_join_go_memory(struct intel_context *ce)
+{
+ u32 *mem = (u32 *)(__get_process_desc(ce) + 1);
+ u8 i;
+
+ for (i = 0; i < ce->guc_number_children + 1; ++i)
+ mem[i * (CACHELINE_BYTES / sizeof(u32))] = 0;
+}
+
+static inline u32 get_children_go_value(struct intel_context *ce)
+{
+ u32 *mem = (u32 *)(__get_process_desc(ce) + 1);
+
+ return mem[0];
+}
+
+static inline u32 get_children_join_value(struct intel_context *ce,
+ u8 child_index)
+{
+ u32 *mem = (u32 *)(__get_process_desc(ce) + 1);
+
+ return mem[(child_index + 1) * (CACHELINE_BYTES / sizeof(u32))];
+}
+
static void guc_context_policy_init(struct intel_engine_cs *engine,
struct guc_lrc_desc *desc)
{
@@ -2361,6 +2390,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
guc_context_policy_init(engine, desc);
}
+
+ clear_children_join_go_memory(ce);
}
/*
@@ -3771,6 +3802,32 @@ static const struct intel_context_ops virtual_child_context_ops = {
.destroy = guc_child_context_destroy,
};
+/*
+ * The below override of the breadcrumbs is enabled when the user configures a
+ * context for parallel submission (multi-lrc, parent-child).
+ *
+ * The overridden breadcrumbs implements an algorithm which allows the GuC to
+ * safely preempt all the hw contexts configured for parallel submission
+ * between each BB. The contract between the i915 and GuC is if the parent
+ * context can be preempted, all the children can be preempted, and the GuC will
+ * always try to preempt the parent before the children. A handshake between the
+ * parent / children breadcrumbs ensures the i915 holds up its end of the deal
+ * creating a window to preempt between each set of BBs.
+ */
+static int emit_bb_start_parent_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static int emit_bb_start_child_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static u32 *
+emit_fini_breadcrumb_parent_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs);
+static u32 *
+emit_fini_breadcrumb_child_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs);
+
+
static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
unsigned int num_siblings,
@@ -3814,6 +3871,20 @@ guc_create_parallel(struct intel_engine_cs **engines,
for_each_child(parent, ce)
ce->ops = &virtual_child_context_ops;
+ parent->engine->emit_bb_start =
+ emit_bb_start_parent_bb_preempt_boundary;
+ parent->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_parent_bb_preempt_boundary;
+ parent->engine->emit_fini_breadcrumb_dw =
+ 12 + 4 * parent->guc_number_children;
+ for_each_child(parent, ce) {
+ ce->engine->emit_bb_start =
+ emit_bb_start_child_bb_preempt_boundary;
+ ce->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_child_bb_preempt_boundary;
+ ce->engine->emit_fini_breadcrumb_dw = 16;
+ }
+
kfree(siblings);
return parent;
@@ -4174,6 +4245,205 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
guc->submission_selected = __guc_submission_selected(guc);
}
+static inline u32 get_children_go_addr(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return i915_ggtt_offset(ce->state) +
+ __get_process_desc_offset(ce) +
+ sizeof(struct guc_process_desc);
+}
+
+static inline u32 get_children_join_addr(struct intel_context *ce,
+ u8 child_index)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return get_children_go_addr(ce) + (child_index + 1) * CACHELINE_BYTES;
+}
+
+#define PARENT_GO_BB 1
+#define PARENT_GO_FINI_BREADCRUMB 0
+#define CHILD_GO_BB 1
+#define CHILD_GO_FINI_BREADCRUMB 0
+static int emit_bb_start_parent_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ u32 *cs;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ cs = intel_ring_begin(rq, 10 + 4 * ce->guc_number_children);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Wait on chidlren */
+ for (i = 0; i < ce->guc_number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_BB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_BB,
+ get_children_go_addr(ce),
+ 0);
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int emit_bb_start_child_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ u32 *cs;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_BB,
+ get_children_join_addr(ce->parent,
+ ce->guc_child_index),
+ 0);
+
+ /* Wait parent on for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_BB;
+ *cs++ = get_children_go_addr(ce->parent);
+ *cs++ = 0;
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ /* Wait on children */
+ for (i = 0; i < ce->guc_number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_FINI_BREADCRUMB,
+ get_children_go_addr(ce),
+ 0);
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_FINI_BREADCRUMB,
+ get_children_join_addr(ce->parent,
+ ce->guc_child_index),
+ 0);
+
+ /* Wait parent on for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_go_addr(ce->parent);
+ *cs++ = 0;
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
static inline struct intel_context *
g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
{
@@ -4615,6 +4885,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
drm_printf(p, "\t\tWQI Status: %u\n\n",
READ_ONCE(desc->wq_status));
+ drm_printf(p, "\t\tNumber Children: %u\n\n",
+ ce->guc_number_children);
+ if (ce->engine->emit_bb_start ==
+ emit_bb_start_parent_bb_preempt_boundary) {
+ u8 i;
+
+ drm_printf(p, "\t\tChildren Go: %u\n\n",
+ get_children_go_value(ce));
+ for (i = 0; i < ce->guc_number_children; ++i)
+ drm_printf(p, "\t\tChildren Join: %u\n",
+ get_children_join_value(ce, i));
+ }
+
for_each_child(ce, child)
guc_log_context(p, child);
}
--
2.28.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
WARNING: multiple messages have this Message-ID (diff)
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Subject: [RFC PATCH 29/42] drm/i915/guc: Implement BB boundary preemption for multi-lrc
Date: Tue, 20 Jul 2021 13:57:49 -0700 [thread overview]
Message-ID: <20210720205802.39610-30-matthew.brost@intel.com> (raw)
In-Reply-To: <20210720205802.39610-1-matthew.brost@intel.com>
For some users of multi-lrc, e.g. split frame, it isn't safe to preempt
mid BB. To safely enable preemption at the BB boundary, a handshake
between to parent and child is needed. This is implemented via custom
emit_bb_start & emit_fini_breadcrumb functions and enabled via by
default if a context is configured by set parallel extension.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
drivers/gpu/drm/i915/gt/intel_context.c | 2 +-
drivers/gpu/drm/i915/gt/intel_context_types.h | 3 +
drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 2 +-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 285 +++++++++++++++++-
4 files changed, 289 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 7395894724f8..35f5d28c7465 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -639,7 +639,7 @@ void intel_context_bind_parent_child(struct intel_context *parent,
GEM_BUG_ON(intel_context_is_child(child));
GEM_BUG_ON(intel_context_is_parent(child));
- parent->guc_number_children++;
+ child->guc_child_index = parent->guc_number_children++;
list_add_tail(&child->guc_child_link,
&parent->guc_child_list);
child->parent = parent;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 5912eb291bad..4886e107c1f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -222,6 +222,9 @@ struct intel_context {
/* Number of children if parent */
u8 guc_number_children;
+ /* Child index if child */
+ u8 guc_child_index;
+
u8 parent_page; /* if set, page num reserved for parent context */
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 820cb6b5d2d0..11d64746dbf1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -181,7 +181,7 @@ struct guc_process_desc {
u32 wq_status;
u32 engine_presence;
u32 priority;
- u32 reserved[30];
+ u32 reserved[36];
} __packed;
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index dde6aab1db85..b9c43ba823b4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -11,6 +11,7 @@
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
@@ -460,10 +461,14 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb)
/*
* When using multi-lrc submission an extra page in the context state is
- * reserved for the process descriptor and work queue.
+ * reserved for the process descriptor, work queue, and preempt BB boundary
+ * handshake between the parent + childlren contexts.
*
* The layout of this page is below:
* 0 guc_process_desc
+ * + sizeof(struct guc_process_desc) child go
+ * + CACHELINE_BYTES child join ...
+ * + CACHELINE_BYTES ...
* ... unused
* PAGE_SIZE / 2 work queue start
* ... work queue
@@ -2166,6 +2171,30 @@ static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop)
return __guc_action_deregister_context(guc, guc_id, loop);
}
+static inline void clear_children_join_go_memory(struct intel_context *ce)
+{
+ u32 *mem = (u32 *)(__get_process_desc(ce) + 1);
+ u8 i;
+
+ for (i = 0; i < ce->guc_number_children + 1; ++i)
+ mem[i * (CACHELINE_BYTES / sizeof(u32))] = 0;
+}
+
+static inline u32 get_children_go_value(struct intel_context *ce)
+{
+ u32 *mem = (u32 *)(__get_process_desc(ce) + 1);
+
+ return mem[0];
+}
+
+static inline u32 get_children_join_value(struct intel_context *ce,
+ u8 child_index)
+{
+ u32 *mem = (u32 *)(__get_process_desc(ce) + 1);
+
+ return mem[(child_index + 1) * (CACHELINE_BYTES / sizeof(u32))];
+}
+
static void guc_context_policy_init(struct intel_engine_cs *engine,
struct guc_lrc_desc *desc)
{
@@ -2361,6 +2390,8 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop)
desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
guc_context_policy_init(engine, desc);
}
+
+ clear_children_join_go_memory(ce);
}
/*
@@ -3771,6 +3802,32 @@ static const struct intel_context_ops virtual_child_context_ops = {
.destroy = guc_child_context_destroy,
};
+/*
+ * The below override of the breadcrumbs is enabled when the user configures a
+ * context for parallel submission (multi-lrc, parent-child).
+ *
+ * The overridden breadcrumbs implements an algorithm which allows the GuC to
+ * safely preempt all the hw contexts configured for parallel submission
+ * between each BB. The contract between the i915 and GuC is if the parent
+ * context can be preempted, all the children can be preempted, and the GuC will
+ * always try to preempt the parent before the children. A handshake between the
+ * parent / children breadcrumbs ensures the i915 holds up its end of the deal
+ * creating a window to preempt between each set of BBs.
+ */
+static int emit_bb_start_parent_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static int emit_bb_start_child_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+static u32 *
+emit_fini_breadcrumb_parent_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs);
+static u32 *
+emit_fini_breadcrumb_child_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs);
+
+
static struct intel_context *
guc_create_parallel(struct intel_engine_cs **engines,
unsigned int num_siblings,
@@ -3814,6 +3871,20 @@ guc_create_parallel(struct intel_engine_cs **engines,
for_each_child(parent, ce)
ce->ops = &virtual_child_context_ops;
+ parent->engine->emit_bb_start =
+ emit_bb_start_parent_bb_preempt_boundary;
+ parent->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_parent_bb_preempt_boundary;
+ parent->engine->emit_fini_breadcrumb_dw =
+ 12 + 4 * parent->guc_number_children;
+ for_each_child(parent, ce) {
+ ce->engine->emit_bb_start =
+ emit_bb_start_child_bb_preempt_boundary;
+ ce->engine->emit_fini_breadcrumb =
+ emit_fini_breadcrumb_child_bb_preempt_boundary;
+ ce->engine->emit_fini_breadcrumb_dw = 16;
+ }
+
kfree(siblings);
return parent;
@@ -4174,6 +4245,205 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
guc->submission_selected = __guc_submission_selected(guc);
}
+static inline u32 get_children_go_addr(struct intel_context *ce)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return i915_ggtt_offset(ce->state) +
+ __get_process_desc_offset(ce) +
+ sizeof(struct guc_process_desc);
+}
+
+static inline u32 get_children_join_addr(struct intel_context *ce,
+ u8 child_index)
+{
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ return get_children_go_addr(ce) + (child_index + 1) * CACHELINE_BYTES;
+}
+
+#define PARENT_GO_BB 1
+#define PARENT_GO_FINI_BREADCRUMB 0
+#define CHILD_GO_BB 1
+#define CHILD_GO_FINI_BREADCRUMB 0
+static int emit_bb_start_parent_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ u32 *cs;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ cs = intel_ring_begin(rq, 10 + 4 * ce->guc_number_children);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Wait on chidlren */
+ for (i = 0; i < ce->guc_number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_BB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_BB,
+ get_children_go_addr(ce),
+ 0);
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int emit_bb_start_child_bb_preempt_boundary(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ struct intel_context *ce = rq->context;
+ u32 *cs;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ cs = intel_ring_begin(rq, 12);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_BB,
+ get_children_join_addr(ce->parent,
+ ce->guc_child_index),
+ 0);
+
+ /* Wait parent on for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_BB;
+ *cs++ = get_children_go_addr(ce->parent);
+ *cs++ = 0;
+
+ /* Turn off preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* Jump to batch */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static u32 *
+emit_fini_breadcrumb_parent_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+ u8 i;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ /* Wait on children */
+ for (i = 0; i < ce->guc_number_children; ++i) {
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = PARENT_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_join_addr(ce, i);
+ *cs++ = 0;
+ }
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Tell children go */
+ cs = gen8_emit_ggtt_write(cs,
+ CHILD_GO_FINI_BREADCRUMB,
+ get_children_go_addr(ce),
+ 0);
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
+static u32 *
+emit_fini_breadcrumb_child_bb_preempt_boundary(struct i915_request *rq,
+ u32 *cs)
+{
+ struct intel_context *ce = rq->context;
+
+ GEM_BUG_ON(!intel_context_is_child(ce));
+
+ /* Turn on preemption */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ *cs++ = MI_NOOP;
+
+ /* Signal parent */
+ cs = gen8_emit_ggtt_write(cs,
+ PARENT_GO_FINI_BREADCRUMB,
+ get_children_join_addr(ce->parent,
+ ce->guc_child_index),
+ 0);
+
+ /* Wait parent on for go */
+ *cs++ = (MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD);
+ *cs++ = CHILD_GO_FINI_BREADCRUMB;
+ *cs++ = get_children_go_addr(ce->parent);
+ *cs++ = 0;
+
+ /* Emit fini breadcrumb */
+ cs = gen8_emit_ggtt_write(cs,
+ rq->fence.seqno,
+ i915_request_active_timeline(rq)->hwsp_offset,
+ 0);
+
+ /* User interrupt */
+ *cs++ = MI_USER_INTERRUPT;
+ *cs++ = MI_NOOP;
+
+ rq->tail = intel_ring_offset(rq, cs);
+
+ return cs;
+}
+
static inline struct intel_context *
g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
{
@@ -4615,6 +4885,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
drm_printf(p, "\t\tWQI Status: %u\n\n",
READ_ONCE(desc->wq_status));
+ drm_printf(p, "\t\tNumber Children: %u\n\n",
+ ce->guc_number_children);
+ if (ce->engine->emit_bb_start ==
+ emit_bb_start_parent_bb_preempt_boundary) {
+ u8 i;
+
+ drm_printf(p, "\t\tChildren Go: %u\n\n",
+ get_children_go_value(ce));
+ for (i = 0; i < ce->guc_number_children; ++i)
+ drm_printf(p, "\t\tChildren Join: %u\n",
+ get_children_join_value(ce, i));
+ }
+
for_each_child(ce, child)
guc_log_context(p, child);
}
--
2.28.0
next prev parent reply other threads:[~2021-07-20 20:41 UTC|newest]
Thread overview: 88+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-20 20:57 [Intel-gfx] [RFC PATCH 00/42] Parallel submission aka multi-bb execbuf Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 01/42] drm/i915/guc: GuC submission squashed into single patch Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-28 12:57 ` [Intel-gfx] " kernel test robot
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 02/42] drm/i915/guc: Allow flexible number of context ids Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 03/42] drm/i915/guc: Connect the number of guc_ids to debugfs Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 04/42] drm/i915/guc: Don't return -EAGAIN to user when guc_ids exhausted Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 05/42] drm/i915/guc: Don't allow requests not ready to consume all guc_ids Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 06/42] drm/i915/guc: Introduce guc_submit_engine object Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 07/42] drm/i915/guc: Check return of __xa_store when registering a context Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 08/42] drm/i915/guc: Non-static lrc descriptor registration buffer Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 09/42] drm/i915/guc: Take GT PM ref when deregistering context Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 10/42] drm/i915: Add GT PM unpark worker Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 11/42] drm/i915/guc: Take engine PM when a context is pinned with GuC submission Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 12/42] drm/i915/guc: Don't call switch_to_kernel_context " Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 13/42] drm/i915/guc: Selftest for GuC flow control Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 14/42] drm/i915: Add logical engine mapping Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 15/42] drm/i915: Expose logical engine instance to user Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 16/42] drm/i915/guc: Introduce context parent-child relationship Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 17/42] drm/i915/guc: Implement GuC parent-child context pin / unpin functions Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 18/42] drm/i915/guc: Add multi-lrc context registration Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 19/42] drm/i915/guc: Ensure GuC schedule operations do not operate on child contexts Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 20/42] drm/i915/guc: Assign contexts in parent-child relationship consecutive guc_ids Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 21/42] drm/i915/guc: Add hang check to GuC submit engine Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 22/42] drm/i915/guc: Add guc_child_context_destroy Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 23/42] drm/i915/guc: Implement multi-lrc submission Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 24/42] drm/i915/guc: Insert submit fences between requests in parent-child relationship Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 25/42] drm/i915/guc: Implement multi-lrc reset Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 26/42] drm/i915/guc: Update debugfs for GuC multi-lrc Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 27/42] drm/i915: Connect UAPI to GuC multi-lrc interface Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 28/42] drm/i915/guc: Add basic GuC multi-lrc selftest Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` Matthew Brost [this message]
2021-07-20 20:57 ` [RFC PATCH 29/42] drm/i915/guc: Implement BB boundary preemption for multi-lrc Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 30/42] i915/drm: Move secure execbuf check to execbuf2 Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 31/42] drm/i915: Move input/exec fence handling to i915_gem_execbuffer2 Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 32/42] drm/i915: Move output " Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 33/42] drm/i915: Return output fence from i915_gem_do_execbuffer Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 34/42] drm/i915: Store batch index in struct i915_execbuffer Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 35/42] drm/i915: Allow callers of i915_gem_do_execbuffer to override the batch index Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 36/42] drm/i915: Teach execbuf there can be more than one batch in the objects list Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 37/42] drm/i915: Only track object dependencies on first request Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 38/42] drm/i915: Force parallel contexts to use copy engine for reloc Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:57 ` [Intel-gfx] [RFC PATCH 39/42] drm/i915: Multi-batch execbuffer2 Matthew Brost
2021-07-20 20:57 ` Matthew Brost
2021-07-20 20:58 ` [Intel-gfx] [RFC PATCH 40/42] drm/i915: Eliminate unnecessary VMA calls for multi-BB submission Matthew Brost
2021-07-20 20:58 ` Matthew Brost
2021-07-20 20:58 ` [Intel-gfx] [RFC PATCH 41/42] drm/i915: Enable multi-bb execbuf Matthew Brost
2021-07-20 20:58 ` Matthew Brost
2021-07-20 20:58 ` [Intel-gfx] [RFC PATCH 42/42] drm/i915/execlists: Parallel submission support for execlists Matthew Brost
2021-07-20 20:58 ` Matthew Brost
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210720205802.39610-30-matthew.brost@intel.com \
--to=matthew.brost@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.