From: fei.yang@intel.com
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Subject: [Intel-gfx] [PATCH] drm/i915: avoid concurrent writes to aux_inv
Date: Wed, 2 Mar 2022 10:26:57 -0800 [thread overview]
Message-ID: <20220302182657.1483623-1-fei.yang@intel.com> (raw)
From: Fei Yang <fei.yang@intel.com>
GPU hangs have been observed when multiple engines write to the
same aux_inv register at the same time. To avoid this, each engine
should only invalidate its own auxiliary table. The function
gen12_emit_flush_xcs() currently invalidates the auxiliary table for
all engines because rq->engine is not necessarily the engine that
eventually carries out the request, and the engine could even be a
virtual one (with engine->instance being -1).
With this patch, auxiliary table invalidation is done only for the
engine executing the request, and the mmio address of the aux_inv
register is filled in once the engine instance is known.
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
---
drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 41 ++++---------------
.../drm/i915/gt/intel_execlists_submission.c | 38 +++++++++++++++++
drivers/gpu/drm/i915/i915_request.h | 2 +
3 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index b1b9c3fd7bf9..af62e2bc2c9b 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -165,30 +165,6 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
-{
- static const i915_reg_t vd[] = {
- GEN12_VD0_AUX_NV,
- GEN12_VD1_AUX_NV,
- GEN12_VD2_AUX_NV,
- GEN12_VD3_AUX_NV,
- };
-
- static const i915_reg_t ve[] = {
- GEN12_VE0_AUX_NV,
- GEN12_VE1_AUX_NV,
- };
-
- if (engine->class == VIDEO_DECODE_CLASS)
- return vd[engine->instance];
-
- if (engine->class == VIDEO_ENHANCEMENT_CLASS)
- return ve[engine->instance];
-
- GEM_BUG_ON("unknown aux_inv reg\n");
- return INVALID_MMIO_REG;
-}
-
static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
{
*cs++ = MI_LOAD_REGISTER_IMM(1);
@@ -288,7 +264,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE)
aux_inv = rq->engine->mask & ~BIT(BCS0);
if (aux_inv)
- cmd += 2 * hweight32(aux_inv) + 2;
+ cmd += 4;
cs = intel_ring_begin(rq, cmd);
if (IS_ERR(cs))
@@ -319,16 +295,13 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* value */
if (aux_inv) { /* hsdes: 1809175790 */
- struct intel_engine_cs *engine;
- unsigned int tmp;
-
- *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv));
- for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) {
- *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
- *cs++ = AUX_INV;
- }
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ rq->vd_ve_aux_inv = cs;
+ *cs++ = 0; /* address to be set at submission to HW */
+ *cs++ = AUX_INV;
*cs++ = MI_NOOP;
- }
+ } else
+ rq->vd_ve_aux_inv = NULL;
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1c602d4ae297..a018de6dcac5 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1258,6 +1258,34 @@ static bool completed(const struct i915_request *rq)
return __i915_request_is_complete(rq);
}
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+ static const i915_reg_t vd[] = {
+ GEN12_VD0_AUX_NV,
+ GEN12_VD1_AUX_NV,
+ GEN12_VD2_AUX_NV,
+ GEN12_VD3_AUX_NV,
+ };
+
+ static const i915_reg_t ve[] = {
+ GEN12_VE0_AUX_NV,
+ GEN12_VE1_AUX_NV,
+ };
+
+ if (engine->class == VIDEO_DECODE_CLASS) {
+ GEM_BUG_ON(engine->instance >= ARRAY_SIZE(vd));
+ return vd[engine->instance];
+ }
+
+ if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
+ GEM_BUG_ON(engine->instance >= ARRAY_SIZE(ve));
+ return ve[engine->instance];
+ }
+
+ GEM_BUG_ON("unknown aux_inv reg\n");
+ return INVALID_MMIO_REG;
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1538,6 +1566,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
if (__i915_request_submit(rq)) {
+ /* hsdes: 1809175790 */
+ if ((GRAPHICS_VER(engine->i915) == 12) &&
+ rq->vd_ve_aux_inv &&
+ (engine->class == VIDEO_DECODE_CLASS ||
+ engine->class == VIDEO_ENHANCEMENT_CLASS)) {
+ *rq->vd_ve_aux_inv = i915_mmio_reg_offset
+ (aux_inv_reg(engine));
+ rq->vd_ve_aux_inv = NULL;
+ rq->execution_mask = engine->mask;
+ }
if (!merge) {
*port++ = i915_request_get(last);
last = NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 28b1f9db5487..69de32e5e15d 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -350,6 +350,8 @@ struct i915_request {
struct list_head link;
unsigned long delay;
} mock;)
+
+ u32 *vd_ve_aux_inv;
};
#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
--
2.25.1
WARNING: multiple messages have this Message-ID (diff)
From: fei.yang@intel.com
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: Fei Yang <fei.yang@intel.com>
Subject: [PATCH] drm/i915: avoid concurrent writes to aux_inv
Date: Wed, 2 Mar 2022 10:26:57 -0800 [thread overview]
Message-ID: <20220302182657.1483623-1-fei.yang@intel.com> (raw)
From: Fei Yang <fei.yang@intel.com>
GPU hangs have been observed when multiple engines write to the
same aux_inv register at the same time. To avoid this, each engine
should only invalidate its own auxiliary table. The function
gen12_emit_flush_xcs() currently invalidates the auxiliary table for
all engines because rq->engine is not necessarily the engine that
eventually carries out the request, and the engine could even be a
virtual one (with engine->instance being -1).
With this patch, auxiliary table invalidation is done only for the
engine executing the request, and the mmio address of the aux_inv
register is filled in once the engine instance is known.
Signed-off-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
---
drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 41 ++++---------------
.../drm/i915/gt/intel_execlists_submission.c | 38 +++++++++++++++++
drivers/gpu/drm/i915/i915_request.h | 2 +
3 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index b1b9c3fd7bf9..af62e2bc2c9b 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -165,30 +165,6 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
-{
- static const i915_reg_t vd[] = {
- GEN12_VD0_AUX_NV,
- GEN12_VD1_AUX_NV,
- GEN12_VD2_AUX_NV,
- GEN12_VD3_AUX_NV,
- };
-
- static const i915_reg_t ve[] = {
- GEN12_VE0_AUX_NV,
- GEN12_VE1_AUX_NV,
- };
-
- if (engine->class == VIDEO_DECODE_CLASS)
- return vd[engine->instance];
-
- if (engine->class == VIDEO_ENHANCEMENT_CLASS)
- return ve[engine->instance];
-
- GEM_BUG_ON("unknown aux_inv reg\n");
- return INVALID_MMIO_REG;
-}
-
static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
{
*cs++ = MI_LOAD_REGISTER_IMM(1);
@@ -288,7 +264,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
if (mode & EMIT_INVALIDATE)
aux_inv = rq->engine->mask & ~BIT(BCS0);
if (aux_inv)
- cmd += 2 * hweight32(aux_inv) + 2;
+ cmd += 4;
cs = intel_ring_begin(rq, cmd);
if (IS_ERR(cs))
@@ -319,16 +295,13 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* value */
if (aux_inv) { /* hsdes: 1809175790 */
- struct intel_engine_cs *engine;
- unsigned int tmp;
-
- *cs++ = MI_LOAD_REGISTER_IMM(hweight32(aux_inv));
- for_each_engine_masked(engine, rq->engine->gt, aux_inv, tmp) {
- *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
- *cs++ = AUX_INV;
- }
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ rq->vd_ve_aux_inv = cs;
+ *cs++ = 0; /* address to be set at submission to HW */
+ *cs++ = AUX_INV;
*cs++ = MI_NOOP;
- }
+ } else
+ rq->vd_ve_aux_inv = NULL;
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 1c602d4ae297..a018de6dcac5 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1258,6 +1258,34 @@ static bool completed(const struct i915_request *rq)
return __i915_request_is_complete(rq);
}
+static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
+{
+ static const i915_reg_t vd[] = {
+ GEN12_VD0_AUX_NV,
+ GEN12_VD1_AUX_NV,
+ GEN12_VD2_AUX_NV,
+ GEN12_VD3_AUX_NV,
+ };
+
+ static const i915_reg_t ve[] = {
+ GEN12_VE0_AUX_NV,
+ GEN12_VE1_AUX_NV,
+ };
+
+ if (engine->class == VIDEO_DECODE_CLASS) {
+ GEM_BUG_ON(engine->instance >= ARRAY_SIZE(vd));
+ return vd[engine->instance];
+ }
+
+ if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
+ GEM_BUG_ON(engine->instance >= ARRAY_SIZE(ve));
+ return ve[engine->instance];
+ }
+
+ GEM_BUG_ON("unknown aux_inv reg\n");
+ return INVALID_MMIO_REG;
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1538,6 +1566,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
}
if (__i915_request_submit(rq)) {
+ /* hsdes: 1809175790 */
+ if ((GRAPHICS_VER(engine->i915) == 12) &&
+ rq->vd_ve_aux_inv &&
+ (engine->class == VIDEO_DECODE_CLASS ||
+ engine->class == VIDEO_ENHANCEMENT_CLASS)) {
+ *rq->vd_ve_aux_inv = i915_mmio_reg_offset
+ (aux_inv_reg(engine));
+ rq->vd_ve_aux_inv = NULL;
+ rq->execution_mask = engine->mask;
+ }
if (!merge) {
*port++ = i915_request_get(last);
last = NULL;
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 28b1f9db5487..69de32e5e15d 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -350,6 +350,8 @@ struct i915_request {
struct list_head link;
unsigned long delay;
} mock;)
+
+ u32 *vd_ve_aux_inv;
};
#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
--
2.25.1
next reply other threads:[~2022-03-02 18:32 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-02 18:26 fei.yang [this message]
2022-03-02 18:26 ` [PATCH] drm/i915: avoid concurrent writes to aux_inv fei.yang
2022-03-02 19:05 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: avoid concurrent writes to aux_inv (rev3) Patchwork
2022-03-02 19:06 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2022-03-02 19:38 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-03-02 20:50 ` [Intel-gfx] [PATCH] drm/i915: avoid concurrent writes to aux_inv Chris Wilson
2022-03-03 5:29 ` [Intel-gfx] ✗ Fi.CI.IGT: failure for drm/i915: avoid concurrent writes to aux_inv (rev3) Patchwork
-- strict thread matches above, loose matches on Subject: below --
2022-03-28 17:16 [Intel-gfx] [PATCH] drm/i915: avoid concurrent writes to aux_inv fei.yang
2022-03-29 7:51 ` Tvrtko Ursulin
2022-03-28 3:16 fei.yang
2022-03-28 8:37 ` Tvrtko Ursulin
2022-03-28 17:58 ` Yang, Fei
2022-03-28 17:58 ` Yang, Fei
2022-03-18 18:08 fei.yang
2022-03-21 13:41 ` Tvrtko Ursulin
2022-03-18 5:26 fei.yang
2022-03-18 14:38 ` Tvrtko Ursulin
2022-03-18 18:12 ` Yang, Fei
2022-03-18 18:12 ` Yang, Fei
2022-03-18 5:12 fei.yang
2022-03-04 22:14 fei.yang
2022-03-16 4:43 ` Summers, Stuart
2022-03-16 5:54 ` Yang, Fei
2022-03-16 7:19 ` Yang, Fei
2022-03-16 10:03 ` Tvrtko Ursulin
2022-03-16 18:25 ` Yang, Fei
2022-03-16 18:25 ` Yang, Fei
2022-03-04 22:04 fei.yang
2022-02-26 7:11 fei.yang
2022-02-26 1:50 fei.yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220302182657.1483623-1-fei.yang@intel.com \
--to=fei.yang@intel.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.