* [PATCH] drm/i915: Defer context state allocation for legacy ring submission
@ 2017-04-27 10:37 Chris Wilson
2017-04-27 10:46 ` [PATCH v2] " Chris Wilson
2017-04-27 11:06 ` ✓ Fi.CI.BAT: success for drm/i915: Defer context state allocation for legacy ring submission (rev2) Patchwork
0 siblings, 2 replies; 5+ messages in thread
From: Chris Wilson @ 2017-04-27 10:37 UTC (permalink / raw)
To: intel-gfx
Almost from the outset for execlists, we used deferred allocation of the
logical context and rings. Then we ported the infrastructure for pinning
contexts back to legacy, and so now we are able to also implement
deferred allocation for context objects prior to first use on the legacy
submission.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem_context.c | 59 ---------------------------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 50 ++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8bd0c4966913..d46a69d3d390 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -151,45 +151,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
kfree(ctx);
}
-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
- struct drm_i915_gem_object *obj;
- int ret;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- obj = i915_gem_object_create(dev_priv, size);
- if (IS_ERR(obj))
- return obj;
-
- /*
- * Try to make the context utilize L3 as well as LLC.
- *
- * On VLV we don't have L3 controls in the PTEs so we
- * shouldn't touch the cache level, especially as that
- * would make the object snooped which might have a
- * negative performance impact.
- *
- * Snooping is required on non-llc platforms in execlist
- * mode, but since all GGTT accesses use PAT entry 0 we
- * get snooping anyway regardless of cache_level.
- *
- * This is only applicable for Ivy Bridge devices since
- * later platforms don't have L3 control bits in the PTE.
- */
- if (IS_IVYBRIDGE(dev_priv)) {
- ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
- /* Failure shouldn't ever happen this early */
- if (WARN_ON(ret)) {
- i915_gem_object_put(obj);
- return ERR_PTR(ret);
- }
- }
-
- return obj;
-}
-
static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);
@@ -266,26 +227,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->context_list);
ctx->i915 = dev_priv;
- if (dev_priv->hw_context_size) {
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
-
- obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
- if (IS_ERR(obj)) {
- ret = PTR_ERR(obj);
- goto err_out;
- }
-
- vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
- if (IS_ERR(vma)) {
- i915_gem_object_put(obj);
- ret = PTR_ERR(vma);
- goto err_out;
- }
-
- ctx->engine[RCS].state = vma;
- }
-
/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
if (file_priv) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6836efb7e3d2..138cda347488 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx)
PIN_GLOBAL | PIN_HIGH);
}
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create(i915, i915->hw_context_size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Try to make the context utilize L3 as well as LLC.
+ *
+ * On VLV we don't have L3 controls in the PTEs so we
+ * shouldn't touch the cache level, especially as that
+ * would make the object snooped which might have a
+ * negative performance impact.
+ *
+ * Snooping is required on non-llc platforms in execlist
+ * mode, but since all GGTT accesses use PAT entry 0 we
+ * get snooping anyway regardless of cache_level.
+ *
+ * This is only applicable for Ivy Bridge devices since
+ * later platforms don't have L3 control bits in the PTE.
+ */
+ if (IS_IVYBRIDGE(i915)) {
+ /* Ignore any error, regard it as a simple optimisation */
+ i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
+ }
+
+ vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
static int intel_ring_context_pin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx)
{
@@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
return 0;
GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+ if (!ce->state && engine->i915->hw_context_size) {
+ struct i915_vma *vma;
+
+ vma = alloc_context_vma(engine);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto error;
+ }
+
+ ce->state = vma;
+ }
+
if (ce->state) {
ret = context_pin(ctx);
if (ret)
--
2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v2] drm/i915: Defer context state allocation for legacy ring submission
2017-04-27 10:37 [PATCH] drm/i915: Defer context state allocation for legacy ring submission Chris Wilson
@ 2017-04-27 10:46 ` Chris Wilson
2017-04-27 10:56 ` Joonas Lahtinen
2017-04-27 11:06 ` ✓ Fi.CI.BAT: success for drm/i915: Defer context state allocation for legacy ring submission (rev2) Patchwork
1 sibling, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2017-04-27 10:46 UTC (permalink / raw)
To: intel-gfx
Almost from the outset for execlists, we used deferred allocation of the
logical context and rings. Then we ported the infrastructure for pinning
contexts back to legacy, and so now we are able to also implement
deferred allocation for context objects prior to first use on the legacy
submission.
v2: We still need to differentiate between legacy engines, Joonas is
fixing that but I want this first ;) (Joonas)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem_context.c | 59 ---------------------------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 50 ++++++++++++++++++++++++++++
2 files changed, 50 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8bd0c4966913..d46a69d3d390 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -151,45 +151,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
kfree(ctx);
}
-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
- struct drm_i915_gem_object *obj;
- int ret;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- obj = i915_gem_object_create(dev_priv, size);
- if (IS_ERR(obj))
- return obj;
-
- /*
- * Try to make the context utilize L3 as well as LLC.
- *
- * On VLV we don't have L3 controls in the PTEs so we
- * shouldn't touch the cache level, especially as that
- * would make the object snooped which might have a
- * negative performance impact.
- *
- * Snooping is required on non-llc platforms in execlist
- * mode, but since all GGTT accesses use PAT entry 0 we
- * get snooping anyway regardless of cache_level.
- *
- * This is only applicable for Ivy Bridge devices since
- * later platforms don't have L3 control bits in the PTE.
- */
- if (IS_IVYBRIDGE(dev_priv)) {
- ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
- /* Failure shouldn't ever happen this early */
- if (WARN_ON(ret)) {
- i915_gem_object_put(obj);
- return ERR_PTR(ret);
- }
- }
-
- return obj;
-}
-
static void context_close(struct i915_gem_context *ctx)
{
i915_gem_context_set_closed(ctx);
@@ -266,26 +227,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->context_list);
ctx->i915 = dev_priv;
- if (dev_priv->hw_context_size) {
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
-
- obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
- if (IS_ERR(obj)) {
- ret = PTR_ERR(obj);
- goto err_out;
- }
-
- vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
- if (IS_ERR(vma)) {
- i915_gem_object_put(obj);
- ret = PTR_ERR(vma);
- goto err_out;
- }
-
- ctx->engine[RCS].state = vma;
- }
-
/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
if (file_priv) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6836efb7e3d2..61f612454ce7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx)
PIN_GLOBAL | PIN_HIGH);
}
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create(i915, i915->hw_context_size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Try to make the context utilize L3 as well as LLC.
+ *
+ * On VLV we don't have L3 controls in the PTEs so we
+ * shouldn't touch the cache level, especially as that
+ * would make the object snooped which might have a
+ * negative performance impact.
+ *
+ * Snooping is required on non-llc platforms in execlist
+ * mode, but since all GGTT accesses use PAT entry 0 we
+ * get snooping anyway regardless of cache_level.
+ *
+ * This is only applicable for Ivy Bridge devices since
+ * later platforms don't have L3 control bits in the PTE.
+ */
+ if (IS_IVYBRIDGE(i915)) {
+ /* Ignore any error, regard it as a simple optimisation */
+ i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
+ }
+
+ vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
static int intel_ring_context_pin(struct intel_engine_cs *engine,
struct i915_gem_context *ctx)
{
@@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
return 0;
GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
+ if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) {
+ struct i915_vma *vma;
+
+ vma = alloc_context_vma(engine);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto error;
+ }
+
+ ce->state = vma;
+ }
+
if (ce->state) {
ret = context_pin(ctx);
if (ret)
--
2.11.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2] drm/i915: Defer context state allocation for legacy ring submission
2017-04-27 10:46 ` [PATCH v2] " Chris Wilson
@ 2017-04-27 10:56 ` Joonas Lahtinen
0 siblings, 0 replies; 5+ messages in thread
From: Joonas Lahtinen @ 2017-04-27 10:56 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On to, 2017-04-27 at 11:46 +0100, Chris Wilson wrote:
> Almost from the outset for execlists, we used deferred allocation of the
> logical context and rings. Then we ported the infrastructure for pinning
> contexts back to legacy, and so now we are able to also implement
> deferred allocation for context objects prior to first use on the legacy
> submission.
>
> v2: We still need to differentiate between legacy engines, Joonas is
> fixing that but I want this first ;) (Joonas)
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
You went the extra mile to reduce the if-claus too, so;
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Regards, Joonas
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915: Defer context state allocation for legacy ring submission (rev2)
2017-04-27 10:37 [PATCH] drm/i915: Defer context state allocation for legacy ring submission Chris Wilson
2017-04-27 10:46 ` [PATCH v2] " Chris Wilson
@ 2017-04-27 11:06 ` Patchwork
2017-04-27 11:44 ` Chris Wilson
1 sibling, 1 reply; 5+ messages in thread
From: Patchwork @ 2017-04-27 11:06 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Defer context state allocation for legacy ring submission (rev2)
URL : https://patchwork.freedesktop.org/series/23619/
State : success
== Summary ==
Series 23619v2 drm/i915: Defer context state allocation for legacy ring submission
https://patchwork.freedesktop.org/api/1.0/series/23619/revisions/2/mbox/
Test gem_exec_flush:
Subgroup basic-batch-kernel-default-uc:
pass -> FAIL (fi-snb-2600) fdo#100007
Test kms_flip:
Subgroup basic-flip-vs-wf_vblank:
fail -> PASS (fi-skl-6770hq) fdo#99739
fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
fdo#99739 https://bugs.freedesktop.org/show_bug.cgi?id=99739
fi-bdw-5557u total:278 pass:267 dwarn:0 dfail:0 fail:0 skip:11 time:430s
fi-bdw-gvtdvm total:278 pass:256 dwarn:8 dfail:0 fail:0 skip:14 time:424s
fi-bsw-n3050 total:278 pass:242 dwarn:0 dfail:0 fail:0 skip:36 time:574s
fi-bxt-j4205 total:278 pass:259 dwarn:0 dfail:0 fail:0 skip:19 time:511s
fi-bxt-t5700 total:278 pass:258 dwarn:0 dfail:0 fail:0 skip:20 time:543s
fi-byt-j1900 total:278 pass:254 dwarn:0 dfail:0 fail:0 skip:24 time:488s
fi-byt-n2820 total:278 pass:250 dwarn:0 dfail:0 fail:0 skip:28 time:483s
fi-hsw-4770 total:278 pass:262 dwarn:0 dfail:0 fail:0 skip:16 time:408s
fi-hsw-4770r total:278 pass:262 dwarn:0 dfail:0 fail:0 skip:16 time:403s
fi-ilk-650 total:278 pass:228 dwarn:0 dfail:0 fail:0 skip:50 time:419s
fi-ivb-3520m total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time:491s
fi-ivb-3770 total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time:464s
fi-kbl-7500u total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time:465s
fi-kbl-7560u total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:566s
fi-skl-6260u total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:449s
fi-skl-6700hq total:278 pass:261 dwarn:0 dfail:0 fail:0 skip:17 time:566s
fi-skl-6700k total:278 pass:256 dwarn:4 dfail:0 fail:0 skip:18 time:463s
fi-skl-6770hq total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:487s
fi-skl-gvtdvm total:278 pass:265 dwarn:0 dfail:0 fail:0 skip:13 time:430s
fi-snb-2520m total:278 pass:250 dwarn:0 dfail:0 fail:0 skip:28 time:526s
fi-snb-2600 total:278 pass:248 dwarn:0 dfail:0 fail:1 skip:29 time:409s
a0363fea4a56eef81e8cda759a24d8951dd7ac73 drm-tip: 2017y-04m-27d-08h-13m-25s UTC integration manifest
33aa8f5 drm/i915: Defer context state allocation for legacy ring submission
== Logs ==
For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4565/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: ✓ Fi.CI.BAT: success for drm/i915: Defer context state allocation for legacy ring submission (rev2)
2017-04-27 11:06 ` ✓ Fi.CI.BAT: success for drm/i915: Defer context state allocation for legacy ring submission (rev2) Patchwork
@ 2017-04-27 11:44 ` Chris Wilson
0 siblings, 0 replies; 5+ messages in thread
From: Chris Wilson @ 2017-04-27 11:44 UTC (permalink / raw)
To: intel-gfx
On Thu, Apr 27, 2017 at 11:06:18AM -0000, Patchwork wrote:
> == Series Details ==
>
> Series: drm/i915: Defer context state allocation for legacy ring submission (rev2)
> URL : https://patchwork.freedesktop.org/series/23619/
> State : success
>
> == Summary ==
>
> Series 23619v2 drm/i915: Defer context state allocation for legacy ring submission
> https://patchwork.freedesktop.org/api/1.0/series/23619/revisions/2/mbox/
>
> Test gem_exec_flush:
> Subgroup basic-batch-kernel-default-uc:
> pass -> FAIL (fi-snb-2600) fdo#100007
> Test kms_flip:
> Subgroup basic-flip-vs-wf_vblank:
> fail -> PASS (fi-skl-6770hq) fdo#99739
>
> fdo#100007 https://bugs.freedesktop.org/show_bug.cgi?id=100007
> fdo#99739 https://bugs.freedesktop.org/show_bug.cgi?id=99739
Pushed, thanks for the review. Another step towards grand unification!
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2017-04-27 11:44 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-04-27 10:37 [PATCH] drm/i915: Defer context state allocation for legacy ring submission Chris Wilson
2017-04-27 10:46 ` [PATCH v2] " Chris Wilson
2017-04-27 10:56 ` Joonas Lahtinen
2017-04-27 11:06 ` ✓ Fi.CI.BAT: success for drm/i915: Defer context state allocation for legacy ring submission (rev2) Patchwork
2017-04-27 11:44 ` Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox