stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission
@ 2020-10-21 22:04 Chris Wilson
  2020-10-22  1:16 ` [Intel-gfx] " kernel test robot
  2020-10-22  6:41 ` [PATCH] " Chris Wilson
  0 siblings, 2 replies; 4+ messages in thread
From: Chris Wilson @ 2020-10-21 22:04 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, Joonas Lahtinen, stable

We wrap the timeline on construction of the next request, but there may
still be requests in flight that have not yet finalized the breadcrumb.
(The breadcrumb is delayed as we need engine-local offsets, and for the
virtual engine that is not known until execution.) As such, by the time
we write to the timeline's HWSP offset it may have changed, and we
should use the value we preserved in the request instead.

Though the window is small and infrequent (at full flow we can expect a
timeline's seqno to wrap once every 30 minutes), the impact of writing
the old seqno into the new HWSP is severe: the old requests are never
completed, and the new requests are completed before they are even
submitted.

Fixes: ebece7539242 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.2+
---
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 22 ++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 18 ++++++++-------
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  2 ++
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c22d47cc6701..84ba6cd70a2a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3597,6 +3597,14 @@ static const struct intel_context_ops execlists_context_ops = {
 	.destroy = execlists_context_destroy,
 };
 
+static u32 hwsp_offset(struct i915_request *rq)
+{
+	if (rq->hwsp_cacheline)
+		return rq->hwsp_cacheline->ggtt_offset;
+	else
+		return i915_request_active_timeline(rq)->hwsp_offset;
+}
+
 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
@@ -3619,7 +3627,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	*cs++ = MI_NOOP;
 
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-	*cs++ = i915_request_timeline(rq)->hwsp_offset;
+	*cs++ = hwsp_offset(rq);
 	*cs++ = 0;
 	*cs++ = rq->fence.seqno - 1;
 
@@ -4939,11 +4947,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
 	return gen8_emit_wa_tail(request, cs);
 }
 
-static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	u32 addr = i915_request_active_timeline(request)->hwsp_offset;
-
-	return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
 }
 
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4962,7 +4968,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      i915_request_active_timeline(request)->hwsp_offset,
+				      hwsp_offset(request),
 				      PIPE_CONTROL_FLUSH_ENABLE |
 				      PIPE_CONTROL_CS_STALL);
 
@@ -4974,7 +4980,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      i915_request_active_timeline(request)->hwsp_offset,
+				      hwsp_offset(request),
 				      PIPE_CONTROL_CS_STALL |
 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -5044,7 +5050,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen12_emit_ggtt_write_rcs(cs,
 				       request->fence.seqno,
-				       i915_request_active_timeline(request)->hwsp_offset,
+				       hwsp_offset(request),
 				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_TILE_CACHE_FLUSH |
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index a2f74cefe4c3..7ea94d201fe6 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	return cl;
 }
 
-static void cacheline_acquire(struct intel_timeline_cacheline *cl)
+static void cacheline_acquire(struct intel_timeline_cacheline *cl,
+			      u32 ggtt_offset)
 {
-	if (cl)
-		i915_active_acquire(&cl->active);
+	if (!cl)
+		return;
+
+	cl->ggtt_offset = ggtt_offset;
+	i915_active_acquire(&cl->active);
 }
 
 static void cacheline_release(struct intel_timeline_cacheline *cl)
@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 		 tl->fence_context, tl->hwsp_offset);
 
-	cacheline_acquire(tl->hwsp_cacheline);
+	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
 	if (atomic_fetch_inc(&tl->pin_count)) {
 		cacheline_release(tl->hwsp_cacheline);
 		__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 		 tl->fence_context, tl->hwsp_offset);
 
-	cacheline_acquire(cl);
+	cacheline_acquire(cl, tl->hwsp_offset);
 	tl->hwsp_cacheline = cl;
 
 	*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 	if (err)
 		goto out;
 
-	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
-		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
-
+	*hwsp = cl->ggtt_offset;
 out:
 	i915_active_release(&cl->active);
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 02181c5020db..4474f487f589 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
 	struct intel_timeline_hwsp *hwsp;
 	void *vaddr;
 
+	u32 ggtt_offset;
+
 	struct rcu_head rcu;
 };
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission
  2020-10-21 22:04 [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission Chris Wilson
@ 2020-10-22  1:16 ` kernel test robot
  2020-10-22  6:41 ` [PATCH] " Chris Wilson
  1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2020-10-22  1:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: kbuild-all, stable, Chris Wilson

[-- Attachment #1: Type: text/plain, Size: 1908 bytes --]

Hi Chris,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on drm-tip/drm-tip linus/master v5.9 next-20201021]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-gt-Use-the-local-HWSP-offset-during-submission/20201022-060619
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s002-20201021 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-dirty
        # https://github.com/0day-ci/linux/commit/034e13871db55a3eed93d3157408443584dddd1f
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Chris-Wilson/drm-i915-gt-Use-the-local-HWSP-offset-during-submission/20201022-060619
        git checkout 034e13871db55a3eed93d3157408443584dddd1f
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
>> drivers/gpu/drm/i915/gt/intel_lrc.c:3553:26: sparse: sparse: dereference of noderef expression

vim +3553 drivers/gpu/drm/i915/gt/intel_lrc.c

  3549	
  3550	static u32 hwsp_offset(struct i915_request *rq)
  3551	{
  3552		if (rq->hwsp_cacheline)
> 3553			return rq->hwsp_cacheline->ggtt_offset;
  3554		else
  3555			return i915_request_active_timeline(rq)->hwsp_offset;
  3556	}
  3557	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 39002 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] drm/i915/gt: Use the local HWSP offset during submission
  2020-10-21 22:04 [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission Chris Wilson
  2020-10-22  1:16 ` [Intel-gfx] " kernel test robot
@ 2020-10-22  6:41 ` Chris Wilson
  2020-10-23 11:25   ` [Intel-gfx] " Mika Kuoppala
  1 sibling, 1 reply; 4+ messages in thread
From: Chris Wilson @ 2020-10-22  6:41 UTC (permalink / raw)
  To: intel-gfx; +Cc: Chris Wilson, Tvrtko Ursulin, Joonas Lahtinen, stable

We wrap the timeline on construction of the next request, but there may
still be requests in flight that have not yet finalized the breadcrumb.
(The breadcrumb is delayed as we need engine-local offsets, and for the
virtual engine that is not known until execution.) As such, by the time
we write to the timeline's HWSP offset it may have changed, and we
should use the value we preserved in the request instead.

Though the window is small and infrequent (at full flow we can expect a
timeline's seqno to wrap once every 30 minutes), the impact of writing
the old seqno into the new HWSP is severe: the old requests are never
completed, and the new requests are completed before they are even
submitted.

Fixes: ebece7539242 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.2+
---
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 27 +++++++++++++------
 drivers/gpu/drm/i915/gt/intel_timeline.c      | 18 +++++++------
 .../gpu/drm/i915/gt/intel_timeline_types.h    |  2 ++
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c22d47cc6701..d0be98b67138 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3597,6 +3597,19 @@ static const struct intel_context_ops execlists_context_ops = {
 	.destroy = execlists_context_destroy,
 };
 
+static u32 hwsp_offset(const struct i915_request *rq)
+{
+	const struct intel_timeline_cacheline *cl;
+
+	/* Before the request is executed, the timeline/cachline is fixed */
+
+	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
+	if (cl)
+		return cl->ggtt_offset;
+
+	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
+}
+
 static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 {
 	u32 *cs;
@@ -3619,7 +3632,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 	*cs++ = MI_NOOP;
 
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-	*cs++ = i915_request_timeline(rq)->hwsp_offset;
+	*cs++ = hwsp_offset(rq);
 	*cs++ = 0;
 	*cs++ = rq->fence.seqno - 1;
 
@@ -4939,11 +4952,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
 	return gen8_emit_wa_tail(request, cs);
 }
 
-static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-	u32 addr = i915_request_active_timeline(request)->hwsp_offset;
-
-	return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
+	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
 }
 
 static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4962,7 +4973,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      i915_request_active_timeline(request)->hwsp_offset,
+				      hwsp_offset(request),
 				      PIPE_CONTROL_FLUSH_ENABLE |
 				      PIPE_CONTROL_CS_STALL);
 
@@ -4974,7 +4985,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
-				      i915_request_active_timeline(request)->hwsp_offset,
+				      hwsp_offset(request),
 				      PIPE_CONTROL_CS_STALL |
 				      PIPE_CONTROL_TILE_CACHE_FLUSH |
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -5044,7 +5055,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
 	cs = gen12_emit_ggtt_write_rcs(cs,
 				       request->fence.seqno,
-				       i915_request_active_timeline(request)->hwsp_offset,
+				       hwsp_offset(request),
 				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_TILE_CACHE_FLUSH |
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index a2f74cefe4c3..7ea94d201fe6 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
 	return cl;
 }
 
-static void cacheline_acquire(struct intel_timeline_cacheline *cl)
+static void cacheline_acquire(struct intel_timeline_cacheline *cl,
+			      u32 ggtt_offset)
 {
-	if (cl)
-		i915_active_acquire(&cl->active);
+	if (!cl)
+		return;
+
+	cl->ggtt_offset = ggtt_offset;
+	i915_active_acquire(&cl->active);
 }
 
 static void cacheline_release(struct intel_timeline_cacheline *cl)
@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 		 tl->fence_context, tl->hwsp_offset);
 
-	cacheline_acquire(tl->hwsp_cacheline);
+	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
 	if (atomic_fetch_inc(&tl->pin_count)) {
 		cacheline_release(tl->hwsp_cacheline);
 		__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
 	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
 		 tl->fence_context, tl->hwsp_offset);
 
-	cacheline_acquire(cl);
+	cacheline_acquire(cl, tl->hwsp_offset);
 	tl->hwsp_cacheline = cl;
 
 	*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 	if (err)
 		goto out;
 
-	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
-		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
-
+	*hwsp = cl->ggtt_offset;
 out:
 	i915_active_release(&cl->active);
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 02181c5020db..4474f487f589 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
 	struct intel_timeline_hwsp *hwsp;
 	void *vaddr;
 
+	u32 ggtt_offset;
+
 	struct rcu_head rcu;
 };
 
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [Intel-gfx] [PATCH] drm/i915/gt: Use the local HWSP offset during submission
  2020-10-22  6:41 ` [PATCH] " Chris Wilson
@ 2020-10-23 11:25   ` Mika Kuoppala
  0 siblings, 0 replies; 4+ messages in thread
From: Mika Kuoppala @ 2020-10-23 11:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: stable, Chris Wilson

Chris Wilson <chris@chris-wilson.co.uk> writes:

> We wrap the timeline on construction of the next request, but there may
> still be requests in flight that have not yet finalized the breadcrumb.
> (The breadcrumb is delayed as we need engine-local offsets, and for the
> virtual engine that is not known until execution.) As such, by the time
> we write to the timeline's HWSP offset it may have changed, and we
> should use the value we preserved in the request instead.
>
> Though the window is small and infrequent (at full flow we can expect a
> timeline's seqno to wrap once every 30 minutes), the impact of writing
> the old seqno into the new HWSP is severe: the old requests are never
> completed, and the new requests are completed before they are even
> submitted.
>
> Fixes: ebece7539242 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: <stable@vger.kernel.org> # v5.2+
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c           | 27 +++++++++++++------
>  drivers/gpu/drm/i915/gt/intel_timeline.c      | 18 +++++++------
>  .../gpu/drm/i915/gt/intel_timeline_types.h    |  2 ++
>  3 files changed, 31 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c22d47cc6701..d0be98b67138 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -3597,6 +3597,19 @@ static const struct intel_context_ops execlists_context_ops = {
>  	.destroy = execlists_context_destroy,
>  };
>  
> +static u32 hwsp_offset(const struct i915_request *rq)
> +{
> +	const struct intel_timeline_cacheline *cl;
> +
> +	/* Before the request is executed, the timeline/cachline is fixed */

s/cachline/cacheline

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +
> +	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
> +	if (cl)
> +		return cl->ggtt_offset;
> +
> +	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
> +}
> +
>  static int gen8_emit_init_breadcrumb(struct i915_request *rq)
>  {
>  	u32 *cs;
> @@ -3619,7 +3632,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
>  	*cs++ = MI_NOOP;
>  
>  	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> -	*cs++ = i915_request_timeline(rq)->hwsp_offset;
> +	*cs++ = hwsp_offset(rq);
>  	*cs++ = 0;
>  	*cs++ = rq->fence.seqno - 1;
>  
> @@ -4939,11 +4952,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
>  	return gen8_emit_wa_tail(request, cs);
>  }
>  
> -static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
> +static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
> -	u32 addr = i915_request_active_timeline(request)->hwsp_offset;
> -
> -	return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
> +	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
>  }
>  
>  static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
> @@ -4962,7 +4973,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
>  	cs = gen8_emit_ggtt_write_rcs(cs,
>  				      request->fence.seqno,
> -				      i915_request_active_timeline(request)->hwsp_offset,
> +				      hwsp_offset(request),
>  				      PIPE_CONTROL_FLUSH_ENABLE |
>  				      PIPE_CONTROL_CS_STALL);
>  
> @@ -4974,7 +4985,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  {
>  	cs = gen8_emit_ggtt_write_rcs(cs,
>  				      request->fence.seqno,
> -				      i915_request_active_timeline(request)->hwsp_offset,
> +				      hwsp_offset(request),
>  				      PIPE_CONTROL_CS_STALL |
>  				      PIPE_CONTROL_TILE_CACHE_FLUSH |
>  				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> @@ -5044,7 +5055,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  {
>  	cs = gen12_emit_ggtt_write_rcs(cs,
>  				       request->fence.seqno,
> -				       i915_request_active_timeline(request)->hwsp_offset,
> +				       hwsp_offset(request),
>  				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
>  				       PIPE_CONTROL_CS_STALL |
>  				       PIPE_CONTROL_TILE_CACHE_FLUSH |
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
> index a2f74cefe4c3..7ea94d201fe6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
> @@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
>  	return cl;
>  }
>  
> -static void cacheline_acquire(struct intel_timeline_cacheline *cl)
> +static void cacheline_acquire(struct intel_timeline_cacheline *cl,
> +			      u32 ggtt_offset)
>  {
> -	if (cl)
> -		i915_active_acquire(&cl->active);
> +	if (!cl)
> +		return;
> +
> +	cl->ggtt_offset = ggtt_offset;
> +	i915_active_acquire(&cl->active);
>  }
>  
>  static void cacheline_release(struct intel_timeline_cacheline *cl)
> @@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
>  	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
>  		 tl->fence_context, tl->hwsp_offset);
>  
> -	cacheline_acquire(tl->hwsp_cacheline);
> +	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
>  	if (atomic_fetch_inc(&tl->pin_count)) {
>  		cacheline_release(tl->hwsp_cacheline);
>  		__i915_vma_unpin(tl->hwsp_ggtt);
> @@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
>  	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
>  		 tl->fence_context, tl->hwsp_offset);
>  
> -	cacheline_acquire(cl);
> +	cacheline_acquire(cl, tl->hwsp_offset);
>  	tl->hwsp_cacheline = cl;
>  
>  	*seqno = timeline_advance(tl);
> @@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
>  	if (err)
>  		goto out;
>  
> -	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
> -		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
> -
> +	*hwsp = cl->ggtt_offset;
>  out:
>  	i915_active_release(&cl->active);
>  	return err;
> diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> index 02181c5020db..4474f487f589 100644
> --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
> @@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
>  	struct intel_timeline_hwsp *hwsp;
>  	void *vaddr;
>  
> +	u32 ggtt_offset;
> +
>  	struct rcu_head rcu;
>  };
>  
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-10-23 11:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-10-21 22:04 [PATCH 1/2] drm/i915/gt: Use the local HWSP offset during submission Chris Wilson
2020-10-22  1:16 ` [Intel-gfx] " kernel test robot
2020-10-22  6:41 ` [PATCH] " Chris Wilson
2020-10-23 11:25   ` [Intel-gfx] " Mika Kuoppala

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).