intel-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers
Date: Thu, 28 Jun 2018 11:02:17 +0100	[thread overview]
Message-ID: <f3535bb4-78e3-d4a9-4bfe-eb83faa9d152@linux.intel.com> (raw)
In-Reply-To: <20180627210712.25428-5-chris@chris-wilson.co.uk>


On 27/06/2018 22:07, Chris Wilson wrote:
> Following the removal of the last workarounds, the only CSB mmio access
> is for the old vGPU interface. The mmio registers presented by vGPU do
> not require forcewake and can be treated as ordinary volatile memory,
> i.e. they behave just like the HWSP access just at a different location.
> We can reduce the CSB access to a set of read/write/buffer pointers and
> treat the various paths identically and not worry about forcewake.
> (Forcewake is nightmare for worstcase latency, and we want to process
> this all with irqsoff -- no latency allowed!)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c  |  12 ---
>   drivers/gpu/drm/i915/intel_lrc.c        | 116 ++++++++++--------------
>   drivers/gpu/drm/i915/intel_ringbuffer.h |  23 +++--
>   3 files changed, 65 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index d3264bd6e9dc..7209c22798e6 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -25,7 +25,6 @@
>   #include <drm/drm_print.h>
>   
>   #include "i915_drv.h"
> -#include "i915_vgpu.h"
>   #include "intel_ringbuffer.h"
>   #include "intel_lrc.h"
>   
> @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
>   	i915_gem_batch_pool_init(&engine->batch_pool, engine);
>   }
>   
> -static bool csb_force_mmio(struct drm_i915_private *i915)
> -{
> -	/* Older GVT emulation depends upon intercepting CSB mmio */
> -	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
> -		return true;
> -
> -	return false;
> -}
> -
>   static void intel_engine_init_execlist(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   
> -	execlists->csb_use_mmio = csb_force_mmio(engine->i915);
> -
>   	execlists->port_mask = 1;
>   	BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists));
>   	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 91656eb2f2db..368a8c74d11d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -137,6 +137,7 @@
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
>   #include "i915_gem_render_state.h"
> +#include "i915_vgpu.h"
>   #include "intel_lrc_reg.h"
>   #include "intel_mocs.h"
>   #include "intel_workarounds.h"
> @@ -953,44 +954,23 @@ static void process_csb(struct intel_engine_cs *engine)
>   {
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
> -	struct drm_i915_private *i915 = engine->i915;
> -
> -	/* The HWSP contains a (cacheable) mirror of the CSB */
> -	const u32 *buf =
> -		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> -	unsigned int head, tail;
> -	bool fw = false;
> +	const u32 * const buf = execlists->csb_status;
> +	u8 head, tail;
>   
>   	/* Clear before reading to catch new interrupts */
>   	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
>   	smp_mb__after_atomic();
>   
> -	if (unlikely(execlists->csb_use_mmio)) {
> -		intel_uncore_forcewake_get(i915, execlists->fw_domains);
> -		fw = true;
> -
> -		buf = (u32 * __force)
> -			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +	/* Note that csb_write, csb_status may be either in HWSP or mmio */
> +	head = execlists->csb_head;
> +	tail = READ_ONCE(*execlists->csb_write);

Under GVT when this is emulated mmio I think you need to mask and shift 
it with GEN8_CSB_WRITE_PTR.

> +	GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
> +	if (unlikely(head == tail))
> +		return;
>   
> -		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -		tail = GEN8_CSB_WRITE_PTR(head);
> -		head = GEN8_CSB_READ_PTR(head);
> -		execlists->csb_head = head;
> -	} else {
> -		const int write_idx =
> -			intel_hws_csb_write_index(i915) -
> -			I915_HWS_CSB_BUF0_INDEX;
> +	rmb(); /* Hopefully paired with a wmb() in HW */
>   
> -		head = execlists->csb_head;
> -		tail = READ_ONCE(buf[write_idx]);
> -		rmb(); /* Hopefully paired with a wmb() in HW */
> -	}
> -	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> -		  engine->name,
> -		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> -		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> -
> -	while (head != tail) {
> +	do {

Why convert while to do-while? Early unlikely return above handles the 
void process_csb calls? Would the same effect happen if you put unlikely 
in the while (head != tail) condition and would be simpler?

>   		struct i915_request *rq;
>   		unsigned int status;
>   		unsigned int count;
> @@ -1016,12 +996,12 @@ static void process_csb(struct intel_engine_cs *engine)
>   		 * status notifier.
>   		 */
>   
> -		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
>   		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
>   			  engine->name, head,
> -			  status, buf[2*head + 1],
> +			  buf[2 * head + 0], buf[2 * head + 1],
>   			  execlists->active);
>   
> +		status = buf[2 * head];

Why not leave before GEM_TRACE above, as is, and then diff is smaller?

>   		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>   			      GEN8_CTX_STATUS_PREEMPTED))
>   			execlists_set_active(execlists,
> @@ -1103,16 +1083,11 @@ static void process_csb(struct intel_engine_cs *engine)
>   		} else {
>   			port_set(port, port_pack(rq, count));
>   		}
> -	}
> +	} while (head != tail);
>   
> -	if (head != execlists->csb_head) {
> -		execlists->csb_head = head;
> -		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> -		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> -	}
> -
> -	if (unlikely(fw))
> -		intel_uncore_forcewake_put(i915, execlists->fw_domains);
> +	writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> +	       execlists->csb_read);
> +	execlists->csb_head = head;

I guess early return helps to avoid this, but since it is going away in 
a following patch maybe it is not worth it.

>   }
>   
>   /*
> @@ -2430,28 +2405,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
>   static void
>   logical_ring_setup(struct intel_engine_cs *engine)
>   {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -	enum forcewake_domains fw_domains;
> -
>   	intel_engine_setup_common(engine);
>   
>   	/* Intentionally left blank. */
>   	engine->buffer = NULL;
>   
> -	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
> -						    RING_ELSP(engine),
> -						    FW_REG_WRITE);
> -
> -	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> -						     RING_CONTEXT_STATUS_PTR(engine),
> -						     FW_REG_READ | FW_REG_WRITE);
> -
> -	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
> -						     RING_CONTEXT_STATUS_BUF_BASE(engine),
> -						     FW_REG_READ);
> -
> -	engine->execlists.fw_domains = fw_domains;
> -
>   	tasklet_init(&engine->execlists.tasklet,
>   		     execlists_submission_tasklet, (unsigned long)engine);
>   
> @@ -2459,34 +2417,56 @@ logical_ring_setup(struct intel_engine_cs *engine)
>   	logical_ring_default_irqs(engine);
>   }
>   
> +static bool csb_force_mmio(struct drm_i915_private *i915)
> +{
> +	/* Older GVT emulation depends upon intercepting CSB mmio */
> +	return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915);
> +}
> +
>   static int logical_ring_init(struct intel_engine_cs *engine)
>   {
> +	struct drm_i915_private *i915 = engine->i915;
> +	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	int ret;
>   
>   	ret = intel_engine_init_common(engine);
>   	if (ret)
>   		goto error;
>   
> -	if (HAS_LOGICAL_RING_ELSQ(engine->i915)) {
> -		engine->execlists.submit_reg = engine->i915->regs +
> +	if (HAS_LOGICAL_RING_ELSQ(i915)) {
> +		execlists->submit_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
> -		engine->execlists.ctrl_reg = engine->i915->regs +
> +		execlists->ctrl_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine));
>   	} else {
> -		engine->execlists.submit_reg = engine->i915->regs +
> +		execlists->submit_reg = i915->regs +
>   			i915_mmio_reg_offset(RING_ELSP(engine));
>   	}
>   
> -	engine->execlists.preempt_complete_status = ~0u;
> -	if (engine->i915->preempt_context) {
> +	execlists->preempt_complete_status = ~0u;
> +	if (i915->preempt_context) {
>   		struct intel_context *ce =
> -			to_intel_context(engine->i915->preempt_context, engine);
> +			to_intel_context(i915->preempt_context, engine);
>   
> -		engine->execlists.preempt_complete_status =
> +		execlists->preempt_complete_status =
>   			upper_32_bits(ce->lrc_desc);
>   	}
>   
> -	engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1;
> +	execlists->csb_head = GEN8_CSB_ENTRIES - 1;
> +	execlists->csb_read =
> +		i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
> +	if (csb_force_mmio(i915)) {
> +		execlists->csb_status = (u32 __force *)
> +			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> +
> +		execlists->csb_write = (u32 __force *)execlists->csb_read;
> +	} else {
> +		execlists->csb_status =
> +			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> +
> +		execlists->csb_write =
> +			&engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
> +	}
>   
>   	return 0;
>   
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a0bc7a8222b4..5b92c5f03e1d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -300,19 +300,30 @@ struct intel_engine_execlists {
>   	struct rb_node *first;
>   
>   	/**
> -	 * @fw_domains: forcewake domains for irq tasklet
> +	 * @csb_head: context status buffer head
>   	 */
> -	unsigned int fw_domains;
> +	unsigned int csb_head;
>   
>   	/**
> -	 * @csb_head: context status buffer head
> +	 * @csb_read: control register for Context Switch buffer
> +	 *
> +	 * Note this register is always in mmio.
>   	 */
> -	unsigned int csb_head;
> +	u32 __iomem *csb_read;
>   
>   	/**
> -	 * @csb_use_mmio: access csb through mmio, instead of hwsp
> +	 * @csb_write: control register for Context Switch buffer
> +	 *
> +	 * Note this register may be either mmio or HWSP shadow.
> +	 */
> +	u32 *csb_write;
> +
> +	/**
> +	 * @csb_status: status array for Context Switch buffer
> +	 *
> +	 * Note these register may be either mmio or HWSP shadow.
>   	 */
> -	bool csb_use_mmio;
> +	u32 *csb_status;
>   
>   	/**
>   	 * @preempt_complete_status: expected CSB upon completing preemption
> 

Looks otherwise ok.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2018-06-28 10:02 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-27 21:07 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
2018-06-27 21:07 ` [PATCH 2/9] drm/i915/execlists: Pull submit after dequeue under timeline lock Chris Wilson
2018-06-27 21:07 ` [PATCH 3/9] drm/i915/execlists: Pull CSB reset under the timeline.lock Chris Wilson
2018-06-27 21:07 ` [PATCH 4/9] drm/i915/execlists: Process one CSB update at a time Chris Wilson
2018-06-27 21:07 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson
2018-06-28 10:02   ` Tvrtko Ursulin [this message]
2018-06-28 10:10     ` Chris Wilson
2018-06-28 10:58       ` Tvrtko Ursulin
2018-06-28 11:05         ` Chris Wilson
2018-06-28 11:13   ` [PATCH v2] " Chris Wilson
2018-06-28 11:21     ` Tvrtko Ursulin
2018-06-27 21:07 ` [PATCH 6/9] drm/i915/execlists: Reset CSB write pointer after reset Chris Wilson
2018-06-28 10:06   ` Tvrtko Ursulin
2018-06-28 10:17     ` Chris Wilson
2018-06-28 11:02       ` Tvrtko Ursulin
2018-06-28 11:30         ` Chris Wilson
2018-06-28 11:37           ` Tvrtko Ursulin
2018-06-28 10:22   ` [PATCH v2] " Chris Wilson
2018-06-28 11:59   ` [PATCH v3] " Chris Wilson
2018-06-28 12:15     ` Tvrtko Ursulin
2018-06-28 12:19       ` Chris Wilson
2018-06-27 21:07 ` [PATCH 7/9] drm/i915/execlists: Stop storing the CSB read pointer in the mmio register Chris Wilson
2018-06-28 10:07   ` Tvrtko Ursulin
2018-06-27 21:07 ` [PATCH 8/9] drm/i915/execlists: Trust the CSB Chris Wilson
2018-06-28 11:29   ` Tvrtko Ursulin
2018-06-28 12:03     ` Chris Wilson
2018-06-27 21:07 ` [PATCH 9/9] drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Chris Wilson
2018-06-27 21:56 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts Patchwork
2018-06-27 22:13 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-28  3:46 ` ✓ Fi.CI.IGT: " Patchwork
2018-06-28 10:41 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev2) Patchwork
2018-06-28 11:00 ` ✓ Fi.CI.BAT: success " Patchwork
2018-06-28 11:20 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev3) Patchwork
2018-06-28 12:09 ` ✗ Fi.CI.BAT: failure for series starting with [1/9] drm/i915: Drop posting reads to flush master interrupts (rev4) Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2018-06-28 12:33 [PATCH 1/9] drm/i915: Drop posting reads to flush master interrupts Chris Wilson
2018-06-28 12:33 ` [PATCH 5/9] drm/i915/execlists: Unify CSB access pointers Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f3535bb4-78e3-d4a9-4bfe-eb83faa9d152@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).