From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, Intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] drm/i915: Do not return stale status / remove impossible WARN
Date: Fri, 12 Feb 2016 10:05:58 +0000 [thread overview]
Message-ID: <56BDAE86.3030009@linux.intel.com> (raw)
In-Reply-To: <20160211210032.GB421@nuc-i3427.alporthouse.com>
On 11/02/16 21:00, Chris Wilson wrote:
> On Thu, Feb 11, 2016 at 06:03:09PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> The only caller of get_context_status ensures the read pointer stays
>> in range, so the WARN is impossible. Also, if the WARN would be
>> triggered by a hypothetical new caller stale status would be
>> returned to them.
>>
>> Maybe it is better to wrap the pointer in the function itself
>> instead, to avoid both; this even results in smaller code.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> drivers/gpu/drm/i915/intel_lrc.c | 15 ++++++---------
>> 1 file changed, 6 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 89eb892df4ae..951f1e6af947 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -507,17 +507,16 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
>> return false;
>> }
>>
>> -static void get_context_status(struct intel_engine_cs *ring,
>> - u8 read_pointer,
>> - u32 *status, u32 *context_id)
>> +static u32 get_context_status(struct intel_engine_cs *ring, u8 read_pointer,
>> + u32 *context_id)
>> {
>> struct drm_i915_private *dev_priv = ring->dev->dev_private;
>>
>> - if (WARN_ON(read_pointer >= GEN8_CSB_ENTRIES))
>> - return;
>> + read_pointer %= GEN8_CSB_ENTRIES;
>>
>> - *status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
>> *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
>
> Micro-optimising hat says not to even do the uncached, spinlocked mmio
> read when not required.
You mean move the forcewake grab out from the elsp write to cover all mmio
reads? These two patches make the irq handler around 3.7% smaller, and
moving the forcewake/uncore lock shrinks it by 1% more. It must be
faster as well, if someone could measure it. :)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1e7ccd0a6573..77a64008f53d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -375,8 +375,6 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
rq0->elsp_submitted++;
/* You must always write both descriptors in the order below. */
- spin_lock(&dev_priv->uncore.lock);
- intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
@@ -386,8 +384,6 @@ static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
/* ELSP is a wo register, use another nearby reg for posting */
POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
- intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
- spin_unlock(&dev_priv->uncore.lock);
}
static int execlists_update_context(struct drm_i915_gem_request *rq)
@@ -513,9 +509,9 @@ static u32 get_context_status(struct intel_engine_cs *ring, u8 read_pointer,
read_pointer %= GEN8_CSB_ENTRIES;
- *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
+ *context_id = I915_READ_FW(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
- return I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
+ return I915_READ_FW(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
}
/**
@@ -535,15 +531,18 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
u32 status_id;
u32 submit_contexts = 0;
- status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+ spin_lock(&ring->execlist_lock);
+
+ spin_lock(&dev_priv->uncore.lock);
+ intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+
+ status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(ring));
read_pointer = ring->next_context_status_buffer;
write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
if (read_pointer > write_pointer)
write_pointer += GEN8_CSB_ENTRIES;
- spin_lock(&ring->execlist_lock);
-
while (read_pointer < write_pointer) {
status = get_context_status(ring, ++read_pointer, &status_id);
@@ -569,22 +568,26 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
GEN8_CTX_STATUS_ACTIVE_IDLE))))
execlists_context_unqueue(ring);
- spin_unlock(&ring->execlist_lock);
-
- if (unlikely(submit_contexts > 2))
- DRM_ERROR("More than two context complete events?\n");
-
ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
/* Update the read pointer to the old write pointer. Manual ringbuffer
* management ftw </sarcasm> */
- I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
- _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
- ring->next_context_status_buffer << 8));
+ I915_WRITE_FW(RING_CONTEXT_STATUS_PTR(ring),
+ _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+ ring->next_context_status_buffer << 8));
+
+ intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+ spin_unlock(&dev_priv->uncore.lock);
+
+ spin_unlock(&ring->execlist_lock);
+
+ if (unlikely(submit_contexts > 2))
+ DRM_ERROR("More than two context complete events?\n");
}
static int execlists_context_queue(struct drm_i915_gem_request *request)
{
+ struct drm_i915_private *dev_priv = request->i915;
struct intel_engine_cs *ring = request->ring;
struct drm_i915_gem_request *cursor;
int num_elements = 0;
@@ -616,9 +619,16 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
}
list_add_tail(&request->execlist_link, &ring->execlist_queue);
- if (num_elements == 0)
+ if (num_elements == 0) {
+ spin_lock(&dev_priv->uncore.lock);
+ intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+
execlists_context_unqueue(ring);
+ intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+ spin_unlock(&dev_priv->uncore.lock);
+ }
+
spin_unlock_irq(&ring->execlist_lock);
return 0;
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-02-12 10:05 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-11 18:03 [PATCH 1/2] drm/i915: Do not return stale status / remove impossible WARN Tvrtko Ursulin
2016-02-11 18:03 ` [PATCH 2/2] drm/i915: Execlist irq handler micro optimisations Tvrtko Ursulin
2016-02-11 20:59 ` Chris Wilson
2016-02-12 12:00 ` [PATCH v2] " Tvrtko Ursulin
2016-02-12 13:46 ` Tvrtko Ursulin
2016-02-12 14:30 ` Chris Wilson
2016-02-12 14:42 ` Chris Wilson
2016-02-12 15:54 ` Tvrtko Ursulin
2016-02-12 16:22 ` Chris Wilson
2016-02-11 21:00 ` [PATCH 1/2] drm/i915: Do not return stale status / remove impossible WARN Chris Wilson
2016-02-12 10:05 ` Tvrtko Ursulin [this message]
2016-02-12 10:21 ` Chris Wilson
2016-02-12 11:45 ` Tvrtko Ursulin
2016-02-16 8:10 ` ✗ Fi.CI.BAT: failure for series starting with [2/2] drm/i915: Execlist irq handler micro optimisations (rev3) Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56BDAE86.3030009@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=Intel-gfx@lists.freedesktop.org \
--cc=chris@chris-wilson.co.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).