* [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c
@ 2016-07-19 10:51 Chris Wilson
2016-07-19 10:51 ` [PATCH 2/8] drm/i915: Retire oldest completed request before allocating next Chris Wilson
` (7 more replies)
0 siblings, 8 replies; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
Migrate the request operations out of the main body of i915_gem.c and
into their own C file for easier expansion.
v2: Move __i915_add_request() across as well
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 209 +---------
drivers/gpu/drm/i915/i915_gem.c | 655 +------------------------------
drivers/gpu/drm/i915/i915_gem_request.c | 658 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_request.h | 238 ++++++++++++
5 files changed, 905 insertions(+), 856 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c
create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 75318ebb8d25..6092f0ea24df 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -33,6 +33,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_gtt.o \
i915_gem.o \
i915_gem_render_state.o \
+ i915_gem_request.o \
i915_gem_shrinker.o \
i915_gem_stolen.o \
i915_gem_tiling.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 27d9b2c374b3..c97a75597099 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -61,6 +61,7 @@
#include "i915_gem.h"
#include "i915_gem_gtt.h"
#include "i915_gem_render_state.h"
+#include "i915_gem_request.h"
#include "intel_gvt.h"
@@ -2365,171 +2366,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
(((__iter).curr += PAGE_SIZE) < (__iter).max) || \
((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
-/**
- * Request queue structure.
- *
- * The request queue allows us to note sequence numbers that have been emitted
- * and may be associated with active buffers to be retired.
- *
- * By keeping this list, we can avoid having to do questionable sequence
- * number comparisons on buffer last_read|write_seqno. It also allows an
- * emission time to be associated with the request for tracking how far ahead
- * of the GPU the submission is.
- *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
- */
-struct drm_i915_gem_request {
- struct kref ref;
-
- /** On Which ring this request was generated */
- struct drm_i915_private *i915;
- struct intel_engine_cs *engine;
- struct intel_signal_node signaling;
-
- /** GEM sequence number associated with the previous request,
- * when the HWS breadcrumb is equal to this the GPU is processing
- * this request.
- */
- u32 previous_seqno;
-
- /** GEM sequence number associated with this request,
- * when the HWS breadcrumb is equal or greater than this the GPU
- * has finished processing this request.
- */
- u32 seqno;
-
- /** Position in the ringbuffer of the start of the request */
- u32 head;
-
- /**
- * Position in the ringbuffer of the start of the postfix.
- * This is required to calculate the maximum available ringbuffer
- * space without overwriting the postfix.
- */
- u32 postfix;
-
- /** Position in the ringbuffer of the end of the whole request */
- u32 tail;
-
- /** Preallocate space in the ringbuffer for the emitting the request */
- u32 reserved_space;
-
- /**
- * Context and ring buffer related to this request
- * Contexts are refcounted, so when this request is associated with a
- * context, we must increment the context's refcount, to guarantee that
- * it persists while any request is linked to it. Requests themselves
- * are also refcounted, so the request will only be freed when the last
- * reference to it is dismissed, and the code in
- * i915_gem_request_free() will then decrement the refcount on the
- * context.
- */
- struct i915_gem_context *ctx;
- struct intel_ringbuffer *ringbuf;
-
- /**
- * Context related to the previous request.
- * As the contexts are accessed by the hardware until the switch is
- * completed to a new context, the hardware may still be writing
- * to the context object after the breadcrumb is visible. We must
- * not unpin/unbind/prune that object whilst still active and so
- * we keep the previous context pinned until the following (this)
- * request is retired.
- */
- struct i915_gem_context *previous_context;
-
- /** Batch buffer related to this request if any (used for
- error state dump only) */
- struct drm_i915_gem_object *batch_obj;
-
- /** Time at which this request was emitted, in jiffies. */
- unsigned long emitted_jiffies;
-
- /** global list entry for this request */
- struct list_head list;
-
- struct drm_i915_file_private *file_priv;
- /** file_priv list entry for this request */
- struct list_head client_list;
-
- /** process identifier submitting this request */
- struct pid *pid;
-
- /**
- * The ELSP only accepts two elements at a time, so we queue
- * context/tail pairs on a given queue (ring->execlist_queue) until the
- * hardware is available. The queue serves a double purpose: we also use
- * it to keep track of the up to 2 contexts currently in the hardware
- * (usually one in execution and the other queued up by the GPU): We
- * only remove elements from the head of the queue when the hardware
- * informs us that an element has been completed.
- *
- * All accesses to the queue are mediated by a spinlock
- * (ring->execlist_lock).
- */
-
- /** Execlist link in the submission queue.*/
- struct list_head execlist_link;
-
- /** Execlists no. of times this request has been sent to the ELSP */
- int elsp_submitted;
-
- /** Execlists context hardware id. */
- unsigned ctx_hw_id;
-};
-
-struct drm_i915_gem_request * __must_check
-i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx);
-void i915_gem_request_free(struct kref *req_ref);
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
- struct drm_file *file);
-
-static inline uint32_t
-i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
-{
- return req ? req->seqno : 0;
-}
-
-static inline struct intel_engine_cs *
-i915_gem_request_get_engine(struct drm_i915_gem_request *req)
-{
- return req ? req->engine : NULL;
-}
-
-static inline struct drm_i915_gem_request *
-i915_gem_request_reference(struct drm_i915_gem_request *req)
-{
- if (req)
- kref_get(&req->ref);
- return req;
-}
-
-static inline void
-i915_gem_request_unreference(struct drm_i915_gem_request *req)
-{
- kref_put(&req->ref, i915_gem_request_free);
-}
-
-static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
- struct drm_i915_gem_request *src)
-{
- if (src)
- i915_gem_request_reference(src);
-
- if (*pdst)
- i915_gem_request_unreference(*pdst);
-
- *pdst = src;
-}
-
-/*
- * XXX: i915_gem_request_completed should be here but currently needs the
- * definition of i915_seqno_passed() which is below. It will be moved in
- * a later patch when the call to i915_seqno_passed() is obsoleted...
- */
-
/*
* A command that requires special handling by the command parser.
*/
@@ -3297,37 +3133,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
struct drm_i915_gem_object *new,
unsigned frontbuffer_bits);
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
- return (int32_t)(seq1 - seq2) >= 0;
-}
-
-static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req)
-{
- return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->previous_seqno);
-}
-
-static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req)
-{
- return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->seqno);
-}
-
-bool __i915_spin_request(const struct drm_i915_gem_request *request,
- int state, unsigned long timeout_us);
-static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
- int state, unsigned long timeout_us)
-{
- return (i915_gem_request_started(request) &&
- __i915_spin_request(request, state, timeout_us));
-}
-
-int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno);
int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
struct drm_i915_gem_request *
@@ -3385,18 +3190,6 @@ void i915_gem_cleanup_engines(struct drm_device *dev);
int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv);
int __must_check i915_gem_suspend(struct drm_device *dev);
void i915_gem_resume(struct drm_device *dev);
-void __i915_add_request(struct drm_i915_gem_request *req,
- struct drm_i915_gem_object *batch_obj,
- bool flush_caches);
-#define i915_add_request(req) \
- __i915_add_request(req, NULL, true)
-#define i915_add_request_no_flush(req) \
- __i915_add_request(req, NULL, false)
-int __i915_wait_request(struct drm_i915_gem_request *req,
- bool interruptible,
- s64 *timeout,
- struct intel_rps_client *rps);
-int __must_check i915_wait_request(struct drm_i915_gem_request *req);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int __must_check
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e40fab114bf0..6df14058b3fe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1325,365 +1325,6 @@ put_rpm:
return ret;
}
-static int
-i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
-{
- if (__i915_terminally_wedged(reset_counter))
- return -EIO;
-
- if (__i915_reset_in_progress(reset_counter)) {
- /* Non-interruptible callers can't handle -EAGAIN, hence return
- * -EIO unconditionally for these. */
- if (!interruptible)
- return -EIO;
-
- return -EAGAIN;
- }
-
- return 0;
-}
-
-static unsigned long local_clock_us(unsigned *cpu)
-{
- unsigned long t;
-
- /* Cheaply and approximately convert from nanoseconds to microseconds.
- * The result and subsequent calculations are also defined in the same
- * approximate microseconds units. The principal source of timing
- * error here is from the simple truncation.
- *
- * Note that local_clock() is only defined wrt to the current CPU;
- * the comparisons are no longer valid if we switch CPUs. Instead of
- * blocking preemption for the entire busywait, we can detect the CPU
- * switch and use that as indicator of system load and a reason to
- * stop busywaiting, see busywait_stop().
- */
- *cpu = get_cpu();
- t = local_clock() >> 10;
- put_cpu();
-
- return t;
-}
-
-static bool busywait_stop(unsigned long timeout, unsigned cpu)
-{
- unsigned this_cpu;
-
- if (time_after(local_clock_us(&this_cpu), timeout))
- return true;
-
- return this_cpu != cpu;
-}
-
-bool __i915_spin_request(const struct drm_i915_gem_request *req,
- int state, unsigned long timeout_us)
-{
- unsigned cpu;
-
- /* When waiting for high frequency requests, e.g. during synchronous
- * rendering split between the CPU and GPU, the finite amount of time
- * required to set up the irq and wait upon it limits the response
- * rate. By busywaiting on the request completion for a short while we
- * can service the high frequency waits as quick as possible. However,
- * if it is a slow request, we want to sleep as quickly as possible.
- * The tradeoff between waiting and sleeping is roughly the time it
- * takes to sleep on a request, on the order of a microsecond.
- */
-
- timeout_us += local_clock_us(&cpu);
- do {
- if (i915_gem_request_completed(req))
- return true;
-
- if (signal_pending_state(state, current))
- break;
-
- if (busywait_stop(timeout_us, cpu))
- break;
-
- cpu_relax_lowlatency();
- } while (!need_resched());
-
- return false;
-}
-
-/**
- * __i915_wait_request - wait until execution of request has finished
- * @req: duh!
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- * @rps: RPS client
- *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
- *
- * Returns 0 if the request was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-int __i915_wait_request(struct drm_i915_gem_request *req,
- bool interruptible,
- s64 *timeout,
- struct intel_rps_client *rps)
-{
- int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
- DEFINE_WAIT(reset);
- struct intel_wait wait;
- unsigned long timeout_remain;
- s64 before = 0; /* Only to silence a compiler warning. */
- int ret = 0;
-
- might_sleep();
-
- if (list_empty(&req->list))
- return 0;
-
- if (i915_gem_request_completed(req))
- return 0;
-
- timeout_remain = MAX_SCHEDULE_TIMEOUT;
- if (timeout) {
- if (WARN_ON(*timeout < 0))
- return -EINVAL;
-
- if (*timeout == 0)
- return -ETIME;
-
- timeout_remain = nsecs_to_jiffies_timeout(*timeout);
-
- /*
- * Record current time in case interrupted by signal, or wedged.
- */
- before = ktime_get_raw_ns();
- }
-
- trace_i915_gem_request_wait_begin(req);
-
- /* This client is about to stall waiting for the GPU. In many cases
- * this is undesirable and limits the throughput of the system, as
- * many clients cannot continue processing user input/output whilst
- * blocked. RPS autotuning may take tens of milliseconds to respond
- * to the GPU load and thus incurs additional latency for the client.
- * We can circumvent that by promoting the GPU frequency to maximum
- * before we wait. This makes the GPU throttle up much more quickly
- * (good for benchmarks and user experience, e.g. window animations),
- * but at a cost of spending more power processing the workload
- * (bad for battery). Not all clients even want their results
- * immediately and for them we should just let the GPU select its own
- * frequency to maximise efficiency. To prevent a single client from
- * forcing the clocks too high for the whole system, we only allow
- * each client to waitboost once in a busy period.
- */
- if (INTEL_INFO(req->i915)->gen >= 6)
- gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
-
- /* Optimistic spin for the next ~jiffie before touching IRQs */
- if (i915_spin_request(req, state, 5))
- goto complete;
-
- set_current_state(state);
- add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
-
- intel_wait_init(&wait, req->seqno);
- if (intel_engine_add_wait(req->engine, &wait))
- /* In order to check that we haven't missed the interrupt
- * as we enabled it, we need to kick ourselves to do a
- * coherent check on the seqno before we sleep.
- */
- goto wakeup;
-
- for (;;) {
- if (signal_pending_state(state, current)) {
- ret = -ERESTARTSYS;
- break;
- }
-
- timeout_remain = io_schedule_timeout(timeout_remain);
- if (timeout_remain == 0) {
- ret = -ETIME;
- break;
- }
-
- if (intel_wait_complete(&wait))
- break;
-
- set_current_state(state);
-
-wakeup:
- /* Carefully check if the request is complete, giving time
- * for the seqno to be visible following the interrupt.
- * We also have to check in case we are kicked by the GPU
- * reset in order to drop the struct_mutex.
- */
- if (__i915_request_irq_complete(req))
- break;
-
- /* Only spin if we know the GPU is processing this request */
- if (i915_spin_request(req, state, 2))
- break;
- }
- remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
-
- intel_engine_remove_wait(req->engine, &wait);
- __set_current_state(TASK_RUNNING);
-complete:
- trace_i915_gem_request_wait_end(req);
-
- if (timeout) {
- s64 tres = *timeout - (ktime_get_raw_ns() - before);
-
- *timeout = tres < 0 ? 0 : tres;
-
- /*
- * Apparently ktime isn't accurate enough and occasionally has a
- * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
- * things up to make the test happy. We allow up to 1 jiffy.
- *
- * This is a regrssion from the timespec->ktime conversion.
- */
- if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
- *timeout = 0;
- }
-
- if (rps && req->seqno == req->engine->last_submitted_seqno) {
- /* The GPU is now idle and this client has stalled.
- * Since no other client has submitted a request in the
- * meantime, assume that this client is the only one
- * supplying work to the GPU but is unable to keep that
- * work supplied because it is waiting. Since the GPU is
- * then never kept fully busy, RPS autoclocking will
- * keep the clocks relatively low, causing further delays.
- * Compensate by giving the synchronous client credit for
- * a waitboost next time.
- */
- spin_lock(&req->i915->rps.client_lock);
- list_del_init(&rps->link);
- spin_unlock(&req->i915->rps.client_lock);
- }
-
- return ret;
-}
-
-int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
- struct drm_file *file)
-{
- struct drm_i915_file_private *file_priv;
-
- WARN_ON(!req || !file || req->file_priv);
-
- if (!req || !file)
- return -EINVAL;
-
- if (req->file_priv)
- return -EINVAL;
-
- file_priv = file->driver_priv;
-
- spin_lock(&file_priv->mm.lock);
- req->file_priv = file_priv;
- list_add_tail(&req->client_list, &file_priv->mm.request_list);
- spin_unlock(&file_priv->mm.lock);
-
- req->pid = get_pid(task_pid(current));
-
- return 0;
-}
-
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
- struct drm_i915_file_private *file_priv = request->file_priv;
-
- if (!file_priv)
- return;
-
- spin_lock(&file_priv->mm.lock);
- list_del(&request->client_list);
- request->file_priv = NULL;
- spin_unlock(&file_priv->mm.lock);
-
- put_pid(request->pid);
- request->pid = NULL;
-}
-
-static void i915_gem_request_retire(struct drm_i915_gem_request *request)
-{
- trace_i915_gem_request_retire(request);
-
- /* We know the GPU must have read the request to have
- * sent us the seqno + interrupt, so use the position
- * of tail of the request to update the last known position
- * of the GPU head.
- *
- * Note this requires that we are always called in request
- * completion order.
- */
- request->ringbuf->last_retired_head = request->postfix;
-
- list_del_init(&request->list);
- i915_gem_request_remove_from_client(request);
-
- if (request->previous_context) {
- if (i915.enable_execlists)
- intel_lr_context_unpin(request->previous_context,
- request->engine);
- }
-
- i915_gem_context_unreference(request->ctx);
- i915_gem_request_unreference(request);
-}
-
-static void
-__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
-{
- struct intel_engine_cs *engine = req->engine;
- struct drm_i915_gem_request *tmp;
-
- lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
- if (list_empty(&req->list))
- return;
-
- do {
- tmp = list_first_entry(&engine->request_list,
- typeof(*tmp), list);
-
- i915_gem_request_retire(tmp);
- } while (tmp != req);
-
- WARN_ON(i915_verify_lists(engine->dev));
-}
-
-/**
- * Waits for a request to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- * @req: request to wait on
- */
-int
-i915_wait_request(struct drm_i915_gem_request *req)
-{
- struct drm_i915_private *dev_priv = req->i915;
- bool interruptible;
- int ret;
-
- interruptible = dev_priv->mm.interruptible;
-
- BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex));
-
- ret = __i915_wait_request(req, interruptible, NULL, NULL);
- if (ret)
- return ret;
-
- /* If the GPU hung, we want to keep the requests to find the guilty. */
- if (!i915_reset_in_progress(&dev_priv->gpu_error))
- __i915_gem_request_retire__upto(req);
-
- return 0;
-}
-
/**
* Ensures that all rendering to the object has completed and the object is
* safe to unbind from the GTT or access from the CPU.
@@ -1740,7 +1381,7 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
i915_gem_object_retire__write(obj);
if (!i915_reset_in_progress(&req->i915->gpu_error))
- __i915_gem_request_retire__upto(req);
+ i915_gem_request_retire_upto(req);
}
/* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -2761,193 +2402,6 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
drm_gem_object_unreference(&obj->base);
}
-static int
-i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
-{
- struct intel_engine_cs *engine;
- int ret;
-
- /* Carefully retire all requests without writing to the rings */
- for_each_engine(engine, dev_priv) {
- ret = intel_engine_idle(engine);
- if (ret)
- return ret;
- }
- i915_gem_retire_requests(dev_priv);
-
- /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
- if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
- while (intel_kick_waiters(dev_priv) ||
- intel_kick_signalers(dev_priv))
- yield();
- }
-
- /* Finally reset hw state */
- for_each_engine(engine, dev_priv)
- intel_ring_init_seqno(engine, seqno);
-
- return 0;
-}
-
-int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
- int ret;
-
- if (seqno == 0)
- return -EINVAL;
-
- /* HWS page needs to be set less than what we
- * will inject to ring
- */
- ret = i915_gem_init_seqno(dev_priv, seqno - 1);
- if (ret)
- return ret;
-
- /* Carefully set the last_seqno value so that wrap
- * detection still works
- */
- dev_priv->next_seqno = seqno;
- dev_priv->last_seqno = seqno - 1;
- if (dev_priv->last_seqno == 0)
- dev_priv->last_seqno--;
-
- return 0;
-}
-
-int
-i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
-{
- /* reserve 0 for non-seqno */
- if (dev_priv->next_seqno == 0) {
- int ret = i915_gem_init_seqno(dev_priv, 0);
- if (ret)
- return ret;
-
- dev_priv->next_seqno = 1;
- }
-
- *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
- return 0;
-}
-
-static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
-
- dev_priv->gt.active_engines |= intel_engine_flag(engine);
- if (dev_priv->gt.awake)
- return;
-
- intel_runtime_pm_get_noresume(dev_priv);
- dev_priv->gt.awake = true;
-
- intel_enable_gt_powersave(dev_priv);
- i915_update_gfx_val(dev_priv);
- if (INTEL_GEN(dev_priv) >= 6)
- gen6_rps_busy(dev_priv);
-
- queue_delayed_work(dev_priv->wq,
- &dev_priv->gt.retire_work,
- round_jiffies_up_relative(HZ));
-}
-
-/*
- * NB: This function is not allowed to fail. Doing so would mean the the
- * request is not being tracked for completion but the work itself is
- * going to happen on the hardware. This would be a Bad Thing(tm).
- */
-void __i915_add_request(struct drm_i915_gem_request *request,
- struct drm_i915_gem_object *obj,
- bool flush_caches)
-{
- struct intel_engine_cs *engine;
- struct intel_ringbuffer *ringbuf;
- u32 request_start;
- u32 reserved_tail;
- int ret;
-
- if (WARN_ON(request == NULL))
- return;
-
- engine = request->engine;
- ringbuf = request->ringbuf;
-
- /*
- * To ensure that this call will not fail, space for its emissions
- * should already have been reserved in the ring buffer. Let the ring
- * know that it is time to use that space up.
- */
- request_start = intel_ring_get_tail(ringbuf);
- reserved_tail = request->reserved_space;
- request->reserved_space = 0;
-
- /*
- * Emit any outstanding flushes - execbuf can fail to emit the flush
- * after having emitted the batchbuffer command. Hence we need to fix
- * things up similar to emitting the lazy request. The difference here
- * is that the flush _must_ happen before the next request, no matter
- * what.
- */
- if (flush_caches) {
- if (i915.enable_execlists)
- ret = logical_ring_flush_all_caches(request);
- else
- ret = intel_ring_flush_all_caches(request);
- /* Not allowed to fail! */
- WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
- }
-
- trace_i915_gem_request_add(request);
-
- request->head = request_start;
-
- /* Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when this
- * request is retired will the the batch_obj be moved onto the
- * inactive_list and lose its active reference. Hence we do not need
- * to explicitly hold another reference here.
- */
- request->batch_obj = obj;
-
- /* Seal the request and mark it as pending execution. Note that
- * we may inspect this state, without holding any locks, during
- * hangcheck. Hence we apply the barrier to ensure that we do not
- * see a more recent value in the hws than we are tracking.
- */
- request->emitted_jiffies = jiffies;
- request->previous_seqno = engine->last_submitted_seqno;
- smp_store_mb(engine->last_submitted_seqno, request->seqno);
- list_add_tail(&request->list, &engine->request_list);
-
- /* Record the position of the start of the request so that
- * should we detect the updated seqno part-way through the
- * GPU processing the request, we never over-estimate the
- * position of the head.
- */
- request->postfix = intel_ring_get_tail(ringbuf);
-
- if (i915.enable_execlists)
- ret = engine->emit_request(request);
- else {
- ret = engine->add_request(request);
-
- request->tail = intel_ring_get_tail(ringbuf);
- }
- /* Not allowed to fail! */
- WARN(ret, "emit|add_request failed: %d!\n", ret);
- /* Sanity check that the reserved size was large enough. */
- ret = intel_ring_get_tail(ringbuf) - request_start;
- if (ret < 0)
- ret += ringbuf->size;
- WARN_ONCE(ret > reserved_tail,
- "Not enough space reserved (%d bytes) "
- "for adding the request (%d bytes)\n",
- reserved_tail, ret);
-
- i915_gem_mark_busy(engine);
-}
-
static bool i915_context_is_banned(const struct i915_gem_context *ctx)
{
unsigned long elapsed;
@@ -2979,101 +2433,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx,
}
}
-void i915_gem_request_free(struct kref *req_ref)
-{
- struct drm_i915_gem_request *req = container_of(req_ref,
- typeof(*req), ref);
- kmem_cache_free(req->i915->requests, req);
-}
-
-static inline int
-__i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx,
- struct drm_i915_gem_request **req_out)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
- struct drm_i915_gem_request *req;
- int ret;
-
- if (!req_out)
- return -EINVAL;
-
- *req_out = NULL;
-
- /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
- * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
- * and restart.
- */
- ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
- if (ret)
- return ret;
-
- req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
- if (req == NULL)
- return -ENOMEM;
-
- ret = i915_gem_get_seqno(engine->i915, &req->seqno);
- if (ret)
- goto err;
-
- kref_init(&req->ref);
- req->i915 = dev_priv;
- req->engine = engine;
- req->ctx = ctx;
- i915_gem_context_reference(req->ctx);
-
- /*
- * Reserve space in the ring buffer for all the commands required to
- * eventually emit this request. This is to guarantee that the
- * i915_add_request() call can't fail. Note that the reserve may need
- * to be redone if the request is not actually submitted straight
- * away, e.g. because a GPU scheduler has deferred it.
- */
- req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-
- if (i915.enable_execlists)
- ret = intel_logical_ring_alloc_request_extras(req);
- else
- ret = intel_ring_alloc_request_extras(req);
- if (ret)
- goto err_ctx;
-
- *req_out = req;
- return 0;
-
-err_ctx:
- i915_gem_context_unreference(ctx);
-err:
- kmem_cache_free(dev_priv->requests, req);
- return ret;
-}
-
-/**
- * i915_gem_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- * This can be NULL if the request is not directly related to
- * any specific user context, in which case this function will
- * choose an appropriate context to use.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
-struct drm_i915_gem_request *
-i915_gem_request_alloc(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
-{
- struct drm_i915_gem_request *req;
- int err;
-
- if (ctx == NULL)
- ctx = engine->i915->kernel_context;
- err = __i915_gem_request_alloc(engine, ctx, &req);
- return err ? ERR_PTR(err) : req;
-}
-
struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *engine)
{
@@ -3147,14 +2506,14 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
* implicit references on things like e.g. ppgtt address spaces through
* the request.
*/
- while (!list_empty(&engine->request_list)) {
+ if (!list_empty(&engine->request_list)) {
struct drm_i915_gem_request *request;
- request = list_first_entry(&engine->request_list,
- struct drm_i915_gem_request,
- list);
+ request = list_last_entry(&engine->request_list,
+ struct drm_i915_gem_request,
+ list);
- i915_gem_request_retire(request);
+ i915_gem_request_retire_upto(request);
}
/* Having flushed all requests from all queues, we know that all
@@ -3222,7 +2581,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
if (!i915_gem_request_completed(request))
break;
- i915_gem_request_retire(request);
+ i915_gem_request_retire_upto(request);
}
/* Move any buffers on the active list that are no longer referenced
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
new file mode 100644
index 000000000000..9e9aa6b725f7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+
+int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
+ struct drm_file *file)
+{
+ struct drm_i915_private *dev_private;
+ struct drm_i915_file_private *file_priv;
+
+ WARN_ON(!req || !file || req->file_priv);
+
+ if (!req || !file)
+ return -EINVAL;
+
+ if (req->file_priv)
+ return -EINVAL;
+
+ dev_private = req->i915;
+ file_priv = file->driver_priv;
+
+ spin_lock(&file_priv->mm.lock);
+ req->file_priv = file_priv;
+ list_add_tail(&req->client_list, &file_priv->mm.request_list);
+ spin_unlock(&file_priv->mm.lock);
+
+ req->pid = get_pid(task_pid(current));
+
+ return 0;
+}
+
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+{
+ struct drm_i915_file_private *file_priv = request->file_priv;
+
+ if (!file_priv)
+ return;
+
+ spin_lock(&file_priv->mm.lock);
+ list_del(&request->client_list);
+ request->file_priv = NULL;
+ spin_unlock(&file_priv->mm.lock);
+
+ put_pid(request->pid);
+ request->pid = NULL;
+}
+
+static void i915_gem_request_retire(struct drm_i915_gem_request *request)
+{
+ trace_i915_gem_request_retire(request);
+ list_del_init(&request->list);
+
+ /* We know the GPU must have read the request to have
+ * sent us the seqno + interrupt, so use the position
+ * of tail of the request to update the last known position
+ * of the GPU head.
+ *
+ * Note this requires that we are always called in request
+ * completion order.
+ */
+ request->ringbuf->last_retired_head = request->postfix;
+
+ i915_gem_request_remove_from_client(request);
+
+ if (request->previous_context) {
+ if (i915.enable_execlists)
+ intel_lr_context_unpin(request->previous_context,
+ request->engine);
+ }
+
+ i915_gem_context_unreference(request->ctx);
+ i915_gem_request_unreference(request);
+}
+
+void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
+{
+ struct intel_engine_cs *engine = req->engine;
+ struct drm_i915_gem_request *tmp;
+
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
+
+ if (list_empty(&req->list))
+ return;
+
+ do {
+ tmp = list_first_entry(&engine->request_list,
+ typeof(*tmp), list);
+
+ i915_gem_request_retire(tmp);
+ } while (tmp != req);
+
+ WARN_ON(i915_verify_lists(engine->dev));
+}
+
+static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible)
+{
+ if (__i915_terminally_wedged(reset_counter))
+ return -EIO;
+
+ if (__i915_reset_in_progress(reset_counter)) {
+ /* Non-interruptible callers can't handle -EAGAIN, hence return
+ * -EIO unconditionally for these.
+ */
+ if (!interruptible)
+ return -EIO;
+
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
+{
+ struct intel_engine_cs *engine;
+ int ret;
+
+ /* Carefully retire all requests without writing to the rings */
+ for_each_engine(engine, dev_priv) {
+ ret = intel_engine_idle(engine);
+ if (ret)
+ return ret;
+ }
+ i915_gem_retire_requests(dev_priv);
+
+ /* If the seqno wraps around, we need to clear the breadcrumb rbtree */
+ if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
+ while (intel_kick_waiters(dev_priv) ||
+ intel_kick_signalers(dev_priv))
+ yield();
+ }
+
+ /* Finally reset hw state */
+ for_each_engine(engine, dev_priv)
+ intel_ring_init_seqno(engine, seqno);
+
+ return 0;
+}
+
+int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ int ret;
+
+ if (seqno == 0)
+ return -EINVAL;
+
+ /* HWS page needs to be set less than what we
+ * will inject to ring
+ */
+ ret = i915_gem_init_seqno(dev_priv, seqno - 1);
+ if (ret)
+ return ret;
+
+ /* Carefully set the last_seqno value so that wrap
+ * detection still works
+ */
+ dev_priv->next_seqno = seqno;
+ dev_priv->last_seqno = seqno - 1;
+ if (dev_priv->last_seqno == 0)
+ dev_priv->last_seqno--;
+
+ return 0;
+}
+
+static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
+{
+ /* reserve 0 for non-seqno */
+ if (unlikely(dev_priv->next_seqno == 0)) {
+ int ret;
+
+ ret = i915_gem_init_seqno(dev_priv, 0);
+ if (ret)
+ return ret;
+
+ dev_priv->next_seqno = 1;
+ }
+
+ *seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
+ return 0;
+}
+
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx,
+ struct drm_i915_gem_request **req_out)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
+ struct drm_i915_gem_request *req;
+ int ret;
+
+ if (!req_out)
+ return -EINVAL;
+
+ *req_out = NULL;
+
+ /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
+ * and restart.
+ */
+ ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
+ if (ret)
+ return ret;
+
+ req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ ret = i915_gem_get_seqno(dev_priv, &req->seqno);
+ if (ret)
+ goto err;
+
+ kref_init(&req->ref);
+ req->i915 = dev_priv;
+ req->engine = engine;
+ req->ctx = ctx;
+ i915_gem_context_reference(ctx);
+
+ /*
+ * Reserve space in the ring buffer for all the commands required to
+ * eventually emit this request. This is to guarantee that the
+ * i915_add_request() call can't fail. Note that the reserve may need
+ * to be redone if the request is not actually submitted straight
+ * away, e.g. because a GPU scheduler has deferred it.
+ */
+ req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
+
+ if (i915.enable_execlists)
+ ret = intel_logical_ring_alloc_request_extras(req);
+ else
+ ret = intel_ring_alloc_request_extras(req);
+ if (ret)
+ goto err_ctx;
+
+ *req_out = req;
+ return 0;
+
+err_ctx:
+ i915_gem_context_unreference(ctx);
+err:
+ kmem_cache_free(dev_priv->requests, req);
+ return ret;
+}
+
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ * This can be NULL if the request is not directly related to
+ * any specific user context, in which case this function will
+ * choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx)
+{
+ struct drm_i915_gem_request *req;
+ int err;
+
+ if (!ctx)
+ ctx = engine->i915->kernel_context;
+ err = __i915_gem_request_alloc(engine, ctx, &req);
+ return err ? ERR_PTR(err) : req;
+}
+
+static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+
+ dev_priv->gt.active_engines |= intel_engine_flag(engine);
+ if (dev_priv->gt.awake)
+ return;
+
+ intel_runtime_pm_get_noresume(dev_priv);
+ dev_priv->gt.awake = true;
+
+ intel_enable_gt_powersave(dev_priv);
+ i915_update_gfx_val(dev_priv);
+ if (INTEL_GEN(dev_priv) >= 6)
+ gen6_rps_busy(dev_priv);
+
+ queue_delayed_work(dev_priv->wq,
+ &dev_priv->gt.retire_work,
+ round_jiffies_up_relative(HZ));
+}
+
+/*
+ * NB: This function is not allowed to fail. Doing so would mean the the
+ * request is not being tracked for completion but the work itself is
+ * going to happen on the hardware. This would be a Bad Thing(tm).
+ */
+void __i915_add_request(struct drm_i915_gem_request *request,
+ struct drm_i915_gem_object *obj,
+ bool flush_caches)
+{
+ struct intel_engine_cs *engine;
+ struct intel_ringbuffer *ringbuf;
+ u32 request_start;
+ u32 reserved_tail;
+ int ret;
+
+ if (WARN_ON(!request))
+ return;
+
+ engine = request->engine;
+ ringbuf = request->ringbuf;
+
+ /*
+ * To ensure that this call will not fail, space for its emissions
+ * should already have been reserved in the ring buffer. Let the ring
+ * know that it is time to use that space up.
+ */
+ request_start = intel_ring_get_tail(ringbuf);
+ reserved_tail = request->reserved_space;
+ request->reserved_space = 0;
+
+ /*
+ * Emit any outstanding flushes - execbuf can fail to emit the flush
+ * after having emitted the batchbuffer command. Hence we need to fix
+ * things up similar to emitting the lazy request. The difference here
+ * is that the flush _must_ happen before the next request, no matter
+ * what.
+ */
+ if (flush_caches) {
+ if (i915.enable_execlists)
+ ret = logical_ring_flush_all_caches(request);
+ else
+ ret = intel_ring_flush_all_caches(request);
+ /* Not allowed to fail! */
+ WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
+ }
+
+ trace_i915_gem_request_add(request);
+
+ request->head = request_start;
+
+ /* Whilst this request exists, batch_obj will be on the
+ * active_list, and so will hold the active reference. Only when this
+ * request is retired will the the batch_obj be moved onto the
+ * inactive_list and lose its active reference. Hence we do not need
+ * to explicitly hold another reference here.
+ */
+ request->batch_obj = obj;
+
+ /* Seal the request and mark it as pending execution. Note that
+ * we may inspect this state, without holding any locks, during
+ * hangcheck. Hence we apply the barrier to ensure that we do not
+ * see a more recent value in the hws than we are tracking.
+ */
+ request->emitted_jiffies = jiffies;
+ request->previous_seqno = engine->last_submitted_seqno;
+ smp_store_mb(engine->last_submitted_seqno, request->seqno);
+ list_add_tail(&request->list, &engine->request_list);
+
+ /* Record the position of the start of the request so that
+ * should we detect the updated seqno part-way through the
+ * GPU processing the request, we never over-estimate the
+ * position of the head.
+ */
+ request->postfix = intel_ring_get_tail(ringbuf);
+
+ if (i915.enable_execlists) {
+ ret = engine->emit_request(request);
+ } else {
+ ret = engine->add_request(request);
+
+ request->tail = intel_ring_get_tail(ringbuf);
+ }
+ /* Not allowed to fail! */
+ WARN(ret, "emit|add_request failed: %d!\n", ret);
+ /* Sanity check that the reserved size was large enough. */
+ ret = intel_ring_get_tail(ringbuf) - request_start;
+ if (ret < 0)
+ ret += ringbuf->size;
+ WARN_ONCE(ret > reserved_tail,
+ "Not enough space reserved (%d bytes) "
+ "for adding the request (%d bytes)\n",
+ reserved_tail, ret);
+
+ i915_gem_mark_busy(engine);
+}
+
+static unsigned long local_clock_us(unsigned int *cpu)
+{
+ unsigned long t;
+
+ /* Cheaply and approximately convert from nanoseconds to microseconds.
+ * The result and subsequent calculations are also defined in the same
+ * approximate microseconds units. The principal source of timing
+ * error here is from the simple truncation.
+ *
+ * Note that local_clock() is only defined wrt to the current CPU;
+ * the comparisons are no longer valid if we switch CPUs. Instead of
+ * blocking preemption for the entire busywait, we can detect the CPU
+ * switch and use that as indicator of system load and a reason to
+ * stop busywaiting, see busywait_stop().
+ */
+ *cpu = get_cpu();
+ t = local_clock() >> 10;
+ put_cpu();
+
+ return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned int cpu)
+{
+ unsigned int this_cpu;
+
+ if (time_after(local_clock_us(&this_cpu), timeout))
+ return true;
+
+ return this_cpu != cpu;
+}
+
+bool __i915_spin_request(const struct drm_i915_gem_request *req,
+ int state, unsigned long timeout_us)
+{
+ unsigned int cpu;
+
+ /* When waiting for high frequency requests, e.g. during synchronous
+ * rendering split between the CPU and GPU, the finite amount of time
+ * required to set up the irq and wait upon it limits the response
+ * rate. By busywaiting on the request completion for a short while we
+ * can service the high frequency waits as quick as possible. However,
+ * if it is a slow request, we want to sleep as quickly as possible.
+ * The tradeoff between waiting and sleeping is roughly the time it
+ * takes to sleep on a request, on the order of a microsecond.
+ */
+
+ timeout_us += local_clock_us(&cpu);
+ do {
+ if (i915_gem_request_completed(req))
+ return true;
+
+ if (signal_pending_state(state, current))
+ break;
+
+ if (busywait_stop(timeout_us, cpu))
+ break;
+
+ cpu_relax_lowlatency();
+ } while (!need_resched());
+
+ return false;
+}
+
+/**
+ * __i915_wait_request - wait until execution of request has finished
+ * @req: duh!
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout: in - how long to wait (NULL forever); out - how much time remaining
+ * @rps: client to charge for RPS boosting
+ *
+ * Note: It is of utmost importance that the passed in seqno and reset_counter
+ * values have been read by the caller in an smp safe manner. Where read-side
+ * locks are involved, it is sufficient to read the reset_counter before
+ * unlocking the lock that protects the seqno. For lockless tricks, the
+ * reset_counter _must_ be read before, and an appropriate smp_rmb must be
+ * inserted.
+ *
+ * Returns 0 if the request was found within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+int __i915_wait_request(struct drm_i915_gem_request *req,
+ bool interruptible,
+ s64 *timeout,
+ struct intel_rps_client *rps)
+{
+ int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+ DEFINE_WAIT(reset);
+ struct intel_wait wait;
+ unsigned long timeout_remain;
+ int ret = 0;
+
+ might_sleep();
+
+ if (list_empty(&req->list))
+ return 0;
+
+ if (i915_gem_request_completed(req))
+ return 0;
+
+ timeout_remain = MAX_SCHEDULE_TIMEOUT;
+ if (timeout) {
+ if (WARN_ON(*timeout < 0))
+ return -EINVAL;
+
+ if (*timeout == 0)
+ return -ETIME;
+
+ /* Record current time in case interrupted, or wedged */
+ timeout_remain = nsecs_to_jiffies_timeout(*timeout);
+ *timeout += ktime_get_raw_ns();
+ }
+
+ trace_i915_gem_request_wait_begin(req);
+
+ /* This client is about to stall waiting for the GPU. In many cases
+ * this is undesirable and limits the throughput of the system, as
+ * many clients cannot continue processing user input/output whilst
+ * blocked. RPS autotuning may take tens of milliseconds to respond
+ * to the GPU load and thus incurs additional latency for the client.
+ * We can circumvent that by promoting the GPU frequency to maximum
+ * before we wait. This makes the GPU throttle up much more quickly
+ * (good for benchmarks and user experience, e.g. window animations),
+ * but at a cost of spending more power processing the workload
+ * (bad for battery). Not all clients even want their results
+ * immediately and for them we should just let the GPU select its own
+ * frequency to maximise efficiency. To prevent a single client from
+ * forcing the clocks too high for the whole system, we only allow
+ * each client to waitboost once in a busy period.
+ */
+ if (INTEL_GEN(req->i915) >= 6)
+ gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
+
+ /* Optimistic spin for the next ~jiffie before touching IRQs */
+ if (i915_spin_request(req, state, 5))
+ goto complete;
+
+ set_current_state(state);
+ add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+
+ intel_wait_init(&wait, req->seqno);
+ if (intel_engine_add_wait(req->engine, &wait))
+ /* In order to check that we haven't missed the interrupt
+ * as we enabled it, we need to kick ourselves to do a
+ * coherent check on the seqno before we sleep.
+ */
+ goto wakeup;
+
+ for (;;) {
+ if (signal_pending_state(state, current)) {
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ timeout_remain = io_schedule_timeout(timeout_remain);
+ if (timeout_remain == 0) {
+ ret = -ETIME;
+ break;
+ }
+
+ if (intel_wait_complete(&wait))
+ break;
+
+ set_current_state(state);
+
+wakeup:
+ /* Carefully check if the request is complete, giving time
+ * for the seqno to be visible following the interrupt.
+ * We also have to check in case we are kicked by the GPU
+ * reset in order to drop the struct_mutex.
+ */
+ if (__i915_request_irq_complete(req))
+ break;
+
+ /* Only spin if we know the GPU is processing this request */
+ if (i915_spin_request(req, state, 2))
+ break;
+ }
+ remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
+
+ intel_engine_remove_wait(req->engine, &wait);
+ __set_current_state(TASK_RUNNING);
+complete:
+ trace_i915_gem_request_wait_end(req);
+
+ if (timeout) {
+ *timeout -= ktime_get_raw_ns();
+ if (*timeout < 0)
+ *timeout = 0;
+
+ /*
+ * Apparently ktime isn't accurate enough and occasionally has a
+ * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+ * things up to make the test happy. We allow up to 1 jiffy.
+ *
+ * This is a regrssion from the timespec->ktime conversion.
+ */
+ if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
+ *timeout = 0;
+ }
+
+ if (rps && req->seqno == req->engine->last_submitted_seqno) {
+ /* The GPU is now idle and this client has stalled.
+ * Since no other client has submitted a request in the
+ * meantime, assume that this client is the only one
+ * supplying work to the GPU but is unable to keep that
+ * work supplied because it is waiting. Since the GPU is
+ * then never kept fully busy, RPS autoclocking will
+ * keep the clocks relatively low, causing further delays.
+ * Compensate by giving the synchronous client credit for
+ * a waitboost next time.
+ */
+ spin_lock(&req->i915->rps.client_lock);
+ list_del_init(&rps->link);
+ spin_unlock(&req->i915->rps.client_lock);
+ }
+
+ return ret;
+}
+
+/**
+ * Waits for a request to be signaled, and cleans up the
+ * request and object lists appropriately for that event.
+ */
+int i915_wait_request(struct drm_i915_gem_request *req)
+{
+ int ret;
+
+ GEM_BUG_ON(!req);
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
+
+ ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL);
+ if (ret)
+ return ret;
+
+ /* If the GPU hung, we want to keep the requests to find the guilty. */
+ if (!i915_reset_in_progress(&req->i915->gpu_error))
+ i915_gem_request_retire_upto(req);
+
+ return 0;
+}
+
+void i915_gem_request_free(struct kref *req_ref)
+{
+ struct drm_i915_gem_request *req =
+ container_of(req_ref, typeof(*req), ref);
+ kmem_cache_free(req->i915->requests, req);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
new file mode 100644
index 000000000000..ea700befcc28
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright © 2008-2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_REQUEST_H
+#define I915_GEM_REQUEST_H
+
+/**
+ * Request queue structure.
+ *
+ * The request queue allows us to note sequence numbers that have been emitted
+ * and may be associated with active buffers to be retired.
+ *
+ * By keeping this list, we can avoid having to do questionable sequence
+ * number comparisons on buffer last_read|write_seqno. It also allows an
+ * emission time to be associated with the request for tracking how far ahead
+ * of the GPU the submission is.
+ *
+ * The requests are reference counted, so upon creation they should have an
+ * initial reference taken using kref_init
+ */
+struct drm_i915_gem_request {
+ struct kref ref;
+
+ /** On Which ring this request was generated */
+ struct drm_i915_private *i915;
+
+ /**
+ * Context and ring buffer related to this request
+ * Contexts are refcounted, so when this request is associated with a
+ * context, we must increment the context's refcount, to guarantee that
+ * it persists while any request is linked to it. Requests themselves
+ * are also refcounted, so the request will only be freed when the last
+ * reference to it is dismissed, and the code in
+ * i915_gem_request_free() will then decrement the refcount on the
+ * context.
+ */
+ struct i915_gem_context *ctx;
+ struct intel_engine_cs *engine;
+ struct intel_ringbuffer *ringbuf;
+ struct intel_signal_node signaling;
+
+ /** GEM sequence number associated with the previous request,
+ * when the HWS breadcrumb is equal to this the GPU is processing
+ * this request.
+ */
+ u32 previous_seqno;
+
+ /** GEM sequence number associated with this request,
+ * when the HWS breadcrumb is equal or greater than this the GPU
+ * has finished processing this request.
+ */
+ u32 seqno;
+
+ /** Position in the ringbuffer of the start of the request */
+ u32 head;
+
+ /**
+ * Position in the ringbuffer of the start of the postfix.
+ * This is required to calculate the maximum available ringbuffer
+ * space without overwriting the postfix.
+ */
+ u32 postfix;
+
+ /** Position in the ringbuffer of the end of the whole request */
+ u32 tail;
+
+ /** Preallocate space in the ringbuffer for the emitting the request */
+ u32 reserved_space;
+
+ /**
+ * Context related to the previous request.
+ * As the contexts are accessed by the hardware until the switch is
+ * completed to a new context, the hardware may still be writing
+ * to the context object after the breadcrumb is visible. We must
+ * not unpin/unbind/prune that object whilst still active and so
+ * we keep the previous context pinned until the following (this)
+ * request is retired.
+ */
+ struct i915_gem_context *previous_context;
+
+ /** Batch buffer related to this request if any (used for
+ * error state dump only).
+ */
+ struct drm_i915_gem_object *batch_obj;
+
+ /** Time at which this request was emitted, in jiffies. */
+ unsigned long emitted_jiffies;
+
+ /** global list entry for this request */
+ struct list_head list;
+
+ struct drm_i915_file_private *file_priv;
+ /** file_priv list entry for this request */
+ struct list_head client_list;
+
+ /** process identifier submitting this request */
+ struct pid *pid;
+
+ /**
+ * The ELSP only accepts two elements at a time, so we queue
+ * context/tail pairs on a given queue (ring->execlist_queue) until the
+ * hardware is available. The queue serves a double purpose: we also use
+ * it to keep track of the up to 2 contexts currently in the hardware
+ * (usually one in execution and the other queued up by the GPU): We
+ * only remove elements from the head of the queue when the hardware
+ * informs us that an element has been completed.
+ *
+ * All accesses to the queue are mediated by a spinlock
+ * (ring->execlist_lock).
+ */
+
+ /** Execlist link in the submission queue.*/
+ struct list_head execlist_link;
+
+ /** Execlists no. of times this request has been sent to the ELSP */
+ int elsp_submitted;
+
+ /** Execlists context hardware id. */
+ unsigned int ctx_hw_id;
+};
+
+struct drm_i915_gem_request * __must_check
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx);
+void i915_gem_request_free(struct kref *req_ref);
+int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
+ struct drm_file *file);
+void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
+
+static inline u32
+i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
+{
+ return req ? req->seqno : 0;
+}
+
+static inline struct intel_engine_cs *
+i915_gem_request_get_engine(struct drm_i915_gem_request *req)
+{
+ return req ? req->engine : NULL;
+}
+
+static inline struct drm_i915_gem_request *
+i915_gem_request_reference(struct drm_i915_gem_request *req)
+{
+ if (req)
+ kref_get(&req->ref);
+ return req;
+}
+
+static inline void
+i915_gem_request_unreference(struct drm_i915_gem_request *req)
+{
+ kref_put(&req->ref, i915_gem_request_free);
+}
+
+static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
+ struct drm_i915_gem_request *src)
+{
+ if (src)
+ i915_gem_request_reference(src);
+
+ if (*pdst)
+ i915_gem_request_unreference(*pdst);
+
+ *pdst = src;
+}
+
+void __i915_add_request(struct drm_i915_gem_request *req,
+ struct drm_i915_gem_object *batch_obj,
+ bool flush_caches);
+#define i915_add_request(req) \
+ __i915_add_request(req, NULL, true)
+#define i915_add_request_no_flush(req) \
+ __i915_add_request(req, NULL, false)
+
+struct intel_rps_client;
+
+int __i915_wait_request(struct drm_i915_gem_request *req,
+ bool interruptible,
+ s64 *timeout,
+ struct intel_rps_client *rps);
+int __must_check i915_wait_request(struct drm_i915_gem_request *req);
+
+static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
+
+/**
+ * Returns true if seq1 is later than seq2.
+ */
+static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) >= 0;
+}
+
+static inline bool
+i915_gem_request_started(const struct drm_i915_gem_request *req)
+{
+ return i915_seqno_passed(intel_engine_get_seqno(req->engine),
+ req->previous_seqno);
+}
+
+static inline bool
+i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+ return i915_seqno_passed(intel_engine_get_seqno(req->engine),
+ req->seqno);
+}
+
+bool __i915_spin_request(const struct drm_i915_gem_request *request,
+ int state, unsigned long timeout_us);
+static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
+ int state, unsigned long timeout_us)
+{
+ return (i915_gem_request_started(request) &&
+ __i915_spin_request(request, state, timeout_us));
+}
+
+#endif /* I915_GEM_REQUEST_H */
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 2/8] drm/i915: Retire oldest completed request before allocating next
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 10:51 ` [PATCH 3/8] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
` (6 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
In order to keep the memory allocated for requests reasonably tight, try
to reuse the oldest request (so long as it is completed and has no
external references) for the next allocation.
v2: Throw in a comment to hopefully make sure no one mistakes the
optimistic retirement of the oldest request for simply stealing it.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9e9aa6b725f7..5cbb11ece60a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -226,6 +226,14 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
if (ret)
return ret;
+ /* Move the oldest request to the slab-cache (if not in use!) */
+ if (!list_empty(&engine->request_list)) {
+ req = list_first_entry(&engine->request_list,
+ typeof(*req), list);
+ if (i915_gem_request_completed(req))
+ i915_gem_request_retire(req);
+ }
+
req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
if (!req)
return -ENOMEM;
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 3/8] drm/i915: Mark all current requests as complete before resetting them
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
2016-07-19 10:51 ` [PATCH 2/8] drm/i915: Retire oldest completed request before allocating next Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 10:51 ` [PATCH 4/8] drm/i915: Derive GEM requests from dma-fence Chris Wilson
` (5 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
Following a GPU reset upon hang, we retire all the requests and then
mark them all as complete. If we mark them as complete first, we both
keep the normal retirement order (completed first then retired) and
provide a small optimisation for concurrent lookups.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6df14058b3fe..61729d6b4ecc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2486,6 +2486,12 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
i915_gem_object_retire__read(obj, engine->id);
}
+ /* Mark all pending requests as complete so that any concurrent
+ * (lockless) lookup doesn't try and wait upon the request as we
+ * reset it.
+ */
+ intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+
/*
* Clear the execlists queue up before freeing the requests, as those
* are the ones that keep the context and ringbuffer backing objects
@@ -2528,8 +2534,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
intel_ring_update_space(buffer);
}
- intel_ring_init_seqno(engine, engine->last_submitted_seqno);
-
engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
}
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 4/8] drm/i915: Derive GEM requests from dma-fence
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
2016-07-19 10:51 ` [PATCH 2/8] drm/i915: Retire oldest completed request before allocating next Chris Wilson
2016-07-19 10:51 ` [PATCH 3/8] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 10:51 ` [PATCH 5/8] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
` (4 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Jesse Barnes
dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.
v2: The fence_context needs to be globally unique, not just unique to
this device.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_gem_request.c | 113 ++++++++++++++++++++++++++---
drivers/gpu/drm/i915/i915_gem_request.h | 43 +++++++----
drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
drivers/gpu/drm/i915/i915_guc_submission.c | 4 +-
drivers/gpu/drm/i915/i915_trace.h | 10 +--
drivers/gpu/drm/i915/intel_breadcrumbs.c | 7 +-
drivers/gpu/drm/i915/intel_engine_cs.c | 2 +
drivers/gpu/drm/i915/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +--
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
11 files changed, 150 insertions(+), 46 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 90aef4540193..55fd3d9cc448 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, " %x @ %d: %s [%d]\n",
- req->seqno,
+ req->fence.seqno,
(int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5cbb11ece60a..6528536878f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,95 @@
#include "i915_drv.h"
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+ return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+ /* Timelines are bound by eviction to a VM. However, since
+ * we only have a global seqno at the moment, we only have
+ * a single timeline. Note that each timeline will have
+ * multiple execution contexts (fence contexts) as we allow
+ * engines within a single timeline to execute in parallel.
+ */
+ return "global";
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+ return i915_gem_request_completed(to_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+ if (i915_fence_signaled(fence))
+ return false;
+
+ intel_engine_enable_signaling(to_request(fence));
+ return true;
+}
+
+static signed long i915_fence_wait(struct fence *fence,
+ bool interruptible,
+ signed long timeout_jiffies)
+{
+ s64 timeout_ns, *timeout;
+ int ret;
+
+ if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+ timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+ timeout = &timeout_ns;
+ } else {
+ timeout = NULL;
+ }
+
+ ret = __i915_wait_request(to_request(fence),
+ interruptible, timeout,
+ NULL);
+ if (ret == -ETIME)
+ return 0;
+
+ if (ret < 0)
+ return ret;
+
+ if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+ timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+ return timeout_jiffies;
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+ snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+ int size)
+{
+ snprintf(str, size, "%u",
+ intel_engine_get_seqno(to_request(fence)->engine));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+ struct drm_i915_gem_request *req = to_request(fence);
+
+ kmem_cache_free(req->i915->requests, req);
+}
+
+const struct fence_ops i915_fence_ops = {
+ .get_driver_name = i915_fence_get_driver_name,
+ .get_timeline_name = i915_fence_get_timeline_name,
+ .enable_signaling = i915_fence_enable_signaling,
+ .signaled = i915_fence_signaled,
+ .wait = i915_fence_wait,
+ .release = i915_fence_release,
+ .fence_value_str = i915_fence_value_str,
+ .timeline_value_str = i915_fence_timeline_value_str,
+};
+
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file)
{
@@ -211,6 +300,7 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
struct drm_i915_private *dev_priv = engine->i915;
unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error);
struct drm_i915_gem_request *req;
+ u32 seqno;
int ret;
if (!req_out)
@@ -238,11 +328,17 @@ __i915_gem_request_alloc(struct intel_engine_cs *engine,
if (!req)
return -ENOMEM;
- ret = i915_gem_get_seqno(dev_priv, &req->seqno);
+ ret = i915_gem_get_seqno(dev_priv, &seqno);
if (ret)
goto err;
- kref_init(&req->ref);
+ spin_lock_init(&req->lock);
+ fence_init(&req->fence,
+ &i915_fence_ops,
+ &req->lock,
+ engine->fence_context,
+ seqno);
+
req->i915 = dev_priv;
req->engine = engine;
req->ctx = ctx;
@@ -385,7 +481,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
*/
request->emitted_jiffies = jiffies;
request->previous_seqno = engine->last_submitted_seqno;
- smp_store_mb(engine->last_submitted_seqno, request->seqno);
+ smp_store_mb(engine->last_submitted_seqno, request->fence.seqno);
list_add_tail(&request->list, &engine->request_list);
/* Record the position of the start of the request so that
@@ -556,7 +652,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
set_current_state(state);
add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
- intel_wait_init(&wait, req->seqno);
+ intel_wait_init(&wait, req->fence.seqno);
if (intel_engine_add_wait(req->engine, &wait))
/* In order to check that we haven't missed the interrupt
* as we enabled it, we need to kick ourselves to do a
@@ -617,7 +713,7 @@ complete:
*timeout = 0;
}
- if (rps && req->seqno == req->engine->last_submitted_seqno) {
+ if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
* Since no other client has submitted a request in the
* meantime, assume that this client is the only one
@@ -657,10 +753,3 @@ int i915_wait_request(struct drm_i915_gem_request *req)
return 0;
}
-
-void i915_gem_request_free(struct kref *req_ref)
-{
- struct drm_i915_gem_request *req =
- container_of(req_ref, typeof(*req), ref);
- kmem_cache_free(req->i915->requests, req);
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index ea700befcc28..6f2c820785f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -25,6 +25,10 @@
#ifndef I915_GEM_REQUEST_H
#define I915_GEM_REQUEST_H
+#include <linux/fence.h>
+
+#include "i915_gem.h"
+
/**
* Request queue structure.
*
@@ -36,11 +40,11 @@
* emission time to be associated with the request for tracking how far ahead
* of the GPU the submission is.
*
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
+ * The requests are reference counted.
*/
struct drm_i915_gem_request {
- struct kref ref;
+ struct fence fence;
+ spinlock_t lock;
/** On Which ring this request was generated */
struct drm_i915_private *i915;
@@ -66,12 +70,6 @@ struct drm_i915_gem_request {
*/
u32 previous_seqno;
- /** GEM sequence number associated with this request,
- * when the HWS breadcrumb is equal or greater than this the GPU
- * has finished processing this request.
- */
- u32 seqno;
-
/** Position in the ringbuffer of the start of the request */
u32 head;
@@ -140,10 +138,16 @@ struct drm_i915_gem_request {
unsigned int ctx_hw_id;
};
+extern const struct fence_ops i915_fence_ops;
+
+static inline bool fence_is_i915(struct fence *fence)
+{
+ return fence->ops == &i915_fence_ops;
+}
+
struct drm_i915_gem_request * __must_check
i915_gem_request_alloc(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
-void i915_gem_request_free(struct kref *req_ref);
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
struct drm_file *file);
void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
@@ -151,7 +155,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
static inline u32
i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
{
- return req ? req->seqno : 0;
+ return req ? req->fence.seqno : 0;
}
static inline struct intel_engine_cs *
@@ -161,17 +165,24 @@ i915_gem_request_get_engine(struct drm_i915_gem_request *req)
}
static inline struct drm_i915_gem_request *
+to_request(struct fence *fence)
+{
+ /* We assume that NULL fence/request are interoperable */
+ BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
+ GEM_BUG_ON(fence && !fence_is_i915(fence));
+ return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static inline struct drm_i915_gem_request *
i915_gem_request_reference(struct drm_i915_gem_request *req)
{
- if (req)
- kref_get(&req->ref);
- return req;
+ return to_request(fence_get(&req->fence));
}
static inline void
i915_gem_request_unreference(struct drm_i915_gem_request *req)
{
- kref_put(&req->ref, i915_gem_request_free);
+ fence_put(&req->fence);
}
static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
@@ -223,7 +234,7 @@ static inline bool
i915_gem_request_completed(const struct drm_i915_gem_request *req)
{
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
- req->seqno);
+ req->fence.seqno);
}
bool __i915_spin_request(const struct drm_i915_gem_request *request,
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9d73d2216adc..6daaf4ecd2da 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1182,7 +1182,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
}
erq = &error->ring[i].requests[count++];
- erq->seqno = request->seqno;
+ erq->seqno = request->fence.seqno;
erq->jiffies = request->emitted_jiffies;
erq->tail = request->postfix;
}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 2112e029db6a..1cc5de129e76 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -506,7 +506,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
rq->engine);
wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
- wqi->fence_id = rq->seqno;
+ wqi->fence_id = rq->fence.seqno;
kunmap_atomic(base);
}
@@ -601,7 +601,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
client->b_fail += 1;
guc->submissions[engine_id] += 1;
- guc->last_seqno[engine_id] = rq->seqno;
+ guc->last_seqno[engine_id] = rq->fence.seqno;
return b_ret;
}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 534154e05fbe..007112d1e049 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -465,7 +465,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
__entry->dev = from->i915->drm.primary->index;
__entry->sync_from = from->id;
__entry->sync_to = to_req->engine->id;
- __entry->seqno = i915_gem_request_get_seqno(req);
+ __entry->seqno = req->fence.seqno;
),
TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -488,9 +488,9 @@ TRACE_EVENT(i915_gem_ring_dispatch,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->seqno;
+ __entry->seqno = req->fence.seqno;
__entry->flags = flags;
- intel_engine_enable_signaling(req);
+ fence_enable_sw_signaling(&req->fence);
),
TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
@@ -533,7 +533,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->seqno;
+ __entry->seqno = req->fence.seqno;
),
TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -595,7 +595,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
TP_fast_assign(
__entry->dev = req->i915->drm.primary->index;
__entry->ring = req->engine->id;
- __entry->seqno = req->seqno;
+ __entry->seqno = req->fence.seqno;
__entry->blocking =
mutex_is_locked(&req->i915->drm.struct_mutex);
),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index b074f3d6d127..32ada41dedfb 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -436,6 +436,7 @@ static int intel_breadcrumbs_signaler(void *arg)
*/
intel_engine_remove_wait(engine,
&request->signaling.wait);
+ fence_signal(&request->fence);
/* Find the next oldest signal. Note that as we have
* not been holding the lock, another client may
@@ -482,7 +483,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
}
request->signaling.wait.tsk = b->signaler;
- request->signaling.wait.seqno = request->seqno;
+ request->signaling.wait.seqno = request->fence.seqno;
i915_gem_request_reference(request);
/* First add ourselves into the list of waiters, but register our
@@ -504,8 +505,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
p = &b->signals.rb_node;
while (*p) {
parent = *p;
- if (i915_seqno_passed(request->seqno,
- to_signaler(parent)->seqno)) {
+ if (i915_seqno_passed(request->fence.seqno,
+ to_signaler(parent)->fence.seqno)) {
p = &parent->rb_right;
first = false;
} else {
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e3c9f04ea51e..f4a35ec78481 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -182,6 +182,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&engine->execlist_queue);
spin_lock_init(&engine->execlist_lock);
+ engine->fence_context = fence_context_alloc(1);
+
intel_engine_init_hangcheck(engine);
i915_gem_batch_pool_init(&engine->i915->drm, &engine->batch_pool);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2e670f15881c..860dba2cf7b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1803,7 +1803,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
intel_hws_seqno_address(request->engine) |
MI_FLUSH_DW_USE_GTT);
intel_logical_ring_emit(ringbuf, 0);
- intel_logical_ring_emit(ringbuf, request->seqno);
+ intel_logical_ring_emit(ringbuf, request->fence.seqno);
intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
intel_logical_ring_emit(ringbuf, MI_NOOP);
return intel_logical_ring_advance_and_submit(request);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 94c8ef461721..af0bd71e3a36 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1348,7 +1348,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
PIPE_CONTROL_CS_STALL);
intel_ring_emit(signaller, lower_32_bits(gtt_offset));
intel_ring_emit(signaller, upper_32_bits(gtt_offset));
- intel_ring_emit(signaller, signaller_req->seqno);
+ intel_ring_emit(signaller, signaller_req->fence.seqno);
intel_ring_emit(signaller, 0);
intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1386,7 +1386,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
MI_FLUSH_DW_USE_GTT);
intel_ring_emit(signaller, upper_32_bits(gtt_offset));
- intel_ring_emit(signaller, signaller_req->seqno);
+ intel_ring_emit(signaller, signaller_req->fence.seqno);
intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->hw_id));
intel_ring_emit(signaller, 0);
@@ -1419,7 +1419,7 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
if (i915_mmio_reg_valid(mbox_reg)) {
intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit_reg(signaller, mbox_reg);
- intel_ring_emit(signaller, signaller_req->seqno);
+ intel_ring_emit(signaller, signaller_req->fence.seqno);
}
}
@@ -1455,7 +1455,7 @@ gen6_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
intel_ring_emit(engine,
I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(engine, req->seqno);
+ intel_ring_emit(engine, req->fence.seqno);
intel_ring_emit(engine, MI_USER_INTERRUPT);
__intel_ring_advance(engine);
@@ -1704,7 +1704,7 @@ i9xx_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(engine, MI_STORE_DWORD_INDEX);
intel_ring_emit(engine,
I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
- intel_ring_emit(engine, req->seqno);
+ intel_ring_emit(engine, req->fence.seqno);
intel_ring_emit(engine, MI_USER_INTERRUPT);
__intel_ring_advance(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index df7587ab1ba7..5cbafc00bfd5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -146,6 +146,7 @@ struct intel_engine_cs {
unsigned int exec_id;
unsigned int hw_id;
unsigned int guc_id; /* XXX same as hw_id? */
+ u64 fence_context;
u32 mmio_base;
unsigned int irq_shift;
struct intel_ringbuffer *buffer;
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 5/8] drm/i915: Disable waitboosting for fence_wait()
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
` (2 preceding siblings ...)
2016-07-19 10:51 ` [PATCH 4/8] drm/i915: Derive GEM requests from dma-fence Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 10:51 ` [PATCH 6/8] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
` (3 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
We want to restrict waitboosting to known process contexts, where we can
track which clients are receiving waitboosts and prevent excessive power
wasting. For fence_wait() we do not have any client tracking and so that
leaves it open to abuse.
v2: Hide the IS_ERR_OR_NULL testing for special clients
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++---
drivers/gpu/drm/i915/i915_gem_request.h | 3 +++
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6528536878f2..f483e605a715 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -70,7 +70,7 @@ static signed long i915_fence_wait(struct fence *fence,
ret = __i915_wait_request(to_request(fence),
interruptible, timeout,
- NULL);
+ NO_WAITBOOST);
if (ret == -ETIME)
return 0;
@@ -642,7 +642,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
* forcing the clocks too high for the whole system, we only allow
* each client to waitboost once in a busy period.
*/
- if (INTEL_GEN(req->i915) >= 6)
+ if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
/* Optimistic spin for the next ~jiffie before touching IRQs */
@@ -713,7 +713,8 @@ complete:
*timeout = 0;
}
- if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
+ if (IS_RPS_USER(rps) &&
+ req->fence.seqno == req->engine->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
* Since no other client has submitted a request in the
* meantime, assume that this client is the only one
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6f2c820785f3..0a01d01cac51 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -206,6 +206,9 @@ void __i915_add_request(struct drm_i915_gem_request *req,
__i915_add_request(req, NULL, false)
struct intel_rps_client;
+#define NO_WAITBOOST ERR_PTR(-1)
+#define IS_RPS_CLIENT(p) (!IS_ERR(p))
+#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
int __i915_wait_request(struct drm_i915_gem_request *req,
bool interruptible,
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 6/8] drm/i915: Disable waitboosting for mmioflips/semaphores
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
` (3 preceding siblings ...)
2016-07-19 10:51 ` [PATCH 5/8] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 10:51 ` [PATCH 7/8] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
` (2 subsequent siblings)
7 siblings, 0 replies; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
Since commit a6f766f39751 ("drm/i915: Limit ring synchronisation (sw
sempahores) RPS boosts") and commit bcafc4e38b6a ("drm/i915: Limit mmio
flip RPS boosts") we have limited the waitboosting for semaphores and
flips. Ideally we do not want to boost in either of these instances as no
userspace consumer is waiting upon the results (though a userspace producer
may be stalled trying to submit an execbuf - but in this case the
producer is being throttled due to the engine being saturated with
work). With the introduction of NO_WAITBOOST in the previous patch, we
can finally disable these needless boosts.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
drivers/gpu/drm/i915/i915_debugfs.c | 8 +-------
drivers/gpu/drm/i915/i915_drv.h | 2 --
drivers/gpu/drm/i915/i915_gem.c | 2 +-
drivers/gpu/drm/i915/intel_display.c | 2 +-
drivers/gpu/drm/i915/intel_pm.c | 2 --
5 files changed, 3 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 55fd3d9cc448..618f8cf210fc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2465,13 +2465,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
list_empty(&file_priv->rps.link) ? "" : ", active");
rcu_read_unlock();
}
- seq_printf(m, "Semaphore boosts: %d%s\n",
- dev_priv->rps.semaphores.boosts,
- list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
- seq_printf(m, "MMIO flip boosts: %d%s\n",
- dev_priv->rps.mmioflips.boosts,
- list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
- seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+ seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
spin_unlock(&dev_priv->rps.client_lock);
mutex_unlock(&dev->filelist_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c97a75597099..e163a9487750 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1195,8 +1195,6 @@ struct intel_gen6_power_mgmt {
struct delayed_work autoenable_work;
unsigned boosts;
- struct intel_rps_client semaphores, mmioflips;
-
/* manual wa residency calculations */
struct intel_rps_ei up_ei, down_ei;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 61729d6b4ecc..079e09cee16a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2849,7 +2849,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
ret = __i915_wait_request(from_req,
i915->mm.interruptible,
NULL,
- &i915->rps.semaphores);
+ NO_WAITBOOST);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fb7d8fc5b0e6..51fbca756cdb 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11473,7 +11473,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
if (work->flip_queued_req)
WARN_ON(__i915_wait_request(work->flip_queued_req,
false, NULL,
- &dev_priv->rps.mmioflips));
+ NO_WAITBOOST));
/* For framebuffer backed by dmabuf, wait for fence */
resv = i915_gem_object_get_dmabuf_resv(obj);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fa6b341c2792..a1bf5f8fbb1c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7810,8 +7810,6 @@ void intel_pm_setup(struct drm_device *dev)
INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work,
__intel_autoenable_gt_powersave);
INIT_LIST_HEAD(&dev_priv->rps.clients);
- INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
- INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
dev_priv->pm.suspended = false;
atomic_set(&dev_priv->pm.wakeref_count, 0);
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 7/8] drm/i915: Mark imported dma-buf objects as being coherent
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
` (4 preceding siblings ...)
2016-07-19 10:51 ` [PATCH 6/8] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 13:27 ` Joonas Lahtinen
2016-07-19 10:51 ` [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects Chris Wilson
2016-07-19 11:29 ` ✗ Ro.CI.BAT: failure for series starting with [1/8] drm/i915: Move GEM request routines to i915_gem_request.c Patchwork
7 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
A foreign dma-buf does not share our cache domain tracking, and we rely
on the producer ensuring cache coherency. Marking them as being in the
CPU domain is incorrect.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 80bbe43a2e92..c7d734248ed0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -299,6 +299,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
obj->base.import_attach = attach;
+ obj->base.read_domains = I915_GEM_DOMAIN_GTT;
+ obj->base.write_domain = 0;
return &obj->base;
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
` (5 preceding siblings ...)
2016-07-19 10:51 ` [PATCH 7/8] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
@ 2016-07-19 10:51 ` Chris Wilson
2016-07-19 14:12 ` Joonas Lahtinen
2016-07-19 11:29 ` ✗ Ro.CI.BAT: failure for series starting with [1/8] drm/i915: Move GEM request routines to i915_gem_request.c Patchwork
7 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 10:51 UTC (permalink / raw)
To: intel-gfx
When transitioning to the GTT or CPU domain we wait on all rendering
from i915 to complete (with the optimisation of allowing concurrent read
access by both the GPU and client). We don't yet ensure all rendering
from third parties (tracked by implicit fences on the dma-buf) is
complete. Since implicitly tracked rendering by third parties will
ignore our cache-domain tracking, we have to always wait upon rendering
from third-parties when transitioning to direct access to the backing
store. We still rely on clients notifying us of cache domain changes
(i.e. they need to move to the GTT read or write domain after doing a CPU
access before letting the third party render again).
v2:
This introduces a potential WARN_ON into i915_gem_object_free() as the
current i915_vma_unbind() calls i915_gem_object_wait_rendering(). To
hit this path we first need to render with the GPU, have a dma-buf
attached with an unsignaled fence and then interrupt the wait. It does
get fixed later in the series (when i915_vma_unbind() only waits on the
active VMA and not all, including third-party, rendering.
To offset that risk, use the __i915_vma_unbind_no_wait hack.
Testcase: igt/prime_vgem/basic-fence-read
Testcase: igt/prime_vgem/basic-fence-mmap
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 44 ++++++++++++++++++++++++++---------------
1 file changed, 28 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 079e09cee16a..37868cc594cb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,10 +29,12 @@
#include <drm/drm_vma_manager.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
+#include "i915_gem_dmabuf.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_mocs.h"
+#include <linux/reservation.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
@@ -511,6 +513,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
return -EINVAL;
+ ret = i915_gem_object_wait_rendering(obj, true);
+ if (ret)
+ return ret;
+
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
/* If we're not in the cpu read domain, set ourself into the gtt
* read domain and manually flush cachelines (if required). This
@@ -518,9 +524,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
* anyway again before the next pread happens. */
*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
obj->cache_level);
- ret = i915_gem_object_wait_rendering(obj, true);
- if (ret)
- return ret;
}
ret = i915_gem_object_get_pages(obj);
@@ -1132,15 +1135,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+ ret = i915_gem_object_wait_rendering(obj, false);
+ if (ret)
+ return ret;
+
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
/* If we're not in the cpu write domain, set ourself into the gtt
* write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */
needs_clflush_after = cpu_write_needs_clflush(obj);
- ret = i915_gem_object_wait_rendering(obj, false);
- if (ret)
- return ret;
}
/* Same trick applies to invalidate partially written cachelines read
* before writing. */
@@ -1335,11 +1339,9 @@ int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
bool readonly)
{
+ struct reservation_object *resv;
int ret, i;
- if (!obj->active)
- return 0;
-
if (readonly) {
if (obj->last_write_req != NULL) {
ret = i915_wait_request(obj->last_write_req);
@@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
GEM_BUG_ON(obj->active);
}
+ resv = i915_gem_object_get_dmabuf_resv(obj);
+ if (resv) {
+ long err;
+
+ err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err < 0)
+ return err;
+ }
+
return 0;
}
@@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
struct i915_vma *vma;
int ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
- return 0;
-
ret = i915_gem_object_wait_rendering(obj, !write);
if (ret)
return ret;
+ if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
+ return 0;
+
/* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
@@ -3752,13 +3764,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
uint32_t old_write_domain, old_read_domains;
int ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
- return 0;
-
ret = i915_gem_object_wait_rendering(obj, !write);
if (ret)
return ret;
+ if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return 0;
+
i915_gem_object_flush_gtt_write_domain(obj);
old_write_domain = obj->base.write_domain;
@@ -4238,7 +4250,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
int ret;
vma->pin_count = 0;
- ret = i915_vma_unbind(vma);
+ ret = __i915_vma_unbind_no_wait(vma);
if (WARN_ON(ret == -ERESTARTSYS)) {
bool was_interruptible;
--
2.8.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 13+ messages in thread
* ✗ Ro.CI.BAT: failure for series starting with [1/8] drm/i915: Move GEM request routines to i915_gem_request.c
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
` (6 preceding siblings ...)
2016-07-19 10:51 ` [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects Chris Wilson
@ 2016-07-19 11:29 ` Patchwork
7 siblings, 0 replies; 13+ messages in thread
From: Patchwork @ 2016-07-19 11:29 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/8] drm/i915: Move GEM request routines to i915_gem_request.c
URL : https://patchwork.freedesktop.org/series/10028/
State : failure
== Summary ==
Series 10028v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/10028/revisions/1/mbox
Test kms_cursor_legacy:
Subgroup basic-flip-vs-cursor:
pass -> FAIL (ro-skl3-i5-6260u)
Test prime_vgem:
Subgroup basic-fence-mmap:
fail -> PASS (ro-bdw-i7-5557U)
fail -> PASS (ro-ilk1-i5-650)
fail -> PASS (ro-hsw-i3-4010u)
fail -> PASS (ro-bdw-i7-5600u)
fail -> PASS (ro-snb-i7-2620M)
fail -> PASS (ro-bdw-i5-5250u)
fail -> PASS (ro-hsw-i7-4770r)
fail -> PASS (ro-ivb-i7-3770)
fail -> PASS (ro-byt-n2820)
fail -> PASS (ro-skl3-i5-6260u)
Subgroup basic-fence-read:
fail -> PASS (ro-bdw-i7-5557U)
fail -> PASS (ro-hsw-i3-4010u)
fail -> PASS (ro-bdw-i7-5600u)
fail -> PASS (ro-bdw-i5-5250u)
fail -> PASS (ro-snb-i7-2620M)
fail -> PASS (ro-hsw-i7-4770r)
fail -> PASS (ro-ivb-i7-3770)
fi-hsw-i7-4770k total:243 pass:213 dwarn:0 dfail:0 fail:10 skip:20
fi-kbl-qkkr total:243 pass:177 dwarn:27 dfail:1 fail:9 skip:29
fi-skl-i5-6260u total:243 pass:221 dwarn:0 dfail:0 fail:9 skip:13
fi-skl-i7-6700k total:243 pass:208 dwarn:0 dfail:0 fail:9 skip:26
fi-snb-i7-2600 total:243 pass:193 dwarn:0 dfail:0 fail:10 skip:40
ro-bdw-i5-5250u total:244 pass:219 dwarn:4 dfail:0 fail:8 skip:13
ro-bdw-i7-5557U total:244 pass:220 dwarn:0 dfail:0 fail:8 skip:16
ro-bdw-i7-5600u total:244 pass:204 dwarn:0 dfail:0 fail:8 skip:32
ro-bsw-n3050 total:218 pass:173 dwarn:0 dfail:0 fail:2 skip:42
ro-byt-n2820 total:244 pass:196 dwarn:0 dfail:0 fail:10 skip:38
ro-hsw-i3-4010u total:244 pass:211 dwarn:0 dfail:0 fail:9 skip:24
ro-hsw-i7-4770r total:244 pass:211 dwarn:0 dfail:0 fail:9 skip:24
ro-ilk-i7-620lm total:244 pass:171 dwarn:0 dfail:0 fail:10 skip:63
ro-ilk1-i5-650 total:239 pass:171 dwarn:0 dfail:0 fail:10 skip:58
ro-ivb-i7-3770 total:244 pass:202 dwarn:0 dfail:0 fail:9 skip:33
ro-skl3-i5-6260u total:244 pass:222 dwarn:1 dfail:0 fail:9 skip:12
ro-snb-i7-2620M total:244 pass:192 dwarn:0 dfail:0 fail:10 skip:42
Results at /archive/results/CI_IGT_test/RO_Patchwork_1528/
d59b92b drm-intel-nightly: 2016y-07m-19d-09h-52m-15s UTC integration manifest
df3db81 drm/i915: Wait on external rendering for GEM objects
bb2bf15 drm/i915: Mark imported dma-buf objects as being coherent
7c1ac1d drm/i915: Disable waitboosting for mmioflips/semaphores
3982e8f drm/i915: Disable waitboosting for fence_wait()
8cb6bb1 drm/i915: Derive GEM requests from dma-fence
62d55fd drm/i915: Mark all current requests as complete before resetting them
c52ae09 drm/i915: Retire oldest completed request before allocating next
4111c02 drm/i915: Move GEM request routines to i915_gem_request.c
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 7/8] drm/i915: Mark imported dma-buf objects as being coherent
2016-07-19 10:51 ` [PATCH 7/8] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
@ 2016-07-19 13:27 ` Joonas Lahtinen
0 siblings, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2016-07-19 13:27 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On ti, 2016-07-19 at 11:51 +0100, Chris Wilson wrote:
> A foreign dma-buf does not share our cache domain tracking, and we rely
> on the producer ensuring cache coherency. Marking them as being in the
> CPU domain is incorrect.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Maybe even add a comment above the assignment that _GTT is used in the
purpose of being coherent.
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 80bbe43a2e92..c7d734248ed0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -299,6 +299,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
> i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
> obj->base.import_attach = attach;
> + obj->base.read_domains = I915_GEM_DOMAIN_GTT;
> + obj->base.write_domain = 0;
>
> return &obj->base;
>
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects
2016-07-19 10:51 ` [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects Chris Wilson
@ 2016-07-19 14:12 ` Joonas Lahtinen
2016-07-19 14:27 ` Chris Wilson
0 siblings, 1 reply; 13+ messages in thread
From: Joonas Lahtinen @ 2016-07-19 14:12 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On ti, 2016-07-19 at 11:51 +0100, Chris Wilson wrote:
> When transitioning to the GTT or CPU domain we wait on all rendering
> from i915 to complete (with the optimisation of allowing concurrent read
> access by both the GPU and client). We don't yet ensure all rendering
> from third parties (tracked by implicit fences on the dma-buf) is
> complete. Since implicitly tracked rendering by third parties will
> ignore our cache-domain tracking, we have to always wait upon rendering
> from third-parties when transitioning to direct access to the backing
> store. We still rely on clients notifying us of cache domain changes
> (i.e. they need to move to the GTT read or write domain after doing a CPU
> access before letting the third party render again).
>
> v2:
> This introduces a potential WARN_ON into i915_gem_object_free() as the
> current i915_vma_unbind() calls i915_gem_object_wait_rendering(). To
> hit this path we first need to render with the GPU, have a dma-buf
> attached with an unsignaled fence and then interrupt the wait. It does
> get fixed later in the series (when i915_vma_unbind() only waits on the
> active VMA and not all, including third-party, rendering.
>
> To offset that risk, use the __i915_vma_unbind_no_wait hack.
>
> Testcase: igt/prime_vgem/basic-fence-read
> Testcase: igt/prime_vgem/basic-fence-mmap
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 44 ++++++++++++++++++++++++++---------------
> 1 file changed, 28 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 079e09cee16a..37868cc594cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -29,10 +29,12 @@
> #include
> #include
> #include "i915_drv.h"
> +#include "i915_gem_dmabuf.h"
> #include "i915_vgpu.h"
> #include "i915_trace.h"
> #include "intel_drv.h"
> #include "intel_mocs.h"
> +#include
> #include
> #include
> #include
> @@ -511,6 +513,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
> if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
> return -EINVAL;
>
> + ret = i915_gem_object_wait_rendering(obj, true);
> + if (ret)
> + return ret;
> +
> if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
> /* If we're not in the cpu read domain, set ourself into the gtt
> * read domain and manually flush cachelines (if required). This
> @@ -518,9 +524,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
> * anyway again before the next pread happens. */
> *needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
> obj->cache_level);
> - ret = i915_gem_object_wait_rendering(obj, true);
> - if (ret)
> - return ret;
> }
>
> ret = i915_gem_object_get_pages(obj);
> @@ -1132,15 +1135,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>
> obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
>
> + ret = i915_gem_object_wait_rendering(obj, false);
> + if (ret)
> + return ret;
> +
> if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
> /* If we're not in the cpu write domain, set ourself into the gtt
> * write domain and manually flush cachelines (if required). This
> * optimizes for the case when the gpu will use the data
> * right away and we therefore have to clflush anyway. */
> needs_clflush_after = cpu_write_needs_clflush(obj);
> - ret = i915_gem_object_wait_rendering(obj, false);
> - if (ret)
> - return ret;
> }
> /* Same trick applies to invalidate partially written cachelines read
> * before writing. */
> @@ -1335,11 +1339,9 @@ int
> i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> bool readonly)
> {
> + struct reservation_object *resv;
> int ret, i;
>
> - if (!obj->active)
> - return 0;
> -
> if (readonly) {
> if (obj->last_write_req != NULL) {
> ret = i915_wait_request(obj->last_write_req);
> @@ -1366,6 +1368,16 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> GEM_BUG_ON(obj->active);
> }
>
> + resv = i915_gem_object_get_dmabuf_resv(obj);
> + if (resv) {
> + long err;
We already have ret in the function scope.
> +
> + err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
> + MAX_SCHEDULE_TIMEOUT);
> + if (err < 0)
> + return err;
> + }
> +
> return 0;
> }
>
> @@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> struct i915_vma *vma;
> int ret;
>
> - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> - return 0;
> -
> ret = i915_gem_object_wait_rendering(obj, !write);
> if (ret)
> return ret;
>
> + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> + return 0;
> +
Not sure I follow this change, wait rendering would only be issued if
DOMAIN_CPU was in place, this function was about moving to gtt_domain
so should really be NOP if in GTT domain already, no? Maybe calling
site should do an explicit wait_rendering if needed.
> /* Flush and acquire obj->pages so that we are coherent through
> * direct access in memory with previous cached writes through
> * shmemfs and that our cache domain tracking remains valid.
> @@ -3752,13 +3764,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
> uint32_t old_write_domain, old_read_domains;
> int ret;
>
> - if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> - return 0;
> -
> ret = i915_gem_object_wait_rendering(obj, !write);
> if (ret)
> return ret;
>
> + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> + return 0;
> +
Ditto.
> i915_gem_object_flush_gtt_write_domain(obj);
>
> old_write_domain = obj->base.write_domain;
> @@ -4238,7 +4250,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> int ret;
>
> vma->pin_count = 0;
> - ret = i915_vma_unbind(vma);
Maybe add a comment/TODO/FIXME: about the potential WARN.
Regards, Joonas
> + ret = __i915_vma_unbind_no_wait(vma);
> if (WARN_ON(ret == -ERESTARTSYS)) {
> bool was_interruptible;
>
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects
2016-07-19 14:12 ` Joonas Lahtinen
@ 2016-07-19 14:27 ` Chris Wilson
2016-07-20 7:32 ` Joonas Lahtinen
0 siblings, 1 reply; 13+ messages in thread
From: Chris Wilson @ 2016-07-19 14:27 UTC (permalink / raw)
To: Joonas Lahtinen; +Cc: intel-gfx
On Tue, Jul 19, 2016 at 05:12:22PM +0300, Joonas Lahtinen wrote:
> On ti, 2016-07-19 at 11:51 +0100, Chris Wilson wrote:
> > + resv = i915_gem_object_get_dmabuf_resv(obj);
> > + if (resv) {
> > + long err;
>
> We already have ret in the function scope.
ret is int, we need a long. At least I can attest to our test coverage!
> > @@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> > struct i915_vma *vma;
> > int ret;
> >
> > - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> > - return 0;
> > -
> > ret = i915_gem_object_wait_rendering(obj, !write);
> > if (ret)
> > return ret;
> >
> > + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> > + return 0;
> > +
>
> Not sure I follow this change, wait rendering would only be issued if
> DOMAIN_CPU was in place, this function was about moving to gtt_domain
> so should really be NOP if in GTT domain already, no? Maybe calling
> site should do an explicit wait_rendering if needed.
No, this is where we do the wait rendering. The API says
set-to-gtt-domain implies that it is out of the GPU domain (no
rendering) and out of the CPU domain. (Similarly for set-to-cpu-domain.)
The relaxation I've applied here is to try and catch third parties that
have not updated our domain tracking for their access.
Moving the wait_rendering to the parent is a fair amount of burden.
> > vma->pin_count = 0;
> > - ret = i915_vma_unbind(vma);
>
> Maybe add a comment/TODO/FIXME: about the potential WARN.
__i915_vma_unbind_no_wait() is itself an automatic FIXME - it only
exists to cover up a WARN. (I wish we had this series in place first so
that such a hack wasn't applied.)
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects
2016-07-19 14:27 ` Chris Wilson
@ 2016-07-20 7:32 ` Joonas Lahtinen
0 siblings, 0 replies; 13+ messages in thread
From: Joonas Lahtinen @ 2016-07-20 7:32 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On ti, 2016-07-19 at 15:27 +0100, Chris Wilson wrote:
> On Tue, Jul 19, 2016 at 05:12:22PM +0300, Joonas Lahtinen wrote:
> >
> > On ti, 2016-07-19 at 11:51 +0100, Chris Wilson wrote:
> > >
> > > + resv = i915_gem_object_get_dmabuf_resv(obj);
> > > + if (resv) {
> > > + long err;
> > We already have ret in the function scope.
> ret is int, we need a long. At least I can attest to our test coverage!
>
> >
> > >
> > > @@ -3402,13 +3414,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> > > struct i915_vma *vma;
> > > int ret;
> > >
> > > - if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> > > - return 0;
> > > -
> > > ret = i915_gem_object_wait_rendering(obj, !write);
> > > if (ret)
> > > return ret;
> > >
> > > + if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> > > + return 0;
> > > +
> > Not sure I follow this change, wait rendering would only be issued if
> > DOMAIN_CPU was in place, this function was about moving to gtt_domain
> > so should really be NOP if in GTT domain already, no? Maybe calling
> > site should do an explicit wait_rendering if needed.
> No, this is where we do the wait rendering. The API says
> set-to-gtt-domain implies that it is out of the GPU domain (no
> rendering) and out of the CPU domain. (Similarly for set-to-cpu-domain.)
> The relaxation I've applied here is to try and catch third parties that
> have not updated our domain tracking for their access.
>
> Moving the wait_rendering to the parent is a fair amount of burden.
>
> >
> > >
> > > vma->pin_count = 0;
> > > - ret = i915_vma_unbind(vma);
> > Maybe add a comment/TODO/FIXME: about the potential WARN.
> __i915_vma_unbind_no_wait() is itself an automatic FIXME - it only
> exists to cover up a WARN. (I wish we had this series in place first so
> that such a hack wasn't applied.)
OK then,
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> -Chris
>
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2016-07-20 7:32 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-07-19 10:51 [PATCH 1/8] drm/i915: Move GEM request routines to i915_gem_request.c Chris Wilson
2016-07-19 10:51 ` [PATCH 2/8] drm/i915: Retire oldest completed request before allocating next Chris Wilson
2016-07-19 10:51 ` [PATCH 3/8] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
2016-07-19 10:51 ` [PATCH 4/8] drm/i915: Derive GEM requests from dma-fence Chris Wilson
2016-07-19 10:51 ` [PATCH 5/8] drm/i915: Disable waitboosting for fence_wait() Chris Wilson
2016-07-19 10:51 ` [PATCH 6/8] drm/i915: Disable waitboosting for mmioflips/semaphores Chris Wilson
2016-07-19 10:51 ` [PATCH 7/8] drm/i915: Mark imported dma-buf objects as being coherent Chris Wilson
2016-07-19 13:27 ` Joonas Lahtinen
2016-07-19 10:51 ` [PATCH 8/8] drm/i915: Wait on external rendering for GEM objects Chris Wilson
2016-07-19 14:12 ` Joonas Lahtinen
2016-07-19 14:27 ` Chris Wilson
2016-07-20 7:32 ` Joonas Lahtinen
2016-07-19 11:29 ` ✗ Ro.CI.BAT: failure for series starting with [1/8] drm/i915: Move GEM request routines to i915_gem_request.c Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox