Re: [PATCH] drm/i915: Asynchronously initialise the GPU state

From: Daniel Vetter <daniel@ffwll.ch>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/i915: Asynchronously initialise the GPU state
Date: Wed, 1 Jul 2015 15:07:18 +0200	[thread overview]
Message-ID: <20150701130718.GG23343@phenom.ffwll.local> (raw)
In-Reply-To: <1435742841-28454-1-git-send-email-chris@chris-wilson.co.uk>

On Wed, Jul 01, 2015 at 10:27:21AM +0100, Chris Wilson wrote:
> Dave Gordon made the good suggestion that once the ringbuffers were
> setup, the actual queuing of commands to program the initial GPU state
> could be deferred. Since that initial state contains instructions for
> setting up the first power context, we want to execute that as earlier
> as possible, preferrably in the background to userspace. Then when
> userspace does wake up, the first time it opens the device we just need
> to flush the work to be sure that our commands are queued before any of
> userspace's. (Hooking into the device open should mean we have to check
> less often than say hooking into execbuffer.)
> 
> Suggested-by: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>

Just before this gets a bit out of hand with various patches floating
around ... I really meant it when I said that we should have a proper
design discussion about this in Jesse's meeting first.

Looking at all the ideas between you, Dave & me I count about 3-4
approaches to async gem init, and all have upsides and downsides.

Aside from that I concur that if we do async gem init then it better be a
worker and not relying on some abitrary userspace ioctl/syscall. Of course
we'd still need to place proper synchronization points at a good place
(flush_work in gem_open for Dave's design), but that's really orthogonal
to running it in a worker imo.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h |   2 +
>  drivers/gpu/drm/i915/i915_gem.c | 113 +++++++++++++++++++++++++++-------------
>  2 files changed, 79 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3ea1fe8db63e..d4003dea97eb 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1938,6 +1938,8 @@ struct drm_i915_private {
>  
>  	bool edp_low_vswing;
>  
> +	struct work_struct init_hw_late;
> +
>  	/*
>  	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
>  	 * will be rejected. Instead look for a better place.
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 2f0fed1b9dd7..7efa71f8edd7 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5140,12 +5140,76 @@ cleanup_render_ring:
>  	return ret;
>  }
>  
> +static int
> +i915_gem_init_hw_late(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_engine_cs *ring;
> +	int i, j;
> +
> +	for_each_ring(ring, dev_priv, i) {
> +		struct drm_i915_gem_request *req;
> +		int ret;
> +
> +		if (WARN_ON(!ring->default_context)) {
> +			ret = -ENODEV;
> +			goto err;
> +		}
> +
> +		req = i915_gem_request_alloc(ring, ring->default_context);
> +		if (IS_ERR(req)) {
> +			ret = PTR_ERR(req);
> +			goto err;
> +		}
> +
> +		if (ring->id == RCS) {
> +			for (j = 0; j < NUM_L3_SLICES(dev_priv); j++)
> +				i915_gem_l3_remap(req, j);
> +		}
> +
> +		ret = i915_ppgtt_init_ring(req);
> +		if (ret) {
> +			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
> +			goto err_req;
> +		}
> +
> +		ret = i915_gem_context_enable(req);
> +		if (ret) {
> +			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
> +			goto err_req;
> +		}
> +
> +		i915_add_request_no_flush(req);
> +		continue;
> +
> +err_req:
> +		i915_gem_request_cancel(req);
> +err:
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void
> +i915_gem_init_hw_worker(struct work_struct *work)
> +{
> +	struct drm_i915_private *dev_priv =
> +		container_of(work, typeof(*dev_priv), init_hw_late);
> +	mutex_lock(&dev_priv->dev->struct_mutex);
> +	if (i915_gem_init_hw_late(dev_priv)) {
> +		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
> +		atomic_set_mask(I915_WEDGED,
> +				&dev_priv->gpu_error.reset_counter);
> +	}
> +	mutex_unlock(&dev_priv->dev->struct_mutex);
> +}
> +
>  int
>  i915_gem_init_hw(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_engine_cs *ring;
> -	int ret, i, j;
> +	int ret, i;
>  
>  	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
>  		return -EIO;
> @@ -5198,41 +5262,10 @@ i915_gem_init_hw(struct drm_device *dev)
>  	}
>  
>  	/* Now it is safe to go back round and do everything else: */
> -	for_each_ring(ring, dev_priv, i) {
> -		struct drm_i915_gem_request *req;
> -
> -		WARN_ON(!ring->default_context);
> -
> -		req = i915_gem_request_alloc(ring, ring->default_context);
> -		if (IS_ERR(req)) {
> -			ret = PTR_ERR(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> -			goto out;
> -		}
> -
> -		if (ring->id == RCS) {
> -			for (j = 0; j < NUM_L3_SLICES(dev); j++)
> -				i915_gem_l3_remap(req, j);
> -		}
> -
> -		ret = i915_ppgtt_init_ring(req);
> -		if (ret && ret != -EIO) {
> -			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
> -			i915_gem_request_cancel(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> -			goto out;
> -		}
> -
> -		ret = i915_gem_context_enable(req);
> -		if (ret && ret != -EIO) {
> -			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
> -			i915_gem_request_cancel(req);
> -			i915_gem_cleanup_ringbuffer(dev);
> -			goto out;
> -		}
> -
> -		i915_add_request_no_flush(req);
> -	}
> +	if (dev->open_count == 0) /* uncontested with userspace, i.e. boot */
> +		queue_work(dev_priv->wq, &dev_priv->init_hw_late);
> +	else
> +		ret = i915_gem_init_hw_late(dev_priv);
>  
>  out:
>  	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> @@ -5379,6 +5412,7 @@ i915_gem_load(struct drm_device *dev)
>  		init_ring_lists(&dev_priv->ring[i]);
>  	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
>  		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
> +	INIT_WORK(&dev_priv->init_hw_late, i915_gem_init_hw_worker);
>  	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
>  			  i915_gem_retire_work_handler);
>  	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
> @@ -5442,11 +5476,18 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  
>  int i915_gem_open(struct drm_device *dev, struct drm_file *file)
>  {
> +	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_file_private *file_priv;
>  	int ret;
>  
>  	DRM_DEBUG_DRIVER("\n");
>  
> +	/* Flush ring initialisation before userspace can submit its own
> +	 * batches, so the hardware initialisation commands are queued
> +	 * first.
> +	 */
> +	flush_work(&dev_priv->init_hw_late);
> +
>  	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
>  	if (!file_priv)
>  		return -ENOMEM;
> -- 
> 2.1.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx