From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 2/2] drm/i915: Shrink the GEM kmem_caches upon idling
Date: Wed, 24 Jan 2018 11:27:02 +0000 [thread overview]
Message-ID: <73c6a319-53a2-948e-7721-8072cd11c2cf@linux.intel.com> (raw)
In-Reply-To: <20180124110349.24150-2-chris@chris-wilson.co.uk>
On 24/01/2018 11:03, Chris Wilson wrote:
> When we finally decide the gpu is idle, that is a good time to shrink
> our kmem_caches.
>
> v3: Defer until an RCU grace period after we idle.
> v4: Think about epoch wraparound and how likely that is.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 78 +++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 78 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7f0684ccc724..60b34bb98ee3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3334,6 +3334,65 @@ i915_gem_retire_work_handler(struct work_struct *work)
> }
> }
>
> +static void shrink_caches(struct drm_i915_private *i915)
> +{
> + /*
> + * kmem_cache_shrink() discards empty slabs and reorders partially
> + * filled slabs to prioritise allocating from the mostly full slabs,
> + * with the aim of reducing fragmentation.
> + */
> + kmem_cache_shrink(i915->priorities);
> + kmem_cache_shrink(i915->dependencies);
> + kmem_cache_shrink(i915->requests);
> + kmem_cache_shrink(i915->luts);
> + kmem_cache_shrink(i915->vmas);
> + kmem_cache_shrink(i915->objects);
> +}
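
A small aside for anyone reading along: kmem_cache_shrink() is purely an
optimisation hint, so calling it speculatively like this is safe. A minimal
sketch of the slab cache lifecycle it operates on (the cache name and object
type below are invented for illustration, not taken from i915):

#include <linux/slab.h>

struct example_obj {
	int payload;
};

static struct kmem_cache *example_cache;

static int example_init(void)
{
	/* One dedicated slab cache: default alignment, flags, no ctor. */
	example_cache = kmem_cache_create("example_objs",
					  sizeof(struct example_obj),
					  0, 0, NULL);
	return example_cache ? 0 : -ENOMEM;
}

static void example_on_idle(void)
{
	/*
	 * Gives empty slabs back to the page allocator and re-sorts the
	 * partial list so future allocations fill the fullest slabs first.
	 */
	kmem_cache_shrink(example_cache);
}
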
> +
> +struct sleep_rcu_work {
> + union {
> + struct rcu_head rcu;
> + struct work_struct work;
> + };
> + struct drm_i915_private *i915;
> + unsigned int epoch;
> +};
> +
> +static inline bool
> +same_epoch(struct drm_i915_private *i915, unsigned int epoch)
> +{
> + /*
> + * There is a small chance that the epoch wrapped since we started
> + * sleeping. If we assume that epoch is at least a u32, then it will
> + * take at least 2^32 * 100ms for it to wrap, or about 13.6 years.
> + */
> + return epoch == READ_ONCE(i915->gt.epoch);
> +}
> +
> +static void __sleep_work(struct work_struct *work)
> +{
> + struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
> + struct drm_i915_private *i915 = s->i915;
> + unsigned int epoch = s->epoch;
> +
> + kfree(s);
> + if (same_epoch(i915, epoch))
> + shrink_caches(i915);
> +}
> +
> +static void __sleep_rcu(struct rcu_head *rcu)
> +{
> + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
> + struct drm_i915_private *i915 = s->i915;
> +
> + if (same_epoch(i915, s->epoch)) {
> + INIT_WORK(&s->work, __sleep_work);
> + queue_work(i915->wq, &s->work);
> + } else {
> + kfree(s);
> + }
> +}
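
The anonymous union is the standard trick for chaining an RCU grace period
into process context: the rcu_head is only live until the callback fires,
after which the same storage is reinitialised as a work_struct. A generic
sketch of that handoff, with names invented for illustration:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct rcu_then_work {
	union {
		struct rcu_head rcu;	 /* live until the RCU callback runs */
		struct work_struct work; /* reused from the callback onwards */
	};
};

static void handoff_work(struct work_struct *work)
{
	struct rcu_then_work *s = container_of(work, typeof(*s), work);

	/* Process context, at least one full grace period after queueing. */
	kfree(s);
}

static void handoff_rcu(struct rcu_head *rcu)
{
	struct rcu_then_work *s = container_of(rcu, typeof(*s), rcu);

	/* Softirq context: we cannot sleep here, so bounce to a workqueue. */
	INIT_WORK(&s->work, handoff_work);
	schedule_work(&s->work);
}

static void start_handoff(void)
{
	struct rcu_then_work *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (s)
		call_rcu(&s->rcu, handoff_rcu);
}
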
> +
> static inline bool
> new_requests_since_last_retire(const struct drm_i915_private *i915)
> {
> @@ -3346,6 +3405,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
> {
> struct drm_i915_private *dev_priv =
> container_of(work, typeof(*dev_priv), gt.idle_work.work);
> + unsigned int epoch = 0;
> bool rearm_hangcheck;
> ktime_t end;
>
> @@ -3406,6 +3466,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
> GEM_BUG_ON(!dev_priv->gt.awake);
> dev_priv->gt.awake = false;
> rearm_hangcheck = false;
> + epoch = dev_priv->gt.epoch;
>
> if (INTEL_GEN(dev_priv) >= 6)
> gen6_rps_idle(dev_priv);
> @@ -3421,6 +3482,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
> GEM_BUG_ON(!dev_priv->gt.awake);
> i915_queue_hangcheck(dev_priv);
> }
> +
> + /*
> + * When we are idle, it is an opportune time to reap our caches.
> + * However, we have many objects that utilise RCU and the ordered
> + * i915->wq that this work is executing on. To try to flush any
> + * pending frees now that we are idle, we first wait for an RCU grace
> + * period, and then queue a task (that will run last on the wq) to
> + * shrink and re-optimize the caches.
> + */
> + if (same_epoch(dev_priv, epoch)) {
> + struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
> + if (s) {
> + s->i915 = dev_priv;
> + s->epoch = epoch;
> + call_rcu(&s->rcu, __sleep_rcu);
> + }
> + }
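
And because i915->wq is ordered, queueing the shrink last means every free
queued before it has already completed by the time it runs. A stripped-down
illustration of that ordering guarantee (function names are mine, not from
the patch):

#include <linux/workqueue.h>

static void free_fn(struct work_struct *w)
{
	/* Stand-in for the deferred frees that feed the kmem_caches. */
}

static void shrink_fn(struct work_struct *w)
{
	/* Stand-in for shrink_caches(). */
}

static DECLARE_WORK(free_work, free_fn);
static DECLARE_WORK(shrink_work, shrink_fn);

static void flush_then_shrink(struct workqueue_struct *ordered_wq)
{
	/*
	 * An ordered workqueue (alloc_ordered_workqueue()) executes at most
	 * one item at a time, in queueing order, so shrink_work cannot begin
	 * until free_work has completed.
	 */
	queue_work(ordered_wq, &free_work);
	queue_work(ordered_wq, &shrink_work);
}
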
> }
>
> void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko