public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Robert Beckett <robert.beckett@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 33/70] drm/i915: Use a separate slab for requests
Date: Fri, 22 May 2015 15:48:59 +0100	[thread overview]
Message-ID: <555F41DB.2010204@intel.com> (raw)
In-Reply-To: <1428420094-18352-34-git-send-email-chris@chris-wilson.co.uk>

On 07/04/2015 16:20, Chris Wilson wrote:
> requests are even more frequently allocated than objects and equally
> benefit from having a dedicated slab.
>
> v2: Rebase
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_dma.c         | 12 ++++++----
>   drivers/gpu/drm/i915/i915_drv.h         |  4 +++-
>   drivers/gpu/drm/i915/i915_gem.c         | 41 +++++++++++++++++++--------------
>   drivers/gpu/drm/i915/intel_ringbuffer.c |  1 -
>   4 files changed, 35 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 7b0109e2ab23..135fbcad367f 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1010,8 +1010,10 @@ out_regs:
>   put_bridge:
>   	pci_dev_put(dev_priv->bridge_dev);
>   free_priv:
> -	if (dev_priv->slab)
> -		kmem_cache_destroy(dev_priv->slab);
> +	if (dev_priv->requests)
> +		kmem_cache_destroy(dev_priv->requests);
> +	if (dev_priv->objects)
> +		kmem_cache_destroy(dev_priv->objects);
>   	kfree(dev_priv);
>   	return ret;
>   }
> @@ -1094,8 +1096,10 @@ int i915_driver_unload(struct drm_device *dev)
>   	if (dev_priv->regs != NULL)
>   		pci_iounmap(dev->pdev, dev_priv->regs);
>
> -	if (dev_priv->slab)
> -		kmem_cache_destroy(dev_priv->slab);
> +	if (dev_priv->requests)
> +		kmem_cache_destroy(dev_priv->requests);
> +	if (dev_priv->objects)
> +		kmem_cache_destroy(dev_priv->objects);
>
>   	pci_dev_put(dev_priv->bridge_dev);
>   	kfree(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 600b6d4a0139..ad08aa532456 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1578,7 +1578,8 @@ struct i915_virtual_gpu {
>
>   struct drm_i915_private {
>   	struct drm_device *dev;
> -	struct kmem_cache *slab;
> +	struct kmem_cache *objects;
> +	struct kmem_cache *requests;
>
>   	const struct intel_device_info info;
>
> @@ -2070,6 +2071,7 @@ struct drm_i915_gem_request {
>   	struct kref ref;
>
>   	/** On Which ring this request was generated */
> +	struct drm_i915_private *i915;
>   	struct intel_engine_cs *ring;
>
>   	/** GEM sequence number associated with this request. */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1f07cd17be04..a4a62592f0f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -381,13 +381,13 @@ out:
>   void *i915_gem_object_alloc(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = dev->dev_private;
> -	return kmem_cache_zalloc(dev_priv->slab, GFP_KERNEL);
> +	return kmem_cache_zalloc(dev_priv->objects, GFP_KERNEL);
>   }
>
>   void i915_gem_object_free(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> -	kmem_cache_free(dev_priv->slab, obj);
> +	kmem_cache_free(dev_priv->objects, obj);
>   }
>
>   static int
> @@ -2633,43 +2633,45 @@ void i915_gem_request_free(struct kref *req_ref)
>   		i915_gem_context_unreference(ctx);
>   	}
>
> -	kfree(req);
> +	kmem_cache_free(req->i915->requests, req);
>   }
>
>   int i915_gem_request_alloc(struct intel_engine_cs *ring,
>   			   struct intel_context *ctx)
>   {
> +	struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +	struct drm_i915_gem_request *rq;
>   	int ret;
> -	struct drm_i915_gem_request *request;
> -	struct drm_i915_private *dev_private = ring->dev->dev_private;
>
>   	if (ring->outstanding_lazy_request)
>   		return 0;
>
> -	request = kzalloc(sizeof(*request), GFP_KERNEL);
> -	if (request == NULL)
> +	rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
> +	if (rq == NULL)
>   		return -ENOMEM;
>
> -	ret = i915_gem_get_seqno(ring->dev, &request->seqno);
> +	kref_init(&rq->ref);
> +	rq->i915 = dev_priv;
> +
> +	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
>   	if (ret) {
> -		kfree(request);
> +		kfree(rq);
>   		return ret;
>   	}
>
> -	kref_init(&request->ref);
> -	request->ring = ring;
> -	request->uniq = dev_private->request_uniq++;
> +	rq->ring = ring;
> +	rq->uniq = dev_priv->request_uniq++;
>
>   	if (i915.enable_execlists)
> -		ret = intel_logical_ring_alloc_request_extras(request, ctx);
> +		ret = intel_logical_ring_alloc_request_extras(rq, ctx);
>   	else
> -		ret = intel_ring_alloc_request_extras(request);
> +		ret = intel_ring_alloc_request_extras(rq);
>   	if (ret) {
> -		kfree(request);
> +		kfree(rq);
>   		return ret;
>   	}
>
> -	ring->outstanding_lazy_request = request;
> +	ring->outstanding_lazy_request = rq;
>   	return 0;
>   }
>
> @@ -5204,11 +5206,16 @@ i915_gem_load(struct drm_device *dev)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	int i;
>
> -	dev_priv->slab =
> +	dev_priv->objects =
>   		kmem_cache_create("i915_gem_object",
>   				  sizeof(struct drm_i915_gem_object), 0,
>   				  SLAB_HWCACHE_ALIGN,
>   				  NULL);
> +	dev_priv->requests =
> +		kmem_cache_create("i915_gem_request",
> +				  sizeof(struct drm_i915_gem_request), 0,
> +				  SLAB_HWCACHE_ALIGN,
> +				  NULL);
>
>   	INIT_LIST_HEAD(&dev_priv->vm_list);
>   	i915_init_vm(dev_priv, &dev_priv->gtt.base);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 99a1fdff4924..bf7837d30388 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2162,7 +2162,6 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>   int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>   {
>   	request->ringbuf = request->ring->buffer;
> -
>   	return 0;
>   }
>
>

You missed a request allocation in execlists_context_queue() in
intel_lrc.c, in the !request case. By the look of it, that code could be
changed to use i915_gem_request_alloc(), unless there is a reason not to
set outstanding_lazy_request to the dummy request.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2015-05-22 14:49 UTC|newest]

Thread overview: 113+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-07 15:20 Low hanging fruit take 2 Chris Wilson
2015-04-07 15:20 ` [PATCH 01/70] drm/i915: Cache last obj->pages location for i915_gem_object_get_page() Chris Wilson
2015-04-08 11:16   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 02/70] drm/i915: Fix the flip synchronisation to consider mmioflips Chris Wilson
2015-04-07 15:20 ` [PATCH 03/70] drm/i915: Ensure cache flushes prior to doing CS flips Chris Wilson
2015-04-08 11:23   ` Daniel Vetter
2015-04-08 11:29     ` Chris Wilson
2015-04-07 15:20 ` [PATCH 04/70] drm/i915: Agressive downclocking on Baytrail Chris Wilson
2015-04-07 15:20 ` [PATCH 05/70] drm/i915: Fix computation of last_adjustment for RPS autotuning Chris Wilson
2015-04-07 15:20 ` [PATCH 06/70] drm/i915: Fix race on unreferencing the wrong mmio-flip-request Chris Wilson
2015-04-08 11:30   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 07/70] drm/i915: Boost GPU frequency if we detect outstanding pageflips Chris Wilson
2015-04-08 11:31   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 08/70] drm/i915: Deminish contribution of wait-boosting from clients Chris Wilson
2015-04-07 15:20 ` [PATCH 09/70] drm/i915: Re-enable RPS wait-boosting for all engines Chris Wilson
2015-04-07 15:20 ` [PATCH 10/70] drm/i915: Split i915_gem_batch_pool into its own header Chris Wilson
2015-04-07 15:20 ` [PATCH 11/70] drm/i915: Tidy batch pool logic Chris Wilson
2015-04-07 15:20 ` [PATCH 12/70] drm/i915: Split the batch pool by engine Chris Wilson
2015-04-07 15:20 ` [PATCH 13/70] drm/i915: Free batch pool when idle Chris Wilson
2015-04-07 15:20 ` [PATCH 14/70] drm/i915: Split batch pool into size buckets Chris Wilson
2015-04-07 15:20 ` [PATCH 15/70] drm/i915: Include active flag when describing objects in debugfs Chris Wilson
2015-04-08 11:33   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 16/70] drm/i915: Suppress empty lines from debugfs/i915_gem_objects Chris Wilson
2015-04-08 11:34   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 17/70] drm/i915: Optimistically spin for the request completion Chris Wilson
2015-04-08 11:39   ` Daniel Vetter
2015-04-08 13:43     ` Rantala, Valtteri
2015-04-08 14:15       ` Daniel Vetter
2015-04-13 11:34   ` Tvrtko Ursulin
2015-04-13 12:25     ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 18/70] drm/i915: Implement inter-engine read-read optimisations Chris Wilson
2015-04-14 13:51   ` Tvrtko Ursulin
2015-04-14 14:00     ` Chris Wilson
2015-04-07 15:20 ` [PATCH 19/70] drm/i915: Inline check required for object syncing prior to execbuf Chris Wilson
2015-04-07 15:20 ` [PATCH 20/70] drm/i915: Limit ring synchronisation (sw sempahores) RPS boosts Chris Wilson
2015-04-08 11:46   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 21/70] drm/i915: Limit mmio flip " Chris Wilson
2015-04-07 15:20 ` [PATCH 22/70] drm/i915: Reduce frequency of unspecific HSW reg debugging Chris Wilson
2015-04-07 15:20 ` [PATCH 23/70] drm/i915: Record ring->start address in error state Chris Wilson
2015-04-08 11:47   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 24/70] drm/i915: Use simpler form of spin_lock_irq(execlist_lock) Chris Wilson
2015-04-07 15:20 ` [PATCH 25/70] drm/i915: Use the global runtime-pm wakelock for a busy GPU for execlists Chris Wilson
2015-04-07 15:20 ` [PATCH 26/70] drm/i915: Map the execlists context regs once during pinning Chris Wilson
2015-04-07 15:20 ` [PATCH 27/70] drm/i915: Remove vestigal DRI1 ring quiescing code Chris Wilson
2015-04-09 15:02   ` Daniel Vetter
2015-04-09 15:24     ` Chris Wilson
2015-04-09 15:31       ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 28/70] drm/i915: Overhaul execlist submission Chris Wilson
2015-04-07 15:20 ` [PATCH 29/70] drm/i915: Move the execlists retirement to the right spot Chris Wilson
2015-04-07 15:20 ` [PATCH 30/70] drm/i915: Map the ringbuffer using WB on LLC machines Chris Wilson
2015-04-07 15:20 ` [PATCH 31/70] drm/i915: Refactor duplicate object vmap functions Chris Wilson
2015-04-07 15:20 ` [PATCH 32/70] drm/i915: Treat ringbuffer writes as write to normal memory Chris Wilson
2015-04-07 15:20 ` [PATCH 33/70] drm/i915: Use a separate slab for requests Chris Wilson
2015-05-22 14:48   ` Robert Beckett [this message]
2015-04-07 15:20 ` [PATCH 34/70] drm/i915: Use a separate slab for vmas Chris Wilson
2015-04-10  8:32   ` Daniel Vetter
2015-04-07 15:20 ` [PATCH 35/70] drm/i915: Use the new rq->i915 field where appropriate Chris Wilson
2015-04-07 15:21 ` [PATCH 36/70] drm/i915: Reduce the pointer dance of i915_is_ggtt() Chris Wilson
2015-04-07 15:21 ` [PATCH 37/70] drm/i915: Squash more pointer indirection for i915_is_gtt Chris Wilson
2015-04-07 15:21 ` [PATCH 38/70] drm/i915: Reduce locking in execlist command submission Chris Wilson
2015-04-07 15:21 ` [PATCH 39/70] drm/i915: Reduce more " Chris Wilson
2015-04-07 15:21 ` [PATCH 40/70] drm/i915: Reduce locking in gen8 IRQ handler Chris Wilson
2015-04-07 15:21 ` [PATCH 41/70] drm/i915: Tidy " Chris Wilson
2015-04-10  8:36   ` Daniel Vetter
2015-04-07 15:21 ` [PATCH 42/70] drm/i915: Remove request retirement before each batch Chris Wilson
2015-04-07 15:21 ` [PATCH 43/70] drm/i915: Cache the GGTT offset for the execlists context Chris Wilson
2015-04-07 15:21 ` [PATCH 44/70] drm/i915: Prefer to check for idleness in worker rather than sync-flush Chris Wilson
2015-04-10  8:37   ` Daniel Vetter
2015-04-07 15:21 ` [PATCH 45/70] drm/i915: Remove request->uniq Chris Wilson
2015-04-10  8:38   ` Daniel Vetter
2015-04-07 15:21 ` [PATCH 46/70] drm/i915: Cache the reset_counter for the request Chris Wilson
2015-04-07 15:21 ` [PATCH 47/70] drm/i915: Allocate context objects from stolen Chris Wilson
2015-04-10  8:39   ` Daniel Vetter
2015-04-07 15:21 ` [PATCH 48/70] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
2015-04-07 15:21 ` [PATCH 49/70] drm/i915: Do not zero initialise page tables Chris Wilson
2015-04-07 15:21 ` [PATCH 50/70] drm/i915: The argument for postfix is redundant Chris Wilson
2015-04-10  8:53   ` Daniel Vetter
2015-04-10  9:00     ` Chris Wilson
2015-04-10  9:32       ` Daniel Vetter
2015-04-10  9:45         ` Chris Wilson
2015-04-07 15:21 ` [PATCH 51/70] drm/i915: Record the position of the start of the request Chris Wilson
2015-04-07 15:21 ` [PATCH 52/70] drm/i915: Cache the execlist ctx descriptor Chris Wilson
2015-04-07 15:21 ` [PATCH 53/70] drm/i915: Eliminate vmap overhead for cmd parser Chris Wilson
2015-04-07 15:21 ` [PATCH 54/70] drm/i915: Cache last cmd descriptor when parsing Chris Wilson
2015-04-07 15:21 ` [PATCH 55/70] drm/i915: Use WC copies on !llc platforms for the command parser Chris Wilson
2015-04-07 15:21 ` [PATCH 56/70] drm/i915: Cache kmap between relocations Chris Wilson
2015-04-07 15:21 ` [PATCH 57/70] drm/i915: intel_ring_initialized() must be simple and inline Chris Wilson
2015-12-08 15:02   ` [PATCH 0/1] " Dave Gordon
2015-12-08 15:02     ` [PATCH 1/1] " Dave Gordon
2015-12-10 10:24       ` Daniel Vetter
2015-04-07 15:21 ` [PATCH 58/70] drm/i915: Before shrink_all we only need to idle the GPU Chris Wilson
2015-04-07 15:21 ` [PATCH 59/70] drm/i915: Simplify object is-pinned checking for shrinker Chris Wilson
2015-04-07 16:28 ` Chris Wilson
2015-04-07 16:28   ` [PATCH 60/70] drm/i915: Make evict-everything more robust Chris Wilson
2015-04-07 16:28   ` [PATCH 61/70] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
2015-04-14 14:52     ` Tvrtko Ursulin
2015-04-14 15:05       ` Chris Wilson
2015-04-14 15:15         ` Tvrtko Ursulin
2015-04-07 16:28   ` [PATCH 62/70] drm/i915: Reduce locking inside busy ioctl Chris Wilson
2015-04-07 16:28   ` [PATCH 63/70] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
2015-04-10  9:14     ` Daniel Vetter
2015-04-15  9:03       ` Chris Wilson
2015-04-15  9:33         ` Daniel Vetter
2015-04-15  9:38           ` Chris Wilson
2015-04-07 16:28   ` [PATCH 64/70] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
2015-04-07 16:28   ` [PATCH 65/70] drm/i915: Reduce locking for gen6+ GT interrupts Chris Wilson
2015-04-07 16:28   ` [PATCH 66/70] drm/i915: Remove obj->pin_mappable Chris Wilson
2015-04-13 11:35     ` Tvrtko Ursulin
2015-04-13 12:30       ` Daniel Vetter
2015-04-07 16:28   ` [PATCH 67/70] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
2015-04-07 16:28   ` [PATCH 68/70] drm/i915: Simplify vma-walker for i915_gem_objects Chris Wilson
2015-04-07 16:28   ` [PATCH 69/70] drm/i915: Skip holding an object reference for execbuf preparation Chris Wilson
2015-04-07 16:28   ` [PATCH 70/70] drm/i915: Use vma as the primary token for managing binding Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=555F41DB.2010204@intel.com \
    --to=robert.beckett@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox