From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 8/8] drm/i915: Refactor execbuffer relocation writing
Date: Thu, 09 Jun 2016 16:31:25 +0300 [thread overview]
Message-ID: <1465479085.9670.40.camel@linux.intel.com> (raw)
In-Reply-To: <1465471779-20765-9-git-send-email-chris@chris-wilson.co.uk>
On to, 2016-06-09 at 12:29 +0100, Chris Wilson wrote:
> With in the introduction of the reloc page cache, we are just one step
> away from refactoring the relocation write functions into one. Not only
> does it tidy the code (slightly), but it greatly simplifies the control
> logic much to gcc's satisfaction.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 289 +++++++++++++++--------------
> 1 file changed, 150 insertions(+), 139 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 318c71b663f4..bda8ec8b02e6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -34,6 +34,8 @@
> #include
> #include
>
> +#define DBG_USE_CPU_RELOC 0 /* force relocations to use the CPU write path */
Better connect to the new debug Kconfig menu you introduced?
> +
> #define __EXEC_OBJECT_HAS_PIN (1U<<31)
> #define __EXEC_OBJECT_HAS_FENCE (1U<<30)
> #define __EXEC_OBJECT_NEEDS_MAP (1U<<29)
> @@ -53,6 +55,7 @@ struct i915_execbuffer_params {
> };
>
> struct eb_vmas {
> + struct drm_i915_private *i915;
> struct list_head vmas;
> int and;
> union {
> @@ -62,7 +65,8 @@ struct eb_vmas {
> };
>
> static struct eb_vmas *
> -eb_create(struct drm_i915_gem_execbuffer2 *args)
> +eb_create(struct drm_i915_private *i915,
> + struct drm_i915_gem_execbuffer2 *args)
> {
> struct eb_vmas *eb = NULL;
>
> @@ -89,6 +93,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
> } else
> eb->and = -args->buffer_count;
>
> + eb->i915 = i915;
> INIT_LIST_HEAD(&eb->vmas);
> return eb;
> }
> @@ -272,7 +277,8 @@ static void eb_destroy(struct eb_vmas *eb)
>
> static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
> {
> - return (HAS_LLC(obj->base.dev) ||
> + return (DBG_USE_CPU_RELOC ||
> + HAS_LLC(obj->base.dev) ||
> obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
> obj->cache_level != I915_CACHE_NONE);
> }
> @@ -296,32 +302,58 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
> }
>
> static inline uint64_t
> -relocation_target(struct drm_i915_gem_relocation_entry *reloc,
> +relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
These const changes could be a bunch instead of sprinkled around.
Unless we want to smuggle them in through the resistance.
> uint64_t target_offset)
> {
> return gen8_canonical_addr((int)reloc->delta + target_offset);
> }
>
> struct reloc_cache {
> - void *vaddr;
> + struct drm_i915_private *i915;
> + unsigned long vaddr;
> unsigned page;
> - enum { KMAP, IOMAP } type;
> + struct drm_mm_node node;
> + bool use_64bit_reloc;
> };
>
> -static void reloc_cache_init(struct reloc_cache *cache)
> +static void reloc_cache_init(struct reloc_cache *cache,
> + struct drm_i915_private *i915)
> {
> cache->page = -1;
> - cache->vaddr = NULL;
> + cache->vaddr = 0;
> + cache->i915 = i915;
> + cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
> +}
> +
> +static inline void *unmask_page(unsigned long p)
> +{
> + return (void *)(uintptr_t)(p & PAGE_MASK);
> }
>
> +static inline unsigned unmask_flags(unsigned long p)
> +{
> + return p & ~PAGE_MASK;
> +}
> +
> +#define KMAP 0x4
> +
> static void reloc_cache_fini(struct reloc_cache *cache)
> {
> - if (cache->vaddr == NULL)
> + void *vaddr;
> +
> + if (cache->vaddr == 0)
> return;
>
> - switch (cache->type) {
> - case KMAP: kunmap_atomic(cache->vaddr); break;
> - case IOMAP: io_mapping_unmap_atomic(cache->vaddr); break;
> + vaddr = unmask_page(cache->vaddr);
> + if (cache->vaddr & KMAP) {
> + if (cache->vaddr & CLFLUSH_AFTER)
> + mb();
> +
> + kunmap_atomic(vaddr);
> + i915_gem_object_unpin_pages((struct drm_i915_gem_object *)cache->node.mm);
Rather long line.
> + } else {
> + io_mapping_unmap_atomic(vaddr);
> + i915_vma_unpin((struct i915_vma *)cache->node.mm);
> }
> }
>
> @@ -329,148 +361,138 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
> struct reloc_cache *cache,
> int page)
> {
> - if (cache->page == page)
> - return cache->vaddr;
> + void *vaddr;
>
> - if (cache->vaddr)
> - kunmap_atomic(cache->vaddr);
> + if (cache->vaddr) {
> + kunmap_atomic(unmask_page(cache->vaddr));
> + } else {
> + unsigned flushes;
> + int ret;
> +
> + ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
Yeah, needs_clflush is so bad name earlier in the series, that even you
don't use it. "need_clflushes" or anything is better.
> + if (ret)
> + return ERR_PTR(ret);
>
> + cache->vaddr = flushes | KMAP;
> + cache->node.mm = (void *)obj;
> + if (flushes)
> + mb();
> + }
> +
> + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
> + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
> cache->page = page;
> - cache->vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
> - cache->type = KMAP;
>
> - return cache->vaddr;
> + return vaddr;
> }
>
> -static int
> -relocate_entry_cpu(struct drm_i915_gem_object *obj,
> - struct drm_i915_gem_relocation_entry *reloc,
> - struct reloc_cache *cache,
> - uint64_t target_offset)
> +static void *reloc_iomap(struct drm_i915_gem_object *obj,
> + struct reloc_cache *cache,
> + int page)
> {
> - struct drm_device *dev = obj->base.dev;
> - uint32_t page_offset = offset_in_page(reloc->offset);
> - uint64_t delta = relocation_target(reloc, target_offset);
> - char *vaddr;
> - int ret;
> + void *vaddr;
>
> - ret = i915_gem_object_set_to_cpu_domain(obj, true);
> - if (ret)
> - return ret;
> + if (cache->vaddr) {
> + io_mapping_unmap_atomic(unmask_page(cache->vaddr));
> + } else {
> + struct i915_vma *vma;
> + int ret;
>
> - vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
> - *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
> + if (use_cpu_reloc(obj))
> + return NULL;
>
> - if (INTEL_GEN(dev) >= 8) {
> - page_offset += sizeof(uint32_t);
> - if (page_offset == PAGE_SIZE) {
> - vaddr = reloc_kmap(obj, cache, cache->page + 1);
> - page_offset = 0;
> - }
> - *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
> - }
> + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
> + PIN_MAPPABLE | PIN_NONBLOCK);
> + if (IS_ERR(vma))
> + return NULL;
>
> - return 0;
> -}
Ugh, did you simultaneously move and rename functions. This is rather
hard to follow from this point on...
Regards, Joonas
> + ret = i915_gem_object_set_to_gtt_domain(obj, true);
> + if (ret)
> + return ERR_PTR(ret);
>
> -static void *reloc_iomap(struct drm_i915_private *i915,
> - struct reloc_cache *cache,
> - uint64_t offset)
> -{
> - if (cache->page == offset >> PAGE_SHIFT)
> - return cache->vaddr;
> + ret = i915_gem_object_put_fence(obj);
> + if (ret)
> + return ERR_PTR(ret);
>
> - if (cache->vaddr)
> - io_mapping_unmap_atomic(cache->vaddr);
> + cache->node.start = vma->node.start;
> + cache->node.mm = (void *)vma;
> + }
>
> - cache->page = offset >> PAGE_SHIFT;
> - cache->vaddr =
> - io_mapping_map_atomic_wc(i915->ggtt.mappable,
> - offset & PAGE_MASK);
> - cache->type = IOMAP;
> + vaddr = io_mapping_map_atomic_wc(cache->i915->ggtt.mappable,
> + cache->node.start + (page << PAGE_SHIFT));
> + cache->page = page;
> + cache->vaddr = (unsigned long)vaddr;
>
> - return cache->vaddr;
> + return vaddr;
> }
>
> -static int
> -relocate_entry_gtt(struct drm_i915_gem_object *obj,
> - struct drm_i915_gem_relocation_entry *reloc,
> - struct reloc_cache *cache,
> - uint64_t target_offset)
> +static void *reloc_vaddr(struct drm_i915_gem_object *obj,
> + struct reloc_cache *cache,
> + int page)
> {
> - struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> - struct i915_vma *vma;
> - uint64_t delta = relocation_target(reloc, target_offset);
> - uint64_t offset;
> - void __iomem *reloc_page;
> - int ret;
> -
> - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
> - if (IS_ERR(vma))
> - return PTR_ERR(vma);
> -
> - ret = i915_gem_object_set_to_gtt_domain(obj, true);
> - if (ret)
> - goto unpin;
> -
> - ret = i915_gem_object_put_fence(obj);
> - if (ret)
> - goto unpin;
> -
> - /* Map the page containing the relocation we're going to perform. */
> - offset = vma->node.start;
> - offset += reloc->offset;
> - reloc_page = reloc_iomap(dev_priv, cache, offset);
> - iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
> + void *vaddr;
>
> - if (INTEL_GEN(dev_priv) >= 8) {
> - offset += sizeof(uint32_t);
> - if (offset_in_page(offset) == 0)
> - reloc_page = reloc_iomap(dev_priv, cache, offset);
> - iowrite32(upper_32_bits(delta),
> - reloc_page + offset_in_page(offset));
> + if (cache->page == page) {
> + vaddr = unmask_page(cache->vaddr);
> + } else {
> + vaddr = NULL;
> + if ((cache->vaddr & KMAP) == 0)
> + vaddr = reloc_iomap(obj, cache, page);
> + if (vaddr == NULL)
> + vaddr = reloc_kmap(obj, cache, page);
> }
>
> -unpin:
> - __i915_vma_unpin(vma);
> - return ret;
> + return vaddr;
> }
>
> static void
> -clflush_write32(void *addr, uint32_t value)
> +clflush_write32(uint32_t *addr, uint32_t value, unsigned flushes)
> {
> - /* This is not a fast path, so KISS. */
> - drm_clflush_virt_range(addr, sizeof(uint32_t));
> - *(uint32_t *)addr = value;
> - drm_clflush_virt_range(addr, sizeof(uint32_t));
> + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
> + if (flushes & CLFLUSH_BEFORE) {
> + clflushopt(addr);
> + mb();
> + }
> +
> + *addr = value;
> +
> + /* Writes to the same cacheline are serialised by the CPU
> + * (including clflush). On the write path, we only require
> + * that it hits memory in an orderly fashion and place
> + * mb barriers at the start and end of the relocation phase
> + * to ensure ordering of clflush wrt to the system.
> + */
> + if (flushes & CLFLUSH_AFTER)
> + clflushopt(addr);
> + } else
> + *addr = value;
> }
>
> static int
> -relocate_entry_clflush(struct drm_i915_gem_object *obj,
> - struct drm_i915_gem_relocation_entry *reloc,
> - struct reloc_cache *cache,
> - uint64_t target_offset)
> +relocate_entry(struct drm_i915_gem_object *obj,
> + const struct drm_i915_gem_relocation_entry *reloc,
> + struct reloc_cache *cache,
> + uint64_t target_offset)
> {
> - struct drm_device *dev = obj->base.dev;
> - uint32_t page_offset = offset_in_page(reloc->offset);
> - uint64_t delta = relocation_target(reloc, target_offset);
> - char *vaddr;
> - int ret;
> + uint64_t offset = reloc->offset;
> + bool wide = cache->use_64bit_reloc;
> + void *vaddr;
>
> - ret = i915_gem_object_set_to_gtt_domain(obj, true);
> - if (ret)
> - return ret;
> + target_offset = relocation_target(reloc, target_offset);
> +repeat:
> + vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
> + if (IS_ERR(vaddr))
> + return PTR_ERR(vaddr);
>
> - vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
> - clflush_write32(vaddr + page_offset, lower_32_bits(delta));
> + clflush_write32(vaddr + offset_in_page(offset),
> + lower_32_bits(target_offset),
> + cache->vaddr);
>
> - if (INTEL_GEN(dev) >= 8) {
> - page_offset += sizeof(uint32_t);
> - if (page_offset == PAGE_SIZE) {
> - vaddr = reloc_kmap(obj, cache, cache->page + 1);
> - page_offset = 0;
> - }
> - clflush_write32(vaddr + page_offset, upper_32_bits(delta));
> + if (wide) {
> + offset += sizeof(uint32_t);
> + target_offset >>= 32;
> + wide = false;
> + goto repeat;
> }
>
> return 0;
> @@ -557,7 +579,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>
> /* Check that the relocation address is valid... */
> if (unlikely(reloc->offset >
> - obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
> + obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
> DRM_DEBUG("Relocation beyond object bounds: "
> "obj %p target %d offset %d size %d.\n",
> obj, reloc->target_handle,
> @@ -577,23 +599,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> if (pagefault_disabled() && !object_is_idle(obj))
> return -EFAULT;
>
> - if (use_cpu_reloc(obj))
> - ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
> - else if (obj->map_and_fenceable)
> - ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
> - else if (static_cpu_has(X86_FEATURE_CLFLUSH))
> - ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
> - else {
> - WARN_ONCE(1, "Impossible case in relocation handling\n");
> - ret = -ENODEV;
> - }
> -
> + ret = relocate_entry(obj, reloc, cache, target_offset);
> if (ret)
> return ret;
>
> /* and update the user's relocation entry */
> reloc->presumed_offset = target_offset;
> -
> return 0;
> }
>
> @@ -609,7 +620,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
> int remain, ret = 0;
>
> user_relocs = u64_to_user_ptr(entry->relocs_ptr);
> - reloc_cache_init(&cache);
> + reloc_cache_init(&cache, eb->i915);
>
> remain = entry->relocation_count;
> while (remain) {
> @@ -658,7 +669,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
> struct reloc_cache cache;
> int i, ret = 0;
>
> - reloc_cache_init(&cache);
> + reloc_cache_init(&cache, eb->i915);
> for (i = 0; i < entry->relocation_count; i++) {
> ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
> if (ret)
> @@ -1073,8 +1084,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
> if (flush_chipset)
> i915_gem_chipset_flush(req->engine->i915);
>
> - if (flush_domains & I915_GEM_DOMAIN_GTT)
> - wmb();
> + /* Make sure (untracked) CPU relocs/parsing are flushed */
> + wmb();
>
> /* Unconditionally invalidate gpu caches and TLBs. */
> return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
> @@ -1606,7 +1617,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>
> memset(¶ms_master, 0x00, sizeof(params_master));
>
> - eb = eb_create(args);
> + eb = eb_create(dev_priv, args);
> if (eb == NULL) {
> i915_gem_context_put(ctx);
> mutex_unlock(&dev->struct_mutex);
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-06-09 13:31 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-09 11:29 Start tidying up execbuf relocations Chris Wilson
2016-06-09 11:29 ` [PATCH 1/8] drm/i915: Print the batchbuffer offset next to BBADDR in error state Chris Wilson
2016-06-09 12:29 ` Joonas Lahtinen
2016-06-09 11:29 ` [PATCH 2/8] drm/i915: Cache kmap between relocations Chris Wilson
2016-06-09 12:25 ` Joonas Lahtinen
2016-06-09 11:29 ` [PATCH 3/8] drm/i915: Extract i915_gem_obj_prepare_shmem_write() Chris Wilson
2016-06-09 11:39 ` Chris Wilson
2016-06-09 12:47 ` Joonas Lahtinen
2016-06-09 11:29 ` [PATCH 4/8] drm/i915: Before accessing an object via the cpu, flush GTT writes Chris Wilson
2016-06-09 12:50 ` Joonas Lahtinen
2016-06-09 11:29 ` [PATCH 5/8] drm/i915: Wait for writes through the GTT to land before reading back Chris Wilson
2016-06-09 11:36 ` Chris Wilson
2016-06-09 12:54 ` Joonas Lahtinen
2016-06-09 11:29 ` [PATCH 6/8] drm/i915: Pin the pages first in shmem prepare read/write Chris Wilson
2016-06-09 13:06 ` Joonas Lahtinen
2016-06-09 13:35 ` Chris Wilson
2016-06-09 13:51 ` Joonas Lahtinen
2016-06-09 14:13 ` Chris Wilson
2016-06-09 11:29 ` [PATCH 7/8] drm/i915: Tidy up flush cpu/gtt write domains Chris Wilson
2016-06-09 13:12 ` Joonas Lahtinen
2016-06-09 11:29 ` [PATCH 8/8] drm/i915: Refactor execbuffer relocation writing Chris Wilson
2016-06-09 13:31 ` Joonas Lahtinen [this message]
2016-06-09 14:22 ` Chris Wilson
2016-06-09 11:40 ` ✗ Ro.CI.BAT: failure for series starting with [1/8] drm/i915: Print the batchbuffer offset next to BBADDR in error state Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1465479085.9670.40.camel@linux.intel.com \
--to=joonas.lahtinen@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.