* [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers
@ 2015-01-09 17:50 Chris Wilson
2015-01-10 1:51 ` shuang.he
0 siblings, 1 reply; 2+ messages in thread
From: Chris Wilson @ 2015-01-09 17:50 UTC (permalink / raw)
To: intel-gfx
If the batch buffer is too large to fit into the aperture and we need a
GTT mapping for relocations, we currently fail. This only applies to a
subset of machines for a subset of environments, but is quite undesirable. We
can simply check after failing to insert the batch into the GTT as to
whether we only need a mappable binding for relocation and, if so, we can
revert to using a non-mappable binding and an alternate relocation
method. However, using relocate_entry_cpu() is excruciatingly slow for
large buffers on non-LLC as the entire buffer requires clflushing before
and after the relocation handling. Alternatively, we can implement a
third relocation method that only clflushes around the relocation entry.
This is still slower than updating through the GTT, so we prefer using
the GTT where possible, but it is orders of magnitude faster than
relocate_entry_cpu() as we typically do not then have to clflush the entire buffer.
An alternative idea of using a temporary WC mapping of the backing store
is promising (it should be faster than using the GTT itself), but
requires fairly extensive arch/x86 support - along the lines of
kmap_atomic_prot_pfn() (which is not universally implemented even for
x86).
Testcase: igt/gem_exec_big #byt
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++++++++++++--
1 file changed, 60 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e3ef17783765..a2c4a0a1ec3b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -251,7 +251,6 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
return (HAS_LLC(obj->base.dev) ||
obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
- !obj->map_and_fenceable ||
obj->cache_level != I915_CACHE_NONE);
}
@@ -337,6 +336,51 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
return 0;
}
+static void
+clflush_write32(void *addr, uint32_t value)
+{
+ /* This is not a fast path, so KISS. */
+ drm_clflush_virt_range(addr, sizeof(uint32_t));
+ *(uint32_t *)addr = value;
+ drm_clflush_virt_range(addr, sizeof(uint32_t));
+}
+
+static int
+relocate_entry_clflush(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_relocation_entry *reloc,
+ uint64_t target_offset)
+{
+ struct drm_device *dev = obj->base.dev;
+ uint32_t page_offset = offset_in_page(reloc->offset);
+ uint64_t delta = (int)reloc->delta + target_offset;
+ char *vaddr;
+ int ret;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret)
+ return ret;
+
+ vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+ reloc->offset >> PAGE_SHIFT));
+ clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+
+ if (INTEL_INFO(dev)->gen >= 8) {
+ page_offset = offset_in_page(page_offset + sizeof(uint32_t));
+
+ if (page_offset == 0) {
+ kunmap_atomic(vaddr);
+ vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+ (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+ }
+
+ clflush_write32(vaddr + page_offset, upper_32_bits(delta));
+ }
+
+ kunmap_atomic(vaddr);
+
+ return 0;
+}
+
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
struct eb_vmas *eb,
@@ -426,9 +470,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (use_cpu_reloc(obj))
ret = relocate_entry_cpu(obj, reloc, target_offset);
- else
+ else if (obj->map_and_fenceable)
ret = relocate_entry_gtt(obj, reloc, target_offset);
-
+ else if (cpu_has_clflush)
+ ret = relocate_entry_clflush(obj, reloc, target_offset);
+ else
+ ret = -ENODEV;
if (ret)
return ret;
@@ -525,6 +572,12 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
return ret;
}
+static bool only_mappable_for_reloc(unsigned int flags)
+{
+ return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
+ __EXEC_OBJECT_NEEDS_MAP;
+}
+
static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
@@ -544,6 +597,10 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+ if (ret == -ENOSPC && only_mappable_for_reloc(entry->flags))
+ ret = i915_gem_object_pin(obj, vma->vm,
+ entry->alignment,
+ flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
if (ret)
return ret;
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers
2015-01-09 17:50 [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers Chris Wilson
@ 2015-01-10 1:51 ` shuang.he
0 siblings, 0 replies; 2+ messages in thread
From: shuang.he @ 2015-01-10 1:51 UTC (permalink / raw)
To: shuang.he, intel-gfx, chris
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
PNV 354/354 354/354
ILK 354/354 354/354
SNB -17 418/439 401/439
IVB 488/488 488/488
BYT 278/280 278/280
HSW +2-40 530/536 492/536
BDW -1 406/406 405/406
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
SNB igt_kms_flip_event_leak NSPT(2, M22M35)PASS(1, M35) NSPT(1, M35)
*SNB igt_gem_concurrent_blit_gtt-bcs-early-read-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-bcs-gpu-read-after-write-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-bcs-overwrite-source-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-bcs-overwrite-source-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-rcs-early-read-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-rcs-early-read-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-rcs-overwrite-source-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gtt-rcs-overwrite-source-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gttX-bcs-early-read-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gttX-bcs-gpu-read-after-write-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gttX-bcs-overwrite-source-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gttX-bcs-overwrite-source-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gttX-rcs-overwrite-source-forked PASS(3, M35M22) DMESG_WARN(1, M35)
*SNB igt_gem_concurrent_blit_gttX-rcs-overwrite-source-interruptible PASS(3, M35M22) DMESG_WARN(1, M35)
HSW igt_kms_cursor_crc_cursor-size-change NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_kms_fence_pin_leak NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_kms_flip_dpms-vs-vblank-race DMESG_WARN(1, M40)PASS(2, M40M19) PASS(1, M19)
HSW igt_kms_flip_event_leak NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_kms_flip_flip-vs-dpms-off-vs-modeset DMESG_WARN(1, M40)PASS(2, M40M19) PASS(1, M19)
HSW igt_kms_mmio_vs_cs_flip_setcrtc_vs_cs_flip NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_kms_mmio_vs_cs_flip_setplane_vs_cs_flip NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_lpsp_non-edp NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_cursor NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_cursor-dpms NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_dpms-mode-unset-non-lpsp NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_dpms-non-lpsp NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_drm-resources-equal NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_fences NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_fences-dpms NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_gem-execbuf NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_gem-mmap-cpu NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_gem-mmap-gtt NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_gem-pread NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_i2c NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_modeset-non-lpsp NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_modeset-non-lpsp-stress-no-wait NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_pci-d3-state NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_pm_rpm_rte NSPT(2, M40M19)PASS(1, M40) NSPT(1, M19)
HSW igt_gem_concurrent_blit_gtt-bcs-early-read-forked DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-bcs-early-read-interruptible DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-bcs-gpu-read-after-write-forked DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-bcs-gpu-read-after-write-interruptible DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-bcs-overwrite-source-forked DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-bcs-overwrite-source-interruptible DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-rcs-early-read-forked DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-rcs-early-read-interruptible DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-forked DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-interruptible DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gtt-rcs-overwrite-source-forked DMESG_WARN(2, M40M19)PASS(1, M40) DMESG_WARN(1, M19)
*HSW igt_gem_concurrent_blit_gttX-bcs-early-read-interruptible DMESG_WARN(1, M40)PASS(2, M40M19) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gttX-bcs-overwrite-source-forked DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gttX-bcs-overwrite-source-interruptible DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gttX-rcs-early-read-interruptible DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gttX-rcs-gpu-read-after-write-interruptible DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gttX-rcs-overwrite-source-forked DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
HSW igt_gem_concurrent_blit_gttX-rcs-overwrite-source-interruptible DMESG_WARN(1, M19)PASS(1, M40) DMESG_WARN(1, M19)
*BDW igt_gem_concurrent_blit_gtt-bcs-early-read-interruptible PASS(3, M30M28) DMESG_WARN(1, M30)
Note: You need to pay more attention to lines starting with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-01-10 1:51 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-09 17:50 [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers Chris Wilson
2015-01-10 1:51 ` shuang.he
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox