public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers
@ 2015-01-09 17:50 Chris Wilson
  2015-01-10  1:51 ` shuang.he
  0 siblings, 1 reply; 2+ messages in thread
From: Chris Wilson @ 2015-01-09 17:50 UTC (permalink / raw)
  To: intel-gfx

If the batch buffer is too large to fit into the aperture and we need a
GTT mapping for relocations, we currently fail. This only applies to a
subset of machines in a subset of environments, but is still undesirable. We
can simply check after failing to insert the batch into the GTT as to
whether we only need a mappable binding for relocation and, if so, we can
revert to using a non-mappable binding and an alternate relocation
method. However, using relocate_entry_cpu() is excruciatingly slow for
large buffers on non-LLC as the entire buffer requires clflushing before
and after the relocation handling. Alternatively, we can implement a
third relocation method that only clflushes around the relocation entry.
This is still slower than updating through the GTT, so we prefer using
the GTT where possible, but it is orders of magnitude faster than
relocate_entry_cpu() as we typically do not have to clflush the entire
buffer.

An alternative idea of using a temporary WC mapping of the backing store
is promising (it should be faster than using the GTT itself), but
requires fairly extensive arch/x86 support - along the lines of
kmap_atomic_prot_pfn() (which is not universally implemented even for
x86).

Testcase: igt/gem_exec_big #byt
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e3ef17783765..a2c4a0a1ec3b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -251,7 +251,6 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (HAS_LLC(obj->base.dev) ||
 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
-		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }
 
@@ -337,6 +336,51 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static void
+clflush_write32(void *addr, uint32_t value)
+{
+	/* This is not a fast path, so KISS. */
+	drm_clflush_virt_range(addr, sizeof(uint32_t));
+	*(uint32_t *)addr = value;
+	drm_clflush_virt_range(addr, sizeof(uint32_t));
+}
+
+static int
+relocate_entry_clflush(struct drm_i915_gem_object *obj,
+		       struct drm_i915_gem_relocation_entry *reloc,
+		       uint64_t target_offset)
+{
+	struct drm_device *dev = obj->base.dev;
+	uint32_t page_offset = offset_in_page(reloc->offset);
+	uint64_t delta = (int)reloc->delta + target_offset;
+	char *vaddr;
+	int ret;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret)
+		return ret;
+
+	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+				reloc->offset >> PAGE_SHIFT));
+	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+
+	if (INTEL_INFO(dev)->gen >= 8) {
+		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
+
+		if (page_offset == 0) {
+			kunmap_atomic(vaddr);
+			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+		}
+
+		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
+	}
+
+	kunmap_atomic(vaddr);
+
+	return 0;
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
@@ -426,9 +470,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	if (use_cpu_reloc(obj))
 		ret = relocate_entry_cpu(obj, reloc, target_offset);
-	else
+	else if (obj->map_and_fenceable)
 		ret = relocate_entry_gtt(obj, reloc, target_offset);
-
+	else if (cpu_has_clflush)
+		ret = relocate_entry_clflush(obj, reloc, target_offset);
+	else
+		ret = -ENODEV;
 	if (ret)
 		return ret;
 
@@ -525,6 +572,12 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 	return ret;
 }
 
+static bool only_mappable_for_reloc(unsigned int flags)
+{
+	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
+		__EXEC_OBJECT_NEEDS_MAP;
+}
+
 static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_engine_cs *ring,
@@ -544,6 +597,10 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 
 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	if (ret == -ENOSPC && only_mappable_for_reloc(entry->flags))
+		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->alignment,
+					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
 	if (ret)
 		return ret;
 
-- 
2.1.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers
  2015-01-09 17:50 [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers Chris Wilson
@ 2015-01-10  1:51 ` shuang.he
  0 siblings, 0 replies; 2+ messages in thread
From: shuang.he @ 2015-01-10  1:51 UTC (permalink / raw)
  To: shuang.he, intel-gfx, chris

Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                                  354/354              354/354
ILK                                  354/354              354/354
SNB                 -17              418/439              401/439
IVB                                  488/488              488/488
BYT                                  278/280              278/280
HSW              +2-40              530/536              492/536
BDW                 -1              406/406              405/406
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
 SNB  igt_kms_flip_event_leak      NSPT(2, M22M35)PASS(1, M35)      NSPT(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-bcs-early-read-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-bcs-gpu-read-after-write-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-bcs-overwrite-source-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-bcs-overwrite-source-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-rcs-early-read-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-rcs-early-read-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-rcs-overwrite-source-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gtt-rcs-overwrite-source-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gttX-bcs-early-read-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gttX-bcs-gpu-read-after-write-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gttX-bcs-overwrite-source-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gttX-bcs-overwrite-source-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gttX-rcs-overwrite-source-forked      PASS(3, M35M22)      DMESG_WARN(1, M35)
*SNB  igt_gem_concurrent_blit_gttX-rcs-overwrite-source-interruptible      PASS(3, M35M22)      DMESG_WARN(1, M35)
 HSW  igt_kms_cursor_crc_cursor-size-change      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_kms_fence_pin_leak      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_kms_flip_dpms-vs-vblank-race      DMESG_WARN(1, M40)PASS(2, M40M19)      PASS(1, M19)
 HSW  igt_kms_flip_event_leak      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_kms_flip_flip-vs-dpms-off-vs-modeset      DMESG_WARN(1, M40)PASS(2, M40M19)      PASS(1, M19)
 HSW  igt_kms_mmio_vs_cs_flip_setcrtc_vs_cs_flip      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_kms_mmio_vs_cs_flip_setplane_vs_cs_flip      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_lpsp_non-edp      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_cursor      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_cursor-dpms      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_dpms-mode-unset-non-lpsp      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_dpms-non-lpsp      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_drm-resources-equal      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_fences      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_fences-dpms      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_gem-execbuf      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_gem-mmap-cpu      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_gem-mmap-gtt      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_gem-pread      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_i2c      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_modeset-non-lpsp      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_modeset-non-lpsp-stress-no-wait      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_pci-d3-state      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_pm_rpm_rte      NSPT(2, M40M19)PASS(1, M40)      NSPT(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-bcs-early-read-forked      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-bcs-early-read-interruptible      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-bcs-gpu-read-after-write-forked      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-bcs-gpu-read-after-write-interruptible      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-bcs-overwrite-source-forked      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-bcs-overwrite-source-interruptible      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-rcs-early-read-forked      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-rcs-early-read-interruptible      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-forked      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-rcs-gpu-read-after-write-interruptible      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gtt-rcs-overwrite-source-forked      DMESG_WARN(2, M40M19)PASS(1, M40)      DMESG_WARN(1, M19)
*HSW  igt_gem_concurrent_blit_gttX-bcs-early-read-interruptible      DMESG_WARN(1, M40)PASS(2, M40M19)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gttX-bcs-overwrite-source-forked      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gttX-bcs-overwrite-source-interruptible      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gttX-rcs-early-read-interruptible      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gttX-rcs-gpu-read-after-write-interruptible      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gttX-rcs-overwrite-source-forked      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
 HSW  igt_gem_concurrent_blit_gttX-rcs-overwrite-source-interruptible      DMESG_WARN(1, M19)PASS(1, M40)      DMESG_WARN(1, M19)
*BDW  igt_gem_concurrent_blit_gtt-bcs-early-read-interruptible      PASS(3, M30M28)      DMESG_WARN(1, M30)
Note: You need to pay more attention to lines starting with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-01-10  1:51 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-09 17:50 [PATCH] drm/i915: Fallback to using CPU relocations for large batch buffers Chris Wilson
2015-01-10  1:51 ` shuang.he

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox