public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Ben Widawsky <benjamin.widawsky@intel.com>
To: Intel GFX <intel-gfx@lists.freedesktop.org>
Cc: Ben Widawsky <ben@bwidawsk.net>,
	Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [PATCH 2/2] intel: Add prelocation support
Date: Thu, 21 Aug 2014 20:12:33 -0700	[thread overview]
Message-ID: <1408677155-1840-71-git-send-email-benjamin.widawsky@intel.com> (raw)
In-Reply-To: <1408677155-1840-1-git-send-email-benjamin.widawsky@intel.com>

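Add drm_intel_bo_alloc_prelocated(), which allocates a buffer object whose address is chosen by userspace at allocation time (optionally in the low 32-bit range of the process address space via MAP_32BIT) and stored in bo->offset64. Relocations that target a prelocated object are not passed to the kernel; userspace is expected to use offset64 directly. Prelocated objects skip the BO cache and are never marked reusable. The entry point currently returns NULL unless the device is gen8+ with LLC.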

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 intel/intel_bufmgr.h     |   8 ++++
 intel/intel_bufmgr_gem.c | 102 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9383c72..e4ecc44 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -88,6 +88,8 @@ struct _drm_intel_bo {
 	 * Last seen card virtual address (offset from the beginning of the
 	 * aperture) for the object.  This should be used to fill relocation
 	 * entries when calling drm_intel_bo_emit_reloc()
+	 *
+	 * This is also useful when prelocating an object.
 	 */
 	uint64_t offset64;
 };
@@ -106,6 +108,8 @@ typedef struct _drm_intel_aub_annotation {
 } drm_intel_aub_annotation;
 
 #define BO_ALLOC_FOR_RENDER (1<<0)
+#define BO_ALLOC_PRELOCATE  (1<<1)
+#define BO_ALLOC_PRELOCATE_32  (1<<2)
 
 drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
 				 unsigned long size, unsigned int alignment);
@@ -119,6 +123,10 @@ drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
 				       uint32_t *tiling_mode,
 				       unsigned long *pitch,
 				       unsigned long flags);
+drm_intel_bo *drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+					    const char *name,
+					    unsigned long size,
+					    int low);
 void drm_intel_bo_reference(drm_intel_bo *bo);
 void drm_intel_bo_unreference(drm_intel_bo *bo);
 int drm_intel_bo_map(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index d7d3769..5a2a9bd 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -221,6 +221,11 @@ struct _drm_intel_bo_gem {
 	 */
 	bool idle;
 
+	/** Non-zero if this BO was allocated prelocated; set to PRELOCATE_MMAP or PRELOCATE_MALLOC. */
+	#define PRELOCATE_MMAP 1
+	#define PRELOCATE_MALLOC 2
+	int prelocated;
+
 	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
@@ -489,7 +494,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
 	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
 	bufmgr_gem->exec2_objects[index].alignment = 0;
-	bufmgr_gem->exec2_objects[index].offset = 0;
+	if (bo_gem->prelocated)
+		bufmgr_gem->exec2_objects[index].offset = bo->offset64;
+	else
+		bufmgr_gem->exec2_objects[index].offset = 0;
 	bufmgr_gem->exec_bos[index] = bo;
 	bufmgr_gem->exec2_objects[index].flags = 0;
 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
@@ -637,9 +645,10 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
 }
 
 static drm_intel_bo_gem *
-__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
+__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, bool prelocate, bool low32)
 {
 	struct drm_i915_gem_create create;
+	drm_intel_bo *bo;
 	drm_intel_bo_gem *bo_gem;
 	int ret;
 
@@ -647,10 +656,35 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
 	if (!bo_gem)
 		return NULL;
 
+	bo = (drm_intel_bo *)bo_gem;
+
 	bo_gem->bo.size = size;
 
 	VG_CLEAR(create);
 	create.size = size;
+	/* FIXME: This is a gross hack to repurpose the create args */
+	if (prelocate) {
+		create.size |= (1ULL << 63);
+		if (low32) {
+			bo->offset64 = (uint64_t)mmap(NULL, size,
+					PROT_READ | PROT_WRITE,
+					MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT,
+					-1, 0);
+			bo_gem->prelocated = PRELOCATE_MALLOC;
+		} else {
+			bo->offset64 = (uint64_t)aligned_alloc(getpagesize(), size);
+			bo_gem->prelocated = PRELOCATE_MMAP;
+		}
+		if (!bo->offset64) {
+			DBG("Couldn't allocate %ld address space for object. %s\n",
+			    size, strerror(errno));
+			free(bo_gem);
+			return NULL;
+		}
+		create.handle = bo->offset64 >> 32;
+		create.pad = bo->offset64;
+	} else
+		bo->offset64 = 0x1;
 
 	ret = drmIoctl(bufmgr_gem->fd,
 		       DRM_IOCTL_I915_GEM_CREATE,
@@ -658,6 +692,10 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
 	bo_gem->gem_handle = create.handle;
 	bo_gem->bo.handle = bo_gem->gem_handle;
 	if (ret != 0) {
+		if (prelocate && low32)
+			munmap((void *)bo->offset64, size);
+		else if (prelocate)
+			free((void *)bo->offset64);
 		free(bo_gem);
 		return NULL;
 	}
@@ -687,10 +725,17 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 	struct drm_intel_gem_bo_bucket *bucket;
 	bool alloc_from_cache;
 	unsigned long bo_size;
-	bool for_render = false;
+	bool for_render = false, prelocate = false, low = false;
 
 	if (flags & BO_ALLOC_FOR_RENDER)
 		for_render = true;
+	if (flags & BO_ALLOC_PRELOCATE) {
+		if (flags & BO_ALLOC_PRELOCATE_32)
+			low = true;
+		prelocate = true;
+		bo_size = size;
+		goto skip_cache;
+	}
 
 	/* Round the allocated size up to a power of two number of pages. */
 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
@@ -756,7 +801,8 @@ retry:
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	if (!alloc_from_cache) {
-		bo_gem = __bo_alloc(bufmgr_gem, bo_size);
+skip_cache:
+		bo_gem = __bo_alloc(bufmgr_gem, bo_size, prelocate, low);
 		if (!bo_gem)
 			return NULL;
 
@@ -774,7 +820,7 @@ retry:
 	bo_gem->reloc_tree_fences = 0;
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
-	bo_gem->reusable = true;
+	bo_gem->reusable = !prelocate;
 	bo_gem->aub_annotations = NULL;
 	bo_gem->aub_annotation_count = 0;
 
@@ -859,6 +905,25 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
 					       tiling, stride);
 }
 
+drm_public drm_intel_bo *
+drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+			      const char *name,
+			      unsigned long size,
+			      int low)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+	int flag = BO_ALLOC_PRELOCATE;
+	/* FIXME: Need to replace this with a paramcheck */
+	if (bufmgr_gem->gen < 8 || !bufmgr_gem->has_llc)
+		return NULL;
+
+	if (low)
+		flag |= BO_ALLOC_PRELOCATE_32;
+
+	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
+					       flag, I915_TILING_NONE, 0);
+}
+
 /**
  * Returns a drm_intel_bo wrapping the given buffer object handle.
  *
@@ -964,7 +1029,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
 	int ret;
 
 	DRMLISTDEL(&bo_gem->vma_list);
-	if (bo_gem->mem_virtual) {
+	if (bo_gem->mem_virtual && !bo_gem->prelocated) {
 		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
 		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
 		bufmgr_gem->vma_count--;
@@ -982,6 +1047,12 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
 		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
 		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
 	}
+
+	if (bo_gem->prelocated == PRELOCATE_MMAP)
+		munmap((void *)bo->offset64, bo->size);
+	else if (bo_gem->prelocated == PRELOCATE_MALLOC)
+		free((void *)bo->offset64);
+
 	free(bo_gem->aub_annotations);
 	free(bo);
 }
@@ -1190,7 +1261,9 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 	if (bo_gem->map_count++ == 0)
 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
 
-	if (!bo_gem->mem_virtual) {
+	if (bo_gem->prelocated) {
+		bo_gem->mem_virtual = (void *)bo->offset64;
+	} else if (!bo_gem->mem_virtual) {
 		struct drm_i915_gem_mmap mmap_arg;
 
 		DBG("bo_map: %d (%s), map_count=%d\n",
@@ -1683,6 +1756,17 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 		return -ENOMEM;
 	}
 
+	/* If the target we're trying to point to was a prelocated target, then we
+	 * can skip actually telling the kernel about the relocation. Userspace
+	 * is expected to use offset64 */
+	if (target_bo_gem->prelocated) {
+		assert(target_bo->offset64 != 0x1);
+		assert(target_bo->offset64 != 0); // temp hack
+		if (bo_gem->validate_index == -1)
+			drm_intel_add_validate_buffer2(target_bo, false);
+		return 0;
+	}
+
 	/* We never use HW fences for rendering on 965+ */
 	if (bufmgr_gem->gen >= 4)
 		need_fence = false;
@@ -1863,7 +1947,6 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 	}
 }
 
-
 static void
 drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
 {
@@ -1894,6 +1977,9 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 
+		if (bo_gem->prelocated)
+			continue;
+
 		/* Update the buffer offset */
 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
 			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
-- 
2.0.4

  parent reply	other threads:[~2014-08-22  3:14 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-22  3:11 [PATCH 00/68] Broadwell 48b addressing and prelocations (no relocs) Ben Widawsky
2014-08-22  3:11 ` [PATCH 01/68] drm/i915: Split up do_switch Ben Widawsky
2014-08-22  3:11 ` [PATCH 02/68] drm/i915: Extract l3 remapping out of ctx switch Ben Widawsky
2014-08-22  3:11 ` [PATCH 03/68] drm/i915/ppgtt: Load address space after mi_set_context Ben Widawsky
2014-08-22  3:11 ` [PATCH 04/68] drm/i915: Fix another another use-after-free in do_switch Ben Widawsky
2014-08-22  3:11 ` [PATCH 05/68] drm/i915/ctx: Return earlier on failure Ben Widawsky
2014-08-22  3:11 ` [PATCH 06/68] drm/i915/error: vma error capture prettyify Ben Widawsky
2014-08-22  3:11 ` [PATCH 07/68] drm/i915/error: Do a better job of disambiguating VMAs Ben Widawsky
2014-08-22  3:11 ` [PATCH 08/68] drm/i915/error: Capture vmas instead of BOs Ben Widawsky
2014-08-22  3:11 ` [PATCH 09/68] drm/i915: Add some extra guards in evict_vm Ben Widawsky
2014-08-22  3:11 ` [PATCH 10/68] drm/i915: Make an uninterruptible evict Ben Widawsky
2014-08-22  3:11 ` [PATCH 11/68] drm/i915: More correct (slower) ppgtt cleanup Ben Widawsky
2014-08-22  3:11 ` [PATCH 12/68] drm/i915: Defer PPGTT cleanup Ben Widawsky
2014-08-22  3:11 ` [PATCH 13/68] drm/i915/bdw: Enable full PPGTT Ben Widawsky
2014-08-22  3:11 ` [PATCH 14/68] drm/i915: Get the error state over the wire (HACKish) Ben Widawsky
2014-08-22  3:11 ` [PATCH 15/68] drm/i915/gen8: Invalidate TLBs before PDP reload Ben Widawsky
2014-08-22  3:11 ` [PATCH 16/68] drm/i915: Remove false assertion in ppgtt_release Ben Widawsky
2014-08-22  3:11 ` [PATCH 17/68] Revert "drm/i915/bdw: Use timeout mode for RC6 on bdw" Ben Widawsky
2014-10-31 19:45   ` Rodrigo Vivi
2014-10-31 21:10     ` Rodrigo Vivi
2014-08-22  3:11 ` [PATCH 18/68] drm/i915/trace: Fix offsets for 64b Ben Widawsky
2014-08-22  3:11 ` [PATCH 19/68] drm/i915: Wrap VMA binding Ben Widawsky
2014-08-22  3:11 ` [PATCH 20/68] drm/i915: Make pin global flags explicit Ben Widawsky
2014-08-22  3:11 ` [PATCH 21/68] drm/i915: Split out aliasing binds Ben Widawsky
2014-08-22  3:11 ` [PATCH 22/68] drm/i915: fix gtt_total_entries() Ben Widawsky
2014-08-22  3:11 ` [PATCH 23/68] drm/i915: Rename to GEN8_LEGACY_PDPES Ben Widawsky
2014-08-22  3:11 ` [PATCH 24/68] drm/i915: Split out verbose PPGTT dumping Ben Widawsky
2014-08-22  3:11 ` [PATCH 25/68] drm/i915: s/pd/pdpe, s/pt/pde Ben Widawsky
2014-08-22  3:11 ` [PATCH 26/68] drm/i915: rename map/unmap to dma_map/unmap Ben Widawsky
2014-08-22  3:11 ` [PATCH 27/68] drm/i915: Setup less PPGTT on failed pagedir Ben Widawsky
2014-08-22  3:11 ` [PATCH 28/68] drm/i915: clean up PPGTT init error path Ben Widawsky
2014-08-22  3:11 ` [PATCH 29/68] drm/i915: Un-hardcode number of page directories Ben Widawsky
2014-08-22  3:11 ` [PATCH 30/68] drm/i915: Make gen6_write_pdes gen6_map_page_tables Ben Widawsky
2014-08-22  3:11 ` [PATCH 31/68] drm/i915: Range clearing is PPGTT agnostic Ben Widawsky
2014-08-22  3:11 ` [PATCH 32/68] drm/i915: Page table helpers, and define renames Ben Widawsky
2014-08-22  3:11 ` [PATCH 33/68] drm/i915: construct page table abstractions Ben Widawsky
2014-08-22  3:11 ` [PATCH 34/68] drm/i915: Complete page table structures Ben Widawsky
2014-08-22  3:11 ` [PATCH 35/68] drm/i915: Create page table allocators Ben Widawsky
2014-08-22  3:11 ` [PATCH 36/68] drm/i915: Generalize GEN6 mapping Ben Widawsky
2014-08-22  3:12 ` [PATCH 37/68] drm/i915: Clean up pagetable DMA map & unmap Ben Widawsky
2014-08-22  3:12 ` [PATCH 38/68] drm/i915: Always dma map page table allocations Ben Widawsky
2014-08-22  3:12 ` [PATCH 39/68] drm/i915: Consolidate dma mappings Ben Widawsky
2014-08-22  3:12 ` [PATCH 40/68] drm/i915: Always dma map page directory allocations Ben Widawsky
2014-08-22  3:12 ` [PATCH 41/68] drm/i915: Track GEN6 page table usage Ben Widawsky
2014-08-22  3:12 ` [PATCH 42/68] drm/i915: Extract context switch skip logic Ben Widawsky
2014-08-22  3:12 ` [PATCH 43/68] drm/i915: Track page table reload need Ben Widawsky
2014-08-22  3:12 ` [PATCH 44/68] drm/i915: Initialize all contexts Ben Widawsky
2014-08-22  3:12 ` [PATCH 45/68] drm/i915: Finish gen6/7 dynamic page table allocation Ben Widawsky
2014-08-22  3:12 ` [PATCH 46/68] drm/i915/bdw: Use dynamic allocation idioms on free Ben Widawsky
2014-08-22  3:12 ` [PATCH 47/68] drm/i915/bdw: pagedirs rework allocation Ben Widawsky
2014-08-22  3:12 ` [PATCH 48/68] drm/i915/bdw: pagetable allocation rework Ben Widawsky
2014-08-22  3:12 ` [PATCH 49/68] drm/i915/bdw: Make the pdp switch a bit less hacky Ben Widawsky
2014-08-22  3:12 ` [PATCH 50/68] drm/i915: num_pd_pages/num_pd_entries isn't useful Ben Widawsky
2014-08-22  3:12 ` [PATCH 51/68] drm/i915: Extract PPGTT param from pagedir alloc Ben Widawsky
2014-08-22  3:12 ` [PATCH 52/68] drm/i915/bdw: Split out mappings Ben Widawsky
2014-08-22  3:12 ` [PATCH 53/68] drm/i915/bdw: begin bitmap tracking Ben Widawsky
2014-08-22  3:12 ` [PATCH 54/68] drm/i915/bdw: Dynamic page table allocations Ben Widawsky
2014-08-22  3:12 ` [PATCH 55/68] drm/i915/bdw: Make pdp allocation more dynamic Ben Widawsky
2014-08-22  3:12 ` [PATCH 56/68] drm/i915/bdw: Abstract PDP usage Ben Widawsky
2014-08-22  3:12 ` [PATCH 57/68] drm/i915/bdw: Add dynamic page trace events Ben Widawsky
2014-08-22  3:12 ` [PATCH 58/68] drm/i915/bdw: Add ppgtt info for dynamic pages Ben Widawsky
2014-08-22  3:12 ` [PATCH 59/68] drm/i915/bdw: implement alloc/teardown for 4lvl Ben Widawsky
2014-08-22  3:12 ` [PATCH 60/68] drm/i915/bdw: Add 4 level switching infrastructure Ben Widawsky
2014-08-22  3:12 ` [PATCH 61/68] drm/i915/bdw: Generalize PTE writing for GEN8 PPGTT Ben Widawsky
2014-08-22  3:12 ` [PATCH 62/68] drm/i915: Plumb sg_iter through va allocation ->maps Ben Widawsky
2014-08-22  3:12 ` [PATCH 63/68] drm/i915: Introduce map and unmap for VMAs Ben Widawsky
2014-08-22  3:12 ` [PATCH 64/68] drm/i915: Depend exclusively on map and unmap_vma Ben Widawsky
2014-08-22  3:12 ` [PATCH 65/68] drm/i915: Expand error state's address width to 64b Ben Widawsky
2014-08-22  3:12 ` [PATCH 66/68] drm/i915/bdw: Flip the 48b switch Ben Widawsky
2014-08-22  3:12 ` [PATCH 67/68] drm/i915: Provide a soft_pin hook Ben Widawsky
2014-08-22  3:12 ` [PATCH 68/68] XXX: drm/i915: Unexplained workarounds Ben Widawsky
2014-08-22  3:12 ` [PATCH 1/2] intel: Split out bo allocation Ben Widawsky
2014-08-22  3:12 ` Ben Widawsky [this message]
2014-08-22  3:12 ` [PATCH] i965: First step toward prelocation Ben Widawsky
2014-08-22 12:15   ` [Mesa-dev] " Alex Deucher
2014-08-22 17:14     ` Ben Widawsky
2014-08-22  3:12 ` [PATCH] no_reloc: test case Ben Widawsky
2014-08-22  6:30 ` [Intel-gfx] [PATCH 00/68] Broadwell 48b addressing and prelocations (no relocs) Chris Wilson
2014-08-22  6:59   ` Kenneth Graunke
2014-08-22  7:03     ` Chris Wilson
2014-08-22 13:30       ` Daniel Vetter
2014-08-22 13:38         ` [Intel-gfx] " Chris Wilson
2014-08-22 20:29           ` Daniel Vetter
2014-08-22 20:38           ` [Intel-gfx] " Daniel Vetter
2014-08-25 22:42             ` Jesse Barnes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1408677155-1840-71-git-send-email-benjamin.widawsky@intel.com \
    --to=benjamin.widawsky@intel.com \
    --cc=ben@bwidawsk.net \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox