From: Ben Widawsky <benjamin.widawsky@intel.com>
To: Intel GFX <intel-gfx@lists.freedesktop.org>
Cc: Ben Widawsky <ben@bwidawsk.net>,
Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [PATCH 2/2] intel: Add prelocation support
Date: Thu, 21 Aug 2014 20:12:33 -0700 [thread overview]
Message-ID: <1408677155-1840-71-git-send-email-benjamin.widawsky@intel.com> (raw)
In-Reply-To: <1408677155-1840-1-git-send-email-benjamin.widawsky@intel.com>
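Add drm_intel_bo_alloc_prelocated() together with the BO_ALLOC_PRELOCATE
and BO_ALLOC_PRELOCATE_32 allocation flags.
A prelocated buffer object has its offset64 chosen by userspace at
allocation time: the address comes from mmap() with MAP_32BIT when a low
address is requested, and from aligned_alloc() otherwise. The address is
handed to the kernel by repurposing the GEM create ioctl arguments, which
is a hack and is marked FIXME in the code.
Prelocated objects bypass the BO cache, are not marked reusable, pass
their offset64 in the execbuffer2 object entry, do not have relocation
entries emitted when used as a relocation target, and are skipped when
buffer offsets are updated after execbuffer2. The new entry point returns
NULL unless the device is gen8+ with LLC.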
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
intel/intel_bufmgr.h | 8 ++++
intel/intel_bufmgr_gem.c | 102 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 102 insertions(+), 8 deletions(-)
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9383c72..e4ecc44 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -88,6 +88,8 @@ struct _drm_intel_bo {
* Last seen card virtual address (offset from the beginning of the
* aperture) for the object. This should be used to fill relocation
* entries when calling drm_intel_bo_emit_reloc()
+ *
+ * This is also useful when prelocating an object.
*/
uint64_t offset64;
};
@@ -106,6 +108,8 @@ typedef struct _drm_intel_aub_annotation {
} drm_intel_aub_annotation;
#define BO_ALLOC_FOR_RENDER (1<<0)
+#define BO_ALLOC_PRELOCATE (1<<1)
+#define BO_ALLOC_PRELOCATE_32 (1<<2)
drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
unsigned long size, unsigned int alignment);
@@ -119,6 +123,10 @@ drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
uint32_t *tiling_mode,
unsigned long *pitch,
unsigned long flags);
+drm_intel_bo *drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+ const char *name,
+ unsigned long size,
+ int low);
void drm_intel_bo_reference(drm_intel_bo *bo);
void drm_intel_bo_unreference(drm_intel_bo *bo);
int drm_intel_bo_map(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index d7d3769..5a2a9bd 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -221,6 +221,11 @@ struct _drm_intel_bo_gem {
*/
bool idle;
+ /** Non-zero when the object was allocated prelocated; holds one of the PRELOCATE_* values below. */
+ #define PRELOCATE_MMAP 1
+ #define PRELOCATE_MALLOC 2
+ int prelocated;
+
/**
* Size in bytes of this buffer and its relocation descendents.
*
@@ -489,7 +494,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
bufmgr_gem->exec2_objects[index].alignment = 0;
- bufmgr_gem->exec2_objects[index].offset = 0;
+ if (bo_gem->prelocated)
+ bufmgr_gem->exec2_objects[index].offset = bo->offset64;
+ else
+ bufmgr_gem->exec2_objects[index].offset = 0;
bufmgr_gem->exec_bos[index] = bo;
bufmgr_gem->exec2_objects[index].flags = 0;
bufmgr_gem->exec2_objects[index].rsvd1 = 0;
@@ -637,9 +645,10 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
}
static drm_intel_bo_gem *
-__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
+__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, bool prelocate, bool low32)
{
struct drm_i915_gem_create create;
+ drm_intel_bo *bo;
drm_intel_bo_gem *bo_gem;
int ret;
@@ -647,10 +656,35 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
if (!bo_gem)
return NULL;
+ bo = (drm_intel_bo *)bo_gem;
+
bo_gem->bo.size = size;
VG_CLEAR(create);
create.size = size;
+ /* FIXME: This is a gross hack to repurpose the create args */
+ if (prelocate) {
+ create.size |= (1ULL << 63);
+ if (low32) {
+ bo->offset64 = (uint64_t)mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT,
+ -1, 0);
+ bo_gem->prelocated = PRELOCATE_MALLOC;
+ } else {
+ bo->offset64 = (uint64_t)aligned_alloc(getpagesize(), size);
+ bo_gem->prelocated = PRELOCATE_MMAP;
+ }
+ if (!bo->offset64) {
+ DBG("Couldn't allocate %ld address space for object. %s\n",
+ size, strerror(errno));
+ free(bo_gem);
+ return NULL;
+ }
+ create.handle = bo->offset64 >> 32;
+ create.pad = bo->offset64;
+ } else
+ bo->offset64 = 0x1;
ret = drmIoctl(bufmgr_gem->fd,
DRM_IOCTL_I915_GEM_CREATE,
@@ -658,6 +692,10 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
bo_gem->gem_handle = create.handle;
bo_gem->bo.handle = bo_gem->gem_handle;
if (ret != 0) {
+ if (prelocate && low32)
+ munmap((void *)bo->offset64, size);
+ else if (prelocate)
+ free((void *)bo->offset64);
free(bo_gem);
return NULL;
}
@@ -687,10 +725,17 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
struct drm_intel_gem_bo_bucket *bucket;
bool alloc_from_cache;
unsigned long bo_size;
- bool for_render = false;
+ bool for_render = false, prelocate = false, low = false;
if (flags & BO_ALLOC_FOR_RENDER)
for_render = true;
+ if (flags & BO_ALLOC_PRELOCATE) {
+ if (flags & BO_ALLOC_PRELOCATE_32)
+ low = true;
+ prelocate = true;
+ bo_size = size;
+ goto skip_cache;
+ }
/* Round the allocated size up to a power of two number of pages. */
bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
@@ -756,7 +801,8 @@ retry:
pthread_mutex_unlock(&bufmgr_gem->lock);
if (!alloc_from_cache) {
- bo_gem = __bo_alloc(bufmgr_gem, bo_size);
+skip_cache:
+ bo_gem = __bo_alloc(bufmgr_gem, bo_size, prelocate, low);
if (!bo_gem)
return NULL;
@@ -774,7 +820,7 @@ retry:
bo_gem->reloc_tree_fences = 0;
bo_gem->used_as_reloc_target = false;
bo_gem->has_error = false;
- bo_gem->reusable = true;
+ bo_gem->reusable = !prelocate;
bo_gem->aub_annotations = NULL;
bo_gem->aub_annotation_count = 0;
@@ -859,6 +905,25 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
tiling, stride);
}
+drm_public drm_intel_bo *
+drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+ const char *name,
+ unsigned long size,
+ int low) /* non-zero additionally requests BO_ALLOC_PRELOCATE_32 */
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+ int flag = BO_ALLOC_PRELOCATE; /* always request a prelocated allocation */
+ /* FIXME: Need to replace this with a paramcheck; until then gen < 8 or a non-LLC device gets NULL */
+ if (bufmgr_gem->gen < 8 || !bufmgr_gem->has_llc)
+ return NULL;
+
+ if (low)
+ flag |= BO_ALLOC_PRELOCATE_32;
+
+ return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
+ flag, I915_TILING_NONE, 0); /* untiled, alignment argument 0; result returned unchanged */
+}
+
/**
* Returns a drm_intel_bo wrapping the given buffer object handle.
*
@@ -964,7 +1029,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
int ret;
DRMLISTDEL(&bo_gem->vma_list);
- if (bo_gem->mem_virtual) {
+ if (bo_gem->mem_virtual && !bo_gem->prelocated) {
VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
munmap(bo_gem->mem_virtual, bo_gem->bo.size);
bufmgr_gem->vma_count--;
@@ -982,6 +1047,12 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
bo_gem->gem_handle, bo_gem->name, strerror(errno));
}
+
+ if (bo_gem->prelocated == PRELOCATE_MMAP)
+ munmap((void *)bo->offset64, bo->size);
+ else if (bo_gem->prelocated == PRELOCATE_MALLOC)
+ free((void *)bo->offset64);
+
free(bo_gem->aub_annotations);
free(bo);
}
@@ -1190,7 +1261,9 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
if (bo_gem->map_count++ == 0)
drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
- if (!bo_gem->mem_virtual) {
+ if (bo_gem->prelocated) {
+ bo_gem->mem_virtual = (void *)bo->offset64;
+ } else if (!bo_gem->mem_virtual) {
struct drm_i915_gem_mmap mmap_arg;
DBG("bo_map: %d (%s), map_count=%d\n",
@@ -1683,6 +1756,17 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
return -ENOMEM;
}
+ /* If the target we're trying to point to was a prelocated target, then we
+ * can skip actually telling the kernel about the relocation. Userspace
+ * is expected to use offset64 */
+ if (target_bo_gem->prelocated) {
+ assert(target_bo->offset64 != 0x1);
+ assert(target_bo->offset64 != 0); // temp hack
+ if (bo_gem->validate_index == -1)
+ drm_intel_add_validate_buffer2(target_bo, false);
+ return 0;
+ }
+
/* We never use HW fences for rendering on 965+ */
if (bufmgr_gem->gen >= 4)
need_fence = false;
@@ -1863,7 +1947,6 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
}
}
-
static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
@@ -1894,6 +1977,9 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+ if (bo_gem->prelocated)
+ continue;
+
/* Update the buffer offset */
if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
--
2.0.4
next prev parent reply other threads:[~2014-08-22 3:14 UTC|newest]
Thread overview: 85+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-22 3:11 [PATCH 00/68] Broadwell 48b addressing and prelocations (no relocs) Ben Widawsky
2014-08-22 3:11 ` [PATCH 01/68] drm/i915: Split up do_switch Ben Widawsky
2014-08-22 3:11 ` [PATCH 02/68] drm/i915: Extract l3 remapping out of ctx switch Ben Widawsky
2014-08-22 3:11 ` [PATCH 03/68] drm/i915/ppgtt: Load address space after mi_set_context Ben Widawsky
2014-08-22 3:11 ` [PATCH 04/68] drm/i915: Fix another another use-after-free in do_switch Ben Widawsky
2014-08-22 3:11 ` [PATCH 05/68] drm/i915/ctx: Return earlier on failure Ben Widawsky
2014-08-22 3:11 ` [PATCH 06/68] drm/i915/error: vma error capture prettyify Ben Widawsky
2014-08-22 3:11 ` [PATCH 07/68] drm/i915/error: Do a better job of disambiguating VMAs Ben Widawsky
2014-08-22 3:11 ` [PATCH 08/68] drm/i915/error: Capture vmas instead of BOs Ben Widawsky
2014-08-22 3:11 ` [PATCH 09/68] drm/i915: Add some extra guards in evict_vm Ben Widawsky
2014-08-22 3:11 ` [PATCH 10/68] drm/i915: Make an uninterruptible evict Ben Widawsky
2014-08-22 3:11 ` [PATCH 11/68] drm/i915: More correct (slower) ppgtt cleanup Ben Widawsky
2014-08-22 3:11 ` [PATCH 12/68] drm/i915: Defer PPGTT cleanup Ben Widawsky
2014-08-22 3:11 ` [PATCH 13/68] drm/i915/bdw: Enable full PPGTT Ben Widawsky
2014-08-22 3:11 ` [PATCH 14/68] drm/i915: Get the error state over the wire (HACKish) Ben Widawsky
2014-08-22 3:11 ` [PATCH 15/68] drm/i915/gen8: Invalidate TLBs before PDP reload Ben Widawsky
2014-08-22 3:11 ` [PATCH 16/68] drm/i915: Remove false assertion in ppgtt_release Ben Widawsky
2014-08-22 3:11 ` [PATCH 17/68] Revert "drm/i915/bdw: Use timeout mode for RC6 on bdw" Ben Widawsky
2014-10-31 19:45 ` Rodrigo Vivi
2014-10-31 21:10 ` Rodrigo Vivi
2014-08-22 3:11 ` [PATCH 18/68] drm/i915/trace: Fix offsets for 64b Ben Widawsky
2014-08-22 3:11 ` [PATCH 19/68] drm/i915: Wrap VMA binding Ben Widawsky
2014-08-22 3:11 ` [PATCH 20/68] drm/i915: Make pin global flags explicit Ben Widawsky
2014-08-22 3:11 ` [PATCH 21/68] drm/i915: Split out aliasing binds Ben Widawsky
2014-08-22 3:11 ` [PATCH 22/68] drm/i915: fix gtt_total_entries() Ben Widawsky
2014-08-22 3:11 ` [PATCH 23/68] drm/i915: Rename to GEN8_LEGACY_PDPES Ben Widawsky
2014-08-22 3:11 ` [PATCH 24/68] drm/i915: Split out verbose PPGTT dumping Ben Widawsky
2014-08-22 3:11 ` [PATCH 25/68] drm/i915: s/pd/pdpe, s/pt/pde Ben Widawsky
2014-08-22 3:11 ` [PATCH 26/68] drm/i915: rename map/unmap to dma_map/unmap Ben Widawsky
2014-08-22 3:11 ` [PATCH 27/68] drm/i915: Setup less PPGTT on failed pagedir Ben Widawsky
2014-08-22 3:11 ` [PATCH 28/68] drm/i915: clean up PPGTT init error path Ben Widawsky
2014-08-22 3:11 ` [PATCH 29/68] drm/i915: Un-hardcode number of page directories Ben Widawsky
2014-08-22 3:11 ` [PATCH 30/68] drm/i915: Make gen6_write_pdes gen6_map_page_tables Ben Widawsky
2014-08-22 3:11 ` [PATCH 31/68] drm/i915: Range clearing is PPGTT agnostic Ben Widawsky
2014-08-22 3:11 ` [PATCH 32/68] drm/i915: Page table helpers, and define renames Ben Widawsky
2014-08-22 3:11 ` [PATCH 33/68] drm/i915: construct page table abstractions Ben Widawsky
2014-08-22 3:11 ` [PATCH 34/68] drm/i915: Complete page table structures Ben Widawsky
2014-08-22 3:11 ` [PATCH 35/68] drm/i915: Create page table allocators Ben Widawsky
2014-08-22 3:11 ` [PATCH 36/68] drm/i915: Generalize GEN6 mapping Ben Widawsky
2014-08-22 3:12 ` [PATCH 37/68] drm/i915: Clean up pagetable DMA map & unmap Ben Widawsky
2014-08-22 3:12 ` [PATCH 38/68] drm/i915: Always dma map page table allocations Ben Widawsky
2014-08-22 3:12 ` [PATCH 39/68] drm/i915: Consolidate dma mappings Ben Widawsky
2014-08-22 3:12 ` [PATCH 40/68] drm/i915: Always dma map page directory allocations Ben Widawsky
2014-08-22 3:12 ` [PATCH 41/68] drm/i915: Track GEN6 page table usage Ben Widawsky
2014-08-22 3:12 ` [PATCH 42/68] drm/i915: Extract context switch skip logic Ben Widawsky
2014-08-22 3:12 ` [PATCH 43/68] drm/i915: Track page table reload need Ben Widawsky
2014-08-22 3:12 ` [PATCH 44/68] drm/i915: Initialize all contexts Ben Widawsky
2014-08-22 3:12 ` [PATCH 45/68] drm/i915: Finish gen6/7 dynamic page table allocation Ben Widawsky
2014-08-22 3:12 ` [PATCH 46/68] drm/i915/bdw: Use dynamic allocation idioms on free Ben Widawsky
2014-08-22 3:12 ` [PATCH 47/68] drm/i915/bdw: pagedirs rework allocation Ben Widawsky
2014-08-22 3:12 ` [PATCH 48/68] drm/i915/bdw: pagetable allocation rework Ben Widawsky
2014-08-22 3:12 ` [PATCH 49/68] drm/i915/bdw: Make the pdp switch a bit less hacky Ben Widawsky
2014-08-22 3:12 ` [PATCH 50/68] drm/i915: num_pd_pages/num_pd_entries isn't useful Ben Widawsky
2014-08-22 3:12 ` [PATCH 51/68] drm/i915: Extract PPGTT param from pagedir alloc Ben Widawsky
2014-08-22 3:12 ` [PATCH 52/68] drm/i915/bdw: Split out mappings Ben Widawsky
2014-08-22 3:12 ` [PATCH 53/68] drm/i915/bdw: begin bitmap tracking Ben Widawsky
2014-08-22 3:12 ` [PATCH 54/68] drm/i915/bdw: Dynamic page table allocations Ben Widawsky
2014-08-22 3:12 ` [PATCH 55/68] drm/i915/bdw: Make pdp allocation more dynamic Ben Widawsky
2014-08-22 3:12 ` [PATCH 56/68] drm/i915/bdw: Abstract PDP usage Ben Widawsky
2014-08-22 3:12 ` [PATCH 57/68] drm/i915/bdw: Add dynamic page trace events Ben Widawsky
2014-08-22 3:12 ` [PATCH 58/68] drm/i915/bdw: Add ppgtt info for dynamic pages Ben Widawsky
2014-08-22 3:12 ` [PATCH 59/68] drm/i915/bdw: implement alloc/teardown for 4lvl Ben Widawsky
2014-08-22 3:12 ` [PATCH 60/68] drm/i915/bdw: Add 4 level switching infrastructure Ben Widawsky
2014-08-22 3:12 ` [PATCH 61/68] drm/i915/bdw: Generalize PTE writing for GEN8 PPGTT Ben Widawsky
2014-08-22 3:12 ` [PATCH 62/68] drm/i915: Plumb sg_iter through va allocation ->maps Ben Widawsky
2014-08-22 3:12 ` [PATCH 63/68] drm/i915: Introduce map and unmap for VMAs Ben Widawsky
2014-08-22 3:12 ` [PATCH 64/68] drm/i915: Depend exclusively on map and unmap_vma Ben Widawsky
2014-08-22 3:12 ` [PATCH 65/68] drm/i915: Expand error state's address width to 64b Ben Widawsky
2014-08-22 3:12 ` [PATCH 66/68] drm/i915/bdw: Flip the 48b switch Ben Widawsky
2014-08-22 3:12 ` [PATCH 67/68] drm/i915: Provide a soft_pin hook Ben Widawsky
2014-08-22 3:12 ` [PATCH 68/68] XXX: drm/i915: Unexplained workarounds Ben Widawsky
2014-08-22 3:12 ` [PATCH 1/2] intel: Split out bo allocation Ben Widawsky
2014-08-22 3:12 ` Ben Widawsky [this message]
2014-08-22 3:12 ` [PATCH] i965: First step toward prelocation Ben Widawsky
2014-08-22 12:15 ` [Mesa-dev] " Alex Deucher
2014-08-22 17:14 ` Ben Widawsky
2014-08-22 3:12 ` [PATCH] no_reloc: test case Ben Widawsky
2014-08-22 6:30 ` [Intel-gfx] [PATCH 00/68] Broadwell 48b addressing and prelocations (no relocs) Chris Wilson
2014-08-22 6:59 ` Kenneth Graunke
2014-08-22 7:03 ` Chris Wilson
2014-08-22 13:30 ` Daniel Vetter
2014-08-22 13:38 ` [Intel-gfx] " Chris Wilson
2014-08-22 20:29 ` Daniel Vetter
2014-08-22 20:38 ` [Intel-gfx] " Daniel Vetter
2014-08-25 22:42 ` Jesse Barnes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1408677155-1840-71-git-send-email-benjamin.widawsky@intel.com \
--to=benjamin.widawsky@intel.com \
--cc=ben@bwidawsk.net \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox