From: Ben Widawsky <benjamin.widawsky@intel.com>
To: Intel GFX <intel-gfx@lists.freedesktop.org>
Cc: Ben Widawsky <ben@bwidawsk.net>,
Ben Widawsky <benjamin.widawsky@intel.com>
Subject: [PATCH 46/56] drm/i915/bdw: implement alloc/teardown for 4lvl
Date: Fri, 9 May 2014 20:59:41 -0700 [thread overview]
Message-ID: <1399694391-3935-47-git-send-email-benjamin.widawsky@intel.com> (raw)
In-Reply-To: <1399694391-3935-1-git-send-email-benjamin.widawsky@intel.com>
The 4-level (4lvl) code works just as one would expect and, conveniently,
it can call into the existing 3-level (3lvl) page table code to handle all
of the lower levels.
PML4 has no special attributes.
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 170 ++++++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_gem_gtt.h | 12 ++-
2 files changed, 163 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c4b53ef..3478bf5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -413,9 +413,12 @@ static void __pdp_fini(struct i915_pagedirpo *pdp)
+/*
+ * Tear down a page directory pointer (pdp).
+ *
+ * __pdp_fini() runs for every pdp, including the one embedded in the ppgtt
+ * on the legacy (non-48b) path, which gen8_ppgtt_fini_common() still hands
+ * to this function. Only pdps on 48b platforms own a backing page and are
+ * heap allocated (see alloc_pdp_single()), so only those are DMA-unmapped,
+ * have their page released, and are kfree()d.
+ */
static void free_pdp_single(struct i915_pagedirpo *pdp,
			    struct drm_device *dev)
{
	__pdp_fini(pdp);
-	if (HAS_48B_PPGTT(dev))
+	if (HAS_48B_PPGTT(dev)) {
+		i915_dma_unmap_single(pdp, dev);
+		__free_page(pdp->page);
		kfree(pdp);
+	}
}
static int __pdp_init(struct i915_pagedirpo *pdp,
@@ -441,6 +444,58 @@ static int __pdp_init(struct i915_pagedirpo *pdp,
return 0;
}
+/*
+ * Allocate and initialise one page directory pointer (pdp) for a 48b PPGTT.
+ *
+ * The pdp structure is kmalloc()ed, given a zeroed backing page, initialised
+ * with __pdp_init() and then DMA mapped. Returns the new pdp, or an ERR_PTR()
+ * carrying -ENOMEM or the __pdp_init() error. The @pml4 argument is not used
+ * by the body.
+ *
+ * NOTE(review): any result of i915_dma_map_px_single() is not examined here.
+ */
+static struct i915_pagedirpo *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt,
+					       struct i915_pml4 *pml4)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct i915_pagedirpo *new_pdp;
+	int err = -ENOMEM;
+
+	BUG_ON(!HAS_48B_PPGTT(dev));
+
+	new_pdp = kmalloc(sizeof(*new_pdp), GFP_KERNEL);
+	if (!new_pdp)
+		return ERR_PTR(-ENOMEM);
+
+	new_pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+	if (!new_pdp->page)
+		goto free_struct;
+
+	err = __pdp_init(new_pdp, dev);
+	if (err)
+		goto free_backing_page;
+
+	i915_dma_map_px_single(new_pdp, dev);
+
+	return new_pdp;
+
+free_backing_page:
+	__free_page(new_pdp->page);
+free_struct:
+	kfree(new_pdp);
+	return ERR_PTR(err);
+}
+
+/*
+ * Undo pml4_init(): DMA-unmap the PML4 and release its backing page.
+ * @pml4 must be the pml4 member embedded in a struct i915_hw_ppgtt, since
+ * the owning ppgtt (and through it the device) is found via container_of().
+ */
+static void pml4_fini(struct i915_pml4 *pml4)
+{
+	struct i915_hw_ppgtt *owner;
+	struct drm_device *dev;
+
+	owner = container_of(pml4, struct i915_hw_ppgtt, pml4);
+	dev = owner->base.dev;
+
+	i915_dma_unmap_single(pml4, dev);
+	__free_page(pml4->page);
+}
+
+/*
+ * Set up the PML4 embedded in @ppgtt: allocate one zeroed page for it and
+ * DMA map that page. Returns 0 on success, -ENOMEM if no page is available.
+ *
+ * NOTE(review): unlike alloc_pdp_single(), this allocation does not pass
+ * GFP_DMA32 - confirm that is intended.
+ */
+static int pml4_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct i915_pml4 *top = &ppgtt->pml4;
+	struct drm_device *dev = ppgtt->base.dev;
+
+	top->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (top->page) {
+		i915_dma_map_px_single(top, dev);
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_ring_buffer *ring,
unsigned entry,
@@ -729,7 +784,14 @@ static void gen8_teardown_va_range_4lvl(struct i915_address_space *vm,
struct i915_pml4 *pml4,
uint64_t start, uint64_t length)
{
-	BUG();
+	struct i915_pagedirpo *pdp;
+	uint64_t temp, pml4e;
+
+	/*
+	 * Walk every PML4 entry touched by [start, start + length) and let
+	 * the 3-level code tear down the levels below it.
+	 */
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		/* Slots that were never populated have nothing to tear down;
+		 * dereferencing them below would be a NULL pointer access. */
+		if (!pdp)
+			continue;
+
+		gen8_teardown_va_range_3lvl(vm, pdp, start, length);
+
+		/* NOTE(review): an emptied pdp is only marked unused here; it
+		 * is neither freed nor removed from pml4->pdps[]. Confirm that
+		 * is handled elsewhere. */
+		if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev)))
+			clear_bit(pml4e, pml4->used_pml4es);
+	}
}
static void gen8_teardown_va_range(struct i915_address_space *vm,
@@ -738,10 +800,10 @@ static void gen8_teardown_va_range(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
- if (!HAS_48B_PPGTT(vm->dev))
- gen8_teardown_va_range_3lvl(vm, &ppgtt->pdp, start, length);
- else
+ if (HAS_48B_PPGTT(vm->dev))
gen8_teardown_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+ else
+ gen8_teardown_va_range_3lvl(vm, &ppgtt->pdp, start, length);
}
static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
@@ -1021,12 +1083,76 @@ err_out:
return ret;
}
-static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
- struct i915_pml4 *pml4,
- uint64_t start,
- uint64_t length)
+/*
+ * Allocate the page tables backing [start, start + length) in a 4-level
+ * PPGTT.
+ *
+ * A page directory pointer (pdp) is allocated for every PML4 entry in the
+ * range that does not have one yet; the lower levels are then populated by
+ * gen8_alloc_va_range_3lvl(). pml4->used_pml4es is only updated once all of
+ * that has succeeded.
+ *
+ * Returns 0 on success or a negative errno. On failure every pdp allocated
+ * by this call is freed and its pml4->pdps[] slot is reset to NULL.
+ */
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+				    struct i915_pml4 *pml4,
+				    uint64_t start,
+				    uint64_t length)
{
-	BUG();
+	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	struct i915_pagedirpo *pdp;
+	const uint64_t orig_start = start;
+	const uint64_t orig_length = length;
+	uint64_t temp, pml4e;
+	int ret;
+
+	/* Do the pdp allocations first, so we don't need to track the newly
+	 * allocated tables below the pdp */
+	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+	/* The pagedirectory and pagetable allocations are done in the shared 3
+	 * and 4 level code. Just allocate the pdps.
+	 */
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		if (!pdp) {
+			WARN_ON(test_bit(pml4e, pml4->used_pml4es));
+			pdp = alloc_pdp_single(ppgtt, pml4);
+			if (IS_ERR(pdp)) {
+				/* The slot is still NULL, so only the pdps
+				 * recorded in new_pdps need unwinding. */
+				ret = PTR_ERR(pdp);
+				goto err_alloc;
+			}
+
+			pml4->pdps[pml4e] = pdp;
+			set_bit(pml4e, new_pdps);
+			trace_i915_pagedirpo_alloc(&ppgtt->base, pml4e,
+						   pml4e << GEN8_PML4E_SHIFT,
+						   GEN8_PML4E_SHIFT);
+
+		} else {
+			WARN(!test_bit(pml4e, pml4->used_pml4es),
+			     "%lld %p", pml4e, vm);
+		}
+	}
+
+	/* The iterator consumed start/length; rewind for the second pass. */
+	start = orig_start;
+	length = orig_length;
+
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		BUG_ON(!pdp);
+
+		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+		if (ret)
+			goto err_out;
+	}
+
+	WARN(bitmap_weight(pml4->used_pml4es, GEN8_PML4ES_PER_PML4) > 2,
+	     "The allocation has spanned more than 512GB. It is highly likely this is incorrect.");
+
+	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+		  GEN8_PML4ES_PER_PML4);
+
+	return 0;
+
+err_out:
+	/* This will teardown more than we allocated. It should be fine, and
+	 * makes code simpler. */
+	start = orig_start;
+	length = orig_length;
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e)
+		gen8_teardown_va_range_3lvl(vm, pdp, start, length);
+
+err_alloc:
+	/* Free exactly the pdps this call created, looked up by slot rather
+	 * than through the stale loop cursor, and leave no dangling pointers
+	 * behind in the PML4. */
+	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) {
+		free_pdp_single(pml4->pdps[pml4e], vm->dev);
+		pml4->pdps[pml4e] = NULL;
+	}
+
+	return ret;
}
static int gen8_alloc_va_range(struct i915_address_space *vm,
@@ -1035,16 +1161,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
- if (!HAS_48B_PPGTT(vm->dev))
- return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
- else
+ if (HAS_48B_PPGTT(vm->dev))
return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+ else
+ return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
}
static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
{
free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
- free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
+ /* The top level differs by platform: 48b PPGTT uses the embedded pml4,
+  * everything else the embedded pdp. Release whichever one is in use. */
+ if (HAS_48B_PPGTT(ppgtt->base.dev))
+ pml4_fini(&ppgtt->pml4);
+ else
+ free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
}
/**
@@ -1066,7 +1195,13 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
if (IS_ERR(ppgtt->scratch_pd))
return PTR_ERR(ppgtt->scratch_pd);
- if (!HAS_48B_PPGTT(ppgtt->base.dev)) {
+ if (HAS_48B_PPGTT(ppgtt->base.dev)) {
+ int ret = pml4_init(ppgtt);
+ if (ret) {
+ free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+ return ret;
+ }
+ } else {
int ret = __pdp_init(&ppgtt->pdp, false);
if (ret) {
free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
@@ -1075,8 +1210,7 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
ppgtt->switch_mm = gen8_mm_switch;
trace_i915_pagedirpo_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
- } else
- BUG(); /* Not yet implemented */
+ }
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 94c825e..0e5cd58 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -103,6 +103,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
#endif
#define GEN8_PML4ES_PER_PML4 512
#define GEN8_PML4E_SHIFT 39
+#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1)
#define GEN8_PDPE_SHIFT 30
/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
* tables */
@@ -436,9 +437,18 @@ static inline size_t gen6_pde_count(uint32_t addr, uint32_t length)
temp = min(temp, length), \
start += temp, length -= temp)
+/*
+ * Iterate over the PML4 entries covering [start, start + length).
+ *
+ * On each pass @iter is the PML4 index and @pdp the pointer stored in
+ * (pml4)->pdps[iter], which may be NULL. @start and @length are consumed as
+ * the walk advances (each step moves @start to the next PML4E boundary), so
+ * callers that need the original range must save it. @temp is scratch.
+ *
+ * @pdp is loaded only after @iter has been bounds checked, so the walk never
+ * reads pdps[GEN8_PML4ES_PER_PML4] when it runs off the last entry
+ * (presumably one past the end of the array - the declaration of pdps[] is
+ * not visible here).
+ */
+#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
+	for (iter = gen8_pml4e_index(start);				\
+	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
+		(pdp = (pml4)->pdps[iter], true);			\
+	     ++iter,							\
+	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \
gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
+
/* Clamp length to the next pagetab boundary */
static inline uint64_t gen8_clamp_pt(uint64_t start, uint64_t length)
{
@@ -476,7 +486,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
static inline uint32_t gen8_pml4e_index(uint64_t address)
{
- BUG();
+ /* The PML4 index is the GEN8_PML4ES_PER_PML4-wide field that starts at
+  * bit GEN8_PML4E_SHIFT of the address. */
+ return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
}
static inline size_t gen8_pte_count(uint64_t addr, uint64_t length)
--
1.9.2
next prev parent reply other threads:[~2014-05-10 4:02 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-10 3:58 [PATCH 00/56] [RFCish] Dynamic page table alloc, 64b, and GPU/CPU mirror Ben Widawsky
2014-05-10 3:58 ` [PATCH 01/56] drm/i915: Fix flush before context switch comment Ben Widawsky
2014-05-10 3:58 ` [PATCH 02/56] Revert "drm/i915: Drop I915_PARAM_HAS_FULL_PPGTT again" Ben Widawsky
2014-05-10 3:58 ` [PATCH 03/56] drm/i915: Prevent signals from interrupting close() Ben Widawsky
2014-05-10 3:58 ` [PATCH 04/56] drm/i915: Wrap VMA binding Ben Widawsky
2014-05-10 3:59 ` [PATCH 05/56] drm/i915: Make pin global flags explicit Ben Widawsky
2014-05-10 3:59 ` [PATCH 06/56] drm/i915: Split out aliasing binds Ben Widawsky
2014-05-10 3:59 ` [PATCH 07/56] drm/i915: fix gtt_total_entries() Ben Widawsky
2014-05-10 3:59 ` [PATCH 08/56] drm/i915: Rename to GEN8_LEGACY_PDPES Ben Widawsky
2014-05-10 3:59 ` [PATCH 09/56] drm/i915: Split out verbose PPGTT dumping Ben Widawsky
2014-05-10 3:59 ` [PATCH 10/56] drm/i915: s/pd/pdpe, s/pt/pde Ben Widawsky
2014-05-10 3:59 ` [PATCH 11/56] drm/i915: rename map/unmap to dma_map/unmap Ben Widawsky
2014-05-10 3:59 ` [PATCH 12/56] drm/i915: Setup less PPGTT on failed pagedir Ben Widawsky
2014-05-10 3:59 ` [PATCH 13/56] drm/i915: clean up PPGTT init error path Ben Widawsky
2014-05-10 3:59 ` [PATCH 14/56] drm/i915: Un-hardcode number of page directories Ben Widawsky
2014-05-10 3:59 ` [PATCH 15/56] drm/i915: Make gen6_write_pdes gen6_map_page_tables Ben Widawsky
2014-05-10 3:59 ` [PATCH 16/56] drm/i915: Range clearing is PPGTT agnostic Ben Widawsky
2014-05-10 3:59 ` [PATCH 17/56] drm/i915: Page table helpers, and define renames Ben Widawsky
2014-05-10 3:59 ` [PATCH 18/56] drm/i915: construct page table abstractions Ben Widawsky
2014-05-10 3:59 ` [PATCH 19/56] drm/i915: Complete page table structures Ben Widawsky
2014-05-10 3:59 ` [PATCH 20/56] drm/i915: Create page table allocators Ben Widawsky
2014-05-10 3:59 ` [PATCH 21/56] drm/i915: Generalize GEN6 mapping Ben Widawsky
2014-05-10 3:59 ` [PATCH 22/56] drm/i915: Clean up pagetable DMA map & unmap Ben Widawsky
2014-05-10 3:59 ` [PATCH 23/56] drm/i915: Always dma map page table allocations Ben Widawsky
2014-05-10 3:59 ` [PATCH 24/56] drm/i915: Consolidate dma mappings Ben Widawsky
2014-05-10 3:59 ` [PATCH 25/56] drm/i915: Always dma map page directory allocations Ben Widawsky
2014-05-10 3:59 ` [PATCH 26/56] drm/i915: Track GEN6 page table usage Ben Widawsky
2014-05-10 3:59 ` [PATCH 27/56] drm/i915: Extract context switch skip logic Ben Widawsky
2014-05-10 3:59 ` [PATCH 28/56] drm/i915: Force pd restore when PDEs change, gen6-7 Ben Widawsky
2014-05-10 3:59 ` [PATCH 29/56] drm/i915: Finish gen6/7 dynamic page table allocation Ben Widawsky
2014-05-10 3:59 ` [PATCH 30/56] drm/i915/bdw: Use dynamic allocation idioms on free Ben Widawsky
2014-05-10 3:59 ` [PATCH 31/56] drm/i915/bdw: pagedirs rework allocation Ben Widawsky
2014-05-10 3:59 ` [PATCH 32/56] drm/i915/bdw: pagetable allocation rework Ben Widawsky
2014-05-10 3:59 ` [PATCH 33/56] drm/i915/bdw: Make the pdp switch a bit less hacky Ben Widawsky
2014-05-10 3:59 ` [PATCH 34/56] drm/i915: num_pd_pages/num_pd_entries isn't useful Ben Widawsky
2014-05-10 3:59 ` [PATCH 35/56] drm/i915: Extract PPGTT param from pagedir alloc Ben Widawsky
2014-05-10 3:59 ` [PATCH 36/56] drm/i915/bdw: Split out mappings Ben Widawsky
2014-05-10 3:59 ` [PATCH 37/56] drm/i915/bdw: begin bitmap tracking Ben Widawsky
2014-05-10 3:59 ` [PATCH 38/56] drm/i915/bdw: Dynamic page table allocations Ben Widawsky
2014-05-10 3:59 ` [PATCH 39/56] drm/i915/bdw: Scratch unused pages Ben Widawsky
2014-05-10 3:59 ` [PATCH 40/56] drm/i915/bdw: Add ppgtt info for dynamic pages Ben Widawsky
2014-05-10 3:59 ` [PATCH 41/56] drm/i915/bdw: Optimize PDP loads Ben Widawsky
2014-05-10 3:59 ` [PATCH 42/56] TESTME: Either drop the last patch or fix it Ben Widawsky
2014-05-10 3:59 ` [PATCH 43/56] drm/i915/bdw: Add dynamic page trace events Ben Widawsky
2014-05-10 3:59 ` [PATCH 44/56] drm/i915/bdw: Make pdp allocation more dynamic Ben Widawsky
2014-05-10 3:59 ` [PATCH 45/56] drm/i915/bdw: Abstract PDP usage Ben Widawsky
2014-05-10 3:59 ` Ben Widawsky [this message]
2014-05-10 3:59 ` [PATCH 47/56] drm/i915/bdw: 4 level pages tables Ben Widawsky
2014-05-10 3:59 ` [PATCH 48/56] drm/i915: Restructure map vs. insert entries Ben Widawsky
2014-05-10 3:59 ` [PATCH 49/56] drm/i915/bdw: make aliasing PPGTT dynamic Ben Widawsky
2014-05-10 3:59 ` [PATCH 50/56] drm/i915: Expand error state's address width to 64b Ben Widawsky
2014-05-10 3:59 ` [PATCH 51/56] drm/i915/bdw: Flip the 48b switch Ben Widawsky
2014-05-10 3:59 ` [PATCH 52/56] TESTME: GFX_TLB_INVALIDATE_EXPLICIT Ben Widawsky
2014-05-10 3:59 ` [PATCH 53/56] TESTME: Always force invalidate Ben Widawsky
2014-05-10 3:59 ` [PATCH 54/56] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl Ben Widawsky
2014-05-10 3:59 ` [PATCH 55/56] drm/i915: Track userptr VMAs Ben Widawsky
2014-05-10 3:59 ` [PATCH 56/56] drm/i915/userptr: Mirror GPU addr at ioctl (HACK/POC) Ben Widawsky
2014-05-11 17:33 ` [PATCH 00/56] [RFCish] Dynamic page table alloc, 64b, and GPU/CPU mirror Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1399694391-3935-47-git-send-email-benjamin.widawsky@intel.com \
--to=benjamin.widawsky@intel.com \
--cc=ben@bwidawsk.net \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox