From: Jani Nikula <jani.nikula@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 01/16] drm/i915: Keep a small stash of preallocated WC pages
Date: Fri, 11 Aug 2017 12:51:07 +0300 [thread overview]
Message-ID: <87h8xee8l0.fsf@nikula.org> (raw)
In-Reply-To: <20170726132610.3336-2-chris@chris-wilson.co.uk>
On Wed, 26 Jul 2017, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> We use WC pages for coherent writes into the ppGTT on !llc
> architectuures. However, to create a WC page requires a stop_machine(),
> i.e. is very slow. To compensate we currently keep a per-vm cache of
> recently freed pages, but we still see the slow startup of new contexts.
> We can amoritize that cost slightly by allocating WC pages in small
> batches (PAGEVEC_SIZE == 14) and since creating a WC page implies a
> stop_machine() there is no penalty for keeping that stash global.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 3 +
> drivers/gpu/drm/i915/i915_gem_gtt.c | 121 +++++++++++++++++++++++++++++-------
> 2 files changed, 100 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2c7456f4ed38..fd62be74a422 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1464,6 +1464,9 @@ struct i915_gem_mm {
> struct llist_head free_list;
> struct work_struct free_work;
>
> + /** Small stash of WC pages */
> + struct pagevec wc_stash;
> +
> /** Usable portion of the GTT for GEM */
These are not proper kernel-doc comments.
BR,
Jani.
> dma_addr_t stolen_base; /* limited to low memory (32-bit) */
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 10aa7762d9a6..ad4e48435853 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -351,39 +351,85 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
>
> static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
> {
> - struct page *page;
> + struct pagevec *pvec = &vm->free_pages;
>
> if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
> i915_gem_shrink_all(vm->i915);
>
> - if (vm->free_pages.nr)
> - return vm->free_pages.pages[--vm->free_pages.nr];
> + if (likely(pvec->nr))
> + return pvec->pages[--pvec->nr];
> +
> + if (!vm->pt_kmap_wc)
> + return alloc_page(gfp);
> +
> + lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +
> + /* Look in our global stash of WC pages... */
> + pvec = &vm->i915->mm.wc_stash;
> + if (likely(pvec->nr))
> + return pvec->pages[--pvec->nr];
>
> - page = alloc_page(gfp);
> - if (!page)
> + /* Otherwise batch allocate pages to amoritize cost of set_pages_wc. */
> + do {
> + struct page *page;
> +
> + page = alloc_page(gfp);
> + if (unlikely(!page))
> + break;
> +
> + pvec->pages[pvec->nr++] = page;
> + } while (pagevec_space(pvec));
> +
> + if (unlikely(!pvec->nr))
> return NULL;
>
> - if (vm->pt_kmap_wc)
> - set_pages_array_wc(&page, 1);
> + set_pages_array_wc(pvec->pages, pvec->nr);
>
> - return page;
> + return pvec->pages[--pvec->nr];
> }
>
> -static void vm_free_pages_release(struct i915_address_space *vm)
> +static void vm_free_pages_release(struct i915_address_space *vm,
> + bool immediate)
> {
> - GEM_BUG_ON(!pagevec_count(&vm->free_pages));
> + struct pagevec *pvec = &vm->free_pages;
> +
> + GEM_BUG_ON(!pagevec_count(pvec));
> +
> + if (vm->pt_kmap_wc) {
> + struct pagevec *stash = &vm->i915->mm.wc_stash;
>
> - if (vm->pt_kmap_wc)
> - set_pages_array_wb(vm->free_pages.pages,
> - pagevec_count(&vm->free_pages));
> + /* When we use WC, first fill up the global stash and then
> + * only if full immediately free the overflow.
> + */
> +
> + lockdep_assert_held(&vm->i915->drm.struct_mutex);
> + if (pagevec_space(stash)) {
> + do {
> + stash->pages[stash->nr++] =
> + pvec->pages[--pvec->nr];
> + if (!pvec->nr)
> + return;
> + } while (pagevec_space(stash));
> +
> + /* As we have made some room in the VM's free_pages,
> + * we can wait for it to fill again. Unless we are
> + * inside i915_address_space_fini() and must
> + * immediately release the pages!
> + */
> + if (!immediate)
> + return;
> + }
> +
> + set_pages_array_wb(pvec->pages, pvec->nr);
> + }
>
> - __pagevec_release(&vm->free_pages);
> + __pagevec_release(pvec);
> }
>
> static void vm_free_page(struct i915_address_space *vm, struct page *page)
> {
> if (!pagevec_add(&vm->free_pages, page))
> - vm_free_pages_release(vm);
> + vm_free_pages_release(vm, false);
> }
>
> static int __setup_page_dma(struct i915_address_space *vm,
> @@ -447,12 +493,31 @@ static void fill_page_dma_32(struct i915_address_space *vm,
> static int
> setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
> {
> - return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
> + struct page *page;
> + dma_addr_t addr;
> +
> + page = alloc_page(gfp | __GFP_ZERO);
> + if (unlikely(!page))
> + return -ENOMEM;
> +
> + addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
> + PCI_DMA_BIDIRECTIONAL);
> + if (unlikely(dma_mapping_error(vm->dma, addr))) {
> + __free_page(page);
> + return -ENOMEM;
> + }
> +
> + vm->scratch_page.page = page;
> + vm->scratch_page.daddr = addr;
> + return 0;
> }
>
> static void cleanup_scratch_page(struct i915_address_space *vm)
> {
> - cleanup_page_dma(vm, &vm->scratch_page);
> + struct i915_page_dma *p = &vm->scratch_page;
> +
> + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> + __free_page(p->page);
> }
>
> static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
> @@ -1332,18 +1397,18 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> 1ULL << 48 :
> 1ULL << 32;
>
> - ret = gen8_init_scratch(&ppgtt->base);
> - if (ret) {
> - ppgtt->base.total = 0;
> - return ret;
> - }
> -
> /* There are only few exceptions for gen >=6. chv and bxt.
> * And we are not sure about the latter so play safe for now.
> */
> if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
> ppgtt->base.pt_kmap_wc = true;
>
> + ret = gen8_init_scratch(&ppgtt->base);
> + if (ret) {
> + ppgtt->base.total = 0;
> + return ret;
> + }
> +
> if (use_4lvl(vm)) {
> ret = setup_px(&ppgtt->base, &ppgtt->pml4);
> if (ret)
> @@ -1867,7 +1932,7 @@ static void i915_address_space_init(struct i915_address_space *vm,
> static void i915_address_space_fini(struct i915_address_space *vm)
> {
> if (pagevec_count(&vm->free_pages))
> - vm_free_pages_release(vm);
> + vm_free_pages_release(vm, true);
>
> i915_gem_timeline_fini(&vm->timeline);
> drm_mm_takedown(&vm->mm);
> @@ -2593,6 +2658,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
> {
> struct i915_ggtt *ggtt = &dev_priv->ggtt;
> struct i915_vma *vma, *vn;
> + struct pagevec *pvec;
>
> ggtt->base.closed = true;
>
> @@ -2616,6 +2682,13 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
> }
>
> ggtt->base.cleanup(&ggtt->base);
> +
> + pvec = &dev_priv->mm.wc_stash;
> + if (pvec->nr) {
> + set_pages_array_wb(pvec->pages, pvec->nr);
> + __pagevec_release(pvec);
> + }
> +
> mutex_unlock(&dev_priv->drm.struct_mutex);
>
> arch_phys_wc_del(ggtt->mtrr);
--
Jani Nikula, Intel Open Source Technology Center
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2017-08-11 9:46 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-26 13:25 More tweaks Chris Wilson
2017-07-26 13:25 ` [PATCH 01/16] drm/i915: Keep a small stash of preallocated WC pages Chris Wilson
2017-08-07 20:15 ` Matthew Auld
2017-08-11 7:34 ` Joonas Lahtinen
2017-08-11 11:55 ` Chris Wilson
2017-08-11 9:51 ` Jani Nikula [this message]
2017-07-26 13:25 ` [PATCH 02/16] drm/i915: Pin fence for iomap Chris Wilson
2017-07-26 13:25 ` [PATCH 03/16] drm/i915: Consolidate get_fence with pin_fence Chris Wilson
2017-07-26 13:25 ` [PATCH 04/16] drm/i915: Emit pipelined fence changes Chris Wilson
2017-07-26 13:25 ` [PATCH 05/16] drm/i915: Refactor testing obj->mm.pages Chris Wilson
2017-08-10 16:26 ` Joonas Lahtinen
2017-07-26 13:26 ` [PATCH 06/16] drm/i915: Move dev_priv->mm.[un]bound_list to its own lock Chris Wilson
2017-08-10 16:22 ` Joonas Lahtinen
2017-08-12 10:26 ` Chris Wilson
2017-08-12 11:34 ` Chris Wilson
2017-08-12 11:41 ` Chris Wilson
2017-07-26 13:26 ` [PATCH 07/16] drm/i915: Trim struct_mutex hold duration for i915_gem_free_objects Chris Wilson
2017-08-11 7:36 ` Joonas Lahtinen
2017-07-26 13:26 ` [PATCH 08/16] drm/i915: Only free the oldest stale object before a fresh allocation Chris Wilson
2017-07-26 13:26 ` [PATCH 09/16] drm/i915: Track user GTT faulting per-vma Chris Wilson
2017-07-26 13:26 ` [PATCH 10/16] drm/i915: Try a minimal attempt to insert the whole object for relocations Chris Wilson
2017-07-26 13:26 ` [PATCH 11/16] drm/i915: Check context status before looking up our obj/vma Chris Wilson
2017-07-26 13:26 ` [PATCH 12/16] drm/i915: Convert execbuf to use struct-of-array packing for critical fields Chris Wilson
2017-07-26 13:26 ` [PATCH 13/16] drm/i915: Simplify eb_lookup_vmas() Chris Wilson
2017-07-26 13:26 ` [PATCH 14/16] drm/i915: Replace execbuf vma ht with an idr Chris Wilson
2017-07-26 13:26 ` [PATCH 15/16] drm/i915: Trivial grammar fix s/opt of/opt out of/ in comment Chris Wilson
2017-07-26 13:26 ` [PATCH 16/16] drm/i915: Mark the GT as busy before idling the previous request Chris Wilson
2017-07-26 14:13 ` ✓ Fi.CI.BAT: success for series starting with [01/16] drm/i915: Keep a small stash of preallocated WC pages Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87h8xee8l0.fsf@nikula.org \
--to=jani.nikula@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.