dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: John Brooks <john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org,
	"Michel Dänzer" <michel-otUistvHUpPR7s880joybQ@public.gmane.org>,
	"Marek Olšák" <maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Cc: "David Airlie" <airlied-cv59FeDIM0c@public.gmane.org>,
	"Christian König"
	<deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: [PATCH 8/9] drm/amdgpu: Asynchronously move BOs to visible VRAM
Date: Fri, 23 Jun 2017 13:39:39 -0400	[thread overview]
Message-ID: <1498239580-17360-9-git-send-email-john@fastquake.com> (raw)
In-Reply-To: <1498239580-17360-1-git-send-email-john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>

Moving CPU-accessible BOs from GTT to visible VRAM reduces latency on the
GPU and improves average framerate. However, it's an expensive operation.
When visible VRAM is full and evictions are necessary, it can easily take
tens of milliseconds. On the CS path, that directly increases the frame
time and causes noticeable momentary stutters.

Unlike other BO move operations, moving BOs to visible VRAM is a
housekeeping task and does not have to happen immediately. As a compromise
to allow evictions to occur and keep the contents of visible VRAM fresh,
but without stalling the rendering pipeline, we can defer these moves to a
worker thread.

Add a work function that moves a BO into visible VRAM, evicting other BOs
if necessary. During CS, queue this work function for all requested
CPU_ACCESS_REQUIRED BOs (subject to the usual move throttling).

This decreases the frequency and severity of visible-VRAM-related
stuttering.

Signed-off-by: John Brooks <john@fastquake.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 14 ++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 45 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |  7 +++++
 4 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 81dbb93..a809742 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -436,6 +436,10 @@ struct amdgpu_bo {
 	 * is associated to
 	 */
 	struct list_head		va;
+
+	/* Work item for moving this BO to visible VRAM asynchronously */
+	struct work_struct		move_vis_vram_work;
+
 	/* Constant after initialization */
 	struct drm_gem_object		gem_base;
 	struct amdgpu_bo		*parent;
@@ -1583,6 +1587,7 @@ struct amdgpu_device {
 	struct amdgpu_mman		mman;
 	struct amdgpu_vram_scratch	vram_scratch;
 	struct amdgpu_wb		wb;
+	struct workqueue_struct		*vis_vram_wq;
 	atomic64_t			vram_usage;
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 25d6df6..9215611 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -341,14 +341,16 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	if (p->bytes_moved < p->bytes_moved_threshold) {
 		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
-			 * visible VRAM if we've depleted our allowance to do
-			 * that.
+			/* Move CPU_ACCESS_REQUIRED BOs to limited visible VRAM
+			 * asynchronously, if we're allowed.
 			 */
-			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
-				domain = bo->prefered_domains;
-			else
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold) {
+				queue_work(adev->vis_vram_wq,
+					   &bo->move_vis_vram_work);
+				return 0;
+			} else {
 				domain = bo->allowed_domains;
+			}
 		} else {
 			domain = bo->prefered_domains;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 27d8c77..a69441d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -93,6 +93,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
 	bo = container_of(tbo, struct amdgpu_bo, tbo);
 
+	cancel_work_sync(&bo->move_vis_vram_work);
+
 	amdgpu_update_memory_usage(adev, &bo->tbo.mem, NULL);
 
 	drm_gem_object_release(&bo->gem_base);
@@ -330,6 +332,47 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 		*cpu_addr = NULL;
 }
 
+static void amdgpu_bo_move_vis_vram_work_func(struct work_struct *work)
+{
+	struct amdgpu_bo *bo = container_of(work, struct amdgpu_bo,
+					    move_vis_vram_work);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	u64 initial_bytes_moved, bytes_moved;
+	uint32_t old_mem;
+	int r;
+
+	spin_lock(&adev->mm_stats.lock);
+	if (adev->mm_stats.accum_us_vis <= 0 ||
+	    adev->mm_stats.accum_us <= 0) {
+		spin_unlock(&adev->mm_stats.lock);
+		return;
+	}
+	spin_unlock(&adev->mm_stats.lock);
+
+	r = amdgpu_bo_reserve(bo, true);
+	if (r != 0)
+		return;
+
+	amdgpu_bo_clear_cpu_access_required(bo);
+
+	if (!(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED))
+		goto out;
+
+	amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+	old_mem = bo->tbo.mem.mem_type;
+	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+	ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+				    initial_bytes_moved;
+	amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+
+	if (bo->tbo.mem.mem_type != old_mem)
+		bo->last_cs_move_jiffies = jiffies;
+
+out:
+	amdgpu_bo_unreserve(bo);
+}
+
 int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 				unsigned long size, int byte_align,
 				bool kernel, u32 domain, u64 flags,
@@ -382,6 +425,8 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 
 	bo->flags = flags;
 
+	INIT_WORK(&bo->move_vis_vram_work, amdgpu_bo_move_vis_vram_work_func);
+
 #ifdef CONFIG_X86_32
 	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
 	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0676a78..5852ca1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1120,6 +1120,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 		return r;
 	}
 
+	/* Initialize workqueue for asynchronously moving BOs to visible VRAM.
+	 * We want to avoid lock contention (and therefore concurrency), so set
+	 * max_active = 1, and set unbound to disable concurrency management
+	 * (which can interleave sleeping workers).
+	 */
+	adev->vis_vram_wq = alloc_workqueue("amdgpu_vis_vram", WQ_UNBOUND, 1);
+
 	/* Reduce size of CPU-visible VRAM if requested */
 	vis_vram_limit = amdgpu_vis_vram_limit * 1024 * 1024;
 	if (amdgpu_vis_vram_limit > 0 &&
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-06-23 17:39 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-23 17:39 [PATCH 0/9] Visible VRAM Management Improvements John Brooks
2017-06-23 17:39 ` [PATCH 3/9] drm/amdgpu: Don't force BOs into visible VRAM for page faults John Brooks
     [not found]   ` <1498239580-17360-4-git-send-email-john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>
2017-06-26  9:38     ` Michel Dänzer
     [not found]       ` <f399c192-d90d-9f43-9b8a-820fa51a7715-otUistvHUpPR7s880joybQ@public.gmane.org>
2017-06-27  3:25         ` John Brooks
2017-06-23 17:39 ` [PATCH 5/9] drm/amdgpu: Track time of last page fault and last CS move in struct amdgpu_bo John Brooks
     [not found] ` <1498239580-17360-1-git-send-email-john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>
2017-06-23 17:39   ` [PATCH 1/9] drm/amdgpu: Separate placements and busy placements John Brooks
2017-06-23 17:39   ` [PATCH 2/9] drm/amdgpu: Add vis_vramlimit module parameter John Brooks
2017-06-26  9:48     ` Michel Dänzer
2017-06-26  9:57     ` Christian König
2017-06-23 17:39   ` [PATCH 4/9] drm/amdgpu: Don't force BOs into visible VRAM if they can go to GTT instead John Brooks
     [not found]     ` <1498239580-17360-5-git-send-email-john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>
2017-06-24 18:09       ` Christian König
     [not found]         ` <0c5064f9-5b84-8833-b410-055b5e2064bf-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-24 18:37           ` John Brooks
2017-06-23 17:39   ` [PATCH 6/9] drm/amdgpu: Set/clear CPU_ACCESS_REQUIRED flag on page fault and CS John Brooks
     [not found]     ` <1498239580-17360-7-git-send-email-john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>
2017-06-24 18:00       ` Christian König
2017-06-25  1:57         ` John Brooks
     [not found]         ` <55ea5e84-0791-5a70-6278-ade83c343a3b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-26  9:27           ` Michel Dänzer
     [not found]             ` <6c6fca21-df95-a413-d5eb-c05f1913787b-otUistvHUpPR7s880joybQ@public.gmane.org>
2017-06-26 23:25               ` Marek Olšák
2017-06-23 17:39   ` [PATCH 7/9] drm/amdgpu: Throttle visible VRAM moves separately John Brooks
     [not found]     ` <1498239580-17360-8-git-send-email-john-xq/Ko7C6e2Bl57MIdRCFDg@public.gmane.org>
2017-06-26  9:44       ` Michel Dänzer
     [not found]         ` <c132d211-bb7c-1e7d-617a-6f128343a581-otUistvHUpPR7s880joybQ@public.gmane.org>
2017-06-26 22:29           ` John Brooks
2017-06-27  8:25             ` Michel Dänzer
2017-06-23 17:39   ` John Brooks [this message]
2017-06-23 21:02   ` [PATCH 0/9] Visible VRAM Management Improvements Felix Kuehling
     [not found]     ` <82339d2d-481c-ab3f-1590-ab22f0eac371-5C7GfCeVMHo@public.gmane.org>
2017-06-23 23:16       ` John Brooks
2017-06-24 18:20         ` Christian König
     [not found]           ` <644cf9b4-e22b-eab1-a505-b0e1f9850f82-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-24 21:50             ` John Brooks
2017-06-25 11:54               ` Christian König
2017-06-24 18:07   ` Christian König
     [not found]     ` <3cd916a7-6734-5eff-b645-66f3ee83f13a-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-06-24 18:36       ` John Brooks
2017-06-25 11:31         ` Christian König
2017-06-23 17:39 ` [PATCH 9/9] drm/amdgpu: Reduce lock contention when evicting from visible VRAM John Brooks

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1498239580-17360-9-git-send-email-john@fastquake.com \
    --to=john-xq/ko7c6e2bl57midrcfdg@public.gmane.org \
    --cc=airlied-cv59FeDIM0c@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org \
    --cc=dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=michel-otUistvHUpPR7s880joybQ@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).