From: John Brooks <john@fastquake.com>
To: amd-gfx@lists.freedesktop.org,
	"Michel Dänzer" <michel@daenzer.net>,
	"Marek Olšák" <maraeo@gmail.com>,
	"Christian König" <deathsimple@vodafone.de>
Cc: dri-devel@lists.freedesktop.org
Subject: [PATCH 2/5] drm/amdgpu: Throttle visible VRAM moves separately
Date: Tue, 27 Jun 2017 22:33:18 -0400
Message-ID: <1498617201-24557-3-git-send-email-john@fastquake.com>
In-Reply-To: <1498617201-24557-1-git-send-email-john@fastquake.com>

The BO move throttling code is designed to let VRAM fill quickly when it is
relatively empty. However, it does not account for configurations where
visible VRAM is smaller than total VRAM: total VRAM may be nowhere near full
while the visible segment is already under pressure. In that situation,
visible VRAM experiences unrestricted swapping and performance drops.

Add a separate counter specifically for moves involving visible VRAM, and
check it before moving BOs there.

v2: Only perform calculations for separate counter if visible VRAM is
    smaller than total VRAM. (Michel Dänzer)

Fixes: 95844d20ae02 ("drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit (v2)")
Signed-off-by: John Brooks <john@fastquake.com>
---
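For reference, a minimal standalone sketch of the bookkeeping this patch
introduces. All identifiers here are invented for illustration, the flat
bytes_per_us factor stands in for the driver's log2_max_MBps-based
conversion, and the "boost when half of the segment is free" rule is
omitted; the real logic is in the amdgpu_cs.c hunks below.

#include <stdint.h>

#define US_UPPER_BOUND 200000	/* at most 200 ms of banked time */

struct throttle_example {
	int64_t accum_us;	/* banked time for all VRAM moves */
	int64_t accum_us_vis;	/* banked time for CPU-visible VRAM moves */
	int64_t bytes_per_us;	/* assumed migration throughput (> 0) */
};

static int64_t clamp_us_example(int64_t us)
{
	return us > US_UPPER_BOUND ? US_UPPER_BOUND : us;
}

/* Each command submission tops up both banks with the wall-clock time
 * elapsed since the previous submission. The visible bank is only
 * maintained when visible VRAM is smaller than total VRAM.
 */
static void top_up_example(struct throttle_example *t, int64_t elapsed_us,
			   int vis_smaller_than_total)
{
	t->accum_us = clamp_us_example(t->accum_us + elapsed_us);
	if (vis_smaller_than_total)
		t->accum_us_vis = clamp_us_example(t->accum_us_vis +
						   elapsed_us);
}

/* Byte thresholds handed to BO validation; a bank in debt (negative)
 * yields 0, which disables optional moves of that kind.
 */
static void thresholds_example(const struct throttle_example *t,
			       uint64_t *max_bytes, uint64_t *max_vis_bytes)
{
	*max_bytes = t->accum_us > 0 ?
		(uint64_t)(t->accum_us * t->bytes_per_us) : 0;
	*max_vis_bytes = t->accum_us_vis > 0 ?
		(uint64_t)(t->accum_us_vis * t->bytes_per_us) : 0;
}

/* Completed moves are charged back afterwards: moves into CPU-visible
 * VRAM are charged against both banks, other moves only against the
 * total bank.
 */
static void charge_example(struct throttle_example *t, int64_t bytes,
			   int64_t vis_bytes)
{
	t->accum_us -= bytes / t->bytes_per_us;
	t->accum_us_vis -= vis_bytes / t->bytes_per_us;
}

The key point is that a move into CPU-visible VRAM draws down both banks,
so a workload that keeps pulling BOs into the visible segment exhausts its
visible-move budget even while plenty of total-VRAM budget remains.
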
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 84 +++++++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 ++--
 3 files changed, 78 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 06ed45c..7366115 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1157,7 +1157,9 @@ struct amdgpu_cs_parser {
 	struct list_head		validated;
 	struct dma_fence		*fence;
 	uint64_t			bytes_moved_threshold;
+	uint64_t			bytes_moved_vis_threshold;
 	uint64_t			bytes_moved;
+	uint64_t			bytes_moved_vis;
 	struct amdgpu_bo_list_entry	*evictable;
 
 	/* user fence */
@@ -1591,6 +1593,7 @@ struct amdgpu_device {
 		spinlock_t		lock;
 		s64			last_update_us;
 		s64			accum_us; /* accumulated microseconds */
+		s64			accum_us_vis; /* for visible VRAM */
 		u32			log2_max_MBps;
 	} mm_stats;
 
@@ -1926,7 +1929,8 @@ bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index aeee684..1dfa847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -223,11 +223,13 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
+	u64 free_vis_vram = 0, total_vis_vram = 0, used_vis_vram = 0;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
 	 * throttling.
@@ -238,13 +240,23 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
 	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
 
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		total_vis_vram = adev->mc.visible_vram_size;
+		used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+		free_vis_vram = used_vis_vram >= total_vis_vram ?
+			0 : total_vis_vram - used_vis_vram;
+	}
+
 	spin_lock(&adev->mm_stats.lock);
 
 	/* Increase the amount of accumulated us. */
@@ -252,7 +264,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	increment_us = time_us - adev->mm_stats.last_update_us;
 	adev->mm_stats.last_update_us = time_us;
 	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
-                                      us_upper_bound);
+				      us_upper_bound);
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+			increment_us, us_upper_bound);
+	}
 
 	/* This prevents the short period of low performance when the VRAM
 	 * usage is low and the driver is in debt or doesn't have enough
@@ -280,23 +296,36 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    free_vis_vram >= total_vis_vram / 2) {
+		adev->mm_stats.accum_us_vis = max(bytes_to_us(adev,
+						  free_vis_vram / 2),
+						  adev->mm_stats.accum_us_vis);
+	}
+
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size)
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	else
+		*max_vis_bytes = 0;
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +333,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +343,34 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->prefered_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->prefered_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -554,8 +600,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +627,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 8ee6965..dcf1ddb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -322,7 +322,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
 	unsigned long page_align;
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	size_t acc_size;
 	int r;
 
@@ -398,8 +398,12 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 				 &bo->placement, page_align, !kernel, NULL,
 				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
-	amdgpu_cs_report_moved_bytes(adev,
-		atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
+	else
+		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);
 
 	if (unlikely(r != 0))
 		return r;
-- 
2.7.4
