public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Eric Huang <JinHuiEric.Huang@amd.com>,
	Felix Kuehling <Felix.Kuehling@amd.com>,
	Alex Deucher <alexander.deucher@amd.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.19 04/63] drm/amdkfd: change system memory overcommit limit
Date: Tue, 14 May 2024 12:19:25 +0200	[thread overview]
Message-ID: <20240514100948.179672209@linuxfoundation.org> (raw)
In-Reply-To: <20240514100948.010148088@linuxfoundation.org>

4.19-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Eric Huang <JinhuiEric.Huang@amd.com>

[ Upstream commit 5d240da93edc29adb68320c5e475dc9c7fcad5dd ]

It is to improve system limit by:
1. replacing userptrlimit with a total memory limit that
conunts TTM memory usage and userptr usage.
2. counting acc size for all BOs.

Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Stable-dep-of: 25e9227c6afd ("drm/amdgpu: Fix leak when GPU memory allocation fails")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++--------
 1 file changed, 58 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 4488aad64643b..13a03f467688a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -46,9 +46,9 @@
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
-	uint64_t max_userptr_mem_limit;
+	uint64_t max_ttm_mem_limit;
 	int64_t system_mem_used;
-	int64_t userptr_mem_used;
+	int64_t ttm_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
 
@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 }
 
 /* Set memory usage limits. Current, limits are
- *  System (kernel) memory - 3/8th System RAM
- *  Userptr memory - 3/4th System RAM
+ *  System (TTM + userptr) memory - 3/4th System RAM
+ *  TTM memory - 3/8th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 	mem *= si.mem_unit;
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
-	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 		(kfd_mem_limit.max_system_mem_limit >> 20),
-		(kfd_mem_limit.max_userptr_mem_limit >> 20));
+		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
-					      uint64_t size, u32 domain)
+		uint64_t size, u32 domain, bool sg)
 {
-	size_t acc_size;
+	size_t acc_size, system_mem_needed, ttm_mem_needed;
 	int ret = 0;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 				       sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
-			kfd_mem_limit.max_system_mem_limit) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
-		if ((kfd_mem_limit.system_mem_used + acc_size >
-			kfd_mem_limit.max_system_mem_limit) ||
-			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
-			kfd_mem_limit.max_userptr_mem_limit)) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += acc_size;
-		kfd_mem_limit.userptr_mem_used += size;
+		/* TTM GTT memory */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size + size;
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		/* Userptr */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size;
+	} else {
+		/* VRAM and SG */
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
+	}
+
+	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+		kfd_mem_limit.max_system_mem_limit) ||
+		(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+		kfd_mem_limit.max_ttm_mem_limit))
+		ret = -ENOMEM;
+	else {
+		kfd_mem_limit.system_mem_used += system_mem_needed;
+		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
 	}
-err_no_mem:
+
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
 static void unreserve_system_mem_limit(struct amdgpu_device *adev,
-				       uint64_t size, u32 domain)
+		uint64_t size, u32 domain, bool sg)
 {
 	size_t acc_size;
 
@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		kfd_mem_limit.system_mem_used -= (acc_size + size);
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+	} else {
 		kfd_mem_limit.system_mem_used -= acc_size;
-		kfd_mem_limit.userptr_mem_used -= size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
 	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
-		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
-		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+		kfd_mem_limit.system_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
 	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		kfd_mem_limit.ttm_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+	} else {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -1201,10 +1217,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
+						     alloc_domain, false);
 	if (ret) {
 		pr_debug("Insufficient system memory\n");
-		goto err_reserve_system_mem;
+		goto err_reserve_limit;
 	}
 
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1252,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 allocate_init_user_pages_failed:
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
-	goto err_reserve_system_mem;
+	goto err_reserve_limit;
 err_bo_create:
-	unreserve_system_mem_limit(adev, size, alloc_domain);
-err_reserve_system_mem:
+	unreserve_system_mem_limit(adev, size, alloc_domain, false);
+err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
 	return ret;
-- 
2.43.0




  parent reply	other threads:[~2024-05-14 11:39 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-14 10:19 [PATCH 4.19 00/63] 4.19.314-rc1 review Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 01/63] dmaengine: pl330: issue_pending waits until WFP state Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 02/63] dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state" Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 03/63] wifi: nl80211: dont free NULL coalescing rule Greg Kroah-Hartman
2024-05-14 10:19 ` Greg Kroah-Hartman [this message]
2024-05-14 10:19 ` [PATCH 4.19 05/63] drm/amdgpu: Fix leak when GPU memory allocation fails Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 06/63] net: slightly optimize eth_type_trans Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 07/63] ethernet: add a helper for assigning port addresses Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 08/63] ethernet: Add helper for assigning packet type when dest address does not match device address Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 09/63] pinctrl: core: delete incorrect free in pinctrl_enable() Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 10/63] power: rt9455: hide unused rt9455_boost_voltage_values Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 11/63] pinctrl: devicetree: fix refcount leak in pinctrl_dt_to_map() Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 12/63] s390/mm: Fix storage key clearing for guest huge pages Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 13/63] s390/mm: Fix clearing storage keys for " Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 14/63] bna: ensure the copied buf is NUL terminated Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 15/63] nsh: Restore skb->{protocol,data,mac_header} for outer header in nsh_gso_segment() Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 16/63] net l2tp: drop flow hash on forward Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 17/63] ASoC: meson: axg-tdm-interface: manage formatters in trigger Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 18/63] net: dsa: mv88e6xxx: Add number of MACs in the ATU Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 19/63] net: dsa: mv88e6xxx: Fix number of databases for 88E6141 / 88E6341 Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 20/63] net: bridge: fix multicast-to-unicast with fraglist GSO Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 21/63] tipc: fix a possible memleak in tipc_buf_append Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 22/63] scsi: lpfc: Update lpfc_ramp_down_queue_handler() logic Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 23/63] gfs2: Fix invalid metadata access in punch_hole Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 24/63] wifi: mac80211: fix ieee80211_bss_*_flags kernel-doc Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 25/63] net: mark racy access on sk->sk_rcvbuf Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 26/63] scsi: bnx2fc: Remove spin_lock_bh while releasing resources after upload Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 27/63] ALSA: line6: Zero-initialize message buffers Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 28/63] net: bcmgenet: Reset RBUF on first open Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 29/63] ata: sata_gemini: Check clk_enable() result Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 30/63] firewire: ohci: mask bus reset interrupts between ISR and bottom half Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 31/63] tools/power turbostat: Fix added raw MSR output Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 32/63] tools/power turbostat: Fix Bzy_MHz documentation typo Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 33/63] btrfs: make btrfs_clear_delalloc_extent() free delalloc reserve Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 34/63] btrfs: always clear PERTRANS metadata during commit Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 35/63] scsi: target: Fix SELinux error when systemd-modules loads the target module Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 36/63] selftests: timers: Fix valid-adjtimex signed left-shift undefined behavior Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 37/63] fs/9p: only translate RWX permissions for plain 9P2000 Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 38/63] fs/9p: translate O_TRUNC into OTRUNC Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 39/63] 9p: explicitly deny setlease attempts Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 40/63] gpio: wcove: Use -ENOTSUPP consistently Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 41/63] gpio: crystalcove: " Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 42/63] fs/9p: drop inodes immediately on non-.L too Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 43/63] net:usb:qmi_wwan: support Rolling modules Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 44/63] ASoC: meson: axg-tdm-interface: Fix formatters in trigger" Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 45/63] tcp: remove redundant check on tskb Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 46/63] tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 47/63] tcp: Use refcount_inc_not_zero() in tcp_twsk_unique() Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 48/63] Bluetooth: Fix use-after-free bugs caused by sco_sock_timeout Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 49/63] Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 50/63] rtnetlink: Correct nested IFLA_VF_VLAN_LIST attribute validation Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 51/63] phonet: fix rtm_phonet_notify() skb allocation Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 52/63] net: bridge: fix corrupted ethernet header on multicast-to-unicast Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 53/63] ipv6: fib6_rules: avoid possible NULL dereference in fib6_rule_action() Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 54/63] af_unix: Do not use atomic ops for unix_sk(sk)->inflight Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 55/63] af_unix: Fix garbage collector racing against connect() Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 56/63] firewire: nosy: ensure user_length is taken into account when fetching packet contents Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 57/63] usb: gadget: composite: fix OS descriptors w_value logic Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 58/63] usb: gadget: f_fs: Fix a race condition when processing setup packets Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 59/63] tipc: fix UAF in error path Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 60/63] dyndbg: fix old BUG_ON in >control parser Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 61/63] drm/vmwgfx: Fix invalid reads in fence signaled events Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 62/63] net: fix out-of-bounds access in ops_init Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 63/63] af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc() Greg Kroah-Hartman
2024-05-14 16:33 ` [PATCH 4.19 00/63] 4.19.314-rc1 review Harshit Mogalapalli
2024-05-14 19:41 ` Pavel Machek
2024-05-15 15:04 ` Shuah Khan
2024-05-16  7:33 ` Naresh Kamboju
2024-05-16 12:24 ` Pavel Machek
2024-05-16 13:05   ` Greg Kroah-Hartman
2024-05-16 12:30 ` Jon Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240514100948.179672209@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=Felix.Kuehling@amd.com \
    --cc=JinHuiEric.Huang@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=patches@lists.linux.dev \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox