All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	patches@lists.linux.dev, Eric Huang <JinHuiEric.Huang@amd.com>,
	Felix Kuehling <Felix.Kuehling@amd.com>,
	Alex Deucher <alexander.deucher@amd.com>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.19 04/63] drm/amdkfd: change system memory overcommit limit
Date: Tue, 14 May 2024 12:19:25 +0200	[thread overview]
Message-ID: <20240514100948.179672209@linuxfoundation.org> (raw)
In-Reply-To: <20240514100948.010148088@linuxfoundation.org>

4.19-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Eric Huang <JinhuiEric.Huang@amd.com>

[ Upstream commit 5d240da93edc29adb68320c5e475dc9c7fcad5dd ]

It is to improve system limit by:
1. replacing userptrlimit with a total memory limit that
conunts TTM memory usage and userptr usage.
2. counting acc size for all BOs.

Signed-off-by: Eric Huang <JinHuiEric.Huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Stable-dep-of: 25e9227c6afd ("drm/amdgpu: Fix leak when GPU memory allocation fails")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++--------
 1 file changed, 58 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 4488aad64643b..13a03f467688a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -46,9 +46,9 @@
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
-	uint64_t max_userptr_mem_limit;
+	uint64_t max_ttm_mem_limit;
 	int64_t system_mem_used;
-	int64_t userptr_mem_used;
+	int64_t ttm_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
 
@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 }
 
 /* Set memory usage limits. Current, limits are
- *  System (kernel) memory - 3/8th System RAM
- *  Userptr memory - 3/4th System RAM
+ *  System (TTM + userptr) memory - 3/4th System RAM
+ *  TTM memory - 3/8th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 	mem *= si.mem_unit;
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
-	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
+	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
+	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
 		(kfd_mem_limit.max_system_mem_limit >> 20),
-		(kfd_mem_limit.max_userptr_mem_limit >> 20));
+		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
-					      uint64_t size, u32 domain)
+		uint64_t size, u32 domain, bool sg)
 {
-	size_t acc_size;
+	size_t acc_size, system_mem_needed, ttm_mem_needed;
 	int ret = 0;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 				       sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
-			kfd_mem_limit.max_system_mem_limit) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
-		if ((kfd_mem_limit.system_mem_used + acc_size >
-			kfd_mem_limit.max_system_mem_limit) ||
-			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
-			kfd_mem_limit.max_userptr_mem_limit)) {
-			ret = -ENOMEM;
-			goto err_no_mem;
-		}
-		kfd_mem_limit.system_mem_used += acc_size;
-		kfd_mem_limit.userptr_mem_used += size;
+		/* TTM GTT memory */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size + size;
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		/* Userptr */
+		system_mem_needed = acc_size + size;
+		ttm_mem_needed = acc_size;
+	} else {
+		/* VRAM and SG */
+		system_mem_needed = acc_size;
+		ttm_mem_needed = acc_size;
+	}
+
+	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+		kfd_mem_limit.max_system_mem_limit) ||
+		(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+		kfd_mem_limit.max_ttm_mem_limit))
+		ret = -ENOMEM;
+	else {
+		kfd_mem_limit.system_mem_used += system_mem_needed;
+		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
 	}
-err_no_mem:
+
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
 static void unreserve_system_mem_limit(struct amdgpu_device *adev,
-				       uint64_t size, u32 domain)
+		uint64_t size, u32 domain, bool sg)
 {
 	size_t acc_size;
 
@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
-	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
+		kfd_mem_limit.system_mem_used -= (acc_size + size);
+		kfd_mem_limit.ttm_mem_used -= acc_size;
+	} else {
 		kfd_mem_limit.system_mem_used -= acc_size;
-		kfd_mem_limit.userptr_mem_used -= size;
+		kfd_mem_limit.ttm_mem_used -= acc_size;
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
 	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
-		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
-		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+		kfd_mem_limit.system_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
 	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+		kfd_mem_limit.ttm_mem_used -=
+			(bo->tbo.acc_size + amdgpu_bo_size(bo));
+	} else {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-		  "kfd userptr memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+		  "kfd TTM memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -1201,10 +1217,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
+						     alloc_domain, false);
 	if (ret) {
 		pr_debug("Insufficient system memory\n");
-		goto err_reserve_system_mem;
+		goto err_reserve_limit;
 	}
 
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1252,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 allocate_init_user_pages_failed:
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
-	goto err_reserve_system_mem;
+	goto err_reserve_limit;
 err_bo_create:
-	unreserve_system_mem_limit(adev, size, alloc_domain);
-err_reserve_system_mem:
+	unreserve_system_mem_limit(adev, size, alloc_domain, false);
+err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
 	return ret;
-- 
2.43.0




  parent reply	other threads:[~2024-05-14 11:39 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-14 10:19 [PATCH 4.19 00/63] 4.19.314-rc1 review Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 01/63] dmaengine: pl330: issue_pending waits until WFP state Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 02/63] dmaengine: Revert "dmaengine: pl330: issue_pending waits until WFP state" Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 03/63] wifi: nl80211: dont free NULL coalescing rule Greg Kroah-Hartman
2024-05-14 10:19 ` Greg Kroah-Hartman [this message]
2024-05-14 10:19 ` [PATCH 4.19 05/63] drm/amdgpu: Fix leak when GPU memory allocation fails Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 06/63] net: slightly optimize eth_type_trans Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 07/63] ethernet: add a helper for assigning port addresses Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 08/63] ethernet: Add helper for assigning packet type when dest address does not match device address Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 09/63] pinctrl: core: delete incorrect free in pinctrl_enable() Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 10/63] power: rt9455: hide unused rt9455_boost_voltage_values Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 11/63] pinctrl: devicetree: fix refcount leak in pinctrl_dt_to_map() Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 12/63] s390/mm: Fix storage key clearing for guest huge pages Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 13/63] s390/mm: Fix clearing storage keys for " Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 14/63] bna: ensure the copied buf is NUL terminated Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 15/63] nsh: Restore skb->{protocol,data,mac_header} for outer header in nsh_gso_segment() Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 16/63] net l2tp: drop flow hash on forward Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 17/63] ASoC: meson: axg-tdm-interface: manage formatters in trigger Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 18/63] net: dsa: mv88e6xxx: Add number of MACs in the ATU Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 19/63] net: dsa: mv88e6xxx: Fix number of databases for 88E6141 / 88E6341 Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 20/63] net: bridge: fix multicast-to-unicast with fraglist GSO Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 21/63] tipc: fix a possible memleak in tipc_buf_append Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 22/63] scsi: lpfc: Update lpfc_ramp_down_queue_handler() logic Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 23/63] gfs2: Fix invalid metadata access in punch_hole Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 24/63] wifi: mac80211: fix ieee80211_bss_*_flags kernel-doc Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 25/63] net: mark racy access on sk->sk_rcvbuf Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 26/63] scsi: bnx2fc: Remove spin_lock_bh while releasing resources after upload Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 27/63] ALSA: line6: Zero-initialize message buffers Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 28/63] net: bcmgenet: Reset RBUF on first open Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 29/63] ata: sata_gemini: Check clk_enable() result Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 30/63] firewire: ohci: mask bus reset interrupts between ISR and bottom half Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 31/63] tools/power turbostat: Fix added raw MSR output Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 32/63] tools/power turbostat: Fix Bzy_MHz documentation typo Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 33/63] btrfs: make btrfs_clear_delalloc_extent() free delalloc reserve Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 34/63] btrfs: always clear PERTRANS metadata during commit Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 35/63] scsi: target: Fix SELinux error when systemd-modules loads the target module Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 36/63] selftests: timers: Fix valid-adjtimex signed left-shift undefined behavior Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 37/63] fs/9p: only translate RWX permissions for plain 9P2000 Greg Kroah-Hartman
2024-05-14 10:19 ` [PATCH 4.19 38/63] fs/9p: translate O_TRUNC into OTRUNC Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 39/63] 9p: explicitly deny setlease attempts Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 40/63] gpio: wcove: Use -ENOTSUPP consistently Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 41/63] gpio: crystalcove: " Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 42/63] fs/9p: drop inodes immediately on non-.L too Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 43/63] net:usb:qmi_wwan: support Rolling modules Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 44/63] ASoC: meson: axg-tdm-interface: Fix formatters in trigger" Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 45/63] tcp: remove redundant check on tskb Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 46/63] tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 47/63] tcp: Use refcount_inc_not_zero() in tcp_twsk_unique() Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 48/63] Bluetooth: Fix use-after-free bugs caused by sco_sock_timeout Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 49/63] Bluetooth: l2cap: fix null-ptr-deref in l2cap_chan_timeout Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 50/63] rtnetlink: Correct nested IFLA_VF_VLAN_LIST attribute validation Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 51/63] phonet: fix rtm_phonet_notify() skb allocation Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 52/63] net: bridge: fix corrupted ethernet header on multicast-to-unicast Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 53/63] ipv6: fib6_rules: avoid possible NULL dereference in fib6_rule_action() Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 54/63] af_unix: Do not use atomic ops for unix_sk(sk)->inflight Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 55/63] af_unix: Fix garbage collector racing against connect() Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 56/63] firewire: nosy: ensure user_length is taken into account when fetching packet contents Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 57/63] usb: gadget: composite: fix OS descriptors w_value logic Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 58/63] usb: gadget: f_fs: Fix a race condition when processing setup packets Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 59/63] tipc: fix UAF in error path Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 60/63] dyndbg: fix old BUG_ON in >control parser Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 61/63] drm/vmwgfx: Fix invalid reads in fence signaled events Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 62/63] net: fix out-of-bounds access in ops_init Greg Kroah-Hartman
2024-05-14 10:20 ` [PATCH 4.19 63/63] af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc() Greg Kroah-Hartman
2024-05-14 16:33 ` [PATCH 4.19 00/63] 4.19.314-rc1 review Harshit Mogalapalli
2024-05-14 19:41 ` Pavel Machek
2024-05-15 15:04 ` Shuah Khan
2024-05-16  7:33 ` Naresh Kamboju
2024-05-16 12:24 ` Pavel Machek
2024-05-16 13:05   ` Greg Kroah-Hartman
2024-05-16 12:30 ` Jon Hunter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240514100948.179672209@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=Felix.Kuehling@amd.com \
    --cc=JinHuiEric.Huang@amd.com \
    --cc=alexander.deucher@amd.com \
    --cc=patches@lists.linux.dev \
    --cc=sashal@kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.