* [RFC PATCH 2/3] drm/amdgpu: Add range param to amdgpu_vm_update_range
2022-12-20 23:27 [RFC PATCH 1/3] drm/amdgpu: Add vm->notifier_lock Felix Kuehling
@ 2022-12-20 23:27 ` Felix Kuehling
2023-01-03 8:27 ` Christian König
2022-12-20 23:27 ` [RFC PATCH 3/3] drm/amdkfd: Use per-process notifier_lock for SVM Felix Kuehling
2023-01-02 16:01 ` [RFC PATCH 1/3] drm/amdgpu: Add vm->notifier_lock Christian König
2 siblings, 1 reply; 5+ messages in thread
From: Felix Kuehling @ 2022-12-20 23:27 UTC (permalink / raw)
To: amd-gfx; +Cc: philip.yang, christian.koenig
This allows page table updates to be coordinated with interval notifiers
to avoid writing stale page table entries to the page table. Moving the
critical section inside the page table update avoids lock dependencies
with page table allocations under the notifier lock.
Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 ++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 58 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 6 ++-
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +-
4 files changed, 77 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a04f7aef4ca9..556d2e5d90e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -768,6 +768,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
* @vram_base: base for vram mappings
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
+ * @range: optional HMM range for coordination with interval notifier
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
@@ -780,7 +781,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
- struct dma_fence **fence)
+ struct hmm_range *range, struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_cb *tlb_cb;
@@ -794,7 +795,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
if (!tlb_cb) {
r = -ENOMEM;
- goto error_unlock;
+ goto error_dev_exit;
}
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
@@ -811,6 +812,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
+#ifdef CONFIG_MMU_NOTIFIER
+ params.range = range;
+#endif
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
@@ -823,12 +827,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
- amdgpu_vm_eviction_lock(vm);
- if (vm->evicting) {
- r = -EBUSY;
- goto error_free;
- }
-
if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
struct dma_fence *tmp = dma_fence_get_stub();
@@ -893,7 +891,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
start = tmp;
}
+ r = amdgpu_vm_pts_lock(&params);
+ if (r)
+ goto error_free;
r = vm->update_funcs->commit(&params, fence);
+ amdgpu_vm_pts_unlock(&params);
if (flush_tlb || params.table_freed) {
tlb_cb->vm = vm;
@@ -911,8 +913,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
error_free:
kfree(tlb_cb);
-error_unlock:
- amdgpu_vm_eviction_unlock(vm);
+error_dev_exit:
drm_dev_exit(idx);
return r;
}
@@ -1058,7 +1059,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv, mapping->start, mapping->last,
update_flags, mapping->offset,
vram_base, mem, pages_addr,
- last_update);
+ NULL, last_update);
if (r)
return r;
}
@@ -1253,7 +1254,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
mapping->start, mapping->last,
init_pte_value, 0, 0, NULL, NULL,
- &f);
+ NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2512,7 +2513,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
}
r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
- addr, flags, value, 0, NULL, NULL, NULL);
+ addr, flags, value, 0, NULL, NULL, NULL, NULL);
if (r)
goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 07af80df812b..8fad51d66bce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -31,6 +31,8 @@
#include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include <linux/mmu_notifier.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -196,6 +198,13 @@ struct amdgpu_vm_update_params {
*/
struct amdgpu_vm *vm;
+#ifdef CONFIG_MMU_NOTIFIER
+ /**
+ * @range: optional HMM range for coordination with interval notifier
+ */
+ struct hmm_range *range;
+#endif
+
/**
* @immediate: if changes should be made immediately
*/
@@ -411,7 +420,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct dma_resv *resv, uint64_t start, uint64_t last,
uint64_t flags, uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
- struct dma_fence **fence);
+ struct hmm_range *range, struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
@@ -535,4 +544,51 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
mutex_unlock(&vm->eviction_lock);
}
+/*
+ * Make page tables safe to access without a reservation and ensure that the
+ * ptes being written are still valid. This can fail if page tables are being
+ * evicted (-EBUSY) or an HMM range is being invalidated (-EAGAIN).
+ */
+static inline int amdgpu_vm_pts_lock(struct amdgpu_vm_update_params *params)
+{
+ int r = 0;
+
+#ifdef CONFIG_MMU_NOTIFIER
+ if (params->range) {
+ mutex_lock(params->vm->notifier_lock);
+ if (mmu_interval_read_retry(params->range->notifier,
+ params->range->notifier_seq)) {
+ r = -EAGAIN;
+ goto error_unlock_notifier;
+ }
+ }
+#endif
+ amdgpu_vm_eviction_lock(params->vm);
+ if (params->vm->evicting) {
+ r = -EBUSY;
+ goto error_unlock;
+ }
+
+ return 0;
+
+error_unlock:
+ amdgpu_vm_eviction_unlock(params->vm);
+#ifdef CONFIG_MMU_NOTIFIER
+error_unlock_notifier:
+ if (params->range)
+ mutex_unlock(params->vm->notifier_lock);
+#endif
+
+ return r;
+}
+
+static inline void amdgpu_vm_pts_unlock(struct amdgpu_vm_update_params *params)
+{
+ amdgpu_vm_eviction_unlock(params->vm);
+#ifdef CONFIG_MMU_NOTIFIER
+ if (params->range)
+ mutex_unlock(params->vm->notifier_lock);
+#endif
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index b5f3bba851db..2891284eba1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -597,9 +597,7 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
if (entry->bo)
return 0;
- amdgpu_vm_eviction_unlock(vm);
r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
- amdgpu_vm_eviction_lock(vm);
if (r)
return r;
@@ -960,6 +958,9 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
entry_end += cursor.pfn & ~(entry_end - 1);
entry_end = min(entry_end, end);
+ r = amdgpu_vm_pts_lock(params);
+ if (r)
+ return r;
do {
struct amdgpu_vm *vm = params->vm;
uint64_t upd_end = min(entry_end, frag_end);
@@ -992,6 +993,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
break;
}
} while (frag_start < entry_end);
+ amdgpu_vm_pts_unlock(params);
if (amdgpu_vm_pt_descendant(adev, &cursor)) {
/* Free all child entries.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 2dc3b04064bd..cc46878901c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1217,7 +1217,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
last, init_pte_value, 0, 0, NULL, NULL,
- fence);
+ NULL, fence);
}
static int
@@ -1323,7 +1323,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pte_flags,
(last_start - prange->start) << PAGE_SHIFT,
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
- NULL, dma_addr, &vm->last_update);
+ NULL, dma_addr, NULL, &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
--
2.32.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [RFC PATCH 3/3] drm/amdkfd: Use per-process notifier_lock for SVM
2022-12-20 23:27 [RFC PATCH 1/3] drm/amdgpu: Add vm->notifier_lock Felix Kuehling
2022-12-20 23:27 ` [RFC PATCH 2/3] drm/amdgpu: Add range param to amdgpu_vm_update_range Felix Kuehling
@ 2022-12-20 23:27 ` Felix Kuehling
2023-01-02 16:01 ` [RFC PATCH 1/3] drm/amdgpu: Add vm->notifier_lock Christian König
2 siblings, 0 replies; 5+ messages in thread
From: Felix Kuehling @ 2022-12-20 23:27 UTC (permalink / raw)
To: amd-gfx; +Cc: philip.yang, christian.koenig
Use the process_info->notifier lock for validating and mapping SVM ranges.
Take advantage of the locking done inside amdgpu_vm_ptes_update to fix
a lock dependency issue with page table allocations.
TODO: double check that prange->lock is not still needed somewhere inside
the notifier lock because it protects a few other things.
[ 83.979486] ======================================================
[ 83.986583] WARNING: possible circular locking dependency detected
[ 83.993643] 5.19.0-kfd-fkuehlin #75 Not tainted
[ 83.999044] ------------------------------------------------------
[ 84.006088] kfdtest/3453 is trying to acquire lock:
[ 84.011820] ffff9a998561e210 (&prange->lock){+.+.}-{3:3}, at: svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.023911]
but task is already holding lock:
[ 84.031608] ffffffffbcd929c0 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}, at: unmap_vmas+0x5/0x170
[ 84.041992]
which lock already depends on the new lock.
[ 84.052785]
the existing dependency chain (in reverse order) is:
[ 84.061993]
-> #3 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}:
[ 84.071548] fs_reclaim_acquire+0x6d/0xd0
[ 84.076941] kmem_cache_alloc_trace+0x34/0x760
[ 84.082766] alloc_workqueue_attrs+0x1b/0x50
[ 84.088411] workqueue_init+0x88/0x318
[ 84.093533] kernel_init_freeable+0x134/0x28f
[ 84.099258] kernel_init+0x16/0x130
[ 84.104107] ret_from_fork+0x1f/0x30
[ 84.109038]
-> #2 (fs_reclaim){+.+.}-{0:0}:
[ 84.116348] fs_reclaim_acquire+0xa1/0xd0
[ 84.121697] kmem_cache_alloc+0x2c/0x760
[ 84.126948] drm_block_alloc.isra.0+0x27/0x50 [drm_buddy]
[ 84.133679] split_block+0x4d/0x140 [drm_buddy]
[ 84.139539] drm_buddy_alloc_blocks+0x385/0x580 [drm_buddy]
[ 84.146435] amdgpu_vram_mgr_new+0x213/0x4f0 [amdgpu]
[ 84.153399] ttm_resource_alloc+0x31/0x80 [ttm]
[ 84.159366] ttm_bo_mem_space+0x8f/0x230 [ttm]
[ 84.165169] ttm_bo_validate+0xc5/0x170 [ttm]
[ 84.170872] ttm_bo_init_reserved+0x1a6/0x230 [ttm]
[ 84.177075] amdgpu_bo_create+0x1a0/0x510 [amdgpu]
[ 84.183600] amdgpu_bo_create_reserved+0x188/0x1e0 [amdgpu]
[ 84.190803] amdgpu_bo_create_kernel_at+0x64/0x200 [amdgpu]
[ 84.197994] amdgpu_ttm_init+0x420/0x4c0 [amdgpu]
[ 84.204301] gmc_v10_0_sw_init+0x33a/0x530 [amdgpu]
[ 84.210813] amdgpu_device_init.cold+0x10d4/0x17a1 [amdgpu]
[ 84.218077] amdgpu_driver_load_kms+0x15/0x110 [amdgpu]
[ 84.224919] amdgpu_pci_probe+0x142/0x350 [amdgpu]
[ 84.231313] local_pci_probe+0x40/0x80
[ 84.236437] work_for_cpu_fn+0x10/0x20
[ 84.241500] process_one_work+0x270/0x5a0
[ 84.246805] worker_thread+0x203/0x3c0
[ 84.251828] kthread+0xea/0x110
[ 84.256229] ret_from_fork+0x1f/0x30
[ 84.261061]
-> #1 (&mgr->lock){+.+.}-{3:3}:
[ 84.268156] __mutex_lock+0x9a/0xf30
[ 84.272967] amdgpu_vram_mgr_new+0x14a/0x4f0 [amdgpu]
[ 84.279752] ttm_resource_alloc+0x31/0x80 [ttm]
[ 84.285602] ttm_bo_mem_space+0x8f/0x230 [ttm]
[ 84.291321] ttm_bo_validate+0xc5/0x170 [ttm]
[ 84.296939] ttm_bo_init_reserved+0xe2/0x230 [ttm]
[ 84.302969] amdgpu_bo_create+0x1a0/0x510 [amdgpu]
[ 84.309297] amdgpu_bo_create_vm+0x2e/0x80 [amdgpu]
[ 84.315656] amdgpu_vm_pt_create+0xf5/0x270 [amdgpu]
[ 84.322090] amdgpu_vm_ptes_update+0x6c4/0x8f0 [amdgpu]
[ 84.328793] amdgpu_vm_update_range+0x29b/0x730 [amdgpu]
[ 84.335537] svm_range_validate_and_map+0xc78/0x1390 [amdgpu]
[ 84.342734] svm_range_set_attr+0xc74/0x1170 [amdgpu]
[ 84.349222] kfd_ioctl+0x189/0x5c0 [amdgpu]
[ 84.354808] __x64_sys_ioctl+0x80/0xb0
[ 84.359738] do_syscall_64+0x35/0x80
[ 84.364481] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 84.370687]
-> #0 (&prange->lock){+.+.}-{3:3}:
[ 84.377864] __lock_acquire+0x12ed/0x27e0
[ 84.383027] lock_acquire+0xca/0x2e0
[ 84.387759] __mutex_lock+0x9a/0xf30
[ 84.392491] svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.400345] __mmu_notifier_invalidate_range_start+0x1d3/0x230
[ 84.407410] unmap_vmas+0x162/0x170
[ 84.412126] unmap_region+0xa8/0x110
[ 84.416905] __do_munmap+0x209/0x4f0
[ 84.421680] __vm_munmap+0x78/0x120
[ 84.426365] __x64_sys_munmap+0x17/0x20
[ 84.431392] do_syscall_64+0x35/0x80
[ 84.436164] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 84.442405]
other info that might help us debug this:
[ 84.452431] Chain exists of:
&prange->lock --> fs_reclaim --> mmu_notifier_invalidate_range_start
[ 84.466395] Possible unsafe locking scenario:
[ 84.473720] CPU0 CPU1
[ 84.479020] ---- ----
[ 84.484296] lock(mmu_notifier_invalidate_range_start);
[ 84.490333] lock(fs_reclaim);
[ 84.496705] lock(mmu_notifier_invalidate_range_start);
[ 84.505246] lock(&prange->lock);
[ 84.509361]
*** DEADLOCK ***
[ 84.517360] 2 locks held by kfdtest/3453:
[ 84.522060] #0: ffff9a99821ec4a8 (&mm->mmap_lock#2){++++}-{3:3}, at: __do_munmap+0x417/0x4f0
[ 84.531287] #1: ffffffffbcd929c0 (mmu_notifier_invalidate_range_start){+.+.}-{0:0}, at: unmap_vmas+0x5/0x170
[ 84.541896]
stack backtrace:
[ 84.547630] CPU: 3 PID: 3453 Comm: kfdtest Not tainted 5.19.0-kfd-fkuehlin #75
[ 84.555537] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./EPYCD8-2T, BIOS P2.60 04/10/2020
[ 84.565788] Call Trace:
[ 84.568925] <TASK>
[ 84.571702] dump_stack_lvl+0x45/0x59
[ 84.576034] check_noncircular+0xfe/0x110
[ 84.580715] ? kernel_text_address+0x6d/0xe0
[ 84.585652] __lock_acquire+0x12ed/0x27e0
[ 84.590340] lock_acquire+0xca/0x2e0
[ 84.594595] ? svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.602338] __mutex_lock+0x9a/0xf30
[ 84.606714] ? svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.614262] ? svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.621806] ? svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.629353] svm_range_cpu_invalidate_pagetables+0x90/0x5e0 [amdgpu]
[ 84.636742] ? lock_release+0x139/0x2b0
[ 84.641374] __mmu_notifier_invalidate_range_start+0x1d3/0x230
[ 84.647976] unmap_vmas+0x162/0x170
[ 84.652203] unmap_region+0xa8/0x110
[ 84.656503] __do_munmap+0x209/0x4f0
[ 84.660792] __vm_munmap+0x78/0x120
[ 84.664977] __x64_sys_munmap+0x17/0x20
[ 84.669499] do_syscall_64+0x35/0x80
[ 84.673755] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 84.679485] RIP: 0033:0x7f32872eb97b
[ 84.683738] Code: 8b 15 19 35 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb 89 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e5 34 0d 00 f7 d8 64 89 01 48
[ 84.703915] RSP: 002b:00007fffb06c4508 EFLAGS: 00000246 ORIG_RAX: 000000000000000b
[ 84.712205] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f32872eb97b
[ 84.720072] RDX: 0000000004000000 RSI: 0000000004000000 RDI: 00007f32831ae000
[ 84.727944] RBP: 00007fffb06c4750 R08: 00007fffb06c4548 R09: 000055e7570ad230
[ 84.735809] R10: 000055e757088010 R11: 0000000000000246 R12: 000055e75453cefa
[ 84.743688] R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000000
[ 84.751584] </TASK>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 13 ++---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 ++++++++++++------------
drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 16 +------
3 files changed, 37 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 884f7d4ee695..b8430c27d775 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -882,6 +882,7 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
*/
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
+ struct amdkfd_process_info *process_info;
unsigned long addr = vmf->address;
struct svm_range_bo *svm_bo;
enum svm_work_list_ops op;
@@ -889,6 +890,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
struct svm_range *prange;
struct kfd_process *p;
struct mm_struct *mm;
+ unsigned int flags;
int r = 0;
svm_bo = vmf->page->zone_device_data;
@@ -916,6 +918,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
r = 0;
goto out_unref_process;
}
+ process_info = p->kgd_process_info;
pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
addr >>= PAGE_SHIFT;
@@ -936,13 +939,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
if (!prange->actual_loc)
goto out_unlock_prange;
- svm_range_lock(parent);
- if (prange != parent)
- mutex_lock_nested(&prange->lock, 1);
+ mutex_lock(&process_info->notifier_lock);
+ flags = memalloc_noreclaim_save();
r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
- if (prange != parent)
- mutex_unlock(&prange->lock);
- svm_range_unlock(parent);
+ memalloc_noreclaim_restore(flags);
+ mutex_unlock(&process_info->notifier_lock);
if (r) {
pr_debug("failed %d to split range by granularity\n", r);
goto out_unlock_prange;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cc46878901c1..7020861438fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1278,6 +1278,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
static int
svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+ struct hmm_range *hmm_range,
unsigned long offset, unsigned long npages, bool readonly,
dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
struct dma_fence **fence, bool flush_tlb)
@@ -1323,7 +1324,8 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
pte_flags,
(last_start - prange->start) << PAGE_SHIFT,
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
- NULL, dma_addr, NULL, &vm->last_update);
+ NULL, dma_addr, hmm_range,
+ &vm->last_update);
for (j = last_start - prange->start; j <= i; j++)
dma_addr[j] |= last_domain;
@@ -1350,9 +1352,10 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
}
static int
-svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
- unsigned long npages, bool readonly,
- unsigned long *bitmap, bool wait, bool flush_tlb)
+svm_range_map_to_gpus(struct svm_range *prange, struct hmm_range *hmm_range,
+ unsigned long offset, unsigned long npages,
+ bool readonly, unsigned long *bitmap, bool wait,
+ bool flush_tlb)
{
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
@@ -1385,7 +1388,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
continue;
}
- r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
+ r = svm_range_map_to_gpu(pdd, prange, hmm_range, offset,
+ npages, readonly,
prange->dma_addr[gpuidx],
bo_adev, wait ? &fence : NULL,
flush_tlb);
@@ -1613,23 +1617,15 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
goto unreserve_out;
}
- svm_range_lock(prange);
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
- pr_debug("hmm update the range, need validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
- if (!list_empty(&prange->child_list)) {
- pr_debug("range split by unmap in parallel, validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
+ r = svm_range_map_to_gpus(prange, hmm_range, offset, npages,
+ readonly, ctx.bitmap, wait,
+ flush_tlb);
- r = svm_range_map_to_gpus(prange, offset, npages, readonly,
- ctx.bitmap, wait, flush_tlb);
-
-unlock_out:
- svm_range_unlock(prange);
+ /* Ignoring return value because this just frees the hmm_range.
+ * Actual checking is done in amdgpu_vm_ptes_update under the
+ * notifier lock.
+ */
+ amdgpu_hmm_range_get_pages_done(hmm_range);
addr = next;
}
@@ -1806,13 +1802,11 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
if (!pchild->mapped_to_gpu)
continue;
mapped = true;
- mutex_lock_nested(&pchild->lock, 1);
if (pchild->start <= last && pchild->last >= start) {
pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
pchild->start, pchild->last);
atomic_inc(&pchild->invalid);
}
- mutex_unlock(&pchild->lock);
}
if (!mapped)
@@ -1848,12 +1842,10 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
prange->svms, start, last);
list_for_each_entry(pchild, &prange->child_list, child_list) {
- mutex_lock_nested(&pchild->lock, 1);
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
- mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
@@ -2335,13 +2327,11 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
unmap_parent = start <= prange->start && last >= prange->last;
list_for_each_entry(pchild, &prange->child_list, child_list) {
- mutex_lock_nested(&pchild->lock, 1);
s = max(start, pchild->start);
l = min(last, pchild->last);
if (l >= s)
svm_range_unmap_from_gpus(pchild, s, l, trigger);
svm_range_unmap_split(mm, prange, pchild, start, last);
- mutex_unlock(&pchild->lock);
}
s = max(start, prange->start);
l = min(last, prange->last);
@@ -2384,9 +2374,12 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
{
+ struct amdkfd_process_info *process_info;
struct svm_range *prange;
+ struct kfd_process *p;
unsigned long start;
unsigned long last;
+ unsigned int flags;
if (range->event == MMU_NOTIFY_RELEASE)
return true;
@@ -2404,8 +2397,11 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
mni->interval_tree.last >> PAGE_SHIFT, range->event);
prange = container_of(mni, struct svm_range, notifier);
+ p = container_of(prange->svms, struct kfd_process, svms);
+ process_info = p->kgd_process_info;
- svm_range_lock(prange);
+ mutex_lock(&process_info->notifier_lock);
+ flags = memalloc_noreclaim_save();
mmu_interval_set_seq(mni, cur_seq);
switch (range->event) {
@@ -2417,7 +2413,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
break;
}
- svm_range_unlock(prange);
+ memalloc_noreclaim_restore(flags);
+ mutex_unlock(&process_info->notifier_lock);
mmput(mni->mm);
return true;
@@ -2959,6 +2956,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
int
svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
{
+ struct amdkfd_process_info *process_info = p->kgd_process_info;
struct svm_range *prange, *pchild;
uint64_t reserved_size = 0;
uint64_t size;
@@ -2967,9 +2965,9 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);
mutex_lock(&p->svms.lock);
+ mutex_lock(&process_info->notifier_lock);
list_for_each_entry(prange, &p->svms.list, list) {
- svm_range_lock(prange);
list_for_each_entry(pchild, &prange->child_list, child_list) {
size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
if (xnack_enabled) {
@@ -2996,10 +2994,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
reserved_size += size;
}
out_unlock:
- svm_range_unlock(prange);
if (r)
break;
}
+ mutex_unlock(&process_info->notifier_lock);
if (r)
amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 7a33b93f9df6..bb455dc7f549 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -82,8 +82,7 @@ struct svm_work_list_item {
* @offset: range start offset within mm_nodes
* @svm_bo: struct to manage splited amdgpu_bo
* @svm_bo_list:link list node, to scan all ranges which share same svm_bo
- * @lock: protect prange start, last, child_list, svm_bo_list
- * @saved_flags:save/restore current PF_MEMALLOC flags
+ * @lock: protect prange start, last, svm_bo_list
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
* @perferred_loc: perferred location, 0 for CPU, or GPU id
* @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
@@ -117,7 +116,6 @@ struct svm_range {
struct svm_range_bo *svm_bo;
struct list_head svm_bo_list;
struct mutex lock;
- unsigned int saved_flags;
uint32_t flags;
uint32_t preferred_loc;
uint32_t prefetch_loc;
@@ -135,18 +133,6 @@ struct svm_range {
bool mapped_to_gpu;
};
-static inline void svm_range_lock(struct svm_range *prange)
-{
- mutex_lock(&prange->lock);
- prange->saved_flags = memalloc_noreclaim_save();
-
-}
-static inline void svm_range_unlock(struct svm_range *prange)
-{
- memalloc_noreclaim_restore(prange->saved_flags);
- mutex_unlock(&prange->lock);
-}
-
static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
{
if (svm_bo)
--
2.32.0
^ permalink raw reply related [flat|nested] 5+ messages in thread