* [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
@ 2021-09-15 6:37 xinhui pan
2021-09-15 6:42 ` 回复: " Pan, Xinhui
2021-09-15 20:14 ` Andrey Grodzovsky
0 siblings, 2 replies; 6+ messages in thread
From: xinhui pan @ 2021-09-15 6:37 UTC (permalink / raw)
To: amd-gfx; +Cc: alexander.deucher, christian.koenig, andrey.grodzovsky,
xinhui pan
We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.
perf top says below,
75.81% [kernel] [k] __srcu_read_unlock
6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
3.56% [kernel] [k] __srcu_read_lock
2.20% [amdgpu] [k] amdgpu_vm_cpu_update
2.20% [kernel] [k] __sg_page_iter_dma_next
2.15% [drm] [k] drm_dev_enter
1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
1.18% [kernel] [k] __sg_alloc_table_from_pages
1.09% [drm] [k] drm_dev_exit
So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v1:
add enter/exit in more gmc_set_pte_pde callers
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +++++++++++++++++-------
3 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 76efd5f8950f..d7e4f4660acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
+#include <drm/drm_drv.h>
/*
* GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
+ int idx;
if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n");
return -EINVAL;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return 0;
+
t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+ drm_dev_exit(idx);
return 0;
}
@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
{
uint64_t page_base;
unsigned i, j, t;
+ int idx;
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return 0;
+
t = offset / AMDGPU_GPU_PAGE_SIZE;
for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
+ drm_dev_exit(idx);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 54f059501a33..1427fd70310c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
- int idx;
-
- if (!drm_dev_enter(&adev->ddev, &idx))
- return 0;
/*
* The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
value |= flags;
writeq(value, ptr + (gpu_page_idx * 8));
- drm_dev_exit(idx);
-
return 0;
}
@@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
u64 vram_end = vram_addr + vram_size;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+ int idx;
+
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return;
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
flags |= AMDGPU_PTE_WRITEABLE;
@@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
/* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+ drm_dev_exit(idx);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0126ece898da..daa16d2f89da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo = &vmbo->bo;
unsigned entries, ats_entries;
uint64_t addr;
- int r;
+ int r, idx;
/* Figure out our place in the hierarchy */
if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
return r;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return -ENODEV;
+
r = vm->update_funcs->map_table(vmbo);
if (r)
- return r;
+ goto exit;
memset(¶ms, 0, sizeof(params));
params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- return r;
+ goto exit;
addr = 0;
if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
value, flags);
if (r)
- return r;
+ goto exit;
addr += ats_entries * 8;
}
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
value, flags);
if (r)
- return r;
+ goto exit;
}
- return vm->update_funcs->commit(¶ms, NULL);
+ r = vm->update_funcs->commit(¶ms, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
}
/**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
- int r;
+ int r, idx;
if (list_empty(&vm->relocated))
return 0;
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return -ENODEV;
+
memset(¶ms, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- return r;
+ goto exit;
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(¶ms, &vm->last_update);
if (r)
goto error;
+ drm_dev_exit(idx);
return 0;
error:
amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+ drm_dev_exit(idx);
return r;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
2021-09-15 6:37 [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath xinhui pan
@ 2021-09-15 6:42 ` Pan, Xinhui
2021-09-15 13:52 ` Andrey Grodzovsky
2021-09-15 20:14 ` Andrey Grodzovsky
1 sibling, 1 reply; 6+ messages in thread
From: Pan, Xinhui @ 2021-09-15 6:42 UTC (permalink / raw)
To: amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander, Koenig, Christian, Grodzovsky, Andrey
[AMD Official Use Only]
Andrey
I hit panic with this plug/unplug test without this patch.
But as we add enter/exit in all its callers. maybe it would not impact plug/unplug.
[ 1109.041095] BUG: unable to handle page fault for address: 00000000000010e1
[ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
[ 1109.196706] Call Trace:
[ 1109.199374] ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
[ 1109.205843] amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
[ 1109.212776] amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
[ 1109.218563] vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
[ 1109.223747] amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
[ 1109.229650] amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
[ 1109.235577] amdgpu_pci_remove+0x37/0x70 [amdgpu]
[ 1109.240853] pci_device_remove+0x3b/0xb0
[ 1109.245127] device_release_driver_internal+0x100/0x1d0
[ 1109.250857] device_release_driver+0x12/0x20
[ 1109.255535] pci_stop_bus_device+0x79/0xa0
[ 1109.260016] pci_stop_and_remove_bus_device_locked+0x1b/0x30
[ 1109.266197] remove_store+0x7b/0x90
[ 1109.269990] dev_attr_store+0x14/0x30
[ 1109.274002] sysfs_kf_write+0x48/0x60
[ 1109.277998] kernfs_fop_write_iter+0x14e/0x1e0
________________________________________
发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
发送时间: 2021年9月15日 14:37
收件人: amd-gfx@lists.freedesktop.org
抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, Xinhui
主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.
perf top says below,
75.81% [kernel] [k] __srcu_read_unlock
6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
3.56% [kernel] [k] __srcu_read_lock
2.20% [amdgpu] [k] amdgpu_vm_cpu_update
2.20% [kernel] [k] __sg_page_iter_dma_next
2.15% [drm] [k] drm_dev_enter
1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
1.18% [kernel] [k] __sg_alloc_table_from_pages
1.09% [drm] [k] drm_dev_exit
So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v1:
add enter/exit in more gmc_set_pte_pde callers
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +++++++++++++++++-------
3 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 76efd5f8950f..d7e4f4660acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
+#include <drm/drm_drv.h>
/*
* GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
+ int idx;
if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n");
return -EINVAL;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return 0;
+
t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+ drm_dev_exit(idx);
return 0;
}
@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
{
uint64_t page_base;
unsigned i, j, t;
+ int idx;
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return 0;
+
t = offset / AMDGPU_GPU_PAGE_SIZE;
for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
+ drm_dev_exit(idx);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 54f059501a33..1427fd70310c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
- int idx;
-
- if (!drm_dev_enter(&adev->ddev, &idx))
- return 0;
/*
* The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
value |= flags;
writeq(value, ptr + (gpu_page_idx * 8));
- drm_dev_exit(idx);
-
return 0;
}
@@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
u64 vram_end = vram_addr + vram_size;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+ int idx;
+
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return;
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
flags |= AMDGPU_PTE_WRITEABLE;
@@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
/* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+ drm_dev_exit(idx);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0126ece898da..daa16d2f89da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo = &vmbo->bo;
unsigned entries, ats_entries;
uint64_t addr;
- int r;
+ int r, idx;
/* Figure out our place in the hierarchy */
if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
return r;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return -ENODEV;
+
r = vm->update_funcs->map_table(vmbo);
if (r)
- return r;
+ goto exit;
memset(¶ms, 0, sizeof(params));
params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- return r;
+ goto exit;
addr = 0;
if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
value, flags);
if (r)
- return r;
+ goto exit;
addr += ats_entries * 8;
}
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
value, flags);
if (r)
- return r;
+ goto exit;
}
- return vm->update_funcs->commit(¶ms, NULL);
+ r = vm->update_funcs->commit(¶ms, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
}
/**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
- int r;
+ int r, idx;
if (list_empty(&vm->relocated))
return 0;
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return -ENODEV;
+
memset(¶ms, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- return r;
+ goto exit;
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(¶ms, &vm->last_update);
if (r)
goto error;
+ drm_dev_exit(idx);
return 0;
error:
amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+ drm_dev_exit(idx);
return r;
}
--
2.25.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
2021-09-15 6:42 ` 回复: " Pan, Xinhui
@ 2021-09-15 13:52 ` Andrey Grodzovsky
2021-09-15 13:57 ` Christian König
0 siblings, 1 reply; 6+ messages in thread
From: Andrey Grodzovsky @ 2021-09-15 13:52 UTC (permalink / raw)
To: Pan, Xinhui, amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander, Koenig, Christian
On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Andrey
> I hit panic with this plug/unplug test without this patch.
Can you please tell which ASIC you are using and which kernel branch and
what is the tip commit ?
> But as we add enter/exit in all its callers. maybe it would not impact plug/unplug.
If you add enter/exit in all callers then why this solves the problem ?
Is it because in one or more callers
amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit
many times creates the problematic
contention from multiple threads ?
Andrey
>
> [ 1109.041095] BUG: unable to handle page fault for address: 00000000000010e1
> [ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
> [ 1109.196706] Call Trace:
> [ 1109.199374] ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
> [ 1109.205843] amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
> [ 1109.212776] amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
> [ 1109.218563] vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
> [ 1109.223747] amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
> [ 1109.229650] amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
> [ 1109.235577] amdgpu_pci_remove+0x37/0x70 [amdgpu]
> [ 1109.240853] pci_device_remove+0x3b/0xb0
> [ 1109.245127] device_release_driver_internal+0x100/0x1d0
> [ 1109.250857] device_release_driver+0x12/0x20
> [ 1109.255535] pci_stop_bus_device+0x79/0xa0
> [ 1109.260016] pci_stop_and_remove_bus_device_locked+0x1b/0x30
> [ 1109.266197] remove_store+0x7b/0x90
> [ 1109.269990] dev_attr_store+0x14/0x30
> [ 1109.274002] sysfs_kf_write+0x48/0x60
> [ 1109.277998] kernfs_fop_write_iter+0x14e/0x1e0
>
> ________________________________________
> 发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
> 发送时间: 2021年9月15日 14:37
> 收件人: amd-gfx@lists.freedesktop.org
> 抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, Xinhui
> 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
>
> We hit soft hang while doing memory pressure test on one numa system.
> After a qucik look, this is because kfd invalid/valid userptr memory
> frequently with process_info lock hold.
> Looks like update page table mapping use too much cpu time.
>
> perf top says below,
> 75.81% [kernel] [k] __srcu_read_unlock
> 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
> 3.56% [kernel] [k] __srcu_read_lock
> 2.20% [amdgpu] [k] amdgpu_vm_cpu_update
> 2.20% [kernel] [k] __sg_page_iter_dma_next
> 2.15% [drm] [k] drm_dev_enter
> 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
> 1.18% [kernel] [k] __sg_alloc_table_from_pages
> 1.09% [drm] [k] drm_dev_exit
>
> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
> change from v1:
> add enter/exit in more gmc_set_pte_pde callers
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +++++++++++++++++-------
> 3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 76efd5f8950f..d7e4f4660acf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -34,6 +34,7 @@
> #include <asm/set_memory.h>
> #endif
> #include "amdgpu.h"
> +#include <drm/drm_drv.h>
>
> /*
> * GART
> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> u64 page_base;
> /* Starting from VEGA10, system bit must be 0 to mean invalid. */
> uint64_t flags = 0;
> + int idx;
>
> if (!adev->gart.ready) {
> WARN(1, "trying to unbind memory from uninitialized GART !\n");
> return -EINVAL;
> }
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return 0;
> +
> t = offset / AMDGPU_GPU_PAGE_SIZE;
> p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
> for (i = 0; i < pages; i++, p++) {
> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> for (i = 0; i < adev->num_vmhubs; i++)
> amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>
> + drm_dev_exit(idx);
> return 0;
> }
>
> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> {
> uint64_t page_base;
> unsigned i, j, t;
> + int idx;
>
> if (!adev->gart.ready) {
> WARN(1, "trying to bind memory to uninitialized GART !\n");
> return -EINVAL;
> }
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return 0;
> +
> t = offset / AMDGPU_GPU_PAGE_SIZE;
>
> for (i = 0; i < pages; i++) {
> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> page_base += AMDGPU_GPU_PAGE_SIZE;
> }
> }
> + drm_dev_exit(idx);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 54f059501a33..1427fd70310c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
> {
> void __iomem *ptr = (void *)cpu_pt_addr;
> uint64_t value;
> - int idx;
> -
> - if (!drm_dev_enter(&adev->ddev, &idx))
> - return 0;
>
> /*
> * The following is for PTE only. GART does not have PDEs.
> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
> value |= flags;
> writeq(value, ptr + (gpu_page_idx * 8));
>
> - drm_dev_exit(idx);
> -
> return 0;
> }
>
> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
> adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
> u64 vram_end = vram_addr + vram_size;
> u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
> + int idx;
> +
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return;
>
> flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
> flags |= AMDGPU_PTE_WRITEABLE;
> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
> flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
> /* Requires gart_ptb_gpu_pa to be 4K aligned */
> amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
> + drm_dev_exit(idx);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0126ece898da..daa16d2f89da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> struct amdgpu_bo *bo = &vmbo->bo;
> unsigned entries, ats_entries;
> uint64_t addr;
> - int r;
> + int r, idx;
>
> /* Figure out our place in the hierarchy */
> if (ancestor->parent) {
> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> return r;
> }
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return -ENODEV;
> +
> r = vm->update_funcs->map_table(vmbo);
> if (r)
> - return r;
> + goto exit;
>
> memset(¶ms, 0, sizeof(params));
> params.adev = adev;
> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>
> r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
> if (r)
> - return r;
> + goto exit;
>
> addr = 0;
> if (ats_entries) {
> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
> value, flags);
> if (r)
> - return r;
> + goto exit;
>
> addr += ats_entries * 8;
> }
> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
> value, flags);
> if (r)
> - return r;
> + goto exit;
> }
>
> - return vm->update_funcs->commit(¶ms, NULL);
> + r = vm->update_funcs->commit(¶ms, NULL);
> +exit:
> + drm_dev_exit(idx);
> + return r;
> }
>
> /**
> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> struct amdgpu_vm *vm, bool immediate)
> {
> struct amdgpu_vm_update_params params;
> - int r;
> + int r, idx;
>
> if (list_empty(&vm->relocated))
> return 0;
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return -ENODEV;
> +
> memset(¶ms, 0, sizeof(params));
> params.adev = adev;
> params.vm = vm;
> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>
> r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
> if (r)
> - return r;
> + goto exit;
>
> while (!list_empty(&vm->relocated)) {
> struct amdgpu_vm_bo_base *entry;
> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> r = vm->update_funcs->commit(¶ms, &vm->last_update);
> if (r)
> goto error;
> + drm_dev_exit(idx);
> return 0;
>
> error:
> amdgpu_vm_invalidate_pds(adev, vm);
> +exit:
> + drm_dev_exit(idx);
> return r;
> }
>
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
2021-09-15 13:52 ` Andrey Grodzovsky
@ 2021-09-15 13:57 ` Christian König
2021-09-15 14:04 ` Andrey Grodzovsky
0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2021-09-15 13:57 UTC (permalink / raw)
To: Andrey Grodzovsky, Pan, Xinhui, amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander, Koenig, Christian
Am 15.09.21 um 15:52 schrieb Andrey Grodzovsky:
> On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> Andrey
>> I hit panic with this plug/unplug test without this patch.
>
>
> Can you please tell which ASIC you are using and which kernel branch
> and what is the tip commit ?
>
>
>> But as we add enter/exit in all its callers. maybe it would not
>> impact plug/unplug.
>
>
> If you add enter/exit in all callers then why this solves the problem
> ? Is it because in one or more callers
> amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit
> many times creates the problematic
> contention from multiple threads ?
The most likely cause of this is cache line bouncing I think and yes
moving the enter/exit a level up should fix this.
But I strongly suggest to test this with lockdep enabled and hotplug/GPU
reset a couple of times.
Christian.
>
> Andrey
>
>>
>> [ 1109.041095] BUG: unable to handle page fault for address:
>> 00000000000010e1
>> [ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
>> [ 1109.196706] Call Trace:
>> [ 1109.199374] ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
>> [ 1109.205843] amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
>> [ 1109.212776] amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
>> [ 1109.218563] vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
>> [ 1109.223747] amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
>> [ 1109.229650] amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
>> [ 1109.235577] amdgpu_pci_remove+0x37/0x70 [amdgpu]
>> [ 1109.240853] pci_device_remove+0x3b/0xb0
>> [ 1109.245127] device_release_driver_internal+0x100/0x1d0
>> [ 1109.250857] device_release_driver+0x12/0x20
>> [ 1109.255535] pci_stop_bus_device+0x79/0xa0
>> [ 1109.260016] pci_stop_and_remove_bus_device_locked+0x1b/0x30
>> [ 1109.266197] remove_store+0x7b/0x90
>> [ 1109.269990] dev_attr_store+0x14/0x30
>> [ 1109.274002] sysfs_kf_write+0x48/0x60
>> [ 1109.277998] kernfs_fop_write_iter+0x14e/0x1e0
>>
>> ________________________________________
>> 发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
>> 发送时间: 2021年9月15日 14:37
>> 收件人: amd-gfx@lists.freedesktop.org
>> 抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan,
>> Xinhui
>> 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
>>
>> We hit soft hang while doing memory pressure test on one numa system.
>> After a qucik look, this is because kfd invalid/valid userptr memory
>> frequently with process_info lock hold.
>> Looks like update page table mapping use too much cpu time.
>>
>> perf top says below,
>> 75.81% [kernel] [k] __srcu_read_unlock
>> 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
>> 3.56% [kernel] [k] __srcu_read_lock
>> 2.20% [amdgpu] [k] amdgpu_vm_cpu_update
>> 2.20% [kernel] [k] __sg_page_iter_dma_next
>> 2.15% [drm] [k] drm_dev_enter
>> 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
>> 1.18% [kernel] [k] __sg_alloc_table_from_pages
>> 1.09% [drm] [k] drm_dev_exit
>>
>> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
>> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
>> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>>
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> ---
>> change from v1:
>> add enter/exit in more gmc_set_pte_pde callers
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +++++++++++++++++-------
>> 3 files changed, 36 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index 76efd5f8950f..d7e4f4660acf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -34,6 +34,7 @@
>> #include <asm/set_memory.h>
>> #endif
>> #include "amdgpu.h"
>> +#include <drm/drm_drv.h>
>>
>> /*
>> * GART
>> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device
>> *adev, uint64_t offset,
>> u64 page_base;
>> /* Starting from VEGA10, system bit must be 0 to mean
>> invalid. */
>> uint64_t flags = 0;
>> + int idx;
>>
>> if (!adev->gart.ready) {
>> WARN(1, "trying to unbind memory from uninitialized
>> GART !\n");
>> return -EINVAL;
>> }
>>
>> + if (!drm_dev_enter(&adev->ddev, &idx))
>> + return 0;
>> +
>> t = offset / AMDGPU_GPU_PAGE_SIZE;
>> p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>> for (i = 0; i < pages; i++, p++) {
>> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device
>> *adev, uint64_t offset,
>> for (i = 0; i < adev->num_vmhubs; i++)
>> amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>>
>> + drm_dev_exit(idx);
>> return 0;
>> }
>>
>> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev,
>> uint64_t offset,
>> {
>> uint64_t page_base;
>> unsigned i, j, t;
>> + int idx;
>>
>> if (!adev->gart.ready) {
>> WARN(1, "trying to bind memory to uninitialized GART
>> !\n");
>> return -EINVAL;
>> }
>>
>> + if (!drm_dev_enter(&adev->ddev, &idx))
>> + return 0;
>> +
>> t = offset / AMDGPU_GPU_PAGE_SIZE;
>>
>> for (i = 0; i < pages; i++) {
>> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev,
>> uint64_t offset,
>> page_base += AMDGPU_GPU_PAGE_SIZE;
>> }
>> }
>> + drm_dev_exit(idx);
>> return 0;
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> index 54f059501a33..1427fd70310c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device
>> *adev, void *cpu_pt_addr,
>> {
>> void __iomem *ptr = (void *)cpu_pt_addr;
>> uint64_t value;
>> - int idx;
>> -
>> - if (!drm_dev_enter(&adev->ddev, &idx))
>> - return 0;
>>
>> /*
>> * The following is for PTE only. GART does not have PDEs.
>> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device
>> *adev, void *cpu_pt_addr,
>> value |= flags;
>> writeq(value, ptr + (gpu_page_idx * 8));
>>
>> - drm_dev_exit(idx);
>> -
>> return 0;
>> }
>>
>> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device
>> *adev)
>> adev->gmc.xgmi.physical_node_id *
>> adev->gmc.xgmi.node_segment_size;
>> u64 vram_end = vram_addr + vram_size;
>> u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
>> + int idx;
>> +
>> + if (!drm_dev_enter(&adev->ddev, &idx))
>> + return;
>>
>> flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>> flags |= AMDGPU_PTE_WRITEABLE;
>> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device
>> *adev)
>> flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>> /* Requires gart_ptb_gpu_pa to be 4K aligned */
>> amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i,
>> gart_ptb_gpu_pa, flags);
>> + drm_dev_exit(idx);
>> }
>>
>> /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 0126ece898da..daa16d2f89da 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct
>> amdgpu_device *adev,
>> struct amdgpu_bo *bo = &vmbo->bo;
>> unsigned entries, ats_entries;
>> uint64_t addr;
>> - int r;
>> + int r, idx;
>>
>> /* Figure out our place in the hierarchy */
>> if (ancestor->parent) {
>> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct
>> amdgpu_device *adev,
>> return r;
>> }
>>
>> + if (!drm_dev_enter(&adev->ddev, &idx))
>> + return -ENODEV;
>> +
>> r = vm->update_funcs->map_table(vmbo);
>> if (r)
>> - return r;
>> + goto exit;
>>
>> memset(¶ms, 0, sizeof(params));
>> params.adev = adev;
>> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct
>> amdgpu_device *adev,
>>
>> r = vm->update_funcs->prepare(¶ms, NULL,
>> AMDGPU_SYNC_EXPLICIT);
>> if (r)
>> - return r;
>> + goto exit;
>>
>> addr = 0;
>> if (ats_entries) {
>> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct
>> amdgpu_device *adev,
>> r = vm->update_funcs->update(¶ms, vmbo, addr, 0,
>> ats_entries,
>> value, flags);
>> if (r)
>> - return r;
>> + goto exit;
>>
>> addr += ats_entries * 8;
>> }
>> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct
>> amdgpu_device *adev,
>> r = vm->update_funcs->update(¶ms, vmbo, addr, 0,
>> entries,
>> value, flags);
>> if (r)
>> - return r;
>> + goto exit;
>> }
>>
>> - return vm->update_funcs->commit(¶ms, NULL);
>> + r = vm->update_funcs->commit(¶ms, NULL);
>> +exit:
>> + drm_dev_exit(idx);
>> + return r;
>> }
>>
>> /**
>> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct
>> amdgpu_device *adev,
>> struct amdgpu_vm *vm, bool immediate)
>> {
>> struct amdgpu_vm_update_params params;
>> - int r;
>> + int r, idx;
>>
>> if (list_empty(&vm->relocated))
>> return 0;
>>
>> + if (!drm_dev_enter(&adev->ddev, &idx))
>> + return -ENODEV;
>> +
>> memset(¶ms, 0, sizeof(params));
>> params.adev = adev;
>> params.vm = vm;
>> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device
>> *adev,
>>
>> r = vm->update_funcs->prepare(¶ms, NULL,
>> AMDGPU_SYNC_EXPLICIT);
>> if (r)
>> - return r;
>> + goto exit;
>>
>> while (!list_empty(&vm->relocated)) {
>> struct amdgpu_vm_bo_base *entry;
>> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct
>> amdgpu_device *adev,
>> r = vm->update_funcs->commit(¶ms, &vm->last_update);
>> if (r)
>> goto error;
>> + drm_dev_exit(idx);
>> return 0;
>>
>> error:
>> amdgpu_vm_invalidate_pds(adev, vm);
>> +exit:
>> + drm_dev_exit(idx);
>> return r;
>> }
>>
>> --
>> 2.25.1
>>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
2021-09-15 13:57 ` Christian König
@ 2021-09-15 14:04 ` Andrey Grodzovsky
0 siblings, 0 replies; 6+ messages in thread
From: Andrey Grodzovsky @ 2021-09-15 14:04 UTC (permalink / raw)
To: Christian König, Pan, Xinhui, amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander, Koenig, Christian
On 2021-09-15 9:57 a.m., Christian König wrote:
> Am 15.09.21 um 15:52 schrieb Andrey Grodzovsky:
>> On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
>>> [AMD Official Use Only]
>>>
>>> Andrey
>>> I hit panic with this plug/unplug test without this patch.
>>
>>
>> Can you please tell which ASIC you are using and which kernel branch
>> and what is the tip commit ?
>>
>>
>>> But as we add enter/exit in all its callers. maybe it would not
>>> impact plug/unplug.
>>
>>
>> If you add enter/exit in all callers then why this solves the problem
>> ? Is it because in one or more callers
>> amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit
>> many times creates the problematic
>> contention from multiple threads ?
>
> The most likely cause of this is cache line bouncing I think and yes
> moving the enter/exit a level up should fix this.
>
> But I strongly suggest to test this with lockdep enabled and
> hotplug/GPU reset a couple of times.
Xinhui already tried testing with Hotplug and there is a regression
crash as you can see bellow. That
why I wanted to know which kernel and which ASIC to see if I can
reproduce and fix the regression quickly.
I will try on my side with latest kernel and vega 10 anyway since i see
vega 10 functions from the trace.
Andrey
>
> Christian.
>
>>
>> Andrey
>>
>>>
>>> [ 1109.041095] BUG: unable to handle page fault for address:
>>> 00000000000010e1
>>> [ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
>>> [ 1109.196706] Call Trace:
>>> [ 1109.199374] ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
>>> [ 1109.205843] amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
>>> [ 1109.212776] amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
>>> [ 1109.218563] vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
>>> [ 1109.223747] amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
>>> [ 1109.229650] amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
>>> [ 1109.235577] amdgpu_pci_remove+0x37/0x70 [amdgpu]
>>> [ 1109.240853] pci_device_remove+0x3b/0xb0
>>> [ 1109.245127] device_release_driver_internal+0x100/0x1d0
>>> [ 1109.250857] device_release_driver+0x12/0x20
>>> [ 1109.255535] pci_stop_bus_device+0x79/0xa0
>>> [ 1109.260016] pci_stop_and_remove_bus_device_locked+0x1b/0x30
>>> [ 1109.266197] remove_store+0x7b/0x90
>>> [ 1109.269990] dev_attr_store+0x14/0x30
>>> [ 1109.274002] sysfs_kf_write+0x48/0x60
>>> [ 1109.277998] kernfs_fop_write_iter+0x14e/0x1e0
>>>
>>> ________________________________________
>>> 发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
>>> 发送时间: 2021年9月15日 14:37
>>> 收件人: amd-gfx@lists.freedesktop.org
>>> 抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan,
>>> Xinhui
>>> 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot
>>> codepath
>>>
>>> We hit soft hang while doing memory pressure test on one numa system.
>>> After a qucik look, this is because kfd invalid/valid userptr memory
>>> frequently with process_info lock hold.
>>> Looks like update page table mapping use too much cpu time.
>>>
>>> perf top says below,
>>> 75.81% [kernel] [k] __srcu_read_unlock
>>> 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
>>> 3.56% [kernel] [k] __srcu_read_lock
>>> 2.20% [amdgpu] [k] amdgpu_vm_cpu_update
>>> 2.20% [kernel] [k] __sg_page_iter_dma_next
>>> 2.15% [drm] [k] drm_dev_enter
>>> 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
>>> 1.18% [kernel] [k] __sg_alloc_table_from_pages
>>> 1.09% [drm] [k] drm_dev_exit
>>>
>>> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
>>> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
>>> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>> ---
>>> change from v1:
>>> add enter/exit in more gmc_set_pte_pde callers
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28
>>> +++++++++++++++++-------
>>> 3 files changed, 36 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> index 76efd5f8950f..d7e4f4660acf 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> @@ -34,6 +34,7 @@
>>> #include <asm/set_memory.h>
>>> #endif
>>> #include "amdgpu.h"
>>> +#include <drm/drm_drv.h>
>>>
>>> /*
>>> * GART
>>> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device
>>> *adev, uint64_t offset,
>>> u64 page_base;
>>> /* Starting from VEGA10, system bit must be 0 to mean
>>> invalid. */
>>> uint64_t flags = 0;
>>> + int idx;
>>>
>>> if (!adev->gart.ready) {
>>> WARN(1, "trying to unbind memory from uninitialized
>>> GART !\n");
>>> return -EINVAL;
>>> }
>>>
>>> + if (!drm_dev_enter(&adev->ddev, &idx))
>>> + return 0;
>>> +
>>> t = offset / AMDGPU_GPU_PAGE_SIZE;
>>> p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>> for (i = 0; i < pages; i++, p++) {
>>> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device
>>> *adev, uint64_t offset,
>>> for (i = 0; i < adev->num_vmhubs; i++)
>>> amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>>>
>>> + drm_dev_exit(idx);
>>> return 0;
>>> }
>>>
>>> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device
>>> *adev, uint64_t offset,
>>> {
>>> uint64_t page_base;
>>> unsigned i, j, t;
>>> + int idx;
>>>
>>> if (!adev->gart.ready) {
>>> WARN(1, "trying to bind memory to uninitialized
>>> GART !\n");
>>> return -EINVAL;
>>> }
>>>
>>> + if (!drm_dev_enter(&adev->ddev, &idx))
>>> + return 0;
>>> +
>>> t = offset / AMDGPU_GPU_PAGE_SIZE;
>>>
>>> for (i = 0; i < pages; i++) {
>>> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev,
>>> uint64_t offset,
>>> page_base += AMDGPU_GPU_PAGE_SIZE;
>>> }
>>> }
>>> + drm_dev_exit(idx);
>>> return 0;
>>> }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> index 54f059501a33..1427fd70310c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device
>>> *adev, void *cpu_pt_addr,
>>> {
>>> void __iomem *ptr = (void *)cpu_pt_addr;
>>> uint64_t value;
>>> - int idx;
>>> -
>>> - if (!drm_dev_enter(&adev->ddev, &idx))
>>> - return 0;
>>>
>>> /*
>>> * The following is for PTE only. GART does not have PDEs.
>>> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device
>>> *adev, void *cpu_pt_addr,
>>> value |= flags;
>>> writeq(value, ptr + (gpu_page_idx * 8));
>>>
>>> - drm_dev_exit(idx);
>>> -
>>> return 0;
>>> }
>>>
>>> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device
>>> *adev)
>>> adev->gmc.xgmi.physical_node_id *
>>> adev->gmc.xgmi.node_segment_size;
>>> u64 vram_end = vram_addr + vram_size;
>>> u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
>>> + int idx;
>>> +
>>> + if (!drm_dev_enter(&adev->ddev, &idx))
>>> + return;
>>>
>>> flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>>> flags |= AMDGPU_PTE_WRITEABLE;
>>> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device
>>> *adev)
>>> flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>>> /* Requires gart_ptb_gpu_pa to be 4K aligned */
>>> amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i,
>>> gart_ptb_gpu_pa, flags);
>>> + drm_dev_exit(idx);
>>> }
>>>
>>> /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 0126ece898da..daa16d2f89da 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct
>>> amdgpu_device *adev,
>>> struct amdgpu_bo *bo = &vmbo->bo;
>>> unsigned entries, ats_entries;
>>> uint64_t addr;
>>> - int r;
>>> + int r, idx;
>>>
>>> /* Figure out our place in the hierarchy */
>>> if (ancestor->parent) {
>>> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct
>>> amdgpu_device *adev,
>>> return r;
>>> }
>>>
>>> + if (!drm_dev_enter(&adev->ddev, &idx))
>>> + return -ENODEV;
>>> +
>>> r = vm->update_funcs->map_table(vmbo);
>>> if (r)
>>> - return r;
>>> + goto exit;
>>>
>>> memset(¶ms, 0, sizeof(params));
>>> params.adev = adev;
>>> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct
>>> amdgpu_device *adev,
>>>
>>> r = vm->update_funcs->prepare(¶ms, NULL,
>>> AMDGPU_SYNC_EXPLICIT);
>>> if (r)
>>> - return r;
>>> + goto exit;
>>>
>>> addr = 0;
>>> if (ats_entries) {
>>> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct
>>> amdgpu_device *adev,
>>> r = vm->update_funcs->update(¶ms, vmbo, addr,
>>> 0, ats_entries,
>>> value, flags);
>>> if (r)
>>> - return r;
>>> + goto exit;
>>>
>>> addr += ats_entries * 8;
>>> }
>>> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct
>>> amdgpu_device *adev,
>>> r = vm->update_funcs->update(¶ms, vmbo, addr,
>>> 0, entries,
>>> value, flags);
>>> if (r)
>>> - return r;
>>> + goto exit;
>>> }
>>>
>>> - return vm->update_funcs->commit(¶ms, NULL);
>>> + r = vm->update_funcs->commit(¶ms, NULL);
>>> +exit:
>>> + drm_dev_exit(idx);
>>> + return r;
>>> }
>>>
>>> /**
>>> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct
>>> amdgpu_device *adev,
>>> struct amdgpu_vm *vm, bool immediate)
>>> {
>>> struct amdgpu_vm_update_params params;
>>> - int r;
>>> + int r, idx;
>>>
>>> if (list_empty(&vm->relocated))
>>> return 0;
>>>
>>> + if (!drm_dev_enter(&adev->ddev, &idx))
>>> + return -ENODEV;
>>> +
>>> memset(¶ms, 0, sizeof(params));
>>> params.adev = adev;
>>> params.vm = vm;
>>> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device
>>> *adev,
>>>
>>> r = vm->update_funcs->prepare(¶ms, NULL,
>>> AMDGPU_SYNC_EXPLICIT);
>>> if (r)
>>> - return r;
>>> + goto exit;
>>>
>>> while (!list_empty(&vm->relocated)) {
>>> struct amdgpu_vm_bo_base *entry;
>>> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct
>>> amdgpu_device *adev,
>>> r = vm->update_funcs->commit(¶ms, &vm->last_update);
>>> if (r)
>>> goto error;
>>> + drm_dev_exit(idx);
>>> return 0;
>>>
>>> error:
>>> amdgpu_vm_invalidate_pds(adev, vm);
>>> +exit:
>>> + drm_dev_exit(idx);
>>> return r;
>>> }
>>>
>>> --
>>> 2.25.1
>>>
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
2021-09-15 6:37 [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath xinhui pan
2021-09-15 6:42 ` 回复: " Pan, Xinhui
@ 2021-09-15 20:14 ` Andrey Grodzovsky
1 sibling, 0 replies; 6+ messages in thread
From: Andrey Grodzovsky @ 2021-09-15 20:14 UTC (permalink / raw)
To: xinhui pan, amd-gfx; +Cc: alexander.deucher, christian.koenig
I fixed 2 regressions and latest code, applied your patch on top and
passed libdrm tests
on Vega 10. You can pickup those 2 patches and try too if you have time.
In any case -
Reviewed-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Andrey
On 2021-09-15 2:37 a.m., xinhui pan wrote:
> We hit soft hang while doing memory pressure test on one numa system.
> After a qucik look, this is because kfd invalid/valid userptr memory
> frequently with process_info lock hold.
> Looks like update page table mapping use too much cpu time.
>
> perf top says below,
> 75.81% [kernel] [k] __srcu_read_unlock
> 6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
> 3.56% [kernel] [k] __srcu_read_lock
> 2.20% [amdgpu] [k] amdgpu_vm_cpu_update
> 2.20% [kernel] [k] __sg_page_iter_dma_next
> 2.15% [drm] [k] drm_dev_enter
> 1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
> 1.18% [kernel] [k] __sg_alloc_table_from_pages
> 1.09% [drm] [k] drm_dev_exit
>
> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
> change from v1:
> add enter/exit in more gmc_set_pte_pde callers
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +++++++++++++++++-------
> 3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 76efd5f8950f..d7e4f4660acf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -34,6 +34,7 @@
> #include <asm/set_memory.h>
> #endif
> #include "amdgpu.h"
> +#include <drm/drm_drv.h>
>
> /*
> * GART
> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> u64 page_base;
> /* Starting from VEGA10, system bit must be 0 to mean invalid. */
> uint64_t flags = 0;
> + int idx;
>
> if (!adev->gart.ready) {
> WARN(1, "trying to unbind memory from uninitialized GART !\n");
> return -EINVAL;
> }
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return 0;
> +
> t = offset / AMDGPU_GPU_PAGE_SIZE;
> p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
> for (i = 0; i < pages; i++, p++) {
> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> for (i = 0; i < adev->num_vmhubs; i++)
> amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>
> + drm_dev_exit(idx);
> return 0;
> }
>
> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> {
> uint64_t page_base;
> unsigned i, j, t;
> + int idx;
>
> if (!adev->gart.ready) {
> WARN(1, "trying to bind memory to uninitialized GART !\n");
> return -EINVAL;
> }
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return 0;
> +
> t = offset / AMDGPU_GPU_PAGE_SIZE;
>
> for (i = 0; i < pages; i++) {
> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> page_base += AMDGPU_GPU_PAGE_SIZE;
> }
> }
> + drm_dev_exit(idx);
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 54f059501a33..1427fd70310c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
> {
> void __iomem *ptr = (void *)cpu_pt_addr;
> uint64_t value;
> - int idx;
> -
> - if (!drm_dev_enter(&adev->ddev, &idx))
> - return 0;
>
> /*
> * The following is for PTE only. GART does not have PDEs.
> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
> value |= flags;
> writeq(value, ptr + (gpu_page_idx * 8));
>
> - drm_dev_exit(idx);
> -
> return 0;
> }
>
> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
> adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
> u64 vram_end = vram_addr + vram_size;
> u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
> + int idx;
> +
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return;
>
> flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
> flags |= AMDGPU_PTE_WRITEABLE;
> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
> flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
> /* Requires gart_ptb_gpu_pa to be 4K aligned */
> amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
> + drm_dev_exit(idx);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0126ece898da..daa16d2f89da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> struct amdgpu_bo *bo = &vmbo->bo;
> unsigned entries, ats_entries;
> uint64_t addr;
> - int r;
> + int r, idx;
>
> /* Figure out our place in the hierarchy */
> if (ancestor->parent) {
> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> return r;
> }
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return -ENODEV;
> +
> r = vm->update_funcs->map_table(vmbo);
> if (r)
> - return r;
> + goto exit;
>
> memset(¶ms, 0, sizeof(params));
> params.adev = adev;
> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>
> r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
> if (r)
> - return r;
> + goto exit;
>
> addr = 0;
> if (ats_entries) {
> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
> value, flags);
> if (r)
> - return r;
> + goto exit;
>
> addr += ats_entries * 8;
> }
> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
> r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
> value, flags);
> if (r)
> - return r;
> + goto exit;
> }
>
> - return vm->update_funcs->commit(¶ms, NULL);
> + r = vm->update_funcs->commit(¶ms, NULL);
> +exit:
> + drm_dev_exit(idx);
> + return r;
> }
>
> /**
> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> struct amdgpu_vm *vm, bool immediate)
> {
> struct amdgpu_vm_update_params params;
> - int r;
> + int r, idx;
>
> if (list_empty(&vm->relocated))
> return 0;
>
> + if (!drm_dev_enter(&adev->ddev, &idx))
> + return -ENODEV;
> +
> memset(¶ms, 0, sizeof(params));
> params.adev = adev;
> params.vm = vm;
> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>
> r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
> if (r)
> - return r;
> + goto exit;
>
> while (!list_empty(&vm->relocated)) {
> struct amdgpu_vm_bo_base *entry;
> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
> r = vm->update_funcs->commit(¶ms, &vm->last_update);
> if (r)
> goto error;
> + drm_dev_exit(idx);
> return 0;
>
> error:
> amdgpu_vm_invalidate_pds(adev, vm);
> +exit:
> + drm_dev_exit(idx);
> return r;
> }
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-09-15 20:14 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-09-15 6:37 [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath xinhui pan
2021-09-15 6:42 ` 回复: " Pan, Xinhui
2021-09-15 13:52 ` Andrey Grodzovsky
2021-09-15 13:57 ` Christian König
2021-09-15 14:04 ` Andrey Grodzovsky
2021-09-15 20:14 ` Andrey Grodzovsky
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox