From: xinhui pan <xinhui.pan@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: <alexander.deucher@amd.com>, <christian.koenig@amd.com>,
<andrey.grodzovsky@amd.com>, xinhui pan <xinhui.pan@amd.com>
Subject: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
Date: Wed, 15 Sep 2021 14:37:39 +0800 [thread overview]
Message-ID: <20210915063739.31244-1-xinhui.pan@amd.com> (raw)
We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.
perf top says below,
75.81% [kernel] [k] __srcu_read_unlock
6.19% [amdgpu] [k] amdgpu_gmc_set_pte_pde
3.56% [kernel] [k] __srcu_read_lock
2.20% [amdgpu] [k] amdgpu_vm_cpu_update
2.20% [kernel] [k] __sg_page_iter_dma_next
2.15% [drm] [k] drm_dev_enter
1.70% [drm] [k] drm_prime_sg_to_dma_addr_array
1.18% [kernel] [k] __sg_alloc_table_from_pages
1.09% [drm] [k] drm_dev_exit
So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v1:
add enter/exit in more gmc_set_pte_pde callers
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 28 +++++++++++++++++-------
3 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 76efd5f8950f..d7e4f4660acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
#include <asm/set_memory.h>
#endif
#include "amdgpu.h"
+#include <drm/drm_drv.h>
/*
* GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
u64 page_base;
/* Starting from VEGA10, system bit must be 0 to mean invalid. */
uint64_t flags = 0;
+ int idx;
if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n");
return -EINVAL;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return 0;
+
t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < adev->num_vmhubs; i++)
amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+ drm_dev_exit(idx);
return 0;
}
@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
{
uint64_t page_base;
unsigned i, j, t;
+ int idx;
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
return -EINVAL;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return 0;
+
t = offset / AMDGPU_GPU_PAGE_SIZE;
for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
+ drm_dev_exit(idx);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 54f059501a33..1427fd70310c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
- int idx;
-
- if (!drm_dev_enter(&adev->ddev, &idx))
- return 0;
/*
* The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
value |= flags;
writeq(value, ptr + (gpu_page_idx * 8));
- drm_dev_exit(idx);
-
return 0;
}
@@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
u64 vram_end = vram_addr + vram_size;
u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+ int idx;
+
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return;
flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
flags |= AMDGPU_PTE_WRITEABLE;
@@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
/* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+ drm_dev_exit(idx);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0126ece898da..daa16d2f89da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_bo *bo = &vmbo->bo;
unsigned entries, ats_entries;
uint64_t addr;
- int r;
+ int r, idx;
/* Figure out our place in the hierarchy */
if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
return r;
}
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return -ENODEV;
+
r = vm->update_funcs->map_table(vmbo);
if (r)
- return r;
+ goto exit;
memset(¶ms, 0, sizeof(params));
params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- return r;
+ goto exit;
addr = 0;
if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries,
value, flags);
if (r)
- return r;
+ goto exit;
addr += ats_entries * 8;
}
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries,
value, flags);
if (r)
- return r;
+ goto exit;
}
- return vm->update_funcs->commit(¶ms, NULL);
+ r = vm->update_funcs->commit(¶ms, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
}
/**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
- int r;
+ int r, idx;
if (list_empty(&vm->relocated))
return 0;
+ if (!drm_dev_enter(&adev->ddev, &idx))
+ return -ENODEV;
+
memset(¶ms, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- return r;
+ goto exit;
while (!list_empty(&vm->relocated)) {
struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(¶ms, &vm->last_update);
if (r)
goto error;
+ drm_dev_exit(idx);
return 0;
error:
amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+ drm_dev_exit(idx);
return r;
}
--
2.25.1
next reply other threads:[~2021-09-15 6:38 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-15 6:37 xinhui pan [this message]
2021-09-15 6:42 ` 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath Pan, Xinhui
2021-09-15 13:52 ` Andrey Grodzovsky
2021-09-15 13:57 ` Christian König
2021-09-15 14:04 ` Andrey Grodzovsky
2021-09-15 20:14 ` Andrey Grodzovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210915063739.31244-1-xinhui.pan@amd.com \
--to=xinhui.pan@amd.com \
--cc=alexander.deucher@amd.com \
--cc=amd-gfx@lists.freedesktop.org \
--cc=andrey.grodzovsky@amd.com \
--cc=christian.koenig@amd.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox