AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
@ 2021-09-15  6:37 xinhui pan
  2021-09-15  6:42 ` 回复: " Pan, Xinhui
  2021-09-15 20:14 ` Andrey Grodzovsky
  0 siblings, 2 replies; 6+ messages in thread
From: xinhui pan @ 2021-09-15  6:37 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, christian.koenig, andrey.grodzovsky,
	xinhui pan

We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.

perf top says below,
75.81%  [kernel]       [k] __srcu_read_unlock
 6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
 3.56%  [kernel]       [k] __srcu_read_lock
 2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
 2.20%  [kernel]       [k] __sg_page_iter_dma_next
 2.15%  [drm]          [k] drm_dev_enter
 1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
 1.18%  [kernel]       [k] __sg_alloc_table_from_pages
 1.09%  [drm]          [k] drm_dev_exit

So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v1:
add enter/exit in more gmc_set_pte_pde callers
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 76efd5f8950f..d7e4f4660acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
 #include <asm/set_memory.h>
 #endif
 #include "amdgpu.h"
+#include <drm/drm_drv.h>
 
 /*
  * GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 	u64 page_base;
 	/* Starting from VEGA10, system bit must be 0 to mean invalid. */
 	uint64_t flags = 0;
+	int idx;
 
 	if (!adev->gart.ready) {
 		WARN(1, "trying to unbind memory from uninitialized GART !\n");
 		return -EINVAL;
 	}
 
+	if (!drm_dev_enter(&adev->ddev, &idx))
+		return 0;
+
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
 	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 	for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 	for (i = 0; i < adev->num_vmhubs; i++)
 		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
+	drm_dev_exit(idx);
 	return 0;
 }
 
@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 {
 	uint64_t page_base;
 	unsigned i, j, t;
+	int idx;
 
 	if (!adev->gart.ready) {
 		WARN(1, "trying to bind memory to uninitialized GART !\n");
 		return -EINVAL;
 	}
 
+	if (!drm_dev_enter(&adev->ddev, &idx))
+		return 0;
+
 	t = offset / AMDGPU_GPU_PAGE_SIZE;
 
 	for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 			page_base += AMDGPU_GPU_PAGE_SIZE;
 		}
 	}
+	drm_dev_exit(idx);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 54f059501a33..1427fd70310c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
 {
 	void __iomem *ptr = (void *)cpu_pt_addr;
 	uint64_t value;
-	int idx;
-
-	if (!drm_dev_enter(&adev->ddev, &idx))
-		return 0;
 
 	/*
 	 * The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
 	value |= flags;
 	writeq(value, ptr + (gpu_page_idx * 8));
 
-	drm_dev_exit(idx);
-
 	return 0;
 }
 
@@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
 	u64 vram_end = vram_addr + vram_size;
 	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+	int idx;
+
+	if (!drm_dev_enter(&adev->ddev, &idx))
+		return;
 
 	flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
 	flags |= AMDGPU_PTE_WRITEABLE;
@@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
 	flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
 	/* Requires gart_ptb_gpu_pa to be 4K aligned */
 	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+	drm_dev_exit(idx);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0126ece898da..daa16d2f89da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	struct amdgpu_bo *bo = &vmbo->bo;
 	unsigned entries, ats_entries;
 	uint64_t addr;
-	int r;
+	int r, idx;
 
 	/* Figure out our place in the hierarchy */
 	if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			return r;
 	}
 
+	if (!drm_dev_enter(&adev->ddev, &idx))
+		return -ENODEV;
+
 	r = vm->update_funcs->map_table(vmbo);
 	if (r)
-		return r;
+		goto exit;
 
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 	if (r)
-		return r;
+		goto exit;
 
 	addr = 0;
 	if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
 					     value, flags);
 		if (r)
-			return r;
+			goto exit;
 
 		addr += ats_entries * 8;
 	}
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
 					     value, flags);
 		if (r)
-			return r;
+			goto exit;
 	}
 
-	return vm->update_funcs->commit(&params, NULL);
+	r = vm->update_funcs->commit(&params, NULL);
+exit:
+	drm_dev_exit(idx);
+	return r;
 }
 
 /**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm, bool immediate)
 {
 	struct amdgpu_vm_update_params params;
-	int r;
+	int r, idx;
 
 	if (list_empty(&vm->relocated))
 		return 0;
 
+	if (!drm_dev_enter(&adev->ddev, &idx))
+		return -ENODEV;
+
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 
 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 	if (r)
-		return r;
+		goto exit;
 
 	while (!list_empty(&vm->relocated)) {
 		struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 	r = vm->update_funcs->commit(&params, &vm->last_update);
 	if (r)
 		goto error;
+	drm_dev_exit(idx);
 	return 0;
 
 error:
 	amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+	drm_dev_exit(idx);
 	return r;
 }
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
  2021-09-15  6:37 [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath xinhui pan
@ 2021-09-15  6:42 ` Pan, Xinhui
  2021-09-15 13:52   ` Andrey Grodzovsky
  2021-09-15 20:14 ` Andrey Grodzovsky
  1 sibling, 1 reply; 6+ messages in thread
From: Pan, Xinhui @ 2021-09-15  6:42 UTC (permalink / raw)
  To: amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian, Grodzovsky, Andrey

[AMD Official Use Only]

Andrey
I hit panic with this plug/unplug test without this patch.
But as we add enter/exit in all its callers. maybe it would not impact plug/unplug.

[ 1109.041095] BUG: unable to handle page fault for address: 00000000000010e1
[ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
[ 1109.196706] Call Trace:
[ 1109.199374]  ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
[ 1109.205843]  amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
[ 1109.212776]  amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
[ 1109.218563]  vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
[ 1109.223747]  amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
[ 1109.229650]  amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
[ 1109.235577]  amdgpu_pci_remove+0x37/0x70 [amdgpu]
[ 1109.240853]  pci_device_remove+0x3b/0xb0
[ 1109.245127]  device_release_driver_internal+0x100/0x1d0
[ 1109.250857]  device_release_driver+0x12/0x20
[ 1109.255535]  pci_stop_bus_device+0x79/0xa0
[ 1109.260016]  pci_stop_and_remove_bus_device_locked+0x1b/0x30
[ 1109.266197]  remove_store+0x7b/0x90
[ 1109.269990]  dev_attr_store+0x14/0x30
[ 1109.274002]  sysfs_kf_write+0x48/0x60
[ 1109.277998]  kernfs_fop_write_iter+0x14e/0x1e0

________________________________________
发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
发送时间: 2021年9月15日 14:37
收件人: amd-gfx@lists.freedesktop.org
抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, Xinhui
主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath

We hit soft hang while doing memory pressure test on one numa system.
After a qucik look, this is because kfd invalid/valid userptr memory
frequently with process_info lock hold.
Looks like update page table mapping use too much cpu time.

perf top says below,
75.81%  [kernel]       [k] __srcu_read_unlock
 6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
 3.56%  [kernel]       [k] __srcu_read_lock
 2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
 2.20%  [kernel]       [k] __sg_page_iter_dma_next
 2.15%  [drm]          [k] drm_dev_enter
 1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
 1.18%  [kernel]       [k] __sg_alloc_table_from_pages
 1.09%  [drm]          [k] drm_dev_exit

So move drm_dev_enter/exit outside gmc code, instead let caller do it.
They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
gmc_init_pdb0. vm_bo_update_mapping already calls it.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
change from v1:
add enter/exit in more gmc_set_pte_pde callers
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 76efd5f8950f..d7e4f4660acf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -34,6 +34,7 @@
 #include <asm/set_memory.h>
 #endif
 #include "amdgpu.h"
+#include <drm/drm_drv.h>

 /*
  * GART
@@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
        u64 page_base;
        /* Starting from VEGA10, system bit must be 0 to mean invalid. */
        uint64_t flags = 0;
+       int idx;

        if (!adev->gart.ready) {
                WARN(1, "trying to unbind memory from uninitialized GART !\n");
                return -EINVAL;
        }

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return 0;
+
        t = offset / AMDGPU_GPU_PAGE_SIZE;
        p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
        for (i = 0; i < pages; i++, p++) {
@@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
        for (i = 0; i < adev->num_vmhubs; i++)
                amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);

+       drm_dev_exit(idx);
        return 0;
 }

@@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
 {
        uint64_t page_base;
        unsigned i, j, t;
+       int idx;

        if (!adev->gart.ready) {
                WARN(1, "trying to bind memory to uninitialized GART !\n");
                return -EINVAL;
        }

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return 0;
+
        t = offset / AMDGPU_GPU_PAGE_SIZE;

        for (i = 0; i < pages; i++) {
@@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
                        page_base += AMDGPU_GPU_PAGE_SIZE;
                }
        }
+       drm_dev_exit(idx);
        return 0;
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 54f059501a33..1427fd70310c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
 {
        void __iomem *ptr = (void *)cpu_pt_addr;
        uint64_t value;
-       int idx;
-
-       if (!drm_dev_enter(&adev->ddev, &idx))
-               return 0;

        /*
         * The following is for PTE only. GART does not have PDEs.
@@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
        value |= flags;
        writeq(value, ptr + (gpu_page_idx * 8));

-       drm_dev_exit(idx);
-
        return 0;
 }

@@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
                adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
        u64 vram_end = vram_addr + vram_size;
        u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
+       int idx;
+
+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return;

        flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
        flags |= AMDGPU_PTE_WRITEABLE;
@@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
        flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
        /* Requires gart_ptb_gpu_pa to be 4K aligned */
        amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
+       drm_dev_exit(idx);
 }

 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0126ece898da..daa16d2f89da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
        struct amdgpu_bo *bo = &vmbo->bo;
        unsigned entries, ats_entries;
        uint64_t addr;
-       int r;
+       int r, idx;

        /* Figure out our place in the hierarchy */
        if (ancestor->parent) {
@@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                        return r;
        }

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return -ENODEV;
+
        r = vm->update_funcs->map_table(vmbo);
        if (r)
-               return r;
+               goto exit;

        memset(&params, 0, sizeof(params));
        params.adev = adev;
@@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,

        r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
        if (r)
-               return r;
+               goto exit;

        addr = 0;
        if (ats_entries) {
@@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
                                             value, flags);
                if (r)
-                       return r;
+                       goto exit;

                addr += ats_entries * 8;
        }
@@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
                r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
                                             value, flags);
                if (r)
-                       return r;
+                       goto exit;
        }

-       return vm->update_funcs->commit(&params, NULL);
+       r = vm->update_funcs->commit(&params, NULL);
+exit:
+       drm_dev_exit(idx);
+       return r;
 }

 /**
@@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
                          struct amdgpu_vm *vm, bool immediate)
 {
        struct amdgpu_vm_update_params params;
-       int r;
+       int r, idx;

        if (list_empty(&vm->relocated))
                return 0;

+       if (!drm_dev_enter(&adev->ddev, &idx))
+               return -ENODEV;
+
        memset(&params, 0, sizeof(params));
        params.adev = adev;
        params.vm = vm;
@@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,

        r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
        if (r)
-               return r;
+               goto exit;

        while (!list_empty(&vm->relocated)) {
                struct amdgpu_vm_bo_base *entry;
@@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
        r = vm->update_funcs->commit(&params, &vm->last_update);
        if (r)
                goto error;
+       drm_dev_exit(idx);
        return 0;

 error:
        amdgpu_vm_invalidate_pds(adev, vm);
+exit:
+       drm_dev_exit(idx);
        return r;
 }

--
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
  2021-09-15  6:42 ` 回复: " Pan, Xinhui
@ 2021-09-15 13:52   ` Andrey Grodzovsky
  2021-09-15 13:57     ` Christian König
  0 siblings, 1 reply; 6+ messages in thread
From: Andrey Grodzovsky @ 2021-09-15 13:52 UTC (permalink / raw)
  To: Pan, Xinhui, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian

On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
> [AMD Official Use Only]
>
> Andrey
> I hit panic with this plug/unplug test without this patch.


Can you please tell which ASIC you are using and which kernel branch and 
what is the tip commit ?


> But as we add enter/exit in all its callers. maybe it would not impact plug/unplug.


If you add enter/exit in all callers then why this solves the problem ? 
Is it because in one or more callers
amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit 
many times creates the problematic
contention from multiple threads ?

Andrey

>
> [ 1109.041095] BUG: unable to handle page fault for address: 00000000000010e1
> [ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
> [ 1109.196706] Call Trace:
> [ 1109.199374]  ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
> [ 1109.205843]  amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
> [ 1109.212776]  amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
> [ 1109.218563]  vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
> [ 1109.223747]  amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
> [ 1109.229650]  amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
> [ 1109.235577]  amdgpu_pci_remove+0x37/0x70 [amdgpu]
> [ 1109.240853]  pci_device_remove+0x3b/0xb0
> [ 1109.245127]  device_release_driver_internal+0x100/0x1d0
> [ 1109.250857]  device_release_driver+0x12/0x20
> [ 1109.255535]  pci_stop_bus_device+0x79/0xa0
> [ 1109.260016]  pci_stop_and_remove_bus_device_locked+0x1b/0x30
> [ 1109.266197]  remove_store+0x7b/0x90
> [ 1109.269990]  dev_attr_store+0x14/0x30
> [ 1109.274002]  sysfs_kf_write+0x48/0x60
> [ 1109.277998]  kernfs_fop_write_iter+0x14e/0x1e0
>
> ________________________________________
> 发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
> 发送时间: 2021年9月15日 14:37
> 收件人: amd-gfx@lists.freedesktop.org
> 抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, Xinhui
> 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
>
> We hit soft hang while doing memory pressure test on one numa system.
> After a qucik look, this is because kfd invalid/valid userptr memory
> frequently with process_info lock hold.
> Looks like update page table mapping use too much cpu time.
>
> perf top says below,
> 75.81%  [kernel]       [k] __srcu_read_unlock
>   6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
>   3.56%  [kernel]       [k] __srcu_read_lock
>   2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
>   2.20%  [kernel]       [k] __sg_page_iter_dma_next
>   2.15%  [drm]          [k] drm_dev_enter
>   1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
>   1.18%  [kernel]       [k] __sg_alloc_table_from_pages
>   1.09%  [drm]          [k] drm_dev_exit
>
> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
> change from v1:
> add enter/exit in more gmc_set_pte_pde callers
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
>   3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 76efd5f8950f..d7e4f4660acf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -34,6 +34,7 @@
>   #include <asm/set_memory.h>
>   #endif
>   #include "amdgpu.h"
> +#include <drm/drm_drv.h>
>
>   /*
>    * GART
> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>          u64 page_base;
>          /* Starting from VEGA10, system bit must be 0 to mean invalid. */
>          uint64_t flags = 0;
> +       int idx;
>
>          if (!adev->gart.ready) {
>                  WARN(1, "trying to unbind memory from uninitialized GART !\n");
>                  return -EINVAL;
>          }
>
> +       if (!drm_dev_enter(&adev->ddev, &idx))
> +               return 0;
> +
>          t = offset / AMDGPU_GPU_PAGE_SIZE;
>          p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>          for (i = 0; i < pages; i++, p++) {
> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>          for (i = 0; i < adev->num_vmhubs; i++)
>                  amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>
> +       drm_dev_exit(idx);
>          return 0;
>   }
>
> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>   {
>          uint64_t page_base;
>          unsigned i, j, t;
> +       int idx;
>
>          if (!adev->gart.ready) {
>                  WARN(1, "trying to bind memory to uninitialized GART !\n");
>                  return -EINVAL;
>          }
>
> +       if (!drm_dev_enter(&adev->ddev, &idx))
> +               return 0;
> +
>          t = offset / AMDGPU_GPU_PAGE_SIZE;
>
>          for (i = 0; i < pages; i++) {
> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>                          page_base += AMDGPU_GPU_PAGE_SIZE;
>                  }
>          }
> +       drm_dev_exit(idx);
>          return 0;
>   }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 54f059501a33..1427fd70310c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
>   {
>          void __iomem *ptr = (void *)cpu_pt_addr;
>          uint64_t value;
> -       int idx;
> -
> -       if (!drm_dev_enter(&adev->ddev, &idx))
> -               return 0;
>
>          /*
>           * The following is for PTE only. GART does not have PDEs.
> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
>          value |= flags;
>          writeq(value, ptr + (gpu_page_idx * 8));
>
> -       drm_dev_exit(idx);
> -
>          return 0;
>   }
>
> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>                  adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
>          u64 vram_end = vram_addr + vram_size;
>          u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
> +       int idx;
> +
> +       if (!drm_dev_enter(&adev->ddev, &idx))
> +               return;
>
>          flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>          flags |= AMDGPU_PTE_WRITEABLE;
> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>          flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>          /* Requires gart_ptb_gpu_pa to be 4K aligned */
>          amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
> +       drm_dev_exit(idx);
>   }
>
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0126ece898da..daa16d2f89da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>          struct amdgpu_bo *bo = &vmbo->bo;
>          unsigned entries, ats_entries;
>          uint64_t addr;
> -       int r;
> +       int r, idx;
>
>          /* Figure out our place in the hierarchy */
>          if (ancestor->parent) {
> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>                          return r;
>          }
>
> +       if (!drm_dev_enter(&adev->ddev, &idx))
> +               return -ENODEV;
> +
>          r = vm->update_funcs->map_table(vmbo);
>          if (r)
> -               return r;
> +               goto exit;
>
>          memset(&params, 0, sizeof(params));
>          params.adev = adev;
> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>
>          r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>          if (r)
> -               return r;
> +               goto exit;
>
>          addr = 0;
>          if (ats_entries) {
> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>                  r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
>                                               value, flags);
>                  if (r)
> -                       return r;
> +                       goto exit;
>
>                  addr += ats_entries * 8;
>          }
> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>                  r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
>                                               value, flags);
>                  if (r)
> -                       return r;
> +                       goto exit;
>          }
>
> -       return vm->update_funcs->commit(&params, NULL);
> +       r = vm->update_funcs->commit(&params, NULL);
> +exit:
> +       drm_dev_exit(idx);
> +       return r;
>   }
>
>   /**
> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>                            struct amdgpu_vm *vm, bool immediate)
>   {
>          struct amdgpu_vm_update_params params;
> -       int r;
> +       int r, idx;
>
>          if (list_empty(&vm->relocated))
>                  return 0;
>
> +       if (!drm_dev_enter(&adev->ddev, &idx))
> +               return -ENODEV;
> +
>          memset(&params, 0, sizeof(params));
>          params.adev = adev;
>          params.vm = vm;
> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>
>          r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>          if (r)
> -               return r;
> +               goto exit;
>
>          while (!list_empty(&vm->relocated)) {
>                  struct amdgpu_vm_bo_base *entry;
> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>          r = vm->update_funcs->commit(&params, &vm->last_update);
>          if (r)
>                  goto error;
> +       drm_dev_exit(idx);
>          return 0;
>
>   error:
>          amdgpu_vm_invalidate_pds(adev, vm);
> +exit:
> +       drm_dev_exit(idx);
>          return r;
>   }
>
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
  2021-09-15 13:52   ` Andrey Grodzovsky
@ 2021-09-15 13:57     ` Christian König
  2021-09-15 14:04       ` Andrey Grodzovsky
  0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2021-09-15 13:57 UTC (permalink / raw)
  To: Andrey Grodzovsky, Pan, Xinhui, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian

Am 15.09.21 um 15:52 schrieb Andrey Grodzovsky:
> On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
>> [AMD Official Use Only]
>>
>> Andrey
>> I hit panic with this plug/unplug test without this patch.
>
>
> Can you please tell which ASIC you are using and which kernel branch 
> and what is the tip commit ?
>
>
>> But as we add enter/exit in all its callers. maybe it would not 
>> impact plug/unplug.
>
>
> If you add enter/exit in all callers then why this solves the problem 
> ? Is it because in one or more callers
> amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit 
> many times creates the problematic
> contention from multiple threads ?

The most likely cause of this is cache line bouncing I think and yes 
moving the enter/exit a level up should fix this.

But I strongly suggest to test this with lockdep enabled and hotplug/GPU 
reset a couple of times.

Christian.

>
> Andrey
>
>>
>> [ 1109.041095] BUG: unable to handle page fault for address: 
>> 00000000000010e1
>> [ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
>> [ 1109.196706] Call Trace:
>> [ 1109.199374]  ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
>> [ 1109.205843]  amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
>> [ 1109.212776]  amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
>> [ 1109.218563]  vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
>> [ 1109.223747]  amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
>> [ 1109.229650]  amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
>> [ 1109.235577]  amdgpu_pci_remove+0x37/0x70 [amdgpu]
>> [ 1109.240853]  pci_device_remove+0x3b/0xb0
>> [ 1109.245127]  device_release_driver_internal+0x100/0x1d0
>> [ 1109.250857]  device_release_driver+0x12/0x20
>> [ 1109.255535]  pci_stop_bus_device+0x79/0xa0
>> [ 1109.260016]  pci_stop_and_remove_bus_device_locked+0x1b/0x30
>> [ 1109.266197]  remove_store+0x7b/0x90
>> [ 1109.269990]  dev_attr_store+0x14/0x30
>> [ 1109.274002]  sysfs_kf_write+0x48/0x60
>> [ 1109.277998]  kernfs_fop_write_iter+0x14e/0x1e0
>>
>> ________________________________________
>> 发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
>> 发送时间: 2021年9月15日 14:37
>> 收件人: amd-gfx@lists.freedesktop.org
>> 抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, 
>> Xinhui
>> 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
>>
>> We hit soft hang while doing memory pressure test on one numa system.
>> After a qucik look, this is because kfd invalid/valid userptr memory
>> frequently with process_info lock hold.
>> Looks like update page table mapping use too much cpu time.
>>
>> perf top says below,
>> 75.81%  [kernel]       [k] __srcu_read_unlock
>>   6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
>>   3.56%  [kernel]       [k] __srcu_read_lock
>>   2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
>>   2.20%  [kernel]       [k] __sg_page_iter_dma_next
>>   2.15%  [drm]          [k] drm_dev_enter
>>   1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
>>   1.18%  [kernel]       [k] __sg_alloc_table_from_pages
>>   1.09%  [drm]          [k] drm_dev_exit
>>
>> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
>> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
>> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>>
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> ---
>> change from v1:
>> add enter/exit in more gmc_set_pte_pde callers
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
>>   3 files changed, 36 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index 76efd5f8950f..d7e4f4660acf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -34,6 +34,7 @@
>>   #include <asm/set_memory.h>
>>   #endif
>>   #include "amdgpu.h"
>> +#include <drm/drm_drv.h>
>>
>>   /*
>>    * GART
>> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device 
>> *adev, uint64_t offset,
>>          u64 page_base;
>>          /* Starting from VEGA10, system bit must be 0 to mean 
>> invalid. */
>>          uint64_t flags = 0;
>> +       int idx;
>>
>>          if (!adev->gart.ready) {
>>                  WARN(1, "trying to unbind memory from uninitialized 
>> GART !\n");
>>                  return -EINVAL;
>>          }
>>
>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>> +               return 0;
>> +
>>          t = offset / AMDGPU_GPU_PAGE_SIZE;
>>          p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>          for (i = 0; i < pages; i++, p++) {
>> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device 
>> *adev, uint64_t offset,
>>          for (i = 0; i < adev->num_vmhubs; i++)
>>                  amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>>
>> +       drm_dev_exit(idx);
>>          return 0;
>>   }
>>
>> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, 
>> uint64_t offset,
>>   {
>>          uint64_t page_base;
>>          unsigned i, j, t;
>> +       int idx;
>>
>>          if (!adev->gart.ready) {
>>                  WARN(1, "trying to bind memory to uninitialized GART 
>> !\n");
>>                  return -EINVAL;
>>          }
>>
>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>> +               return 0;
>> +
>>          t = offset / AMDGPU_GPU_PAGE_SIZE;
>>
>>          for (i = 0; i < pages; i++) {
>> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, 
>> uint64_t offset,
>>                          page_base += AMDGPU_GPU_PAGE_SIZE;
>>                  }
>>          }
>> +       drm_dev_exit(idx);
>>          return 0;
>>   }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> index 54f059501a33..1427fd70310c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device 
>> *adev, void *cpu_pt_addr,
>>   {
>>          void __iomem *ptr = (void *)cpu_pt_addr;
>>          uint64_t value;
>> -       int idx;
>> -
>> -       if (!drm_dev_enter(&adev->ddev, &idx))
>> -               return 0;
>>
>>          /*
>>           * The following is for PTE only. GART does not have PDEs.
>> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device 
>> *adev, void *cpu_pt_addr,
>>          value |= flags;
>>          writeq(value, ptr + (gpu_page_idx * 8));
>>
>> -       drm_dev_exit(idx);
>> -
>>          return 0;
>>   }
>>
>> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device 
>> *adev)
>>                  adev->gmc.xgmi.physical_node_id * 
>> adev->gmc.xgmi.node_segment_size;
>>          u64 vram_end = vram_addr + vram_size;
>>          u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
>> +       int idx;
>> +
>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>> +               return;
>>
>>          flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>>          flags |= AMDGPU_PTE_WRITEABLE;
>> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device 
>> *adev)
>>          flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>>          /* Requires gart_ptb_gpu_pa to be 4K aligned */
>>          amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, 
>> gart_ptb_gpu_pa, flags);
>> +       drm_dev_exit(idx);
>>   }
>>
>>   /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 0126ece898da..daa16d2f89da 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct 
>> amdgpu_device *adev,
>>          struct amdgpu_bo *bo = &vmbo->bo;
>>          unsigned entries, ats_entries;
>>          uint64_t addr;
>> -       int r;
>> +       int r, idx;
>>
>>          /* Figure out our place in the hierarchy */
>>          if (ancestor->parent) {
>> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct 
>> amdgpu_device *adev,
>>                          return r;
>>          }
>>
>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>> +               return -ENODEV;
>> +
>>          r = vm->update_funcs->map_table(vmbo);
>>          if (r)
>> -               return r;
>> +               goto exit;
>>
>>          memset(&params, 0, sizeof(params));
>>          params.adev = adev;
>> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct 
>> amdgpu_device *adev,
>>
>>          r = vm->update_funcs->prepare(&params, NULL, 
>> AMDGPU_SYNC_EXPLICIT);
>>          if (r)
>> -               return r;
>> +               goto exit;
>>
>>          addr = 0;
>>          if (ats_entries) {
>> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct 
>> amdgpu_device *adev,
>>                  r = vm->update_funcs->update(&params, vmbo, addr, 0, 
>> ats_entries,
>>                                               value, flags);
>>                  if (r)
>> -                       return r;
>> +                       goto exit;
>>
>>                  addr += ats_entries * 8;
>>          }
>> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct 
>> amdgpu_device *adev,
>>                  r = vm->update_funcs->update(&params, vmbo, addr, 0, 
>> entries,
>>                                               value, flags);
>>                  if (r)
>> -                       return r;
>> +                       goto exit;
>>          }
>>
>> -       return vm->update_funcs->commit(&params, NULL);
>> +       r = vm->update_funcs->commit(&params, NULL);
>> +exit:
>> +       drm_dev_exit(idx);
>> +       return r;
>>   }
>>
>>   /**
>> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct 
>> amdgpu_device *adev,
>>                            struct amdgpu_vm *vm, bool immediate)
>>   {
>>          struct amdgpu_vm_update_params params;
>> -       int r;
>> +       int r, idx;
>>
>>          if (list_empty(&vm->relocated))
>>                  return 0;
>>
>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>> +               return -ENODEV;
>> +
>>          memset(&params, 0, sizeof(params));
>>          params.adev = adev;
>>          params.vm = vm;
>> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device 
>> *adev,
>>
>>          r = vm->update_funcs->prepare(&params, NULL, 
>> AMDGPU_SYNC_EXPLICIT);
>>          if (r)
>> -               return r;
>> +               goto exit;
>>
>>          while (!list_empty(&vm->relocated)) {
>>                  struct amdgpu_vm_bo_base *entry;
>> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct 
>> amdgpu_device *adev,
>>          r = vm->update_funcs->commit(&params, &vm->last_update);
>>          if (r)
>>                  goto error;
>> +       drm_dev_exit(idx);
>>          return 0;
>>
>>   error:
>>          amdgpu_vm_invalidate_pds(adev, vm);
>> +exit:
>> +       drm_dev_exit(idx);
>>          return r;
>>   }
>>
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: 回复: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
  2021-09-15 13:57     ` Christian König
@ 2021-09-15 14:04       ` Andrey Grodzovsky
  0 siblings, 0 replies; 6+ messages in thread
From: Andrey Grodzovsky @ 2021-09-15 14:04 UTC (permalink / raw)
  To: Christian König, Pan, Xinhui, amd-gfx@lists.freedesktop.org
  Cc: Deucher, Alexander, Koenig, Christian


On 2021-09-15 9:57 a.m., Christian König wrote:
> Am 15.09.21 um 15:52 schrieb Andrey Grodzovsky:
>> On 2021-09-15 2:42 a.m., Pan, Xinhui wrote:
>>> [AMD Official Use Only]
>>>
>>> Andrey
>>> I hit panic with this plug/unplug test without this patch.
>>
>>
>> Can you please tell which ASIC you are using and which kernel branch 
>> and what is the tip commit ?
>>
>>
>>> But as we add enter/exit in all its callers. maybe it would not 
>>> impact plug/unplug.
>>
>>
>> If you add enter/exit in all callers then why this solves the problem 
>> ? Is it because in one or more callers
>> amdgpu_gmc_set_pte_pde is called many times and so calling enter/exit 
>> many times creates the problematic
>> contention from multiple threads ?
>
> The most likely cause of this is cache line bouncing I think and yes 
> moving the enter/exit a level up should fix this.
>
> But I strongly suggest to test this with lockdep enabled and 
> hotplug/GPU reset a couple of times.


Xinhui already tried testing with Hotplug and there is a regression 
crash as you can see bellow. That
why I wanted to know which kernel and which ASIC to see if I can 
reproduce and fix the regression quickly.

I will try on my side with latest kernel and vega 10 anyway since i see 
vega 10 functions from the trace.

Andrey


>
> Christian.
>
>>
>> Andrey
>>
>>>
>>> [ 1109.041095] BUG: unable to handle page fault for address: 
>>> 00000000000010e1
>>> [ 1109.086353] RIP: 0010:vega10_power_gate_vce+0x15/0x40 [amdgpu]
>>> [ 1109.196706] Call Trace:
>>> [ 1109.199374]  ? pp_set_powergating_by_smu+0x1f9/0x4a0 [amdgpu]
>>> [ 1109.205843]  amdgpu_dpm_set_powergating_by_smu+0xa6/0x150 [amdgpu]
>>> [ 1109.212776]  amdgpu_dpm_enable_vce+0x36/0x100 [amdgpu]
>>> [ 1109.218563]  vce_v4_0_hw_fini+0xe1/0xf0 [amdgpu]
>>> [ 1109.223747]  amdgpu_device_fini_hw+0x333/0x483 [amdgpu]
>>> [ 1109.229650]  amdgpu_driver_unload_kms+0x80/0xe0 [amdgpu]
>>> [ 1109.235577]  amdgpu_pci_remove+0x37/0x70 [amdgpu]
>>> [ 1109.240853]  pci_device_remove+0x3b/0xb0
>>> [ 1109.245127]  device_release_driver_internal+0x100/0x1d0
>>> [ 1109.250857]  device_release_driver+0x12/0x20
>>> [ 1109.255535]  pci_stop_bus_device+0x79/0xa0
>>> [ 1109.260016] pci_stop_and_remove_bus_device_locked+0x1b/0x30
>>> [ 1109.266197]  remove_store+0x7b/0x90
>>> [ 1109.269990]  dev_attr_store+0x14/0x30
>>> [ 1109.274002]  sysfs_kf_write+0x48/0x60
>>> [ 1109.277998]  kernfs_fop_write_iter+0x14e/0x1e0
>>>
>>> ________________________________________
>>> 发件人: Pan, Xinhui <Xinhui.Pan@amd.com>
>>> 发送时间: 2021年9月15日 14:37
>>> 收件人: amd-gfx@lists.freedesktop.org
>>> 抄送: Deucher, Alexander; Koenig, Christian; Grodzovsky, Andrey; Pan, 
>>> Xinhui
>>> 主题: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot 
>>> codepath
>>>
>>> We hit soft hang while doing memory pressure test on one numa system.
>>> After a qucik look, this is because kfd invalid/valid userptr memory
>>> frequently with process_info lock hold.
>>> Looks like update page table mapping use too much cpu time.
>>>
>>> perf top says below,
>>> 75.81%  [kernel]       [k] __srcu_read_unlock
>>>   6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
>>>   3.56%  [kernel]       [k] __srcu_read_lock
>>>   2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
>>>   2.20%  [kernel]       [k] __sg_page_iter_dma_next
>>>   2.15%  [drm]          [k] drm_dev_enter
>>>   1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
>>>   1.18%  [kernel]       [k] __sg_alloc_table_from_pages
>>>   1.09%  [drm]          [k] drm_dev_exit
>>>
>>> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
>>> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
>>> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>>>
>>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>>> ---
>>> change from v1:
>>> add enter/exit in more gmc_set_pte_pde callers
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 
>>> +++++++++++++++++-------
>>>   3 files changed, 36 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> index 76efd5f8950f..d7e4f4660acf 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>> @@ -34,6 +34,7 @@
>>>   #include <asm/set_memory.h>
>>>   #endif
>>>   #include "amdgpu.h"
>>> +#include <drm/drm_drv.h>
>>>
>>>   /*
>>>    * GART
>>> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device 
>>> *adev, uint64_t offset,
>>>          u64 page_base;
>>>          /* Starting from VEGA10, system bit must be 0 to mean 
>>> invalid. */
>>>          uint64_t flags = 0;
>>> +       int idx;
>>>
>>>          if (!adev->gart.ready) {
>>>                  WARN(1, "trying to unbind memory from uninitialized 
>>> GART !\n");
>>>                  return -EINVAL;
>>>          }
>>>
>>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>>> +               return 0;
>>> +
>>>          t = offset / AMDGPU_GPU_PAGE_SIZE;
>>>          p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>>          for (i = 0; i < pages; i++, p++) {
>>> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device 
>>> *adev, uint64_t offset,
>>>          for (i = 0; i < adev->num_vmhubs; i++)
>>>                  amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>>>
>>> +       drm_dev_exit(idx);
>>>          return 0;
>>>   }
>>>
>>> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device 
>>> *adev, uint64_t offset,
>>>   {
>>>          uint64_t page_base;
>>>          unsigned i, j, t;
>>> +       int idx;
>>>
>>>          if (!adev->gart.ready) {
>>>                  WARN(1, "trying to bind memory to uninitialized 
>>> GART !\n");
>>>                  return -EINVAL;
>>>          }
>>>
>>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>>> +               return 0;
>>> +
>>>          t = offset / AMDGPU_GPU_PAGE_SIZE;
>>>
>>>          for (i = 0; i < pages; i++) {
>>> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, 
>>> uint64_t offset,
>>>                          page_base += AMDGPU_GPU_PAGE_SIZE;
>>>                  }
>>>          }
>>> +       drm_dev_exit(idx);
>>>          return 0;
>>>   }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> index 54f059501a33..1427fd70310c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
>>> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device 
>>> *adev, void *cpu_pt_addr,
>>>   {
>>>          void __iomem *ptr = (void *)cpu_pt_addr;
>>>          uint64_t value;
>>> -       int idx;
>>> -
>>> -       if (!drm_dev_enter(&adev->ddev, &idx))
>>> -               return 0;
>>>
>>>          /*
>>>           * The following is for PTE only. GART does not have PDEs.
>>> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device 
>>> *adev, void *cpu_pt_addr,
>>>          value |= flags;
>>>          writeq(value, ptr + (gpu_page_idx * 8));
>>>
>>> -       drm_dev_exit(idx);
>>> -
>>>          return 0;
>>>   }
>>>
>>> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device 
>>> *adev)
>>>                  adev->gmc.xgmi.physical_node_id * 
>>> adev->gmc.xgmi.node_segment_size;
>>>          u64 vram_end = vram_addr + vram_size;
>>>          u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
>>> +       int idx;
>>> +
>>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>>> +               return;
>>>
>>>          flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>>>          flags |= AMDGPU_PTE_WRITEABLE;
>>> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device 
>>> *adev)
>>>          flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>>>          /* Requires gart_ptb_gpu_pa to be 4K aligned */
>>>          amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, 
>>> gart_ptb_gpu_pa, flags);
>>> +       drm_dev_exit(idx);
>>>   }
>>>
>>>   /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 0126ece898da..daa16d2f89da 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct 
>>> amdgpu_device *adev,
>>>          struct amdgpu_bo *bo = &vmbo->bo;
>>>          unsigned entries, ats_entries;
>>>          uint64_t addr;
>>> -       int r;
>>> +       int r, idx;
>>>
>>>          /* Figure out our place in the hierarchy */
>>>          if (ancestor->parent) {
>>> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct 
>>> amdgpu_device *adev,
>>>                          return r;
>>>          }
>>>
>>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>>> +               return -ENODEV;
>>> +
>>>          r = vm->update_funcs->map_table(vmbo);
>>>          if (r)
>>> -               return r;
>>> +               goto exit;
>>>
>>>          memset(&params, 0, sizeof(params));
>>>          params.adev = adev;
>>> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct 
>>> amdgpu_device *adev,
>>>
>>>          r = vm->update_funcs->prepare(&params, NULL, 
>>> AMDGPU_SYNC_EXPLICIT);
>>>          if (r)
>>> -               return r;
>>> +               goto exit;
>>>
>>>          addr = 0;
>>>          if (ats_entries) {
>>> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct 
>>> amdgpu_device *adev,
>>>                  r = vm->update_funcs->update(&params, vmbo, addr, 
>>> 0, ats_entries,
>>>                                               value, flags);
>>>                  if (r)
>>> -                       return r;
>>> +                       goto exit;
>>>
>>>                  addr += ats_entries * 8;
>>>          }
>>> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct 
>>> amdgpu_device *adev,
>>>                  r = vm->update_funcs->update(&params, vmbo, addr, 
>>> 0, entries,
>>>                                               value, flags);
>>>                  if (r)
>>> -                       return r;
>>> +                       goto exit;
>>>          }
>>>
>>> -       return vm->update_funcs->commit(&params, NULL);
>>> +       r = vm->update_funcs->commit(&params, NULL);
>>> +exit:
>>> +       drm_dev_exit(idx);
>>> +       return r;
>>>   }
>>>
>>>   /**
>>> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct 
>>> amdgpu_device *adev,
>>>                            struct amdgpu_vm *vm, bool immediate)
>>>   {
>>>          struct amdgpu_vm_update_params params;
>>> -       int r;
>>> +       int r, idx;
>>>
>>>          if (list_empty(&vm->relocated))
>>>                  return 0;
>>>
>>> +       if (!drm_dev_enter(&adev->ddev, &idx))
>>> +               return -ENODEV;
>>> +
>>>          memset(&params, 0, sizeof(params));
>>>          params.adev = adev;
>>>          params.vm = vm;
>>> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device 
>>> *adev,
>>>
>>>          r = vm->update_funcs->prepare(&params, NULL, 
>>> AMDGPU_SYNC_EXPLICIT);
>>>          if (r)
>>> -               return r;
>>> +               goto exit;
>>>
>>>          while (!list_empty(&vm->relocated)) {
>>>                  struct amdgpu_vm_bo_base *entry;
>>> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct 
>>> amdgpu_device *adev,
>>>          r = vm->update_funcs->commit(&params, &vm->last_update);
>>>          if (r)
>>>                  goto error;
>>> +       drm_dev_exit(idx);
>>>          return 0;
>>>
>>>   error:
>>>          amdgpu_vm_invalidate_pds(adev, vm);
>>> +exit:
>>> +       drm_dev_exit(idx);
>>>          return r;
>>>   }
>>>
>>> -- 
>>> 2.25.1
>>>
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath
  2021-09-15  6:37 [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath xinhui pan
  2021-09-15  6:42 ` 回复: " Pan, Xinhui
@ 2021-09-15 20:14 ` Andrey Grodzovsky
  1 sibling, 0 replies; 6+ messages in thread
From: Andrey Grodzovsky @ 2021-09-15 20:14 UTC (permalink / raw)
  To: xinhui pan, amd-gfx; +Cc: alexander.deucher, christian.koenig

I fixed 2 regressions and latest code, applied your patch on top and 
passed libdrm tests
on Vega 10. You can pickup those 2 patches and try too if you have time. 
In any case -

Reviewed-and-tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Andrey

On 2021-09-15 2:37 a.m., xinhui pan wrote:
> We hit soft hang while doing memory pressure test on one numa system.
> After a qucik look, this is because kfd invalid/valid userptr memory
> frequently with process_info lock hold.
> Looks like update page table mapping use too much cpu time.
>
> perf top says below,
> 75.81%  [kernel]       [k] __srcu_read_unlock
>   6.19%  [amdgpu]       [k] amdgpu_gmc_set_pte_pde
>   3.56%  [kernel]       [k] __srcu_read_lock
>   2.20%  [amdgpu]       [k] amdgpu_vm_cpu_update
>   2.20%  [kernel]       [k] __sg_page_iter_dma_next
>   2.15%  [drm]          [k] drm_dev_enter
>   1.70%  [drm]          [k] drm_prime_sg_to_dma_addr_array
>   1.18%  [kernel]       [k] __sg_alloc_table_from_pages
>   1.09%  [drm]          [k] drm_dev_exit
>
> So move drm_dev_enter/exit outside gmc code, instead let caller do it.
> They are gart_unbind, gart_map, vm_clear_bo, vm_update_pdes and
> gmc_init_pdb0. vm_bo_update_mapping already calls it.
>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
> change from v1:
> add enter/exit in more gmc_set_pte_pde callers
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 11 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c  | 11 +++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 28 +++++++++++++++++-------
>   3 files changed, 36 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 76efd5f8950f..d7e4f4660acf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -34,6 +34,7 @@
>   #include <asm/set_memory.h>
>   #endif
>   #include "amdgpu.h"
> +#include <drm/drm_drv.h>
>   
>   /*
>    * GART
> @@ -230,12 +231,16 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   	u64 page_base;
>   	/* Starting from VEGA10, system bit must be 0 to mean invalid. */
>   	uint64_t flags = 0;
> +	int idx;
>   
>   	if (!adev->gart.ready) {
>   		WARN(1, "trying to unbind memory from uninitialized GART !\n");
>   		return -EINVAL;
>   	}
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return 0;
> +
>   	t = offset / AMDGPU_GPU_PAGE_SIZE;
>   	p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>   	for (i = 0; i < pages; i++, p++) {
> @@ -254,6 +259,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   	for (i = 0; i < adev->num_vmhubs; i++)
>   		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
>   
> +	drm_dev_exit(idx);
>   	return 0;
>   }
>   
> @@ -276,12 +282,16 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>   {
>   	uint64_t page_base;
>   	unsigned i, j, t;
> +	int idx;
>   
>   	if (!adev->gart.ready) {
>   		WARN(1, "trying to bind memory to uninitialized GART !\n");
>   		return -EINVAL;
>   	}
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return 0;
> +
>   	t = offset / AMDGPU_GPU_PAGE_SIZE;
>   
>   	for (i = 0; i < pages; i++) {
> @@ -291,6 +301,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>   			page_base += AMDGPU_GPU_PAGE_SIZE;
>   		}
>   	}
> +	drm_dev_exit(idx);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 54f059501a33..1427fd70310c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -153,10 +153,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
>   {
>   	void __iomem *ptr = (void *)cpu_pt_addr;
>   	uint64_t value;
> -	int idx;
> -
> -	if (!drm_dev_enter(&adev->ddev, &idx))
> -		return 0;
>   
>   	/*
>   	 * The following is for PTE only. GART does not have PDEs.
> @@ -165,8 +161,6 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
>   	value |= flags;
>   	writeq(value, ptr + (gpu_page_idx * 8));
>   
> -	drm_dev_exit(idx);
> -
>   	return 0;
>   }
>   
> @@ -752,6 +746,10 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>   		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
>   	u64 vram_end = vram_addr + vram_size;
>   	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
> +	int idx;
> +
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return;
>   
>   	flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
>   	flags |= AMDGPU_PTE_WRITEABLE;
> @@ -773,6 +771,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
>   	flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
>   	/* Requires gart_ptb_gpu_pa to be 4K aligned */
>   	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
> +	drm_dev_exit(idx);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0126ece898da..daa16d2f89da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -800,7 +800,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   	struct amdgpu_bo *bo = &vmbo->bo;
>   	unsigned entries, ats_entries;
>   	uint64_t addr;
> -	int r;
> +	int r, idx;
>   
>   	/* Figure out our place in the hierarchy */
>   	if (ancestor->parent) {
> @@ -845,9 +845,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   			return r;
>   	}
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return -ENODEV;
> +
>   	r = vm->update_funcs->map_table(vmbo);
>   	if (r)
> -		return r;
> +		goto exit;
>   
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
> @@ -856,7 +859,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   
>   	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>   	if (r)
> -		return r;
> +		goto exit;
>   
>   	addr = 0;
>   	if (ats_entries) {
> @@ -872,7 +875,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   		r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
>   					     value, flags);
>   		if (r)
> -			return r;
> +			goto exit;
>   
>   		addr += ats_entries * 8;
>   	}
> @@ -895,10 +898,13 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
>   		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
>   					     value, flags);
>   		if (r)
> -			return r;
> +			goto exit;
>   	}
>   
> -	return vm->update_funcs->commit(&params, NULL);
> +	r = vm->update_funcs->commit(&params, NULL);
> +exit:
> +	drm_dev_exit(idx);
> +	return r;
>   }
>   
>   /**
> @@ -1384,11 +1390,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   			  struct amdgpu_vm *vm, bool immediate)
>   {
>   	struct amdgpu_vm_update_params params;
> -	int r;
> +	int r, idx;
>   
>   	if (list_empty(&vm->relocated))
>   		return 0;
>   
> +	if (!drm_dev_enter(&adev->ddev, &idx))
> +		return -ENODEV;
> +
>   	memset(&params, 0, sizeof(params));
>   	params.adev = adev;
>   	params.vm = vm;
> @@ -1396,7 +1405,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   
>   	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>   	if (r)
> -		return r;
> +		goto exit;
>   
>   	while (!list_empty(&vm->relocated)) {
>   		struct amdgpu_vm_bo_base *entry;
> @@ -1414,10 +1423,13 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>   	r = vm->update_funcs->commit(&params, &vm->last_update);
>   	if (r)
>   		goto error;
> +	drm_dev_exit(idx);
>   	return 0;
>   
>   error:
>   	amdgpu_vm_invalidate_pds(adev, vm);
> +exit:
> +	drm_dev_exit(idx);
>   	return r;
>   }
>   

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-09-15 20:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-09-15  6:37 [PATCH v2] drm/amdgpu: Put drm_dev_enter/exit outside hot codepath xinhui pan
2021-09-15  6:42 ` 回复: " Pan, Xinhui
2021-09-15 13:52   ` Andrey Grodzovsky
2021-09-15 13:57     ` Christian König
2021-09-15 14:04       ` Andrey Grodzovsky
2021-09-15 20:14 ` Andrey Grodzovsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox