AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Deucher <alexander.deucher@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Alex Deucher <alexander.deucher@amd.com>,
	Jack Xiao <Jack.Xiao@amd.com>,
	Hawking Zhang <Hawking.Zhang@amd.com>
Subject: [PATCH 71/73] drm/amdgpu/mes: fix vm csa update issue
Date: Fri, 29 Apr 2022 13:46:22 -0400	[thread overview]
Message-ID: <20220429174624.459475-72-alexander.deucher@amd.com> (raw)
In-Reply-To: <20220429174624.459475-1-alexander.deucher@amd.com>

From: Jack Xiao <Jack.Xiao@amd.com>

Need reserve VM buffers before update VM csa.

v2: rebase fixes

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 81 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  3 +
 2 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index c9516b3aa6d9..51a6f309ef22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -883,40 +883,76 @@ void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data)
 		amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, NULL, NULL);
 }
 
-static int amdgpu_mes_test_map_ctx_meta_data(struct amdgpu_device *adev,
-				     struct amdgpu_vm *vm,
-				     struct amdgpu_mes_ctx_data *ctx_data)
+int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_mes_ctx_data *ctx_data)
 {
-	struct amdgpu_bo_va *meta_data_va = NULL;
-	uint64_t meta_data_addr = AMDGPU_VA_RESERVED_SIZE;
+	struct amdgpu_bo_va *bo_va;
+	struct ww_acquire_ctx ticket;
+	struct list_head list;
+	struct amdgpu_bo_list_entry pd;
+	struct ttm_validate_buffer csa_tv;
+	struct amdgpu_sync sync;
 	int r;
 
-	r = amdgpu_map_static_csa(adev, vm, ctx_data->meta_data_obj,
-				  &meta_data_va, meta_data_addr,
-				  sizeof(struct amdgpu_mes_ctx_meta_data));
-	if (r)
+	amdgpu_sync_create(&sync);
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&csa_tv.head);
+
+	csa_tv.bo = &ctx_data->meta_data_obj->tbo;
+	csa_tv.num_shared = 1;
+
+	list_add(&csa_tv.head, &list);
+	amdgpu_vm_get_pd_bo(vm, &list, &pd);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	if (r) {
+		DRM_ERROR("failed to reserve meta data BO: err=%d\n", r);
 		return r;
+	}
 
-	r = amdgpu_vm_bo_update(adev, meta_data_va, false);
-	if (r)
+	bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj);
+	if (!bo_va) {
+		ttm_eu_backoff_reservation(&ticket, &list);
+		DRM_ERROR("failed to create bo_va for meta data BO\n");
+		return -ENOMEM;
+	}
+
+	r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0,
+			     sizeof(struct amdgpu_mes_ctx_meta_data),
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
+
+	if (r) {
+		DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r);
 		goto error;
+	}
 
-	r = amdgpu_vm_update_pdes(adev, vm, false);
-	if (r)
+	r = amdgpu_vm_bo_update(adev, bo_va, false);
+	if (r) {
+		DRM_ERROR("failed to do vm_bo_update on meta data\n");
 		goto error;
+	}
+	amdgpu_sync_fence(&sync, bo_va->last_pt_update);
 
-	dma_fence_wait(vm->last_update, false);
-	dma_fence_wait(meta_data_va->last_pt_update, false);
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r) {
+		DRM_ERROR("failed to update pdes on meta data\n");
+		goto error;
+	}
+	amdgpu_sync_fence(&sync, vm->last_update);
 
-	ctx_data->meta_data_gpu_addr = meta_data_addr;
-	ctx_data->meta_data_va = meta_data_va;
+	amdgpu_sync_wait(&sync, false);
+	ttm_eu_backoff_reservation(&ticket, &list);
 
+	amdgpu_sync_free(&sync);
+	ctx_data->meta_data_va = bo_va;
 	return 0;
 
 error:
-	BUG_ON(amdgpu_bo_reserve(ctx_data->meta_data_obj, true));
-	amdgpu_vm_bo_rmv(adev, meta_data_va);
-	amdgpu_bo_unreserve(ctx_data->meta_data_obj);
+	amdgpu_vm_bo_del(adev, bo_va);
+	ttm_eu_backoff_reservation(&ticket, &list);
+	amdgpu_sync_free(&sync);
 	return r;
 }
 
@@ -1029,7 +1065,8 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
 		goto error_pasid;
 	}
 
-	r = amdgpu_mes_test_map_ctx_meta_data(adev, vm, &ctx_data);
+	ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
+	r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
 	if (r) {
 		DRM_ERROR("failed to map ctx meta data\n");
 		goto error_vm;
@@ -1075,7 +1112,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
 
 error_vm:
 	BUG_ON(amdgpu_bo_reserve(ctx_data.meta_data_obj, true));
-	amdgpu_vm_bo_rmv(adev, ctx_data.meta_data_va);
+	amdgpu_vm_bo_del(adev, ctx_data.meta_data_va);
 	amdgpu_bo_unreserve(ctx_data.meta_data_obj);
 	amdgpu_vm_fini(adev, vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 5c9e7932c7a9..a965ace0fd0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -264,6 +264,9 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
 int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
 				   struct amdgpu_mes_ctx_data *ctx_data);
 void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
+int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
+				 struct amdgpu_vm *vm,
+				 struct amdgpu_mes_ctx_data *ctx_data);
 
 int amdgpu_mes_self_test(struct amdgpu_device *adev);
 
-- 
2.35.1


  parent reply	other threads:[~2022-04-29 17:47 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-29 17:45 [PATCH 00/73] MES support Alex Deucher
2022-04-29 17:45 ` [PATCH 01/73] drm/amdgpu: define MQD abstract layer for hw ip Alex Deucher
2022-04-29 17:45 ` [PATCH 02/73] drm/amdgpu: add helper function to initialize mqd from ring v4 Alex Deucher
2022-04-29 17:45 ` [PATCH 03/73] drm/amdgpu: add the per-context meta data v3 Alex Deucher
2022-04-29 17:45 ` [PATCH 04/73] drm/amdgpu: add mes ctx data in amdgpu_ring Alex Deucher
2022-04-29 17:45 ` [PATCH 05/73] drm/amdgpu: define ring structure to access rptr/wptr/fence Alex Deucher
2022-04-29 17:45 ` [PATCH 06/73] drm/amdgpu: use ring structure to access rptr/wptr v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 07/73] drm/amdgpu: initialize/finalize the ring for mes queue Alex Deucher
2022-04-29 17:45 ` [PATCH 08/73] drm/amdgpu: assign the cpu/gpu address of fence from ring Alex Deucher
2022-04-29 17:45 ` [PATCH 09/73] drm/amdgpu/gfx10: implement mqd functions of gfx/compute eng v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 10/73] drm/amdgpu/gfx10: use per ctx CSA for ce metadata Alex Deucher
2022-04-29 17:45 ` [PATCH 11/73] drm/amdgpu/gfx10: use per ctx CSA for de metadata Alex Deucher
2022-04-29 17:45 ` [PATCH 12/73] drm/amdgpu/gfx10: associate mes queue id with fence v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 13/73] drm/amdgpu/gfx10: inherit vmid from mqd Alex Deucher
2022-04-29 17:45 ` [PATCH 14/73] drm/amdgpu/gfx10: use INVALIDATE_TLBS to invalidate TLBs v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 15/73] drm/amdgpu/gmc10: skip emitting pasid mapping packet Alex Deucher
2022-04-29 17:45 ` [PATCH 16/73] drm/amdgpu: use the whole doorbell space for mes Alex Deucher
2022-04-29 17:45 ` [PATCH 17/73] drm/amdgpu: update mes process/gang/queue definitions Alex Deucher
2022-04-29 17:45 ` [PATCH 18/73] drm/amdgpu: add mes_kiq module parameter v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 19/73] drm/amdgpu: allocate doorbell index for mes kiq Alex Deucher
2022-04-29 17:45 ` [PATCH 20/73] drm/amdgpu/mes: extend mes framework to support multiple mes pipes Alex Deucher
2022-04-29 17:45 ` [PATCH 21/73] drm/amdgpu/gfx10: add mes queue fence handling Alex Deucher
2022-04-29 17:45 ` [PATCH 22/73] drm/amdgpu/gfx10: add mes support for gfx ib test Alex Deucher
2022-04-29 17:45 ` [PATCH 23/73] drm/amdgpu: don't use kiq to flush gpu tlb if mes enabled Alex Deucher
2022-04-29 17:45 ` [PATCH 24/73] drm/amdgpu/sdma: use per-ctx sdma csa address for mes sdma queue Alex Deucher
2022-04-29 17:45 ` [PATCH 25/73] drm/amdgpu/sdma5.2: initialize sdma mqd Alex Deucher
2022-04-29 17:45 ` [PATCH 26/73] drm/amdgpu/sdma5.2: associate mes queue id with fence Alex Deucher
2022-04-29 17:45 ` [PATCH 27/73] drm/amdgpu/sdma5.2: add mes queue fence handling Alex Deucher
2022-04-29 17:45 ` [PATCH 28/73] drm/amdgpu/sdma5.2: add mes support for sdma ring test Alex Deucher
2022-04-29 17:45 ` [PATCH 29/73] drm/amdgpu/sdma5.2: add mes support for sdma ib test Alex Deucher
2022-04-29 17:45 ` [PATCH 30/73] drm/amdgpu/sdma5: initialize sdma mqd Alex Deucher
2022-04-29 17:45 ` [PATCH 31/73] drm/amdgpu/sdma5: associate mes queue id with fence Alex Deucher
2022-04-29 17:45 ` [PATCH 32/73] drm/amdgpu/sdma5: add mes queue fence handling Alex Deucher
2022-04-29 17:45 ` [PATCH 33/73] drm/amdgpu/sdma5: add mes support for sdma ring test Alex Deucher
2022-04-29 17:45 ` [PATCH 34/73] drm/amdgpu/sdma5: add mes support for sdma ib test Alex Deucher
2022-04-29 17:45 ` [PATCH 35/73] drm/amdgpu: add mes kiq PSP GFX FW type Alex Deucher
2022-04-29 17:45 ` [PATCH 36/73] drm/amdgpu/mes: add mes kiq callback Alex Deucher
2022-04-29 17:45 ` [PATCH 37/73] drm/amdgpu: add mes kiq frontdoor loading support Alex Deucher
2022-04-29 17:45 ` [PATCH 38/73] drm/amdgpu: enable mes kiq N-1 test on sienna cichlid Alex Deucher
2022-04-29 17:45 ` [PATCH 39/73] drm/amdgpu/mes: manage mes doorbell allocation Alex Deucher
2022-04-29 17:45 ` [PATCH 40/73] drm/amdgpu: add mes queue id mask v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 41/73] drm/amdgpu/mes: initialize/finalize common mes structure v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 42/73] drm/amdgpu/mes: relocate status_fence slot allocation Alex Deucher
2022-04-29 17:45 ` [PATCH 43/73] drm/amdgpu/mes10.1: call general mes initialization Alex Deucher
2022-04-29 17:45 ` [PATCH 44/73] drm/amdgpu/mes10.1: add delay after mes engine enable Alex Deucher
2022-04-29 17:45 ` [PATCH 45/73] drm/amdgpu/mes10.1: implement the suspend/resume routine Alex Deucher
2022-04-29 17:45 ` [PATCH 46/73] drm/amdgpu/mes: implement creating mes process v2 Alex Deucher
2022-04-29 17:45 ` [PATCH 47/73] drm/amdgpu/mes: implement destroying mes process Alex Deucher
2022-04-29 17:45 ` [PATCH 48/73] drm/amdgpu/mes: implement adding mes gang Alex Deucher
2022-04-29 17:46 ` [PATCH 49/73] drm/amdgpu/mes: implement removing " Alex Deucher
2022-04-29 17:46 ` [PATCH 50/73] drm/amdgpu/mes: implement suspending all gangs Alex Deucher
2022-04-29 17:46 ` [PATCH 51/73] drm/amdgpu/mes: implement resuming " Alex Deucher
2022-04-29 17:46 ` [PATCH 52/73] drm/amdgpu/mes: initialize mqd from queue properties Alex Deucher
2022-04-29 17:46 ` [PATCH 53/73] drm/amdgpu/mes: implement adding mes queue Alex Deucher
2022-04-29 17:46 ` [PATCH 54/73] drm/amdgpu/mes: implement removing " Alex Deucher
2022-04-29 17:46 ` [PATCH 55/73] drm/amdgpu/mes: add helper function to convert ring to queue property Alex Deucher
2022-04-29 17:46 ` [PATCH 56/73] drm/amdgpu/mes: add helper function to get the ctx meta data offset Alex Deucher
2022-04-29 17:46 ` [PATCH 57/73] drm/amdgpu/mes: use ring for kernel queue submission Alex Deucher
2022-04-29 17:46 ` [PATCH 58/73] drm/amdgpu/mes: implement removing mes ring Alex Deucher
2022-04-29 17:46 ` [PATCH 59/73] drm/amdgpu/mes: add helper functions to alloc/free ctx metadata Alex Deucher
2022-04-29 17:46 ` [PATCH 60/73] drm/amdgpu: skip kfd routines when mes enabled Alex Deucher
2022-04-29 17:46 ` [PATCH 61/73] drm/amdgpu: Enable KFD with MES enabled Alex Deucher
2022-04-29 17:46 ` [PATCH 62/73] drm/amdgpu: skip some checking for mes queue ib submission Alex Deucher
2022-04-29 17:46 ` [PATCH 63/73] drm/amdgpu: skip kiq ib tests if mes enabled Alex Deucher
2022-04-29 17:46 ` [PATCH 64/73] drm/amdgpu: skip gds switch for mes queue Alex Deucher
2022-04-29 17:46 ` [PATCH 65/73] drm/amdgpu: kiq takes charge of all queues Alex Deucher
2022-04-29 17:46 ` [PATCH 66/73] drm/amdgpu/mes: map ctx metadata for mes self test Alex Deucher
2022-04-29 17:46 ` [PATCH 67/73] drm/amdgpu/mes: create gang and queues " Alex Deucher
2022-04-29 17:46 ` [PATCH 68/73] drm/amdgpu/mes: add ring/ib test " Alex Deucher
2022-04-29 17:46 ` [PATCH 69/73] drm/amdgpu/mes: implement " Alex Deucher
2022-04-29 17:46 ` [PATCH 70/73] drm/amdgpu/mes10.1: add mes self test in late init Alex Deucher
2022-04-29 17:46 ` Alex Deucher [this message]
2022-04-29 17:46 ` [PATCH 72/73] drm/amdgpu/mes: disable mes sdma queue test Alex Deucher
2022-04-29 17:46 ` [PATCH 73/73] drm/amdgpu/mes: Update the doorbell function signatures Alex Deucher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220429174624.459475-72-alexander.deucher@amd.com \
    --to=alexander.deucher@amd.com \
    --cc=Hawking.Zhang@amd.com \
    --cc=Jack.Xiao@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox