* [RFC PATCH v3 01/10] drm/amdgpu: Add helper function to get xcc count
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-16 16:32 ` Alex Deucher
2025-12-03 12:54 ` [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions Lijo Lazar
` (8 subsequent siblings)
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add a helper function to get the number of XCCs given a partition id. If
there is no partition manager, return 1 as default.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 8058e8f35d41..b780c12b07e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -217,4 +217,26 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
+/*
+ * amdgpu_xcp_get_num_xcc - number of XCCs that belong to a partition
+ *
+ * Walks the partitions of @xcp_mgr looking for the one whose id equals
+ * @xcp_id and returns the number of bits set in its GFX instance mask.
+ * Returns 1 when there is no partition manager, when @xcp_id is
+ * AMDGPU_XCP_NO_PARTITION, when no partition matches, or when the instance
+ * details cannot be queried.
+ */
+static inline int amdgpu_xcp_get_num_xcc(struct amdgpu_xcp_mgr *xcp_mgr,
+					 int xcp_id)
+{
+	struct amdgpu_xcp *part;
+	uint32_t gfx_mask;
+	int idx;
+
+	if (!xcp_mgr || xcp_id == AMDGPU_XCP_NO_PARTITION)
+		return 1;
+
+	for_each_xcp(xcp_mgr, part, idx) {
+		if (part->id != xcp_id)
+			continue;
+
+		/* First match decides the result; fall back to 1 on error. */
+		if (unlikely(amdgpu_xcp_get_inst_details(part, AMDGPU_XCP_GFX,
+							 &gfx_mask)))
+			return 1;
+
+		return hweight32(gfx_mask);
+	}
+
+	return 1;
+}
#endif
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 01/10] drm/amdgpu: Add helper function to get xcc count
2025-12-03 12:54 ` [RFC PATCH v3 01/10] drm/amdgpu: Add helper function to get xcc count Lijo Lazar
@ 2025-12-16 16:32 ` Alex Deucher
0 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:32 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:44 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add a helper function to get the number of XCCs given a partition id. If
> there is no partition manager, return 1 as default.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 22 ++++++++++++++++++++++
> 1 file changed, 22 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
> index 8058e8f35d41..b780c12b07e0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
> @@ -217,4 +217,26 @@ amdgpu_get_next_xcp(struct amdgpu_xcp_mgr *xcp_mgr, int *from)
> for (i = 0, xcp = amdgpu_get_next_xcp(xcp_mgr, &i); xcp; \
> ++i, xcp = amdgpu_get_next_xcp(xcp_mgr, &i))
>
> +static inline int amdgpu_xcp_get_num_xcc(struct amdgpu_xcp_mgr *xcp_mgr,
> + int xcp_id)
> +{
> + struct amdgpu_xcp *xcp;
> + uint32_t xcc_mask;
> + int i, r;
> +
> + if (!xcp_mgr || xcp_id == AMDGPU_XCP_NO_PARTITION)
> + return 1;
> + for_each_xcp(xcp_mgr, xcp, i) {
> + if (xcp->id == xcp_id) {
> + r = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX,
> + &xcc_mask);
> + if (unlikely(r))
> + return 1;
> + else
> + return hweight32(xcc_mask);
> + }
> + }
> +
> + return 1;
> +}
> #endif
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
2025-12-03 12:54 ` [RFC PATCH v3 01/10] drm/amdgpu: Add helper function to get xcc count Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-04 7:36 ` Krzysztof Kozlowski
` (2 more replies)
2025-12-03 12:54 ` [RFC PATCH v3 03/10] drm/amdgpu: Fill cwsr save area details Lijo Lazar
` (7 subsequent siblings)
9 siblings, 3 replies; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add functions related to cwsr handling inside amdgpu framework.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 346 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 67 +++++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
5 files changed, 418 insertions(+), 2 deletions(-)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f65021678fc0..a5feb674508a 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -67,7 +67,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
- amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
+ amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o amdgpu_cwsr.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index fa71df36f4b3..b9920cab5d31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -331,6 +331,7 @@ struct kfd_vm_fault_info;
struct amdgpu_hive_info;
struct amdgpu_reset_context;
struct amdgpu_reset_control;
+struct amdgpu_cwsr_info;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -1325,6 +1326,8 @@ struct amdgpu_device {
* Must be last --ends in a flexible-array member.
*/
struct amdgpu_kfd_dev kfd;
+
+ struct amdgpu_cwsr_info *cwsr_info;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
new file mode 100644
index 000000000000..c0fc5a383071
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_exec.h>
+
+#include "amdgpu.h"
+#include "cwsr_trap_handler.h"
+#include "amdgpu_cwsr.h"
+
+/* Defined outside this file; a zero value makes amdgpu_cwsr_is_supported() return false. */
+extern int cwsr_enable;
+
+/* Size of the TBA mapping (holds the trap handler ISA): two GPU pages. */
+#define AMDGPU_CWSR_TBA_MAX_SIZE (2 * AMDGPU_GPU_PAGE_SIZE)
+/* Size of the TMA mapping: one GPU page. */
+#define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
+/* The TMA is mapped immediately after the TBA in the reserved trap VA range. */
+#define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
+
+/* Selects which region the map/unmap helpers operate on. */
+enum amdgpu_cwsr_region {
+ AMDGPU_CWSR_TBA,
+ AMDGPU_CWSR_TMA,
+};
+
+/* Sign-extended start of the reserved trap VA range, used as the TBA address. */
+static inline uint64_t amdgpu_cwsr_tba_vaddr(struct amdgpu_device *adev)
+{
+	const uint64_t trap_start = AMDGPU_VA_RESERVED_TRAP_START(adev);
+
+	return amdgpu_gmc_sign_extend(trap_start);
+}
+
+/*
+ * CWSR is usable only when the cwsr_enable switch is non-zero and the GC IP
+ * version is at least 9.0.1.
+ */
+static inline bool amdgpu_cwsr_is_supported(struct amdgpu_device *adev)
+{
+	if (!cwsr_enable)
+		return false;
+
+	return amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 1);
+}
+
+/*
+ * Select the CWSR trap handler binary matching the device's GC IP version
+ * and record its address and size in @cwsr_info (isa_buf / isa_sz).
+ *
+ * Exact-version matches are tested before the "less than" range checks, so
+ * the order of the branches matters. Every branch carries a BUILD_BUG_ON()
+ * on the size of the array it selects: most compare against
+ * AMDGPU_CWSR_TBA_MAX_SIZE, while the gfx9.5.0 and gfx11 branches compare
+ * against PAGE_SIZE.
+ *
+ * NOTE(review): amdgpu_cwsr_is_supported() rejects GC versions below 9.0.1,
+ * so the first branch looks unreachable when called from amdgpu_cwsr_init()
+ * - confirm.
+ */
+static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
+ struct amdgpu_cwsr_info *cwsr_info)
+{
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+
+ if (gc_ver < IP_VERSION(9, 0, 1)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx8_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx8_hex);
+ } else if (gc_ver == IP_VERSION(9, 4, 1)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_arcturus_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_arcturus_hex);
+ } else if (gc_ver == IP_VERSION(9, 4, 2)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_aldebaran_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_aldebaran_hex);
+ } else if (gc_ver == IP_VERSION(9, 4, 3) ||
+ gc_ver == IP_VERSION(9, 4, 4)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx9_4_3_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_4_3_hex);
+ } else if (gc_ver == IP_VERSION(9, 5, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx9_5_0_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_5_0_hex);
+ } else if (gc_ver < IP_VERSION(10, 1, 1)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx9_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_hex);
+ } else if (gc_ver < IP_VERSION(10, 3, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_nv1x_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_nv1x_hex);
+ } else if (gc_ver < IP_VERSION(11, 0, 0)) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx10_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx10_hex);
+ } else if (gc_ver < IP_VERSION(12, 0, 0)) {
+ /* The gfx11 cwsr trap handler must fit inside a single
+ page. */
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
+ } else {
+ /* Everything from GC 12.0.0 upwards uses the gfx12 handler. */
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
+ AMDGPU_CWSR_TBA_MAX_SIZE);
+ cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
+ cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
+ }
+}
+
+/**
+ * amdgpu_cwsr_init - set up the device-wide CWSR state
+ * @adev: amdgpu device
+ *
+ * Allocates the CWSR info structure, selects the trap handler ISA for the
+ * device, creates a GTT kernel BO of AMDGPU_CWSR_TBA_MAX_SIZE bytes and
+ * copies the ISA into it. On success the result is stored in
+ * adev->cwsr_info.
+ *
+ * Return: 0 on success, -EOPNOTSUPP if CWSR is not supported or no ISA was
+ * selected, -ENOMEM if the info structure cannot be allocated, or the error
+ * returned by amdgpu_bo_create_kernel().
+ */
+int amdgpu_cwsr_init(struct amdgpu_device *adev)
+{
+	void *ptr;
+	int r;
+
+	if (!amdgpu_cwsr_is_supported(adev))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Declare the scope-managed pointer together with its allocation, as
+	 * recommended by cleanup.h, instead of a placeholder NULL initializer.
+	 * Every error return below frees it automatically.
+	 */
+	struct amdgpu_cwsr_info *cwsr_info __free(kfree) =
+		kzalloc(sizeof(*cwsr_info), GFP_KERNEL);
+	if (!cwsr_info)
+		return -ENOMEM;
+
+	amdgpu_cwsr_init_isa_details(adev, cwsr_info);
+	if (!cwsr_info->isa_sz)
+		return -EOPNOTSUPP;
+
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TBA_MAX_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT, &cwsr_info->isa_bo,
+				    NULL, &ptr);
+	if (r)
+		return r;
+
+	memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
+
+	/* Hand ownership to the device; disarms the automatic kfree(). */
+	adev->cwsr_info = no_free_ptr(cwsr_info);
+
+	return 0;
+}
+
+/*
+ * Release the ISA buffer object and the CWSR info structure. Does nothing
+ * when CWSR was never enabled on @adev.
+ */
+void amdgpu_cwsr_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_cwsr_info *info;
+
+	if (!amdgpu_cwsr_is_enabled(adev))
+		return;
+
+	info = adev->cwsr_info;
+	amdgpu_bo_free_kernel(&info->isa_bo, NULL, NULL);
+	kfree(info);
+	adev->cwsr_info = NULL;
+}
+
+/*
+ * amdgpu_cwsr_map_region - map one CWSR region into a VM
+ * @adev: amdgpu device
+ * @vm: VM to map into
+ * @cwsr: trap object that receives the bo_va and the mapped address
+ * @region: AMDGPU_CWSR_TBA or AMDGPU_CWSR_TMA
+ *
+ * The TBA region maps the device-wide ISA BO at amdgpu_cwsr_tba_vaddr() with
+ * read/write/execute permissions. The TMA region maps @cwsr->tma_bo
+ * AMDGPU_CWSR_TMA_OFFSET bytes above that address with read/write
+ * permissions. On failure the bo_va created here is removed again.
+ *
+ * NOTE(review): the only caller in this file holds the VM page directory
+ * and ISA BO locks via drm_exec; presumably that is required - confirm.
+ *
+ * Return: 0 on success, -EINVAL if @cwsr or @vm is NULL, -ENOMEM if the
+ * bo_va cannot be created, or the error from amdgpu_vm_bo_map().
+ */
+static int amdgpu_cwsr_map_region(struct amdgpu_device *adev,
+				  struct amdgpu_vm *vm,
+				  struct amdgpu_cwsr_trap_obj *cwsr,
+				  enum amdgpu_cwsr_region region)
+{
+	uint64_t cwsr_addr, va_flags, va;
+	struct amdgpu_bo_va **bo_va;
+	struct amdgpu_bo *bo;
+	uint32_t size;
+	int r;
+
+	if (!cwsr || !vm)
+		return -EINVAL;
+
+	cwsr_addr = amdgpu_cwsr_tba_vaddr(adev);
+
+	if (region == AMDGPU_CWSR_TBA) {
+		size = AMDGPU_CWSR_TBA_MAX_SIZE;
+		bo_va = &cwsr->tba_va;
+		bo = adev->cwsr_info->isa_bo;
+		va = cwsr_addr;
+		va_flags = (AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+			    AMDGPU_VM_PAGE_EXECUTABLE);
+	} else {
+		size = AMDGPU_CWSR_TMA_MAX_SIZE;
+		bo_va = &cwsr->tma_va;
+		bo = cwsr->tma_bo;
+		va = cwsr_addr + AMDGPU_CWSR_TMA_OFFSET;
+		va_flags = (AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE);
+	}
+
+	*bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+	if (!*bo_va)
+		return -ENOMEM;
+
+	/* Strip the sign-extension bits before handing the address to the VM code. */
+	va &= AMDGPU_GMC_HOLE_MASK;
+	r = amdgpu_vm_bo_map(adev, *bo_va, va, 0, size, va_flags);
+	if (r) {
+		/* Name the region that actually failed, not always the TBA. */
+		dev_err(adev->dev, "failed to do bo_map on CWSR %s, err=%d\n",
+			region == AMDGPU_CWSR_TBA ? "TBA" : "TMA", r);
+		amdgpu_vm_bo_del(adev, *bo_va);
+		*bo_va = NULL;
+		return r;
+	}
+
+	/* Remember the mapped address; the unmap helper needs it. */
+	if (region == AMDGPU_CWSR_TBA)
+		cwsr->tba_gpu_va_addr = va;
+	else
+		cwsr->tma_gpu_va_addr = va;
+
+	return 0;
+}
+
+/*
+ * Undo amdgpu_cwsr_map_region() for @region: unmap the address recorded in
+ * @cwsr, delete the bo_va and clear the stored pointer. If the unmap itself
+ * fails the bo_va is left in place and the error is returned.
+ */
+static int amdgpu_cwsr_unmap_region(struct amdgpu_device *adev,
+				    struct amdgpu_cwsr_trap_obj *cwsr,
+				    enum amdgpu_cwsr_region region)
+{
+	struct amdgpu_bo_va **mapping;
+	uint64_t gpu_va;
+	int ret;
+
+	if (!cwsr)
+		return -EINVAL;
+
+	mapping = (region == AMDGPU_CWSR_TBA) ? &cwsr->tba_va : &cwsr->tma_va;
+	gpu_va = (region == AMDGPU_CWSR_TBA) ? cwsr->tba_gpu_va_addr :
+					       cwsr->tma_gpu_va_addr;
+
+	ret = amdgpu_vm_bo_unmap(adev, *mapping, gpu_va);
+	if (ret) {
+		dev_err(adev->dev,
+			"failed to do bo_unmap on CWSR trap handler, err=%d\n",
+			ret);
+		return ret;
+	}
+
+	amdgpu_vm_bo_del(adev, *mapping);
+	*mapping = NULL;
+
+	return 0;
+}
+
+/**
+ * amdgpu_cwsr_alloc - create a CWSR trap object and map it into a VM
+ * @adev: amdgpu device
+ * @vm: VM that receives the TBA and TMA mappings
+ * @trap_obj: on success, set to the newly allocated trap object
+ *
+ * Locks the VM page directory and the device-wide ISA BO, creates a GTT
+ * kernel BO for the TMA, then maps the TMA and the TBA into @vm. On any
+ * failure the TMA BO and the trap object are released and *@trap_obj is
+ * left untouched.
+ *
+ * TBD : Handle APU allocation
+ *
+ * Return: 0 on success, -EOPNOTSUPP if CWSR is not enabled, -EINVAL if @vm
+ * or @trap_obj is NULL, -ENOMEM on allocation failure, or the error from
+ * the locking, BO creation or mapping step that failed.
+ */
+int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		      struct amdgpu_cwsr_trap_obj **trap_obj)
+{
+	struct amdgpu_bo *bo;
+	struct drm_exec exec;
+	int r;
+
+	if (!amdgpu_cwsr_is_enabled(adev))
+		return -EOPNOTSUPP;
+
+	/* Both are used below before the map helper's own NULL checks run. */
+	if (!vm || !trap_obj)
+		return -EINVAL;
+
+	/*
+	 * Declare the scope-managed pointer together with its allocation, as
+	 * recommended by cleanup.h. All gotos below come after this point, so
+	 * none of them jumps over the declaration.
+	 */
+	struct amdgpu_cwsr_trap_obj *cwsr __free(kfree) =
+		kzalloc(sizeof(*cwsr), GFP_KERNEL);
+	if (!cwsr)
+		return -ENOMEM;
+
+	bo = adev->cwsr_info->isa_bo;
+	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+	drm_exec_until_all_locked(&exec)
+	{
+		r = amdgpu_vm_lock_pd(vm, &exec, 0);
+		if (likely(!r))
+			r = drm_exec_lock_obj(&exec, &bo->tbo.base);
+		drm_exec_retry_on_contention(&exec);
+		if (unlikely(r)) {
+			dev_err(adev->dev,
+				"failed to reserve for CWSR allocs: err=%d\n",
+				r);
+			goto err;
+		}
+	}
+
+	r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TMA_MAX_SIZE, PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_GTT, &cwsr->tma_bo, NULL,
+				    &cwsr->tma_cpu_addr);
+	if (r)
+		goto err;
+
+	r = amdgpu_cwsr_map_region(adev, vm, cwsr, AMDGPU_CWSR_TMA);
+	if (r)
+		goto err;
+	r = amdgpu_cwsr_map_region(adev, vm, cwsr, AMDGPU_CWSR_TBA);
+	if (r) {
+		/* The TMA was mapped just above; undo it before bailing out. */
+		amdgpu_cwsr_unmap_region(adev, cwsr, AMDGPU_CWSR_TMA);
+		goto err;
+	}
+
+	/* Success: hand ownership to the caller and disarm the auto-free. */
+	*trap_obj = no_free_ptr(cwsr);
+
+err:
+	drm_exec_fini(&exec);
+	/* On failure cwsr is still owned here; drop its TMA BO if one exists. */
+	if (r)
+		amdgpu_bo_free_kernel(&cwsr->tma_bo, NULL, NULL);
+
+	return r;
+}
+
+/*
+ * amdgpu_cwsr_free - unmap and release a trap object created by
+ * amdgpu_cwsr_alloc()
+ *
+ * Locks the VM page directory, the ISA BO and the TMA BO, removes the TBA
+ * and TMA mappings, then frees the TMA BO and the trap object and sets
+ * *@trap_obj to NULL. Does nothing if @trap_obj, *@trap_obj or its TMA BO
+ * is NULL, or if the device has no ISA BO.
+ *
+ * NOTE(review): adev->cwsr_info is dereferenced without checking
+ * amdgpu_cwsr_is_enabled() - confirm callers guarantee it is set.
+ * NOTE(review): if locking fails, the mappings are not removed but the TMA
+ * BO and the trap object are still freed - confirm this is intended.
+ */
+void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_cwsr_trap_obj **trap_obj)
+{
+ struct amdgpu_bo *tba_bo;
+ struct amdgpu_bo *tma_bo;
+ struct drm_exec exec;
+ int r;
+
+ if (!trap_obj || !*trap_obj || !(*trap_obj)->tma_bo)
+ return;
+ tba_bo = adev->cwsr_info->isa_bo;
+ tma_bo = (*trap_obj)->tma_bo;
+
+ if (!tba_bo || !tma_bo)
+ return;
+
+ /* Flags are 0 here, unlike the DRM_EXEC_INTERRUPTIBLE_WAIT used in alloc. */
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec)
+ {
+ r = amdgpu_vm_lock_pd(vm, &exec, 0);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &tba_bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (likely(!r))
+ r = drm_exec_lock_obj(&exec, &tma_bo->tbo.base);
+ drm_exec_retry_on_contention(&exec);
+ if (unlikely(r)) {
+ dev_err(adev->dev,
+ "failed to reserve CWSR BOs: err=%d\n", r);
+ goto err;
+ }
+ }
+
+ /* Return values of the unmap helpers are not checked here. */
+ amdgpu_cwsr_unmap_region(adev, *trap_obj, AMDGPU_CWSR_TBA);
+ amdgpu_cwsr_unmap_region(adev, *trap_obj, AMDGPU_CWSR_TMA);
+err:
+ drm_exec_fini(&exec);
+ amdgpu_bo_free_kernel(&(*trap_obj)->tma_bo, NULL, NULL);
+ kfree(*trap_obj);
+ *trap_obj = NULL;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
new file mode 100644
index 000000000000..26ed9308f70b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AMDGPU_CWSR_H
+#define AMDGPU_CWSR_H
+
+#include <linux/types.h>
+
+struct amdgpu_bo;
+struct amdgpu_bo_va;
+struct amdgpu_device;
+struct amdgpu_vm;
+
+/**
+ * struct amdgpu_cwsr_trap_obj - CWSR (Compute Wave Save Restore) trap
+ * handler mapping state created by amdgpu_cwsr_alloc()
+ * @tma_gpu_va_addr: GPU virtual address at which the TMA buffer is mapped
+ * @tba_gpu_va_addr: GPU virtual address at which the trap handler ISA is mapped
+ * @tma_bo: Buffer object backing the TMA region
+ * @tba_va: Buffer object virtual address mapping of the device-wide ISA BO
+ * @tma_va: Buffer object virtual address mapping of @tma_bo
+ * @tma_cpu_addr: CPU address of @tma_bo as returned by amdgpu_bo_create_kernel()
+ */
+struct amdgpu_cwsr_trap_obj {
+ uint64_t tma_gpu_va_addr;
+ uint64_t tba_gpu_va_addr;
+
+ struct amdgpu_bo *tma_bo;
+ struct amdgpu_bo_va *tba_va;
+ struct amdgpu_bo_va *tma_va;
+ void *tma_cpu_addr;
+};
+
+/*
+ * Device-wide CWSR state, allocated by amdgpu_cwsr_init() and stored in
+ * adev->cwsr_info.
+ */
+struct amdgpu_cwsr_info {
+ /* cwsr isa */
+ /* Kernel BO that holds a copy of the trap handler ISA. */
+ struct amdgpu_bo *isa_bo;
+ /* Trap handler array selected for the device's GC IP version. */
+ const void *isa_buf;
+ /* Size of @isa_buf in bytes; amdgpu_cwsr_init() rejects a zero size. */
+ uint32_t isa_sz;
+};
+
+int amdgpu_cwsr_init(struct amdgpu_device *adev);
+void amdgpu_cwsr_fini(struct amdgpu_device *adev);
+
+int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_cwsr_trap_obj **cwsr_obj);
+void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_cwsr_trap_obj **cwsr_obj);
+/* CWSR counts as enabled once adev->cwsr_info has been set. */
+static inline bool amdgpu_cwsr_is_enabled(struct amdgpu_device *adev)
+{
+	return !!adev->cwsr_info;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 139642eacdd0..783ca2b8dfef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -173,7 +173,7 @@ struct amdgpu_bo_vm;
#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
- AMDGPU_VA_RESERVED_SEQ64_SIZE)
-#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
+#define AMDGPU_VA_RESERVED_TRAP_SIZE (3ULL << 12)
#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
- AMDGPU_VA_RESERVED_TRAP_SIZE)
#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-03 12:54 ` [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions Lijo Lazar
@ 2025-12-04 7:36 ` Krzysztof Kozlowski
2025-12-04 8:03 ` Lazar, Lijo
2025-12-16 16:38 ` Alex Deucher
2025-12-22 5:38 ` Zhang, Jesse(Jie)
2 siblings, 1 reply; 29+ messages in thread
From: Krzysztof Kozlowski @ 2025-12-04 7:36 UTC (permalink / raw)
To: Lijo Lazar, amd-gfx
Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
On 03/12/2025 13:54, Lijo Lazar wrote:
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
> + } else {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
> + }
> +}
> +
> +int amdgpu_cwsr_init(struct amdgpu_device *adev)
> +{
> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
This is an undesired syntax explicitly documented as one to avoid. You
need here proper assignment, not NULL. Please don't use cleanup.h if you
do not intend to follow it because it does not make the code simpler.
Best regards,
Krzysztof
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-04 7:36 ` Krzysztof Kozlowski
@ 2025-12-04 8:03 ` Lazar, Lijo
2025-12-04 8:06 ` Krzysztof Kozlowski
0 siblings, 1 reply; 29+ messages in thread
From: Lazar, Lijo @ 2025-12-04 8:03 UTC (permalink / raw)
To: Krzysztof Kozlowski, amd-gfx
Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
On 12/4/2025 1:06 PM, Krzysztof Kozlowski wrote:
> On 03/12/2025 13:54, Lijo Lazar wrote:
>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
>> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
>> + } else {
>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
>> + AMDGPU_CWSR_TBA_MAX_SIZE);
>> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
>> + }
>> +}
>> +
>> +int amdgpu_cwsr_init(struct amdgpu_device *adev)
>> +{
>> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
>
>
> This is an undesired syntax explicitly documented as one to avoid. You
> need here proper assignment, not NULL. Please don't use cleanup.h if you
> do not intend to follow it because it does not make the code simpler.
>
Could you explain more about the hazard here? There are no multiple
cleanup variables declared in this case.
Thanks,
Lijo
>
> Best regards,
> Krzysztof
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-04 8:03 ` Lazar, Lijo
@ 2025-12-04 8:06 ` Krzysztof Kozlowski
2025-12-04 8:08 ` Krzysztof Kozlowski
2025-12-04 8:08 ` Lazar, Lijo
0 siblings, 2 replies; 29+ messages in thread
From: Krzysztof Kozlowski @ 2025-12-04 8:06 UTC (permalink / raw)
To: Lazar, Lijo, amd-gfx
Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
On 04/12/2025 09:03, Lazar, Lijo wrote:
>
>
> On 12/4/2025 1:06 PM, Krzysztof Kozlowski wrote:
>> On 03/12/2025 13:54, Lijo Lazar wrote:
>>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
>>> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
>>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
>>> + } else {
>>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
>>> + AMDGPU_CWSR_TBA_MAX_SIZE);
>>> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
>>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
>>> + }
>>> +}
>>> +
>>> +int amdgpu_cwsr_init(struct amdgpu_device *adev)
>>> +{
>>> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
>>
>>
>> This is an undesired syntax explicitly documented as one to avoid. You
>> need here proper assignment, not NULL. Please don't use cleanup.h if you
>> do not intend to follow it because it does not make the code simpler.
>>
>
> Could you explain more about the hazard here? There are no multiple
> cleanup variables declared in this case.
>
I am not saying there is a hazard. I am saying that you do not follow
coding style and very explicit, documented rule. There are exceptions of
course, but they need reason and such is missing here. You made the code
worse here, more confusing with the fake assignment, fake constructor.
If you do not want to follow cleanup.h coding style, then simply do not
use cleanup.h. Cleanup.h is to make code simpler but worse.
Best regards,
Krzysztof
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-04 8:06 ` Krzysztof Kozlowski
@ 2025-12-04 8:08 ` Krzysztof Kozlowski
2025-12-04 8:08 ` Lazar, Lijo
1 sibling, 0 replies; 29+ messages in thread
From: Krzysztof Kozlowski @ 2025-12-04 8:08 UTC (permalink / raw)
To: Lazar, Lijo, amd-gfx
Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
On 04/12/2025 09:06, Krzysztof Kozlowski wrote:
> On 04/12/2025 09:03, Lazar, Lijo wrote:
>>
>>
>> On 12/4/2025 1:06 PM, Krzysztof Kozlowski wrote:
>>> On 03/12/2025 13:54, Lijo Lazar wrote:
>>>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
>>>> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
>>>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
>>>> + } else {
>>>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
>>>> + AMDGPU_CWSR_TBA_MAX_SIZE);
>>>> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
>>>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
>>>> + }
>>>> +}
>>>> +
>>>> +int amdgpu_cwsr_init(struct amdgpu_device *adev)
>>>> +{
>>>> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
>>>
>>>
>>> This is an undesired syntax explicitly documented as one to avoid. You
>>> need here proper assignment, not NULL. Please don't use cleanup.h if you
>>> do not intend to follow it because it does not make the code simpler.
>>>
>>
>> Could you explain more about the hazard here? There are no multiple
>> cleanup variables declared in this case.
>>
>
> I am not saying there is a hazard. I am saying that you do not follow
> coding style and very explicit, documented rule. There are exceptions of
> course, but they need reason and such is missing here. You made the code
> worse here, more confusing with the fake assignment, fake constructor.
>
> If you do not want to follow cleanup.h coding style, then simply do not
> use cleanup.h. Cleanup.h is to make code simpler but worse.
s/but worse/,not worse/
Best regards,
Krzysztof
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-04 8:06 ` Krzysztof Kozlowski
2025-12-04 8:08 ` Krzysztof Kozlowski
@ 2025-12-04 8:08 ` Lazar, Lijo
1 sibling, 0 replies; 29+ messages in thread
From: Lazar, Lijo @ 2025-12-04 8:08 UTC (permalink / raw)
To: Krzysztof Kozlowski, amd-gfx
Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
On 12/4/2025 1:36 PM, Krzysztof Kozlowski wrote:
> On 04/12/2025 09:03, Lazar, Lijo wrote:
>>
>>
>> On 12/4/2025 1:06 PM, Krzysztof Kozlowski wrote:
>>> On 03/12/2025 13:54, Lijo Lazar wrote:
>>>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
>>>> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
>>>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
>>>> + } else {
>>>> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
>>>> + AMDGPU_CWSR_TBA_MAX_SIZE);
>>>> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
>>>> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
>>>> + }
>>>> +}
>>>> +
>>>> +int amdgpu_cwsr_init(struct amdgpu_device *adev)
>>>> +{
>>>> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
>>>
>>>
>>> This is an undesired syntax explicitly documented as one to avoid. You
>>> need here proper assignment, not NULL. Please don't use cleanup.h if you
>>> do not intend to follow it because it does not make the code simpler.
>>>
>>
>> Could you explain more about the hazard here? There are no multiple
>> cleanup variables declared in this case.
>>
>
> I am not saying there is a hazard. I am saying that you do not follow
> coding style and very explicit, documented rule. There are exceptions of
> course, but they need reason and such is missing here. You made the code
> worse here, more confusing with the fake assignment, fake constructor.
>
> If you do not want to follow cleanup.h coding style, then simply do not
> use cleanup.h. Cleanup.h is to make code simpler but worse.
>
Got it. Will revise this to make the assignment in the next version.
Thanks,
Lijo
> Best regards,
> Krzysztof
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-03 12:54 ` [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions Lijo Lazar
2025-12-04 7:36 ` Krzysztof Kozlowski
@ 2025-12-16 16:38 ` Alex Deucher
2025-12-22 5:38 ` Zhang, Jesse(Jie)
2 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:38 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:14 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add functions related to cwsr handling inside amdgpu framework.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 346 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 67 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
> 5 files changed, 418 insertions(+), 2 deletions(-)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index f65021678fc0..a5feb674508a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -67,7 +67,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
> amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \
> - amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o
> + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o amdgpu_cwsr.o
>
> amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index fa71df36f4b3..b9920cab5d31 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -331,6 +331,7 @@ struct kfd_vm_fault_info;
> struct amdgpu_hive_info;
> struct amdgpu_reset_context;
> struct amdgpu_reset_control;
> +struct amdgpu_cwsr_isa;
>
> enum amdgpu_cp_irq {
> AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
> @@ -1325,6 +1326,8 @@ struct amdgpu_device {
> * Must be last --ends in a flexible-array member.
> */
> struct amdgpu_kfd_dev kfd;
> +
> + struct amdgpu_cwsr_info *cwsr_info;
> };
>
> static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> new file mode 100644
> index 000000000000..c0fc5a383071
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> @@ -0,0 +1,346 @@
> +/*
> + * Copyright 2025 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include <drm/drm_exec.h>
> +
> +#include "amdgpu.h"
> +#include "cwsr_trap_handler.h"
> +#include "amdgpu_cwsr.h"
> +
> +extern int cwsr_enable;
> +
> +#define AMDGPU_CWSR_TBA_MAX_SIZE (2 * AMDGPU_GPU_PAGE_SIZE)
> +#define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
> +#define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
> +
> +enum amdgpu_cwsr_region {
> + AMDGPU_CWSR_TBA,
> + AMDGPU_CWSR_TMA,
> +};
> +
> +static inline uint64_t amdgpu_cwsr_tba_vaddr(struct amdgpu_device *adev)
> +{
> + uint64_t addr = AMDGPU_VA_RESERVED_TRAP_START(adev);
> +
> + addr = amdgpu_gmc_sign_extend(addr);
> +
> + return addr;
> +}
> +
> +static inline bool amdgpu_cwsr_is_supported(struct amdgpu_device *adev)
> +{
> + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> +
> + if (!cwsr_enable || gc_ver < IP_VERSION(9, 0, 1))
> + return false;
Probably also want a check for whether userqs are enabled. E.g.,
if (adev->gfx.disable_uq)
return false;
Alex
> +
> + return true;
> +}
> +
> +static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_info *cwsr_info)
> +{
> + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> +
> + if (gc_ver < IP_VERSION(9, 0, 1)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx8_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx8_hex);
> + } else if (gc_ver == IP_VERSION(9, 4, 1)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_arcturus_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_arcturus_hex);
> + } else if (gc_ver == IP_VERSION(9, 4, 2)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_aldebaran_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_aldebaran_hex);
> + } else if (gc_ver == IP_VERSION(9, 4, 3) ||
> + gc_ver == IP_VERSION(9, 4, 4)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx9_4_3_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_4_3_hex);
> + } else if (gc_ver == IP_VERSION(9, 5, 0)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx9_5_0_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_5_0_hex);
> + } else if (gc_ver < IP_VERSION(10, 1, 1)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx9_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_hex);
> + } else if (gc_ver < IP_VERSION(10, 3, 0)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_nv1x_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_nv1x_hex);
> + } else if (gc_ver < IP_VERSION(11, 0, 0)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx10_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx10_hex);
> + } else if (gc_ver < IP_VERSION(12, 0, 0)) {
> + /* The gfx11 cwsr trap handler must fit inside a single
> + page. */
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
> + } else {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
> + }
> +}
> +
> +int amdgpu_cwsr_init(struct amdgpu_device *adev)
> +{
> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
> + void *ptr;
> + int r;
> +
> + if (!amdgpu_cwsr_is_supported(adev))
> + return -EOPNOTSUPP;
> +
> + cwsr_info = kzalloc(sizeof(*cwsr_info), GFP_KERNEL);
> + if (!cwsr_info)
> + return -ENOMEM;
> + amdgpu_cwsr_init_isa_details(adev, cwsr_info);
> +
> + if (!cwsr_info->isa_sz)
> + return -EOPNOTSUPP;
> +
> + r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TBA_MAX_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT, &cwsr_info->isa_bo,
> + NULL, &ptr);
> + if (r)
> + return r;
> +
> + memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
> + adev->cwsr_info = no_free_ptr(cwsr_info);
> +
> + return 0;
> +}
> +
> +void amdgpu_cwsr_fini(struct amdgpu_device *adev)
> +{
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return;
> +
> + amdgpu_bo_free_kernel(&adev->cwsr_info->isa_bo, NULL, NULL);
> + kfree(adev->cwsr_info);
> + adev->cwsr_info = NULL;
> +}
> +
> +/*
> + * amdgpu_map_cwsr_trap_handler should be called during amdgpu_vm_init
> + * it maps virtual address amdgpu_cwsr_trap_handler_vaddr() to this VM, and each
> + * compute queue can use this virtual address for wave save/restore
> + * operations to support compute preemption.
> + */
> +static int amdgpu_cwsr_map_region(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj *cwsr,
> + enum amdgpu_cwsr_region region)
> +{
> + uint64_t cwsr_addr, va_flags, va;
> + struct amdgpu_bo_va **bo_va;
> + struct amdgpu_bo *bo;
> + uint32_t size;
> + int r;
> +
> + if (!cwsr || !vm)
> + return -EINVAL;
> +
> + cwsr_addr = amdgpu_cwsr_tba_vaddr(adev);
> +
> + if (region == AMDGPU_CWSR_TBA) {
> + size = AMDGPU_CWSR_TBA_MAX_SIZE;
> + bo_va = &cwsr->tba_va;
> + bo = adev->cwsr_info->isa_bo;
> + va = cwsr_addr;
> + va_flags = (AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
> + AMDGPU_VM_PAGE_EXECUTABLE);
> + } else {
> + size = AMDGPU_CWSR_TMA_MAX_SIZE;
> + bo_va = &cwsr->tma_va;
> + bo = cwsr->tma_bo;
> + va = cwsr_addr + AMDGPU_CWSR_TMA_OFFSET;
> + va_flags = (AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE);
> + }
> +
> + *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
> + if (!*bo_va)
> + return -ENOMEM;
> +
> + va &= AMDGPU_GMC_HOLE_MASK;
> + r = amdgpu_vm_bo_map(adev, *bo_va, va, 0, size, va_flags);
> +
> + if (r) {
> + dev_err(adev->dev, "failed to do bo_map on CWSR TBA, err=%d\n",
> + r);
> + amdgpu_vm_bo_del(adev, *bo_va);
> + *bo_va = NULL;
> + } else {
> + if (region == AMDGPU_CWSR_TBA)
> + cwsr->tba_gpu_va_addr = va;
> + else
> + cwsr->tma_gpu_va_addr = va;
> + }
> +
> + return r;
> +}
> +
> +static int amdgpu_cwsr_unmap_region(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_trap_obj *cwsr,
> + enum amdgpu_cwsr_region region)
> +{
> + struct amdgpu_bo_va **bo_va;
> + uint64_t va;
> + int r;
> +
> + if (!cwsr)
> + return -EINVAL;
> +
> + if (region == AMDGPU_CWSR_TBA) {
> + bo_va = &cwsr->tba_va;
> + va = cwsr->tba_gpu_va_addr;
> + } else {
> + bo_va = &cwsr->tma_va;
> + va = cwsr->tma_gpu_va_addr;
> + }
> +
> + r = amdgpu_vm_bo_unmap(adev, *bo_va, va);
> + if (r) {
> + dev_err(adev->dev,
> + "failed to do bo_unmap on CWSR trap handler, err=%d\n",
> + r);
> + return r;
> + }
> +
> + amdgpu_vm_bo_del(adev, *bo_va);
> + *bo_va = NULL;
> +
> + return r;
> +}
> +
> +/* TBD : Handle APU allocation */
> +int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **trap_obj)
> +{
> + struct amdgpu_cwsr_trap_obj *cwsr __free(kfree) = NULL;
> + struct amdgpu_bo *bo;
> + struct drm_exec exec;
> + int r;
> +
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return -EOPNOTSUPP;
> +
> + cwsr = kzalloc(sizeof(*cwsr), GFP_KERNEL);
> + if (!cwsr)
> + return -ENOMEM;
> +
> + bo = adev->cwsr_info->isa_bo;
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> + drm_exec_until_all_locked(&exec)
> + {
> + r = amdgpu_vm_lock_pd(vm, &exec, 0);
> + if (likely(!r))
> + r = drm_exec_lock_obj(&exec, &bo->tbo.base);
> + drm_exec_retry_on_contention(&exec);
> + if (unlikely(r)) {
> + dev_err(adev->dev,
> + "failed to reserve for CWSR allocs: err=%d\n",
> + r);
> + goto err;
> + }
> + }
> +
> + r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TMA_MAX_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT, &cwsr->tma_bo, NULL,
> + &cwsr->tma_cpu_addr);
> + if (r)
> + goto err;
> +
> + r = amdgpu_cwsr_map_region(adev, vm, cwsr, AMDGPU_CWSR_TMA);
> + if (r)
> + goto err;
> + r = amdgpu_cwsr_map_region(adev, vm, cwsr, AMDGPU_CWSR_TBA);
> + if (r) {
> + amdgpu_cwsr_unmap_region(adev, cwsr, AMDGPU_CWSR_TMA);
> + goto err;
> + }
> +
> + *trap_obj = no_free_ptr(cwsr);
> +
> +err:
> + drm_exec_fini(&exec);
> + if (r)
> + amdgpu_bo_free_kernel(&cwsr->tma_bo, NULL, NULL);
> +
> + return r;
> +}
> +
> +void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **trap_obj)
> +{
> + struct amdgpu_bo *tba_bo;
> + struct amdgpu_bo *tma_bo;
> + struct drm_exec exec;
> + int r;
> +
> + if (!trap_obj || !*trap_obj || !(*trap_obj)->tma_bo)
> + return;
> + tba_bo = adev->cwsr_info->isa_bo;
> + tma_bo = (*trap_obj)->tma_bo;
> +
> + if (!tba_bo || !tma_bo)
> + return;
> +
> + drm_exec_init(&exec, 0, 0);
> + drm_exec_until_all_locked(&exec)
> + {
> + r = amdgpu_vm_lock_pd(vm, &exec, 0);
> + if (likely(!r))
> + r = drm_exec_lock_obj(&exec, &tba_bo->tbo.base);
> + drm_exec_retry_on_contention(&exec);
> + if (likely(!r))
> + r = drm_exec_lock_obj(&exec, &tma_bo->tbo.base);
> + drm_exec_retry_on_contention(&exec);
> + if (unlikely(r)) {
> + dev_err(adev->dev,
> + "failed to reserve CWSR BOs: err=%d\n", r);
> + goto err;
> + }
> + }
> +
> + amdgpu_cwsr_unmap_region(adev, *trap_obj, AMDGPU_CWSR_TBA);
> + amdgpu_cwsr_unmap_region(adev, *trap_obj, AMDGPU_CWSR_TMA);
> +err:
> + drm_exec_fini(&exec);
> + amdgpu_bo_free_kernel(&(*trap_obj)->tma_bo, NULL, NULL);
> + kfree(*trap_obj);
> + *trap_obj = NULL;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> new file mode 100644
> index 000000000000..26ed9308f70b
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright 2025 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef AMDGPU_CWSR_H
> +#define AMDGPU_CWSR_H
> +
> +#include <linux/types.h>
> +
> +struct amdgpu_bo;
> +struct amdgpu_bo_va;
> +struct amdgpu_device;
> +struct amdgpu_vm;
> +
> +/**
> + * struct amdgpu_cwsr_obj - CWSR (Compute Wave Save Restore) buffer tracking
> + * @bo: Buffer object for CWSR area
> + * @bo_va: Buffer object virtual address mapping
> + */
> +struct amdgpu_cwsr_trap_obj {
> + uint64_t tma_gpu_va_addr;
> + uint64_t tba_gpu_va_addr;
> +
> + struct amdgpu_bo *tma_bo;
> + struct amdgpu_bo_va *tba_va;
> + struct amdgpu_bo_va *tma_va;
> + void *tma_cpu_addr;
> +};
> +
> +struct amdgpu_cwsr_info {
> + /* cwsr isa */
> + struct amdgpu_bo *isa_bo;
> + const void *isa_buf;
> + uint32_t isa_sz;
> +};
> +
> +int amdgpu_cwsr_init(struct amdgpu_device *adev);
> +void amdgpu_cwsr_fini(struct amdgpu_device *adev);
> +
> +int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **cwsr_obj);
> +void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **cwsr_obj);
> +static inline bool amdgpu_cwsr_is_enabled(struct amdgpu_device *adev)
> +{
> + return adev->cwsr_info != NULL;
> +}
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 139642eacdd0..783ca2b8dfef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -173,7 +173,7 @@ struct amdgpu_bo_vm;
> #define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
> #define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
> - AMDGPU_VA_RESERVED_SEQ64_SIZE)
> -#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
> +#define AMDGPU_VA_RESERVED_TRAP_SIZE (3ULL << 12)
> #define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
> - AMDGPU_VA_RESERVED_TRAP_SIZE)
> #define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* RE: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
2025-12-03 12:54 ` [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions Lijo Lazar
2025-12-04 7:36 ` Krzysztof Kozlowski
2025-12-16 16:38 ` Alex Deucher
@ 2025-12-22 5:38 ` Zhang, Jesse(Jie)
2 siblings, 0 replies; 29+ messages in thread
From: Zhang, Jesse(Jie) @ 2025-12-22 5:38 UTC (permalink / raw)
To: Lazar, Lijo, amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking, Deucher, Alexander, Koenig, Christian
[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Wednesday, December 3, 2025 8:55 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>
> Subject: [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions
>
> Add functions related to cwsr handling inside amdgpu framework.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 346
> +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h |
> 67 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
> 5 files changed, 418 insertions(+), 2 deletions(-) create mode 100644
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index f65021678fc0..a5feb674508a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -67,7 +67,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o
> amdgpu_kms.o \
> amdgpu_fw_attestation.o amdgpu_securedisplay.o \
> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
> amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
> amdgpu_dev_coredump.o \
> - amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
> amdgpu_ip.o
> + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o
> amdgpu_ip.o
> +amdgpu_cwsr.o
>
> amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index fa71df36f4b3..b9920cab5d31 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -331,6 +331,7 @@ struct kfd_vm_fault_info; struct amdgpu_hive_info; struct
> amdgpu_reset_context; struct amdgpu_reset_control;
> +struct amdgpu_cwsr_isa;
>
> enum amdgpu_cp_irq {
> AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
> @@ -1325,6 +1326,8 @@ struct amdgpu_device {
> * Must be last --ends in a flexible-array member.
> */
> struct amdgpu_kfd_dev kfd;
> +
> + struct amdgpu_cwsr_info *cwsr_info;
> };
>
> static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, diff --
> git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> new file mode 100644
> index 000000000000..c0fc5a383071
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> @@ -0,0 +1,346 @@
> +/*
> + * Copyright 2025 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
> NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
> CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include <drm/drm_exec.h>
> +
> +#include "amdgpu.h"
> +#include "cwsr_trap_handler.h"
> +#include "amdgpu_cwsr.h"
> +
> +extern int cwsr_enable;
> +
> +#define AMDGPU_CWSR_TBA_MAX_SIZE (2 * AMDGPU_GPU_PAGE_SIZE)
> #define
> +AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE) #define
> +AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
> +
> +enum amdgpu_cwsr_region {
> + AMDGPU_CWSR_TBA,
> + AMDGPU_CWSR_TMA,
> +};
> +
> +static inline uint64_t amdgpu_cwsr_tba_vaddr(struct amdgpu_device
> +*adev) {
> + uint64_t addr = AMDGPU_VA_RESERVED_TRAP_START(adev);
[Zhang, Jesse(Jie)] A new VA should be reserved for KGD userq to avoid VA conflicts with KFD CWSR.
> +
> + addr = amdgpu_gmc_sign_extend(addr);
> +
> + return addr;
> +}
> +
> +static inline bool amdgpu_cwsr_is_supported(struct amdgpu_device *adev)
> +{
> + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> +
> + if (!cwsr_enable || gc_ver < IP_VERSION(9, 0, 1))
> + return false;
> +
> + return true;
> +}
> +
> +static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_info *cwsr_info) {
> + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> +
> + if (gc_ver < IP_VERSION(9, 0, 1)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx8_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx8_hex);
> + } else if (gc_ver == IP_VERSION(9, 4, 1)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_arcturus_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_arcturus_hex);
> + } else if (gc_ver == IP_VERSION(9, 4, 2)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_aldebaran_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_aldebaran_hex);
> + } else if (gc_ver == IP_VERSION(9, 4, 3) ||
> + gc_ver == IP_VERSION(9, 4, 4)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx9_4_3_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_4_3_hex);
> + } else if (gc_ver == IP_VERSION(9, 5, 0)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx9_5_0_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_5_0_hex);
> + } else if (gc_ver < IP_VERSION(10, 1, 1)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx9_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx9_hex);
> + } else if (gc_ver < IP_VERSION(10, 3, 0)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_nv1x_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_nv1x_hex);
> + } else if (gc_ver < IP_VERSION(11, 0, 0)) {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx10_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx10_hex);
> + } else if (gc_ver < IP_VERSION(12, 0, 0)) {
> + /* The gfx11 cwsr trap handler must fit inside a single
> + page. */
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx11_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx11_hex);
> + } else {
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) >
> + AMDGPU_CWSR_TBA_MAX_SIZE);
> + cwsr_info->isa_buf = cwsr_trap_gfx12_hex;
> + cwsr_info->isa_sz = sizeof(cwsr_trap_gfx12_hex);
> + }
> +}
> +
> +int amdgpu_cwsr_init(struct amdgpu_device *adev) {
> + struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
> + void *ptr;
> + int r;
> +
> + if (!amdgpu_cwsr_is_supported(adev))
> + return -EOPNOTSUPP;
> +
> + cwsr_info = kzalloc(sizeof(*cwsr_info), GFP_KERNEL);
> + if (!cwsr_info)
> + return -ENOMEM;
> + amdgpu_cwsr_init_isa_details(adev, cwsr_info);
> +
> + if (!cwsr_info->isa_sz)
> + return -EOPNOTSUPP;
> +
> + r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TBA_MAX_SIZE,
> PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT, &cwsr_info-
> >isa_bo,
> + NULL, &ptr);
> + if (r)
> + return r;
> +
> + memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
> + adev->cwsr_info = no_free_ptr(cwsr_info);
> +
> + return 0;
> +}
> +
> +void amdgpu_cwsr_fini(struct amdgpu_device *adev) {
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return;
> +
> + amdgpu_bo_free_kernel(&adev->cwsr_info->isa_bo, NULL, NULL);
> + kfree(adev->cwsr_info);
> + adev->cwsr_info = NULL;
> +}
> +
> +/*
> + * amdgpu_map_cwsr_trap_handler should be called during amdgpu_vm_init
> + * it maps virtual address amdgpu_cwsr_trap_handler_vaddr() to this VM,
> +and each
> + * compute queue can use this virtual address for wave save/restore
> + * operations to support compute preemption.
> + */
> +static int amdgpu_cwsr_map_region(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj *cwsr,
> + enum amdgpu_cwsr_region region)
> +{
> + uint64_t cwsr_addr, va_flags, va;
> + struct amdgpu_bo_va **bo_va;
> + struct amdgpu_bo *bo;
> + uint32_t size;
> + int r;
> +
> + if (!cwsr || !vm)
> + return -EINVAL;
> +
> + cwsr_addr = amdgpu_cwsr_tba_vaddr(adev);
> +
> + if (region == AMDGPU_CWSR_TBA) {
> + size = AMDGPU_CWSR_TBA_MAX_SIZE;
> + bo_va = &cwsr->tba_va;
> + bo = adev->cwsr_info->isa_bo;
> + va = cwsr_addr;
> + va_flags = (AMDGPU_VM_PAGE_READABLE |
> AMDGPU_VM_PAGE_WRITEABLE |
> + AMDGPU_VM_PAGE_EXECUTABLE);
> + } else {
> + size = AMDGPU_CWSR_TMA_MAX_SIZE;
> + bo_va = &cwsr->tma_va;
> + bo = cwsr->tma_bo;
> + va = cwsr_addr + AMDGPU_CWSR_TMA_OFFSET;
> + va_flags = (AMDGPU_VM_PAGE_READABLE |
> AMDGPU_VM_PAGE_WRITEABLE);
> + }
> +
> + *bo_va = amdgpu_vm_bo_add(adev, vm, bo);
> + if (!*bo_va)
> + return -ENOMEM;
> +
> + va &= AMDGPU_GMC_HOLE_MASK;
> + r = amdgpu_vm_bo_map(adev, *bo_va, va, 0, size, va_flags);
> +
> + if (r) {
> + dev_err(adev->dev, "failed to do bo_map on CWSR TBA, err=%d\n",
> + r);
> + amdgpu_vm_bo_del(adev, *bo_va);
> + *bo_va = NULL;
> + } else {
> + if (region == AMDGPU_CWSR_TBA)
> + cwsr->tba_gpu_va_addr = va;
> + else
> + cwsr->tma_gpu_va_addr = va;
> + }
> +
> + return r;
> +}
> +
> +static int amdgpu_cwsr_unmap_region(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_trap_obj *cwsr,
> + enum amdgpu_cwsr_region region) {
> + struct amdgpu_bo_va **bo_va;
> + uint64_t va;
> + int r;
> +
> + if (!cwsr)
> + return -EINVAL;
> +
> + if (region == AMDGPU_CWSR_TBA) {
> + bo_va = &cwsr->tba_va;
> + va = cwsr->tba_gpu_va_addr;
> + } else {
> + bo_va = &cwsr->tma_va;
> + va = cwsr->tma_gpu_va_addr;
> + }
> +
> + r = amdgpu_vm_bo_unmap(adev, *bo_va, va);
> + if (r) {
> + dev_err(adev->dev,
> + "failed to do bo_unmap on CWSR trap handler, err=%d\n",
> + r);
> + return r;
> + }
> +
> + amdgpu_vm_bo_del(adev, *bo_va);
> + *bo_va = NULL;
> +
> + return r;
> +}
> +
> +/* TBD : Handle APU allocation */
> +int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **trap_obj) {
> + struct amdgpu_cwsr_trap_obj *cwsr __free(kfree) = NULL;
> + struct amdgpu_bo *bo;
> + struct drm_exec exec;
> + int r;
> +
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return -EOPNOTSUPP;
> +
> + cwsr = kzalloc(sizeof(*cwsr), GFP_KERNEL);
> + if (!cwsr)
> + return -ENOMEM;
> +
> + bo = adev->cwsr_info->isa_bo;
> + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> + drm_exec_until_all_locked(&exec)
> + {
> + r = amdgpu_vm_lock_pd(vm, &exec, 0);
> + if (likely(!r))
> + r = drm_exec_lock_obj(&exec, &bo->tbo.base);
> + drm_exec_retry_on_contention(&exec);
> + if (unlikely(r)) {
> + dev_err(adev->dev,
> + "failed to reserve for CWSR allocs: err=%d\n",
> + r);
> + goto err;
> + }
> + }
> +
> + r = amdgpu_bo_create_kernel(adev, AMDGPU_CWSR_TMA_MAX_SIZE,
> PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT, &cwsr->tma_bo,
> NULL,
> + &cwsr->tma_cpu_addr);
> + if (r)
> + goto err;
> +
> + r = amdgpu_cwsr_map_region(adev, vm, cwsr, AMDGPU_CWSR_TMA);
> + if (r)
> + goto err;
> + r = amdgpu_cwsr_map_region(adev, vm, cwsr, AMDGPU_CWSR_TBA);
> + if (r) {
> + amdgpu_cwsr_unmap_region(adev, cwsr, AMDGPU_CWSR_TMA);
> + goto err;
> + }
> +
> + *trap_obj = no_free_ptr(cwsr);
> +
> +err:
> + drm_exec_fini(&exec);
> + if (r)
> + amdgpu_bo_free_kernel(&cwsr->tma_bo, NULL, NULL);
> +
> + return r;
> +}
> +
> +void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **trap_obj) {
> + struct amdgpu_bo *tba_bo;
> + struct amdgpu_bo *tma_bo;
> + struct drm_exec exec;
> + int r;
> +
> + if (!trap_obj || !*trap_obj || !(*trap_obj)->tma_bo)
> + return;
> + tba_bo = adev->cwsr_info->isa_bo;
> + tma_bo = (*trap_obj)->tma_bo;
> +
> + if (!tba_bo || !tma_bo)
> + return;
> +
> + drm_exec_init(&exec, 0, 0);
> + drm_exec_until_all_locked(&exec)
> + {
> + r = amdgpu_vm_lock_pd(vm, &exec, 0);
> + if (likely(!r))
> + r = drm_exec_lock_obj(&exec, &tba_bo->tbo.base);
> + drm_exec_retry_on_contention(&exec);
> + if (likely(!r))
> + r = drm_exec_lock_obj(&exec, &tma_bo->tbo.base);
> + drm_exec_retry_on_contention(&exec);
> + if (unlikely(r)) {
> + dev_err(adev->dev,
> + "failed to reserve CWSR BOs: err=%d\n", r);
> + goto err;
> + }
> + }
> +
> + amdgpu_cwsr_unmap_region(adev, *trap_obj, AMDGPU_CWSR_TBA);
> + amdgpu_cwsr_unmap_region(adev, *trap_obj, AMDGPU_CWSR_TMA);
> +err:
> + drm_exec_fini(&exec);
> + amdgpu_bo_free_kernel(&(*trap_obj)->tma_bo, NULL, NULL);
> + kfree(*trap_obj);
> + *trap_obj = NULL;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> new file mode 100644
> index 000000000000..26ed9308f70b
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright 2025 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
> NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY
> CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +#ifndef AMDGPU_CWSR_H
> +#define AMDGPU_CWSR_H
> +
> +#include <linux/types.h>
> +
> +struct amdgpu_bo;
> +struct amdgpu_bo_va;
> +struct amdgpu_device;
> +struct amdgpu_vm;
> +
> +/**
> + * struct amdgpu_cwsr_obj - CWSR (Compute Wave Save Restore) buffer
> +tracking
> + * @bo: Buffer object for CWSR area
> + * @bo_va: Buffer object virtual address mapping */ struct
> +amdgpu_cwsr_trap_obj {
> + uint64_t tma_gpu_va_addr;
> + uint64_t tba_gpu_va_addr;
> +
> + struct amdgpu_bo *tma_bo;
> + struct amdgpu_bo_va *tba_va;
> + struct amdgpu_bo_va *tma_va;
> + void *tma_cpu_addr;
> +};
> +
> +struct amdgpu_cwsr_info {
> + /* cwsr isa */
> + struct amdgpu_bo *isa_bo;
> + const void *isa_buf;
> + uint32_t isa_sz;
> +};
> +
> +int amdgpu_cwsr_init(struct amdgpu_device *adev); void
> +amdgpu_cwsr_fini(struct amdgpu_device *adev);
> +
> +int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **cwsr_obj); void
> +amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj **cwsr_obj); static inline bool
> +amdgpu_cwsr_is_enabled(struct amdgpu_device *adev) {
> + return adev->cwsr_info != NULL;
> +}
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 139642eacdd0..783ca2b8dfef 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -173,7 +173,7 @@ struct amdgpu_bo_vm;
> #define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
> #define AMDGPU_VA_RESERVED_SEQ64_START(adev)
> (AMDGPU_VA_RESERVED_CSA_START(adev) \
> -
> AMDGPU_VA_RESERVED_SEQ64_SIZE)
> -#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
> +#define AMDGPU_VA_RESERVED_TRAP_SIZE (3ULL << 12)
[Zhang, Jesse(Jie)] A new VA should be reserved for KGD userq to avoid VA conflicts with KFD CWSR.
> #define AMDGPU_VA_RESERVED_TRAP_START(adev)
> (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
> -
> AMDGPU_VA_RESERVED_TRAP_SIZE)
> #define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
> --
> 2.49.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 03/10] drm/amdgpu: Fill cwsr save area details
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
2025-12-03 12:54 ` [RFC PATCH v3 01/10] drm/amdgpu: Add helper function to get xcc count Lijo Lazar
2025-12-03 12:54 ` [RFC PATCH v3 02/10] drm/amdgpu: Add cwsr functions Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-16 16:40 ` Alex Deucher
2025-12-03 12:54 ` [RFC PATCH v3 04/10] drm/amdgpu: Add user save area params validation Lijo Lazar
` (6 subsequent siblings)
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Calculate control stack and total save area size required.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 103 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 4 +
2 files changed, 107 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
index c0fc5a383071..4252c31eac4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
@@ -32,6 +32,13 @@ extern int cwsr_enable;
#define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
#define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
+#define SGPR_SIZE_PER_CU 0x4000
+#define LDS_SIZE_PER_CU 0x10000
+#define HWREG_SIZE_PER_CU 0x1000
+#define DEBUGGER_BYTES_ALIGN 64
+#define DEBUGGER_BYTES_PER_WAVE 32
+#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
+
enum amdgpu_cwsr_region {
AMDGPU_CWSR_TBA,
AMDGPU_CWSR_TMA,
@@ -115,6 +122,100 @@ static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
}
}
+static uint32_t amdgpu_cwsr_get_vgpr_size_per_cu(struct amdgpu_device *adev)
+{
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+ uint32_t vgpr_size;
+
+ switch (gc_ver) {
+ case IP_VERSION(9, 4, 1): /* GFX_VERSION_ARCTURUS */
+ case IP_VERSION(9, 4, 2): /* GFX_VERSION_ALDEBARAN */
+ case IP_VERSION(9, 4, 3): /* GFX_VERSION_AQUA_VANJARAM */
+ case IP_VERSION(9, 4, 4): /* GFX_VERSION_AQUA_VANJARAM */
+ case IP_VERSION(9, 5, 0):
+ vgpr_size = 0x80000;
+ break;
+ case IP_VERSION(11, 0, 0):
+ case IP_VERSION(11, 0, 2):
+ case IP_VERSION(11, 0, 3):
+ case IP_VERSION(12, 0, 0):
+ case IP_VERSION(12, 0, 1):
+ vgpr_size = 0x60000;
+ break;
+ default:
+ vgpr_size = 0x40000;
+ break;
+ }
+
+ return vgpr_size;
+}
+
+static uint32_t amdgpu_cwsr_get_wg_ctxt_size_per_cu(struct amdgpu_device *adev)
+{
+ uint32_t lds_sz_per_cu;
+
+ lds_sz_per_cu =
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ?
+ (adev->gfx.cu_info.lds_size << 10) :
+ LDS_SIZE_PER_CU;
+
+ return amdgpu_cwsr_get_vgpr_size_per_cu(adev) + SGPR_SIZE_PER_CU +
+ lds_sz_per_cu + HWREG_SIZE_PER_CU;
+}
+
+static uint32_t amdgpu_cwsr_ctl_stack_bytes_per_wave(struct amdgpu_device *adev)
+{
+ uint32_t sz;
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 1, 0))
+ sz = 12;
+ else
+ sz = 8;
+ return sz;
+}
+
+static void amdgpu_cwsr_init_save_area_info(struct amdgpu_device *adev,
+ struct amdgpu_cwsr_info *cwsr_info)
+{
+ struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+ uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
+ uint32_t ctl_stack_size, wg_data_size, dbg_mem_size;
+ uint32_t array_count;
+ uint32_t wave_num;
+ uint32_t cu_num;
+
+ if (gc_ver < IP_VERSION(9, 0, 1))
+ return;
+
+ array_count = gfx_info->max_shader_engines * gfx_info->max_sh_per_se;
+
+ cu_num = adev->gfx.cu_info.number / NUM_XCC(adev->gfx.xcc_mask);
+ wave_num = (gc_ver < IP_VERSION(10, 1, 0)) ? /* GFX_VERSION_NAVI10 */
+ min(cu_num * 40,
+ array_count / gfx_info->max_sh_per_se * 512) :
+ cu_num * 32;
+
+ wg_data_size = ALIGN(cu_num * amdgpu_cwsr_get_wg_ctxt_size_per_cu(adev),
+ PAGE_SIZE);
+ ctl_stack_size =
+ wave_num * amdgpu_cwsr_ctl_stack_bytes_per_wave(adev) + 8;
+ ctl_stack_size =
+ ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
+ PAGE_SIZE);
+ dbg_mem_size =
+ ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
+ /*
+ * HW design limits control stack size to 0x7000.
+ * This is insufficient for theoretical PM4 cases
+ * but sufficient for AQL, limited by SPI events.
+ */
+ if (IP_VERSION_MAJ(gc_ver) == 10)
+ ctl_stack_size = min(ctl_stack_size, 0x7000);
+
+ cwsr_info->xcc_ctl_stack_sz = ctl_stack_size;
+ cwsr_info->xcc_cwsr_sz = ctl_stack_size + wg_data_size;
+ cwsr_info->xcc_dbg_mem_sz = dbg_mem_size;
+}
+
int amdgpu_cwsr_init(struct amdgpu_device *adev)
{
struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
@@ -139,6 +240,8 @@ int amdgpu_cwsr_init(struct amdgpu_device *adev)
return r;
memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
+
+ amdgpu_cwsr_init_save_area_info(adev, cwsr_info);
adev->cwsr_info = no_free_ptr(cwsr_info);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
index 26ed9308f70b..3c80d057bbed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
@@ -50,6 +50,10 @@ struct amdgpu_cwsr_info {
struct amdgpu_bo *isa_bo;
const void *isa_buf;
uint32_t isa_sz;
+ /* cwsr size info per XCC*/
+ uint32_t xcc_ctl_stack_sz;
+ uint32_t xcc_dbg_mem_sz;
+ uint32_t xcc_cwsr_sz;
};
int amdgpu_cwsr_init(struct amdgpu_device *adev);
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [RFC PATCH v3 03/10] drm/amdgpu: Fill cwsr save area details
2025-12-03 12:54 ` [RFC PATCH v3 03/10] drm/amdgpu: Fill cwsr save area details Lijo Lazar
@ 2025-12-16 16:40 ` Alex Deucher
0 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:40 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:14 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Calculate control stack and total save area size required.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 103 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 4 +
> 2 files changed, 107 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> index c0fc5a383071..4252c31eac4c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> @@ -32,6 +32,13 @@ extern int cwsr_enable;
> #define AMDGPU_CWSR_TMA_MAX_SIZE (AMDGPU_GPU_PAGE_SIZE)
> #define AMDGPU_CWSR_TMA_OFFSET (AMDGPU_CWSR_TBA_MAX_SIZE)
>
> +#define SGPR_SIZE_PER_CU 0x4000
> +#define LDS_SIZE_PER_CU 0x10000
> +#define HWREG_SIZE_PER_CU 0x1000
> +#define DEBUGGER_BYTES_ALIGN 64
> +#define DEBUGGER_BYTES_PER_WAVE 32
> +#define SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER 40
> +
> enum amdgpu_cwsr_region {
> AMDGPU_CWSR_TBA,
> AMDGPU_CWSR_TMA,
> @@ -115,6 +122,100 @@ static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
> }
> }
>
> +static uint32_t amdgpu_cwsr_get_vgpr_size_per_cu(struct amdgpu_device *adev)
> +{
> + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> + uint32_t vgpr_size;
> +
> + switch (gc_ver) {
> + case IP_VERSION(9, 4, 1): /* GFX_VERSION_ARCTURUS */
> + case IP_VERSION(9, 4, 2): /* GFX_VERSION_ALDEBARAN */
> + case IP_VERSION(9, 4, 3): /* GFX_VERSION_AQUA_VANJARAM */
> + case IP_VERSION(9, 4, 4): /* GFX_VERSION_AQUA_VANJARAM */
> + case IP_VERSION(9, 5, 0):
> + vgpr_size = 0x80000;
> + break;
> + case IP_VERSION(11, 0, 0):
> + case IP_VERSION(11, 0, 2):
> + case IP_VERSION(11, 0, 3):
> + case IP_VERSION(12, 0, 0):
> + case IP_VERSION(12, 0, 1):
> + vgpr_size = 0x60000;
> + break;
> + default:
> + vgpr_size = 0x40000;
> + break;
> + }
> +
> + return vgpr_size;
> +}
> +
> +static uint32_t amdgpu_cwsr_get_wg_ctxt_size_per_cu(struct amdgpu_device *adev)
> +{
> + uint32_t lds_sz_per_cu;
> +
> + lds_sz_per_cu =
> + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ?
> + (adev->gfx.cu_info.lds_size << 10) :
> + LDS_SIZE_PER_CU;
> +
> + return amdgpu_cwsr_get_vgpr_size_per_cu(adev) + SGPR_SIZE_PER_CU +
> + lds_sz_per_cu + HWREG_SIZE_PER_CU;
> +}
> +
> +static uint32_t amdgpu_cwsr_ctl_stack_bytes_per_wave(struct amdgpu_device *adev)
> +{
> + uint32_t sz;
Add a blank line here, after the variable declaration. Other than that:
Acked-by: Alex Deucher <alexander.deucher@amd.com>
> + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 1, 0))
> + sz = 12;
> + else
> + sz = 8;
> + return sz;
> +}
> +
> +static void amdgpu_cwsr_init_save_area_info(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_info *cwsr_info)
> +{
> + struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
> + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
> + uint32_t ctl_stack_size, wg_data_size, dbg_mem_size;
> + uint32_t array_count;
> + uint32_t wave_num;
> + uint32_t cu_num;
> +
> + if (gc_ver < IP_VERSION(9, 0, 1))
> + return;
> +
> + array_count = gfx_info->max_shader_engines * gfx_info->max_sh_per_se;
> +
> + cu_num = adev->gfx.cu_info.number / NUM_XCC(adev->gfx.xcc_mask);
> + wave_num = (gc_ver < IP_VERSION(10, 1, 0)) ? /* GFX_VERSION_NAVI10 */
> + min(cu_num * 40,
> + array_count / gfx_info->max_sh_per_se * 512) :
> + cu_num * 32;
> +
> + wg_data_size = ALIGN(cu_num * amdgpu_cwsr_get_wg_ctxt_size_per_cu(adev),
> + PAGE_SIZE);
> + ctl_stack_size =
> + wave_num * amdgpu_cwsr_ctl_stack_bytes_per_wave(adev) + 8;
> + ctl_stack_size =
> + ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size,
> + PAGE_SIZE);
> + dbg_mem_size =
> + ALIGN(wave_num * DEBUGGER_BYTES_PER_WAVE, DEBUGGER_BYTES_ALIGN);
> + /*
> + * HW design limits control stack size to 0x7000.
> + * This is insufficient for theoretical PM4 cases
> + * but sufficient for AQL, limited by SPI events.
> + */
> + if (IP_VERSION_MAJ(gc_ver) == 10)
> + ctl_stack_size = min(ctl_stack_size, 0x7000);
> +
> + cwsr_info->xcc_ctl_stack_sz = ctl_stack_size;
> + cwsr_info->xcc_cwsr_sz = ctl_stack_size + wg_data_size;
> + cwsr_info->xcc_dbg_mem_sz = dbg_mem_size;
> +}
> +
> int amdgpu_cwsr_init(struct amdgpu_device *adev)
> {
> struct amdgpu_cwsr_info *cwsr_info __free(kfree) = NULL;
> @@ -139,6 +240,8 @@ int amdgpu_cwsr_init(struct amdgpu_device *adev)
> return r;
>
> memcpy(ptr, cwsr_info->isa_buf, cwsr_info->isa_sz);
> +
> + amdgpu_cwsr_init_save_area_info(adev, cwsr_info);
> adev->cwsr_info = no_free_ptr(cwsr_info);
>
> return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> index 26ed9308f70b..3c80d057bbed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> @@ -50,6 +50,10 @@ struct amdgpu_cwsr_info {
> struct amdgpu_bo *isa_bo;
> const void *isa_buf;
> uint32_t isa_sz;
> + /* cwsr size info per XCC*/
> + uint32_t xcc_ctl_stack_sz;
> + uint32_t xcc_dbg_mem_sz;
> + uint32_t xcc_cwsr_sz;
> };
>
> int amdgpu_cwsr_init(struct amdgpu_device *adev);
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 04/10] drm/amdgpu: Add user save area params validation
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (2 preceding siblings ...)
2025-12-03 12:54 ` [RFC PATCH v3 03/10] drm/amdgpu: Fill cwsr save area details Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-16 16:15 ` Alex Deucher
2025-12-03 12:54 ` [RFC PATCH v3 05/10] drm/amdgpu: Add cwsr to device init/fini sequence Lijo Lazar
` (5 subsequent siblings)
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add an interface to validate user-provided save area parameters. Address
validation is not done here and is expected to be carried out separately.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 44 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 11 ++++++
2 files changed, 55 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
index 4252c31eac4c..1b4483b5d5a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
@@ -63,6 +63,15 @@ static inline bool amdgpu_cwsr_is_supported(struct amdgpu_device *adev)
return true;
}
+uint32_t amdgpu_cwsr_size_needed(struct amdgpu_device *adev, int num_xcc)
+{
+ if (!amdgpu_cwsr_is_enabled(adev))
+ return 0;
+
+ return num_xcc *
+ (adev->cwsr_info->xcc_cwsr_sz + adev->cwsr_info->xcc_dbg_mem_sz);
+}
+
static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
struct amdgpu_cwsr_info *cwsr_info)
{
@@ -406,6 +415,41 @@ int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
+int amdgpu_cwsr_validate_params(struct amdgpu_device *adev,
+ struct amdgpu_cwsr_params *cwsr_params,
+ int num_xcc)
+{
+ struct amdgpu_cwsr_info *cwsr_info = adev->cwsr_info;
+
+ if (!amdgpu_cwsr_is_enabled(adev))
+ return -EOPNOTSUPP;
+
+ if (!cwsr_params)
+ return -EINVAL;
+
+ /*
+ * Only control stack and save area size details checked. Address validation needs to be
+ * carried out separately.
+ */
+ if (cwsr_params->ctl_stack_sz !=
+ (cwsr_info->xcc_ctl_stack_sz * num_xcc)) {
+ dev_dbg(adev->dev,
+ "queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n",
+ cwsr_params->ctl_stack_sz,
+ num_xcc * cwsr_info->xcc_ctl_stack_sz);
+ return -EINVAL;
+ }
+
+ if (cwsr_params->cwsr_sz != (cwsr_info->xcc_cwsr_sz * num_xcc)) {
+ dev_dbg(adev->dev,
+ "queue cwsr size 0x%x not equal to node cwsr size 0x%x\n",
+ cwsr_params->cwsr_sz, num_xcc * cwsr_info->xcc_cwsr_sz);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_cwsr_trap_obj **trap_obj)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
index 3c80d057bbed..96b03a8ed99b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
@@ -56,6 +56,13 @@ struct amdgpu_cwsr_info {
uint32_t xcc_cwsr_sz;
};
+struct amdgpu_cwsr_params {
+ uint64_t ctx_save_area_address;
+ /* cwsr size info */
+ uint32_t ctl_stack_sz;
+ uint32_t cwsr_sz;
+};
+
int amdgpu_cwsr_init(struct amdgpu_device *adev);
void amdgpu_cwsr_fini(struct amdgpu_device *adev);
@@ -68,4 +75,8 @@ static inline bool amdgpu_cwsr_is_enabled(struct amdgpu_device *adev)
return adev->cwsr_info != NULL;
}
+uint32_t amdgpu_cwsr_size_needed(struct amdgpu_device *adev, int num_xcc);
+int amdgpu_cwsr_validate_params(struct amdgpu_device *adev,
+ struct amdgpu_cwsr_params *cwsr_params,
+ int num_xcc);
#endif
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [RFC PATCH v3 04/10] drm/amdgpu: Add user save area params validation
2025-12-03 12:54 ` [RFC PATCH v3 04/10] drm/amdgpu: Add user save area params validation Lijo Lazar
@ 2025-12-16 16:15 ` Alex Deucher
0 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:15 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:05 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add an interface to validate user provided save area parameters. Address
> validation is not done and expected to be done outside.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 44 ++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 11 ++++++
> 2 files changed, 55 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> index 4252c31eac4c..1b4483b5d5a7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> @@ -63,6 +63,15 @@ static inline bool amdgpu_cwsr_is_supported(struct amdgpu_device *adev)
> return true;
> }
>
> +uint32_t amdgpu_cwsr_size_needed(struct amdgpu_device *adev, int num_xcc)
> +{
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return 0;
> +
> + return num_xcc *
> + (adev->cwsr_info->xcc_cwsr_sz + adev->cwsr_info->xcc_dbg_mem_sz);
> +}
> +
> static void amdgpu_cwsr_init_isa_details(struct amdgpu_device *adev,
> struct amdgpu_cwsr_info *cwsr_info)
> {
> @@ -406,6 +415,41 @@ int amdgpu_cwsr_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> return r;
> }
>
> +int amdgpu_cwsr_validate_params(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_params *cwsr_params,
> + int num_xcc)
> +{
> + struct amdgpu_cwsr_info *cwsr_info = adev->cwsr_info;
> +
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return -EOPNOTSUPP;
> +
> + if (!cwsr_params)
> + return -EINVAL;
> +
> + /*
> + * Only control stack and save area size details checked. Address validation needs to be
> + * carried out separately.
> + */
> + if (cwsr_params->ctl_stack_sz !=
> + (cwsr_info->xcc_ctl_stack_sz * num_xcc)) {
I think it should be ok if the size is greater than expected.
Alex
> + dev_dbg(adev->dev,
> + "queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n",
> + cwsr_params->ctl_stack_sz,
> + num_xcc * cwsr_info->xcc_ctl_stack_sz);
> + return -EINVAL;
> + }
> +
> + if (cwsr_params->cwsr_sz != (cwsr_info->xcc_cwsr_sz * num_xcc)) {
> + dev_dbg(adev->dev,
> + "queue cwsr size 0x%x not equal to node cwsr size 0x%x\n",
> + cwsr_params->cwsr_sz, num_xcc * cwsr_info->xcc_cwsr_sz);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> struct amdgpu_cwsr_trap_obj **trap_obj)
> {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> index 3c80d057bbed..96b03a8ed99b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> @@ -56,6 +56,13 @@ struct amdgpu_cwsr_info {
> uint32_t xcc_cwsr_sz;
> };
>
> +struct amdgpu_cwsr_params {
> + uint64_t ctx_save_area_address;
> + /* cwsr size info */
> + uint32_t ctl_stack_sz;
> + uint32_t cwsr_sz;
> +};
> +
> int amdgpu_cwsr_init(struct amdgpu_device *adev);
> void amdgpu_cwsr_fini(struct amdgpu_device *adev);
>
> @@ -68,4 +75,8 @@ static inline bool amdgpu_cwsr_is_enabled(struct amdgpu_device *adev)
> return adev->cwsr_info != NULL;
> }
>
> +uint32_t amdgpu_cwsr_size_needed(struct amdgpu_device *adev, int num_xcc);
> +int amdgpu_cwsr_validate_params(struct amdgpu_device *adev,
> + struct amdgpu_cwsr_params *cwsr_params,
> + int num_xcc);
> #endif
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 05/10] drm/amdgpu: Add cwsr to device init/fini sequence
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (3 preceding siblings ...)
2025-12-03 12:54 ` [RFC PATCH v3 04/10] drm/amdgpu: Add user save area params validation Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-16 16:41 ` Alex Deucher
2025-12-03 12:54 ` [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq Lijo Lazar
` (4 subsequent siblings)
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Initialize cwsr handler related info during device initialization.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7a0213a07023..43848e905ae5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -77,6 +77,7 @@
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"
#include "amdgpu_dev_coredump.h"
+#include "amdgpu_cwsr.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
@@ -3324,6 +3325,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
r = amdgpu_cper_init(adev);
+ if (!r) {
+ r = amdgpu_cwsr_init(adev);
+ if (r == -EOPNOTSUPP)
+ r = 0;
+ }
+
init_failed:
return r;
@@ -3713,6 +3720,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
+ amdgpu_cwsr_fini(adev);
amdgpu_cper_fini(adev);
if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [RFC PATCH v3 05/10] drm/amdgpu: Add cwsr to device init/fini sequence
2025-12-03 12:54 ` [RFC PATCH v3 05/10] drm/amdgpu: Add cwsr to device init/fini sequence Lijo Lazar
@ 2025-12-16 16:41 ` Alex Deucher
0 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:41 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:05 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Initialize cwsr handler related info during device initialization.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 7a0213a07023..43848e905ae5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -77,6 +77,7 @@
> #include "amdgpu_reset.h"
> #include "amdgpu_virt.h"
> #include "amdgpu_dev_coredump.h"
> +#include "amdgpu_cwsr.h"
>
> #include <linux/suspend.h>
> #include <drm/task_barrier.h>
> @@ -3324,6 +3325,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>
> r = amdgpu_cper_init(adev);
>
> + if (!r) {
> + r = amdgpu_cwsr_init(adev);
> + if (r == -EOPNOTSUPP)
> + r = 0;
> + }
> +
> init_failed:
>
> return r;
> @@ -3713,6 +3720,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
> {
> int i, r;
>
> + amdgpu_cwsr_fini(adev);
> amdgpu_cper_fini(adev);
>
> if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (4 preceding siblings ...)
2025-12-03 12:54 ` [RFC PATCH v3 05/10] drm/amdgpu: Add cwsr to device init/fini sequence Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-16 16:43 ` Alex Deucher
2025-12-19 8:37 ` Zhang, Jesse(Jie)
2025-12-03 12:54 ` [RFC PATCH v3 07/10] drm/amdgpu: Add user save area params to mqd input Lijo Lazar
` (3 subsequent siblings)
9 siblings, 2 replies; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add cwsr_trap_obj to render file handle. It maps the first level cwsr
handler to the vm with which the file handle is associated. Use
cwsr trap object's tba/tma address for the userqueue.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 ++++++++
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 ++++++
3 files changed, 16 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b9920cab5d31..ec2919a9c636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -332,6 +332,7 @@ struct amdgpu_hive_info;
struct amdgpu_reset_context;
struct amdgpu_reset_control;
struct amdgpu_cwsr_isa;
+struct amdgpu_cwsr_trap_obj;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -505,6 +506,7 @@ struct amdgpu_fpriv {
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
struct amdgpu_userq_mgr userq_mgr;
+ struct amdgpu_cwsr_trap_obj *cwsr_trap;
/* Eviction fence infra */
struct amdgpu_eviction_fence_mgr evf_mgr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b3e6b3fcdf2c..398d6c8d343c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -46,6 +46,7 @@
#include "amdgpu_reset.h"
#include "amd_pcie.h"
#include "amdgpu_userq.h"
+#include "amdgpu_cwsr.h"
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
@@ -1452,6 +1453,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
if (r)
DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
+ if (amdgpu_cwsr_is_enabled(adev)) {
+ r = amdgpu_cwsr_alloc(adev, &fpriv->vm, &fpriv->cwsr_trap);
+ if (r)
+ dev_dbg(adev->dev, "cwsr trap not enabled");
+ }
+
r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
if (r)
goto error_vm;
@@ -1524,6 +1531,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
}
amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+ amdgpu_cwsr_free(adev, &fpriv->vm, &fpriv->cwsr_trap);
amdgpu_vm_fini(adev, &fpriv->vm);
if (pasid)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 8b0aeb89025a..480f4806e951 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -26,6 +26,7 @@
#include "amdgpu_gfx.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"
+#include "amdgpu_cwsr.h"
#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
@@ -116,6 +117,7 @@ static int convert_to_mes_priority(int priority)
static int mes_userq_map(struct amdgpu_usermode_queue *queue)
{
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
struct amdgpu_device *adev = uq_mgr->adev;
struct amdgpu_userq_obj *ctx = &queue->fw_obj;
struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
@@ -145,6 +147,10 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue)
queue_input.doorbell_offset = userq_props->doorbell_index;
queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
+ if (fpriv->cwsr_trap) {
+ queue_input.tba_addr = fpriv->cwsr_trap->tba_gpu_va_addr;
+ queue_input.tma_addr = fpriv->cwsr_trap->tma_gpu_va_addr;
+ }
amdgpu_mes_lock(&adev->mes);
r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq
2025-12-03 12:54 ` [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq Lijo Lazar
@ 2025-12-16 16:43 ` Alex Deucher
2025-12-19 8:37 ` Zhang, Jesse(Jie)
1 sibling, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:43 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:14 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add cwsr_trap_obj to render file handle. It maps the first level cwsr
> handler to the vm with which the file handle is associated. Use
> cwsr trap object's tba/tma address for the userqueue.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 ++++++
> 3 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b9920cab5d31..ec2919a9c636 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -332,6 +332,7 @@ struct amdgpu_hive_info;
> struct amdgpu_reset_context;
> struct amdgpu_reset_control;
> struct amdgpu_cwsr_isa;
> +struct amdgpu_cwsr_trap_obj;
>
> enum amdgpu_cp_irq {
> AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
> @@ -505,6 +506,7 @@ struct amdgpu_fpriv {
> struct idr bo_list_handles;
> struct amdgpu_ctx_mgr ctx_mgr;
> struct amdgpu_userq_mgr userq_mgr;
> + struct amdgpu_cwsr_trap_obj *cwsr_trap;
>
> /* Eviction fence infra */
> struct amdgpu_eviction_fence_mgr evf_mgr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index b3e6b3fcdf2c..398d6c8d343c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -46,6 +46,7 @@
> #include "amdgpu_reset.h"
> #include "amd_pcie.h"
> #include "amdgpu_userq.h"
> +#include "amdgpu_cwsr.h"
>
> void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
> {
> @@ -1452,6 +1453,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
> if (r)
> DRM_WARN("Can't setup usermode queues, use legacy workload submission only\n");
>
> + if (amdgpu_cwsr_is_enabled(adev)) {
> + r = amdgpu_cwsr_alloc(adev, &fpriv->vm, &fpriv->cwsr_trap);
> + if (r)
> + dev_dbg(adev->dev, "cwsr trap not enabled");
> + }
> +
> r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
> if (r)
> goto error_vm;
> @@ -1524,6 +1531,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> }
>
> amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
> + amdgpu_cwsr_free(adev, &fpriv->vm, &fpriv->cwsr_trap);
> amdgpu_vm_fini(adev, &fpriv->vm);
>
> if (pasid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index 8b0aeb89025a..480f4806e951 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -26,6 +26,7 @@
> #include "amdgpu_gfx.h"
> #include "mes_userqueue.h"
> #include "amdgpu_userq_fence.h"
> +#include "amdgpu_cwsr.h"
>
> #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
> #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
> @@ -116,6 +117,7 @@ static int convert_to_mes_priority(int priority)
> static int mes_userq_map(struct amdgpu_usermode_queue *queue)
> {
> struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
> struct amdgpu_device *adev = uq_mgr->adev;
> struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> struct amdgpu_mqd_prop *userq_props = queue->userq_prop;
> @@ -145,6 +147,10 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue)
> queue_input.doorbell_offset = userq_props->doorbell_index;
> queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
> queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
> + if (fpriv->cwsr_trap) {
> + queue_input.tba_addr = fpriv->cwsr_trap->tba_gpu_va_addr;
> + queue_input.tma_addr = fpriv->cwsr_trap->tma_gpu_va_addr;
> + }
>
> amdgpu_mes_lock(&adev->mes);
> r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread
* RE: [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq
2025-12-03 12:54 ` [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq Lijo Lazar
2025-12-16 16:43 ` Alex Deucher
@ 2025-12-19 8:37 ` Zhang, Jesse(Jie)
1 sibling, 0 replies; 29+ messages in thread
From: Zhang, Jesse(Jie) @ 2025-12-19 8:37 UTC (permalink / raw)
To: Lazar, Lijo, amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking, Deucher, Alexander, Koenig, Christian
[AMD Official Use Only - AMD Internal Distribution Only]
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Wednesday, December 3, 2025 8:55 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Zhang, Jesse(Jie) <Jesse.Zhang@amd.com>
> Subject: [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq
>
> Add cwsr_trap_obj to render file handle. It maps the first level cwsr handler to the
> vm with which the file handle is associated. Use cwsr trap object's tba/tma address
> for the userqueue.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 6 ++++++
> 3 files changed, 16 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index b9920cab5d31..ec2919a9c636 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -332,6 +332,7 @@ struct amdgpu_hive_info; struct amdgpu_reset_context;
> struct amdgpu_reset_control; struct amdgpu_cwsr_isa;
> +struct amdgpu_cwsr_trap_obj;
>
> enum amdgpu_cp_irq {
> AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
> @@ -505,6 +506,7 @@ struct amdgpu_fpriv {
> struct idr bo_list_handles;
> struct amdgpu_ctx_mgr ctx_mgr;
> struct amdgpu_userq_mgr userq_mgr;
> + struct amdgpu_cwsr_trap_obj *cwsr_trap;
>
> /* Eviction fence infra */
> struct amdgpu_eviction_fence_mgr evf_mgr; diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index b3e6b3fcdf2c..398d6c8d343c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -46,6 +46,7 @@
> #include "amdgpu_reset.h"
> #include "amd_pcie.h"
> #include "amdgpu_userq.h"
> +#include "amdgpu_cwsr.h"
>
> void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { @@ -
> 1452,6 +1453,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct
> drm_file *file_priv)
> if (r)
> DRM_WARN("Can't setup usermode queues, use legacy workload
> submission only\n");
>
> + if (amdgpu_cwsr_is_enabled(adev)) {
> + r = amdgpu_cwsr_alloc(adev, &fpriv->vm, &fpriv->cwsr_trap);
> + if (r)
> + dev_dbg(adev->dev, "cwsr trap not enabled");
> + }
> +
> r = amdgpu_eviction_fence_init(&fpriv->evf_mgr);
> if (r)
> goto error_vm;
> @@ -1524,6 +1531,7 @@ void amdgpu_driver_postclose_kms(struct drm_device
> *dev,
> }
>
> amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
> + amdgpu_cwsr_free(adev, &fpriv->vm, &fpriv->cwsr_trap);
> amdgpu_vm_fini(adev, &fpriv->vm);
>
> if (pasid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index 8b0aeb89025a..480f4806e951 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -26,6 +26,7 @@
> #include "amdgpu_gfx.h"
> #include "mes_userqueue.h"
> #include "amdgpu_userq_fence.h"
> +#include "amdgpu_cwsr.h"
>
> #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE #define
> AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE @@ -116,6 +117,7 @@ static
> int convert_to_mes_priority(int priority) static int mes_userq_map(struct
> amdgpu_usermode_queue *queue) {
> struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
> + struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
> struct amdgpu_device *adev = uq_mgr->adev;
> struct amdgpu_userq_obj *ctx = &queue->fw_obj;
> struct amdgpu_mqd_prop *userq_props = queue->userq_prop; @@ -145,6
> +147,10 @@ static int mes_userq_map(struct amdgpu_usermode_queue *queue)
> queue_input.doorbell_offset = userq_props->doorbell_index;
> queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm-
> >root.bo);
> queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
> + if (fpriv->cwsr_trap) {
> + queue_input.tba_addr = fpriv->cwsr_trap->tba_gpu_va_addr;
> + queue_input.tma_addr = fpriv->cwsr_trap->tma_gpu_va_addr;
[Zhang, Jesse(Jie)] the queue_input.trap_en setting is missing here.
> + }
>
> amdgpu_mes_lock(&adev->mes);
> r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
> --
> 2.49.0
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 07/10] drm/amdgpu: Add user save area params to mqd input
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (5 preceding siblings ...)
2025-12-03 12:54 ` [RFC PATCH v3 06/10] drm/amdgpu: Add first level cwsr handler to userq Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-03 12:54 ` [RFC PATCH v3 08/10] drm/amdgpu: Add ioctl to get cwsr details Lijo Lazar
` (2 subsequent siblings)
9 siblings, 0 replies; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add user save area parameters to mqd properties for queue creation.
Validate the parameters before using for mqd initialization.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 24 ++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h | 5 +++++
drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 14 +++++++++++++
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 16 +++++++++++++++
5 files changed, 63 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ec2919a9c636..1fb9539f8aca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -904,6 +904,10 @@ struct amdgpu_mqd_prop {
uint64_t fence_address;
bool tmz_queue;
bool kernel_queue;
+ /* cwsr params*/
+ uint64_t ctx_save_area_addr;
+ uint32_t ctx_save_area_size;
+ uint32_t ctl_stack_size;
};
struct amdgpu_mqd {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 2f97f35e0af5..49794b0989ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -33,6 +33,7 @@
#include "amdgpu_userq.h"
#include "amdgpu_hmm.h"
#include "amdgpu_userq_fence.h"
+#include "amdgpu_cwsr.h"
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
{
@@ -202,6 +203,29 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
return r;
}
+int amdgpu_userq_input_cwsr_params_validate(
+ struct amdgpu_usermode_queue *queue,
+ struct amdgpu_cwsr_params *cwsr_params)
+{
+ struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(queue->userq_mgr);
+ struct amdgpu_device *adev = queue->userq_mgr->adev;
+ uint32_t cwsr_size;
+ int num_xcc;
+ int r;
+
+ num_xcc = amdgpu_xcp_get_num_xcc(adev->xcp_mgr, fpriv->xcp_id);
+ r = amdgpu_cwsr_validate_params(queue->userq_mgr->adev, cwsr_params,
+ num_xcc);
+ if (r)
+ return r;
+ cwsr_size = amdgpu_cwsr_size_needed(queue->userq_mgr->adev, num_xcc);
+ if (!cwsr_size)
+ return -EOPNOTSUPP;
+
+ return amdgpu_userq_input_va_validate(
+ adev, queue, cwsr_params->ctx_save_area_address, cwsr_size);
+}
+
static bool amdgpu_userq_buffer_va_mapped(struct amdgpu_vm *vm, u64 addr)
{
struct amdgpu_bo_va_mapping *mapping;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
index 1eaa94f8a291..0eeea9fad0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
@@ -41,6 +41,7 @@ enum amdgpu_userq_state {
};
struct amdgpu_mqd_prop;
+struct amdgpu_cwsr_params;
struct amdgpu_userq_obj {
void *cpu_ptr;
@@ -153,4 +154,8 @@ int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
uint64_t saddr);
+int amdgpu_userq_input_cwsr_params_validate(
+ struct amdgpu_usermode_queue *queue,
+ struct amdgpu_cwsr_params *cwsr_params);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index f9cae6666697..ad39b33d292a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -3239,6 +3239,20 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
mqd->fence_address_lo = lower_32_bits(prop->fence_address);
mqd->fence_address_hi = upper_32_bits(prop->fence_address);
+ /* If non-zero, assume cwsr is enabled */
+ if (prop->ctx_save_area_addr) {
+ mqd->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ mqd->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(prop->ctx_save_area_addr);
+ mqd->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(prop->ctx_save_area_addr);
+ mqd->cp_hqd_ctx_save_size = prop->ctx_save_area_size;
+ mqd->cp_hqd_cntl_stack_size = prop->ctl_stack_size;
+ mqd->cp_hqd_cntl_stack_offset = prop->ctl_stack_size;
+ mqd->cp_hqd_wg_state_offset = prop->ctl_stack_size;
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 480f4806e951..0ac87618a86a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -293,6 +293,7 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
if (queue->queue_type == AMDGPU_HW_IP_COMPUTE) {
struct drm_amdgpu_userq_mqd_compute_gfx11 *compute_mqd;
+ struct amdgpu_cwsr_params cwsr_params;
if (mqd_user->mqd_size != sizeof(*compute_mqd)) {
DRM_ERROR("Invalid compute IP MQD size\n");
@@ -318,6 +319,21 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
userq_props->hqd_active = false;
userq_props->tmz_queue =
mqd_user->flags & AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE;
+
+ if (amdgpu_cwsr_is_enabled(adev)) {
+ cwsr_params.ctx_save_area_address =
+ userq_props->ctx_save_area_addr;
+ cwsr_params.cwsr_sz = userq_props->ctx_save_area_size;
+ cwsr_params.ctl_stack_sz = userq_props->ctl_stack_size;
+
+ r = amdgpu_userq_input_cwsr_params_validate(
+ queue, &cwsr_params);
+ if (r) {
+ kfree(compute_mqd);
+ goto free_mqd;
+ }
+ }
+
kfree(compute_mqd);
} else if (queue->queue_type == AMDGPU_HW_IP_GFX) {
struct drm_amdgpu_userq_mqd_gfx11 *mqd_gfx_v11;
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* [RFC PATCH v3 08/10] drm/amdgpu: Add ioctl to get cwsr details
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (6 preceding siblings ...)
2025-12-03 12:54 ` [RFC PATCH v3 07/10] drm/amdgpu: Add user save area params to mqd input Lijo Lazar
@ 2025-12-03 12:54 ` Lijo Lazar
2025-12-16 16:55 ` Alex Deucher
2025-12-03 12:55 ` [RFC PATCH v3 09/10] drm/amdgpu: Add ioctl support for cwsr params Lijo Lazar
2025-12-03 12:55 ` [RFC PATCH v3 10/10] drm/amdgpu: Add ioctl to set level2 handler Lijo Lazar
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:54 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add an ioctl to return size information required for CWSR regions.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 21 +++++++++++++++++++++
include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 398d6c8d343c..848405c37bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1368,6 +1368,27 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return -EINVAL;
}
}
+ case AMDGPU_INFO_CWSR: {
+ struct drm_amdgpu_info_cwsr cwsr_info;
+ int num_xcc, r;
+
+ fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
+ if (!amdgpu_cwsr_is_enabled(adev) || !fpriv->cwsr_trap)
+ return -EOPNOTSUPP;
+ num_xcc = amdgpu_xcp_get_num_xcc(adev->xcp_mgr, fpriv->xcp_id);
+ cwsr_info.ctl_stack_size =
+ adev->cwsr_info->xcc_ctl_stack_sz * num_xcc;
+ cwsr_info.dbg_mem_size =
+ adev->cwsr_info->xcc_dbg_mem_sz * num_xcc;
+ cwsr_info.min_save_area_size =
+ adev->cwsr_info->xcc_cwsr_sz * num_xcc;
+ r = copy_to_user(out, &cwsr_info,
+ min((size_t)size, sizeof(cwsr_info))) ?
+ -EFAULT :
+ 0;
+ return r;
+ }
+
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c1336ed4ff75..2bb9daafb560 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1273,6 +1273,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
#define AMDGPU_INFO_GPUVM_FAULT 0x23
/* query FW object size and alignment */
#define AMDGPU_INFO_UQ_FW_AREAS 0x24
+/* query CWSR size and alignment */
+#define AMDGPU_INFO_CWSR 0x25
#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
@@ -1636,6 +1638,20 @@ struct drm_amdgpu_info_uq_metadata {
};
};
+/**
+ * struct drm_amdgpu_info_cwsr - cwsr information
+ *
+ * Gives cwsr related size details. User needs to allocate buffer based on this.
+ */
+struct drm_amdgpu_info_cwsr {
+ /* Control stack size */
+ __u32 ctl_stack_size;
+ /* Debug memory area size */
+ __u32 dbg_mem_size;
+ /* Minimum save area size required */
+ __u32 min_save_area_size;
+};
+
/*
* Supported GPU families
*/
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 08/10] drm/amdgpu: Add ioctl to get cwsr details
2025-12-03 12:54 ` [RFC PATCH v3 08/10] drm/amdgpu: Add ioctl to get cwsr details Lijo Lazar
@ 2025-12-16 16:55 ` Alex Deucher
2026-01-05 6:10 ` Lazar, Lijo
0 siblings, 1 reply; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:55 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:05 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add an ioctl to return size information required for CWSR regions.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 21 +++++++++++++++++++++
> include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
> 2 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 398d6c8d343c..848405c37bd5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1368,6 +1368,27 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> return -EINVAL;
> }
> }
> + case AMDGPU_INFO_CWSR: {
> + struct drm_amdgpu_info_cwsr cwsr_info;
> + int num_xcc, r;
> +
> + fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
> + if (!amdgpu_cwsr_is_enabled(adev) || !fpriv->cwsr_trap)
> + return -EOPNOTSUPP;
> + num_xcc = amdgpu_xcp_get_num_xcc(adev->xcp_mgr, fpriv->xcp_id);
> + cwsr_info.ctl_stack_size =
> + adev->cwsr_info->xcc_ctl_stack_sz * num_xcc;
> + cwsr_info.dbg_mem_size =
> + adev->cwsr_info->xcc_dbg_mem_sz * num_xcc;
> + cwsr_info.min_save_area_size =
> + adev->cwsr_info->xcc_cwsr_sz * num_xcc;
> + r = copy_to_user(out, &cwsr_info,
> + min((size_t)size, sizeof(cwsr_info))) ?
> + -EFAULT :
> + 0;
> + return r;
> + }
> +
> default:
> DRM_DEBUG_KMS("Invalid request %d\n", info->query);
> return -EINVAL;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index c1336ed4ff75..2bb9daafb560 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -1273,6 +1273,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> #define AMDGPU_INFO_GPUVM_FAULT 0x23
> /* query FW object size and alignment */
> #define AMDGPU_INFO_UQ_FW_AREAS 0x24
> +/* query CWSR size and alignment */
> +#define AMDGPU_INFO_CWSR 0x25
>
> #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
> #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
> @@ -1636,6 +1638,20 @@ struct drm_amdgpu_info_uq_metadata {
> };
> };
>
> +/**
> + * struct drm_amdgpu_info_cwsr - cwsr information
> + *
> + * Gives cwsr related size details. User needs to allocate buffer based on this.
> + */
> +struct drm_amdgpu_info_cwsr {
> + /* Control stack size */
> + __u32 ctl_stack_size;
> + /* Debug memory area size */
> + __u32 dbg_mem_size;
How should this be used by the application? Should it be folded into
the save area or ctl stack, or is it just for future use?
> + /* Minimu save area size required */
Minimum
Alex
> + __u32 min_save_area_size;
> +};
> +
> /*
> * Supported GPU families
> */
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 08/10] drm/amdgpu: Add ioctl to get cwsr details
2025-12-16 16:55 ` Alex Deucher
@ 2026-01-05 6:10 ` Lazar, Lijo
0 siblings, 0 replies; 29+ messages in thread
From: Lazar, Lijo @ 2026-01-05 6:10 UTC (permalink / raw)
To: Alex Deucher
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On 16-Dec-25 10:25 PM, Alex Deucher wrote:
> On Wed, Dec 3, 2025 at 8:05 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>>
>> Add an ioctl to return size information required for CWSR regions.
>>
>> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 21 +++++++++++++++++++++
>> include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
>> 2 files changed, 37 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 398d6c8d343c..848405c37bd5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1368,6 +1368,27 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>> return -EINVAL;
>> }
>> }
>> + case AMDGPU_INFO_CWSR: {
>> + struct drm_amdgpu_info_cwsr cwsr_info;
>> + int num_xcc, r;
>> +
>> + fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
>> + if (!amdgpu_cwsr_is_enabled(adev) || !fpriv->cwsr_trap)
>> + return -EOPNOTSUPP;
>> + num_xcc = amdgpu_xcp_get_num_xcc(adev->xcp_mgr, fpriv->xcp_id);
>> + cwsr_info.ctl_stack_size =
>> + adev->cwsr_info->xcc_ctl_stack_sz * num_xcc;
>> + cwsr_info.dbg_mem_size =
>> + adev->cwsr_info->xcc_dbg_mem_sz * num_xcc;
>> + cwsr_info.min_save_area_size =
>> + adev->cwsr_info->xcc_cwsr_sz * num_xcc;
>> + r = copy_to_user(out, &cwsr_info,
>> + min((size_t)size, sizeof(cwsr_info))) ?
>> + -EFAULT :
>> + 0;
>> + return r;
>> + }
>> +
>> default:
>> DRM_DEBUG_KMS("Invalid request %d\n", info->query);
>> return -EINVAL;
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index c1336ed4ff75..2bb9daafb560 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -1273,6 +1273,8 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
>> #define AMDGPU_INFO_GPUVM_FAULT 0x23
>> /* query FW object size and alignment */
>> #define AMDGPU_INFO_UQ_FW_AREAS 0x24
>> +/* query CWSR size and alignment */
>> +#define AMDGPU_INFO_CWSR 0x25
>>
>> #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
>> #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
>> @@ -1636,6 +1638,20 @@ struct drm_amdgpu_info_uq_metadata {
>> };
>> };
>>
>> +/**
>> + * struct drm_amdgpu_info_cwsr - cwsr information
>> + *
>> + * Gives cwsr related size details. User needs to allocate buffer based on this.
>> + */
>> +struct drm_amdgpu_info_cwsr {
>> + /* Control stack size */
>> + __u32 ctl_stack_size;
>> + /* Debug memory area size */
>> + __u32 dbg_mem_size;
>
> How should this be used by the application? Should it be folded into
> the save area or ctl stack, or is it just for future use?
>
This needs to be accounted for in the minimum save area size that the
application should allocate.
amdgpu_cwsr_size_needed() =
return num_xcc *
(adev->cwsr_info->xcc_cwsr_sz + adev->cwsr_info->xcc_dbg_mem_sz);
xcc_cwsr_size = control stack size + workgroup context size.
This interface provides information about the individual size requirements.
Presently, all of these are also calculated by rocr separately -
https://github.com/ROCm/rocm-systems/blob/develop/projects/rocr-runtime/libhsakmt/src/queues.c#L342
Ideally, we prefer rocr to get this information from driver.
Thanks,
Lijo
>> + /* Minimu save area size required */
>
> Minimum
>
> Alex
>
>
>> + __u32 min_save_area_size;
>> +};
>> +
>> /*
>> * Supported GPU families
>> */
>> --
>> 2.49.0
>>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 09/10] drm/amdgpu: Add ioctl support for cwsr params
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (7 preceding siblings ...)
2025-12-03 12:54 ` [RFC PATCH v3 08/10] drm/amdgpu: Add ioctl to get cwsr details Lijo Lazar
@ 2025-12-03 12:55 ` Lijo Lazar
2025-12-16 16:30 ` Alex Deucher
2025-12-03 12:55 ` [RFC PATCH v3 10/10] drm/amdgpu: Add ioctl to set level2 handler Lijo Lazar
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:55 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add cwsr parameters to userqueue ioctl. User should pass the GPU virtual
address for save/restore buffer, and size allocated. They are supported
only for user compute queues.
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 13 +++++++++----
include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 0ac87618a86a..8865a266d25a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -322,16 +322,21 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
if (amdgpu_cwsr_is_enabled(adev)) {
cwsr_params.ctx_save_area_address =
- userq_props->ctx_save_area_addr;
- cwsr_params.cwsr_sz = userq_props->ctx_save_area_size;
- cwsr_params.ctl_stack_sz = userq_props->ctl_stack_size;
-
+ compute_mqd->ctx_save_area_addr;
+ cwsr_params.cwsr_sz = compute_mqd->ctx_save_area_size;
+ cwsr_params.ctl_stack_sz = compute_mqd->ctl_stack_size;
r = amdgpu_userq_input_cwsr_params_validate(
queue, &cwsr_params);
if (r) {
kfree(compute_mqd);
goto free_mqd;
}
+ userq_props->ctx_save_area_addr =
+ compute_mqd->ctx_save_area_addr;
+ userq_props->ctx_save_area_size =
+ compute_mqd->ctx_save_area_size;
+ userq_props->ctl_stack_size =
+ compute_mqd->ctl_stack_size;
}
kfree(compute_mqd);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 2bb9daafb560..1a27e218d4ea 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -464,6 +464,22 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
* to get the size.
*/
__u64 eop_va;
+ /**
+ * @ctx_save_area_addr: Virtual address of the GPU memory for save/restore buffer.
+ * This must be from a separate GPU object, and use AMDGPU_INFO IOCTL
+ * to get the size.
+ */
+ __u64 ctx_save_area_addr;
+ /**
+ * @ctx_save_area_size: Total size allocated for save/restore buffer.
+ * Use AMDGPU_INFO IOCTL to get the size.
+ */
+ __u32 ctx_save_area_size;
+ /**
+ * @ctl_stack_size: Size of control stack region in the save/restore buffer.
+ * Use AMDGPU_INFO IOCTL to get the size.
+ */
+ __u32 ctl_stack_size;
};
/* userq signal/wait ioctl */
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 09/10] drm/amdgpu: Add ioctl support for cwsr params
2025-12-03 12:55 ` [RFC PATCH v3 09/10] drm/amdgpu: Add ioctl support for cwsr params Lijo Lazar
@ 2025-12-16 16:30 ` Alex Deucher
2026-01-05 6:33 ` Lazar, Lijo
0 siblings, 1 reply; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:30 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:05 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add cwsr parameters to userqueue ioctl. User should pass the GPU virtual
> address for save/restore buffer, and size allocated. They are supported
> only for user compute queues.
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 13 +++++++++----
> include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
> 2 files changed, 25 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> index 0ac87618a86a..8865a266d25a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
> @@ -322,16 +322,21 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
>
> if (amdgpu_cwsr_is_enabled(adev)) {
> cwsr_params.ctx_save_area_address =
> - userq_props->ctx_save_area_addr;
> - cwsr_params.cwsr_sz = userq_props->ctx_save_area_size;
> - cwsr_params.ctl_stack_sz = userq_props->ctl_stack_size;
> -
> + compute_mqd->ctx_save_area_addr;
> + cwsr_params.cwsr_sz = compute_mqd->ctx_save_area_size;
> + cwsr_params.ctl_stack_sz = compute_mqd->ctl_stack_size;
> r = amdgpu_userq_input_cwsr_params_validate(
> queue, &cwsr_params);
> if (r) {
> kfree(compute_mqd);
> goto free_mqd;
> }
> + userq_props->ctx_save_area_addr =
> + compute_mqd->ctx_save_area_addr;
> + userq_props->ctx_save_area_size =
> + compute_mqd->ctx_save_area_size;
> + userq_props->ctl_stack_size =
> + compute_mqd->ctl_stack_size;
> }
>
> kfree(compute_mqd);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 2bb9daafb560..1a27e218d4ea 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -464,6 +464,22 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
> * to get the size.
> */
> __u64 eop_va;
> + /**
> + * @ctx_save_area_addr: Virtual address of the GPU memory for save/restore buffer.
> + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL
"be from"
Does it actually need to be a separate buffer? May also want a
comment that this address covers both the ctx save area and the ctl
stack.
> + * to get the size.
> + */
> + __u64 ctx_save_area_addr;
ctx_save_area_va for consistency.
> + /**
> + * @ctx_save_area_size: Total size allocated for save/restore buffer.
> + * Use AMDGPU_INFO IOCTL to get the size.
> + */
> + __u32 ctx_save_area_size;
> + /**
> + * @ctl_stack_size: Size of control stack region in the save/restore buffer.
> + * Use AMDGPU_INFO IOCTL to get the size.
> + */
Specify that these are in bytes.
Alex
> + __u32 ctl_stack_size;
> };
>
> /* userq signal/wait ioctl */
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 09/10] drm/amdgpu: Add ioctl support for cwsr params
2025-12-16 16:30 ` Alex Deucher
@ 2026-01-05 6:33 ` Lazar, Lijo
0 siblings, 0 replies; 29+ messages in thread
From: Lazar, Lijo @ 2026-01-05 6:33 UTC (permalink / raw)
To: Alex Deucher
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On 16-Dec-25 10:00 PM, Alex Deucher wrote:
> On Wed, Dec 3, 2025 at 8:05 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>>
>> Add cwsr parameters to userqueue ioctl. User should pass the GPU virtual
>> address for save/restore buffer, and size allocated. They are supported
>> only for user compute queues.
>>
>> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 13 +++++++++----
>> include/uapi/drm/amdgpu_drm.h | 16 ++++++++++++++++
>> 2 files changed, 25 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>> index 0ac87618a86a..8865a266d25a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
>> @@ -322,16 +322,21 @@ static int mes_userq_mqd_create(struct amdgpu_usermode_queue *queue,
>>
>> if (amdgpu_cwsr_is_enabled(adev)) {
>> cwsr_params.ctx_save_area_address =
>> - userq_props->ctx_save_area_addr;
>> - cwsr_params.cwsr_sz = userq_props->ctx_save_area_size;
>> - cwsr_params.ctl_stack_sz = userq_props->ctl_stack_size;
>> -
>> + compute_mqd->ctx_save_area_addr;
>> + cwsr_params.cwsr_sz = compute_mqd->ctx_save_area_size;
>> + cwsr_params.ctl_stack_sz = compute_mqd->ctl_stack_size;
>> r = amdgpu_userq_input_cwsr_params_validate(
>> queue, &cwsr_params);
>> if (r) {
>> kfree(compute_mqd);
>> goto free_mqd;
>> }
>> + userq_props->ctx_save_area_addr =
>> + compute_mqd->ctx_save_area_addr;
>> + userq_props->ctx_save_area_size =
>> + compute_mqd->ctx_save_area_size;
>> + userq_props->ctl_stack_size =
>> + compute_mqd->ctl_stack_size;
>> }
>>
>> kfree(compute_mqd);
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 2bb9daafb560..1a27e218d4ea 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -464,6 +464,22 @@ struct drm_amdgpu_userq_mqd_compute_gfx11 {
>> * to get the size.
>> */
>> __u64 eop_va;
>> + /**
>> + * @ctx_save_area_addr: Virtual address of the GPU memory for save/restore buffer.
>> + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL
>
> "be from"
>
> Does it actually need to be a separate buffer?
Actually, I used the same terminology as used in eop va. I think it is
better to keep a dedicated buffer allocated for lifetime control.
Thanks,
Lijo
> May also want a
> comment that this address covers both the ctx save area and the ctl
> stack.
>
>> + * to get the size.
>> + */
>> + __u64 ctx_save_area_addr;
>
> ctx_save_area_va for consistently.
>
>
>> + /**
>> + * @ctx_save_area_size: Total size allocated for save/restore buffer.
>> + * Use AMDGPU_INFO IOCTL to get the size.
>> + */
>> + __u32 ctx_save_area_size;
>> + /**
>> + * @ctl_stack_size: Size of control stack region in the save/restore buffer.
>> + * Use AMDGPU_INFO IOCTL to get the size.
>> + */
>
> Specify that these are in bytes.
>
> Alex
>
>> + __u32 ctl_stack_size;
>> };
>>
>> /* userq signal/wait ioctl */
>> --
>> 2.49.0
>>
^ permalink raw reply [flat|nested] 29+ messages in thread
* [RFC PATCH v3 10/10] drm/amdgpu: Add ioctl to set level2 handler
2025-12-03 12:54 [RFC PATCH v3 00/10] Add CWSR support to user queues Lijo Lazar
` (8 preceding siblings ...)
2025-12-03 12:55 ` [RFC PATCH v3 09/10] drm/amdgpu: Add ioctl support for cwsr params Lijo Lazar
@ 2025-12-03 12:55 ` Lijo Lazar
2025-12-16 16:26 ` Alex Deucher
9 siblings, 1 reply; 29+ messages in thread
From: Lijo Lazar @ 2025-12-03 12:55 UTC (permalink / raw)
To: amd-gfx; +Cc: Hawking.Zhang, Alexander.Deucher, Christian.Koenig, Jesse.Zhang
Add ioctl to set tba/tma of level2 trap handler
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 105 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 11 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
include/uapi/drm/amdgpu_drm.h | 24 ++++++
5 files changed, 141 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1fb9539f8aca..ed50e4d6e308 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1663,7 +1663,6 @@ int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
int amdgpu_info_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
-
/*
* functions used by amdgpu_encoder.c
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
index 1b4483b5d5a7..531be17aab1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
@@ -491,3 +491,108 @@ void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
kfree(*trap_obj);
*trap_obj = NULL;
}
+
+static int amdgpu_cwsr_validate_user_addr(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_cwsr_usr_addr *usr_addr)
+{
+ struct amdgpu_bo_va_mapping *va_map;
+ uint64_t addr;
+ uint32_t size;
+ int r;
+
+ addr = (usr_addr->addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
+ size = usr_addr->size >> AMDGPU_GPU_PAGE_SHIFT;
+
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ va_map = amdgpu_vm_bo_lookup_mapping(vm, addr);
+ if (!va_map) {
+ r = -EINVAL;
+ goto err;
+ }
+ /* validate whether resident in the VM mapping range */
+ if (addr >= va_map->start && va_map->last - addr + 1 >= size) {
+ amdgpu_bo_unreserve(vm->root.bo);
+ return 0;
+ }
+
+ r = -EINVAL;
+err:
+ amdgpu_bo_unreserve(vm->root.bo);
+
+ return r;
+}
+
+static int amdgpu_cwsr_set_l2_trap_handler(
+ struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_cwsr_trap_obj *cwsr_obj, struct amdgpu_cwsr_usr_addr *tma,
+ struct amdgpu_cwsr_usr_addr *tba)
+{
+ uint64_t *l1tma;
+ int r;
+
+ if (!amdgpu_cwsr_is_enabled(adev))
+ return -EOPNOTSUPP;
+
+ if (!cwsr_obj || !cwsr_obj->tma_cpu_addr || !tma || !tba)
+ return -EINVAL;
+ r = amdgpu_cwsr_validate_user_addr(adev, vm, tma);
+ if (r)
+ return r;
+ r = amdgpu_cwsr_validate_user_addr(adev, vm, tba);
+ if (r)
+ return r;
+
+ l1tma = (uint64_t *)(cwsr_obj->tma_cpu_addr);
+ l1tma[0] = tma->addr;
+ l1tma[1] = tba->addr;
+
+ return 0;
+}
+
+/*
+ * Userspace cwsr related ioctl
+ */
+/**
+ * amdgpu_cwsr_ioctl - Handle cwsr specific requests.
+ *
+ * @dev: drm device pointer
+ * @data: request object
+ * @filp: drm filp
+ *
+ * This function is used to perform cwsr and trap handler related operations
+ * Returns 0 on success, error code on failure.
+ */
+int amdgpu_cwsr_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
+{
+ struct amdgpu_device *adev = drm_to_adev(dev);
+ union drm_amdgpu_cwsr *cwsr = data;
+ struct amdgpu_fpriv *fpriv;
+ int r;
+
+ fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
+
+ if (!fpriv->cwsr_trap)
+ return -EOPNOTSUPP;
+
+ switch (cwsr->in.op) {
+ case AMDGPU_CWSR_OP_SET_L2_TRAP: {
+ struct amdgpu_cwsr_usr_addr tba;
+ struct amdgpu_cwsr_usr_addr tma;
+
+ tba.addr = cwsr->in.l2trap.tba;
+ tba.size = cwsr->in.l2trap.tba_sz;
+ tma.addr = cwsr->in.l2trap.tma;
+ tma.size = cwsr->in.l2trap.tma_sz;
+ r = amdgpu_cwsr_set_l2_trap_handler(
+ adev, &fpriv->vm, fpriv->cwsr_trap, &tma, &tba);
+ } break;
+ default:
+ return -EINVAL;
+ }
+
+ return r;
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
index 96b03a8ed99b..32f3f23abd79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
@@ -31,7 +31,7 @@ struct amdgpu_device;
struct amdgpu_vm;
/**
- * struct amdgpu_cwsr_obj - CWSR (Compute Wave Save Restore) buffer tracking
+ * struct amdgpu_cwsr_trap_obj - CWSR (Compute Wave Save Restore) buffer tracking
* @bo: Buffer object for CWSR area
* @bo_va: Buffer object virtual address mapping
*/
@@ -63,6 +63,11 @@ struct amdgpu_cwsr_params {
uint32_t cwsr_sz;
};
+struct amdgpu_cwsr_usr_addr {
+ uint64_t addr;
+ uint32_t size;
+};
+
int amdgpu_cwsr_init(struct amdgpu_device *adev);
void amdgpu_cwsr_fini(struct amdgpu_device *adev);
@@ -79,4 +84,8 @@ uint32_t amdgpu_cwsr_size_needed(struct amdgpu_device *adev, int num_xcc);
int amdgpu_cwsr_validate_params(struct amdgpu_device *adev,
struct amdgpu_cwsr_params *cwsr_params,
int num_xcc);
+
+int amdgpu_cwsr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *filp);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 16adeba4d7e6..8f5fcbe48a28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -52,6 +52,7 @@
#include "amdgpu_sched.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_userq.h"
+#include "amdgpu_cwsr.h"
#include "amdgpu_userq_fence.h"
#include "../amdxcp/amdgpu_xcp_drv.h"
@@ -3060,6 +3061,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(AMDGPU_CWSR, amdgpu_cwsr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
/* KMS */
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 1a27e218d4ea..f3b3c238d6d9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -58,6 +58,7 @@ extern "C" {
#define DRM_AMDGPU_USERQ_SIGNAL 0x17
#define DRM_AMDGPU_USERQ_WAIT 0x18
#define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
+#define DRM_AMDGPU_CWSR 0x20
#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -79,6 +80,8 @@ extern "C" {
#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
+#define DRM_IOCTL_AMDGPU_CWSR \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
/**
* DOC: memory domains
@@ -1668,6 +1671,27 @@ struct drm_amdgpu_info_cwsr {
__u32 min_save_area_size;
};
+/* cwsr ioctl */
+#define AMDGPU_CWSR_OP_SET_L2_TRAP 1
+
+struct drm_amdgpu_cwsr_in {
+ /* AMDGPU_CWSR_OP_* */
+ __u32 op;
+ struct {
+ /* Level 2 trap handler base address */
+ __u64 tba;
+ /* Level 2 trap handler buffer size */
+ __u32 tba_sz;
+ /* Level 2 trap memory buffer address */
+ __u64 tma;
+ /* Level 2 trap memory buffer size */
+ __u32 tma_sz;
+ } l2trap;
+};
+
+union drm_amdgpu_cwsr {
+ struct drm_amdgpu_cwsr_in in;
+};
/*
* Supported GPU families
*/
--
2.49.0
^ permalink raw reply related [flat|nested] 29+ messages in thread* Re: [RFC PATCH v3 10/10] drm/amdgpu: Add ioctl to set level2 handler
2025-12-03 12:55 ` [RFC PATCH v3 10/10] drm/amdgpu: Add ioctl to set level2 handler Lijo Lazar
@ 2025-12-16 16:26 ` Alex Deucher
0 siblings, 0 replies; 29+ messages in thread
From: Alex Deucher @ 2025-12-16 16:26 UTC (permalink / raw)
To: Lijo Lazar
Cc: amd-gfx, Hawking.Zhang, Alexander.Deucher, Christian.Koenig,
Jesse.Zhang
On Wed, Dec 3, 2025 at 8:44 AM Lijo Lazar <lijo.lazar@amd.com> wrote:
>
> Add ioctl to set tba/tma of level2 trap handler
>
> Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c | 105 +++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h | 11 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +
> include/uapi/drm/amdgpu_drm.h | 24 ++++++
> 5 files changed, 141 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1fb9539f8aca..ed50e4d6e308 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1663,7 +1663,6 @@ int amdgpu_enable_vblank_kms(struct drm_crtc *crtc);
> void amdgpu_disable_vblank_kms(struct drm_crtc *crtc);
> int amdgpu_info_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp);
> -
> /*
> * functions used by amdgpu_encoder.c
> */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> index 1b4483b5d5a7..531be17aab1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.c
> @@ -491,3 +491,108 @@ void amdgpu_cwsr_free(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> kfree(*trap_obj);
> *trap_obj = NULL;
> }
> +
> +static int amdgpu_cwsr_validate_user_addr(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_usr_addr *usr_addr)
> +{
> + struct amdgpu_bo_va_mapping *va_map;
> + uint64_t addr;
> + uint32_t size;
> + int r;
> +
> + addr = (usr_addr->addr & AMDGPU_GMC_HOLE_MASK) >> AMDGPU_GPU_PAGE_SHIFT;
> + size = usr_addr->size >> AMDGPU_GPU_PAGE_SHIFT;
> +
> + r = amdgpu_bo_reserve(vm->root.bo, false);
> + if (r)
> + return r;
> +
> + va_map = amdgpu_vm_bo_lookup_mapping(vm, addr);
> + if (!va_map) {
> + r = -EINVAL;
> + goto err;
> + }
> + /* validate whether resident in the VM mapping range */
> + if (addr >= va_map->start && va_map->last - addr + 1 >= size) {
> + amdgpu_bo_unreserve(vm->root.bo);
> + return 0;
> + }
> +
> + r = -EINVAL;
> +err:
> + amdgpu_bo_unreserve(vm->root.bo);
> +
> + return r;
> +}
> +
> +static int amdgpu_cwsr_set_l2_trap_handler(
> + struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct amdgpu_cwsr_trap_obj *cwsr_obj, struct amdgpu_cwsr_usr_addr *tma,
> + struct amdgpu_cwsr_usr_addr *tba)
> +{
> + uint64_t *l1tma;
> + int r;
> +
> + if (!amdgpu_cwsr_is_enabled(adev))
> + return -EOPNOTSUPP;
> +
> + if (!cwsr_obj || !cwsr_obj->tma_cpu_addr || !tma || !tba)
> + return -EINVAL;
> + r = amdgpu_cwsr_validate_user_addr(adev, vm, tma);
> + if (r)
> + return r;
> + r = amdgpu_cwsr_validate_user_addr(adev, vm, tba);
> + if (r)
> + return r;
> +
> + l1tma = (uint64_t *)(cwsr_obj->tma_cpu_addr);
> + l1tma[0] = tma->addr;
> + l1tma[1] = tba->addr;
> +
> + return 0;
> +}
> +
> +/*
> + * Userspace cwsr related ioctl
> + */
> +/**
> + * amdgpu_cwsr_ioctl - Handle cwsr specific requests.
> + *
> + * @dev: drm device pointer
> + * @data: request object
> + * @filp: drm filp
> + *
> + * This function is used to perform cwsr and trap handler related operations
> + * Returns 0 on success, error code on failure.
> + */
> +int amdgpu_cwsr_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> + struct amdgpu_device *adev = drm_to_adev(dev);
> + union drm_amdgpu_cwsr *cwsr = data;
> + struct amdgpu_fpriv *fpriv;
> + int r;
> +
> + fpriv = (struct amdgpu_fpriv *)filp->driver_priv;
> +
> + if (!fpriv->cwsr_trap)
> + return -EOPNOTSUPP;
> +
> + switch (cwsr->in.op) {
> + case AMDGPU_CWSR_OP_SET_L2_TRAP: {
> + struct amdgpu_cwsr_usr_addr tba;
> + struct amdgpu_cwsr_usr_addr tma;
> +
> + tba.addr = cwsr->in.l2trap.tba;
> + tba.size = cwsr->in.l2trap.tba_sz;
> + tma.addr = cwsr->in.l2trap.tma;
> + tma.size = cwsr->in.l2trap.tma_sz;
> + r = amdgpu_cwsr_set_l2_trap_handler(
> + adev, &fpriv->vm, fpriv->cwsr_trap, &tma, &tba);
> + } break;
> + default:
> + return -EINVAL;
> + }
> +
> + return r;
> +}
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> index 96b03a8ed99b..32f3f23abd79 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cwsr.h
> @@ -31,7 +31,7 @@ struct amdgpu_device;
> struct amdgpu_vm;
>
> /**
> - * struct amdgpu_cwsr_obj - CWSR (Compute Wave Save Restore) buffer tracking
> + * struct amdgpu_cwsr_trap_obj - CWSR (Compute Wave Save Restore) buffer tracking
> * @bo: Buffer object for CWSR area
> * @bo_va: Buffer object virtual address mapping
> */
> @@ -63,6 +63,11 @@ struct amdgpu_cwsr_params {
> uint32_t cwsr_sz;
> };
>
> +struct amdgpu_cwsr_usr_addr {
> + uint64_t addr;
> + uint32_t size;
> +};
> +
> int amdgpu_cwsr_init(struct amdgpu_device *adev);
> void amdgpu_cwsr_fini(struct amdgpu_device *adev);
>
> @@ -79,4 +84,8 @@ uint32_t amdgpu_cwsr_size_needed(struct amdgpu_device *adev, int num_xcc);
> int amdgpu_cwsr_validate_params(struct amdgpu_device *adev,
> struct amdgpu_cwsr_params *cwsr_params,
> int num_xcc);
> +
> +int amdgpu_cwsr_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *filp);
> +
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 16adeba4d7e6..8f5fcbe48a28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -52,6 +52,7 @@
> #include "amdgpu_sched.h"
> #include "amdgpu_xgmi.h"
> #include "amdgpu_userq.h"
> +#include "amdgpu_cwsr.h"
> #include "amdgpu_userq_fence.h"
> #include "../amdxcp/amdgpu_xcp_drv.h"
>
> @@ -3060,6 +3061,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER),
> DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(AMDGPU_CWSR, amdgpu_cwsr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> /* KMS */
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_WAIT_IDLE, amdgpu_gem_wait_idle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 1a27e218d4ea..f3b3c238d6d9 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -58,6 +58,7 @@ extern "C" {
> #define DRM_AMDGPU_USERQ_SIGNAL 0x17
> #define DRM_AMDGPU_USERQ_WAIT 0x18
> #define DRM_AMDGPU_GEM_LIST_HANDLES 0x19
> +#define DRM_AMDGPU_CWSR 0x20
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -79,6 +80,8 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> #define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
> #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> +#define DRM_IOCTL_AMDGPU_CWSR \
> + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CWSR, union drm_amdgpu_cwsr)
>
> /**
> * DOC: memory domains
> @@ -1668,6 +1671,27 @@ struct drm_amdgpu_info_cwsr {
> __u32 min_save_area_size;
> };
>
> +/* cwsr ioctl */
> +#define AMDGPU_CWSR_OP_SET_L2_TRAP 1
> +
> +struct drm_amdgpu_cwsr_in {
> + /* AMDGPU_CWSR_OP_* */
> + __u32 op;
> + struct {
> + /* Level 2 trap handler base address */
> + __u64 tba;
Maybe add a _va suffix (e.g. tba_va) so it's clear this is a GPU virtual address.
> + /* Level 2 trap handler buffer size */
> + __u32 tba_sz;
> + /* Level 2 trap memory buffer address */
> + __u64 tma;
Same here: a _va suffix (e.g. tma_va) would make it clear this is a GPU virtual address.
Alex
> + /* Level 2 trap memory buffer size */
> + __u32 tma_sz;
> + } l2trap;
> +};
> +
> +union drm_amdgpu_cwsr {
> + struct drm_amdgpu_cwsr_in in;
> +};
> /*
> * Supported GPU families
> */
> --
> 2.49.0
>
^ permalink raw reply [flat|nested] 29+ messages in thread