AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
@ 2026-04-29 20:20 John B. Moore
  2026-04-29 20:29 ` Alex Deucher
  2026-04-30  7:19 ` Christian König
  0 siblings, 2 replies; 9+ messages in thread
From: John B. Moore @ 2026-04-29 20:20 UTC (permalink / raw)
  To: alexdeucher; +Cc: christian.koenig, alexander.deucher, amd-gfx, John B. Moore

Move the duplicated doorbell-based get_wptr/set_wptr functions from
gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
in amdgpu_gfx.c.

These functions are not HW generation dependent -- the doorbell path is
identical across all four GFX versions:

  get: atomic64_read(ring->wptr_cpu_addr)
  set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()

The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
since doorbell is the only supported method on gfx9+ compute rings.

Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
wptr access patterns (MMIO registers or wb.wb[] offsets).

Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: John Moore <jbmoore61@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
 6 files changed, 58 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 77578ecc6..9e9c5cb81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
 #endif
 }
 
+/**
+ * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Read the write pointer from the doorbell-mapped writeback address.
+ * This is HW-agnostic and shared across GFX generations that use
+ * doorbell-based compute queue management.
+ */
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	/* XXX check if swapping is necessary on BE */
+	if (ring->use_doorbell)
+		return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/**
+ * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Write the write pointer to the doorbell-mapped writeback address and
+ * ring the doorbell.  This is HW-agnostic and shared across GFX
+ * generations that use doorbell-based compute queue management.
+ */
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* XXX check if swapping is necessary on BE */
+	if (ring->use_doorbell) {
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+		WDOORBELL64(ring->doorbell_index, ring->wptr);
+	} else {
+		WARN_ON_ONCE(1);
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 585cc8e81..27f6beafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
 u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
 void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
 
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
+
 void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1893ceeeb..4c0272cba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx10 now */
-	}
-}
-
 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
@@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 427975b5a..404604f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx11 now */
-	}
-}
-
 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		5 + /* update_spm_vmid */
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
@@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 79ea1af36..7ba436444 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx12 now */
-	}
-}
-
 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
@@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /*hdp invalidate */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 8249135d7..798f94bca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
 }
 
-static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	} else {
-		WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
-			  "only doorbell method supported on gfx9\n",
-			  ring->name);
-		wptr = 0;
-	}
-	return wptr;
-}
-
-static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
-			  "only doorbell method supported on gfx9\n",
-			  ring->name);
-	}
-}
-
 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
 					 u64 seq, unsigned int flags)
 {
@@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
@@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
@ 2026-04-29 20:25 John B. Moore
  0 siblings, 0 replies; 9+ messages in thread
From: John B. Moore @ 2026-04-29 20:25 UTC (permalink / raw)
  To: alexdeucher; +Cc: christian.koenig, alexander.deucher, amd-gfx, John B. Moore

Move the duplicated doorbell-based get_wptr/set_wptr functions from
gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
in amdgpu_gfx.c.

These functions are not HW generation dependent -- the doorbell path is
identical across all four GFX versions:

  get: atomic64_read(ring->wptr_cpu_addr)
  set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()

The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
since doorbell is the only supported method on gfx9+ compute rings.

Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
wptr access patterns (MMIO registers or wb.wb[] offsets).

Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: John Moore <jbmoore61@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
 6 files changed, 58 insertions(+), 124 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 77578ecc6..9e9c5cb81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
 #endif
 }
 
+/**
+ * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Read the write pointer from the doorbell-mapped writeback address.
+ * This is HW-agnostic and shared across GFX generations that use
+ * doorbell-based compute queue management.
+ */
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	/* XXX check if swapping is necessary on BE */
+	if (ring->use_doorbell)
+		return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/**
+ * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Write the write pointer to the doorbell-mapped writeback address and
+ * ring the doorbell.  This is HW-agnostic and shared across GFX
+ * generations that use doorbell-based compute queue management.
+ */
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	/* XXX check if swapping is necessary on BE */
+	if (ring->use_doorbell) {
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
+		WDOORBELL64(ring->doorbell_index, ring->wptr);
+	} else {
+		WARN_ON_ONCE(1);
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 585cc8e81..27f6beafb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
 u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
 void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
 
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
+
 void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1893ceeeb..4c0272cba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx10 now */
-	}
-}
-
 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
@@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 427975b5a..404604f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx11 now */
-	}
-}
-
 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		5 + /* update_spm_vmid */
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
@@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 79ea1af36..7ba436444 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx12 now */
-	}
-}
-
 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
@@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /*hdp invalidate */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 8249135d7..798f94bca 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
 }
 
-static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	} else {
-		WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
-			  "only doorbell method supported on gfx9\n",
-			  ring->name);
-		wptr = 0;
-	}
-	return wptr;
-}
-
-static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
-			  "only doorbell method supported on gfx9\n",
-			  ring->name);
-	}
-}
-
 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
 					 u64 seq, unsigned int flags)
 {
@@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
@@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-29 20:20 John B. Moore
@ 2026-04-29 20:29 ` Alex Deucher
  2026-04-30  7:19 ` Christian König
  1 sibling, 0 replies; 9+ messages in thread
From: Alex Deucher @ 2026-04-29 20:29 UTC (permalink / raw)
  To: John B. Moore; +Cc: christian.koenig, alexander.deucher, amd-gfx

On Wed, Apr 29, 2026 at 4:20 PM John B. Moore <jbmoore61@gmail.com> wrote:
>
> Move the duplicated doorbell-based get_wptr/set_wptr functions from
> gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
> helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
> in amdgpu_gfx.c.
>
> These functions are not HW generation dependent -- the doorbell path is
> identical across all four GFX versions:
>
>   get: atomic64_read(ring->wptr_cpu_addr)
>   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
>
> The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
> since doorbell is the only supported method on gfx9+ compute rings.
>
> Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
> wptr access patterns (MMIO registers or wb.wb[] offsets).
>
> Suggested-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: John Moore <jbmoore61@gmail.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
>  6 files changed, 58 insertions(+), 124 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 77578ecc6..9e9c5cb81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
>  #endif
>  }
>
> +/**
> + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
> + * @ring: amdgpu_ring pointer
> + *
> + * Read the write pointer from the doorbell-mapped writeback address.
> + * This is HW-agnostic and shared across GFX generations that use
> + * doorbell-based compute queue management.
> + */
> +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)

I'd prefer to name this amdgpu_gfx_get_wptr_doorbell()

> +{
> +       /* XXX check if swapping is necessary on BE */
> +       if (ring->use_doorbell)
> +               return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> +
> +       WARN_ON_ONCE(1);
> +       return 0;
> +}
> +
> +/**
> + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
> + * @ring: amdgpu_ring pointer
> + *
> + * Write the write pointer to the doorbell-mapped writeback address and
> + * ring the doorbell.  This is HW-agnostic and shared across GFX
> + * generations that use doorbell-based compute queue management.
> + */
> +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)

And amdgpu_gfx_set_wptr_doorbell() here.

While they are used for compute, there is nothing compute specific to them.

Thanks,

Alex

> +{
> +       struct amdgpu_device *adev = ring->adev;
> +
> +       /* XXX check if swapping is necessary on BE */
> +       if (ring->use_doorbell) {
> +               atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
> +               WDOORBELL64(ring->doorbell_index, ring->wptr);
> +       } else {
> +               WARN_ON_ONCE(1);
> +       }
> +}
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 585cc8e81..27f6beafb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
>  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
>  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
>
> +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
> +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
> +
>  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
>  void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 1893ceeeb..4c0272cba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>         return *(uint32_t *)ring->rptr_cpu_addr;
>  }
>
> -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       u64 wptr;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell)
> -               wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -       else
> -               BUG();
> -       return wptr;
> -}
> -
> -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       struct amdgpu_device *adev = ring->adev;
> -
> -       if (ring->use_doorbell) {
> -               atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -                            ring->wptr);
> -               WDOORBELL64(ring->doorbell_index, ring->wptr);
> -       } else {
> -               BUG(); /* only DOORBELL method supported on gfx10 now */
> -       }
> -}
> -
>  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>         struct amdgpu_device *adev = ring->adev;
> @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v10_0_ring_emit_gds_switch */
>                 7 + /* gfx_v10_0_ring_emit_hdp_flush */
> @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v10_0_ring_emit_gds_switch */
>                 7 + /* gfx_v10_0_ring_emit_hdp_flush */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 427975b5a..404604f2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>         return *(uint32_t *)ring->rptr_cpu_addr;
>  }
>
> -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       u64 wptr;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell)
> -               wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -       else
> -               BUG();
> -       return wptr;
> -}
> -
> -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       struct amdgpu_device *adev = ring->adev;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell) {
> -               atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -                            ring->wptr);
> -               WDOORBELL64(ring->doorbell_index, ring->wptr);
> -       } else {
> -               BUG(); /* only DOORBELL method supported on gfx11 now */
> -       }
> -}
> -
>  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>         struct amdgpu_device *adev = ring->adev;
> @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 5 + /* update_spm_vmid */
>                 20 + /* gfx_v11_0_ring_emit_gds_switch */
> @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v11_0_ring_emit_gds_switch */
>                 7 + /* gfx_v11_0_ring_emit_hdp_flush */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 79ea1af36..7ba436444 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>         return *(uint32_t *)ring->rptr_cpu_addr;
>  }
>
> -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       u64 wptr;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell)
> -               wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -       else
> -               BUG();
> -       return wptr;
> -}
> -
> -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       struct amdgpu_device *adev = ring->adev;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell) {
> -               atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -                            ring->wptr);
> -               WDOORBELL64(ring->doorbell_index, ring->wptr);
> -       } else {
> -               BUG(); /* only DOORBELL method supported on gfx12 now */
> -       }
> -}
> -
>  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>         struct amdgpu_device *adev = ring->adev;
> @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
>                 5 + /* hdp invalidate */
> @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 7 + /* gfx_v12_0_ring_emit_hdp_flush */
>                 5 + /*hdp invalidate */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 8249135d7..798f94bca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>         return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
>  }
>
> -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       u64 wptr;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell) {
> -               wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -       } else {
> -               WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
> -                         "only doorbell method supported on gfx9\n",
> -                         ring->name);
> -               wptr = 0;
> -       }
> -       return wptr;
> -}
> -
> -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -       struct amdgpu_device *adev = ring->adev;
> -
> -       /* XXX check if swapping is necessary on BE */
> -       if (ring->use_doorbell) {
> -               atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
> -               WDOORBELL64(ring->doorbell_index, ring->wptr);
> -       } else {
> -               WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
> -                         "only doorbell method supported on gfx9\n",
> -                         ring->name);
> -       }
> -}
> -
>  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
>                                          u64 seq, unsigned int flags)
>  {
> @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v9_0_ring_emit_gds_switch */
>                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>         .support_64bit_ptrs = true,
>         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> -       .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> -       .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> +       .get_wptr = amdgpu_gfx_get_wptr_compute,
> +       .set_wptr = amdgpu_gfx_set_wptr_compute,
>         .emit_frame_size =
>                 20 + /* gfx_v9_0_ring_emit_gds_switch */
>                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-29 20:20 John B. Moore
  2026-04-29 20:29 ` Alex Deucher
@ 2026-04-30  7:19 ` Christian König
  2026-04-30 12:32   ` John Moore
  1 sibling, 1 reply; 9+ messages in thread
From: Christian König @ 2026-04-30  7:19 UTC (permalink / raw)
  To: John B. Moore, alexdeucher; +Cc: alexander.deucher, amd-gfx

On 4/29/26 22:20, John B. Moore wrote:
> Move the duplicated doorbell-based get_wptr/set_wptr functions from
> gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
> helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
> in amdgpu_gfx.c.
> 
> These functions are not HW generation dependent -- the doorbell path is
> identical across all four GFX versions:
> 
>   get: atomic64_read(ring->wptr_cpu_addr)
>   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
> 
> The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
> since doorbell is the only supported method on gfx9+ compute rings.
> 
> Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
> wptr access patterns (MMIO registers or wb.wb[] offsets).
> 
> Suggested-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: John Moore <jbmoore61@gmail.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
>  6 files changed, 58 insertions(+), 124 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 77578ecc6..9e9c5cb81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
>  #endif
>  }
>  
> +/**
> + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
> + * @ring: amdgpu_ring pointer
> + *
> + * Read the write pointer from the doorbell-mapped writeback address.
> + * This is HW-agnostic and shared across GFX generations that use
> + * doorbell-based compute queue management.
> + */
> +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
> +{
> +	/* XXX check if swapping is necessary on BE */
> +	if (ring->use_doorbell)
> +		return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);

That should probably be readq() instead of this horrible and not portable cast to atomic64_t.

Alternatively we could just normally read the pointer with a memory barrier since this is just system memory.

> +
> +	WARN_ON_ONCE(1);

Pre-requisite/error checking first please.

Make that a if (WARN_ON(!ring->use_doorbell)) return.

And please don't use WARN_ON_ONCE() that is just to reduce the amount of warnings printed into the logs on real HW errors.

On functional coding errors like this one here it doesn't make sense and is often overlooked.

> +	return 0;
> +}
> +
> +/**
> + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
> + * @ring: amdgpu_ring pointer
> + *
> + * Write the write pointer to the doorbell-mapped writeback address and
> + * ring the doorbell.  This is HW-agnostic and shared across GFX
> + * generations that use doorbell-based compute queue management.
> + */
> +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +
> +	/* XXX check if swapping is necessary on BE */
> +	if (ring->use_doorbell) {
> +		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);

Same here the case to atomic64_t is extremely questionable.

Regards,
Christian.

> +		WDOORBELL64(ring->doorbell_index, ring->wptr);
> +	} else {
> +		WARN_ON_ONCE(1);
> +	}
> +}
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 585cc8e81..27f6beafb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
>  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
>  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
>  
> +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
> +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
> +
>  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
>  void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 1893ceeeb..4c0272cba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>  	return *(uint32_t *)ring->rptr_cpu_addr;
>  }
>  
> -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	u64 wptr;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell)
> -		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -	else
> -		BUG();
> -	return wptr;
> -}
> -
> -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	struct amdgpu_device *adev = ring->adev;
> -
> -	if (ring->use_doorbell) {
> -		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -			     ring->wptr);
> -		WDOORBELL64(ring->doorbell_index, ring->wptr);
> -	} else {
> -		BUG(); /* only DOORBELL method supported on gfx10 now */
> -	}
> -}
> -
>  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v10_0_ring_emit_gds_switch */
>  		7 + /* gfx_v10_0_ring_emit_hdp_flush */
> @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v10_0_ring_emit_gds_switch */
>  		7 + /* gfx_v10_0_ring_emit_hdp_flush */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 427975b5a..404604f2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>  	return *(uint32_t *)ring->rptr_cpu_addr;
>  }
>  
> -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	u64 wptr;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell)
> -		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -	else
> -		BUG();
> -	return wptr;
> -}
> -
> -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	struct amdgpu_device *adev = ring->adev;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell) {
> -		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -			     ring->wptr);
> -		WDOORBELL64(ring->doorbell_index, ring->wptr);
> -	} else {
> -		BUG(); /* only DOORBELL method supported on gfx11 now */
> -	}
> -}
> -
>  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		5 + /* update_spm_vmid */
>  		20 + /* gfx_v11_0_ring_emit_gds_switch */
> @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v11_0_ring_emit_gds_switch */
>  		7 + /* gfx_v11_0_ring_emit_hdp_flush */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 79ea1af36..7ba436444 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>  	return *(uint32_t *)ring->rptr_cpu_addr;
>  }
>  
> -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	u64 wptr;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell)
> -		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -	else
> -		BUG();
> -	return wptr;
> -}
> -
> -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	struct amdgpu_device *adev = ring->adev;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell) {
> -		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> -			     ring->wptr);
> -		WDOORBELL64(ring->doorbell_index, ring->wptr);
> -	} else {
> -		BUG(); /* only DOORBELL method supported on gfx12 now */
> -	}
> -}
> -
>  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		7 + /* gfx_v12_0_ring_emit_hdp_flush */
>  		5 + /* hdp invalidate */
> @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		7 + /* gfx_v12_0_ring_emit_hdp_flush */
>  		5 + /*hdp invalidate */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 8249135d7..798f94bca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>  	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
>  }
>  
> -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	u64 wptr;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell) {
> -		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> -	} else {
> -		WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
> -			  "only doorbell method supported on gfx9\n",
> -			  ring->name);
> -		wptr = 0;
> -	}
> -	return wptr;
> -}
> -
> -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> -{
> -	struct amdgpu_device *adev = ring->adev;
> -
> -	/* XXX check if swapping is necessary on BE */
> -	if (ring->use_doorbell) {
> -		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
> -		WDOORBELL64(ring->doorbell_index, ring->wptr);
> -	} else {
> -		WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
> -			  "only doorbell method supported on gfx9\n",
> -			  ring->name);
> -	}
> -}
> -
>  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
>  					 u64 seq, unsigned int flags)
>  {
> @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v9_0_ring_emit_gds_switch */
>  		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
>  	.support_64bit_ptrs = true,
>  	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
> -	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
> -	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
> +	.get_wptr = amdgpu_gfx_get_wptr_compute,
> +	.set_wptr = amdgpu_gfx_set_wptr_compute,
>  	.emit_frame_size =
>  		20 + /* gfx_v9_0_ring_emit_gds_switch */
>  		7 + /* gfx_v9_0_ring_emit_hdp_flush */


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-30  7:19 ` Christian König
@ 2026-04-30 12:32   ` John Moore
  2026-04-30 13:22     ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: John Moore @ 2026-04-30 12:32 UTC (permalink / raw)
  To: Christian König; +Cc: alexdeucher, alexander.deucher, amd-gfx

[-- Attachment #1: Type: text/plain, Size: 17707 bytes --]

Hi Christian,

Thanks for the review. All points addressed below.

> That should probably be readq() instead of this horrible and not
> portable cast to atomic64_t.
>
> Alternatively we could just normally read the pointer with a memory
> barrier since this is just system memory.

I went with the second option — this is system memory (writeback via
GTT), not MMIO, so readq() felt semantically wrong. The v2 uses:

  get:  wptr = READ_ONCE(*(u64 *)ring->wptr_cpu_addr);
        smp_rmb();

  set:  WRITE_ONCE(*(u64 *)ring->wptr_cpu_addr, ring->wptr);
        smp_wmb();
        WDOORBELL64(ring->doorbell_index, ring->wptr);

The alignment is safe — amdgpu_device_wb_get() returns offsets in
multiples of 8 dwords (32 bytes), so the u64* cast always lands on
a naturally-aligned address.

One question: READ_ONCE on a u64 is not atomic on 32-bit
architectures (unlike atomic64_read which uses cmpxchg8b). DRM_AMDGPU
has no formal CONFIG_64BIT dependency in Kconfig, though in practice
nobody runs it on 32-bit. Is READ_ONCE acceptable here, or would you
prefer readq() to keep the atomicity guarantee?

> Pre-requisite/error checking first please.
> Make that a if (WARN_ON(!ring->use_doorbell)) return.

Done. Both functions now have the guard at the top:

  if (WARN_ON(!ring->use_doorbell))
      return 0;  /* or return; for set_wptr */

> And please don't use WARN_ON_ONCE() that is just to reduce the
> amount of warnings printed into the logs on real HW errors.
>
> On functional coding errors like this one here it doesn't make sense
> and is often overlooked.

Understood — changed to WARN_ON.

> Same here the case to atomic64_t is extremely questionable.

Fixed in set_wptr as well, same READ_ONCE/WRITE_ONCE approach.

v2 incoming once I hear back on the readq vs READ_ONCE question.

Thanks,
John

"I will not be pushed, filed, stamped, indexed, briefed, debriefed, or
numbered."
~ The Prisoner




On Thu, Apr 30, 2026 at 2:19 AM Christian König <christian.koenig@amd.com>
wrote:

> On 4/29/26 22:20, John B. Moore wrote:
> > Move the duplicated doorbell-based get_wptr/set_wptr functions from
> > gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
> > helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
> > in amdgpu_gfx.c.
> >
> > These functions are not HW generation dependent -- the doorbell path is
> > identical across all four GFX versions:
> >
> >   get: atomic64_read(ring->wptr_cpu_addr)
> >   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
> >
> > The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
> > since doorbell is the only supported method on gfx9+ compute rings.
> >
> > Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
> > wptr access patterns (MMIO registers or wb.wb[] offsets).
> >
> > Suggested-by: Alex Deucher <alexander.deucher@amd.com>
> > Signed-off-by: John Moore <jbmoore61@gmail.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
> >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
> >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
> >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
> >  6 files changed, 58 insertions(+), 124 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > index 77578ecc6..9e9c5cb81 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > @@ -2596,3 +2596,42 @@ void
> amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
> >  #endif
> >  }
> >
> > +/**
> > + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings
> using doorbells
> > + * @ring: amdgpu_ring pointer
> > + *
> > + * Read the write pointer from the doorbell-mapped writeback address.
> > + * This is HW-agnostic and shared across GFX generations that use
> > + * doorbell-based compute queue management.
> > + */
> > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
> > +{
> > +     /* XXX check if swapping is necessary on BE */
> > +     if (ring->use_doorbell)
> > +             return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>
> That should probably be readq() instead of this horrible and not portable
> cast to atomic64_t.
>
> Alternatively we could just normally read the pointer with a memory
> barrier since this is just system memory.
>
> > +
> > +     WARN_ON_ONCE(1);
>
> Pre-requisite/error checking first please.
>
> Make that a if (WARN_ON(!ring->use_doorbell)) return.
>
> And please don't use WARN_ON_ONCE() that is just to reduce the amount of
> warnings printed into the logs on real HW errors.
>
> On functional coding errors like this one here it doesn't make sense and
> is often overlooked.
>
> > +     return 0;
> > +}
> > +
> > +/**
> > + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings
> using doorbells
> > + * @ring: amdgpu_ring pointer
> > + *
> > + * Write the write pointer to the doorbell-mapped writeback address and
> > + * ring the doorbell.  This is HW-agnostic and shared across GFX
> > + * generations that use doorbell-based compute queue management.
> > + */
> > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
> > +{
> > +     struct amdgpu_device *adev = ring->adev;
> > +
> > +     /* XXX check if swapping is necessary on BE */
> > +     if (ring->use_doorbell) {
> > +             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> ring->wptr);
>
> Same here the case to atomic64_t is extremely questionable.
>
> Regards,
> Christian.
>
> > +             WDOORBELL64(ring->doorbell_index, ring->wptr);
> > +     } else {
> > +             WARN_ON_ONCE(1);
> > +     }
> > +}
> > +
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index 585cc8e81..27f6beafb 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
> >  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer,
> u32 count);
> >  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
> >
> > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
> > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
> > +
> >  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
> >  void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 1893ceeeb..4c0272cba 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct
> amdgpu_ring *ring)
> >       return *(uint32_t *)ring->rptr_cpu_addr;
> >  }
> >
> > -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     u64 wptr;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell)
> > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> > -     else
> > -             BUG();
> > -     return wptr;
> > -}
> > -
> > -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     struct amdgpu_device *adev = ring->adev;
> > -
> > -     if (ring->use_doorbell) {
> > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> > -                          ring->wptr);
> > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> > -     } else {
> > -             BUG(); /* only DOORBELL method supported on gfx10 now */
> > -     }
> > -}
> > -
> >  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs
> gfx_v10_0_ring_funcs_compute = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               20 + /* gfx_v10_0_ring_emit_gds_switch */
> >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
> > @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs
> gfx_v10_0_ring_funcs_kiq = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               20 + /* gfx_v10_0_ring_emit_gds_switch */
> >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > index 427975b5a..404604f2d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > @@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct
> amdgpu_ring *ring)
> >       return *(uint32_t *)ring->rptr_cpu_addr;
> >  }
> >
> > -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     u64 wptr;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell)
> > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> > -     else
> > -             BUG();
> > -     return wptr;
> > -}
> > -
> > -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     struct amdgpu_device *adev = ring->adev;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell) {
> > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> > -                          ring->wptr);
> > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> > -     } else {
> > -             BUG(); /* only DOORBELL method supported on gfx11 now */
> > -     }
> > -}
> > -
> >  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs
> gfx_v11_0_ring_funcs_compute = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               5 + /* update_spm_vmid */
> >               20 + /* gfx_v11_0_ring_emit_gds_switch */
> > @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs
> gfx_v11_0_ring_funcs_kiq = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               20 + /* gfx_v11_0_ring_emit_gds_switch */
> >               7 + /* gfx_v11_0_ring_emit_hdp_flush */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > index 79ea1af36..7ba436444 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > @@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct
> amdgpu_ring *ring)
> >       return *(uint32_t *)ring->rptr_cpu_addr;
> >  }
> >
> > -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     u64 wptr;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell)
> > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> > -     else
> > -             BUG();
> > -     return wptr;
> > -}
> > -
> > -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     struct amdgpu_device *adev = ring->adev;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell) {
> > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> > -                          ring->wptr);
> > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> > -     } else {
> > -             BUG(); /* only DOORBELL method supported on gfx12 now */
> > -     }
> > -}
> > -
> >  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> >  {
> >       struct amdgpu_device *adev = ring->adev;
> > @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs
> gfx_v12_0_ring_funcs_compute = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
> >               5 + /* hdp invalidate */
> > @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs
> gfx_v12_0_ring_funcs_kiq = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
> >               5 + /*hdp invalidate */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 8249135d7..798f94bca 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct
> amdgpu_ring *ring)
> >       return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
> >  }
> >
> > -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     u64 wptr;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell) {
> > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> > -     } else {
> > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s,
> "
> > -                       "only doorbell method supported on gfx9\n",
> > -                       ring->name);
> > -             wptr = 0;
> > -     }
> > -     return wptr;
> > -}
> > -
> > -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> > -{
> > -     struct amdgpu_device *adev = ring->adev;
> > -
> > -     /* XXX check if swapping is necessary on BE */
> > -     if (ring->use_doorbell) {
> > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> ring->wptr);
> > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> > -     } else {
> > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring
> %s, "
> > -                       "only doorbell method supported on gfx9\n",
> > -                       ring->name);
> > -     }
> > -}
> > -
> >  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64
> addr,
> >                                        u64 seq, unsigned int flags)
> >  {
> > @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_compute = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               20 + /* gfx_v9_0_ring_emit_gds_switch */
> >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> > @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_kiq = {
> >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >       .support_64bit_ptrs = true,
> >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >       .emit_frame_size =
> >               20 + /* gfx_v9_0_ring_emit_gds_switch */
> >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
>
>

[-- Attachment #2: Type: text/html, Size: 21148 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-30 12:32   ` John Moore
@ 2026-04-30 13:22     ` Christian König
  2026-04-30 13:26       ` Alex Deucher
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2026-04-30 13:22 UTC (permalink / raw)
  To: John Moore; +Cc: alexdeucher, alexander.deucher, amd-gfx

Hi John,

On 4/30/26 14:32, John Moore wrote:
> Hi Christian,
> 
> Thanks for the review. All points addressed below.
> 
>> That should probably be readq() instead of this horrible and not
>> portable cast to atomic64_t.
>>
>> Alternatively we could just normally read the pointer with a memory
>> barrier since this is just system memory.
> 
> I went with the second option — this is system memory (writeback via
> GTT), not MMIO, so readq() felt semantically wrong. The v2 uses:
> 
>   get:  wptr = READ_ONCE(*(u64 *)ring->wptr_cpu_addr);
>         smp_rmb();

This needs to be rmb() and not smp_rmb(); smp_rmb() is only for CPU<->CPU synchronization but here we need CPU<->device synchronization.

And it needs to come *before* the read!

> 
>   set:  WRITE_ONCE(*(u64 *)ring->wptr_cpu_addr, ring->wptr);
>         smp_wmb();

Same here, but this time least the barrier ordering is correct.

>         WDOORBELL64(ring->doorbell_index, ring->wptr);
> 
> The alignment is safe — amdgpu_device_wb_get() returns offsets in
> multiples of 8 dwords (32 bytes), so the u64* cast always lands on
> a naturally-aligned address.
> 
> One question: READ_ONCE on a u64 is not atomic on 32-bit
> architectures (unlike atomic64_read which uses cmpxchg8b). DRM_AMDGPU
> has no formal CONFIG_64BIT dependency in Kconfig, though in practice
> nobody runs it on 32-bit.

We still have some people trying to use it on 32bit kernels. We should maybe consider to drop the 32bit support.

> Is READ_ONCE acceptable here, or would you
> prefer readq() to keep the atomicity guarantee?

Yeah good question I don't really know what to do here.

On the one hand you are right, writeq()/readq() are not correct because this isn't MMIO but system memory.

On the other hand I don't think Linux has an architecture independent way to guarantee that a write to system memory is done as an atomic 64bit write.

What we need to guarantee is that the device never sees an incomplete value because the write is done as two 32bit writes.  That is probably the reason why we used the atomic64_t hack in the first place.

Regards,
Christian.

> 
>> Pre-requisite/error checking first please.
>> Make that a if (WARN_ON(!ring->use_doorbell)) return.
> 
> Done. Both functions now have the guard at the top:
> 
>   if (WARN_ON(!ring->use_doorbell))
>       return 0;  /* or return; for set_wptr */
> 
>> And please don't use WARN_ON_ONCE() that is just to reduce the
>> amount of warnings printed into the logs on real HW errors.
>>
>> On functional coding errors like this one here it doesn't make sense
>> and is often overlooked.
> 
> Understood — changed to WARN_ON.
> 
>> Same here the case to atomic64_t is extremely questionable.
> 
> Fixed in set_wptr as well, same READ_ONCE/WRITE_ONCE approach.
> 
> v2 incoming once I hear back on the readq vs READ_ONCE question.
> 
> Thanks,
> John
> 
> "I will not be pushed, filed, stamped, indexed, briefed, debriefed, or numbered."
> ~ The Prisoner
> 
> 
> 
> 
> On Thu, Apr 30, 2026 at 2:19 AM Christian König <christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote:
> 
>     On 4/29/26 22:20, John B. Moore wrote:
>     > Move the duplicated doorbell-based get_wptr/set_wptr functions from
>     > gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
>     > helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
>     > in amdgpu_gfx.c.
>     >
>     > These functions are not HW generation dependent -- the doorbell path is
>     > identical across all four GFX versions:
>     >
>     >   get: atomic64_read(ring->wptr_cpu_addr)
>     >   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
>     >
>     > The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
>     > since doorbell is the only supported method on gfx9+ compute rings.
>     >
>     > Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
>     > wptr access patterns (MMIO registers or wb.wb[] offsets).
>     >
>     > Suggested-by: Alex Deucher <alexander.deucher@amd.com <mailto:alexander.deucher@amd.com>>
>     > Signed-off-by: John Moore <jbmoore61@gmail.com <mailto:jbmoore61@gmail.com>>
>     > ---
>     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
>     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
>     >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
>     >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
>     >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
>     >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
>     >  6 files changed, 58 insertions(+), 124 deletions(-)
>     >
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>     > index 77578ecc6..9e9c5cb81 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>     > @@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
>     >  #endif
>     >  }
>     > 
>     > +/**
>     > + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
>     > + * @ring: amdgpu_ring pointer
>     > + *
>     > + * Read the write pointer from the doorbell-mapped writeback address.
>     > + * This is HW-agnostic and shared across GFX generations that use
>     > + * doorbell-based compute queue management.
>     > + */
>     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
>     > +{
>     > +     /* XXX check if swapping is necessary on BE */
>     > +     if (ring->use_doorbell)
>     > +             return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> 
>     That should probably be readq() instead of this horrible and not portable cast to atomic64_t.
> 
>     Alternatively we could just normally read the pointer with a memory barrier since this is just system memory.
> 
>     > +
>     > +     WARN_ON_ONCE(1);
> 
>     Pre-requisite/error checking first please.
> 
>     Make that a if (WARN_ON(!ring->use_doorbell)) return.
> 
>     And please don't use WARN_ON_ONCE() that is just to reduce the amount of warnings printed into the logs on real HW errors.
> 
>     On functional coding errors like this one here it doesn't make sense and is often overlooked.
> 
>     > +     return 0;
>     > +}
>     > +
>     > +/**
>     > + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
>     > + * @ring: amdgpu_ring pointer
>     > + *
>     > + * Write the write pointer to the doorbell-mapped writeback address and
>     > + * ring the doorbell.  This is HW-agnostic and shared across GFX
>     > + * generations that use doorbell-based compute queue management.
>     > + */
>     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
>     > +{
>     > +     struct amdgpu_device *adev = ring->adev;
>     > +
>     > +     /* XXX check if swapping is necessary on BE */
>     > +     if (ring->use_doorbell) {
>     > +             atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
> 
>     Same here the case to atomic64_t is extremely questionable.
> 
>     Regards,
>     Christian.
> 
>     > +             WDOORBELL64(ring->doorbell_index, ring->wptr);
>     > +     } else {
>     > +             WARN_ON_ONCE(1);
>     > +     }
>     > +}
>     > +
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>     > index 585cc8e81..27f6beafb 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>     > @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
>     >  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
>     >  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
>     > 
>     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
>     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
>     > +
>     >  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
>     >  void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
>     > 
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > index 1893ceeeb..4c0272cba 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > @@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>     >       return *(uint32_t *)ring->rptr_cpu_addr;
>     >  }
>     > 
>     > -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     u64 wptr;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell)
>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>     > -     else
>     > -             BUG();
>     > -     return wptr;
>     > -}
>     > -
>     > -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     struct amdgpu_device *adev = ring->adev;
>     > -
>     > -     if (ring->use_doorbell) {
>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
>     > -                          ring->wptr);
>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>     > -     } else {
>     > -             BUG(); /* only DOORBELL method supported on gfx10 now */
>     > -     }
>     > -}
>     > -
>     >  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>     >  {
>     >       struct amdgpu_device *adev = ring->adev;
>     > @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
>     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
>     > @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
>     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>     > index 427975b5a..404604f2d 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>     > @@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>     >       return *(uint32_t *)ring->rptr_cpu_addr;
>     >  }
>     > 
>     > -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     u64 wptr;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell)
>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>     > -     else
>     > -             BUG();
>     > -     return wptr;
>     > -}
>     > -
>     > -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     struct amdgpu_device *adev = ring->adev;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell) {
>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
>     > -                          ring->wptr);
>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>     > -     } else {
>     > -             BUG(); /* only DOORBELL method supported on gfx11 now */
>     > -     }
>     > -}
>     > -
>     >  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>     >  {
>     >       struct amdgpu_device *adev = ring->adev;
>     > @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               5 + /* update_spm_vmid */
>     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
>     > @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
>     >               7 + /* gfx_v11_0_ring_emit_hdp_flush */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
>     > index 79ea1af36..7ba436444 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
>     > @@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>     >       return *(uint32_t *)ring->rptr_cpu_addr;
>     >  }
>     > 
>     > -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     u64 wptr;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell)
>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>     > -     else
>     > -             BUG();
>     > -     return wptr;
>     > -}
>     > -
>     > -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     struct amdgpu_device *adev = ring->adev;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell) {
>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
>     > -                          ring->wptr);
>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>     > -     } else {
>     > -             BUG(); /* only DOORBELL method supported on gfx12 now */
>     > -     }
>     > -}
>     > -
>     >  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>     >  {
>     >       struct amdgpu_device *adev = ring->adev;
>     > @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
>     >               5 + /* hdp invalidate */
>     > @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
>     >               5 + /*hdp invalidate */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     > index 8249135d7..798f94bca 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>     > @@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>     >       return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
>     >  }
>     > 
>     > -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     u64 wptr;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell) {
>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>     > -     } else {
>     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
>     > -                       "only doorbell method supported on gfx9\n",
>     > -                       ring->name);
>     > -             wptr = 0;
>     > -     }
>     > -     return wptr;
>     > -}
>     > -
>     > -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>     > -{
>     > -     struct amdgpu_device *adev = ring->adev;
>     > -
>     > -     /* XXX check if swapping is necessary on BE */
>     > -     if (ring->use_doorbell) {
>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>     > -     } else {
>     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
>     > -                       "only doorbell method supported on gfx9\n",
>     > -                       ring->name);
>     > -     }
>     > -}
>     > -
>     >  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
>     >                                        u64 seq, unsigned int flags)
>     >  {
>     > @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
>     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
>     > @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>     >       .support_64bit_ptrs = true,
>     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
>     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
>     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>     >       .emit_frame_size =
>     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
>     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-30 13:22     ` Christian König
@ 2026-04-30 13:26       ` Alex Deucher
  2026-04-30 13:49         ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Alex Deucher @ 2026-04-30 13:26 UTC (permalink / raw)
  To: Christian König; +Cc: John Moore, alexander.deucher, amd-gfx

On Thu, Apr 30, 2026 at 9:22 AM Christian König
<christian.koenig@amd.com> wrote:
>
> Hi John,
>
> On 4/30/26 14:32, John Moore wrote:
> > Hi Christian,
> >
> > Thanks for the review. All points addressed below.
> >
> >> That should probably be readq() instead of this horrible and not
> >> portable cast to atomic64_t.
> >>
> >> Alternatively we could just normally read the pointer with a memory
> >> barrier since this is just system memory.
> >
> > I went with the second option — this is system memory (writeback via
> > GTT), not MMIO, so readq() felt semantically wrong. The v2 uses:
> >
> >   get:  wptr = READ_ONCE(*(u64 *)ring->wptr_cpu_addr);
> >         smp_rmb();
>
> This needs to be rmb() and not smp_rmb(); smp_rmb() is only for CPU<->CPU synchronization but here we need CPU<->device synchronization.
>
> And it needs to come *before* the read!
>
> >
> >   set:  WRITE_ONCE(*(u64 *)ring->wptr_cpu_addr, ring->wptr);
> >         smp_wmb();
>
> Same here, but this time least the barrier ordering is correct.
>
> >         WDOORBELL64(ring->doorbell_index, ring->wptr);
> >
> > The alignment is safe — amdgpu_device_wb_get() returns offsets in
> > multiples of 8 dwords (32 bytes), so the u64* cast always lands on
> > a naturally-aligned address.
> >
> > One question: READ_ONCE on a u64 is not atomic on 32-bit
> > architectures (unlike atomic64_read which uses cmpxchg8b). DRM_AMDGPU
> > has no formal CONFIG_64BIT dependency in Kconfig, though in practice
> > nobody runs it on 32-bit.
>
> We still have some people trying to use it on 32bit kernels. We should maybe consider to drop the 32bit support.

32 bit support is the reason we used the atomic stuff in the first place.

Alex

>
> > Is READ_ONCE acceptable here, or would you
> > prefer readq() to keep the atomicity guarantee?
>
> Yeah good question I don't really know what to do here.
>
> On the one hand you are right, writeq()/readq() are not correct because this isn't MMIO but system memory.
>
> On the other hand I don't think Linux has an architecture independent way to guarantee that a write to system memory is done as an atomic 64bit write.
>
> What we need to guarantee is that the device never sees an incomplete value because the write is done as two 32bit writes.  That is probably the reason why we used the atomic64_t hack in the first place.
>
> Regards,
> Christian.
>
> >
> >> Pre-requisite/error checking first please.
> >> Make that a if (WARN_ON(!ring->use_doorbell)) return.
> >
> > Done. Both functions now have the guard at the top:
> >
> >   if (WARN_ON(!ring->use_doorbell))
> >       return 0;  /* or return; for set_wptr */
> >
> >> And please don't use WARN_ON_ONCE() that is just to reduce the
> >> amount of warnings printed into the logs on real HW errors.
> >>
> >> On functional coding errors like this one here it doesn't make sense
> >> and is often overlooked.
> >
> > Understood — changed to WARN_ON.
> >
> >> Same here the case to atomic64_t is extremely questionable.
> >
> > Fixed in set_wptr as well, same READ_ONCE/WRITE_ONCE approach.
> >
> > v2 incoming once I hear back on the readq vs READ_ONCE question.
> >
> > Thanks,
> > John
> >
> > "I will not be pushed, filed, stamped, indexed, briefed, debriefed, or numbered."
> > ~ The Prisoner
> >
> >
> >
> >
> > On Thu, Apr 30, 2026 at 2:19 AM Christian König <christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote:
> >
> >     On 4/29/26 22:20, John B. Moore wrote:
> >     > Move the duplicated doorbell-based get_wptr/set_wptr functions from
> >     > gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
> >     > helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
> >     > in amdgpu_gfx.c.
> >     >
> >     > These functions are not HW generation dependent -- the doorbell path is
> >     > identical across all four GFX versions:
> >     >
> >     >   get: atomic64_read(ring->wptr_cpu_addr)
> >     >   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
> >     >
> >     > The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
> >     > since doorbell is the only supported method on gfx9+ compute rings.
> >     >
> >     > Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
> >     > wptr access patterns (MMIO registers or wb.wb[] offsets).
> >     >
> >     > Suggested-by: Alex Deucher <alexander.deucher@amd.com <mailto:alexander.deucher@amd.com>>
> >     > Signed-off-by: John Moore <jbmoore61@gmail.com <mailto:jbmoore61@gmail.com>>
> >     > ---
> >     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
> >     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
> >     >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
> >     >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
> >     >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
> >     >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
> >     >  6 files changed, 58 insertions(+), 124 deletions(-)
> >     >
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >     > index 77578ecc6..9e9c5cb81 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >     > @@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
> >     >  #endif
> >     >  }
> >     >
> >     > +/**
> >     > + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
> >     > + * @ring: amdgpu_ring pointer
> >     > + *
> >     > + * Read the write pointer from the doorbell-mapped writeback address.
> >     > + * This is HW-agnostic and shared across GFX generations that use
> >     > + * doorbell-based compute queue management.
> >     > + */
> >     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
> >     > +{
> >     > +     /* XXX check if swapping is necessary on BE */
> >     > +     if (ring->use_doorbell)
> >     > +             return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> >
> >     That should probably be readq() instead of this horrible and not portable cast to atomic64_t.
> >
> >     Alternatively we could just normally read the pointer with a memory barrier since this is just system memory.
> >
> >     > +
> >     > +     WARN_ON_ONCE(1);
> >
> >     Pre-requisite/error checking first please.
> >
> >     Make that a if (WARN_ON(!ring->use_doorbell)) return.
> >
> >     And please don't use WARN_ON_ONCE() that is just to reduce the amount of warnings printed into the logs on real HW errors.
> >
> >     On functional coding errors like this one here it doesn't make sense and is often overlooked.
> >
> >     > +     return 0;
> >     > +}
> >     > +
> >     > +/**
> >     > + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
> >     > + * @ring: amdgpu_ring pointer
> >     > + *
> >     > + * Write the write pointer to the doorbell-mapped writeback address and
> >     > + * ring the doorbell.  This is HW-agnostic and shared across GFX
> >     > + * generations that use doorbell-based compute queue management.
> >     > + */
> >     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
> >     > +{
> >     > +     struct amdgpu_device *adev = ring->adev;
> >     > +
> >     > +     /* XXX check if swapping is necessary on BE */
> >     > +     if (ring->use_doorbell) {
> >     > +             atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
> >
> >     Same here the case to atomic64_t is extremely questionable.
> >
> >     Regards,
> >     Christian.
> >
> >     > +             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >     > +     } else {
> >     > +             WARN_ON_ONCE(1);
> >     > +     }
> >     > +}
> >     > +
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >     > index 585cc8e81..27f6beafb 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >     > @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
> >     >  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
> >     >  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
> >     >
> >     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
> >     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
> >     > +
> >     >  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
> >     >  void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
> >     >
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> >     > index 1893ceeeb..4c0272cba 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> >     > @@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >     >       return *(uint32_t *)ring->rptr_cpu_addr;
> >     >  }
> >     >
> >     > -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     u64 wptr;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell)
> >     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> >     > -     else
> >     > -             BUG();
> >     > -     return wptr;
> >     > -}
> >     > -
> >     > -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     struct amdgpu_device *adev = ring->adev;
> >     > -
> >     > -     if (ring->use_doorbell) {
> >     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> >     > -                          ring->wptr);
> >     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >     > -     } else {
> >     > -             BUG(); /* only DOORBELL method supported on gfx10 now */
> >     > -     }
> >     > -}
> >     > -
> >     >  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> >     >  {
> >     >       struct amdgpu_device *adev = ring->adev;
> >     > @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
> >     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
> >     > @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
> >     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >     > index 427975b5a..404604f2d 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >     > @@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >     >       return *(uint32_t *)ring->rptr_cpu_addr;
> >     >  }
> >     >
> >     > -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     u64 wptr;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell)
> >     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> >     > -     else
> >     > -             BUG();
> >     > -     return wptr;
> >     > -}
> >     > -
> >     > -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     struct amdgpu_device *adev = ring->adev;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell) {
> >     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> >     > -                          ring->wptr);
> >     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >     > -     } else {
> >     > -             BUG(); /* only DOORBELL method supported on gfx11 now */
> >     > -     }
> >     > -}
> >     > -
> >     >  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> >     >  {
> >     >       struct amdgpu_device *adev = ring->adev;
> >     > @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               5 + /* update_spm_vmid */
> >     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
> >     > @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
> >     >               7 + /* gfx_v11_0_ring_emit_hdp_flush */
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> >     > index 79ea1af36..7ba436444 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> >     > @@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >     >       return *(uint32_t *)ring->rptr_cpu_addr;
> >     >  }
> >     >
> >     > -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     u64 wptr;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell)
> >     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> >     > -     else
> >     > -             BUG();
> >     > -     return wptr;
> >     > -}
> >     > -
> >     > -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     struct amdgpu_device *adev = ring->adev;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell) {
> >     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> >     > -                          ring->wptr);
> >     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >     > -     } else {
> >     > -             BUG(); /* only DOORBELL method supported on gfx12 now */
> >     > -     }
> >     > -}
> >     > -
> >     >  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
> >     >  {
> >     >       struct amdgpu_device *adev = ring->adev;
> >     > @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
> >     >               5 + /* hdp invalidate */
> >     > @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
> >     >               5 + /*hdp invalidate */
> >     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >     > index 8249135d7..798f94bca 100644
> >     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >     > @@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >     >       return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
> >     >  }
> >     >
> >     > -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     u64 wptr;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell) {
> >     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
> >     > -     } else {
> >     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
> >     > -                       "only doorbell method supported on gfx9\n",
> >     > -                       ring->name);
> >     > -             wptr = 0;
> >     > -     }
> >     > -     return wptr;
> >     > -}
> >     > -
> >     > -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
> >     > -{
> >     > -     struct amdgpu_device *adev = ring->adev;
> >     > -
> >     > -     /* XXX check if swapping is necessary on BE */
> >     > -     if (ring->use_doorbell) {
> >     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
> >     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >     > -     } else {
> >     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
> >     > -                       "only doorbell method supported on gfx9\n",
> >     > -                       ring->name);
> >     > -     }
> >     > -}
> >     > -
> >     >  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
> >     >                                        u64 seq, unsigned int flags)
> >     >  {
> >     > @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
> >     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> >     > @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
> >     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >     >       .support_64bit_ptrs = true,
> >     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> >     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> >     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> >     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >     >       .emit_frame_size =
> >     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
> >     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> >
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-30 13:26       ` Alex Deucher
@ 2026-04-30 13:49         ` Christian König
  2026-04-30 13:57           ` John Moore
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2026-04-30 13:49 UTC (permalink / raw)
  To: Alex Deucher; +Cc: John Moore, alexander.deucher, amd-gfx

On 4/30/26 15:26, Alex Deucher wrote:
> On Thu, Apr 30, 2026 at 9:22 AM Christian König
> <christian.koenig@amd.com> wrote:
>>
>> Hi John,
>>
>> On 4/30/26 14:32, John Moore wrote:
>>> Hi Christian,
>>>
>>> Thanks for the review. All points addressed below.
>>>
>>>> That should probably be readq() instead of this horrible and not
>>>> portable cast to atomic64_t.
>>>>
>>>> Alternatively we could just normally read the pointer with a memory
>>>> barrier since this is just system memory.
>>>
>>> I went with the second option — this is system memory (writeback via
>>> GTT), not MMIO, so readq() felt semantically wrong. The v2 uses:
>>>
>>>   get:  wptr = READ_ONCE(*(u64 *)ring->wptr_cpu_addr);
>>>         smp_rmb();
>>
>> This needs to be rmb() and not smp_rmb(); smp_rmb() is only for CPU<->CPU synchronization but here we need CPU<->device synchronization.
>>
>> And it needs to come *before* the read!
>>
>>>
>>>   set:  WRITE_ONCE(*(u64 *)ring->wptr_cpu_addr, ring->wptr);
>>>         smp_wmb();
>>
>> Same here, but this time least the barrier ordering is correct.
>>
>>>         WDOORBELL64(ring->doorbell_index, ring->wptr);
>>>
>>> The alignment is safe — amdgpu_device_wb_get() returns offsets in
>>> multiples of 8 dwords (32 bytes), so the u64* cast always lands on
>>> a naturally-aligned address.
>>>
>>> One question: READ_ONCE on a u64 is not atomic on 32-bit
>>> architectures (unlike atomic64_read which uses cmpxchg8b). DRM_AMDGPU
>>> has no formal CONFIG_64BIT dependency in Kconfig, though in practice
>>> nobody runs it on 32-bit.
>>
>> We still have some people trying to use it on 32bit kernels. We should maybe consider to drop the 32bit support.
> 
> 32 bit support is the reason we used the atomic stuff in the first place.

Yeah the problem is that is just and extremely ugly hack.

IIRC there are architecture who can't do 64bit writes at all, on those systems an atomic_64 is implemented by using a lock.

On the other hand I don't think anybody would be able to use HW AMDGPU supports on a SPARC, Alpha or +20 year old MIPS system because of the lack of PCIe bus.

My educated guess is that the correct answer is to have a config dependency to prevent AMDGPU from even compiling on such architectures and then using a WRITE_ONCE()/READ_ONCE() with appropriate memory barriers.

But of hand I don't know how that stuff is abstracted on the architecture side and which config option to depend on.

Regards,
Christian.

> 
> Alex
> 
>>
>>> Is READ_ONCE acceptable here, or would you
>>> prefer readq() to keep the atomicity guarantee?
>>
>> Yeah good question I don't really know what to do here.
>>
>> On the one hand you are right, writeq()/readq() are not correct because this isn't MMIO but system memory.
>>
>> On the other hand I don't think Linux has an architecture independent way to guarantee that a write to system memory is done as an atomic 64bit write.
>>
>> What we need to guarantee is that the device never sees an incomplete value because the write is done as two 32bit writes.  That is probably the reason why we used the atomic64_t hack in the first place.
>>
>> Regards,
>> Christian.
>>
>>>
>>>> Pre-requisite/error checking first please.
>>>> Make that a if (WARN_ON(!ring->use_doorbell)) return.
>>>
>>> Done. Both functions now have the guard at the top:
>>>
>>>   if (WARN_ON(!ring->use_doorbell))
>>>       return 0;  /* or return; for set_wptr */
>>>
>>>> And please don't use WARN_ON_ONCE() that is just to reduce the
>>>> amount of warnings printed into the logs on real HW errors.
>>>>
>>>> On functional coding errors like this one here it doesn't make sense
>>>> and is often overlooked.
>>>
>>> Understood — changed to WARN_ON.
>>>
>>>> Same here the case to atomic64_t is extremely questionable.
>>>
>>> Fixed in set_wptr as well, same READ_ONCE/WRITE_ONCE approach.
>>>
>>> v2 incoming once I hear back on the readq vs READ_ONCE question.
>>>
>>> Thanks,
>>> John
>>>
>>> "I will not be pushed, filed, stamped, indexed, briefed, debriefed, or numbered."
>>> ~ The Prisoner
>>>
>>>
>>>
>>>
>>> On Thu, Apr 30, 2026 at 2:19 AM Christian König <christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote:
>>>
>>>     On 4/29/26 22:20, John B. Moore wrote:
>>>     > Move the duplicated doorbell-based get_wptr/set_wptr functions from
>>>     > gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
>>>     > helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
>>>     > in amdgpu_gfx.c.
>>>     >
>>>     > These functions are not HW generation dependent -- the doorbell path is
>>>     > identical across all four GFX versions:
>>>     >
>>>     >   get: atomic64_read(ring->wptr_cpu_addr)
>>>     >   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
>>>     >
>>>     > The non-doorbell fallback is replaced with WARN_ON_ONCE instead of BUG()
>>>     > since doorbell is the only supported method on gfx9+ compute rings.
>>>     >
>>>     > Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
>>>     > wptr access patterns (MMIO registers or wb.wb[] offsets).
>>>     >
>>>     > Suggested-by: Alex Deucher <alexander.deucher@amd.com <mailto:alexander.deucher@amd.com>>
>>>     > Signed-off-by: John Moore <jbmoore61@gmail.com <mailto:jbmoore61@gmail.com>>
>>>     > ---
>>>     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 +++++++++++++++++++++++++
>>>     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
>>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 +++------------------
>>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 +++------------------
>>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 +++------------------
>>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39 +++----------------------
>>>     >  6 files changed, 58 insertions(+), 124 deletions(-)
>>>     >
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>>     > index 77578ecc6..9e9c5cb81 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>>     > @@ -2596,3 +2596,42 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
>>>     >  #endif
>>>     >  }
>>>     >
>>>     > +/**
>>>     > + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
>>>     > + * @ring: amdgpu_ring pointer
>>>     > + *
>>>     > + * Read the write pointer from the doorbell-mapped writeback address.
>>>     > + * This is HW-agnostic and shared across GFX generations that use
>>>     > + * doorbell-based compute queue management.
>>>     > + */
>>>     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
>>>     > +{
>>>     > +     /* XXX check if swapping is necessary on BE */
>>>     > +     if (ring->use_doorbell)
>>>     > +             return atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>>>
>>>     That should probably be readq() instead of this horrible and not portable cast to atomic64_t.
>>>
>>>     Alternatively we could just normally read the pointer with a memory barrier since this is just system memory.
>>>
>>>     > +
>>>     > +     WARN_ON_ONCE(1);
>>>
>>>     Pre-requisite/error checking first please.
>>>
>>>     Make that a if (WARN_ON(!ring->use_doorbell)) return.
>>>
>>>     And please don't use WARN_ON_ONCE() that is just to reduce the amount of warnings printed into the logs on real HW errors.
>>>
>>>     On functional coding errors like this one here it doesn't make sense and is often overlooked.
>>>
>>>     > +     return 0;
>>>     > +}
>>>     > +
>>>     > +/**
>>>     > + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
>>>     > + * @ring: amdgpu_ring pointer
>>>     > + *
>>>     > + * Write the write pointer to the doorbell-mapped writeback address and
>>>     > + * ring the doorbell.  This is HW-agnostic and shared across GFX
>>>     > + * generations that use doorbell-based compute queue management.
>>>     > + */
>>>     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
>>>     > +{
>>>     > +     struct amdgpu_device *adev = ring->adev;
>>>     > +
>>>     > +     /* XXX check if swapping is necessary on BE */
>>>     > +     if (ring->use_doorbell) {
>>>     > +             atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
>>>
>>>     Same here the case to atomic64_t is extremely questionable.
>>>
>>>     Regards,
>>>     Christian.
>>>
>>>     > +             WDOORBELL64(ring->doorbell_index, ring->wptr);
>>>     > +     } else {
>>>     > +             WARN_ON_ONCE(1);
>>>     > +     }
>>>     > +}
>>>     > +
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>     > index 585cc8e81..27f6beafb 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>>     > @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
>>>     >  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
>>>     >  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
>>>     >
>>>     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
>>>     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
>>>     > +
>>>     >  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
>>>     >  void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
>>>     >
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>     > index 1893ceeeb..4c0272cba 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>     > @@ -8586,31 +8586,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>>>     >       return *(uint32_t *)ring->rptr_cpu_addr;
>>>     >  }
>>>     >
>>>     > -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     u64 wptr;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell)
>>>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>>>     > -     else
>>>     > -             BUG();
>>>     > -     return wptr;
>>>     > -}
>>>     > -
>>>     > -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     struct amdgpu_device *adev = ring->adev;
>>>     > -
>>>     > -     if (ring->use_doorbell) {
>>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
>>>     > -                          ring->wptr);
>>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>>>     > -     } else {
>>>     > -             BUG(); /* only DOORBELL method supported on gfx10 now */
>>>     > -     }
>>>     > -}
>>>     > -
>>>     >  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>>>     >  {
>>>     >       struct amdgpu_device *adev = ring->adev;
>>>     > @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
>>>     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
>>>     > @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
>>>     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>>     > index 427975b5a..404604f2d 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
>>>     > @@ -5818,32 +5818,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>>>     >       return *(uint32_t *)ring->rptr_cpu_addr;
>>>     >  }
>>>     >
>>>     > -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     u64 wptr;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell)
>>>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>>>     > -     else
>>>     > -             BUG();
>>>     > -     return wptr;
>>>     > -}
>>>     > -
>>>     > -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     struct amdgpu_device *adev = ring->adev;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell) {
>>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
>>>     > -                          ring->wptr);
>>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>>>     > -     } else {
>>>     > -             BUG(); /* only DOORBELL method supported on gfx11 now */
>>>     > -     }
>>>     > -}
>>>     > -
>>>     >  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>>>     >  {
>>>     >       struct amdgpu_device *adev = ring->adev;
>>>     > @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               5 + /* update_spm_vmid */
>>>     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
>>>     > @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
>>>     >               7 + /* gfx_v11_0_ring_emit_hdp_flush */
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
>>>     > index 79ea1af36..7ba436444 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
>>>     > @@ -4363,32 +4363,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>>>     >       return *(uint32_t *)ring->rptr_cpu_addr;
>>>     >  }
>>>     >
>>>     > -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     u64 wptr;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell)
>>>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>>>     > -     else
>>>     > -             BUG();
>>>     > -     return wptr;
>>>     > -}
>>>     > -
>>>     > -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     struct amdgpu_device *adev = ring->adev;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell) {
>>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
>>>     > -                          ring->wptr);
>>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>>>     > -     } else {
>>>     > -             BUG(); /* only DOORBELL method supported on gfx12 now */
>>>     > -     }
>>>     > -}
>>>     > -
>>>     >  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
>>>     >  {
>>>     >       struct amdgpu_device *adev = ring->adev;
>>>     > @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
>>>     >               5 + /* hdp invalidate */
>>>     > @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
>>>     >               5 + /*hdp invalidate */
>>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>     > index 8249135d7..798f94bca 100644
>>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>     > @@ -5640,37 +5640,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
>>>     >       return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
>>>     >  }
>>>     >
>>>     > -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     u64 wptr;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell) {
>>>     > -             wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
>>>     > -     } else {
>>>     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on ring %s, "
>>>     > -                       "only doorbell method supported on gfx9\n",
>>>     > -                       ring->name);
>>>     > -             wptr = 0;
>>>     > -     }
>>>     > -     return wptr;
>>>     > -}
>>>     > -
>>>     > -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
>>>     > -{
>>>     > -     struct amdgpu_device *adev = ring->adev;
>>>     > -
>>>     > -     /* XXX check if swapping is necessary on BE */
>>>     > -     if (ring->use_doorbell) {
>>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
>>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
>>>     > -     } else {
>>>     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on ring %s, "
>>>     > -                       "only doorbell method supported on gfx9\n",
>>>     > -                       ring->name);
>>>     > -     }
>>>     > -}
>>>     > -
>>>     >  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
>>>     >                                        u64 seq, unsigned int flags)
>>>     >  {
>>>     > @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
>>>     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
>>>     > @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
>>>     >       .support_64bit_ptrs = true,
>>>     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
>>>     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
>>>     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
>>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
>>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
>>>     >       .emit_frame_size =
>>>     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
>>>     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
>>>
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c
  2026-04-30 13:49         ` Christian König
@ 2026-04-30 13:57           ` John Moore
  0 siblings, 0 replies; 9+ messages in thread
From: John Moore @ 2026-04-30 13:57 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, alexander.deucher, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 24673 bytes --]

Hi Christian,

I completely understand the issue and that you are stuck due to the
hardware architectures.
Thanks for the corrections on the barriers — both fixed:

  get:  rmb();
        wptr = <read>(ring->wptr_cpu_addr);

  set:  <write>(ring->wptr_cpu_addr, ring->wptr);
        wmb();
        WDOORBELL64(ring->doorbell_index, ring->wptr);

rmb()/wmb() for CPU<->device, and the read barrier now comes before
the read.

On the 32-bit atomicity question — understood, that's exactly why
the atomic64_t hack exists.  Since there's no clean arch-independent
way to do atomic 64-bit writes to system memory, I've prepared both
options as complete patches against amd-staging-drm-next so you can
pick whichever you prefer:

  Option A (attached): Keep atomic64_set()/atomic64_read() with the
  cast hidden behind a static inline helper (wptr_as_atomic).
  Preserves 32-bit atomicity.  Not pretty, but honest.

  Option B (attached): Use writeq()/readq() instead.  Atomic on all
  architectures, but semantically these are MMIO accessors being
  used on system memory.  Documented in comments.

Both patches compile clean, address all your review feedback (WARN_ON
at top, rmb/wmb, no WARN_ON_ONCE), and are identical except for the
read/write mechanism.

Regards,
John
"I will not be pushed, filed, stamped, indexed, briefed, debriefed, or
numbered."
~ The Prisoner




On Thu, Apr 30, 2026 at 8:49 AM Christian König <christian.koenig@amd.com>
wrote:

> On 4/30/26 15:26, Alex Deucher wrote:
> > On Thu, Apr 30, 2026 at 9:22 AM Christian König
> > <christian.koenig@amd.com> wrote:
> >>
> >> Hi John,
> >>
> >> On 4/30/26 14:32, John Moore wrote:
> >>> Hi Christian,
> >>>
> >>> Thanks for the review. All points addressed below.
> >>>
> >>>> That should probably be readq() instead of this horrible and not
> >>>> portable cast to atomic64_t.
> >>>>
> >>>> Alternatively we could just normally read the pointer with a memory
> >>>> barrier since this is just system memory.
> >>>
> >>> I went with the second option — this is system memory (writeback via
> >>> GTT), not MMIO, so readq() felt semantically wrong. The v2 uses:
> >>>
> >>>   get:  wptr = READ_ONCE(*(u64 *)ring->wptr_cpu_addr);
> >>>         smp_rmb();
> >>
> >> This needs to be rmb() and not smp_rmb(); smp_rmb() is only for
> CPU<->CPU synchronization but here we need CPU<->device synchronization.
> >>
> >> And it needs to come *before* the read!
> >>
> >>>
> >>>   set:  WRITE_ONCE(*(u64 *)ring->wptr_cpu_addr, ring->wptr);
> >>>         smp_wmb();
> >>
> >> Same here, but this time least the barrier ordering is correct.
> >>
> >>>         WDOORBELL64(ring->doorbell_index, ring->wptr);
> >>>
> >>> The alignment is safe — amdgpu_device_wb_get() returns offsets in
> >>> multiples of 8 dwords (32 bytes), so the u64* cast always lands on
> >>> a naturally-aligned address.
> >>>
> >>> One question: READ_ONCE on a u64 is not atomic on 32-bit
> >>> architectures (unlike atomic64_read which uses cmpxchg8b). DRM_AMDGPU
> >>> has no formal CONFIG_64BIT dependency in Kconfig, though in practice
> >>> nobody runs it on 32-bit.
> >>
> >> We still have some people trying to use it on 32bit kernels. We should
> maybe consider to drop the 32bit support.
> >
> > 32 bit support is the reason we used the atomic stuff in the first place.
>
> Yeah the problem is that is just and extremely ugly hack.
>
> IIRC there are architecture who can't do 64bit writes at all, on those
> systems an atomic_64 is implemented by using a lock.
>
> On the other hand I don't think anybody would be able to use HW AMDGPU
> supports on a SPARC, Alpha or +20 year old MIPS system because of the lack
> of PCIe bus.
>
> My educated guess is that the correct answer is to have a config
> dependency to prevent AMDGPU from even compiling on such architectures and
> then using a WRITE_ONCE()/READ_ONCE() with appropriate memory barriers.
>
> But of hand I don't know how that stuff is abstracted on the architecture
> side and which config option to depend on.
>
> Regards,
> Christian.
>
> >
> > Alex
> >
> >>
> >>> Is READ_ONCE acceptable here, or would you
> >>> prefer readq() to keep the atomicity guarantee?
> >>
> >> Yeah good question I don't really know what to do here.
> >>
> >> On the one hand you are right, writeq()/readq() are not correct because
> this isn't MMIO but system memory.
> >>
> >> On the other hand I don't think Linux has an architecture independent
> way to guarantee that a write to system memory is done as an atomic 64bit
> write.
> >>
> >> What we need to guarantee is that the device never sees an incomplete
> value because the write is done as two 32bit writes.  That is probably the
> reason why we used the atomic64_t hack in the first place.
> >>
> >> Regards,
> >> Christian.
> >>
> >>>
> >>>> Pre-requisite/error checking first please.
> >>>> Make that a if (WARN_ON(!ring->use_doorbell)) return.
> >>>
> >>> Done. Both functions now have the guard at the top:
> >>>
> >>>   if (WARN_ON(!ring->use_doorbell))
> >>>       return 0;  /* or return; for set_wptr */
> >>>
> >>>> And please don't use WARN_ON_ONCE() that is just to reduce the
> >>>> amount of warnings printed into the logs on real HW errors.
> >>>>
> >>>> On functional coding errors like this one here it doesn't make sense
> >>>> and is often overlooked.
> >>>
> >>> Understood — changed to WARN_ON.
> >>>
> >>>> Same here the case to atomic64_t is extremely questionable.
> >>>
> >>> Fixed in set_wptr as well, same READ_ONCE/WRITE_ONCE approach.
> >>>
> >>> v2 incoming once I hear back on the readq vs READ_ONCE question.
> >>>
> >>> Thanks,
> >>> John
> >>>
> >>> "I will not be pushed, filed, stamped, indexed, briefed, debriefed, or
> numbered."
> >>> ~ The Prisoner
> >>>
> >>>
> >>>
> >>>
> >>> On Thu, Apr 30, 2026 at 2:19 AM Christian König <
> christian.koenig@amd.com <mailto:christian.koenig@amd.com>> wrote:
> >>>
> >>>     On 4/29/26 22:20, John B. Moore wrote:
> >>>     > Move the duplicated doorbell-based get_wptr/set_wptr functions
> from
> >>>     > gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
> >>>     > helpers amdgpu_gfx_get_wptr_compute() and
> amdgpu_gfx_set_wptr_compute()
> >>>     > in amdgpu_gfx.c.
> >>>     >
> >>>     > These functions are not HW generation dependent -- the doorbell
> path is
> >>>     > identical across all four GFX versions:
> >>>     >
> >>>     >   get: atomic64_read(ring->wptr_cpu_addr)
> >>>     >   set: atomic64_set(ring->wptr_cpu_addr) + WDOORBELL64()
> >>>     >
> >>>     > The non-doorbell fallback is replaced with WARN_ON_ONCE instead
> of BUG()
> >>>     > since doorbell is the only supported method on gfx9+ compute
> rings.
> >>>     >
> >>>     > Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have
> different
> >>>     > wptr access patterns (MMIO registers or wb.wb[] offsets).
> >>>     >
> >>>     > Suggested-by: Alex Deucher <alexander.deucher@amd.com <mailto:
> alexander.deucher@amd.com>>
> >>>     > Signed-off-by: John Moore <jbmoore61@gmail.com <mailto:
> jbmoore61@gmail.com>>
> >>>     > ---
> >>>     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39
> +++++++++++++++++++++++++
> >>>     >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
> >>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33
> +++------------------
> >>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34
> +++------------------
> >>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34
> +++------------------
> >>>     >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 39
> +++----------------------
> >>>     >  6 files changed, 58 insertions(+), 124 deletions(-)
> >>>     >
> >>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>>     > index 77578ecc6..9e9c5cb81 100644
> >>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>>     > @@ -2596,3 +2596,42 @@ void
> amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
> >>>     >  #endif
> >>>     >  }
> >>>     >
> >>>     > +/**
> >>>     > + * amdgpu_gfx_get_wptr_compute - common get_wptr for compute
> rings using doorbells
> >>>     > + * @ring: amdgpu_ring pointer
> >>>     > + *
> >>>     > + * Read the write pointer from the doorbell-mapped writeback
> address.
> >>>     > + * This is HW-agnostic and shared across GFX generations that
> use
> >>>     > + * doorbell-based compute queue management.
> >>>     > + */
> >>>     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
> >>>     > +{
> >>>     > +     /* XXX check if swapping is necessary on BE */
> >>>     > +     if (ring->use_doorbell)
> >>>     > +             return atomic64_read((atomic64_t
> *)ring->wptr_cpu_addr);
> >>>
> >>>     That should probably be readq() instead of this horrible and not
> portable cast to atomic64_t.
> >>>
> >>>     Alternatively we could just normally read the pointer with a
> memory barrier since this is just system memory.
> >>>
> >>>     > +
> >>>     > +     WARN_ON_ONCE(1);
> >>>
> >>>     Pre-requisite/error checking first please.
> >>>
> >>>     Make that a if (WARN_ON(!ring->use_doorbell)) return.
> >>>
> >>>     And please don't use WARN_ON_ONCE() that is just to reduce the
> amount of warnings printed into the logs on real HW errors.
> >>>
> >>>     On functional coding errors like this one here it doesn't make
> sense and is often overlooked.
> >>>
> >>>     > +     return 0;
> >>>     > +}
> >>>     > +
> >>>     > +/**
> >>>     > + * amdgpu_gfx_set_wptr_compute - common set_wptr for compute
> rings using doorbells
> >>>     > + * @ring: amdgpu_ring pointer
> >>>     > + *
> >>>     > + * Write the write pointer to the doorbell-mapped writeback
> address and
> >>>     > + * ring the doorbell.  This is HW-agnostic and shared across GFX
> >>>     > + * generations that use doorbell-based compute queue management.
> >>>     > + */
> >>>     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
> >>>     > +{
> >>>     > +     struct amdgpu_device *adev = ring->adev;
> >>>     > +
> >>>     > +     /* XXX check if swapping is necessary on BE */
> >>>     > +     if (ring->use_doorbell) {
> >>>     > +             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> ring->wptr);
> >>>
> >>>     Same here the case to atomic64_t is extremely questionable.
> >>>
> >>>     Regards,
> >>>     Christian.
> >>>
> >>>     > +             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >>>     > +     } else {
> >>>     > +             WARN_ON_ONCE(1);
> >>>     > +     }
> >>>     > +}
> >>>     > +
> >>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>>     > index 585cc8e81..27f6beafb 100644
> >>>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>>     > @@ -653,6 +653,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32
> *buffer);
> >>>     >  u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32
> *buffer, u32 count);
> >>>     >  void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
> >>>     >
> >>>     > +u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
> >>>     > +void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
> >>>     > +
> >>>     >  void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device
> *adev);
> >>>     >  void amdgpu_debugfs_compute_sched_mask_init(struct
> amdgpu_device *adev);
> >>>     >
> >>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> >>>     > index 1893ceeeb..4c0272cba 100644
> >>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> >>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> >>>     > @@ -8586,31 +8586,6 @@ static u64
> gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >>>     >       return *(uint32_t *)ring->rptr_cpu_addr;
> >>>     >  }
> >>>     >
> >>>     > -static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     u64 wptr;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell)
> >>>     > -             wptr = atomic64_read((atomic64_t
> *)ring->wptr_cpu_addr);
> >>>     > -     else
> >>>     > -             BUG();
> >>>     > -     return wptr;
> >>>     > -}
> >>>     > -
> >>>     > -static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     struct amdgpu_device *adev = ring->adev;
> >>>     > -
> >>>     > -     if (ring->use_doorbell) {
> >>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> >>>     > -                          ring->wptr);
> >>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >>>     > -     } else {
> >>>     > -             BUG(); /* only DOORBELL method supported on gfx10
> now */
> >>>     > -     }
> >>>     > -}
> >>>     > -
> >>>     >  static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring
> *ring)
> >>>     >  {
> >>>     >       struct amdgpu_device *adev = ring->adev;
> >>>     > @@ -9881,8 +9856,8 @@ static const struct amdgpu_ring_funcs
> gfx_v10_0_ring_funcs_compute = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
> >>>     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
> >>>     > @@ -9921,8 +9896,8 @@ static const struct amdgpu_ring_funcs
> gfx_v10_0_ring_funcs_kiq = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v10_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v10_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v10_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               20 + /* gfx_v10_0_ring_emit_gds_switch */
> >>>     >               7 + /* gfx_v10_0_ring_emit_hdp_flush */
> >>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>>     > index 427975b5a..404604f2d 100644
> >>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> >>>     > @@ -5818,32 +5818,6 @@ static u64
> gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >>>     >       return *(uint32_t *)ring->rptr_cpu_addr;
> >>>     >  }
> >>>     >
> >>>     > -static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     u64 wptr;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell)
> >>>     > -             wptr = atomic64_read((atomic64_t
> *)ring->wptr_cpu_addr);
> >>>     > -     else
> >>>     > -             BUG();
> >>>     > -     return wptr;
> >>>     > -}
> >>>     > -
> >>>     > -static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     struct amdgpu_device *adev = ring->adev;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell) {
> >>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> >>>     > -                          ring->wptr);
> >>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >>>     > -     } else {
> >>>     > -             BUG(); /* only DOORBELL method supported on gfx11
> now */
> >>>     > -     }
> >>>     > -}
> >>>     > -
> >>>     >  static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring
> *ring)
> >>>     >  {
> >>>     >       struct amdgpu_device *adev = ring->adev;
> >>>     > @@ -7266,8 +7240,8 @@ static const struct amdgpu_ring_funcs
> gfx_v11_0_ring_funcs_compute = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               5 + /* update_spm_vmid */
> >>>     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
> >>>     > @@ -7307,8 +7281,8 @@ static const struct amdgpu_ring_funcs
> gfx_v11_0_ring_funcs_kiq = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v11_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v11_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v11_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               20 + /* gfx_v11_0_ring_emit_gds_switch */
> >>>     >               7 + /* gfx_v11_0_ring_emit_hdp_flush */
> >>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> >>>     > index 79ea1af36..7ba436444 100644
> >>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> >>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> >>>     > @@ -4363,32 +4363,6 @@ static u64
> gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >>>     >       return *(uint32_t *)ring->rptr_cpu_addr;
> >>>     >  }
> >>>     >
> >>>     > -static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     u64 wptr;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell)
> >>>     > -             wptr = atomic64_read((atomic64_t
> *)ring->wptr_cpu_addr);
> >>>     > -     else
> >>>     > -             BUG();
> >>>     > -     return wptr;
> >>>     > -}
> >>>     > -
> >>>     > -static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     struct amdgpu_device *adev = ring->adev;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell) {
> >>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> >>>     > -                          ring->wptr);
> >>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >>>     > -     } else {
> >>>     > -             BUG(); /* only DOORBELL method supported on gfx12
> now */
> >>>     > -     }
> >>>     > -}
> >>>     > -
> >>>     >  static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring
> *ring)
> >>>     >  {
> >>>     >       struct amdgpu_device *adev = ring->adev;
> >>>     > @@ -5523,8 +5497,8 @@ static const struct amdgpu_ring_funcs
> gfx_v12_0_ring_funcs_compute = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
> >>>     >               5 + /* hdp invalidate */
> >>>     > @@ -5561,8 +5535,8 @@ static const struct amdgpu_ring_funcs
> gfx_v12_0_ring_funcs_kiq = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v12_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v12_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v12_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               7 + /* gfx_v12_0_ring_emit_hdp_flush */
> >>>     >               5 + /*hdp invalidate */
> >>>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>>     > index 8249135d7..798f94bca 100644
> >>>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> >>>     > @@ -5640,37 +5640,6 @@ static u64
> gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
> >>>     >       return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit
> rptr */
> >>>     >  }
> >>>     >
> >>>     > -static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     u64 wptr;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell) {
> >>>     > -             wptr = atomic64_read((atomic64_t
> *)ring->wptr_cpu_addr);
> >>>     > -     } else {
> >>>     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr read on
> ring %s, "
> >>>     > -                       "only doorbell method supported on
> gfx9\n",
> >>>     > -                       ring->name);
> >>>     > -             wptr = 0;
> >>>     > -     }
> >>>     > -     return wptr;
> >>>     > -}
> >>>     > -
> >>>     > -static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring
> *ring)
> >>>     > -{
> >>>     > -     struct amdgpu_device *adev = ring->adev;
> >>>     > -
> >>>     > -     /* XXX check if swapping is necessary on BE */
> >>>     > -     if (ring->use_doorbell) {
> >>>     > -             atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
> ring->wptr);
> >>>     > -             WDOORBELL64(ring->doorbell_index, ring->wptr);
> >>>     > -     } else {
> >>>     > -             WARN_ONCE(1, "gfx_v9_0: non-doorbell wptr write on
> ring %s, "
> >>>     > -                       "only doorbell method supported on
> gfx9\n",
> >>>     > -                       ring->name);
> >>>     > -     }
> >>>     > -}
> >>>     > -
> >>>     >  static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring
> *ring, u64 addr,
> >>>     >                                        u64 seq, unsigned int
> flags)
> >>>     >  {
> >>>     > @@ -7627,8 +7596,8 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_compute = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
> >>>     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> >>>     > @@ -7669,8 +7638,8 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_kiq = {
> >>>     >       .nop = PACKET3(PACKET3_NOP, 0x3FFF),
> >>>     >       .support_64bit_ptrs = true,
> >>>     >       .get_rptr = gfx_v9_0_ring_get_rptr_compute,
> >>>     > -     .get_wptr = gfx_v9_0_ring_get_wptr_compute,
> >>>     > -     .set_wptr = gfx_v9_0_ring_set_wptr_compute,
> >>>     > +     .get_wptr = amdgpu_gfx_get_wptr_compute,
> >>>     > +     .set_wptr = amdgpu_gfx_set_wptr_compute,
> >>>     >       .emit_frame_size =
> >>>     >               20 + /* gfx_v9_0_ring_emit_gds_switch */
> >>>     >               7 + /* gfx_v9_0_ring_emit_hdp_flush */
> >>>
> >>
>
>

[-- Attachment #1.2: Type: text/html, Size: 33480 bytes --]

[-- Attachment #2: 0001-drm-amdgpu-gfx-extract-compute-wptr-doorbell-helpers.patch --]
[-- Type: text/x-patch, Size: 13189 bytes --]

From 87ee6f612896df7f8665abe7da1a68628f8ba379 Mon Sep 17 00:00:00 2001
From: "John B. Moore" <jbmoore61@gmail.com>
Date: Thu, 30 Apr 2026 08:47:09 -0500
Subject: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to
 amdgpu_gfx.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the duplicated doorbell-based get_wptr/set_wptr functions from
gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
in amdgpu_gfx.c.

These functions are not HW generation dependent -- the doorbell path is
identical across all four GFX versions.

The atomic64_t cast is retained via a static inline helper
(wptr_as_atomic) to preserve 32-bit atomicity guarantees, since
DRM_AMDGPU still supports 32-bit kernels where a plain 64-bit
write would tear into two 32-bit writes.

Changes from v1 (Christian König review):
  - Move doorbell prerequisite check to the top of each function as
    if (WARN_ON(!ring->use_doorbell)) return
  - Use WARN_ON, not WARN_ON_ONCE -- this is a functional coding
    error, not a HW error that would spam logs
  - Use rmb()/wmb() for CPU<->device ordering, not smp_rmb()/smp_wmb()
    which are CPU<->CPU only
  - Place rmb() before the read, not after
  - Wrap the atomic64_t cast in a helper to contain the ugliness

Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
wptr access patterns (MMIO registers or wb.wb[] offsets).

Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: John Moore <jbmoore61@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 55 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 ++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 ++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 ++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 31 ++------------
 6 files changed, 74 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b8ca87669..eb03baf5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -2686,3 +2686,58 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
 #endif
 }
 
+/*
+ * Helper to cast the u32 *wptr_cpu_addr to atomic64_t * for 64-bit
+ * atomic access.  We need atomic 64-bit loads/stores here because the
+ * GPU reads/writes this value concurrently and on 32-bit kernels a
+ * plain 64-bit write would tear into two 32-bit writes.
+ */
+static inline atomic64_t *wptr_as_atomic(struct amdgpu_ring *ring)
+{
+	return (atomic64_t *)ring->wptr_cpu_addr;
+}
+
+/**
+ * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Read the write pointer from the doorbell-mapped writeback address.
+ * This is HW-agnostic and shared across GFX generations that use
+ * doorbell-based compute queue management.
+ */
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	if (WARN_ON(!ring->use_doorbell))
+		return 0;
+
+	/* Ensure we see the latest value written by the device */
+	rmb();
+
+	/* XXX check if swapping is necessary on BE */
+	return atomic64_read(wptr_as_atomic(ring));
+}
+
+/**
+ * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Write the write pointer to the doorbell-mapped writeback address and
+ * ring the doorbell.  This is HW-agnostic and shared across GFX
+ * generations that use doorbell-based compute queue management.
+ */
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (WARN_ON(!ring->use_doorbell))
+		return;
+
+	/* XXX check if swapping is necessary on BE */
+	atomic64_set(wptr_as_atomic(ring), ring->wptr);
+
+	/* Ensure the wptr write is visible to the device before ringing the doorbell */
+	wmb();
+
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index a0cf0a3b4..7bf177d5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -661,6 +661,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
 u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
 void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
 
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
+
 void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 58c69dcb5..4ee7b5a92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -8591,31 +8591,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx10 now */
-	}
-}
-
 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -9886,8 +9861,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
@@ -9926,8 +9901,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 2c6f1e25c..c069b7001 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5891,32 +5891,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx11 now */
-	}
-}
-
 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -7331,8 +7305,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		5 + /* update_spm_vmid */
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
@@ -7372,8 +7346,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 6baac533a..1758a6051 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4401,32 +4401,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx12 now */
-	}
-}
-
 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -5553,8 +5527,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
@@ -5591,8 +5565,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /*hdp invalidate */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2eb32f92a..d50016e9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5634,30 +5634,7 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
 }
 
-static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
 
-static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else{
-		BUG(); /* only DOORBELL method supported on gfx9 now */
-	}
-}
 
 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
 					 u64 seq, unsigned int flags)
@@ -7614,8 +7591,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
@@ -7656,8 +7633,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-- 
2.43.0


[-- Attachment #3: 0001-drm-amdgpu-gfx-extract-compute-wptr-doorbel-option-b.patch --]
[-- Type: text/x-patch, Size: 13121 bytes --]

From b3bad3bc072041cd7fdebdad7831e5e70454ec68 Mon Sep 17 00:00:00 2001
From: "John B. Moore" <jbmoore61@gmail.com>
Date: Thu, 30 Apr 2026 08:49:38 -0500
Subject: [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to
 amdgpu_gfx.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the duplicated doorbell-based get_wptr/set_wptr functions from
gfx_v9_0.c, gfx_v10_0.c, gfx_v11_0.c, and gfx_v12_0.c into common
helpers amdgpu_gfx_get_wptr_compute() and amdgpu_gfx_set_wptr_compute()
in amdgpu_gfx.c.

These functions are not HW generation dependent -- the doorbell path is
identical across all four GFX versions.

readq()/writeq() are used despite this being system memory (not MMIO)
because we need atomic 64-bit reads/writes that will not tear on
32-bit kernels.  The semantic mismatch with MMIO accessors is
documented in the function comments.

Changes from v1 (Christian König review):
  - Move doorbell prerequisite check to the top of each function as
    if (WARN_ON(!ring->use_doorbell)) return
  - Use WARN_ON, not WARN_ON_ONCE -- this is a functional coding
    error, not a HW error that would spam logs
  - Use rmb()/wmb() for CPU<->device ordering, not smp_rmb()/smp_wmb()
    which are CPU<->CPU only
  - Place rmb() before the read, not after
  - Replace atomic64_t cast with readq()/writeq()

Not touched: gfx_v7_0, gfx_v8_0, gfx_v9_4_3 -- these have different
wptr access patterns (MMIO registers or wb.wb[] offsets).

Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: John Moore <jbmoore61@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 52 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  3 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 33 ++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 34 ++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c  | 34 ++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 33 ++--------------
 6 files changed, 71 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b8ca87669..f81235a2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -2686,3 +2686,55 @@ void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
 #endif
 }
 
+/**
+ * amdgpu_gfx_get_wptr_compute - common get_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Read the write pointer from the doorbell-mapped writeback address.
+ * This is HW-agnostic and shared across GFX generations that use
+ * doorbell-based compute queue management.
+ *
+ * Note: readq() is used despite this being system memory (not MMIO)
+ * because we need an atomic 64-bit read that won't tear on 32-bit
+ * kernels.
+ */
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring)
+{
+	if (WARN_ON(!ring->use_doorbell))
+		return 0;
+
+	/* Ensure we see the latest value written by the device */
+	rmb();
+
+	/* XXX check if swapping is necessary on BE */
+	return readq((void __iomem *)ring->wptr_cpu_addr);
+}
+
+/**
+ * amdgpu_gfx_set_wptr_compute - common set_wptr for compute rings using doorbells
+ * @ring: amdgpu_ring pointer
+ *
+ * Write the write pointer to the doorbell-mapped writeback address and
+ * ring the doorbell.  This is HW-agnostic and shared across GFX
+ * generations that use doorbell-based compute queue management.
+ *
+ * Note: writeq() is used despite this being system memory (not MMIO)
+ * because we need an atomic 64-bit write that won't tear on 32-bit
+ * kernels.
+ */
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (WARN_ON(!ring->use_doorbell))
+		return;
+
+	/* XXX check if swapping is necessary on BE */
+	writeq(ring->wptr, (void __iomem *)ring->wptr_cpu_addr);
+
+	/* Ensure the wptr write is visible to the device before ringing the doorbell */
+	wmb();
+
+	WDOORBELL64(ring->doorbell_index, ring->wptr);
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index a0cf0a3b4..7bf177d5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -661,6 +661,9 @@ u32 amdgpu_gfx_csb_preamble_start(u32 *buffer);
 u32 amdgpu_gfx_csb_data_parser(struct amdgpu_device *adev, u32 *buffer, u32 count);
 void amdgpu_gfx_csb_preamble_end(u32 *buffer, u32 count);
 
+u64 amdgpu_gfx_get_wptr_compute(struct amdgpu_ring *ring);
+void amdgpu_gfx_set_wptr_compute(struct amdgpu_ring *ring);
+
 void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 58c69dcb5..4ee7b5a92 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -8591,31 +8591,6 @@ static u64 gfx_v10_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx10 now */
-	}
-}
-
 static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -9886,8 +9861,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
@@ -9926,8 +9901,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v10_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v10_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v10_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v10_0_ring_emit_gds_switch */
 		7 + /* gfx_v10_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 2c6f1e25c..c069b7001 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5891,32 +5891,6 @@ static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx11 now */
-	}
-}
-
 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -7331,8 +7305,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		5 + /* update_spm_vmid */
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
@@ -7372,8 +7346,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v11_0_ring_emit_gds_switch */
 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 6baac533a..1758a6051 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -4401,32 +4401,6 @@ static u64 gfx_v12_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *(uint32_t *)ring->rptr_cpu_addr;
 }
 
-static u64 gfx_v12_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v12_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
-			     ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else {
-		BUG(); /* only DOORBELL method supported on gfx12 now */
-	}
-}
-
 static void gfx_v12_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -5553,8 +5527,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /* hdp invalidate */
@@ -5591,8 +5565,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v12_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v12_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v12_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		7 + /* gfx_v12_0_ring_emit_hdp_flush */
 		5 + /*hdp invalidate */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 2eb32f92a..cd8c62b6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5634,31 +5634,6 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
 }
 
-static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
-{
-	u64 wptr;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell)
-		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
-	else
-		BUG();
-	return wptr;
-}
-
-static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	/* XXX check if swapping is necessary on BE */
-	if (ring->use_doorbell) {
-		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
-		WDOORBELL64(ring->doorbell_index, ring->wptr);
-	} else{
-		BUG(); /* only DOORBELL method supported on gfx9 now */
-	}
-}
-
 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
 					 u64 seq, unsigned int flags)
 {
@@ -7614,8 +7589,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
@@ -7656,8 +7631,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
-	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
-	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
+	.get_wptr = amdgpu_gfx_get_wptr_compute,
+	.set_wptr = amdgpu_gfx_set_wptr_compute,
 	.emit_frame_size =
 		20 + /* gfx_v9_0_ring_emit_gds_switch */
 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-05-01  7:11 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-29 20:25 [PATCH] drm/amdgpu/gfx: extract compute wptr doorbell helpers to amdgpu_gfx.c John B. Moore
  -- strict thread matches above, loose matches on Subject: below --
2026-04-29 20:20 John B. Moore
2026-04-29 20:29 ` Alex Deucher
2026-04-30  7:19 ` Christian König
2026-04-30 12:32   ` John Moore
2026-04-30 13:22     ` Christian König
2026-04-30 13:26       ` Alex Deucher
2026-04-30 13:49         ` Christian König
2026-04-30 13:57           ` John Moore

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox